// Copyright (c) 2009 The Regents of The University of Michigan
// Copyright (c) 2015 Advanced Micro Devices, Inc.
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met: redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer;
// redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution;
// neither the name of the copyright holders nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Authors: Gabe Black

// Body of execute() shared by the register and immediate forms of every
// media microop. Operands are declared and read, the microop's code runs,
// and results are written back only if that code left fault == NoFault.
def template MediaOpExecute {{
    Fault
    %(class_name)s::execute(ExecContext *xc,
            Trace::InstRecord *traceData) const
    {
        Fault fault = NoFault;

        %(op_decl)s;
        %(op_rd)s;

        %(code)s;

        // A faulting microop must not update the execution context.
        if (fault != NoFault)
            return fault;

        // Write the resulting state to the execution context
        %(op_wb)s;
        return fault;
    }
}};

// Class declaration for a media microop whose second operand is a register.
def template MediaOpRegDeclare {{
    class %(class_name)s : public %(base_class)s
    {
      public:
        %(class_name)s(ExtMachInst machInst,
                const char *instMnem, uint64_t setFlags,
                InstRegIndex src1, InstRegIndex src2, InstRegIndex dest,
                uint8_t srcSize, uint8_t destSize, uint16_t ext);

        Fault execute(ExecContext *, Trace::InstRecord *) const;
    };
}};

// Class declaration for a media microop whose second operand is an
// immediate.
def template MediaOpImmDeclare {{

    class %(class_name)s : public %(base_class)s
    {
      public:
        %(class_name)s(ExtMachInst machInst,
                const char *instMnem, uint64_t setFlags,
                InstRegIndex src1, uint16_t imm8, InstRegIndex dest,
                uint8_t srcSize, uint8_t destSize, uint16_t ext);

        Fault execute(ExecContext *, Trace::InstRecord *) const;
    };
}};

// Constructor definition for the register form: forwards every argument
// (plus the mnemonic and op class) to the base class, then runs the
// generated constructor code.
def template MediaOpRegConstructor {{
    %(class_name)s::%(class_name)s(
            ExtMachInst machInst, const char * instMnem, uint64_t setFlags,
            InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
            uint8_t _srcSize, uint8_t _destSize, uint16_t _ext)
        : %(base_class)s(machInst, "%(mnemonic)s", instMnem, setFlags,
                         _src1, _src2, _dest, _srcSize, _destSize, _ext,
                         %(op_class)s)
    {
        %(constructor)s;
    }
}};

// Constructor definition for the immediate form; identical to the register
// form except that the second operand is the immediate value.
def template MediaOpImmConstructor {{
    %(class_name)s::%(class_name)s(
            ExtMachInst machInst, const char * instMnem, uint64_t setFlags,
            InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
            uint8_t _srcSize, uint8_t _destSize, uint16_t _ext)
        : %(base_class)s(machInst, "%(mnemonic)s", instMnem, setFlags,
                         _src1, _imm8, _dest, _srcSize, _destSize, _ext,
                         %(op_class)s)
    {
        %(constructor)s;
    }
}};

let {{
    # Make these empty strings so that concatenating onto
    # them will always work.
    header_output = ""
    decoder_output = ""
    exec_output = ""

    # (declaration, constructor, execute) templates for the immediate form.
    immTemplates = (
        MediaOpImmDeclare,
        MediaOpImmConstructor,
        MediaOpExecute)

    # (declaration, constructor, execute) templates for the register form.
    regTemplates = (
        MediaOpRegDeclare,
        MediaOpRegConstructor,
        MediaOpExecute)

    class MediaOpMeta(type):
        """Metaclass that turns each non-abstract MediaOp subclass into
        generated C++ classes and registers it with the microassembler."""

        # Matches the generic second operand "op2", with an optional "s"
        # prefix and an optional "_<type>" qualifier. Shared by
        # buildCppClasses() and __new__() so that both agree on whether a
        # microop has an immediate variant.
        op2Matcher = re.compile(
                r"(?<!\w)(?P<prefix>s?)op2(?P<typeQual>_[^\W_]+)?")

        def buildCppClasses(self, name, Name, suffix, code):
            """Append the C++ declaration, constructor and execute method
            for this microop to the global output strings.

            name   -- microop mnemonic
            Name   -- base name of the generated C++ class
            suffix -- appended to Name to form the C++ class name
            code   -- C++ body of the microop

            If the code mentions op2, this recurses twice: once with op2
            replaced by the second FP source register and once (mnemonic
            name + "i", class suffix + "Imm") with op2 replaced by imm8.
            """

            # Globals to stick the output in
            global header_output
            global decoder_output
            global exec_output

            # If op2 is used anywhere, make register and immediate versions
            # of this code.
            matcher = self.op2Matcher
            match = matcher.search(code)
            if match:
                typeQual = ""
                if match.group("typeQual"):
                    typeQual = match.group("typeQual")
                src2_name = "%sFpSrcReg2%s" % (match.group("prefix"), typeQual)
                self.buildCppClasses(name, Name, suffix,
                        matcher.sub(src2_name, code))
                self.buildCppClasses(name + "i", Name, suffix + "Imm",
                        matcher.sub("imm8", code))
                return

            base = "X86ISA::MediaOp"

            # If imm8 shows up in the code, use the immediate templates, if
            # not, hopefully the register ones will be correct.
            matcher = re.compile(r"(?<!\w)imm8(?!\w)")
            if matcher.search(code):
                base += "Imm"
                templates = immTemplates
            else:
                base += "Reg"
                templates = regTemplates

            # Get everything ready for the substitution
            opt_args = []
            if self.op_class:
                opt_args.append(self.op_class)
            iop = InstObjParams(name, Name + suffix, base, {"code" : code},
                                opt_args)

            # Generate the actual code (finally!)
            header_output += templates[0].subst(iop)
            decoder_output += templates[1].subst(iop)
            exec_output += templates[2].subst(iop)


        def __new__(mcls, Name, bases, dict):
            """Create the microop class and, unless it is marked abstract,
            generate its C++ classes and register it in microopClasses
            under its lower-cased name (plus name + 'i' when the code uses
            op2)."""
            abstract = False
            name = Name.lower()
            if "abstract" in dict:
                abstract = dict['abstract']
                del dict['abstract']
            if "op_class" not in dict:
                dict["op_class"] = None

            cls = super(MediaOpMeta, mcls).__new__(mcls, Name, bases, dict)
            if not abstract:
                cls.className = Name
                cls.base_mnemonic = name
                code = cls.code

                # Set up the C++ classes
                mcls.buildCppClasses(cls, name, Name, "", code)

                # Hook into the microassembler dict
                global microopClasses
                microopClasses[name] = cls

                # If op2 is used anywhere, make register and immediate versions
                # of this code. Use the same pattern as buildCppClasses() so
                # the immediate mnemonic is only registered when an Imm class
                # was actually generated.
                if mcls.op2Matcher.search(code):
                    microopClasses[name + 'i'] = cls
            return cls


    class MediaOp(X86Microop):
        """Base class of all media microops."""
        __metaclass__ = MediaOpMeta
        # This class itself doesn't act as a microop
        abstract = True

        def __init__(self, dest, src1, op2,
                size = None, destSize = None, srcSize = None, ext = None):
            """Record the operands and element sizes.

            size sets both the source and destination size; srcSize and
            destSize override it individually. Raises Exception if either
            size ends up unset. ext defaults to 0.
            """
            self.dest = dest
            self.src1 = src1
            self.op2 = op2
            if size is not None:
                self.srcSize = size
                self.destSize = size
            if srcSize is not None:
                self.srcSize = srcSize
            if destSize is not None:
                self.destSize = destSize
            # The attributes only exist if one of the branches above ran (or
            # a subclass provides them), so look them up defensively to get
            # the intended error instead of an AttributeError.
            if getattr(self, "srcSize", None) is None:
                raise Exception("Source size not set.")
            if getattr(self, "destSize", None) is None:
                raise Exception("Dest size not set.")
            if ext is None:
                self.ext = 0
            else:
                self.ext = ext

        def getAllocator(self, microFlags):
            """Return the C++ expression that allocates this microop. The
            "Imm" class is used when the microop was assembled under its
            immediate mnemonic (base mnemonic + 'i')."""
            className = self.className
            if self.mnemonic == self.base_mnemonic + 'i':
                className += "Imm"
            allocator = '''new %(class_name)s(machInst, macrocodeBlock,
                    %(flags)s, %(src1)s, %(op2)s, %(dest)s,
                    %(srcSize)s, %(destSize)s, %(ext)s)''' % {
                "class_name" : className,
                "flags" : self.microFlagsText(microFlags),
                "src1" : self.src1, "op2" : self.op2,
                "dest" : self.dest,
                "srcSize" : self.srcSize,
                "destSize" : self.destSize,
                "ext" : self.ext}
            return allocator

    # Move the imm8-selected srcSize-byte element of an FP register into an
    # integer register. Out of range selections leave DestReg unchanged.
    class Mov2int(MediaOp):
        def __init__(self, dest, src1, src2 = 0, \
                size = None, destSize = None, srcSize = None, ext = None):
            super(Mov2int, self).__init__(dest, src1,\
                    src2, size, destSize, srcSize, ext)
        op_class = 'SimdMiscOp'
        code = '''
            int items = sizeof(double) / srcSize;
            int offset = imm8;
            if (bits(src1, 0) && (ext & 0x1))
                offset -= items;
            if (offset >= 0 && offset < items) {
                uint64_t fpSrcReg1 =
                    bits(FpSrcReg1_uqw,
                            (offset + 1) * srcSize * 8 - 1,
                            (offset + 0) * srcSize * 8);
                DestReg = merge(0, fpSrcReg1, destSize);
            } else {
                DestReg = DestReg;
            }
        '''

    # Insert an integer register value into the imm8-selected destSize-byte
    # element of an FP register. Out of range selections leave the
    # destination unchanged.
    class Mov2fp(MediaOp):
        def __init__(self, dest, src1, src2 = 0, \
                size = None, destSize = None, srcSize = None, ext = None):
            super(Mov2fp, self).__init__(dest, src1,\
                    src2, size, destSize, srcSize, ext)
        op_class = 'SimdMiscOp'
        code = '''
            int items = sizeof(double) / destSize;
            int offset = imm8;
            if (bits(dest, 0) && (ext & 0x1))
                offset -= items;
            if (offset >= 0 && offset < items) {
                uint64_t srcReg1 = pick(SrcReg1, 0, srcSize);
                FpDestReg_uqw =
                    insertBits(FpDestReg_uqw,
                            (offset + 1) * destSize * 8 - 1,
                            (offset + 0) * destSize * 8, srcReg1);
            } else {
                FpDestReg_uqw = FpDestReg_uqw;
            }
        '''

    # Collect the top bit of every srcSize-byte element of the FP source and
    # OR them into DestReg, one bit per element. ext bit 0 shifts the bit
    # positions up by the number of elements.
    class Movsign(MediaOp):
        def __init__(self, dest, src, \
                size = None, destSize = None, srcSize = None, ext = None):
            super(Movsign, self).__init__(dest, src,\
                    "InstRegIndex(0)", size, destSize, srcSize, ext)
        op_class = 'SimdMiscOp'
        code = '''
            int items = sizeof(double) / srcSize;
            uint64_t result = 0;
            int offset = (ext & 0x1) ? items : 0;
            for (int i = 0; i < items; i++) {
                uint64_t picked =
                    bits(FpSrcReg1_uqw, (i + 1) * 8 * srcSize - 1);
                result = insertBits(result, i + offset, i + offset, picked);
            }
            DestReg = DestReg | result;
        '''

    # Copy each element of the first source into the destination only where
    # the top bit of the matching element of the second source is set.
    class Maskmov(MediaOp):
        op_class = 'SimdMiscOp'
        code = '''
            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            int items = numItems(size);
            uint64_t result = FpDestReg_uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
                if (bits(FpSrcReg2_uqw, hiIndex))
                    result = insertBits(result, hiIndex, loIndex, arg1Bits);
            }
            FpDestReg_uqw = result;
        '''

    # Build each destination element from an element of either source,
    # chosen by successive selector fields taken from ext.
    class shuffle(MediaOp):
        op_class = 'SimdMiscOp'
        code = '''
            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            int items = sizeof(double) / size;
            int options;
            int optionBits;
            if (size == 8) {
                options = 2;
                optionBits = 1;
            } else {
                options = 4;
                optionBits = 2;
            }

            uint64_t result = 0;
            uint8_t sel = ext;

            for (int i = 0; i < items; i++) {
                uint64_t resBits;
                uint8_t lsel = sel & mask(optionBits);
                if (lsel * size >= sizeof(double)) {
                    lsel -= options / 2;
                    resBits = bits(FpSrcReg2_uqw,
                            (lsel + 1) * sizeBits - 1,
                            (lsel + 0) * sizeBits);
                } else {
                    resBits = bits(FpSrcReg1_uqw,
                            (lsel + 1) * sizeBits - 1,
                            (lsel + 0) * sizeBits);
                }

                sel >>= optionBits;

                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg_uqw = result;
        '''

    # Interleave elements of the two sources (first source in the even
    # slots, second in the odd ones). A non-zero ext takes the elements from
    # the upper half of the sources instead of the lower half.
    class Unpack(MediaOp):
        op_class = 'SimdMiscOp'
        code = '''
            assert(srcSize == destSize);
            int size = destSize;
            int items = (sizeof(double) / size) / 2;
            int offset = ext ? items : 0;
            uint64_t result = 0;
            for (int i = 0; i < items; i++) {
                uint64_t pickedLow =
                    bits(FpSrcReg1_uqw, (i + offset + 1) * 8 * size - 1,
                                        (i + offset) * 8 * size);
                result = insertBits(result,
                                    (2 * i + 1) * 8 * size - 1,
                                    (2 * i + 0) * 8 * size,
                                    pickedLow);
                uint64_t pickedHigh =
                    bits(FpSrcReg2_uqw, (i + offset + 1) * 8 * size - 1,
                                        (i + offset) * 8 * size);
                result = insertBits(result,
                                    (2 * i + 2) * 8 * size - 1,
                                    (2 * i + 1) * 8 * size,
                                    pickedHigh);
            }
            FpDestReg_uqw = result;
        '''

    # Narrow the signed elements of both sources to half their width with
    # saturation. The low half of the result comes from the first source and
    # the high half from the second. signedOp() selects signed saturation,
    # otherwise the result saturates to the unsigned range.
    class Pack(MediaOp):
        op_class = 'SimdMiscOp'
        code = '''
            assert(srcSize == destSize * 2);
            int items = (sizeof(double) / destSize);
            int half = items / 2;
            int destBits = destSize * 8;
            int srcBits = srcSize * 8;
            uint64_t result = 0;
            for (int i = 0; i < items; i++) {
                // Index each source from zero so that the bit positions
                // handed to bits() are never negative.
                int srcIdx = (i < half) ? i : i - half;
                uint64_t srcReg =
                    (i < half) ? FpSrcReg1_uqw : FpSrcReg2_uqw;
                uint64_t picked =
                    bits(srcReg, (srcIdx + 1) * srcBits - 1,
                                 (srcIdx + 0) * srcBits);
                unsigned signBit = bits(picked, srcBits - 1);

                // Handle saturation.
                if (signedOp()) {
                    // The sign bit of the narrow result and everything
                    // above it: all zeroes or all ones if the value fits.
                    uint64_t overflow =
                        bits(picked, srcBits - 1, destBits - 1);
                    if (signBit) {
                        if (overflow != mask(srcBits - destBits + 1))
                            picked = (ULL(1) << (destBits - 1));
                    } else if (overflow != 0) {
                        picked = mask(destBits - 1);
                    }
                } else {
                    // Unsigned result from a signed source: negative
                    // values clamp to zero, too large ones to all ones.
                    if (signBit)
                        picked = 0;
                    else if (bits(picked, srcBits - 1, destBits) != 0)
                        picked = mask(destBits);
                }
                result = insertBits(result,
                                    (i + 1) * destBits - 1,
                                    (i + 0) * destBits,
                                    picked);
            }
            FpDestReg_uqw = result;
        '''

    # Bitwise XOR of the two sources.
    class Mxor(MediaOp):
        def __init__(self, dest, src1, src2):
            super(Mxor, self).__init__(dest, src1, src2, 1)
        op_class = 'SimdAluOp'
        code = '''
            FpDestReg_uqw = FpSrcReg1_uqw ^ FpSrcReg2_uqw;
        '''

    # Bitwise OR of the two sources.
    class Mor(MediaOp):
        def __init__(self, dest, src1, src2):
            super(Mor, self).__init__(dest, src1, src2, 1)
        op_class = 'SimdAluOp'
        code = '''
            FpDestReg_uqw = FpSrcReg1_uqw | FpSrcReg2_uqw;
        '''

    # Bitwise AND of the two sources.
    class Mand(MediaOp):
        def __init__(self, dest, src1, src2):
            super(Mand, self).__init__(dest, src1, src2, 1)
        op_class = 'SimdAluOp'
        code = '''
            FpDestReg_uqw = FpSrcReg1_uqw & FpSrcReg2_uqw;
        '''

    # Bitwise AND of the complement of the first source with the second.
    class Mandn(MediaOp):
        def __init__(self, dest, src1, src2):
            super(Mandn, self).__init__(dest, src1, src2, 1)
        op_class = 'SimdAluOp'
        code = '''
            FpDestReg_uqw = ~FpSrcReg1_uqw & FpSrcReg2_uqw;
        '''

    # Element-wise floating point minimum (4 or 8 byte elements). The second
    # source is chosen whenever "arg1 < arg2" is false.
    class Mminf(MediaOp):
        op_class = 'SimdFloatCmpOp'
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            assert(srcSize == 4 || srcSize == 8);
            int items = numItems(size);
            uint64_t result = FpDestReg_uqw;

            for (int i = 0; i < items; i++) {
                double arg1, arg2;
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
                uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);

                if (size == 4) {
                    floatInt fi;
                    fi.i = arg1Bits;
                    arg1 = fi.f;
                    fi.i = arg2Bits;
                    arg2 = fi.f;
                } else {
                    doubleInt di;
                    di.i = arg1Bits;
                    arg1 = di.d;
                    di.i = arg2Bits;
                    arg2 = di.d;
                }

                if (arg1 < arg2) {
                    result = insertBits(result, hiIndex, loIndex, arg1Bits);
                } else {
                    result = insertBits(result, hiIndex, loIndex, arg2Bits);
                }
            }
            FpDestReg_uqw = result;
        '''

    # Element-wise floating point maximum (4 or 8 byte elements). The second
    # source is chosen whenever "arg1 > arg2" is false.
    class Mmaxf(MediaOp):
        op_class = 'SimdFloatCmpOp'
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            assert(srcSize == 4 || srcSize == 8);
            int items = numItems(size);
            uint64_t result = FpDestReg_uqw;

            for (int i = 0; i < items; i++) {
                double arg1, arg2;
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
                uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);

                if (size == 4) {
                    floatInt fi;
                    fi.i = arg1Bits;
                    arg1 = fi.f;
                    fi.i = arg2Bits;
                    arg2 = fi.f;
                } else {
                    doubleInt di;
                    di.i = arg1Bits;
                    arg1 = di.d;
                    di.i = arg2Bits;
                    arg2 = di.d;
                }

                if (arg1 > arg2) {
                    result = insertBits(result, hiIndex, loIndex, arg1Bits);
                } else {
                    result = insertBits(result, hiIndex, loIndex, arg2Bits);
                }
            }
            FpDestReg_uqw = result;
        '''

    # Element-wise integer minimum; signedOp() selects a signed comparison
    # of the sign-extended elements, otherwise the raw bits are compared.
    class Mmini(MediaOp):
        op_class = 'SimdCmpOp'
        code = '''

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            int items = numItems(size);
            uint64_t result = FpDestReg_uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
                int64_t arg1 = arg1Bits |
                    (0 - (arg1Bits & (ULL(1) << (sizeBits - 1))));
                uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
                int64_t arg2 = arg2Bits |
                    (0 - (arg2Bits & (ULL(1) << (sizeBits - 1))));
                uint64_t resBits;

                if (signedOp()) {
                    if (arg1 < arg2) {
                        resBits = arg1Bits;
                    } else {
                        resBits = arg2Bits;
                    }
                } else {
                    if (arg1Bits < arg2Bits) {
                        resBits = arg1Bits;
                    } else {
                        resBits = arg2Bits;
                    }
                }
                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg_uqw = result;
        '''

    # Element-wise integer maximum; signedOp() selects a signed comparison
    # of the sign-extended elements, otherwise the raw bits are compared.
    class Mmaxi(MediaOp):
        op_class = 'SimdCmpOp'
        code = '''

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            int items = numItems(size);
            uint64_t result = FpDestReg_uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
                int64_t arg1 = arg1Bits |
                    (0 - (arg1Bits & (ULL(1) << (sizeBits - 1))));
                uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
                int64_t arg2 = arg2Bits |
                    (0 - (arg2Bits & (ULL(1) << (sizeBits - 1))));
                uint64_t resBits;

                if (signedOp()) {
                    if (arg1 > arg2) {
                        resBits = arg1Bits;
                    } else {
                        resBits = arg2Bits;
                    }
                } else {
                    if (arg1Bits > arg2Bits) {
                        resBits = arg1Bits;
                    } else {
                        resBits = arg2Bits;
                    }
                }
                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg_uqw = result;
        '''

    # Element-wise floating point square root (4 or 8 byte elements).
    class Msqrt(MediaOp):
        op_class = 'SimdFloatSqrtOp'
        def __init__(self, dest, src, \
                size = None, destSize = None, srcSize = None, ext = None):
            super(Msqrt, self).__init__(dest, src,\
                    "InstRegIndex(0)", size, destSize, srcSize, ext)
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            assert(srcSize == 4 || srcSize == 8);
            int items = numItems(size);
            uint64_t result = FpDestReg_uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t argBits = bits(FpSrcReg1_uqw, hiIndex, loIndex);

                if (size == 4) {
                    floatInt fi;
                    fi.i = argBits;
                    fi.f = sqrt(fi.f);
                    argBits = fi.i;
                } else {
                    doubleInt di;
                    di.i = argBits;
                    di.d = sqrt(di.d);
                    argBits = di.i;
                }
                result = insertBits(result, hiIndex, loIndex, argBits);
            }
            FpDestReg_uqw = result;
        '''

    # compute approximate reciprocal --- single-precision only
    class Mrcp(MediaOp):
        def __init__(self, dest, src, \
                size = None, destSize = None, srcSize = None, ext = None):
            super(Mrcp, self).__init__(dest, src,\
                    "InstRegIndex(0)", size, destSize, srcSize, ext)
        op_class = 'SimdFloatAluOp'
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };

            assert(srcSize == 4);  // ISA defines single-precision only
            assert(srcSize == destSize);
            const int size = 4;
            const int sizeBits = size * 8;
            int items = numItems(size);
            uint64_t result = FpDestReg_uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t argBits = bits(FpSrcReg1_uqw, hiIndex, loIndex);

                floatInt fi;
                fi.i = argBits;
                // This is more accuracy than HW provides, but oh well
                fi.f = 1.0 / fi.f;
                argBits = fi.i;
                result = insertBits(result, hiIndex, loIndex, argBits);
            }
            FpDestReg_uqw = result;
        '''

    # Element-wise floating point addition (4 or 8 byte elements).
    class Maddf(MediaOp):
        op_class = 'SimdFloatAddOp'
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            assert(srcSize == 4 || srcSize == 8);
            int items = numItems(size);
            uint64_t result = FpDestReg_uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
                uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
                uint64_t resBits;

                if (size == 4) {
                    floatInt arg1, arg2, res;
                    arg1.i = arg1Bits;
                    arg2.i = arg2Bits;
                    res.f = arg1.f + arg2.f;
                    resBits = res.i;
                } else {
                    doubleInt arg1, arg2, res;
                    arg1.i = arg1Bits;
                    arg2.i = arg2Bits;
                    res.d = arg1.d + arg2.d;
                    resBits = res.i;
                }

                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg_uqw = result;
        '''

    # Element-wise floating point subtraction (4 or 8 byte elements).
    class Msubf(MediaOp):
        op_class = 'SimdFloatAddOp'
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            assert(srcSize == 4 || srcSize == 8);
            int items = numItems(size);
            uint64_t result = FpDestReg_uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
                uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
                uint64_t resBits;

                if (size == 4) {
                    floatInt arg1, arg2, res;
                    arg1.i = arg1Bits;
                    arg2.i = arg2Bits;
                    res.f = arg1.f - arg2.f;
                    resBits = res.i;
                } else {
                    doubleInt arg1, arg2, res;
                    arg1.i = arg1Bits;
                    arg2.i = arg2Bits;
                    res.d = arg1.d - arg2.d;
                    resBits = res.i;
                }

                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg_uqw = result;
        '''

    # Element-wise floating point multiplication (4 or 8 byte elements).
    class Mmulf(MediaOp):
        op_class = 'SimdFloatMultOp'
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            assert(srcSize == 4 || srcSize == 8);
            int items = numItems(size);
            uint64_t result = FpDestReg_uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
                uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
                uint64_t resBits;

                if (size == 4) {
                    floatInt arg1, arg2, res;
                    arg1.i = arg1Bits;
                    arg2.i = arg2Bits;
                    res.f = arg1.f * arg2.f;
                    resBits = res.i;
                } else {
                    doubleInt arg1, arg2, res;
                    arg1.i = arg1Bits;
                    arg2.i = arg2Bits;
                    res.d = arg1.d * arg2.d;
                    resBits = res.i;
                }

                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg_uqw = result;
        '''

    # Element-wise floating point division (4 or 8 byte elements).
    class Mdivf(MediaOp):
        op_class = 'SimdFloatDivOp'
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            assert(srcSize == 4 || srcSize == 8);
            int items = numItems(size);
            uint64_t result = FpDestReg_uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
                uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
                uint64_t resBits;

                if (size == 4) {
                    floatInt arg1, arg2, res;
                    arg1.i = arg1Bits;
                    arg2.i = arg2Bits;
                    res.f = arg1.f / arg2.f;
                    resBits = res.i;
                } else {
                    doubleInt arg1, arg2, res;
                    arg1.i = arg1Bits;
                    arg2.i = arg2Bits;
                    res.d = arg1.d / arg2.d;
                    resBits = res.i;
                }

                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg_uqw = result;
        '''

    # Element-wise integer addition. ext bit 1 enables saturation, signed or
    # unsigned according to signedOp().
    class Maddi(MediaOp):
        op_class = 'SimdAddOp'
        code = '''
            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            int items = numItems(size);
            uint64_t result = FpDestReg_uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
                uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
                uint64_t resBits = arg1Bits + arg2Bits;

                if (ext & 0x2) {
                    if (signedOp()) {
                        int arg1Sign = bits(arg1Bits, sizeBits - 1);
                        int arg2Sign = bits(arg2Bits, sizeBits - 1);
                        int resSign = bits(resBits, sizeBits - 1);
                        if ((arg1Sign == arg2Sign) && (arg1Sign != resSign)) {
                            if (resSign == 0)
                                resBits = (ULL(1) << (sizeBits - 1));
                            else
                                resBits = mask(sizeBits - 1);
                        }
                    } else {
                        if (findCarry(sizeBits, resBits, arg1Bits, arg2Bits))
                            resBits = mask(sizeBits);
                    }
                }

                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg_uqw = result;
        '''

    # Element-wise integer subtraction. ext bit 1 enables saturation, signed
    # or unsigned according to signedOp().
    class Msubi(MediaOp):
        op_class = 'SimdAddOp'
        code = '''
            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            int items = numItems(size);
            uint64_t result = FpDestReg_uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
                uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
                uint64_t resBits = arg1Bits - arg2Bits;

                if (ext & 0x2) {
                    if (signedOp()) {
                        int arg1Sign = bits(arg1Bits, sizeBits - 1);
                        int arg2Sign = !bits(arg2Bits, sizeBits - 1);
                        int resSign = bits(resBits, sizeBits - 1);
                        if ((arg1Sign == arg2Sign) && (arg1Sign != resSign)) {
                            if (resSign == 0)
                                resBits = (ULL(1) << (sizeBits - 1));
                            else
                                resBits = mask(sizeBits - 1);
                        }
                    } else {
                        if (arg2Bits > arg1Bits) {
                            resBits = 0;
                        } else if (!findCarry(sizeBits, resBits,
                                             arg1Bits, ~arg2Bits)) {
                            resBits = mask(sizeBits);
                        }
                    }
                }

                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg_uqw = result;
        '''

    # Element-wise integer multiply of srcSize-byte elements into
    # destSize-byte results. signedOp() sign extends the operands, ext bits
    # 4/5 (16/32) change which source elements are used, ext bit 2 adds a
    # rounding constant and multHi() keeps the upper part of each product.
    class Mmuli(MediaOp):
        op_class = 'SimdMultOp'
        code = '''
            int srcBits = srcSize * 8;
            int destBits = destSize * 8;
            assert(destBits <= 64);
            assert(destSize >= srcSize);
            int items = numItems(destSize);
            uint64_t result = FpDestReg_uqw;

            for (int i = 0; i < items; i++) {
                int offset = 0;
                if (ext & 16) {
                    if (ext & 32)
                        offset = i * (destBits - srcBits);
                    else
                        offset = i * (destBits - srcBits) + srcBits;
                }
                int srcHiIndex = (i + 1) * srcBits - 1 + offset;
                int srcLoIndex = (i + 0) * srcBits + offset;
                uint64_t arg1Bits = bits(FpSrcReg1_uqw, srcHiIndex, srcLoIndex);
                uint64_t arg2Bits = bits(FpSrcReg2_uqw, srcHiIndex, srcLoIndex);
                uint64_t resBits;

                if (signedOp()) {
                    int64_t arg1 = arg1Bits |
                        (0 - (arg1Bits & (ULL(1) << (srcBits - 1))));
                    int64_t arg2 = arg2Bits |
                        (0 - (arg2Bits & (ULL(1) << (srcBits - 1))));
                    resBits = (uint64_t)(arg1 * arg2);
                } else {
                    resBits = arg1Bits * arg2Bits;
                }

                if (ext & 0x4)
                    resBits += (ULL(1) << (destBits - 1));

                if (multHi())
                    resBits >>= destBits;

                int destHiIndex = (i + 1) * destBits - 1;
                int destLoIndex = (i + 0) * destBits;
                result = insertBits(result, destHiIndex, destLoIndex, resBits);
            }
            FpDestReg_uqw = result;
        '''

    # Element-wise unsigned average, rounded up: (a + b + 1) / 2.
    class Mavg(MediaOp):
        op_class = 'SimdAddOp'
        code = '''
            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            int items = numItems(size);
            uint64_t result = FpDestReg_uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
                uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
                uint64_t resBits = (arg1Bits + arg2Bits + 1) / 2;

                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg_uqw = result;
        '''

    # Sum of the absolute differences of the elements of the two sources,
    # truncated to destSize bytes.
    class Msad(MediaOp):
        op_class = 'SimdAddOp'
        code = '''
            int srcBits = srcSize * 8;
            int items = sizeof(double) / srcSize;

            uint64_t sum = 0;
            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * srcBits - 1;
                int loIndex = (i + 0) * srcBits;
                uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
                uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
                int64_t resBits = arg1Bits - arg2Bits;
                if (resBits < 0)
                    resBits = -resBits;
                sum += resBits;
            }
            FpDestReg_uqw = sum & mask(destSize * 8);
        '''

    # Element-wise logical right shift by op2 (register or immediate).
    # Shift amounts of at least the element width produce zero.
    class Msrl(MediaOp):
        op_class = 'SimdShiftOp'
        code = '''

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            int items = numItems(size);
            uint64_t shiftAmt = op2_uqw;
            uint64_t result = FpDestReg_uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
                uint64_t resBits;
                if (shiftAmt >= sizeBits) {
                    resBits = 0;
                } else {
                    resBits = (arg1Bits >> shiftAmt) &
                        mask(sizeBits - shiftAmt);
                }

                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg_uqw = result;
        '''

    # Element-wise arithmetic right shift by op2 (register or immediate).
    # Shift amounts of at least the element width fill the element with its
    # sign bit.
    class Msra(MediaOp):
        op_class = 'SimdShiftOp'
        code = '''

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            int items = numItems(size);
            uint64_t shiftAmt = op2_uqw;
            uint64_t result = FpDestReg_uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
                uint64_t resBits;
                if (shiftAmt >= sizeBits) {
                    if (bits(arg1Bits, sizeBits - 1))
                        resBits = mask(sizeBits);
                    else
                        resBits = 0;
                } else {
                    resBits = (arg1Bits >> shiftAmt);
                    resBits = resBits |
                        (0 - (resBits & (ULL(1) << (sizeBits - 1 - shiftAmt))));
                }

                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg_uqw = result;
        '''

    # Element-wise left shift by op2 (register or immediate). Shift amounts
    # of at least the element width produce zero.
    class Msll(MediaOp):
        op_class = 'SimdShiftOp'
        code = '''

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            int items = numItems(size);
            uint64_t shiftAmt = op2_uqw;
            uint64_t result = FpDestReg_uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
                uint64_t resBits;
                if (shiftAmt >= sizeBits) {
                    resBits = 0;
                } else {
                    resBits = (arg1Bits << shiftAmt);
                }

                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg_uqw = result;
        '''

    # Convert floating point elements (4 or 8 bytes) to signed integers (4
    # or 8 bytes). When the sizes differ, ext bit 1 selects the upper half
    # of the wider operand. ext bit 2 adds +/-0.5 before the truncating
    # conversion.
    class Cvtf2i(MediaOp):
        def __init__(self, dest, src, \
                size = None, destSize = None, srcSize = None, ext = None):
            super(Cvtf2i, self).__init__(dest, src,\
                    "InstRegIndex(0)", size, destSize, srcSize, ext)
        op_class = 'SimdFloatCvtOp'
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };

            assert(destSize == 4 || destSize == 8);
            assert(srcSize == 4 || srcSize == 8);
            int srcSizeBits = srcSize * 8;
            int destSizeBits = destSize * 8;
            int items;
            int srcStart = 0;
            int destStart = 0;
            if (srcSize == 2 * destSize) {
                items = numItems(srcSize);
                if (ext & 0x2)
                    destStart = destSizeBits * items;
            } else if (destSize == 2 * srcSize) {
                items = numItems(destSize);
                if (ext & 0x2)
                    srcStart = srcSizeBits * items;
            } else {
                items = numItems(destSize);
            }
            uint64_t result = FpDestReg_uqw;

            for (int i = 0; i < items; i++) {
                int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
                int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
                uint64_t argBits = bits(FpSrcReg1_uqw, srcHiIndex, srcLoIndex);
                double arg;

                if (srcSize == 4) {
                    floatInt fi;
                    fi.i = argBits;
                    arg = fi.f;
                } else {
                    doubleInt di;
                    di.i = argBits;
                    arg = di.d;
                }

                if (ext & 0x4) {
                    if (arg >= 0)
                        arg += 0.5;
                    else
                        arg -= 0.5;
                }

                if (destSize == 4) {
                    int32_t i_arg = (int32_t)arg;
                    argBits = *((uint32_t*)&i_arg);
                } else {
                    int64_t i_arg = (int64_t)arg;
                    argBits = *((uint64_t*)&i_arg);
                }
                int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
                int destLoIndex = destStart + (i + 0) * destSizeBits;
                result = insertBits(result, destHiIndex, destLoIndex, argBits);
            }
            FpDestReg_uqw = result;
        '''

    class Cvti2f(MediaOp):
        def __init__(self,
dest, src, \ 1274 size = None, destSize = None, srcSize = None, ext = None): 1275 super(Cvti2f, self).__init__(dest, src,\ 1276 "InstRegIndex(0)", size, destSize, srcSize, ext) 1277 op_class = 'SimdFloatCvtOp' 1278 code = ''' 1279 union floatInt 1280 { 1281 float f; 1282 uint32_t i; 1283 }; 1284 union doubleInt 1285 { 1286 double d; 1287 uint64_t i; 1288 }; 1289 1290 assert(destSize == 4 || destSize == 8); 1291 assert(srcSize == 4 || srcSize == 8); 1292 int srcSizeBits = srcSize * 8; 1293 int destSizeBits = destSize * 8; 1294 int items; 1295 int srcStart = 0; 1296 int destStart = 0; 1297 if (srcSize == 2 * destSize) { 1298 items = numItems(srcSize); 1299 if (ext & 0x2) 1300 destStart = destSizeBits * items; 1301 } else if (destSize == 2 * srcSize) { 1302 items = numItems(destSize); 1303 if (ext & 0x2) 1304 srcStart = srcSizeBits * items; 1305 } else { 1306 items = numItems(destSize); 1307 } 1308 uint64_t result = FpDestReg_uqw; 1309 1310 for (int i = 0; i < items; i++) { 1311 int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1; 1312 int srcLoIndex = srcStart + (i + 0) * srcSizeBits; 1313 uint64_t argBits = bits(FpSrcReg1_uqw, srcHiIndex, srcLoIndex); 1314 1315 int64_t sArg = argBits | 1316 (0 - (argBits & (ULL(1) << (srcSizeBits - 1)))); 1317 double arg = sArg; 1318 1319 if (destSize == 4) { 1320 floatInt fi; 1321 fi.f = arg; 1322 argBits = fi.i; 1323 } else { 1324 doubleInt di; 1325 di.d = arg; 1326 argBits = di.i; 1327 } 1328 int destHiIndex = destStart + (i + 1) * destSizeBits - 1; 1329 int destLoIndex = destStart + (i + 0) * destSizeBits; 1330 result = insertBits(result, destHiIndex, destLoIndex, argBits); 1331 } 1332 FpDestReg_uqw = result; 1333 ''' 1334 1335 class Cvtf2f(MediaOp): 1336 def __init__(self, dest, src, \ 1337 size = None, destSize = None, srcSize = None, ext = None): 1338 super(Cvtf2f, self).__init__(dest, src,\ 1339 "InstRegIndex(0)", size, destSize, srcSize, ext) 1340 op_class = 'SimdFloatCvtOp' 1341 code = ''' 1342 union floatInt 1343 { 1344 
float f; 1345 uint32_t i; 1346 }; 1347 union doubleInt 1348 { 1349 double d; 1350 uint64_t i; 1351 }; 1352 1353 assert(destSize == 4 || destSize == 8); 1354 assert(srcSize == 4 || srcSize == 8); 1355 int srcSizeBits = srcSize * 8; 1356 int destSizeBits = destSize * 8; 1357 int items; 1358 int srcStart = 0; 1359 int destStart = 0; 1360 if (srcSize == 2 * destSize) { 1361 items = numItems(srcSize); 1362 if (ext & 0x2) 1363 destStart = destSizeBits * items; 1364 } else if (destSize == 2 * srcSize) { 1365 items = numItems(destSize); 1366 if (ext & 0x2) 1367 srcStart = srcSizeBits * items; 1368 } else { 1369 items = numItems(destSize); 1370 } 1371 uint64_t result = FpDestReg_uqw; 1372 1373 for (int i = 0; i < items; i++) { 1374 int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1; 1375 int srcLoIndex = srcStart + (i + 0) * srcSizeBits; 1376 uint64_t argBits = bits(FpSrcReg1_uqw, srcHiIndex, srcLoIndex); 1377 double arg; 1378 1379 if (srcSize == 4) { 1380 floatInt fi; 1381 fi.i = argBits; 1382 arg = fi.f; 1383 } else { 1384 doubleInt di; 1385 di.i = argBits; 1386 arg = di.d; 1387 } 1388 if (destSize == 4) { 1389 floatInt fi; 1390 fi.f = arg; 1391 argBits = fi.i; 1392 } else { 1393 doubleInt di; 1394 di.d = arg; 1395 argBits = di.i; 1396 } 1397 int destHiIndex = destStart + (i + 1) * destSizeBits - 1; 1398 int destLoIndex = destStart + (i + 0) * destSizeBits; 1399 result = insertBits(result, destHiIndex, destLoIndex, argBits); 1400 } 1401 FpDestReg_uqw = result; 1402 ''' 1403 1404 class Mcmpi2r(MediaOp): 1405 op_class = 'SimdCvtOp' 1406 code = ''' 1407 union floatInt 1408 { 1409 float f; 1410 uint32_t i; 1411 }; 1412 union doubleInt 1413 { 1414 double d; 1415 uint64_t i; 1416 }; 1417 1418 assert(srcSize == destSize); 1419 int size = srcSize; 1420 int sizeBits = size * 8; 1421 int items = numItems(size); 1422 uint64_t result = FpDestReg_uqw; 1423 1424 for (int i = 0; i < items; i++) { 1425 int hiIndex = (i + 1) * sizeBits - 1; 1426 int loIndex = (i + 0) * sizeBits; 1427 
uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex); 1428 int64_t arg1 = arg1Bits | 1429 (0 - (arg1Bits & (ULL(1) << (sizeBits - 1)))); 1430 uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex); 1431 int64_t arg2 = arg2Bits | 1432 (0 - (arg2Bits & (ULL(1) << (sizeBits - 1)))); 1433 1434 uint64_t resBits = 0; 1435 if (((ext & 0x2) == 0 && arg1 == arg2) || 1436 ((ext & 0x2) == 0x2 && arg1 > arg2)) 1437 resBits = mask(sizeBits); 1438 1439 result = insertBits(result, hiIndex, loIndex, resBits); 1440 } 1441 FpDestReg_uqw = result; 1442 ''' 1443 1444 class Mcmpf2r(MediaOp): 1445 op_class = 'SimdFloatCvtOp' 1446 code = ''' 1447 union floatInt 1448 { 1449 float f; 1450 uint32_t i; 1451 }; 1452 union doubleInt 1453 { 1454 double d; 1455 uint64_t i; 1456 }; 1457 1458 assert(srcSize == destSize); 1459 int size = srcSize; 1460 int sizeBits = size * 8; 1461 int items = numItems(size); 1462 uint64_t result = FpDestReg_uqw; 1463 1464 for (int i = 0; i < items; i++) { 1465 int hiIndex = (i + 1) * sizeBits - 1; 1466 int loIndex = (i + 0) * sizeBits; 1467 uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex); 1468 uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex); 1469 double arg1, arg2; 1470 1471 if (size == 4) { 1472 floatInt fi; 1473 fi.i = arg1Bits; 1474 arg1 = fi.f; 1475 fi.i = arg2Bits; 1476 arg2 = fi.f; 1477 } else { 1478 doubleInt di; 1479 di.i = arg1Bits; 1480 arg1 = di.d; 1481 di.i = arg2Bits; 1482 arg2 = di.d; 1483 } 1484 1485 uint64_t resBits = 0; 1486 bool nanop = std::isnan(arg1) || std::isnan(arg2); 1487 switch (ext & mask(3)) { 1488 case 0: 1489 if (arg1 == arg2 && !nanop) 1490 resBits = mask(sizeBits); 1491 break; 1492 case 1: 1493 if (arg1 < arg2 && !nanop) 1494 resBits = mask(sizeBits); 1495 break; 1496 case 2: 1497 if (arg1 <= arg2 && !nanop) 1498 resBits = mask(sizeBits); 1499 break; 1500 case 3: 1501 if (nanop) 1502 resBits = mask(sizeBits); 1503 break; 1504 case 4: 1505 if (arg1 != arg2 || nanop) 1506 resBits = mask(sizeBits); 1507 
break; 1508 case 5: 1509 if (!(arg1 < arg2) || nanop) 1510 resBits = mask(sizeBits); 1511 break; 1512 case 6: 1513 if (!(arg1 <= arg2) || nanop) 1514 resBits = mask(sizeBits); 1515 break; 1516 case 7: 1517 if (!nanop) 1518 resBits = mask(sizeBits); 1519 break; 1520 }; 1521 1522 result = insertBits(result, hiIndex, loIndex, resBits); 1523 } 1524 FpDestReg_uqw = result; 1525 ''' 1526 1527 class Mcmpf2rf(MediaOp): 1528 def __init__(self, src1, src2,\ 1529 size = None, destSize = None, srcSize = None, ext = None): 1530 super(Mcmpf2rf, self).__init__("InstRegIndex(0)", src1,\ 1531 src2, size, destSize, srcSize, ext) 1532 op_class = 'SimdFloatCvtOp' 1533 code = ''' 1534 union floatInt 1535 { 1536 float f; 1537 uint32_t i; 1538 }; 1539 union doubleInt 1540 { 1541 double d; 1542 uint64_t i; 1543 }; 1544 1545 assert(srcSize == destSize); 1546 assert(srcSize == 4 || srcSize == 8); 1547 int size = srcSize; 1548 int sizeBits = size * 8; 1549 1550 double arg1, arg2; 1551 uint64_t arg1Bits = bits(FpSrcReg1_uqw, sizeBits - 1, 0); 1552 uint64_t arg2Bits = bits(FpSrcReg2_uqw, sizeBits - 1, 0); 1553 if (size == 4) { 1554 floatInt fi; 1555 fi.i = arg1Bits; 1556 arg1 = fi.f; 1557 fi.i = arg2Bits; 1558 arg2 = fi.f; 1559 } else { 1560 doubleInt di; 1561 di.i = arg1Bits; 1562 arg1 = di.d; 1563 di.i = arg2Bits; 1564 arg2 = di.d; 1565 } 1566 1567 // ZF PF CF 1568 // Unordered 1 1 1 1569 // Greater than 0 0 0 1570 // Less than 0 0 1 1571 // Equal 1 0 0 1572 // OF = SF = AF = 0 1573 ccFlagBits = ccFlagBits & ~(SFBit | AFBit | ZFBit | PFBit); 1574 cfofBits = cfofBits & ~(OFBit | CFBit); 1575 1576 if (std::isnan(arg1) || std::isnan(arg2)) { 1577 ccFlagBits = ccFlagBits | (ZFBit | PFBit); 1578 cfofBits = cfofBits | CFBit; 1579 } 1580 else if(arg1 < arg2) 1581 cfofBits = cfofBits | CFBit; 1582 else if(arg1 == arg2) 1583 ccFlagBits = ccFlagBits | ZFBit; 1584 ''' 1585 1586 class Emms(MediaOp): 1587 op_class = 'FloatMiscOp' 1588 def __init__(self): 1589 super(Emms, 
self).__init__('InstRegIndex(MISCREG_FTW)', 1590 'InstRegIndex(0)', 'InstRegIndex(0)', 2) 1591 code = 'FTW = 0xFFFF;' 1592}}; 1593