// mediaop.isa revision 12234:78ece221f9f5
// Copyright (c) 2009 The Regents of The University of Michigan
// Copyright (c) 2015 Advanced Micro Devices, Inc.
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met: redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer;
// redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution;
// neither the name of the copyright holders nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28// 29// Authors: Gabe Black 30 31def template MediaOpExecute {{ 32 Fault %(class_name)s::execute(ExecContext *xc, 33 Trace::InstRecord *traceData) const 34 { 35 Fault fault = NoFault; 36 37 %(op_decl)s; 38 %(op_rd)s; 39 40 %(code)s; 41 42 //Write the resulting state to the execution context 43 if(fault == NoFault) 44 { 45 %(op_wb)s; 46 } 47 return fault; 48 } 49}}; 50 51def template MediaOpRegDeclare {{ 52 class %(class_name)s : public %(base_class)s 53 { 54 public: 55 %(class_name)s(ExtMachInst _machInst, 56 const char * instMnem, uint64_t setFlags, 57 InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest, 58 uint8_t _srcSize, uint8_t _destSize, uint16_t _ext); 59 60 %(BasicExecDeclare)s 61 }; 62}}; 63 64def template MediaOpImmDeclare {{ 65 66 class %(class_name)s : public %(base_class)s 67 { 68 public: 69 %(class_name)s(ExtMachInst _machInst, 70 const char * instMnem, uint64_t setFlags, 71 InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest, 72 uint8_t _srcSize, uint8_t _destSize, uint16_t _ext); 73 74 %(BasicExecDeclare)s 75 }; 76}}; 77 78def template MediaOpRegConstructor {{ 79 %(class_name)s::%(class_name)s( 80 ExtMachInst machInst, const char * instMnem, uint64_t setFlags, 81 InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest, 82 uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) : 83 %(base_class)s(machInst, "%(mnemonic)s", instMnem, setFlags, 84 _src1, _src2, _dest, _srcSize, _destSize, _ext, 85 %(op_class)s) 86 { 87 %(constructor)s; 88 } 89}}; 90 91def template MediaOpImmConstructor {{ 92 %(class_name)s::%(class_name)s( 93 ExtMachInst machInst, const char * instMnem, uint64_t setFlags, 94 InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest, 95 uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) : 96 %(base_class)s(machInst, "%(mnemonic)s", instMnem, setFlags, 97 _src1, _imm8, _dest, _srcSize, _destSize, _ext, 98 %(op_class)s) 99 { 100 %(constructor)s; 101 } 102}}; 103 104let {{ 105 # Make these empty strings so that concatenating onto 
106 # them will always work. 107 header_output = "" 108 decoder_output = "" 109 exec_output = "" 110 111 immTemplates = ( 112 MediaOpImmDeclare, 113 MediaOpImmConstructor, 114 MediaOpExecute) 115 116 regTemplates = ( 117 MediaOpRegDeclare, 118 MediaOpRegConstructor, 119 MediaOpExecute) 120 121 class MediaOpMeta(type): 122 def buildCppClasses(self, name, Name, suffix, code): 123 124 # Globals to stick the output in 125 global header_output 126 global decoder_output 127 global exec_output 128 129 # If op2 is used anywhere, make register and immediate versions 130 # of this code. 131 matcher = re.compile(r"(?<!\w)(?P<prefix>s?)op2(?P<typeQual>_[^\W_]+)?") 132 match = matcher.search(code) 133 if match: 134 typeQual = "" 135 if match.group("typeQual"): 136 typeQual = match.group("typeQual") 137 src2_name = "%sFpSrcReg2%s" % (match.group("prefix"), typeQual) 138 self.buildCppClasses(name, Name, suffix, 139 matcher.sub(src2_name, code)) 140 self.buildCppClasses(name + "i", Name, suffix + "Imm", 141 matcher.sub("imm8", code)) 142 return 143 144 base = "X86ISA::MediaOp" 145 146 # If imm8 shows up in the code, use the immediate templates, if 147 # not, hopefully the register ones will be correct. 148 matcher = re.compile("(?<!\w)imm8(?!\w)") 149 if matcher.search(code): 150 base += "Imm" 151 templates = immTemplates 152 else: 153 base += "Reg" 154 templates = regTemplates 155 156 # Get everything ready for the substitution 157 iop = InstObjParams(name, Name + suffix, base, {"code" : code}) 158 159 # Generate the actual code (finally!) 
160 header_output += templates[0].subst(iop) 161 decoder_output += templates[1].subst(iop) 162 exec_output += templates[2].subst(iop) 163 164 165 def __new__(mcls, Name, bases, dict): 166 abstract = False 167 name = Name.lower() 168 if "abstract" in dict: 169 abstract = dict['abstract'] 170 del dict['abstract'] 171 172 cls = super(MediaOpMeta, mcls).__new__(mcls, Name, bases, dict) 173 if not abstract: 174 cls.className = Name 175 cls.base_mnemonic = name 176 code = cls.code 177 178 # Set up the C++ classes 179 mcls.buildCppClasses(cls, name, Name, "", code) 180 181 # Hook into the microassembler dict 182 global microopClasses 183 microopClasses[name] = cls 184 185 # If op2 is used anywhere, make register and immediate versions 186 # of this code. 187 matcher = re.compile(r"op2(?P<typeQual>_[^\W_]+)?") 188 if matcher.search(code): 189 microopClasses[name + 'i'] = cls 190 return cls 191 192 193 class MediaOp(X86Microop): 194 __metaclass__ = MediaOpMeta 195 # This class itself doesn't act as a microop 196 abstract = True 197 198 def __init__(self, dest, src1, op2, 199 size = None, destSize = None, srcSize = None, ext = None): 200 self.dest = dest 201 self.src1 = src1 202 self.op2 = op2 203 if size is not None: 204 self.srcSize = size 205 self.destSize = size 206 if srcSize is not None: 207 self.srcSize = srcSize 208 if destSize is not None: 209 self.destSize = destSize 210 if self.srcSize is None: 211 raise Exception, "Source size not set." 212 if self.destSize is None: 213 raise Exception, "Dest size not set." 
214 if ext is None: 215 self.ext = 0 216 else: 217 self.ext = ext 218 219 def getAllocator(self, microFlags): 220 className = self.className 221 if self.mnemonic == self.base_mnemonic + 'i': 222 className += "Imm" 223 allocator = '''new %(class_name)s(machInst, macrocodeBlock, 224 %(flags)s, %(src1)s, %(op2)s, %(dest)s, 225 %(srcSize)s, %(destSize)s, %(ext)s)''' % { 226 "class_name" : className, 227 "flags" : self.microFlagsText(microFlags), 228 "src1" : self.src1, "op2" : self.op2, 229 "dest" : self.dest, 230 "srcSize" : self.srcSize, 231 "destSize" : self.destSize, 232 "ext" : self.ext} 233 return allocator 234 235 class Mov2int(MediaOp): 236 def __init__(self, dest, src1, src2 = 0, \ 237 size = None, destSize = None, srcSize = None, ext = None): 238 super(Mov2int, self).__init__(dest, src1,\ 239 src2, size, destSize, srcSize, ext) 240 code = ''' 241 int items = sizeof(FloatRegBits) / srcSize; 242 int offset = imm8; 243 if (bits(src1, 0) && (ext & 0x1)) 244 offset -= items; 245 if (offset >= 0 && offset < items) { 246 uint64_t fpSrcReg1 = 247 bits(FpSrcReg1_uqw, 248 (offset + 1) * srcSize * 8 - 1, 249 (offset + 0) * srcSize * 8); 250 DestReg = merge(0, fpSrcReg1, destSize); 251 } else { 252 DestReg = DestReg; 253 } 254 ''' 255 256 class Mov2fp(MediaOp): 257 def __init__(self, dest, src1, src2 = 0, \ 258 size = None, destSize = None, srcSize = None, ext = None): 259 super(Mov2fp, self).__init__(dest, src1,\ 260 src2, size, destSize, srcSize, ext) 261 code = ''' 262 int items = sizeof(FloatRegBits) / destSize; 263 int offset = imm8; 264 if (bits(dest, 0) && (ext & 0x1)) 265 offset -= items; 266 if (offset >= 0 && offset < items) { 267 uint64_t srcReg1 = pick(SrcReg1, 0, srcSize); 268 FpDestReg_uqw = 269 insertBits(FpDestReg_uqw, 270 (offset + 1) * destSize * 8 - 1, 271 (offset + 0) * destSize * 8, srcReg1); 272 } else { 273 FpDestReg_uqw = FpDestReg_uqw; 274 } 275 ''' 276 277 class Movsign(MediaOp): 278 def __init__(self, dest, src, \ 279 size = None, destSize = 
None, srcSize = None, ext = None): 280 super(Movsign, self).__init__(dest, src,\ 281 "InstRegIndex(0)", size, destSize, srcSize, ext) 282 code = ''' 283 int items = sizeof(FloatRegBits) / srcSize; 284 uint64_t result = 0; 285 int offset = (ext & 0x1) ? items : 0; 286 for (int i = 0; i < items; i++) { 287 uint64_t picked = 288 bits(FpSrcReg1_uqw, (i + 1) * 8 * srcSize - 1); 289 result = insertBits(result, i + offset, i + offset, picked); 290 } 291 DestReg = DestReg | result; 292 ''' 293 294 class Maskmov(MediaOp): 295 code = ''' 296 assert(srcSize == destSize); 297 int size = srcSize; 298 int sizeBits = size * 8; 299 int items = numItems(size); 300 uint64_t result = FpDestReg_uqw; 301 302 for (int i = 0; i < items; i++) { 303 int hiIndex = (i + 1) * sizeBits - 1; 304 int loIndex = (i + 0) * sizeBits; 305 uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex); 306 if (bits(FpSrcReg2_uqw, hiIndex)) 307 result = insertBits(result, hiIndex, loIndex, arg1Bits); 308 } 309 FpDestReg_uqw = result; 310 ''' 311 312 class shuffle(MediaOp): 313 code = ''' 314 assert(srcSize == destSize); 315 int size = srcSize; 316 int sizeBits = size * 8; 317 int items = sizeof(FloatRegBits) / size; 318 int options; 319 int optionBits; 320 if (size == 8) { 321 options = 2; 322 optionBits = 1; 323 } else { 324 options = 4; 325 optionBits = 2; 326 } 327 328 uint64_t result = 0; 329 uint8_t sel = ext; 330 331 for (int i = 0; i < items; i++) { 332 uint64_t resBits; 333 uint8_t lsel = sel & mask(optionBits); 334 if (lsel * size >= sizeof(FloatRegBits)) { 335 lsel -= options / 2; 336 resBits = bits(FpSrcReg2_uqw, 337 (lsel + 1) * sizeBits - 1, 338 (lsel + 0) * sizeBits); 339 } else { 340 resBits = bits(FpSrcReg1_uqw, 341 (lsel + 1) * sizeBits - 1, 342 (lsel + 0) * sizeBits); 343 } 344 345 sel >>= optionBits; 346 347 int hiIndex = (i + 1) * sizeBits - 1; 348 int loIndex = (i + 0) * sizeBits; 349 result = insertBits(result, hiIndex, loIndex, resBits); 350 } 351 FpDestReg_uqw = result; 352 ''' 353 
354 class Unpack(MediaOp): 355 code = ''' 356 assert(srcSize == destSize); 357 int size = destSize; 358 int items = (sizeof(FloatRegBits) / size) / 2; 359 int offset = ext ? items : 0; 360 uint64_t result = 0; 361 for (int i = 0; i < items; i++) { 362 uint64_t pickedLow = 363 bits(FpSrcReg1_uqw, (i + offset + 1) * 8 * size - 1, 364 (i + offset) * 8 * size); 365 result = insertBits(result, 366 (2 * i + 1) * 8 * size - 1, 367 (2 * i + 0) * 8 * size, 368 pickedLow); 369 uint64_t pickedHigh = 370 bits(FpSrcReg2_uqw, (i + offset + 1) * 8 * size - 1, 371 (i + offset) * 8 * size); 372 result = insertBits(result, 373 (2 * i + 2) * 8 * size - 1, 374 (2 * i + 1) * 8 * size, 375 pickedHigh); 376 } 377 FpDestReg_uqw = result; 378 ''' 379 380 class Pack(MediaOp): 381 code = ''' 382 assert(srcSize == destSize * 2); 383 int items = (sizeof(FloatRegBits) / destSize); 384 int destBits = destSize * 8; 385 int srcBits = srcSize * 8; 386 uint64_t result = 0; 387 int i; 388 for (i = 0; i < items / 2; i++) { 389 uint64_t picked = 390 bits(FpSrcReg1_uqw, (i + 1) * srcBits - 1, 391 (i + 0) * srcBits); 392 unsigned signBit = bits(picked, srcBits - 1); 393 uint64_t overflow = bits(picked, srcBits - 1, destBits - 1); 394 395 // Handle saturation. 396 if (signBit) { 397 if (overflow != mask(destBits - srcBits + 1)) { 398 if (signedOp()) 399 picked = (ULL(1) << (destBits - 1)); 400 else 401 picked = 0; 402 } 403 } else { 404 if (overflow != 0) { 405 if (signedOp()) 406 picked = mask(destBits - 1); 407 else 408 picked = mask(destBits); 409 } 410 } 411 result = insertBits(result, 412 (i + 1) * destBits - 1, 413 (i + 0) * destBits, 414 picked); 415 } 416 for (;i < items; i++) { 417 uint64_t picked = 418 bits(FpSrcReg2_uqw, (i - items + 1) * srcBits - 1, 419 (i - items + 0) * srcBits); 420 unsigned signBit = bits(picked, srcBits - 1); 421 uint64_t overflow = bits(picked, srcBits - 1, destBits - 1); 422 423 // Handle saturation. 
424 if (signBit) { 425 if (overflow != mask(destBits - srcBits + 1)) { 426 if (signedOp()) 427 picked = (ULL(1) << (destBits - 1)); 428 else 429 picked = 0; 430 } 431 } else { 432 if (overflow != 0) { 433 if (signedOp()) 434 picked = mask(destBits - 1); 435 else 436 picked = mask(destBits); 437 } 438 } 439 result = insertBits(result, 440 (i + 1) * destBits - 1, 441 (i + 0) * destBits, 442 picked); 443 } 444 FpDestReg_uqw = result; 445 ''' 446 447 class Mxor(MediaOp): 448 def __init__(self, dest, src1, src2): 449 super(Mxor, self).__init__(dest, src1, src2, 1) 450 code = ''' 451 FpDestReg_uqw = FpSrcReg1_uqw ^ FpSrcReg2_uqw; 452 ''' 453 454 class Mor(MediaOp): 455 def __init__(self, dest, src1, src2): 456 super(Mor, self).__init__(dest, src1, src2, 1) 457 code = ''' 458 FpDestReg_uqw = FpSrcReg1_uqw | FpSrcReg2_uqw; 459 ''' 460 461 class Mand(MediaOp): 462 def __init__(self, dest, src1, src2): 463 super(Mand, self).__init__(dest, src1, src2, 1) 464 code = ''' 465 FpDestReg_uqw = FpSrcReg1_uqw & FpSrcReg2_uqw; 466 ''' 467 468 class Mandn(MediaOp): 469 def __init__(self, dest, src1, src2): 470 super(Mandn, self).__init__(dest, src1, src2, 1) 471 code = ''' 472 FpDestReg_uqw = ~FpSrcReg1_uqw & FpSrcReg2_uqw; 473 ''' 474 475 class Mminf(MediaOp): 476 code = ''' 477 union floatInt 478 { 479 float f; 480 uint32_t i; 481 }; 482 union doubleInt 483 { 484 double d; 485 uint64_t i; 486 }; 487 488 assert(srcSize == destSize); 489 int size = srcSize; 490 int sizeBits = size * 8; 491 assert(srcSize == 4 || srcSize == 8); 492 int items = numItems(size); 493 uint64_t result = FpDestReg_uqw; 494 495 for (int i = 0; i < items; i++) { 496 double arg1, arg2; 497 int hiIndex = (i + 1) * sizeBits - 1; 498 int loIndex = (i + 0) * sizeBits; 499 uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex); 500 uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex); 501 502 if (size == 4) { 503 floatInt fi; 504 fi.i = arg1Bits; 505 arg1 = fi.f; 506 fi.i = arg2Bits; 507 arg2 = fi.f; 508 } 
else { 509 doubleInt di; 510 di.i = arg1Bits; 511 arg1 = di.d; 512 di.i = arg2Bits; 513 arg2 = di.d; 514 } 515 516 if (arg1 < arg2) { 517 result = insertBits(result, hiIndex, loIndex, arg1Bits); 518 } else { 519 result = insertBits(result, hiIndex, loIndex, arg2Bits); 520 } 521 } 522 FpDestReg_uqw = result; 523 ''' 524 525 class Mmaxf(MediaOp): 526 code = ''' 527 union floatInt 528 { 529 float f; 530 uint32_t i; 531 }; 532 union doubleInt 533 { 534 double d; 535 uint64_t i; 536 }; 537 538 assert(srcSize == destSize); 539 int size = srcSize; 540 int sizeBits = size * 8; 541 assert(srcSize == 4 || srcSize == 8); 542 int items = numItems(size); 543 uint64_t result = FpDestReg_uqw; 544 545 for (int i = 0; i < items; i++) { 546 double arg1, arg2; 547 int hiIndex = (i + 1) * sizeBits - 1; 548 int loIndex = (i + 0) * sizeBits; 549 uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex); 550 uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex); 551 552 if (size == 4) { 553 floatInt fi; 554 fi.i = arg1Bits; 555 arg1 = fi.f; 556 fi.i = arg2Bits; 557 arg2 = fi.f; 558 } else { 559 doubleInt di; 560 di.i = arg1Bits; 561 arg1 = di.d; 562 di.i = arg2Bits; 563 arg2 = di.d; 564 } 565 566 if (arg1 > arg2) { 567 result = insertBits(result, hiIndex, loIndex, arg1Bits); 568 } else { 569 result = insertBits(result, hiIndex, loIndex, arg2Bits); 570 } 571 } 572 FpDestReg_uqw = result; 573 ''' 574 575 class Mmini(MediaOp): 576 code = ''' 577 578 assert(srcSize == destSize); 579 int size = srcSize; 580 int sizeBits = size * 8; 581 int items = numItems(size); 582 uint64_t result = FpDestReg_uqw; 583 584 for (int i = 0; i < items; i++) { 585 int hiIndex = (i + 1) * sizeBits - 1; 586 int loIndex = (i + 0) * sizeBits; 587 uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex); 588 int64_t arg1 = arg1Bits | 589 (0 - (arg1Bits & (ULL(1) << (sizeBits - 1)))); 590 uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex); 591 int64_t arg2 = arg2Bits | 592 (0 - (arg2Bits & (ULL(1) << 
(sizeBits - 1)))); 593 uint64_t resBits; 594 595 if (signedOp()) { 596 if (arg1 < arg2) { 597 resBits = arg1Bits; 598 } else { 599 resBits = arg2Bits; 600 } 601 } else { 602 if (arg1Bits < arg2Bits) { 603 resBits = arg1Bits; 604 } else { 605 resBits = arg2Bits; 606 } 607 } 608 result = insertBits(result, hiIndex, loIndex, resBits); 609 } 610 FpDestReg_uqw = result; 611 ''' 612 613 class Mmaxi(MediaOp): 614 code = ''' 615 616 assert(srcSize == destSize); 617 int size = srcSize; 618 int sizeBits = size * 8; 619 int items = numItems(size); 620 uint64_t result = FpDestReg_uqw; 621 622 for (int i = 0; i < items; i++) { 623 int hiIndex = (i + 1) * sizeBits - 1; 624 int loIndex = (i + 0) * sizeBits; 625 uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex); 626 int64_t arg1 = arg1Bits | 627 (0 - (arg1Bits & (ULL(1) << (sizeBits - 1)))); 628 uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex); 629 int64_t arg2 = arg2Bits | 630 (0 - (arg2Bits & (ULL(1) << (sizeBits - 1)))); 631 uint64_t resBits; 632 633 if (signedOp()) { 634 if (arg1 > arg2) { 635 resBits = arg1Bits; 636 } else { 637 resBits = arg2Bits; 638 } 639 } else { 640 if (arg1Bits > arg2Bits) { 641 resBits = arg1Bits; 642 } else { 643 resBits = arg2Bits; 644 } 645 } 646 result = insertBits(result, hiIndex, loIndex, resBits); 647 } 648 FpDestReg_uqw = result; 649 ''' 650 651 class Msqrt(MediaOp): 652 def __init__(self, dest, src, \ 653 size = None, destSize = None, srcSize = None, ext = None): 654 super(Msqrt, self).__init__(dest, src,\ 655 "InstRegIndex(0)", size, destSize, srcSize, ext) 656 code = ''' 657 union floatInt 658 { 659 float f; 660 uint32_t i; 661 }; 662 union doubleInt 663 { 664 double d; 665 uint64_t i; 666 }; 667 668 assert(srcSize == destSize); 669 int size = srcSize; 670 int sizeBits = size * 8; 671 assert(srcSize == 4 || srcSize == 8); 672 int items = numItems(size); 673 uint64_t result = FpDestReg_uqw; 674 675 for (int i = 0; i < items; i++) { 676 int hiIndex = (i + 1) * sizeBits - 1; 
677 int loIndex = (i + 0) * sizeBits; 678 uint64_t argBits = bits(FpSrcReg1_uqw, hiIndex, loIndex); 679 680 if (size == 4) { 681 floatInt fi; 682 fi.i = argBits; 683 fi.f = sqrt(fi.f); 684 argBits = fi.i; 685 } else { 686 doubleInt di; 687 di.i = argBits; 688 di.d = sqrt(di.d); 689 argBits = di.i; 690 } 691 result = insertBits(result, hiIndex, loIndex, argBits); 692 } 693 FpDestReg_uqw = result; 694 ''' 695 696 # compute approximate reciprocal --- single-precision only 697 class Mrcp(MediaOp): 698 def __init__(self, dest, src, \ 699 size = None, destSize = None, srcSize = None, ext = None): 700 super(Mrcp, self).__init__(dest, src,\ 701 "InstRegIndex(0)", size, destSize, srcSize, ext) 702 code = ''' 703 union floatInt 704 { 705 float f; 706 uint32_t i; 707 }; 708 709 assert(srcSize == 4); // ISA defines single-precision only 710 assert(srcSize == destSize); 711 const int size = 4; 712 const int sizeBits = size * 8; 713 int items = numItems(size); 714 uint64_t result = FpDestReg_uqw; 715 716 for (int i = 0; i < items; i++) { 717 int hiIndex = (i + 1) * sizeBits - 1; 718 int loIndex = (i + 0) * sizeBits; 719 uint64_t argBits = bits(FpSrcReg1_uqw, hiIndex, loIndex); 720 721 floatInt fi; 722 fi.i = argBits; 723 // This is more accuracy than HW provides, but oh well 724 fi.f = 1.0 / fi.f; 725 argBits = fi.i; 726 result = insertBits(result, hiIndex, loIndex, argBits); 727 } 728 FpDestReg_uqw = result; 729 ''' 730 731 class Maddf(MediaOp): 732 code = ''' 733 union floatInt 734 { 735 float f; 736 uint32_t i; 737 }; 738 union doubleInt 739 { 740 double d; 741 uint64_t i; 742 }; 743 744 assert(srcSize == destSize); 745 int size = srcSize; 746 int sizeBits = size * 8; 747 assert(srcSize == 4 || srcSize == 8); 748 int items = numItems(size); 749 uint64_t result = FpDestReg_uqw; 750 751 for (int i = 0; i < items; i++) { 752 int hiIndex = (i + 1) * sizeBits - 1; 753 int loIndex = (i + 0) * sizeBits; 754 uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex); 755 uint64_t 
arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex); 756 uint64_t resBits; 757 758 if (size == 4) { 759 floatInt arg1, arg2, res; 760 arg1.i = arg1Bits; 761 arg2.i = arg2Bits; 762 res.f = arg1.f + arg2.f; 763 resBits = res.i; 764 } else { 765 doubleInt arg1, arg2, res; 766 arg1.i = arg1Bits; 767 arg2.i = arg2Bits; 768 res.d = arg1.d + arg2.d; 769 resBits = res.i; 770 } 771 772 result = insertBits(result, hiIndex, loIndex, resBits); 773 } 774 FpDestReg_uqw = result; 775 ''' 776 777 class Msubf(MediaOp): 778 code = ''' 779 union floatInt 780 { 781 float f; 782 uint32_t i; 783 }; 784 union doubleInt 785 { 786 double d; 787 uint64_t i; 788 }; 789 790 assert(srcSize == destSize); 791 int size = srcSize; 792 int sizeBits = size * 8; 793 assert(srcSize == 4 || srcSize == 8); 794 int items = numItems(size); 795 uint64_t result = FpDestReg_uqw; 796 797 for (int i = 0; i < items; i++) { 798 int hiIndex = (i + 1) * sizeBits - 1; 799 int loIndex = (i + 0) * sizeBits; 800 uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex); 801 uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex); 802 uint64_t resBits; 803 804 if (size == 4) { 805 floatInt arg1, arg2, res; 806 arg1.i = arg1Bits; 807 arg2.i = arg2Bits; 808 res.f = arg1.f - arg2.f; 809 resBits = res.i; 810 } else { 811 doubleInt arg1, arg2, res; 812 arg1.i = arg1Bits; 813 arg2.i = arg2Bits; 814 res.d = arg1.d - arg2.d; 815 resBits = res.i; 816 } 817 818 result = insertBits(result, hiIndex, loIndex, resBits); 819 } 820 FpDestReg_uqw = result; 821 ''' 822 823 class Mmulf(MediaOp): 824 code = ''' 825 union floatInt 826 { 827 float f; 828 uint32_t i; 829 }; 830 union doubleInt 831 { 832 double d; 833 uint64_t i; 834 }; 835 836 assert(srcSize == destSize); 837 int size = srcSize; 838 int sizeBits = size * 8; 839 assert(srcSize == 4 || srcSize == 8); 840 int items = numItems(size); 841 uint64_t result = FpDestReg_uqw; 842 843 for (int i = 0; i < items; i++) { 844 int hiIndex = (i + 1) * sizeBits - 1; 845 int loIndex = (i + 0) * 
sizeBits; 846 uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex); 847 uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex); 848 uint64_t resBits; 849 850 if (size == 4) { 851 floatInt arg1, arg2, res; 852 arg1.i = arg1Bits; 853 arg2.i = arg2Bits; 854 res.f = arg1.f * arg2.f; 855 resBits = res.i; 856 } else { 857 doubleInt arg1, arg2, res; 858 arg1.i = arg1Bits; 859 arg2.i = arg2Bits; 860 res.d = arg1.d * arg2.d; 861 resBits = res.i; 862 } 863 864 result = insertBits(result, hiIndex, loIndex, resBits); 865 } 866 FpDestReg_uqw = result; 867 ''' 868 869 class Mdivf(MediaOp): 870 code = ''' 871 union floatInt 872 { 873 float f; 874 uint32_t i; 875 }; 876 union doubleInt 877 { 878 double d; 879 uint64_t i; 880 }; 881 882 assert(srcSize == destSize); 883 int size = srcSize; 884 int sizeBits = size * 8; 885 assert(srcSize == 4 || srcSize == 8); 886 int items = numItems(size); 887 uint64_t result = FpDestReg_uqw; 888 889 for (int i = 0; i < items; i++) { 890 int hiIndex = (i + 1) * sizeBits - 1; 891 int loIndex = (i + 0) * sizeBits; 892 uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex); 893 uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex); 894 uint64_t resBits; 895 896 if (size == 4) { 897 floatInt arg1, arg2, res; 898 arg1.i = arg1Bits; 899 arg2.i = arg2Bits; 900 res.f = arg1.f / arg2.f; 901 resBits = res.i; 902 } else { 903 doubleInt arg1, arg2, res; 904 arg1.i = arg1Bits; 905 arg2.i = arg2Bits; 906 res.d = arg1.d / arg2.d; 907 resBits = res.i; 908 } 909 910 result = insertBits(result, hiIndex, loIndex, resBits); 911 } 912 FpDestReg_uqw = result; 913 ''' 914 915 class Maddi(MediaOp): 916 code = ''' 917 assert(srcSize == destSize); 918 int size = srcSize; 919 int sizeBits = size * 8; 920 int items = numItems(size); 921 uint64_t result = FpDestReg_uqw; 922 923 for (int i = 0; i < items; i++) { 924 int hiIndex = (i + 1) * sizeBits - 1; 925 int loIndex = (i + 0) * sizeBits; 926 uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex); 927 
uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex); 928 uint64_t resBits = arg1Bits + arg2Bits; 929 930 if (ext & 0x2) { 931 if (signedOp()) { 932 int arg1Sign = bits(arg1Bits, sizeBits - 1); 933 int arg2Sign = bits(arg2Bits, sizeBits - 1); 934 int resSign = bits(resBits, sizeBits - 1); 935 if ((arg1Sign == arg2Sign) && (arg1Sign != resSign)) { 936 if (resSign == 0) 937 resBits = (ULL(1) << (sizeBits - 1)); 938 else 939 resBits = mask(sizeBits - 1); 940 } 941 } else { 942 if (findCarry(sizeBits, resBits, arg1Bits, arg2Bits)) 943 resBits = mask(sizeBits); 944 } 945 } 946 947 result = insertBits(result, hiIndex, loIndex, resBits); 948 } 949 FpDestReg_uqw = result; 950 ''' 951 952 class Msubi(MediaOp): 953 code = ''' 954 assert(srcSize == destSize); 955 int size = srcSize; 956 int sizeBits = size * 8; 957 int items = numItems(size); 958 uint64_t result = FpDestReg_uqw; 959 960 for (int i = 0; i < items; i++) { 961 int hiIndex = (i + 1) * sizeBits - 1; 962 int loIndex = (i + 0) * sizeBits; 963 uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex); 964 uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex); 965 uint64_t resBits = arg1Bits - arg2Bits; 966 967 if (ext & 0x2) { 968 if (signedOp()) { 969 int arg1Sign = bits(arg1Bits, sizeBits - 1); 970 int arg2Sign = !bits(arg2Bits, sizeBits - 1); 971 int resSign = bits(resBits, sizeBits - 1); 972 if ((arg1Sign == arg2Sign) && (arg1Sign != resSign)) { 973 if (resSign == 0) 974 resBits = (ULL(1) << (sizeBits - 1)); 975 else 976 resBits = mask(sizeBits - 1); 977 } 978 } else { 979 if (arg2Bits > arg1Bits) { 980 resBits = 0; 981 } else if (!findCarry(sizeBits, resBits, 982 arg1Bits, ~arg2Bits)) { 983 resBits = mask(sizeBits); 984 } 985 } 986 } 987 988 result = insertBits(result, hiIndex, loIndex, resBits); 989 } 990 FpDestReg_uqw = result; 991 ''' 992 993 class Mmuli(MediaOp): 994 code = ''' 995 int srcBits = srcSize * 8; 996 int destBits = destSize * 8; 997 assert(destBits <= 64); 998 assert(destSize >= 
srcSize); 999 int items = numItems(destSize); 1000 uint64_t result = FpDestReg_uqw; 1001 1002 for (int i = 0; i < items; i++) { 1003 int offset = 0; 1004 if (ext & 16) { 1005 if (ext & 32) 1006 offset = i * (destBits - srcBits); 1007 else 1008 offset = i * (destBits - srcBits) + srcBits; 1009 } 1010 int srcHiIndex = (i + 1) * srcBits - 1 + offset; 1011 int srcLoIndex = (i + 0) * srcBits + offset; 1012 uint64_t arg1Bits = bits(FpSrcReg1_uqw, srcHiIndex, srcLoIndex); 1013 uint64_t arg2Bits = bits(FpSrcReg2_uqw, srcHiIndex, srcLoIndex); 1014 uint64_t resBits; 1015 1016 if (signedOp()) { 1017 int64_t arg1 = arg1Bits | 1018 (0 - (arg1Bits & (ULL(1) << (srcBits - 1)))); 1019 int64_t arg2 = arg2Bits | 1020 (0 - (arg2Bits & (ULL(1) << (srcBits - 1)))); 1021 resBits = (uint64_t)(arg1 * arg2); 1022 } else { 1023 resBits = arg1Bits * arg2Bits; 1024 } 1025 1026 if (ext & 0x4) 1027 resBits += (ULL(1) << (destBits - 1)); 1028 1029 if (multHi()) 1030 resBits >>= destBits; 1031 1032 int destHiIndex = (i + 1) * destBits - 1; 1033 int destLoIndex = (i + 0) * destBits; 1034 result = insertBits(result, destHiIndex, destLoIndex, resBits); 1035 } 1036 FpDestReg_uqw = result; 1037 ''' 1038 1039 class Mavg(MediaOp): 1040 code = ''' 1041 assert(srcSize == destSize); 1042 int size = srcSize; 1043 int sizeBits = size * 8; 1044 int items = numItems(size); 1045 uint64_t result = FpDestReg_uqw; 1046 1047 for (int i = 0; i < items; i++) { 1048 int hiIndex = (i + 1) * sizeBits - 1; 1049 int loIndex = (i + 0) * sizeBits; 1050 uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex); 1051 uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex); 1052 uint64_t resBits = (arg1Bits + arg2Bits + 1) / 2; 1053 1054 result = insertBits(result, hiIndex, loIndex, resBits); 1055 } 1056 FpDestReg_uqw = result; 1057 ''' 1058 1059 class Msad(MediaOp): 1060 code = ''' 1061 int srcBits = srcSize * 8; 1062 int items = sizeof(FloatRegBits) / srcSize; 1063 1064 uint64_t sum = 0; 1065 for (int i = 0; i < items; 
i++) { 1066 int hiIndex = (i + 1) * srcBits - 1; 1067 int loIndex = (i + 0) * srcBits; 1068 uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex); 1069 uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex); 1070 int64_t resBits = arg1Bits - arg2Bits; 1071 if (resBits < 0) 1072 resBits = -resBits; 1073 sum += resBits; 1074 } 1075 FpDestReg_uqw = sum & mask(destSize * 8); 1076 ''' 1077 1078 class Msrl(MediaOp): 1079 code = ''' 1080 1081 assert(srcSize == destSize); 1082 int size = srcSize; 1083 int sizeBits = size * 8; 1084 int items = numItems(size); 1085 uint64_t shiftAmt = op2_uqw; 1086 uint64_t result = FpDestReg_uqw; 1087 1088 for (int i = 0; i < items; i++) { 1089 int hiIndex = (i + 1) * sizeBits - 1; 1090 int loIndex = (i + 0) * sizeBits; 1091 uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex); 1092 uint64_t resBits; 1093 if (shiftAmt >= sizeBits) { 1094 resBits = 0; 1095 } else { 1096 resBits = (arg1Bits >> shiftAmt) & 1097 mask(sizeBits - shiftAmt); 1098 } 1099 1100 result = insertBits(result, hiIndex, loIndex, resBits); 1101 } 1102 FpDestReg_uqw = result; 1103 ''' 1104 1105 class Msra(MediaOp): 1106 code = ''' 1107 1108 assert(srcSize == destSize); 1109 int size = srcSize; 1110 int sizeBits = size * 8; 1111 int items = numItems(size); 1112 uint64_t shiftAmt = op2_uqw; 1113 uint64_t result = FpDestReg_uqw; 1114 1115 for (int i = 0; i < items; i++) { 1116 int hiIndex = (i + 1) * sizeBits - 1; 1117 int loIndex = (i + 0) * sizeBits; 1118 uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex); 1119 uint64_t resBits; 1120 if (shiftAmt >= sizeBits) { 1121 if (bits(arg1Bits, sizeBits - 1)) 1122 resBits = mask(sizeBits); 1123 else 1124 resBits = 0; 1125 } else { 1126 resBits = (arg1Bits >> shiftAmt); 1127 resBits = resBits | 1128 (0 - (resBits & (ULL(1) << (sizeBits - 1 - shiftAmt)))); 1129 } 1130 1131 result = insertBits(result, hiIndex, loIndex, resBits); 1132 } 1133 FpDestReg_uqw = result; 1134 ''' 1135 1136 class Msll(MediaOp): 1137 code = ''' 
1138 1139 assert(srcSize == destSize); 1140 int size = srcSize; 1141 int sizeBits = size * 8; 1142 int items = numItems(size); 1143 uint64_t shiftAmt = op2_uqw; 1144 uint64_t result = FpDestReg_uqw; 1145 1146 for (int i = 0; i < items; i++) { 1147 int hiIndex = (i + 1) * sizeBits - 1; 1148 int loIndex = (i + 0) * sizeBits; 1149 uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex); 1150 uint64_t resBits; 1151 if (shiftAmt >= sizeBits) { 1152 resBits = 0; 1153 } else { 1154 resBits = (arg1Bits << shiftAmt); 1155 } 1156 1157 result = insertBits(result, hiIndex, loIndex, resBits); 1158 } 1159 FpDestReg_uqw = result; 1160 ''' 1161 1162 class Cvtf2i(MediaOp): 1163 def __init__(self, dest, src, \ 1164 size = None, destSize = None, srcSize = None, ext = None): 1165 super(Cvtf2i, self).__init__(dest, src,\ 1166 "InstRegIndex(0)", size, destSize, srcSize, ext) 1167 code = ''' 1168 union floatInt 1169 { 1170 float f; 1171 uint32_t i; 1172 }; 1173 union doubleInt 1174 { 1175 double d; 1176 uint64_t i; 1177 }; 1178 1179 assert(destSize == 4 || destSize == 8); 1180 assert(srcSize == 4 || srcSize == 8); 1181 int srcSizeBits = srcSize * 8; 1182 int destSizeBits = destSize * 8; 1183 int items; 1184 int srcStart = 0; 1185 int destStart = 0; 1186 if (srcSize == 2 * destSize) { 1187 items = numItems(srcSize); 1188 if (ext & 0x2) 1189 destStart = destSizeBits * items; 1190 } else if (destSize == 2 * srcSize) { 1191 items = numItems(destSize); 1192 if (ext & 0x2) 1193 srcStart = srcSizeBits * items; 1194 } else { 1195 items = numItems(destSize); 1196 } 1197 uint64_t result = FpDestReg_uqw; 1198 1199 for (int i = 0; i < items; i++) { 1200 int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1; 1201 int srcLoIndex = srcStart + (i + 0) * srcSizeBits; 1202 uint64_t argBits = bits(FpSrcReg1_uqw, srcHiIndex, srcLoIndex); 1203 double arg; 1204 1205 if (srcSize == 4) { 1206 floatInt fi; 1207 fi.i = argBits; 1208 arg = fi.f; 1209 } else { 1210 doubleInt di; 1211 di.i = argBits; 1212 arg = 
di.d; 1213 } 1214 1215 if (ext & 0x4) { 1216 if (arg >= 0) 1217 arg += 0.5; 1218 else 1219 arg -= 0.5; 1220 } 1221 1222 if (destSize == 4) { 1223 int32_t i_arg = (int32_t)arg; 1224 argBits = *((uint32_t*)&i_arg); 1225 } else { 1226 int64_t i_arg = (int64_t)arg; 1227 argBits = *((uint64_t*)&i_arg); 1228 } 1229 int destHiIndex = destStart + (i + 1) * destSizeBits - 1; 1230 int destLoIndex = destStart + (i + 0) * destSizeBits; 1231 result = insertBits(result, destHiIndex, destLoIndex, argBits); 1232 } 1233 FpDestReg_uqw = result; 1234 ''' 1235 1236 class Cvti2f(MediaOp): 1237 def __init__(self, dest, src, \ 1238 size = None, destSize = None, srcSize = None, ext = None): 1239 super(Cvti2f, self).__init__(dest, src,\ 1240 "InstRegIndex(0)", size, destSize, srcSize, ext) 1241 code = ''' 1242 union floatInt 1243 { 1244 float f; 1245 uint32_t i; 1246 }; 1247 union doubleInt 1248 { 1249 double d; 1250 uint64_t i; 1251 }; 1252 1253 assert(destSize == 4 || destSize == 8); 1254 assert(srcSize == 4 || srcSize == 8); 1255 int srcSizeBits = srcSize * 8; 1256 int destSizeBits = destSize * 8; 1257 int items; 1258 int srcStart = 0; 1259 int destStart = 0; 1260 if (srcSize == 2 * destSize) { 1261 items = numItems(srcSize); 1262 if (ext & 0x2) 1263 destStart = destSizeBits * items; 1264 } else if (destSize == 2 * srcSize) { 1265 items = numItems(destSize); 1266 if (ext & 0x2) 1267 srcStart = srcSizeBits * items; 1268 } else { 1269 items = numItems(destSize); 1270 } 1271 uint64_t result = FpDestReg_uqw; 1272 1273 for (int i = 0; i < items; i++) { 1274 int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1; 1275 int srcLoIndex = srcStart + (i + 0) * srcSizeBits; 1276 uint64_t argBits = bits(FpSrcReg1_uqw, srcHiIndex, srcLoIndex); 1277 1278 int64_t sArg = argBits | 1279 (0 - (argBits & (ULL(1) << (srcSizeBits - 1)))); 1280 double arg = sArg; 1281 1282 if (destSize == 4) { 1283 floatInt fi; 1284 fi.f = arg; 1285 argBits = fi.i; 1286 } else { 1287 doubleInt di; 1288 di.d = arg; 1289 argBits 
= di.i; 1290 } 1291 int destHiIndex = destStart + (i + 1) * destSizeBits - 1; 1292 int destLoIndex = destStart + (i + 0) * destSizeBits; 1293 result = insertBits(result, destHiIndex, destLoIndex, argBits); 1294 } 1295 FpDestReg_uqw = result; 1296 ''' 1297 1298 class Cvtf2f(MediaOp): 1299 def __init__(self, dest, src, \ 1300 size = None, destSize = None, srcSize = None, ext = None): 1301 super(Cvtf2f, self).__init__(dest, src,\ 1302 "InstRegIndex(0)", size, destSize, srcSize, ext) 1303 code = ''' 1304 union floatInt 1305 { 1306 float f; 1307 uint32_t i; 1308 }; 1309 union doubleInt 1310 { 1311 double d; 1312 uint64_t i; 1313 }; 1314 1315 assert(destSize == 4 || destSize == 8); 1316 assert(srcSize == 4 || srcSize == 8); 1317 int srcSizeBits = srcSize * 8; 1318 int destSizeBits = destSize * 8; 1319 int items; 1320 int srcStart = 0; 1321 int destStart = 0; 1322 if (srcSize == 2 * destSize) { 1323 items = numItems(srcSize); 1324 if (ext & 0x2) 1325 destStart = destSizeBits * items; 1326 } else if (destSize == 2 * srcSize) { 1327 items = numItems(destSize); 1328 if (ext & 0x2) 1329 srcStart = srcSizeBits * items; 1330 } else { 1331 items = numItems(destSize); 1332 } 1333 uint64_t result = FpDestReg_uqw; 1334 1335 for (int i = 0; i < items; i++) { 1336 int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1; 1337 int srcLoIndex = srcStart + (i + 0) * srcSizeBits; 1338 uint64_t argBits = bits(FpSrcReg1_uqw, srcHiIndex, srcLoIndex); 1339 double arg; 1340 1341 if (srcSize == 4) { 1342 floatInt fi; 1343 fi.i = argBits; 1344 arg = fi.f; 1345 } else { 1346 doubleInt di; 1347 di.i = argBits; 1348 arg = di.d; 1349 } 1350 if (destSize == 4) { 1351 floatInt fi; 1352 fi.f = arg; 1353 argBits = fi.i; 1354 } else { 1355 doubleInt di; 1356 di.d = arg; 1357 argBits = di.i; 1358 } 1359 int destHiIndex = destStart + (i + 1) * destSizeBits - 1; 1360 int destLoIndex = destStart + (i + 0) * destSizeBits; 1361 result = insertBits(result, destHiIndex, destLoIndex, argBits); 1362 } 1363 
FpDestReg_uqw = result; 1364 ''' 1365 1366 class Mcmpi2r(MediaOp): 1367 code = ''' 1368 union floatInt 1369 { 1370 float f; 1371 uint32_t i; 1372 }; 1373 union doubleInt 1374 { 1375 double d; 1376 uint64_t i; 1377 }; 1378 1379 assert(srcSize == destSize); 1380 int size = srcSize; 1381 int sizeBits = size * 8; 1382 int items = numItems(size); 1383 uint64_t result = FpDestReg_uqw; 1384 1385 for (int i = 0; i < items; i++) { 1386 int hiIndex = (i + 1) * sizeBits - 1; 1387 int loIndex = (i + 0) * sizeBits; 1388 uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex); 1389 int64_t arg1 = arg1Bits | 1390 (0 - (arg1Bits & (ULL(1) << (sizeBits - 1)))); 1391 uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex); 1392 int64_t arg2 = arg2Bits | 1393 (0 - (arg2Bits & (ULL(1) << (sizeBits - 1)))); 1394 1395 uint64_t resBits = 0; 1396 if (((ext & 0x2) == 0 && arg1 == arg2) || 1397 ((ext & 0x2) == 0x2 && arg1 > arg2)) 1398 resBits = mask(sizeBits); 1399 1400 result = insertBits(result, hiIndex, loIndex, resBits); 1401 } 1402 FpDestReg_uqw = result; 1403 ''' 1404 1405 class Mcmpf2r(MediaOp): 1406 code = ''' 1407 union floatInt 1408 { 1409 float f; 1410 uint32_t i; 1411 }; 1412 union doubleInt 1413 { 1414 double d; 1415 uint64_t i; 1416 }; 1417 1418 assert(srcSize == destSize); 1419 int size = srcSize; 1420 int sizeBits = size * 8; 1421 int items = numItems(size); 1422 uint64_t result = FpDestReg_uqw; 1423 1424 for (int i = 0; i < items; i++) { 1425 int hiIndex = (i + 1) * sizeBits - 1; 1426 int loIndex = (i + 0) * sizeBits; 1427 uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex); 1428 uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex); 1429 double arg1, arg2; 1430 1431 if (size == 4) { 1432 floatInt fi; 1433 fi.i = arg1Bits; 1434 arg1 = fi.f; 1435 fi.i = arg2Bits; 1436 arg2 = fi.f; 1437 } else { 1438 doubleInt di; 1439 di.i = arg1Bits; 1440 arg1 = di.d; 1441 di.i = arg2Bits; 1442 arg2 = di.d; 1443 } 1444 1445 uint64_t resBits = 0; 1446 bool nanop = 
std::isnan(arg1) || std::isnan(arg2); 1447 switch (ext & mask(3)) { 1448 case 0: 1449 if (arg1 == arg2 && !nanop) 1450 resBits = mask(sizeBits); 1451 break; 1452 case 1: 1453 if (arg1 < arg2 && !nanop) 1454 resBits = mask(sizeBits); 1455 break; 1456 case 2: 1457 if (arg1 <= arg2 && !nanop) 1458 resBits = mask(sizeBits); 1459 break; 1460 case 3: 1461 if (nanop) 1462 resBits = mask(sizeBits); 1463 break; 1464 case 4: 1465 if (arg1 != arg2 || nanop) 1466 resBits = mask(sizeBits); 1467 break; 1468 case 5: 1469 if (!(arg1 < arg2) || nanop) 1470 resBits = mask(sizeBits); 1471 break; 1472 case 6: 1473 if (!(arg1 <= arg2) || nanop) 1474 resBits = mask(sizeBits); 1475 break; 1476 case 7: 1477 if (!nanop) 1478 resBits = mask(sizeBits); 1479 break; 1480 }; 1481 1482 result = insertBits(result, hiIndex, loIndex, resBits); 1483 } 1484 FpDestReg_uqw = result; 1485 ''' 1486 1487 class Mcmpf2rf(MediaOp): 1488 def __init__(self, src1, src2,\ 1489 size = None, destSize = None, srcSize = None, ext = None): 1490 super(Mcmpf2rf, self).__init__("InstRegIndex(0)", src1,\ 1491 src2, size, destSize, srcSize, ext) 1492 code = ''' 1493 union floatInt 1494 { 1495 float f; 1496 uint32_t i; 1497 }; 1498 union doubleInt 1499 { 1500 double d; 1501 uint64_t i; 1502 }; 1503 1504 assert(srcSize == destSize); 1505 assert(srcSize == 4 || srcSize == 8); 1506 int size = srcSize; 1507 int sizeBits = size * 8; 1508 1509 double arg1, arg2; 1510 uint64_t arg1Bits = bits(FpSrcReg1_uqw, sizeBits - 1, 0); 1511 uint64_t arg2Bits = bits(FpSrcReg2_uqw, sizeBits - 1, 0); 1512 if (size == 4) { 1513 floatInt fi; 1514 fi.i = arg1Bits; 1515 arg1 = fi.f; 1516 fi.i = arg2Bits; 1517 arg2 = fi.f; 1518 } else { 1519 doubleInt di; 1520 di.i = arg1Bits; 1521 arg1 = di.d; 1522 di.i = arg2Bits; 1523 arg2 = di.d; 1524 } 1525 1526 // ZF PF CF 1527 // Unordered 1 1 1 1528 // Greater than 0 0 0 1529 // Less than 0 0 1 1530 // Equal 1 0 0 1531 // OF = SF = AF = 0 1532 ccFlagBits = ccFlagBits & ~(SFBit | AFBit | ZFBit | PFBit); 1533 
cfofBits = cfofBits & ~(OFBit | CFBit); 1534 1535 if (std::isnan(arg1) || std::isnan(arg2)) { 1536 ccFlagBits = ccFlagBits | (ZFBit | PFBit); 1537 cfofBits = cfofBits | CFBit; 1538 } 1539 else if(arg1 < arg2) 1540 cfofBits = cfofBits | CFBit; 1541 else if(arg1 == arg2) 1542 ccFlagBits = ccFlagBits | ZFBit; 1543 ''' 1544 1545 class Emms(MediaOp): 1546 def __init__(self): 1547 super(Emms, self).__init__('InstRegIndex(MISCREG_FTW)', 1548 'InstRegIndex(0)', 'InstRegIndex(0)', 2) 1549 code = 'FTW = 0xFFFF;' 1550}}; 1551