mediaop.isa revision 6622
// Copyright (c) 2009 The Regents of The University of Michigan
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met: redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer;
// redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution;
// neither the name of the copyright holders nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Authors: Gabe Black

// Definition of execute() for a media microop. The %(...)s slots are
// filled in when the template is substituted; the write-back slot only
// runs when no fault has been recorded.
def template MediaOpExecute {{
        Fault %(class_name)s::execute(%(CPU_exec_context)s *xc,
                Trace::InstRecord *traceData) const
        {
            Fault fault = NoFault;

            %(op_decl)s;
            %(op_rd)s;

            %(code)s;

            //Write the resulting state to the execution context
            if(fault == NoFault)
            {
                %(op_wb)s;
            }
            return fault;
        }
}};

// Class declaration for the register-operand form: two sources and a
// destination given as InstRegIndex, with one constructor that takes the
// isMicro/isDelayed/isFirst/isLast flags and one that omits them.
def template MediaOpRegDeclare {{
    class %(class_name)s : public %(base_class)s
    {
      protected:
        void buildMe();

      public:
        %(class_name)s(ExtMachInst _machInst,
                const char * instMnem,
                bool isMicro, bool isDelayed, bool isFirst, bool isLast,
                InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
                uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);

        %(class_name)s(ExtMachInst _machInst,
                const char * instMnem,
                InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
                uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);

        %(BasicExecDeclare)s
    };
}};

// Class declaration for the immediate form: the same shape as the register
// form except that the second source is a uint16_t _imm8.
def template MediaOpImmDeclare {{

    class %(class_name)s : public %(base_class)s
    {
      protected:
        void buildMe();

      public:
        %(class_name)s(ExtMachInst _machInst,
                const char * instMnem,
                bool isMicro, bool isDelayed, bool isFirst, bool isLast,
                InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
                uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);

        %(class_name)s(ExtMachInst _machInst,
                const char * instMnem,
                InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
                uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);

        %(BasicExecDeclare)s
    };
}};

// Constructor definitions for the register form. buildMe() holds the
// substituted %(constructor)s body; the flag-less constructor passes
// false for all four flags to the base class.
def template MediaOpRegConstructor {{

    inline void %(class_name)s::buildMe()
    {
        %(constructor)s;
    }

    inline %(class_name)s::%(class_name)s(
            ExtMachInst machInst, const char * instMnem,
            InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
            uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
        %(base_class)s(machInst, "%(mnemonic)s", instMnem,
                false, false, false, false,
                _src1, _src2, _dest, _srcSize, _destSize, _ext,
                %(op_class)s)
    {
        buildMe();
    }

    inline %(class_name)s::%(class_name)s(
            ExtMachInst machInst, const char * instMnem,
            bool isMicro, bool isDelayed, bool isFirst, bool isLast,
            InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
            uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
        %(base_class)s(machInst, "%(mnemonic)s", instMnem,
                isMicro, isDelayed, isFirst, isLast,
                _src1, _src2, _dest, _srcSize, _destSize, _ext,
                %(op_class)s)
    {
        buildMe();
    }
}};

// Constructor definitions for the immediate form; these mirror the
// register-form constructors with _imm8 in place of _src2.
def template MediaOpImmConstructor {{

    inline void %(class_name)s::buildMe()
    {
        %(constructor)s;
    }

    inline %(class_name)s::%(class_name)s(
            ExtMachInst machInst, const char * instMnem,
            InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
            uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
        %(base_class)s(machInst, "%(mnemonic)s", instMnem,
                false, false, false, false,
                _src1, _imm8, _dest, _srcSize, _destSize, _ext,
                %(op_class)s)
    {
        buildMe();
    }

    inline %(class_name)s::%(class_name)s(
            ExtMachInst machInst, const char * instMnem,
            bool isMicro, bool isDelayed, bool isFirst, bool isLast,
            InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
            uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
        %(base_class)s(machInst, "%(mnemonic)s", instMnem,
                isMicro, isDelayed, isFirst, isLast,
                _src1, _imm8, _dest, _srcSize, _destSize, _ext,
                %(op_class)s)
    {
        buildMe();
    }
}};

let {{
    # Make these empty strings so that concatenating onto
    # them will always work.
163 header_output = "" 164 decoder_output = "" 165 exec_output = "" 166 167 immTemplates = ( 168 MediaOpImmDeclare, 169 MediaOpImmConstructor, 170 MediaOpExecute) 171 172 regTemplates = ( 173 MediaOpRegDeclare, 174 MediaOpRegConstructor, 175 MediaOpExecute) 176 177 class MediaOpMeta(type): 178 def buildCppClasses(self, name, Name, suffix, code): 179 180 # Globals to stick the output in 181 global header_output 182 global decoder_output 183 global exec_output 184 185 # If op2 is used anywhere, make register and immediate versions 186 # of this code. 187 matcher = re.compile("(?<!\\w)(?P<prefix>s?)op2(?P<typeQual>\\.\\w+)?") 188 match = matcher.search(code) 189 if match: 190 typeQual = "" 191 if match.group("typeQual"): 192 typeQual = match.group("typeQual") 193 src2_name = "%sFpSrcReg2%s" % (match.group("prefix"), typeQual) 194 self.buildCppClasses(name, Name, suffix, 195 matcher.sub(src2_name, code)) 196 self.buildCppClasses(name + "i", Name, suffix + "Imm", 197 matcher.sub("imm8", code)) 198 return 199 200 base = "X86ISA::MediaOp" 201 202 # If imm8 shows up in the code, use the immediate templates, if 203 # not, hopefully the register ones will be correct. 204 matcher = re.compile("(?<!\w)imm8(?!\w)") 205 if matcher.search(code): 206 base += "Imm" 207 templates = immTemplates 208 else: 209 base += "Reg" 210 templates = regTemplates 211 212 # Get everything ready for the substitution 213 iop = InstObjParams(name, Name + suffix, base, {"code" : code}) 214 215 # Generate the actual code (finally!) 
216 header_output += templates[0].subst(iop) 217 decoder_output += templates[1].subst(iop) 218 exec_output += templates[2].subst(iop) 219 220 221 def __new__(mcls, Name, bases, dict): 222 abstract = False 223 name = Name.lower() 224 if "abstract" in dict: 225 abstract = dict['abstract'] 226 del dict['abstract'] 227 228 cls = super(MediaOpMeta, mcls).__new__(mcls, Name, bases, dict) 229 if not abstract: 230 cls.className = Name 231 cls.base_mnemonic = name 232 code = cls.code 233 234 # Set up the C++ classes 235 mcls.buildCppClasses(cls, name, Name, "", code) 236 237 # Hook into the microassembler dict 238 global microopClasses 239 microopClasses[name] = cls 240 241 # If op2 is used anywhere, make register and immediate versions 242 # of this code. 243 matcher = re.compile("op2(?P<typeQual>\\.\\w+)?") 244 if matcher.search(code): 245 microopClasses[name + 'i'] = cls 246 return cls 247 248 249 class MediaOp(X86Microop): 250 __metaclass__ = MediaOpMeta 251 # This class itself doesn't act as a microop 252 abstract = True 253 254 def __init__(self, dest, src1, op2, 255 size = None, destSize = None, srcSize = None, ext = None): 256 self.dest = dest 257 self.src1 = src1 258 self.op2 = op2 259 if size is not None: 260 self.srcSize = size 261 self.destSize = size 262 if srcSize is not None: 263 self.srcSize = srcSize 264 if destSize is not None: 265 self.destSize = destSize 266 if self.srcSize is None: 267 raise Exception, "Source size not set." 268 if self.destSize is None: 269 raise Exception, "Dest size not set." 
270 if ext is None: 271 self.ext = 0 272 else: 273 self.ext = ext 274 275 def getAllocator(self, *microFlags): 276 className = self.className 277 if self.mnemonic == self.base_mnemonic + 'i': 278 className += "Imm" 279 allocator = '''new %(class_name)s(machInst, macrocodeBlock 280 %(flags)s, %(src1)s, %(op2)s, %(dest)s, 281 %(srcSize)s, %(destSize)s, %(ext)s)''' % { 282 "class_name" : className, 283 "flags" : self.microFlagsText(microFlags), 284 "src1" : self.src1, "op2" : self.op2, 285 "dest" : self.dest, 286 "srcSize" : self.srcSize, 287 "destSize" : self.destSize, 288 "ext" : self.ext} 289 return allocator 290 291 class Mov2int(MediaOp): 292 def __init__(self, dest, src1, src2 = 0, \ 293 size = None, destSize = None, srcSize = None, ext = None): 294 super(Mov2int, self).__init__(dest, src1,\ 295 src2, size, destSize, srcSize, ext) 296 code = ''' 297 int items = sizeof(FloatRegBits) / srcSize; 298 int offset = imm8; 299 if (bits(src1, 0) && (ext & 0x1)) 300 offset -= items; 301 if (offset >= 0 && offset < items) { 302 uint64_t fpSrcReg1 = 303 bits(FpSrcReg1.uqw, 304 (offset + 1) * srcSize * 8 - 1, 305 (offset + 0) * srcSize * 8); 306 DestReg = merge(0, fpSrcReg1, destSize); 307 } else { 308 DestReg = DestReg; 309 } 310 ''' 311 312 class Mov2fp(MediaOp): 313 def __init__(self, dest, src1, src2 = 0, \ 314 size = None, destSize = None, srcSize = None, ext = None): 315 super(Mov2fp, self).__init__(dest, src1,\ 316 src2, size, destSize, srcSize, ext) 317 code = ''' 318 int items = sizeof(FloatRegBits) / destSize; 319 int offset = imm8; 320 if (bits(dest, 0) && (ext & 0x1)) 321 offset -= items; 322 if (offset >= 0 && offset < items) { 323 uint64_t srcReg1 = pick(SrcReg1, 0, srcSize); 324 FpDestReg.uqw = 325 insertBits(FpDestReg.uqw, 326 (offset + 1) * destSize * 8 - 1, 327 (offset + 0) * destSize * 8, srcReg1); 328 } else { 329 FpDestReg.uqw = FpDestReg.uqw; 330 } 331 ''' 332 333 class Movsign(MediaOp): 334 def __init__(self, dest, src, \ 335 size = None, destSize = 
                None, srcSize = None, ext = None):
            super(Movsign, self).__init__(dest, src,\
                    "InstRegIndex(0)", size, destSize, srcSize, ext)
        # Gathers the top bit of every srcSize-byte element of FpSrcReg1
        # into consecutive bits of a mask (starting at bit `items` instead
        # of bit 0 when ext bit 0 is set) and ORs the mask into DestReg.
        code = '''
            int items = sizeof(FloatRegBits) / srcSize;
            uint64_t result = 0;
            int offset = (ext & 0x1) ? items : 0;
            for (int i = 0; i < items; i++) {
                uint64_t picked =
                    bits(FpSrcReg1.uqw, (i + 1) * 8 * srcSize - 1);
                result = insertBits(result, i + offset, i + offset, picked);
            }
            DestReg = DestReg | result;
        '''

    # Copies each element of FpSrcReg1 into the destination where the top
    # bit of the matching FpSrcReg2 element is set; all other destination
    # bits keep their previous value. ext bit 0 limits this to the first
    # element.
    class Maskmov(MediaOp):
        code = '''
            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                if (bits(FpSrcReg2.uqw, hiIndex))
                    result = insertBits(result, hiIndex, loIndex, arg1Bits);
            }
            FpDestReg.uqw = result;
        '''

    # Builds every destination element from a source element chosen by
    # successive selector fields of ext (one bit per field for 8-byte
    # elements, two bits otherwise). A selector that indexes past the end of
    # FpSrcReg1 is rebased and reads from FpSrcReg2 instead.
    class shuffle(MediaOp):
        code = '''
            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            int items = sizeof(FloatRegBits) / size;
            int options;
            int optionBits;
            if (size == 8) {
                options = 2;
                optionBits = 1;
            } else {
                options = 4;
                optionBits = 2;
            }

            uint64_t result = 0;
            uint8_t sel = ext;

            for (int i = 0; i < items; i++) {
                uint64_t resBits;
                uint8_t lsel = sel & mask(optionBits);
                if (lsel * size >= sizeof(FloatRegBits)) {
                    lsel -= options / 2;
                    resBits = bits(FpSrcReg2.uqw,
                            (lsel + 1) * sizeBits - 1,
                            (lsel + 0) * sizeBits);
                } else {
                    resBits = bits(FpSrcReg1.uqw,
                            (lsel + 1) * sizeBits - 1,
                            (lsel + 0) * sizeBits);
                }

                sel >>= optionBits;

                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                result = insertBits(result, hiIndex, loIndex, resBits);
            }
FpDestReg.uqw = result; 408 ''' 409 410 class Unpack(MediaOp): 411 code = ''' 412 assert(srcSize == destSize); 413 int size = destSize; 414 int items = (sizeof(FloatRegBits) / size) / 2; 415 int offset = ext ? items : 0; 416 uint64_t result = 0; 417 for (int i = 0; i < items; i++) { 418 uint64_t pickedLow = 419 bits(FpSrcReg1.uqw, (i + offset + 1) * 8 * size - 1, 420 (i + offset) * 8 * size); 421 result = insertBits(result, 422 (2 * i + 1) * 8 * size - 1, 423 (2 * i + 0) * 8 * size, 424 pickedLow); 425 uint64_t pickedHigh = 426 bits(FpSrcReg2.uqw, (i + offset + 1) * 8 * size - 1, 427 (i + offset) * 8 * size); 428 result = insertBits(result, 429 (2 * i + 2) * 8 * size - 1, 430 (2 * i + 1) * 8 * size, 431 pickedHigh); 432 } 433 FpDestReg.uqw = result; 434 ''' 435 436 class Pack(MediaOp): 437 code = ''' 438 assert(srcSize == destSize * 2); 439 int items = (sizeof(FloatRegBits) / destSize); 440 int destBits = destSize * 8; 441 int srcBits = srcSize * 8; 442 uint64_t result = 0; 443 int i; 444 for (i = 0; i < items / 2; i++) { 445 uint64_t picked = 446 bits(FpSrcReg1.uqw, (i + 1) * srcBits - 1, 447 (i + 0) * srcBits); 448 unsigned signBit = bits(picked, srcBits - 1); 449 uint64_t overflow = bits(picked, srcBits - 1, destBits - 1); 450 451 // Handle saturation. 452 if (signBit) { 453 if (overflow != mask(destBits - srcBits + 1)) { 454 if (ext & 0x1) 455 picked = (1 << (destBits - 1)); 456 else 457 picked = 0; 458 } 459 } else { 460 if (overflow != 0) { 461 if (ext & 0x1) 462 picked = mask(destBits - 1); 463 else 464 picked = mask(destBits); 465 } 466 } 467 result = insertBits(result, 468 (i + 1) * destBits - 1, 469 (i + 0) * destBits, 470 picked); 471 } 472 for (;i < items; i++) { 473 uint64_t picked = 474 bits(FpSrcReg2.uqw, (i - items + 1) * srcBits - 1, 475 (i - items + 0) * srcBits); 476 unsigned signBit = bits(picked, srcBits - 1); 477 uint64_t overflow = bits(picked, srcBits - 1, destBits - 1); 478 479 // Handle saturation. 
480 if (signBit) { 481 if (overflow != mask(destBits - srcBits + 1)) { 482 if (ext & 0x1) 483 picked = (1 << (destBits - 1)); 484 else 485 picked = 0; 486 } 487 } else { 488 if (overflow != 0) { 489 if (ext & 0x1) 490 picked = mask(destBits - 1); 491 else 492 picked = mask(destBits); 493 } 494 } 495 result = insertBits(result, 496 (i + 1) * destBits - 1, 497 (i + 0) * destBits, 498 picked); 499 } 500 FpDestReg.uqw = result; 501 ''' 502 503 class Mxor(MediaOp): 504 def __init__(self, dest, src1, src2): 505 super(Mxor, self).__init__(dest, src1, src2, 1) 506 code = ''' 507 FpDestReg.uqw = FpSrcReg1.uqw ^ FpSrcReg2.uqw; 508 ''' 509 510 class Mor(MediaOp): 511 def __init__(self, dest, src1, src2): 512 super(Mor, self).__init__(dest, src1, src2, 1) 513 code = ''' 514 FpDestReg.uqw = FpSrcReg1.uqw | FpSrcReg2.uqw; 515 ''' 516 517 class Mand(MediaOp): 518 def __init__(self, dest, src1, src2): 519 super(Mand, self).__init__(dest, src1, src2, 1) 520 code = ''' 521 FpDestReg.uqw = FpSrcReg1.uqw & FpSrcReg2.uqw; 522 ''' 523 524 class Mandn(MediaOp): 525 def __init__(self, dest, src1, src2): 526 super(Mandn, self).__init__(dest, src1, src2, 1) 527 code = ''' 528 FpDestReg.uqw = ~FpSrcReg1.uqw & FpSrcReg2.uqw; 529 ''' 530 531 class Mminf(MediaOp): 532 code = ''' 533 union floatInt 534 { 535 float f; 536 uint32_t i; 537 }; 538 union doubleInt 539 { 540 double d; 541 uint64_t i; 542 }; 543 544 assert(srcSize == destSize); 545 int size = srcSize; 546 int sizeBits = size * 8; 547 assert(srcSize == 4 || srcSize == 8); 548 int items = (ext & 0x1) ? 
1: (sizeof(FloatRegBits) / size); 549 uint64_t result = FpDestReg.uqw; 550 551 for (int i = 0; i < items; i++) { 552 double arg1, arg2; 553 int hiIndex = (i + 1) * sizeBits - 1; 554 int loIndex = (i + 0) * sizeBits; 555 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); 556 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex); 557 558 if (size == 4) { 559 floatInt fi; 560 fi.i = arg1Bits; 561 arg1 = fi.f; 562 fi.i = arg2Bits; 563 arg2 = fi.f; 564 } else { 565 doubleInt di; 566 di.i = arg1Bits; 567 arg1 = di.d; 568 di.i = arg2Bits; 569 arg2 = di.d; 570 } 571 572 if (arg1 < arg2) { 573 result = insertBits(result, hiIndex, loIndex, arg1Bits); 574 } else { 575 result = insertBits(result, hiIndex, loIndex, arg2Bits); 576 } 577 } 578 FpDestReg.uqw = result; 579 ''' 580 581 class Mmaxf(MediaOp): 582 code = ''' 583 union floatInt 584 { 585 float f; 586 uint32_t i; 587 }; 588 union doubleInt 589 { 590 double d; 591 uint64_t i; 592 }; 593 594 assert(srcSize == destSize); 595 int size = srcSize; 596 int sizeBits = size * 8; 597 assert(srcSize == 4 || srcSize == 8); 598 int items = (ext & 0x1) ? 
1: (sizeof(FloatRegBits) / size); 599 uint64_t result = FpDestReg.uqw; 600 601 for (int i = 0; i < items; i++) { 602 double arg1, arg2; 603 int hiIndex = (i + 1) * sizeBits - 1; 604 int loIndex = (i + 0) * sizeBits; 605 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); 606 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex); 607 608 if (size == 4) { 609 floatInt fi; 610 fi.i = arg1Bits; 611 arg1 = fi.f; 612 fi.i = arg2Bits; 613 arg2 = fi.f; 614 } else { 615 doubleInt di; 616 di.i = arg1Bits; 617 arg1 = di.d; 618 di.i = arg2Bits; 619 arg2 = di.d; 620 } 621 622 if (arg1 > arg2) { 623 result = insertBits(result, hiIndex, loIndex, arg1Bits); 624 } else { 625 result = insertBits(result, hiIndex, loIndex, arg2Bits); 626 } 627 } 628 FpDestReg.uqw = result; 629 ''' 630 631 class Mmini(MediaOp): 632 code = ''' 633 634 assert(srcSize == destSize); 635 int size = srcSize; 636 int sizeBits = size * 8; 637 int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size); 638 uint64_t result = FpDestReg.uqw; 639 640 for (int i = 0; i < items; i++) { 641 int hiIndex = (i + 1) * sizeBits - 1; 642 int loIndex = (i + 0) * sizeBits; 643 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); 644 int64_t arg1 = arg1Bits | 645 (0 - (arg1Bits & (1 << (sizeBits - 1)))); 646 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex); 647 int64_t arg2 = arg2Bits | 648 (0 - (arg2Bits & (1 << (sizeBits - 1)))); 649 uint64_t resBits; 650 651 if (ext & 0x2) { 652 if (arg1 < arg2) { 653 resBits = arg1Bits; 654 } else { 655 resBits = arg2Bits; 656 } 657 } else { 658 if (arg1Bits < arg2Bits) { 659 resBits = arg1Bits; 660 } else { 661 resBits = arg2Bits; 662 } 663 } 664 result = insertBits(result, hiIndex, loIndex, resBits); 665 } 666 FpDestReg.uqw = result; 667 ''' 668 669 class Mmaxi(MediaOp): 670 code = ''' 671 672 assert(srcSize == destSize); 673 int size = srcSize; 674 int sizeBits = size * 8; 675 int items = (ext & 0x1) ? 
1: (sizeof(FloatRegBits) / size); 676 uint64_t result = FpDestReg.uqw; 677 678 for (int i = 0; i < items; i++) { 679 int hiIndex = (i + 1) * sizeBits - 1; 680 int loIndex = (i + 0) * sizeBits; 681 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); 682 int64_t arg1 = arg1Bits | 683 (0 - (arg1Bits & (1 << (sizeBits - 1)))); 684 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex); 685 int64_t arg2 = arg2Bits | 686 (0 - (arg2Bits & (1 << (sizeBits - 1)))); 687 uint64_t resBits; 688 689 if (ext & 0x2) { 690 if (arg1 > arg2) { 691 resBits = arg1Bits; 692 } else { 693 resBits = arg2Bits; 694 } 695 } else { 696 if (arg1Bits > arg2Bits) { 697 resBits = arg1Bits; 698 } else { 699 resBits = arg2Bits; 700 } 701 } 702 result = insertBits(result, hiIndex, loIndex, resBits); 703 } 704 FpDestReg.uqw = result; 705 ''' 706 707 class Msqrt(MediaOp): 708 def __init__(self, dest, src, \ 709 size = None, destSize = None, srcSize = None, ext = None): 710 super(Msqrt, self).__init__(dest, src,\ 711 "InstRegIndex(0)", size, destSize, srcSize, ext) 712 code = ''' 713 union floatInt 714 { 715 float f; 716 uint32_t i; 717 }; 718 union doubleInt 719 { 720 double d; 721 uint64_t i; 722 }; 723 724 assert(srcSize == destSize); 725 int size = srcSize; 726 int sizeBits = size * 8; 727 assert(srcSize == 4 || srcSize == 8); 728 int items = (ext & 0x1) ? 
                    1: (sizeof(FloatRegBits) / size);
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t argBits = bits(FpSrcReg1.uqw, hiIndex, loIndex);

                if (size == 4) {
                    floatInt fi;
                    fi.i = argBits;
                    fi.f = sqrt(fi.f);
                    argBits = fi.i;
                } else {
                    doubleInt di;
                    di.i = argBits;
                    di.d = sqrt(di.d);
                    argBits = di.i;
                }
                result = insertBits(result, hiIndex, loIndex, argBits);
            }
            FpDestReg.uqw = result;
        '''

    # Element-wise floating point addition of FpSrcReg1 and FpSrcReg2.
    # Elements are floats when srcSize is 4 and doubles when it is 8. With
    # ext bit 0 set only the first element is computed and the remaining
    # bits of FpDestReg are preserved.
    class Maddf(MediaOp):
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            assert(srcSize == 4 || srcSize == 8);
            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
                uint64_t resBits;

                if (size == 4) {
                    floatInt arg1, arg2, res;
                    arg1.i = arg1Bits;
                    arg2.i = arg2Bits;
                    res.f = arg1.f + arg2.f;
                    resBits = res.i;
                } else {
                    doubleInt arg1, arg2, res;
                    arg1.i = arg1Bits;
                    arg2.i = arg2Bits;
                    res.d = arg1.d + arg2.d;
                    resBits = res.i;
                }

                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg.uqw = result;
        '''

    # Element-wise floating point subtraction (FpSrcReg1 - FpSrcReg2), with
    # the same element-size and ext bit 0 handling as Maddf.
    class Msubf(MediaOp):
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            assert(srcSize == 4 || srcSize == 8);
            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
                uint64_t resBits;

                if (size == 4) {
                    floatInt arg1, arg2, res;
                    arg1.i = arg1Bits;
                    arg2.i = arg2Bits;
                    res.f = arg1.f - arg2.f;
                    resBits = res.i;
                } else {
                    doubleInt arg1, arg2, res;
                    arg1.i = arg1Bits;
                    arg2.i = arg2Bits;
                    res.d = arg1.d - arg2.d;
                    resBits = res.i;
                }

                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg.uqw = result;
        '''

    # Element-wise floating point multiplication, with the same
    # element-size and ext bit 0 handling as Maddf.
    class Mmulf(MediaOp):
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            assert(srcSize == 4 || srcSize == 8);
            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
                uint64_t resBits;

                if (size == 4) {
                    floatInt arg1, arg2, res;
                    arg1.i = arg1Bits;
                    arg2.i = arg2Bits;
                    res.f = arg1.f * arg2.f;
                    resBits = res.i;
                } else {
                    doubleInt arg1, arg2, res;
                    arg1.i = arg1Bits;
                    arg2.i = arg2Bits;
                    res.d = arg1.d * arg2.d;
                    resBits = res.i;
                }

                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg.uqw = result;
        '''

    # Element-wise floating point division (FpSrcReg1 / FpSrcReg2), with
    # the same element-size and ext bit 0 handling as Maddf.
    class Mdivf(MediaOp):
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            assert(srcSize == 4 || srcSize == 8);
            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
                uint64_t resBits;

                if (size == 4) {
                    floatInt arg1, arg2, res;
                    arg1.i = arg1Bits;
                    arg2.i = arg2Bits;
                    res.f = arg1.f / arg2.f;
                    resBits = res.i;
                } else {
                    doubleInt arg1, arg2, res;
                    arg1.i = arg1Bits;
                    arg2.i = arg2Bits;
                    res.d = arg1.d / arg2.d;
                    resBits = res.i;
                }

                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg.uqw = result;
        '''

    # Element-wise integer addition of FpSrcReg1 and FpSrcReg2.
    class Maddi(MediaOp):
        code = '''
            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            int items = (ext & 0x1) ?
1: (sizeof(FloatRegBits) / size); 942 uint64_t result = FpDestReg.uqw; 943 944 for (int i = 0; i < items; i++) { 945 int hiIndex = (i + 1) * sizeBits - 1; 946 int loIndex = (i + 0) * sizeBits; 947 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); 948 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex); 949 uint64_t resBits = arg1Bits + arg2Bits; 950 951 if (ext & 0x2) { 952 if (findCarry(sizeBits, resBits, arg1Bits, arg2Bits)) 953 resBits = mask(sizeBits); 954 } else if (ext & 0x4) { 955 int arg1Sign = bits(arg1Bits, sizeBits - 1); 956 int arg2Sign = bits(arg2Bits, sizeBits - 1); 957 int resSign = bits(resBits, sizeBits - 1); 958 if ((arg1Sign == arg2Sign) && (arg1Sign != resSign)) { 959 if (resSign == 0) 960 resBits = (1 << (sizeBits - 1)); 961 else 962 resBits = mask(sizeBits - 1); 963 } 964 } 965 966 result = insertBits(result, hiIndex, loIndex, resBits); 967 } 968 FpDestReg.uqw = result; 969 ''' 970 971 class Msubi(MediaOp): 972 code = ''' 973 assert(srcSize == destSize); 974 int size = srcSize; 975 int sizeBits = size * 8; 976 int items = (ext & 0x1) ? 
1: (sizeof(FloatRegBits) / size); 977 uint64_t result = FpDestReg.uqw; 978 979 for (int i = 0; i < items; i++) { 980 int hiIndex = (i + 1) * sizeBits - 1; 981 int loIndex = (i + 0) * sizeBits; 982 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); 983 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex); 984 uint64_t resBits = arg1Bits - arg2Bits; 985 986 if (ext & 0x2) { 987 if (arg2Bits > arg1Bits) { 988 resBits = 0; 989 } else if (!findCarry(sizeBits, resBits, 990 arg1Bits, ~arg2Bits)) { 991 resBits = mask(sizeBits); 992 } 993 } else if (ext & 0x4) { 994 int arg1Sign = bits(arg1Bits, sizeBits - 1); 995 int arg2Sign = !bits(arg2Bits, sizeBits - 1); 996 int resSign = bits(resBits, sizeBits - 1); 997 if ((arg1Sign == arg2Sign) && (arg1Sign != resSign)) { 998 if (resSign == 0) 999 resBits = (1 << (sizeBits - 1)); 1000 else 1001 resBits = mask(sizeBits - 1); 1002 } 1003 } 1004 1005 result = insertBits(result, hiIndex, loIndex, resBits); 1006 } 1007 FpDestReg.uqw = result; 1008 ''' 1009 1010 class Mmuli(MediaOp): 1011 code = ''' 1012 int srcBits = srcSize * 8; 1013 int destBits = destSize * 8; 1014 assert(destBits <= 64); 1015 assert(destSize >= srcSize); 1016 int items = (ext & 0x1) ? 
1: (sizeof(FloatRegBits) / destSize); 1017 uint64_t result = FpDestReg.uqw; 1018 1019 for (int i = 0; i < items; i++) { 1020 int offset = 0; 1021 if (ext & 16) { 1022 if (ext & 32) 1023 offset = i * (destBits - srcBits); 1024 else 1025 offset = i * (destBits - srcBits) + srcBits; 1026 } 1027 int srcHiIndex = (i + 1) * srcBits - 1 + offset; 1028 int srcLoIndex = (i + 0) * srcBits + offset; 1029 uint64_t arg1Bits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex); 1030 uint64_t arg2Bits = bits(FpSrcReg2.uqw, srcHiIndex, srcLoIndex); 1031 uint64_t resBits; 1032 1033 if (ext & 0x2) { 1034 int64_t arg1 = arg1Bits | 1035 (0 - (arg1Bits & (1 << (srcBits - 1)))); 1036 int64_t arg2 = arg2Bits | 1037 (0 - (arg2Bits & (1 << (srcBits - 1)))); 1038 resBits = (uint64_t)(arg1 * arg2); 1039 } else { 1040 resBits = arg1Bits * arg2Bits; 1041 } 1042 1043 if (ext & 0x4) 1044 resBits += (1 << (destBits - 1)); 1045 1046 if (ext & 0x8) 1047 resBits >>= destBits; 1048 1049 int destHiIndex = (i + 1) * destBits - 1; 1050 int destLoIndex = (i + 0) * destBits; 1051 result = insertBits(result, destHiIndex, destLoIndex, resBits); 1052 } 1053 FpDestReg.uqw = result; 1054 ''' 1055 1056 class Mavg(MediaOp): 1057 code = ''' 1058 assert(srcSize == destSize); 1059 int size = srcSize; 1060 int sizeBits = size * 8; 1061 int items = (ext & 0x1) ? 
1: (sizeof(FloatRegBits) / size);
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
                uint64_t resBits = (arg1Bits + arg2Bits + 1) / 2;

                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg.uqw = result;
        '''

    # Sum of absolute differences: splits both sources into srcSize-byte
    # elements, accumulates |a - b| over all of them and writes the total,
    # truncated to destSize bytes, to the destination.
    class Msad(MediaOp):
        code = '''
            int srcBits = srcSize * 8;
            int items = sizeof(FloatRegBits) / srcSize;

            uint64_t sum = 0;
            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * srcBits - 1;
                int loIndex = (i + 0) * srcBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
                // The unsigned difference is reinterpreted as signed so
                // that its magnitude can be taken.
                int64_t resBits = arg1Bits - arg2Bits;
                if (resBits < 0)
                    resBits = -resBits;
                sum += resBits;
            }
            FpDestReg.uqw = sum & mask(destSize * 8);
        '''

    # Packed logical shift right. Every element of the first source is
    # shifted by the single count held in op2; a count of at least the
    # element width produces zero. Bit 0 of ext restricts the operation to
    # the lowest element; untouched elements keep the old destination value.
    class Msrl(MediaOp):
        code = '''

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
            uint64_t shiftAmt = op2.uqw;
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                uint64_t resBits;
                if (shiftAmt >= sizeBits) {
                    resBits = 0;
                } else {
                    resBits = (arg1Bits >> shiftAmt) &
                        mask(sizeBits - shiftAmt);
                }

                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg.uqw = result;
        '''

    # Packed arithmetic shift right. Like Msrl, but vacated bits are filled
    # with copies of the element sign bit; a count of at least the element
    # width yields all ones for negative elements and zero otherwise.
    class Msra(MediaOp):
        code = '''

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
            uint64_t shiftAmt = op2.uqw;
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                uint64_t resBits;
                if (shiftAmt >= sizeBits) {
                    if (bits(arg1Bits, sizeBits - 1))
                        resBits = mask(sizeBits);
                    else
                        resBits = 0;
                } else {
                    resBits = (arg1Bits >> shiftAmt);
                    // After the shift the sign bit sits at position
                    // sizeBits - 1 - shiftAmt; subtracting it from zero
                    // sets every bit from there upwards. The one has to
                    // be 64 bits wide because that position can be as
                    // high as 63, which an int cannot be shifted by.
                    resBits = resBits |
                        (0 - (resBits &
                              ((uint64_t)1 << (sizeBits - 1 - shiftAmt))));
                }

                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg.uqw = result;
        '''

    # Packed logical shift left by the count in op2; a count of at least the
    # element width produces zero. Bit 0 of ext restricts the operation to
    # the lowest element.
    class Msll(MediaOp):
        code = '''

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
            uint64_t shiftAmt = op2.uqw;
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                uint64_t resBits;
                if (shiftAmt >= sizeBits) {
                    resBits = 0;
                } else {
                    // NOTE(review): bits shifted past the element top are
                    // presumably discarded by insertBits - confirm.
                    resBits = (arg1Bits << shiftAmt);
                }

                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg.uqw = result;
        '''

    # Convert packed floating point elements (4 or 8 bytes) to signed
    # integers (4 or 8 bytes).
    #   ext bit 0: convert only one element.
    #   ext bit 1: when source and destination widths differ by a factor of
    #              two, use the upper part of the narrower register instead
    #              of the lower part.
    #   ext bit 2: round (half away from zero) instead of truncating.
    # The constructor takes a single source; the remaining operand slot of
    # the base class is filled with a fixed InstRegIndex(0).
    class Cvtf2i(MediaOp):
        def __init__(self, dest, src, \
                size = None, destSize = None, srcSize = None, ext = None):
            super(Cvtf2i, self).__init__(dest, src,\
                    "InstRegIndex(0)", size, destSize, srcSize, ext)
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };

            assert(destSize == 4 || destSize == 8);
            assert(srcSize == 4 || srcSize == 8);
            int srcSizeBits = srcSize * 8;
            int destSizeBits = destSize * 8;
            int items;
            int srcStart = 0;
            int destStart = 0;
            if (srcSize == 2 * destSize) {
                // Narrowing: the element count is set by the wider source.
                items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / srcSize;
                if (ext & 0x2)
                    destStart = destSizeBits * items;
            } else if (destSize == 2 * srcSize) {
                // Widening: the element count is set by the wider result.
                items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize;
                if (ext & 0x2)
                    srcStart = srcSizeBits * items;
            } else {
                items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize;
            }
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
                int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
                uint64_t argBits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex);
                double arg;

                // Reinterpret the raw bits as a floating point value.
                if (srcSize == 4) {
                    floatInt fi;
                    fi.i = argBits;
                    arg = fi.f;
                } else {
                    doubleInt di;
                    di.i = argBits;
                    arg = di.d;
                }

                // Bias by half a unit so the truncating cast below rounds.
                if (ext & 0x4) {
                    if (arg >= 0)
                        arg += 0.5;
                    else
                        arg -= 0.5;
                }

                // Convert through the signed type of the result width and
                // only then to unsigned: converting a negative floating
                // point value straight to an unsigned type is undefined,
                // and squeezing a double through float first would lose
                // integer precision above 2^24.
                // NOTE(review): NaN and out of range inputs are still not
                // handled explicitly here.
                if (destSize == 4) {
                    argBits = (uint32_t)(int32_t)arg;
                } else {
                    argBits = (uint64_t)(int64_t)arg;
                }
                int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
                int destLoIndex = destStart + (i + 0) * destSizeBits;
                result = insertBits(result, destHiIndex, destLoIndex, argBits);
            }
            FpDestReg.uqw = result;
        '''

    # Convert packed signed integers (4 or 8 bytes) to floating point
    # elements (4 or 8 bytes). ext bits 0 and 1 select the element count and
    # the upper/lower half exactly as in Cvtf2i.
    class Cvti2f(MediaOp):
        def __init__(self, dest, src, \
                size = None, destSize = None, srcSize = None, ext = None):
            super(Cvti2f, self).__init__(dest, src,\
                    "InstRegIndex(0)", size, destSize, srcSize, ext)
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };

            assert(destSize == 4 || destSize == 8);
            assert(srcSize == 4 || srcSize == 8);
            int srcSizeBits = srcSize * 8;
            int destSizeBits = destSize * 8;
            int items;
            int srcStart = 0;
            int destStart = 0;
            if (srcSize == 2 * destSize) {
                items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / srcSize;
                if (ext & 0x2)
                    destStart = destSizeBits * items;
            } else if (destSize == 2 * srcSize) {
                items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize;
                if (ext & 0x2)
                    srcStart = srcSizeBits * items;
            } else {
                items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize;
            }
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
                int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
                uint64_t argBits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex);
                // Sign extend the element to 64 bits. The extracted value
                // is right justified, so its sign bit is at
                // srcSizeBits - 1 regardless of where the element sat in
                // the register, and the one must be 64 bits wide to reach
                // bit 63.
                int64_t sArg = argBits |
                    (0 - (argBits & ((uint64_t)1 << (srcSizeBits - 1))));
                double arg = sArg;

                // Store the value in the requested precision and grab its
                // raw bit pattern.
                if (destSize == 4) {
                    floatInt fi;
                    fi.f = arg;
                    argBits = fi.i;
                } else {
                    doubleInt di;
                    di.d = arg;
                    argBits = di.i;
                }
                int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
                int destLoIndex = destStart + (i + 0) * destSizeBits;
                result = insertBits(result, destHiIndex, destLoIndex, argBits);
            }
            FpDestReg.uqw = result;
        '''

    # Convert packed floating point elements between single and double
    # precision (either direction, or same width). ext bits 0 and 1 select
    # the element count and the upper/lower half exactly as in Cvtf2i.
    class Cvtf2f(MediaOp):
        def __init__(self, dest, src, \
                size = None, destSize = None, srcSize = None, ext = None):
            super(Cvtf2f, self).__init__(dest, src,\
                    "InstRegIndex(0)", size, destSize, srcSize, ext)
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };

            assert(destSize == 4 || destSize == 8);
            assert(srcSize == 4 || srcSize == 8);
            int srcSizeBits = srcSize * 8;
            int destSizeBits = destSize * 8;
            int items;
            int srcStart = 0;
            int destStart = 0;
            if (srcSize == 2 * destSize) {
                items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / srcSize;
                if (ext & 0x2)
                    destStart = destSizeBits * items;
            } else if (destSize == 2 * srcSize) {
                items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize;
                if (ext & 0x2)
                    srcStart = srcSizeBits * items;
            } else {
                items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize;
            }
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
                int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
                uint64_t argBits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex);
                double arg;

                // Raw bits to value in the source precision...
                if (srcSize == 4) {
                    floatInt fi;
                    fi.i = argBits;
                    arg = fi.f;
                } else {
                    doubleInt di;
                    di.i = argBits;
                    arg = di.d;
                }
                // ...and value back to raw bits in the result precision.
                if (destSize == 4) {
                    floatInt fi;
                    fi.f = arg;
                    argBits = fi.i;
                } else {
                    doubleInt di;
                    di.d = arg;
                    argBits = di.i;
                }
                int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
                int destLoIndex = destStart + (i + 0) * destSizeBits;
                result = insertBits(result, destHiIndex, destLoIndex, argBits);
            }
            FpDestReg.uqw = result;
        '''

    # Packed signed integer compare. Each result element is all ones when
    # the predicate holds and zero otherwise.
    #   ext bit 0: compare only the lowest element.
    #   ext bit 1: clear selects "equal", set selects "greater than".
    class Mcmpi2r(MediaOp):
        code = '''
            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
            uint64_t result = FpDestReg.uqw;

            // A 64 bit wide one, so that the sign bit of 8 byte elements
            // (bit 63) can be selected without overflowing an int.
            const uint64_t signBit = (uint64_t)1 << (sizeBits - 1);

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                // Sign extend both operands to 64 bits before comparing.
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                int64_t arg1 = arg1Bits | (0 - (arg1Bits & signBit));
                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
                int64_t arg2 = arg2Bits | (0 - (arg2Bits & signBit));

                uint64_t resBits = 0;
                if (((ext & 0x2) == 0 && arg1 == arg2) ||
                    ((ext & 0x2) == 0x2 && arg1 > arg2))
                    resBits = mask(sizeBits);

                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg.uqw = result;
        '''

    # Packed floating point compare. Each result element is all ones when
    # the predicate holds and zero otherwise.
    #   ext bits 0-2: predicate
    #       0 equal             4 not equal (or unordered)
    #       1 less than         5 not less than (or unordered)
    #       2 less or equal     6 not less or equal (or unordered)
    #       3 unordered         7 ordered
    #   ext bit 3: compare only the lowest element.
    class Mcmpf2r(MediaOp):
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            int items = (ext & 0x8) ? 1: (sizeof(FloatRegBits) / size);
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
                double arg1, arg2;

                if (size == 4) {
                    floatInt fi;
                    fi.i = arg1Bits;
                    arg1 = fi.f;
                    fi.i = arg2Bits;
                    arg2 = fi.f;
                } else {
                    doubleInt di;
                    di.i = arg1Bits;
                    arg1 = di.d;
                    di.i = arg2Bits;
                    arg2 = di.d;
                }

                uint64_t resBits = 0;
                // True when the operands are unordered.
                bool nanop = isnan(arg1) || isnan(arg2);
                switch (ext & mask(3)) {
                  case 0:
                    if (arg1 == arg2 && !nanop)
                        resBits = mask(sizeBits);
                    break;
                  case 1:
                    if (arg1 < arg2 && !nanop)
                        resBits = mask(sizeBits);
                    break;
                  case 2:
                    if (arg1 <= arg2 && !nanop)
                        resBits = mask(sizeBits);
                    break;
                  case 3:
                    if (nanop)
                        resBits = mask(sizeBits);
                    break;
                  case 4:
                    if (arg1 != arg2 || nanop)
                        resBits = mask(sizeBits);
                    break;
                  case 5:
                    if (!(arg1 < arg2) || nanop)
                        resBits = mask(sizeBits);
                    break;
                  case 6:
                    if (!(arg1 <= arg2) || nanop)
                        resBits = mask(sizeBits);
                    break;
                  case 7:
                    if (!nanop)
                        resBits = mask(sizeBits);
                    break;
                }

                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg.uqw = result;
        '''

    # Scalar floating point compare that reports its outcome in the
    # condition code flags instead of a register. Only the lowest element of
    # each source is examined. The constructor takes the two sources and
    # passes a fixed InstRegIndex(0) as the first operand of the base class.
    class Mcmpf2rf(MediaOp):
        def __init__(self, src1, src2,\
                size = None, destSize = None, srcSize = None, ext = None):
            super(Mcmpf2rf, self).__init__("InstRegIndex(0)", src1,\
                    src2, size, destSize, srcSize, ext)
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };

            assert(srcSize == destSize);
            assert(srcSize == 4 || srcSize == 8);
            int size = srcSize;
            int sizeBits = size * 8;

            double arg1, arg2;
            uint64_t arg1Bits = bits(FpSrcReg1.uqw, sizeBits - 1, 0);
            uint64_t arg2Bits = bits(FpSrcReg2.uqw, sizeBits - 1, 0);
            if (size == 4) {
                floatInt fi;
                fi.i = arg1Bits;
                arg1 = fi.f;
                fi.i = arg2Bits;
                arg2 = fi.f;
            } else {
                doubleInt di;
                di.i = arg1Bits;
                arg1 = di.d;
                di.i = arg2Bits;
                arg2 = di.d;
            }

            //               ZF PF CF
            // Unordered      1  1  1
            // Greater than   0  0  0
            // Less than      0  0  1
            // Equal          1  0  0
            //           OF = SF = AF = 0
            // All six flags are cleared first; the greater than case then
            // needs no further action.
            ccFlagBits = ccFlagBits & ~(OFBit | SFBit | AFBit |
                                        ZFBit | PFBit | CFBit);
            if (isnan(arg1) || isnan(arg2))
                ccFlagBits = ccFlagBits | (ZFBit | PFBit | CFBit);
            else if(arg1 < arg2)
                ccFlagBits = ccFlagBits | CFBit;
            else if(arg1 == arg2)
                ccFlagBits = ccFlagBits | ZFBit;
        '''
}};