// mediaop.isa revision 6587:1cb6f8b427c0
// Copyright (c) 2009 The Regents of The University of Michigan
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met: redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer;
// redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution;
// neither the name of the copyright holders nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Authors: Gabe Black

// Body of execute() shared by every media microop: declare and read the
// operands, run the microop specific code, and write the results back only
// if no fault was raised.
def template MediaOpExecute {{
        Fault %(class_name)s::execute(%(CPU_exec_context)s *xc,
                Trace::InstRecord *traceData) const
        {
            Fault fault = NoFault;

            %(op_decl)s;
            %(op_rd)s;

            %(code)s;

            //Write the resulting state to the execution context
            if(fault == NoFault)
            {
                %(op_wb)s;
            }
            return fault;
        }
}};

// Class declaration for the register form of a media microop (second
// operand is a register index). One constructor takes explicit microop
// flags, the other does not.
def template MediaOpRegDeclare {{
    class %(class_name)s : public %(base_class)s
    {
      protected:
        void buildMe();

      public:
        %(class_name)s(ExtMachInst _machInst,
                const char * instMnem,
                bool isMicro, bool isDelayed, bool isFirst, bool isLast,
                InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
                uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);

        %(class_name)s(ExtMachInst _machInst,
                const char * instMnem,
                InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
                uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);

        %(BasicExecDeclare)s
    };
}};

// Class declaration for the immediate form of a media microop (second
// operand is the immediate _imm8).
def template MediaOpImmDeclare {{

    class %(class_name)s : public %(base_class)s
    {
      protected:
        void buildMe();

      public:
        %(class_name)s(ExtMachInst _machInst,
                const char * instMnem,
                bool isMicro, bool isDelayed, bool isFirst, bool isLast,
                InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
                uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);

        %(class_name)s(ExtMachInst _machInst,
                const char * instMnem,
                InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
                uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);

        %(BasicExecDeclare)s
    };
}};

// Constructors for the register form. The flag-less constructor passes
// false for all four microop flags.
def template MediaOpRegConstructor {{

    inline void %(class_name)s::buildMe()
    {
        %(constructor)s;
    }

    inline %(class_name)s::%(class_name)s(
            ExtMachInst machInst, const char * instMnem,
            InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
            uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
        %(base_class)s(machInst, "%(mnemonic)s", instMnem,
                false, false, false, false,
                _src1, _src2, _dest, _srcSize, _destSize, _ext,
                %(op_class)s)
    {
        buildMe();
    }

    inline %(class_name)s::%(class_name)s(
            ExtMachInst machInst, const char * instMnem,
            bool isMicro, bool isDelayed, bool isFirst, bool isLast,
            InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
            uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
        %(base_class)s(machInst, "%(mnemonic)s", instMnem,
                isMicro, isDelayed, isFirst, isLast,
                _src1, _src2, _dest, _srcSize, _destSize, _ext,
                %(op_class)s)
    {
        buildMe();
    }
}};

// Constructors for the immediate form. The flag-less constructor passes
// false for all four microop flags.
def template MediaOpImmConstructor {{

    inline void %(class_name)s::buildMe()
    {
        %(constructor)s;
    }

    inline %(class_name)s::%(class_name)s(
            ExtMachInst machInst, const char * instMnem,
            InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
            uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
        %(base_class)s(machInst, "%(mnemonic)s", instMnem,
                false, false, false, false,
                _src1, _imm8, _dest, _srcSize, _destSize, _ext,
                %(op_class)s)
    {
        buildMe();
    }

    inline %(class_name)s::%(class_name)s(
            ExtMachInst machInst, const char * instMnem,
            bool isMicro, bool isDelayed, bool isFirst, bool isLast,
            InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
            uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
        %(base_class)s(machInst, "%(mnemonic)s", instMnem,
                isMicro, isDelayed, isFirst, isLast,
                _src1, _imm8, _dest, _srcSize, _destSize, _ext,
                %(op_class)s)
    {
        buildMe();
    }
}};

let {{
    # Make these empty strings so that concatenating onto
    # them will always work.
    header_output = ""
    decoder_output = ""
    exec_output = ""

    # (declaration, constructor, execute) templates for each operand form.
    immTemplates = (
        MediaOpImmDeclare,
        MediaOpImmConstructor,
        MediaOpExecute)

    regTemplates = (
        MediaOpRegDeclare,
        MediaOpRegConstructor,
        MediaOpExecute)

    class MediaOpMeta(type):
        # Metaclass which, for every non-abstract microop class, generates
        # the C++ classes from the class' "code" string and registers the
        # microop with the microassembler.

        # Matches a use of the generic second operand "op2", with an
        # optional "s" prefix and an optional ".type" qualifier. Shared by
        # buildCppClasses and __new__ so both agree on what counts as a
        # use of op2.
        op2Matcher = re.compile(
                "(?<!\\w)(?P<prefix>s?)op2(?P<typeQual>\\.\\w+)?")

        # Matches a use of the immediate operand as a whole word.
        imm8Matcher = re.compile("(?<!\\w)imm8(?!\\w)")

        def buildCppClasses(self, name, Name, suffix, code):
            # Append the declaration, constructor and execute code for one
            # microop to the global output strings. If the code uses op2,
            # recurse once with op2 replaced by the second FP source
            # register and once (mnemonic + "i", class name + "Imm") with
            # op2 replaced by imm8.

            # Globals to stick the output in
            global header_output
            global decoder_output
            global exec_output

            # If op2 is used anywhere, make register and immediate versions
            # of this code.
            match = MediaOpMeta.op2Matcher.search(code)
            if match:
                typeQual = match.group("typeQual") or ""
                src2_name = "%sFpSrcReg2%s" % (match.group("prefix"), typeQual)
                self.buildCppClasses(name, Name, suffix,
                        MediaOpMeta.op2Matcher.sub(src2_name, code))
                self.buildCppClasses(name + "i", Name, suffix + "Imm",
                        MediaOpMeta.op2Matcher.sub("imm8", code))
                return

            base = "X86ISA::MediaOp"

            # If imm8 shows up in the code, use the immediate templates, if
            # not, hopefully the register ones will be correct.
            if MediaOpMeta.imm8Matcher.search(code):
                base += "Imm"
                templates = immTemplates
            else:
                base += "Reg"
                templates = regTemplates

            # Get everything ready for the substitution
            iop = InstObjParams(name, Name + suffix, base, {"code" : code})

            # Generate the actual code (finally!)
            header_output += templates[0].subst(iop)
            decoder_output += templates[1].subst(iop)
            exec_output += templates[2].subst(iop)


        def __new__(mcls, Name, bases, dict):
            # Create the microop class. Classes whose body sets "abstract"
            # to a true value are created but neither generate C++ nor get
            # registered; the "abstract" entry is removed so that it is not
            # inherited by subclasses.
            abstract = False
            name = Name.lower()
            if "abstract" in dict:
                abstract = dict['abstract']
                del dict['abstract']

            cls = super(MediaOpMeta, mcls).__new__(mcls, Name, bases, dict)
            if not abstract:
                cls.className = Name
                cls.base_mnemonic = name
                code = cls.code

                # Set up the C++ classes
                mcls.buildCppClasses(cls, name, Name, "", code)

                # Hook into the microassembler dict
                global microopClasses
                microopClasses[name] = cls

                # If op2 is used anywhere, an immediate version was
                # generated as well, so register its mnemonic too. This
                # uses the same pattern as buildCppClasses so a mnemonic is
                # only registered when its class really exists.
                if mcls.op2Matcher.search(code):
                    microopClasses[name + 'i'] = cls
            return cls


    class MediaOp(X86Microop):
        # Python side base class of all media microops. It records the
        # operands, sizes and ext value given in the microcode and turns
        # them into the C++ expression that allocates the microop.
        __metaclass__ = MediaOpMeta
        # This class itself doesn't act as a microop
        abstract = True

        def __init__(self, dest, src1, op2,
                size = None, destSize = None, srcSize = None, ext = None):
            # "size" sets both the source and the destination size;
            # "srcSize" and "destSize" override it individually. "ext"
            # defaults to 0.
            self.dest = dest
            self.src1 = src1
            self.op2 = op2
            if size is not None:
                self.srcSize = size
                self.destSize = size
            if srcSize is not None:
                self.srcSize = srcSize
            if destSize is not None:
                self.destSize = destSize
            # The attributes may never have been assigned above, so look
            # them up with a default to report the intended error instead
            # of an AttributeError.
            if getattr(self, "srcSize", None) is None:
                raise Exception("Source size not set.")
            if getattr(self, "destSize", None) is None:
                raise Exception("Dest size not set.")
            if ext is None:
                self.ext = 0
            else:
                self.ext = ext

        def getAllocator(self, *microFlags):
            # Return the C++ "new" expression which constructs this
            # microop. The "Imm" class is used when the microop was
            # invoked through its immediate mnemonic (base mnemonic + "i").
            className = self.className
            if self.mnemonic == self.base_mnemonic + 'i':
                className += "Imm"
            allocator = '''new %(class_name)s(machInst, macrocodeBlock
                    %(flags)s, %(src1)s, %(op2)s, %(dest)s,
                    %(srcSize)s, %(destSize)s, %(ext)s)''' % {
                "class_name" : className,
                "flags" : self.microFlagsText(microFlags),
                "src1" : self.src1, "op2" : self.op2,
                "dest" : self.dest,
                "srcSize" : self.srcSize,
                "destSize" : self.destSize,
                "ext" : self.ext}
            return allocator

    # C++ declarations of the unions used to reinterpret raw register bits
    # as float or double values. Prepended to the code of the floating
    # point microops below.
    mediaFpUnionDecls = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };
    '''

    # Code shared by Mminf and Mmaxf. %(cmp)s is the comparison which, when
    # true for (arg1, arg2), selects the first source; otherwise the second
    # source is selected. If bit 0 of ext is set only the lowest item is
    # processed.
    mediaFpMinMaxCode = mediaFpUnionDecls + '''
            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            assert(srcSize == 4 || srcSize == 8);
            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                double arg1, arg2;
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);

                if (size == 4) {
                    floatInt fi;
                    fi.i = arg1Bits;
                    arg1 = fi.f;
                    fi.i = arg2Bits;
                    arg2 = fi.f;
                } else {
                    doubleInt di;
                    di.i = arg1Bits;
                    arg1 = di.d;
                    di.i = arg2Bits;
                    arg2 = di.d;
                }

                if (arg1 %(cmp)s arg2) {
                    result = insertBits(result, hiIndex, loIndex, arg1Bits);
                } else {
                    result = insertBits(result, hiIndex, loIndex, arg2Bits);
                }
            }
            FpDestReg.uqw = result;
        '''

    # Code shared by Mmini and Mmaxi. %(cmp)s is the comparison which, when
    # true for (arg1, arg2), selects the first source. Bit 1 of ext selects
    # a signed comparison, otherwise the items are compared as unsigned
    # values. If bit 0 of ext is set only the lowest item is processed.
    mediaIntMinMaxCode = '''

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                // Sign extend each item from its top bit. The one has to
                // be 64 bits wide so the shift is valid for every size.
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                int64_t arg1 = arg1Bits |
                    (0 - (arg1Bits & ((uint64_t)1 << (sizeBits - 1))));
                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
                int64_t arg2 = arg2Bits |
                    (0 - (arg2Bits & ((uint64_t)1 << (sizeBits - 1))));
                uint64_t resBits;

                if (ext & 0x2) {
                    if (arg1 %(cmp)s arg2) {
                        resBits = arg1Bits;
                    } else {
                        resBits = arg2Bits;
                    }
                } else {
                    if (arg1Bits %(cmp)s arg2Bits) {
                        resBits = arg1Bits;
                    } else {
                        resBits = arg2Bits;
                    }
                }
                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg.uqw = result;
        '''

    # Code shared by Maddf, Msubf, Mmulf and Mdivf. %(op)s is the C++
    # binary operator applied to each pair of float (size 4) or double
    # (size 8) items. If bit 0 of ext is set only the lowest item is
    # processed.
    mediaFpArithCode = mediaFpUnionDecls + '''
            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            assert(srcSize == 4 || srcSize == 8);
            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
                uint64_t resBits;

                if (size == 4) {
                    floatInt arg1, arg2, res;
                    arg1.i = arg1Bits;
                    arg2.i = arg2Bits;
                    res.f = arg1.f %(op)s arg2.f;
                    resBits = res.i;
                } else {
                    doubleInt arg1, arg2, res;
                    arg1.i = arg1Bits;
                    arg2.i = arg2Bits;
                    res.d = arg1.d %(op)s arg2.d;
                    resBits = res.i;
                }

                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg.uqw = result;
        '''

    # Move the low srcSize bytes of an FP register into an integer
    # register, merging them into the destination with destSize.
    class Mov2int(MediaOp):
        def __init__(self, dest, src,
                size = None, destSize = None, srcSize = None, ext = None):
            super(Mov2int, self).__init__(dest, src,
                    "InstRegIndex(0)", size, destSize, srcSize, ext)
        code = '''
            uint64_t fpSrcReg1 = bits(FpSrcReg1.uqw, srcSize * 8 - 1, 0);
            DestReg = merge(DestReg, fpSrcReg1, destSize);
        '''

    # Move an integer register into the low destSize bytes of an FP
    # register, leaving the remaining bits of the destination unchanged.
    class Mov2fp(MediaOp):
        def __init__(self, dest, src,
                size = None, destSize = None, srcSize = None, ext = None):
            super(Mov2fp, self).__init__(dest, src,
                    "InstRegIndex(0)", size, destSize, srcSize, ext)
        code = '''
            uint64_t srcReg1 = pick(SrcReg1, 0, srcSize);
            FpDestReg.uqw =
                insertBits(FpDestReg.uqw, destSize * 8 - 1, 0, srcReg1);
        '''

    # Interleave items of the two sources: even result items come from the
    # first source, odd ones from the second. A non-zero ext selects the
    # upper half of the sources instead of the lower half.
    class Unpack(MediaOp):
        code = '''
            assert(srcSize == destSize);
            int size = destSize;
            int items = (sizeof(FloatRegBits) / size) / 2;
            int offset = ext ? items : 0;
            uint64_t result = 0;
            for (int i = 0; i < items; i++) {
                uint64_t pickedLow =
                    bits(FpSrcReg1.uqw, (i + offset + 1) * 8 * size - 1,
                                        (i + offset) * 8 * size);
                result = insertBits(result,
                                    (2 * i + 1) * 8 * size - 1,
                                    (2 * i + 0) * 8 * size,
                                    pickedLow);
                uint64_t pickedHigh =
                    bits(FpSrcReg2.uqw, (i + offset + 1) * 8 * size - 1,
                                        (i + offset) * 8 * size);
                result = insertBits(result,
                                    (2 * i + 2) * 8 * size - 1,
                                    (2 * i + 1) * 8 * size,
                                    pickedHigh);
            }
            FpDestReg.uqw = result;
        '''

    # Narrow the signed items of both sources to half their size with
    # saturation. The low half of the result is packed from the first
    # source, the high half from the second. Bit 0 of ext selects signed
    # saturation, otherwise the result saturates to the unsigned range.
    class Pack(MediaOp):
        code = '''
            assert(srcSize == destSize * 2);
            int items = (sizeof(FloatRegBits) / destSize);
            int srcItems = items / 2;
            int destBits = destSize * 8;
            int srcBits = srcSize * 8;
            uint64_t result = 0;
            for (int i = 0; i < items; i++) {
                // Pick the source register and the item index within it.
                uint64_t srcVal =
                    (i < srcItems) ? FpSrcReg1.uqw : FpSrcReg2.uqw;
                int srcIdx = (i < srcItems) ? i : (i - srcItems);
                uint64_t picked =
                    bits(srcVal, (srcIdx + 1) * srcBits - 1,
                                 (srcIdx + 0) * srcBits);
                unsigned signBit = bits(picked, srcBits - 1);

                // Handle saturation.
                if (ext & 0x1) {
                    // The value fits in a signed destination only if all
                    // bits from the destination sign bit upwards are
                    // copies of the source sign bit.
                    uint64_t overflow =
                        bits(picked, srcBits - 1, destBits - 1);
                    if (signBit) {
                        if (overflow != mask(srcBits - destBits + 1))
                            picked = ((uint64_t)1 << (destBits - 1));
                    } else {
                        if (overflow != 0)
                            picked = mask(destBits - 1);
                    }
                } else {
                    // Negative values clamp to zero, values which need
                    // more than destBits bits clamp to the maximum.
                    if (signBit) {
                        picked = 0;
                    } else if (bits(picked, srcBits - 1, destBits) != 0) {
                        picked = mask(destBits);
                    }
                }
                result = insertBits(result,
                                    (i + 1) * destBits - 1,
                                    (i + 0) * destBits,
                                    picked);
            }
            FpDestReg.uqw = result;
        '''

    # Bitwise XOR of the two sources.
    class Mxor(MediaOp):
        def __init__(self, dest, src1, src2):
            super(Mxor, self).__init__(dest, src1, src2, 1)
        code = '''
            FpDestReg.uqw = FpSrcReg1.uqw ^ FpSrcReg2.uqw;
        '''

    # Bitwise OR of the two sources.
    class Mor(MediaOp):
        def __init__(self, dest, src1, src2):
            super(Mor, self).__init__(dest, src1, src2, 1)
        code = '''
            FpDestReg.uqw = FpSrcReg1.uqw | FpSrcReg2.uqw;
        '''

    # Bitwise AND of the two sources.
    class Mand(MediaOp):
        def __init__(self, dest, src1, src2):
            super(Mand, self).__init__(dest, src1, src2, 1)
        code = '''
            FpDestReg.uqw = FpSrcReg1.uqw & FpSrcReg2.uqw;
        '''

    # Bitwise AND of the complement of the first source with the second.
    class Mandn(MediaOp):
        def __init__(self, dest, src1, src2):
            super(Mandn, self).__init__(dest, src1, src2, 1)
        code = '''
            FpDestReg.uqw = ~FpSrcReg1.uqw & FpSrcReg2.uqw;
        '''

    # Floating point minimum: the first source is chosen when it is less
    # than the second, otherwise the second source is chosen.
    class Mminf(MediaOp):
        code = mediaFpMinMaxCode % {"cmp" : "<"}

    # Floating point maximum: the first source is chosen when it is
    # greater than the second, otherwise the second source is chosen.
    class Mmaxf(MediaOp):
        code = mediaFpMinMaxCode % {"cmp" : ">"}

    # Integer minimum, signed if bit 1 of ext is set.
    class Mmini(MediaOp):
        code = mediaIntMinMaxCode % {"cmp" : "<"}

    # Integer maximum, signed if bit 1 of ext is set.
    class Mmaxi(MediaOp):
        code = mediaIntMinMaxCode % {"cmp" : ">"}

    # Floating point square root of each float (size 4) or double (size 8)
    # item of the source. If bit 0 of ext is set only the lowest item is
    # processed.
    class Msqrt(MediaOp):
        def __init__(self, dest, src,
                size = None, destSize = None, srcSize = None, ext = None):
            super(Msqrt, self).__init__(dest, src,
                    "InstRegIndex(0)", size, destSize, srcSize, ext)
        code = mediaFpUnionDecls + '''
            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            assert(srcSize == 4 || srcSize == 8);
            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t argBits = bits(FpSrcReg1.uqw, hiIndex, loIndex);

                if (size == 4) {
                    floatInt fi;
                    fi.i = argBits;
                    fi.f = sqrt(fi.f);
                    argBits = fi.i;
                } else {
                    doubleInt di;
                    di.i = argBits;
                    di.d = sqrt(di.d);
                    argBits = di.i;
                }
                result = insertBits(result, hiIndex, loIndex, argBits);
            }
            FpDestReg.uqw = result;
        '''

    # Floating point addition.
    class Maddf(MediaOp):
        code = mediaFpArithCode % {"op" : "+"}

    # Floating point subtraction (first source minus second source).
    class Msubf(MediaOp):
        code = mediaFpArithCode % {"op" : "-"}

    # Floating point multiplication.
    class Mmulf(MediaOp):
        code = mediaFpArithCode % {"op" : "*"}

    # Floating point division (first source divided by second source).
    class Mdivf(MediaOp):
        code = mediaFpArithCode % {"op" : "/"}

    # Integer addition of each item. Bit 1 of ext selects unsigned
    # saturation, bit 2 signed saturation; with neither the result wraps.
    # If bit 0 of ext is set only the lowest item is processed.
    class Maddi(MediaOp):
        code = '''
            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
                uint64_t resBits = arg1Bits + arg2Bits;

                if (ext & 0x2) {
                    if (findCarry(sizeBits, resBits, arg1Bits, arg2Bits))
                        resBits = mask(sizeBits);
                } else if (ext & 0x4) {
                    int arg1Sign = bits(arg1Bits, sizeBits - 1);
                    int arg2Sign = bits(arg2Bits, sizeBits - 1);
                    int resSign = bits(resBits, sizeBits - 1);
                    // Overflow: both operands have the same sign but the
                    // result does not.
                    if ((arg1Sign == arg2Sign) && (arg1Sign != resSign)) {
                        if (resSign == 0)
                            resBits = ((uint64_t)1 << (sizeBits - 1));
                        else
                            resBits = mask(sizeBits - 1);
                    }
                }

                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg.uqw = result;
        '''

    # Integer subtraction of each item (first source minus second source).
    # Bit 1 of ext selects unsigned saturation, bit 2 signed saturation;
    # with neither the result wraps. If bit 0 of ext is set only the lowest
    # item is processed.
    class Msubi(MediaOp):
        code = '''
            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
                uint64_t resBits = arg1Bits - arg2Bits;

                if (ext & 0x2) {
                    if (arg2Bits > arg1Bits) {
                        resBits = 0;
                    } else if (!findCarry(sizeBits, resBits,
                                         arg1Bits, ~arg2Bits)) {
                        resBits = mask(sizeBits);
                    }
                } else if (ext & 0x4) {
                    int arg1Sign = bits(arg1Bits, sizeBits - 1);
                    // The sign of the second operand is inverted since it
                    // is being subtracted.
                    int arg2Sign = !bits(arg2Bits, sizeBits - 1);
                    int resSign = bits(resBits, sizeBits - 1);
                    if ((arg1Sign == arg2Sign) && (arg1Sign != resSign)) {
                        if (resSign == 0)
                            resBits = ((uint64_t)1 << (sizeBits - 1));
                        else
                            resBits = mask(sizeBits - 1);
                    }
                }

                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg.uqw = result;
        '''

    # Integer multiplication of srcSize items into destSize results.
    # ext bits: 0x1 process only the lowest item, 0x2 signed multiply,
    # 0x4 add 1 << (destBits - 1) to the product, 0x8 shift the product
    # right by destBits, 16 apply an offset to the source item position
    # (with 32 choosing between the two offsets computed below).
    class Mmuli(MediaOp):
        code = '''
            int srcBits = srcSize * 8;
            int destBits = destSize * 8;
            assert(destBits <= 64);
            assert(destSize >= srcSize);
            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / destSize);
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int offset = 0;
                if (ext & 16) {
                    if (ext & 32)
                        offset = i * (destBits - srcBits);
                    else
                        offset = i * (destBits - srcBits) + srcBits;
                }
                int srcHiIndex = (i + 1) * srcBits - 1 + offset;
                int srcLoIndex = (i + 0) * srcBits + offset;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex);
                uint64_t arg2Bits = bits(FpSrcReg2.uqw, srcHiIndex, srcLoIndex);
                uint64_t resBits;

                if (ext & 0x2) {
                    // Sign extend the operands from their top bit.
                    int64_t arg1 = arg1Bits |
                        (0 - (arg1Bits & ((uint64_t)1 << (srcBits - 1))));
                    int64_t arg2 = arg2Bits |
                        (0 - (arg2Bits & ((uint64_t)1 << (srcBits - 1))));
                    resBits = (uint64_t)(arg1 * arg2);
                } else {
                    resBits = arg1Bits * arg2Bits;
                }

                if (ext & 0x4)
                    resBits += ((uint64_t)1 << (destBits - 1));

                if (ext & 0x8)
                    resBits >>= destBits;

                int destHiIndex = (i + 1) * destBits - 1;
                int destLoIndex = (i + 0) * destBits;
                result = insertBits(result, destHiIndex, destLoIndex, resBits);
            }
            FpDestReg.uqw = result;
        '''

    # Unsigned average of each pair of items, rounded up. If bit 0 of ext
    # is set only the lowest item is processed.
    class Mavg(MediaOp):
        code = '''
            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
                uint64_t resBits = (arg1Bits + arg2Bits + 1) / 2;

                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg.uqw = result;
        '''

    # Sum of the absolute differences of the unsigned srcSize items of the
    # two sources, truncated to destSize bytes.
    class Msad(MediaOp):
        code = '''
            int srcBits = srcSize * 8;
            int items = sizeof(FloatRegBits) / srcSize;

            uint64_t sum = 0;
            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * srcBits - 1;
                int loIndex = (i + 0) * srcBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
                int64_t resBits = arg1Bits - arg2Bits;
                if (resBits < 0)
                    resBits = -resBits;
                sum += resBits;
            }
            FpDestReg.uqw = sum & mask(destSize * 8);
        '''

    # Logical right shift of each item by op2. Shift amounts of at least
    # the item width produce zero. If bit 0 of ext is set only the lowest
    # item is processed.
    class Msrl(MediaOp):
        code = '''

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
            uint64_t shiftAmt = op2.uqw;
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                uint64_t resBits;
                if (shiftAmt >= sizeBits) {
                    resBits = 0;
                } else {
                    resBits = (arg1Bits >> shiftAmt) &
                        mask(sizeBits - shiftAmt);
                }

                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg.uqw = result;
        '''

    # Arithmetic right shift of each item by op2. Shift amounts of at least
    # the item width fill the item with its sign bit. If bit 0 of ext is
    # set only the lowest item is processed.
    class Msra(MediaOp):
        code = '''

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
            uint64_t shiftAmt = op2.uqw;
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                uint64_t resBits;
                if (shiftAmt >= sizeBits) {
                    if (bits(arg1Bits, sizeBits - 1))
                        resBits = mask(sizeBits);
                    else
                        resBits = 0;
                } else {
                    resBits = (arg1Bits >> shiftAmt);
                    // After the shift the sign bit sits at position
                    // sizeBits - 1 - shiftAmt; extend it upwards.
                    resBits = resBits |
                        (0 - (resBits &
                              ((uint64_t)1 << (sizeBits - 1 - shiftAmt))));
                }

                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg.uqw = result;
        '''

    # Left shift of each item by op2. Shift amounts of at least the item
    # width produce zero. If bit 0 of ext is set only the lowest item is
    # processed.
    class Msll(MediaOp):
        code = '''

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
            uint64_t shiftAmt = op2.uqw;
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                uint64_t resBits;
                if (shiftAmt >= sizeBits) {
                    resBits = 0;
                } else {
                    resBits = (arg1Bits << shiftAmt);
                }

                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg.uqw = result;
        '''

    # Convert signed integer items (4 or 8 bytes) to float (destSize 4) or
    # double (destSize 8). Bit 0 of ext processes only one item. When the
    # source and destination sizes differ by a factor of two, bit 1 of ext
    # selects the upper half of the narrower side.
    class Cvti2f(MediaOp):
        def __init__(self, dest, src,
                size = None, destSize = None, srcSize = None, ext = None):
            super(Cvti2f, self).__init__(dest, src,
                    "InstRegIndex(0)", size, destSize, srcSize, ext)
        code = mediaFpUnionDecls + '''
            assert(destSize == 4 || destSize == 8);
            assert(srcSize == 4 || srcSize == 8);
            int srcSizeBits = srcSize * 8;
            int destSizeBits = destSize * 8;
            int items;
            int srcStart = 0;
            int destStart = 0;
            if (srcSize == 2 * destSize) {
                items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / srcSize;
                if (ext & 0x2)
                    destStart = destSizeBits * items;
            } else if (destSize == 2 * srcSize) {
                items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize;
                if (ext & 0x2)
                    srcStart = srcSizeBits * items;
            } else {
                items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize;
            }
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
                int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
                uint64_t argBits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex);
                // argBits has been shifted down to bit 0, so its sign bit
                // is at srcSizeBits - 1 no matter where the item was
                // located in the source register.
                int64_t sArg = argBits |
                    (0 - (argBits & ((uint64_t)1 << (srcSizeBits - 1))));
                double arg = sArg;

                if (destSize == 4) {
                    floatInt fi;
                    fi.f = arg;
                    argBits = fi.i;
                } else {
                    doubleInt di;
                    di.d = arg;
                    argBits = di.i;
                }
                int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
                int destLoIndex = destStart + (i + 0) * destSizeBits;
                result = insertBits(result, destHiIndex, destLoIndex, argBits);
            }
            FpDestReg.uqw = result;
        '''

    # Convert between float (size 4) and double (size 8) items. Bit 0 of
    # ext processes only one item. When the source and destination sizes
    # differ, bit 1 of ext selects the upper half of the narrower side.
    class Cvtf2f(MediaOp):
        def __init__(self, dest, src,
                size = None, destSize = None, srcSize = None, ext = None):
            super(Cvtf2f, self).__init__(dest, src,
                    "InstRegIndex(0)", size, destSize, srcSize, ext)
        code = mediaFpUnionDecls + '''
            assert(destSize == 4 || destSize == 8);
            assert(srcSize == 4 || srcSize == 8);
            int srcSizeBits = srcSize * 8;
            int destSizeBits = destSize * 8;
            int items;
            int srcStart = 0;
            int destStart = 0;
            if (srcSize == 2 * destSize) {
                items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / srcSize;
                if (ext & 0x2)
                    destStart = destSizeBits * items;
            } else if (destSize == 2 * srcSize) {
                items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize;
                if (ext & 0x2)
                    srcStart = srcSizeBits * items;
            } else {
                items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize;
            }
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
                int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
                uint64_t argBits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex);
                double arg;

                if (srcSize == 4) {
                    floatInt fi;
                    fi.i = argBits;
                    arg = fi.f;
                } else {
                    doubleInt di;
                    di.i = argBits;
                    arg = di.d;
                }
                if (destSize == 4) {
                    floatInt fi;
                    fi.f = arg;
                    argBits = fi.i;
                } else {
                    doubleInt di;
                    di.d = arg;
                    argBits = di.i;
                }
                int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
                int destLoIndex = destStart + (i + 0) * destSizeBits;
                result = insertBits(result, destHiIndex, destLoIndex, argBits);
            }
            FpDestReg.uqw = result;
        '''

    # Signed integer compare of each pair of items, writing all ones to
    # the result item when the comparison holds and zero otherwise. With
    # bit 1 of ext clear the comparison is equality, with it set it is
    # "first source greater than second". If bit 0 of ext is set only the
    # lowest item is processed.
    class Mcmpi2r(MediaOp):
        code = '''
            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                // Sign extend each item from its top bit.
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                int64_t arg1 = arg1Bits |
                    (0 - (arg1Bits & ((uint64_t)1 << (sizeBits - 1))));
                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
                int64_t arg2 = arg2Bits |
                    (0 - (arg2Bits & ((uint64_t)1 << (sizeBits - 1))));

                uint64_t resBits = 0;
                if (((ext & 0x2) == 0 && arg1 == arg2) ||
                    ((ext & 0x2) == 0x2 && arg1 > arg2))
                    resBits = mask(sizeBits);

                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg.uqw = result;
        '''
}};