mediaop.isa revision 6589
// Copyright (c) 2009 The Regents of The University of Michigan
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met: redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer;
// redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution;
// neither the name of the copyright holders nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Authors: Gabe Black

// Body of the generated execute() method. The operand declaration, read
// and write-back fragments as well as the microop specific code are
// substituted in by the ISA parser.
def template MediaOpExecute {{
        Fault %(class_name)s::execute(%(CPU_exec_context)s *xc,
                Trace::InstRecord *traceData) const
        {
            Fault fault = NoFault;

            %(op_decl)s;
            %(op_rd)s;

            %(code)s;

            //Write the resulting state to the execution context
            if(fault == NoFault)
            {
                %(op_wb)s;
            }
            return fault;
        }
}};

// Class declaration for a media microop whose second operand is a register.
// Two constructors are declared: one taking explicit microop flags and one
// without them.
def template MediaOpRegDeclare {{
    class %(class_name)s : public %(base_class)s
    {
      protected:
        void buildMe();

      public:
        %(class_name)s(ExtMachInst _machInst,
                const char * instMnem,
                bool isMicro, bool isDelayed, bool isFirst, bool isLast,
                InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
                uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);

        %(class_name)s(ExtMachInst _machInst,
                const char * instMnem,
                InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
                uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);

        %(BasicExecDeclare)s
    };
}};

// Class declaration for a media microop whose second operand is an
// immediate. Mirrors the register declaration above.
def template MediaOpImmDeclare {{

    class %(class_name)s : public %(base_class)s
    {
      protected:
        void buildMe();

      public:
        %(class_name)s(ExtMachInst _machInst,
                const char * instMnem,
                bool isMicro, bool isDelayed, bool isFirst, bool isLast,
                InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
                uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);

        %(class_name)s(ExtMachInst _machInst,
                const char * instMnem,
                InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
                uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);

        %(BasicExecDeclare)s
    };
}};

// Constructor definitions for the register form. The flag-less constructor
// passes false for all four microop flags.
def template MediaOpRegConstructor {{

    inline void %(class_name)s::buildMe()
    {
        %(constructor)s;
    }

    inline %(class_name)s::%(class_name)s(
            ExtMachInst machInst, const char * instMnem,
            InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
            uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
        %(base_class)s(machInst, "%(mnemonic)s", instMnem,
                false, false, false, false,
                _src1, _src2, _dest, _srcSize, _destSize, _ext,
                %(op_class)s)
    {
        buildMe();
    }

    inline %(class_name)s::%(class_name)s(
            ExtMachInst machInst, const char * instMnem,
            bool isMicro, bool isDelayed, bool isFirst, bool isLast,
            InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
            uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
        %(base_class)s(machInst, "%(mnemonic)s", instMnem,
                isMicro, isDelayed, isFirst, isLast,
                _src1, _src2, _dest, _srcSize, _destSize, _ext,
                %(op_class)s)
    {
        buildMe();
    }
}};

// Constructor definitions for the immediate form. The flag-less constructor
// passes false for all four microop flags.
def template MediaOpImmConstructor {{

    inline void %(class_name)s::buildMe()
    {
        %(constructor)s;
    }

    inline %(class_name)s::%(class_name)s(
            ExtMachInst machInst, const char * instMnem,
            InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
            uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
        %(base_class)s(machInst, "%(mnemonic)s", instMnem,
                false, false, false, false,
                _src1, _imm8, _dest, _srcSize, _destSize, _ext,
                %(op_class)s)
    {
        buildMe();
    }

    inline %(class_name)s::%(class_name)s(
            ExtMachInst machInst, const char * instMnem,
            bool isMicro, bool isDelayed, bool isFirst, bool isLast,
            InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
            uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
        %(base_class)s(machInst, "%(mnemonic)s", instMnem,
                isMicro, isDelayed, isFirst, isLast,
                _src1, _imm8, _dest, _srcSize, _destSize, _ext,
                %(op_class)s)
    {
        buildMe();
    }
}};

let {{
    # Make these empty strings so that concatenating onto
    # them will always work.
    header_output = ""
    decoder_output = ""
    exec_output = ""

    # (declaration, constructor, execute) templates for each operand form.
    immTemplates = (
            MediaOpImmDeclare,
            MediaOpImmConstructor,
            MediaOpExecute)

    regTemplates = (
            MediaOpRegDeclare,
            MediaOpRegConstructor,
            MediaOpExecute)

    # Metaclass which turns every non-abstract MediaOp subclass into C++
    # classes (via the templates above) and registers it with the
    # microassembler under its lower-cased class name.
    class MediaOpMeta(type):
        # Matches the generic second operand, with an optional "s" prefix
        # and an optional type qualifier. Shared by buildCppClasses and
        # __new__ so both always agree on whether an immediate variant
        # of a microop exists.
        op2Matcher = re.compile(
                "(?<!\\w)(?P<prefix>s?)op2(?P<typeQual>\\.\\w+)?")
        # Matches a stand-alone use of the immediate operand.
        imm8Matcher = re.compile(r"(?<!\w)imm8(?!\w)")

        # Generate the C++ for one microop.
        #   name   - lower case mnemonic handed to InstObjParams
        #   Name   - base of the C++ class name
        #   suffix - appended to Name ("" or "Imm")
        #   code   - C++ snippet implementing the microop
        # The result is appended to the global output strings.
        def buildCppClasses(self, name, Name, suffix, code):

            # Globals to stick the output in
            global header_output
            global decoder_output
            global exec_output

            # If op2 is used anywhere, make register and immediate versions
            # of this code.
            matcher = self.op2Matcher
            match = matcher.search(code)
            if match:
                typeQual = ""
                if match.group("typeQual"):
                    typeQual = match.group("typeQual")
                src2_name = "%sFpSrcReg2%s" % (match.group("prefix"), typeQual)
                self.buildCppClasses(name, Name, suffix,
                        matcher.sub(src2_name, code))
                self.buildCppClasses(name + "i", Name, suffix + "Imm",
                        matcher.sub("imm8", code))
                return

            base = "X86ISA::MediaOp"

            # If imm8 shows up in the code, use the immediate templates, if
            # not, hopefully the register ones will be correct.
            if self.imm8Matcher.search(code):
                base += "Imm"
                templates = immTemplates
            else:
                base += "Reg"
                templates = regTemplates

            # Get everything ready for the substitution
            iop = InstObjParams(name, Name + suffix, base, {"code" : code})

            # Generate the actual code (finally!)
            header_output += templates[0].subst(iop)
            decoder_output += templates[1].subst(iop)
            exec_output += templates[2].subst(iop)


        # Create the Python class and, unless it is marked abstract,
        # generate its C++ classes and register it as a microop.
        def __new__(mcls, Name, bases, dict):
            abstract = False
            name = Name.lower()
            if "abstract" in dict:
                abstract = dict['abstract']
                # Removed so that subclasses do not inherit the marker.
                del dict['abstract']

            cls = super(MediaOpMeta, mcls).__new__(mcls, Name, bases, dict)
            if not abstract:
                cls.className = Name
                cls.base_mnemonic = name
                code = cls.code

                # Set up the C++ classes
                mcls.buildCppClasses(cls, name, Name, "", code)

                # Hook into the microassembler dict
                global microopClasses
                microopClasses[name] = cls

                # If op2 is used anywhere, make register and immediate versions
                # of this code. The same pattern as in buildCppClasses is
                # used so an "i" mnemonic is only registered when the Imm
                # class was actually generated.
                if mcls.op2Matcher.search(code):
                    microopClasses[name + 'i'] = cls
            return cls


    # Base class of all media microops. Stores the operands and sizes and
    # produces the C++ expression which allocates the microop.
    class MediaOp(X86Microop):
        __metaclass__ = MediaOpMeta
        # This class itself doesn't act as a microop
        abstract = True

        # size sets both the source and destination size; srcSize and
        # destSize override it individually. ext defaults to 0.
        # Raises Exception if either size ends up unset.
        def __init__(self, dest, src1, op2,
                size = None, destSize = None, srcSize = None, ext = None):
            self.dest = dest
            self.src1 = src1
            self.op2 = op2
            if size is not None:
                self.srcSize = size
                self.destSize = size
            if srcSize is not None:
                self.srcSize = srcSize
            if destSize is not None:
                self.destSize = destSize
            # getattr is used because the attributes may never have been
            # assigned above; a plain read would fail with an
            # AttributeError instead of the intended message.
            if getattr(self, "srcSize", None) is None:
                raise Exception("Source size not set.")
            if getattr(self, "destSize", None) is None:
                raise Exception("Dest size not set.")
            if ext is None:
                self.ext = 0
            else:
                self.ext = ext

        # Return the C++ "new" expression for this microop. The Imm class
        # is selected when the microop was assembled with the "i" mnemonic.
        def getAllocator(self, *microFlags):
            className = self.className
            if self.mnemonic == self.base_mnemonic + 'i':
                className += "Imm"
            allocator = '''new %(class_name)s(machInst, macrocodeBlock
                    %(flags)s, %(src1)s, %(op2)s, %(dest)s,
                    %(srcSize)s, %(destSize)s, %(ext)s)''' % {
                "class_name" : className,
                "flags" : self.microFlagsText(microFlags),
                "src1" : self.src1, "op2" : self.op2,
                "dest" : self.dest,
                "srcSize" : self.srcSize,
                "destSize" : self.destSize,
                "ext" : self.ext}
            return allocator

    # Copy one element of a floating point register into an integer
    # register. The element is selected by the immediate.
    class Mov2int(MediaOp):
        def __init__(self, dest, src1, src2 = 0, \
                size = None, destSize = None, srcSize = None, ext = None):
            super(Mov2int, self).__init__(dest, src1,\
                    src2, size, destSize, srcSize, ext)
        code = '''
            int items = sizeof(FloatRegBits) / srcSize;
            int offset = imm8;
            if (bits(src1, 0) && (ext & 0x1))
                offset -= items;
            if (offset >= 0 && offset < items) {
                uint64_t fpSrcReg1 =
                    bits(FpSrcReg1.uqw,
                            (offset + 1) * srcSize * 8 - 1,
                            (offset + 0) * srcSize * 8);
                DestReg = merge(0, fpSrcReg1, destSize);
            } else {
                DestReg = DestReg;
            }
        '''

    # Copy an integer register into one element of a floating point
    # register. The element is selected by the immediate.
    class Mov2fp(MediaOp):
        def __init__(self, dest, src1, src2 = 0, \
                size = None, destSize = None, srcSize = None, ext = None):
            super(Mov2fp, self).__init__(dest, src1,\
                    src2, size, destSize, srcSize, ext)
        code = '''
            int items = sizeof(FloatRegBits) / destSize;
            int offset = imm8;
            if (bits(dest, 0) && (ext & 0x1))
                offset -= items;
            if (offset >= 0 && offset < items) {
                uint64_t srcReg1 = pick(SrcReg1, 0, srcSize);
                FpDestReg.uqw =
                    insertBits(FpDestReg.uqw,
                            (offset + 1) * destSize * 8 - 1,
                            (offset + 0) * destSize * 8, srcReg1);
            } else {
                FpDestReg.uqw = FpDestReg.uqw;
            }
        '''

    # Interleave elements of the two sources. A non-zero ext selects the
    # upper half of the sources instead of the lower half.
    class Unpack(MediaOp):
        code = '''
            assert(srcSize == destSize);
            int size = destSize;
            int items = (sizeof(FloatRegBits) / size) / 2;
            int offset = ext ? items : 0;
            uint64_t result = 0;
            for (int i = 0; i < items; i++) {
                uint64_t pickedLow =
                    bits(FpSrcReg1.uqw, (i + offset + 1) * 8 * size - 1,
                                        (i + offset) * 8 * size);
                result = insertBits(result,
                                    (2 * i + 1) * 8 * size - 1,
                                    (2 * i + 0) * 8 * size,
                                    pickedLow);
                uint64_t pickedHigh =
                    bits(FpSrcReg2.uqw, (i + offset + 1) * 8 * size - 1,
                                        (i + offset) * 8 * size);
                result = insertBits(result,
                                    (2 * i + 2) * 8 * size - 1,
                                    (2 * i + 1) * 8 * size,
                                    pickedHigh);
            }
            FpDestReg.uqw = result;
        '''

    # Narrow the elements of both sources to half their width with
    # saturation. Bit 0 of ext selects signed saturation, otherwise the
    # signed inputs are saturated to the unsigned range.
    class Pack(MediaOp):
        code = '''
            assert(srcSize == destSize * 2);
            int items = (sizeof(FloatRegBits) / destSize);
            int half = items / 2;
            int destBits = destSize * 8;
            int srcBits = srcSize * 8;
            uint64_t result = 0;
            for (int i = 0; i < items; i++) {
                // The low half of the result is taken from the first
                // source and the high half from the second one.
                int srcItem = (i < half) ? i : (i - half);
                uint64_t srcVal = (i < half) ?
                    FpSrcReg1.uqw : FpSrcReg2.uqw;
                uint64_t picked =
                    bits(srcVal, (srcItem + 1) * srcBits - 1,
                                 (srcItem + 0) * srcBits);
                unsigned signBit = bits(picked, srcBits - 1);

                // Handle saturation.
                if (ext & 0x1) {
                    // Signed: the value fits when every bit from the
                    // narrow sign bit upwards equals the wide sign bit.
                    uint64_t overflow =
                        bits(picked, srcBits - 1, destBits - 1);
                    if (signBit) {
                        if (overflow != mask(srcBits - destBits + 1))
                            picked = ((uint64_t)1 << (destBits - 1));
                    } else {
                        if (overflow != 0)
                            picked = mask(destBits - 1);
                    }
                } else {
                    // Unsigned: negative values clamp to zero and values
                    // with bits above the narrow width clamp to all ones.
                    if (signBit) {
                        picked = 0;
                    } else if (bits(picked, srcBits - 1, destBits) != 0) {
                        picked = mask(destBits);
                    }
                }
                result = insertBits(result,
                                    (i + 1) * destBits - 1,
                                    (i + 0) * destBits,
                                    picked);
            }
            FpDestReg.uqw = result;
        '''

    # Bitwise exclusive or of the two sources.
    class Mxor(MediaOp):
        def __init__(self, dest, src1, src2):
            super(Mxor, self).__init__(dest, src1, src2, 1)
        code = '''
            FpDestReg.uqw = FpSrcReg1.uqw ^ FpSrcReg2.uqw;
        '''

    # Bitwise or of the two sources.
    class Mor(MediaOp):
        def __init__(self, dest, src1, src2):
            super(Mor, self).__init__(dest, src1, src2, 1)
        code = '''
            FpDestReg.uqw = FpSrcReg1.uqw | FpSrcReg2.uqw;
        '''

    # Bitwise and of the two sources.
    class Mand(MediaOp):
        def __init__(self, dest, src1, src2):
            super(Mand, self).__init__(dest, src1, src2, 1)
        code = '''
            FpDestReg.uqw = FpSrcReg1.uqw & FpSrcReg2.uqw;
        '''

    # Bitwise and of the complemented first source with the second source.
    class Mandn(MediaOp):
        def __init__(self, dest, src1, src2):
            super(Mandn, self).__init__(dest, src1, src2, 1)
        code = '''
            FpDestReg.uqw = ~FpSrcReg1.uqw & FpSrcReg2.uqw;
        '''

    # Element-wise floating point minimum. Bit 0 of ext restricts the
    # operation to the lowest element. The second source is selected
    # whenever the first is not strictly smaller.
    class Mminf(MediaOp):
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            assert(srcSize == 4 || srcSize == 8);
            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                double arg1, arg2;
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);

                if (size == 4) {
                    floatInt fi;
                    fi.i = arg1Bits;
                    arg1 = fi.f;
                    fi.i = arg2Bits;
                    arg2 = fi.f;
                } else {
                    doubleInt di;
                    di.i = arg1Bits;
                    arg1 = di.d;
                    di.i = arg2Bits;
                    arg2 = di.d;
                }

                if (arg1 < arg2) {
                    result = insertBits(result, hiIndex, loIndex, arg1Bits);
                } else {
                    result = insertBits(result, hiIndex, loIndex, arg2Bits);
                }
            }
            FpDestReg.uqw = result;
        '''

    # Element-wise floating point maximum. Bit 0 of ext restricts the
    # operation to the lowest element. The second source is selected
    # whenever the first is not strictly larger.
    class Mmaxf(MediaOp):
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            assert(srcSize == 4 || srcSize == 8);
            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                double arg1, arg2;
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);

                if (size == 4) {
                    floatInt fi;
                    fi.i = arg1Bits;
                    arg1 = fi.f;
                    fi.i = arg2Bits;
                    arg2 = fi.f;
                } else {
                    doubleInt di;
                    di.i = arg1Bits;
                    arg1 = di.d;
                    di.i = arg2Bits;
                    arg2 = di.d;
                }

                if (arg1 > arg2) {
                    result = insertBits(result, hiIndex, loIndex, arg1Bits);
                } else {
                    result = insertBits(result, hiIndex, loIndex, arg2Bits);
                }
            }
            FpDestReg.uqw = result;
        '''

    # Element-wise integer minimum. Bit 0 of ext restricts the operation
    # to the lowest element, bit 1 selects a signed comparison.
    class Mmini(MediaOp):
        code = '''

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                // Sign extend with a 64 bit shift so wide elements work.
                int64_t arg1 = arg1Bits |
                    (0 - (arg1Bits & ((uint64_t)1 << (sizeBits - 1))));
                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
                int64_t arg2 = arg2Bits |
                    (0 - (arg2Bits & ((uint64_t)1 << (sizeBits - 1))));
                uint64_t resBits;

                if (ext & 0x2) {
                    if (arg1 < arg2) {
                        resBits = arg1Bits;
                    } else {
                        resBits = arg2Bits;
                    }
                } else {
                    if (arg1Bits < arg2Bits) {
                        resBits = arg1Bits;
                    } else {
                        resBits = arg2Bits;
                    }
                }
                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg.uqw = result;
        '''

    # Element-wise integer maximum. Bit 0 of ext restricts the operation
    # to the lowest element, bit 1 selects a signed comparison.
    class Mmaxi(MediaOp):
        code = '''

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                // Sign extend with a 64 bit shift so wide elements work.
                int64_t arg1 = arg1Bits |
                    (0 - (arg1Bits & ((uint64_t)1 << (sizeBits - 1))));
                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
                int64_t arg2 = arg2Bits |
                    (0 - (arg2Bits & ((uint64_t)1 << (sizeBits - 1))));
                uint64_t resBits;

                if (ext & 0x2) {
                    if (arg1 > arg2) {
                        resBits = arg1Bits;
                    } else {
                        resBits = arg2Bits;
                    }
                } else {
                    if (arg1Bits > arg2Bits) {
                        resBits = arg1Bits;
                    } else {
                        resBits = arg2Bits;
                    }
                }
                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg.uqw = result;
        '''

    # Element-wise floating point square root of the single source.
    # Bit 0 of ext restricts the operation to the lowest element.
    class Msqrt(MediaOp):
        def __init__(self, dest, src, \
                size = None, destSize = None, srcSize = None, ext = None):
            super(Msqrt, self).__init__(dest, src,\
                    "InstRegIndex(0)", size, destSize, srcSize, ext)
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            assert(srcSize == 4 || srcSize == 8);
            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t argBits = bits(FpSrcReg1.uqw, hiIndex, loIndex);

                if (size == 4) {
                    floatInt fi;
                    fi.i = argBits;
                    fi.f = sqrt(fi.f);
                    argBits = fi.i;
                } else {
                    doubleInt di;
                    di.i = argBits;
                    di.d = sqrt(di.d);
                    argBits = di.i;
                }
                result = insertBits(result, hiIndex, loIndex, argBits);
            }
            FpDestReg.uqw = result;
        '''

    # Element-wise floating point addition. Bit 0 of ext restricts the
    # operation to the lowest element.
    class Maddf(MediaOp):
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            assert(srcSize == 4 || srcSize == 8);
            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
                uint64_t resBits;

                if (size == 4) {
                    floatInt arg1, arg2, res;
                    arg1.i = arg1Bits;
                    arg2.i = arg2Bits;
                    res.f = arg1.f + arg2.f;
                    resBits = res.i;
                } else {
                    doubleInt arg1, arg2, res;
                    arg1.i = arg1Bits;
                    arg2.i = arg2Bits;
                    res.d = arg1.d + arg2.d;
                    resBits = res.i;
                }

                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg.uqw = result;
        '''

    # Element-wise floating point subtraction. Bit 0 of ext restricts the
    # operation to the lowest element.
    class Msubf(MediaOp):
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            assert(srcSize == 4 || srcSize == 8);
            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
                uint64_t resBits;

                if (size == 4) {
                    floatInt arg1, arg2, res;
                    arg1.i = arg1Bits;
                    arg2.i = arg2Bits;
                    res.f = arg1.f - arg2.f;
                    resBits = res.i;
                } else {
                    doubleInt arg1, arg2, res;
                    arg1.i = arg1Bits;
                    arg2.i = arg2Bits;
                    res.d = arg1.d - arg2.d;
                    resBits = res.i;
                }

                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg.uqw = result;
        '''

    # Element-wise floating point multiplication. Bit 0 of ext restricts
    # the operation to the lowest element.
    class Mmulf(MediaOp):
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            assert(srcSize == 4 || srcSize == 8);
            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
                uint64_t resBits;

                if (size == 4) {
                    floatInt arg1, arg2, res;
                    arg1.i = arg1Bits;
                    arg2.i = arg2Bits;
                    res.f = arg1.f * arg2.f;
                    resBits = res.i;
                } else {
                    doubleInt arg1, arg2, res;
                    arg1.i = arg1Bits;
                    arg2.i = arg2Bits;
                    res.d = arg1.d * arg2.d;
                    resBits = res.i;
                }

                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg.uqw = result;
        '''

    # Element-wise floating point division. Bit 0 of ext restricts the
    # operation to the lowest element.
    class Mdivf(MediaOp):
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            assert(srcSize == 4 || srcSize == 8);
            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
                uint64_t resBits;

                if (size == 4) {
                    floatInt arg1, arg2, res;
                    arg1.i = arg1Bits;
                    arg2.i = arg2Bits;
                    res.f = arg1.f / arg2.f;
                    resBits = res.i;
                } else {
                    doubleInt arg1, arg2, res;
                    arg1.i = arg1Bits;
                    arg2.i = arg2Bits;
                    res.d = arg1.d / arg2.d;
                    resBits = res.i;
                }

                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg.uqw = result;
        '''

    # Element-wise integer addition. Bit 0 of ext restricts the operation
    # to the lowest element, bit 1 selects unsigned saturation and bit 2
    # signed saturation.
    class Maddi(MediaOp):
        code = '''
            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
                uint64_t resBits = arg1Bits + arg2Bits;

                if (ext & 0x2) {
                    if (findCarry(sizeBits, resBits, arg1Bits, arg2Bits))
                        resBits = mask(sizeBits);
                } else if (ext & 0x4) {
                    int arg1Sign = bits(arg1Bits, sizeBits - 1);
                    int arg2Sign = bits(arg2Bits, sizeBits - 1);
                    int resSign = bits(resBits, sizeBits - 1);
                    if ((arg1Sign == arg2Sign) && (arg1Sign != resSign)) {
                        if (resSign == 0)
                            resBits = ((uint64_t)1 << (sizeBits - 1));
                        else
                            resBits = mask(sizeBits - 1);
                    }
                }

                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg.uqw = result;
        '''

    # Element-wise integer subtraction. Bit 0 of ext restricts the
    # operation to the lowest element, bit 1 selects unsigned saturation
    # and bit 2 signed saturation.
    class Msubi(MediaOp):
        code = '''
            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
                uint64_t resBits = arg1Bits - arg2Bits;

                if (ext & 0x2) {
                    if (arg2Bits > arg1Bits) {
                        resBits = 0;
                    } else if (!findCarry(sizeBits, resBits,
                                         arg1Bits, ~arg2Bits)) {
                        resBits = mask(sizeBits);
                    }
                } else if (ext & 0x4) {
                    int arg1Sign = bits(arg1Bits, sizeBits - 1);
                    int arg2Sign = !bits(arg2Bits, sizeBits - 1);
                    int resSign = bits(resBits, sizeBits - 1);
                    if ((arg1Sign == arg2Sign) && (arg1Sign != resSign)) {
                        if (resSign == 0)
                            resBits = ((uint64_t)1 << (sizeBits - 1));
                        else
                            resBits = mask(sizeBits - 1);
                    }
                }

                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg.uqw = result;
        '''

    # Element-wise integer multiplication. Bits of ext: 0x1 lowest element
    # only, 0x2 signed operands, 0x4 add a rounding constant, 0x8 keep the
    # high part of the product, 0x10 and 0x20 select which source elements
    # feed each wider destination element.
    class Mmuli(MediaOp):
        code = '''
            int srcBits = srcSize * 8;
            int destBits = destSize * 8;
            assert(destBits <= 64);
            assert(destSize >= srcSize);
            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / destSize);
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int offset = 0;
                if (ext & 16) {
                    if (ext & 32)
                        offset = i * (destBits - srcBits);
                    else
                        offset = i * (destBits - srcBits) + srcBits;
                }
                int srcHiIndex = (i + 1) * srcBits - 1 + offset;
                int srcLoIndex = (i + 0) * srcBits + offset;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex);
                uint64_t arg2Bits = bits(FpSrcReg2.uqw, srcHiIndex, srcLoIndex);
                uint64_t resBits;

                if (ext & 0x2) {
                    // Sign extend with a 64 bit shift so wide elements work.
                    int64_t arg1 = arg1Bits |
                        (0 - (arg1Bits & ((uint64_t)1 << (srcBits - 1))));
                    int64_t arg2 = arg2Bits |
                        (0 - (arg2Bits & ((uint64_t)1 << (srcBits - 1))));
                    resBits = (uint64_t)(arg1 * arg2);
                } else {
                    resBits = arg1Bits * arg2Bits;
                }

                if (ext & 0x4)
                    resBits += ((uint64_t)1 << (destBits - 1));

                if (ext & 0x8)
                    resBits >>= destBits;

                int destHiIndex = (i + 1) * destBits - 1;
                int destLoIndex = (i + 0) * destBits;
                result = insertBits(result, destHiIndex, destLoIndex, resBits);
            }
            FpDestReg.uqw = result;
        '''

    # Element-wise rounded-up unsigned average. Bit 0 of ext restricts the
    # operation to the lowest element.
    class Mavg(MediaOp):
        code = '''
            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
                uint64_t resBits = (arg1Bits + arg2Bits + 1) / 2;

                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg.uqw = result;
        '''

    # Sum of the absolute differences of all element pairs, truncated to
    # the destination size.
    class Msad(MediaOp):
        code = '''
            int srcBits = srcSize * 8;
            int items = sizeof(FloatRegBits) / srcSize;

            uint64_t sum = 0;
            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * srcBits - 1;
                int loIndex = (i + 0) * srcBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
                int64_t resBits = arg1Bits - arg2Bits;
                if (resBits < 0)
                    resBits = -resBits;
                sum += resBits;
            }
            FpDestReg.uqw = sum & mask(destSize * 8);
        '''

    # Element-wise logical shift right. The shift amount is the second
    # operand, so register and immediate variants are generated. Bit 0 of
    # ext restricts the operation to the lowest element.
    class Msrl(MediaOp):
        code = '''

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
            uint64_t shiftAmt = op2.uqw;
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                uint64_t resBits;
                if (shiftAmt >= sizeBits) {
                    resBits = 0;
                } else {
                    resBits = (arg1Bits >> shiftAmt) &
                        mask(sizeBits - shiftAmt);
                }

                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg.uqw = result;
        '''

    # Element-wise arithmetic shift right. The shift amount is the second
    # operand, so register and immediate variants are generated. Bit 0 of
    # ext restricts the operation to the lowest element.
    class Msra(MediaOp):
        code = '''

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
            uint64_t shiftAmt = op2.uqw;
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                uint64_t resBits;
                if (shiftAmt >= sizeBits) {
                    if (bits(arg1Bits, sizeBits - 1))
                        resBits = mask(sizeBits);
                    else
                        resBits = 0;
                } else {
                    resBits = (arg1Bits >> shiftAmt);
                    // Replicate the shifted sign bit upwards; the shift
                    // is done in 64 bits so wide elements work.
                    resBits = resBits |
                        (0 - (resBits &
                              ((uint64_t)1 << (sizeBits - 1 - shiftAmt))));
                }

                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg.uqw = result;
        '''

    # Element-wise logical shift left. The shift amount is the second
    # operand, so register and immediate variants are generated. Bit 0 of
    # ext restricts the operation to the lowest element.
    class Msll(MediaOp):
        code = '''

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
            uint64_t shiftAmt = op2.uqw;
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                uint64_t resBits;
                if (shiftAmt >= sizeBits) {
                    resBits = 0;
                } else {
                    resBits = (arg1Bits << shiftAmt);
                }

                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg.uqw = result;
        '''

    # Convert signed integer elements to floating point. Bit 0 of ext
    # restricts the operation to the lowest element; bit 1 selects the
    # upper half of the wider register when the sizes differ.
    class Cvti2f(MediaOp):
        def __init__(self, dest, src, \
                size = None, destSize = None, srcSize = None, ext = None):
            super(Cvti2f, self).__init__(dest, src,\
                    "InstRegIndex(0)", size, destSize, srcSize, ext)
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };

            assert(destSize == 4 || destSize == 8);
            assert(srcSize == 4 || srcSize == 8);
            int srcSizeBits = srcSize * 8;
            int destSizeBits = destSize * 8;
            int items;
            int srcStart = 0;
            int destStart = 0;
            if (srcSize == 2 * destSize) {
                items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / srcSize;
                if (ext & 0x2)
                    destStart = destSizeBits * items;
            } else if (destSize == 2 * srcSize) {
                items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize;
                if (ext & 0x2)
                    srcStart = srcSizeBits * items;
            } else {
                items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize;
            }
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
                int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
                uint64_t argBits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex);
                // The extracted value starts at bit zero, so its sign bit
                // sits at the element width minus one.
                int64_t sArg = argBits |
                    (0 - (argBits & ((uint64_t)1 << (srcSizeBits - 1))));
                double arg = sArg;

                if (destSize == 4) {
                    floatInt fi;
                    fi.f = arg;
                    argBits = fi.i;
                } else {
                    doubleInt di;
                    di.d = arg;
                    argBits = di.i;
                }
                int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
                int destLoIndex = destStart + (i + 0) * destSizeBits;
                result = insertBits(result, destHiIndex, destLoIndex, argBits);
            }
            FpDestReg.uqw = result;
        '''

    # Convert floating point elements between single and double
    # precision. Bit 0 of ext restricts the operation to the lowest
    # element; bit 1 selects the upper half of the wider register when
    # the sizes differ.
    class Cvtf2f(MediaOp):
        def __init__(self, dest, src, \
                size = None, destSize = None, srcSize = None, ext = None):
            super(Cvtf2f, self).__init__(dest, src,\
                    "InstRegIndex(0)", size, destSize, srcSize, ext)
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };

            assert(destSize == 4 || destSize == 8);
            assert(srcSize == 4 || srcSize == 8);
            int srcSizeBits = srcSize * 8;
            int destSizeBits = destSize * 8;
            int items;
            int srcStart = 0;
            int destStart = 0;
            if (srcSize == 2 * destSize) {
                items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / srcSize;
                if (ext & 0x2)
                    destStart = destSizeBits * items;
            } else if (destSize == 2 * srcSize) {
                items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize;
                if (ext & 0x2)
                    srcStart = srcSizeBits * items;
            } else {
                items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize;
            }
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
                int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
                uint64_t argBits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex);
                double arg;

                if (srcSize == 4) {
                    floatInt fi;
                    fi.i = argBits;
                    arg = fi.f;
                } else {
                    doubleInt di;
                    di.i = argBits;
                    arg = di.d;
                }
                if (destSize == 4) {
                    floatInt fi;
                    fi.f = arg;
                    argBits = fi.i;
                } else {
                    doubleInt di;
                    di.d = arg;
                    argBits = di.i;
                }
                int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
                int destLoIndex = destStart + (i + 0) * destSizeBits;
                result = insertBits(result, destHiIndex, destLoIndex, argBits);
            }
            FpDestReg.uqw = result;
        '''

    # Element-wise signed integer compare producing an all ones or all
    # zeros element. Bit 0 of ext restricts the operation to the lowest
    # element; bit 1 selects greater-than instead of equality.
    class Mcmpi2r(MediaOp):
        code = '''
            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                // Sign extend with a 64 bit shift so wide elements work.
                int64_t arg1 = arg1Bits |
                    (0 - (arg1Bits & ((uint64_t)1 << (sizeBits - 1))));
                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
                int64_t arg2 = arg2Bits |
                    (0 - (arg2Bits & ((uint64_t)1 << (sizeBits - 1))));

                uint64_t resBits = 0;
                if (((ext & 0x2) == 0 && arg1 == arg2) ||
                    ((ext & 0x2) == 0x2 && arg1 > arg2))
                    resBits = mask(sizeBits);

                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg.uqw = result;
        '''
}};