mediaop.isa revision 6572
// Copyright (c) 2009 The Regents of The University of Michigan
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met: redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer;
// redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution;
// neither the name of the copyright holders nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Authors: Gabe Black

// Body of the execute() method shared by all media microops. The microop
// specific C++ is dropped in through the code field; the operand
// declaration, read and write-back fields are filled in by the template
// substitution. Results are only written back when no fault was raised.
def template MediaOpExecute {{
        Fault %(class_name)s::execute(%(CPU_exec_context)s *xc,
                Trace::InstRecord *traceData) const
        {
            Fault fault = NoFault;

            %(op_decl)s;
            %(op_rd)s;

            %(code)s;

            //Write the resulting state to the execution context
            if(fault == NoFault)
            {
                %(op_wb)s;
            }
            return fault;
        }
}};

// Class declaration for the register form of a media microop: two register
// sources and a register destination. Two constructors are declared, one
// taking explicit microop flags and one without them.
def template MediaOpRegDeclare {{
    class %(class_name)s : public %(base_class)s
    {
      protected:
        void buildMe();

      public:
        %(class_name)s(ExtMachInst _machInst,
                const char * instMnem,
                bool isMicro, bool isDelayed, bool isFirst, bool isLast,
                InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
                uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);

        %(class_name)s(ExtMachInst _machInst,
                const char * instMnem,
                InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
                uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);

        %(BasicExecDeclare)s
    };
}};

// Class declaration for the immediate form of a media microop: the second
// source is an immediate value instead of a register index.
def template MediaOpImmDeclare {{

    class %(class_name)s : public %(base_class)s
    {
      protected:
        void buildMe();

      public:
        %(class_name)s(ExtMachInst _machInst,
                const char * instMnem,
                bool isMicro, bool isDelayed, bool isFirst, bool isLast,
                InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
                uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);

        %(class_name)s(ExtMachInst _machInst,
                const char * instMnem,
                InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
                uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);

        %(BasicExecDeclare)s
    };
}};

// Constructor definitions for the register form. The flag-less constructor
// forwards false for all four microop flags; both run buildMe(), which holds
// the generated constructor body.
def template MediaOpRegConstructor {{

    inline void %(class_name)s::buildMe()
    {
        %(constructor)s;
    }

    inline %(class_name)s::%(class_name)s(
            ExtMachInst machInst, const char * instMnem,
            InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
            uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
        %(base_class)s(machInst, "%(mnemonic)s", instMnem,
                false, false, false, false,
                _src1, _src2, _dest, _srcSize, _destSize, _ext,
                %(op_class)s)
    {
        buildMe();
    }

    inline %(class_name)s::%(class_name)s(
            ExtMachInst machInst, const char * instMnem,
            bool isMicro, bool isDelayed, bool isFirst, bool isLast,
            InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
            uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
        %(base_class)s(machInst, "%(mnemonic)s", instMnem,
                isMicro, isDelayed, isFirst, isLast,
                _src1, _src2, _dest, _srcSize, _destSize, _ext,
                %(op_class)s)
    {
        buildMe();
    }
}};

// Constructor definitions for the immediate form; identical to the register
// form except that the second source is the immediate.
def template MediaOpImmConstructor {{

    inline void %(class_name)s::buildMe()
    {
        %(constructor)s;
    }

    inline %(class_name)s::%(class_name)s(
            ExtMachInst machInst, const char * instMnem,
            InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
            uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
        %(base_class)s(machInst, "%(mnemonic)s", instMnem,
                false, false, false, false,
                _src1, _imm8, _dest, _srcSize, _destSize, _ext,
                %(op_class)s)
    {
        buildMe();
    }

    inline %(class_name)s::%(class_name)s(
            ExtMachInst machInst, const char * instMnem,
            bool isMicro, bool isDelayed, bool isFirst, bool isLast,
            InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
            uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
        %(base_class)s(machInst, "%(mnemonic)s", instMnem,
                isMicro, isDelayed, isFirst, isLast,
                _src1, _imm8, _dest, _srcSize, _destSize, _ext,
                %(op_class)s)
    {
        buildMe();
    }
}};

let {{
    # Make these empty strings so that concatenating onto
    # them will always work.
    header_output = ""
    decoder_output = ""
    exec_output = ""

    # Each tuple is (declaration, constructor, execute); buildCppClasses
    # indexes them in that order.
    immTemplates = (
            MediaOpImmDeclare,
            MediaOpImmConstructor,
            MediaOpExecute)

    regTemplates = (
            MediaOpRegDeclare,
            MediaOpRegConstructor,
            MediaOpExecute)

    class MediaOpMeta(type):
        '''
        Metaclass for media microops. Creating a non-abstract class with
        this metaclass generates the matching C++ classes from the class's
        "code" string and registers the class with the microassembler.
        '''

        # Matches a use of the generic second operand "op2", with an
        # optional "s" prefix and an optional ".type" qualifier. The same
        # pattern is used both to generate the C++ classes and to register
        # the immediate mnemonic so the two can never disagree.
        op2Pattern = "(?<!\\w)(?P<prefix>s?)op2(?P<typeQual>\\.\\w+)?"

        def buildCppClasses(self, name, Name, suffix, code):
            '''
            Append the C++ declaration, constructor and execute method for
            one microop to the global output strings.

            name   -- lower case mnemonic
            Name   -- C++ class name stem
            suffix -- appended to Name ("" or "Imm")
            code   -- C++ body of the microop
            '''

            # Globals to stick the output in
            global header_output
            global decoder_output
            global exec_output

            # If op2 is used anywhere, make register and immediate versions
            # of this code.
            matcher = re.compile(MediaOpMeta.op2Pattern)
            match = matcher.search(code)
            if match:
                typeQual = ""
                if match.group("typeQual"):
                    typeQual = match.group("typeQual")
                src2_name = "%spsrc2%s" % (match.group("prefix"), typeQual)
                self.buildCppClasses(name, Name, suffix,
                        matcher.sub(src2_name, code))
                self.buildCppClasses(name + "i", Name, suffix + "Imm",
                        matcher.sub("imm8", code))
                return

            base = "X86ISA::MediaOp"

            # If imm8 shows up in the code, use the immediate templates, if
            # not, hopefully the register ones will be correct.
            matcher = re.compile(r"(?<!\w)imm8(?!\w)")
            if matcher.search(code):
                base += "Imm"
                templates = immTemplates
            else:
                base += "Reg"
                templates = regTemplates

            # Get everything ready for the substitution
            iop = InstObjParams(name, Name + suffix, base, {"code" : code})

            # Generate the actual code (finally!)
            header_output += templates[0].subst(iop)
            decoder_output += templates[1].subst(iop)
            exec_output += templates[2].subst(iop)


        def __new__(mcls, Name, bases, dict):
            '''
            Create the Python class. Unless the class body sets
            "abstract = True", also emit its C++ classes and hook it into
            the microassembler under its lower case name.
            '''
            abstract = False
            name = Name.lower()
            if "abstract" in dict:
                abstract = dict['abstract']
                # The marker is consumed here so subclasses don't inherit it.
                del dict['abstract']

            cls = super(MediaOpMeta, mcls).__new__(mcls, Name, bases, dict)
            if not abstract:
                cls.className = Name
                cls.base_mnemonic = name
                code = cls.code

                # Set up the C++ classes
                mcls.buildCppClasses(cls, name, Name, "", code)

                # Hook into the microassembler dict
                global microopClasses
                microopClasses[name] = cls

                # If op2 is used anywhere, make register and immediate
                # versions of this code. This has to use the same pattern as
                # buildCppClasses or an immediate mnemonic could be
                # registered without a C++ class behind it.
                if re.search(MediaOpMeta.op2Pattern, code):
                    microopClasses[name + 'i'] = cls
            return cls


    class MediaOp(X86Microop):
        '''
        Base class of the media microops as seen by the microassembler.
        Holds the operands, the operand sizes and the "ext" modifier bits
        and knows how to write the C++ expression that allocates the
        microop.
        '''
        __metaclass__ = MediaOpMeta
        # This class itself doesn't act as a microop
        abstract = True

        def __init__(self, dest, src1, op2,
                size = None, destSize = None, srcSize = None, ext = None):
            '''
            dest, src1, op2 -- operand expressions, pasted verbatim into the
                               generated allocator
            size            -- sets both the source and destination size
            destSize        -- overrides the destination size
            srcSize         -- overrides the source size
            ext             -- modifier bits, 0 when omitted

            Raises Exception if either size ends up unset.
            '''
            self.dest = dest
            self.src1 = src1
            self.op2 = op2
            if size is not None:
                self.srcSize = size
                self.destSize = size
            if srcSize is not None:
                self.srcSize = srcSize
            if destSize is not None:
                self.destSize = destSize
            # The attributes may not exist at all if no size was passed in,
            # so look them up defensively to report the intended error.
            if getattr(self, "srcSize", None) is None:
                raise Exception("Source size not set.")
            if getattr(self, "destSize", None) is None:
                raise Exception("Dest size not set.")
            if ext is None:
                self.ext = 0
            else:
                self.ext = ext

        def getAllocator(self, *microFlags):
            '''
            Return the C++ "new ..." expression that constructs this
            microop. The immediate flavor of the class is selected when the
            microop was assembled under its "i" mnemonic.
            '''
            className = self.className
            if self.mnemonic == self.base_mnemonic + 'i':
                className += "Imm"
            allocator = '''new %(class_name)s(machInst, macrocodeBlock
                    %(flags)s, %(src1)s, %(op2)s, %(dest)s,
                    %(srcSize)s, %(destSize)s, %(ext)s)''' % {
                "class_name" : className,
                "flags" : self.microFlagsText(microFlags),
                "src1" : self.src1, "op2" : self.op2,
                "dest" : self.dest,
                "srcSize" : self.srcSize,
                "destSize" : self.destSize,
                "ext" : self.ext}
            return allocator

    # Single source microop: the unused second operand is filled with
    # register index 0. Copies the low srcSize bytes of the media source
    # into the integer destination through merge().
    class Mov2int(MediaOp):
        def __init__(self, dest, src,
                size = None, destSize = None, srcSize = None, ext = None):
            super(Mov2int, self).__init__(dest, src,
                    "InstRegIndex(0)", size, destSize, srcSize, ext)
        code = '''
            uint64_t fpSrcReg1 = bits(FpSrcReg1.uqw, srcSize * 8 - 1, 0);
            DestReg = merge(DestReg, fpSrcReg1, destSize);
        '''

    # Single source microop: picks srcSize bytes out of the integer source
    # and inserts them into the low destSize bytes of the media destination.
    class Mov2fp(MediaOp):
        def __init__(self, dest, src,
                size = None, destSize = None, srcSize = None, ext = None):
            super(Mov2fp, self).__init__(dest, src,
                    "InstRegIndex(0)", size, destSize, srcSize, ext)
        code = '''
            uint64_t srcReg1 = pick(SrcReg1, 0, srcSize);
            FpDestReg.uqw =
                insertBits(FpDestReg.uqw, destSize * 8 - 1, 0, srcReg1);
        '''

    # Interleaves items of the two sources. A non-zero ext selects the upper
    # half of the sources instead of the lower half.
    class Unpack(MediaOp):
        code = '''
            assert(srcSize == destSize);
            int size = destSize;
            int items = (sizeof(FloatRegBits) / size) / 2;
            int offset = ext ? items : 0;
            uint64_t result = 0;
            for (int i = 0; i < items; i++) {
                // Even result slots come from the first source.
                uint64_t pickedLow =
                    bits(FpSrcReg1.uqw, (i + offset + 1) * 8 * size - 1,
                                        (i + offset) * 8 * size);
                result = insertBits(result,
                                    (2 * i + 1) * 8 * size - 1,
                                    (2 * i + 0) * 8 * size,
                                    pickedLow);
                // Odd result slots come from the second source.
                uint64_t pickedHigh =
                    bits(FpSrcReg2.uqw, (i + offset + 1) * 8 * size - 1,
                                        (i + offset) * 8 * size);
                result = insertBits(result,
                                    (2 * i + 2) * 8 * size - 1,
                                    (2 * i + 1) * 8 * size,
                                    pickedHigh);
            }
            FpDestReg.uqw = result;
        '''

    # Narrows signed items of twice the destination width with saturation.
    # The low half of the result is filled from the first source and the
    # high half from the second. Bit 0 of ext selects signed saturation,
    # otherwise the result saturates to the unsigned range.
    class Pack(MediaOp):
        code = '''
            assert(srcSize == destSize * 2);
            int items = (sizeof(FloatRegBits) / destSize);
            int destBits = destSize * 8;
            int srcBits = srcSize * 8;
            uint64_t result = 0;
            for (int i = 0; i < items; i++) {
                // Choose the source register and the item within it. The
                // second half of the result restarts at item zero of the
                // second source.
                bool fromFirst = (i < items / 2);
                int srcItem = fromFirst ? i : (i - items / 2);
                uint64_t srcVal = fromFirst ? FpSrcReg1.uqw : FpSrcReg2.uqw;

                uint64_t picked =
                    bits(srcVal, (srcItem + 1) * srcBits - 1,
                                 (srcItem + 0) * srcBits);
                unsigned signBit = bits(picked, srcBits - 1);

                // Handle saturation.
                if (ext & 0x1) {
                    // Signed: the value fits if everything from the new
                    // sign bit upwards is a copy of the old sign bit.
                    uint64_t overflow =
                        bits(picked, srcBits - 1, destBits - 1);
                    if (signBit) {
                        if (overflow != mask(srcBits - destBits + 1))
                            picked = ((uint64_t)1 << (destBits - 1));
                    } else {
                        if (overflow != 0)
                            picked = mask(destBits - 1);
                    }
                } else {
                    // Unsigned: negative values clamp to zero, values with
                    // any bit set above the destination width clamp to the
                    // largest unsigned value.
                    if (signBit) {
                        picked = 0;
                    } else {
                        if (bits(picked, srcBits - 1, destBits) != 0)
                            picked = mask(destBits);
                    }
                }
                result = insertBits(result,
                                    (i + 1) * destBits - 1,
                                    (i + 0) * destBits,
                                    picked);
            }
            FpDestReg.uqw = result;
        '''

    # Bitwise operations on the whole register. The size passed to the base
    # class is a placeholder of 1 since the code never looks at it.
    class Mxor(MediaOp):
        def __init__(self, dest, src1, src2):
            super(Mxor, self).__init__(dest, src1, src2, 1)
        code = '''
            FpDestReg.uqw = FpSrcReg1.uqw ^ FpSrcReg2.uqw;
        '''

    class Mor(MediaOp):
        def __init__(self, dest, src1, src2):
            super(Mor, self).__init__(dest, src1, src2, 1)
        code = '''
            FpDestReg.uqw = FpSrcReg1.uqw | FpSrcReg2.uqw;
        '''

    class Mand(MediaOp):
        def __init__(self, dest, src1, src2):
            super(Mand, self).__init__(dest, src1, src2, 1)
        code = '''
            FpDestReg.uqw = FpSrcReg1.uqw & FpSrcReg2.uqw;
        '''

    # AND of the second source with the complement of the first.
    class Mandn(MediaOp):
        def __init__(self, dest, src1, src2):
            super(Mandn, self).__init__(dest, src1, src2, 1)
        code = '''
            FpDestReg.uqw = ~FpSrcReg1.uqw & FpSrcReg2.uqw;
        '''

    # Floating point minimum of 4 or 8 byte items. Bit 0 of ext restricts
    # the operation to the lowest item. When the comparison is false
    # (including unordered operands) the second source is selected.
    class Mminf(MediaOp):
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            assert(srcSize == 4 || srcSize == 8);
            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                double arg1, arg2;
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);

                // Reinterpret the raw bits as floating point values.
                if (size == 4) {
                    floatInt fi;
                    fi.i = arg1Bits;
                    arg1 = fi.f;
                    fi.i = arg2Bits;
                    arg2 = fi.f;
                } else {
                    doubleInt di;
                    di.i = arg1Bits;
                    arg1 = di.d;
                    di.i = arg2Bits;
                    arg2 = di.d;
                }

                if (arg1 < arg2) {
                    result = insertBits(result, hiIndex, loIndex, arg1Bits);
                } else {
                    result = insertBits(result, hiIndex, loIndex, arg2Bits);
                }
            }
            FpDestReg.uqw = result;
        '''

    # Floating point maximum; mirrors the minimum microop with the
    # comparison reversed.
    class Mmaxf(MediaOp):
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            assert(srcSize == 4 || srcSize == 8);
            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                double arg1, arg2;
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);

                // Reinterpret the raw bits as floating point values.
                if (size == 4) {
                    floatInt fi;
                    fi.i = arg1Bits;
                    arg1 = fi.f;
                    fi.i = arg2Bits;
                    arg2 = fi.f;
                } else {
                    doubleInt di;
                    di.i = arg1Bits;
                    arg1 = di.d;
                    di.i = arg2Bits;
                    arg2 = di.d;
                }

                if (arg1 > arg2) {
                    result = insertBits(result, hiIndex, loIndex, arg1Bits);
                } else {
                    result = insertBits(result, hiIndex, loIndex, arg2Bits);
                }
            }
            FpDestReg.uqw = result;
        '''

    # Integer minimum. Bit 0 of ext restricts the operation to the lowest
    # item, bit 1 selects a signed rather than an unsigned comparison.
    class Mmini(MediaOp):
        code = '''

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                // Sign extend each item to 64 bits. The one being shifted
                // has to be 64 bits wide or items of 32 bits and more
                // would overflow the shift.
                uint64_t signMask = ((uint64_t)1 << (sizeBits - 1));
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                int64_t arg1 = arg1Bits | (0 - (arg1Bits & signMask));
                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
                int64_t arg2 = arg2Bits | (0 - (arg2Bits & signMask));
                uint64_t resBits;

                if (ext & 0x2) {
                    if (arg1 < arg2) {
                        resBits = arg1Bits;
                    } else {
                        resBits = arg2Bits;
                    }
                } else {
                    if (arg1Bits < arg2Bits) {
                        resBits = arg1Bits;
                    } else {
                        resBits = arg2Bits;
                    }
                }
                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg.uqw = result;
        '''

    # Floating point square root of a single source. Bit 0 of ext restricts
    # the operation to the lowest item.
    class Msqrt(MediaOp):
        def __init__(self, dest, src,
                size = None, destSize = None, srcSize = None, ext = None):
            super(Msqrt, self).__init__(dest, src,
                    "InstRegIndex(0)", size, destSize, srcSize, ext)
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            assert(srcSize == 4 || srcSize == 8);
            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t argBits = bits(FpSrcReg1.uqw, hiIndex, loIndex);

                if (size == 4) {
                    floatInt fi;
                    fi.i = argBits;
                    fi.f = sqrt(fi.f);
                    argBits = fi.i;
                } else {
                    doubleInt di;
                    di.i = argBits;
                    di.d = sqrt(di.d);
                    argBits = di.i;
                }
                result = insertBits(result, hiIndex, loIndex, argBits);
            }
            FpDestReg.uqw = result;
        '''

    # Floating point add, subtract, multiply and divide of 4 or 8 byte
    # items. They only differ in the arithmetic operator. Bit 0 of ext
    # restricts the operation to the lowest item; the remaining items of the
    # destination keep their old value.
    class Maddf(MediaOp):
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            assert(srcSize == 4 || srcSize == 8);
            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
                uint64_t resBits;

                if (size == 4) {
                    floatInt arg1, arg2, res;
                    arg1.i = arg1Bits;
                    arg2.i = arg2Bits;
                    res.f = arg1.f + arg2.f;
                    resBits = res.i;
                } else {
                    doubleInt arg1, arg2, res;
                    arg1.i = arg1Bits;
                    arg2.i = arg2Bits;
                    res.d = arg1.d + arg2.d;
                    resBits = res.i;
                }

                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg.uqw = result;
        '''

    class Msubf(MediaOp):
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            assert(srcSize == 4 || srcSize == 8);
            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
                uint64_t resBits;

                if (size == 4) {
                    floatInt arg1, arg2, res;
                    arg1.i = arg1Bits;
                    arg2.i = arg2Bits;
                    res.f = arg1.f - arg2.f;
                    resBits = res.i;
                } else {
                    doubleInt arg1, arg2, res;
                    arg1.i = arg1Bits;
                    arg2.i = arg2Bits;
                    res.d = arg1.d - arg2.d;
                    resBits = res.i;
                }

                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg.uqw = result;
        '''

    class Mmulf(MediaOp):
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            assert(srcSize == 4 || srcSize == 8);
            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
                uint64_t resBits;

                if (size == 4) {
                    floatInt arg1, arg2, res;
                    arg1.i = arg1Bits;
                    arg2.i = arg2Bits;
                    res.f = arg1.f * arg2.f;
                    resBits = res.i;
                } else {
                    doubleInt arg1, arg2, res;
                    arg1.i = arg1Bits;
                    arg2.i = arg2Bits;
                    res.d = arg1.d * arg2.d;
                    resBits = res.i;
                }

                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg.uqw = result;
        '''

    class Mdivf(MediaOp):
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            assert(srcSize == 4 || srcSize == 8);
            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
                uint64_t resBits;

                if (size == 4) {
                    floatInt arg1, arg2, res;
                    arg1.i = arg1Bits;
                    arg2.i = arg2Bits;
                    res.f = arg1.f / arg2.f;
                    resBits = res.i;
                } else {
                    doubleInt arg1, arg2, res;
                    arg1.i = arg1Bits;
                    arg2.i = arg2Bits;
                    res.d = arg1.d / arg2.d;
                    resBits = res.i;
                }

                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg.uqw = result;
        '''

    # Integer add. Bit 0 of ext restricts the operation to the lowest item,
    # bit 1 selects unsigned saturation and bit 2 signed saturation; with
    # neither set the sum simply wraps.
    class Maddi(MediaOp):
        code = '''
            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
                uint64_t resBits = arg1Bits + arg2Bits;

                if (ext & 0x2) {
                    if (findCarry(sizeBits, resBits, arg1Bits, arg2Bits))
                        resBits = mask(sizeBits);
                } else if (ext & 0x4) {
                    int arg1Sign = bits(arg1Bits, sizeBits - 1);
                    int arg2Sign = bits(arg2Bits, sizeBits - 1);
                    int resSign = bits(resBits, sizeBits - 1);
                    // Signed overflow: both inputs share a sign that the
                    // sum does not have.
                    if ((arg1Sign == arg2Sign) && (arg1Sign != resSign)) {
                        if (resSign == 0)
                            // Two negatives wrapped around; clamp to the
                            // most negative value. The shifted one must be
                            // 64 bits wide for wide items.
                            resBits = ((uint64_t)1 << (sizeBits - 1));
                        else
                            resBits = mask(sizeBits - 1);
                    }
                }

                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg.uqw = result;
        '''

    # Signed integer to floating point conversion of a single source. Source
    # and destination items may be 4 or 8 bytes independently. Bit 0 of ext
    # restricts the conversion to one item; bit 1 selects the upper half of
    # whichever side holds the narrower items.
    class Cvti2f(MediaOp):
        def __init__(self, dest, src,
                size = None, destSize = None, srcSize = None, ext = None):
            super(Cvti2f, self).__init__(dest, src,
                    "InstRegIndex(0)", size, destSize, srcSize, ext)
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };

            assert(destSize == 4 || destSize == 8);
            assert(srcSize == 4 || srcSize == 8);
            int srcSizeBits = srcSize * 8;
            int destSizeBits = destSize * 8;
            int items;
            int srcStart = 0;
            int destStart = 0;
            if (srcSize == 2 * destSize) {
                items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / srcSize;
                if (ext & 0x2)
                    destStart = destSizeBits * items;
            } else if (destSize == 2 * srcSize) {
                items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize;
                if (ext & 0x2)
                    srcStart = srcSizeBits * items;
            } else {
                items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize;
            }
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
                int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
                uint64_t argBits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex);

                // The item has already been shifted down to bit zero, so
                // its sign bit sits at the item width minus one no matter
                // where the item came from in the register.
                int64_t sArg = argBits |
                    (0 - (argBits & ((uint64_t)1 << (srcSizeBits - 1))));
                double arg = sArg;

                if (destSize == 4) {
                    floatInt fi;
                    fi.f = arg;
                    argBits = fi.i;
                } else {
                    doubleInt di;
                    di.d = arg;
                    argBits = di.i;
                }
                int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
                int destLoIndex = destStart + (i + 0) * destSizeBits;
                result = insertBits(result, destHiIndex, destLoIndex, argBits);
            }
            FpDestReg.uqw = result;
        '''

    # Floating point to floating point conversion of a single source, with
    # the same item selection rules as the integer to float conversion.
    class Cvtf2f(MediaOp):
        def __init__(self, dest, src,
                size = None, destSize = None, srcSize = None, ext = None):
            super(Cvtf2f, self).__init__(dest, src,
                    "InstRegIndex(0)", size, destSize, srcSize, ext)
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };

            assert(destSize == 4 || destSize == 8);
            assert(srcSize == 4 || srcSize == 8);
            int srcSizeBits = srcSize * 8;
            int destSizeBits = destSize * 8;
            int items;
            int srcStart = 0;
            int destStart = 0;
            if (srcSize == 2 * destSize) {
                items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / srcSize;
                if (ext & 0x2)
                    destStart = destSizeBits * items;
            } else if (destSize == 2 * srcSize) {
                items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize;
                if (ext & 0x2)
                    srcStart = srcSizeBits * items;
            } else {
                items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize;
            }
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
                int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
                uint64_t argBits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex);
                double arg;

                // Widen the source item to a double first ...
                if (srcSize == 4) {
                    floatInt fi;
                    fi.i = argBits;
                    arg = fi.f;
                } else {
                    doubleInt di;
                    di.i = argBits;
                    arg = di.d;
                }
                // ... then narrow it to the destination format.
                if (destSize == 4) {
                    floatInt fi;
                    fi.f = arg;
                    argBits = fi.i;
                } else {
                    doubleInt di;
                    di.d = arg;
                    argBits = di.i;
                }
                int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
                int destLoIndex = destStart + (i + 0) * destSizeBits;
                result = insertBits(result, destHiIndex, destLoIndex, argBits);
            }
            FpDestReg.uqw = result;
        '''

    # Integer compare producing an all ones or all zeroes mask per item.
    # Bit 0 of ext restricts the operation to the lowest item; bit 1 selects
    # a signed greater-than test instead of the default equality test.
    class Mcmpi2r(MediaOp):
        code = '''
            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                // Sign extend each item to 64 bits. The one being shifted
                // has to be 64 bits wide or items of 32 bits and more
                // would overflow the shift.
                uint64_t signMask = ((uint64_t)1 << (sizeBits - 1));
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                int64_t arg1 = arg1Bits | (0 - (arg1Bits & signMask));
                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
                int64_t arg2 = arg2Bits | (0 - (arg2Bits & signMask));

                uint64_t resBits = 0;
                if (((ext & 0x2) == 0 && arg1 == arg2) ||
                    ((ext & 0x2) == 0x2 && arg1 > arg2))
                    resBits = mask(sizeBits);

                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg.uqw = result;
        '''
}};