// mediaop.isa revision 6592:0143f8c4b2c2
// Copyright (c) 2009 The Regents of The University of Michigan
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met: redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer;
// redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution;
// neither the name of the copyright holders nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26// 27// Authors: Gabe Black 28 29def template MediaOpExecute {{ 30 Fault %(class_name)s::execute(%(CPU_exec_context)s *xc, 31 Trace::InstRecord *traceData) const 32 { 33 Fault fault = NoFault; 34 35 %(op_decl)s; 36 %(op_rd)s; 37 38 %(code)s; 39 40 //Write the resulting state to the execution context 41 if(fault == NoFault) 42 { 43 %(op_wb)s; 44 } 45 return fault; 46 } 47}}; 48 49def template MediaOpRegDeclare {{ 50 class %(class_name)s : public %(base_class)s 51 { 52 protected: 53 void buildMe(); 54 55 public: 56 %(class_name)s(ExtMachInst _machInst, 57 const char * instMnem, 58 bool isMicro, bool isDelayed, bool isFirst, bool isLast, 59 InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest, 60 uint8_t _srcSize, uint8_t _destSize, uint16_t _ext); 61 62 %(class_name)s(ExtMachInst _machInst, 63 const char * instMnem, 64 InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest, 65 uint8_t _srcSize, uint8_t _destSize, uint16_t _ext); 66 67 %(BasicExecDeclare)s 68 }; 69}}; 70 71def template MediaOpImmDeclare {{ 72 73 class %(class_name)s : public %(base_class)s 74 { 75 protected: 76 void buildMe(); 77 78 public: 79 %(class_name)s(ExtMachInst _machInst, 80 const char * instMnem, 81 bool isMicro, bool isDelayed, bool isFirst, bool isLast, 82 InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest, 83 uint8_t _srcSize, uint8_t _destSize, uint16_t _ext); 84 85 %(class_name)s(ExtMachInst _machInst, 86 const char * instMnem, 87 InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest, 88 uint8_t _srcSize, uint8_t _destSize, uint16_t _ext); 89 90 %(BasicExecDeclare)s 91 }; 92}}; 93 94def template MediaOpRegConstructor {{ 95 96 inline void %(class_name)s::buildMe() 97 { 98 %(constructor)s; 99 } 100 101 inline %(class_name)s::%(class_name)s( 102 ExtMachInst machInst, const char * instMnem, 103 InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest, 104 uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) : 105 %(base_class)s(machInst, "%(mnemonic)s", instMnem, 106 
false, false, false, false, 107 _src1, _src2, _dest, _srcSize, _destSize, _ext, 108 %(op_class)s) 109 { 110 buildMe(); 111 } 112 113 inline %(class_name)s::%(class_name)s( 114 ExtMachInst machInst, const char * instMnem, 115 bool isMicro, bool isDelayed, bool isFirst, bool isLast, 116 InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest, 117 uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) : 118 %(base_class)s(machInst, "%(mnemonic)s", instMnem, 119 isMicro, isDelayed, isFirst, isLast, 120 _src1, _src2, _dest, _srcSize, _destSize, _ext, 121 %(op_class)s) 122 { 123 buildMe(); 124 } 125}}; 126 127def template MediaOpImmConstructor {{ 128 129 inline void %(class_name)s::buildMe() 130 { 131 %(constructor)s; 132 } 133 134 inline %(class_name)s::%(class_name)s( 135 ExtMachInst machInst, const char * instMnem, 136 InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest, 137 uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) : 138 %(base_class)s(machInst, "%(mnemonic)s", instMnem, 139 false, false, false, false, 140 _src1, _imm8, _dest, _srcSize, _destSize, _ext, 141 %(op_class)s) 142 { 143 buildMe(); 144 } 145 146 inline %(class_name)s::%(class_name)s( 147 ExtMachInst machInst, const char * instMnem, 148 bool isMicro, bool isDelayed, bool isFirst, bool isLast, 149 InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest, 150 uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) : 151 %(base_class)s(machInst, "%(mnemonic)s", instMnem, 152 isMicro, isDelayed, isFirst, isLast, 153 _src1, _imm8, _dest, _srcSize, _destSize, _ext, 154 %(op_class)s) 155 { 156 buildMe(); 157 } 158}}; 159 160let {{ 161 # Make these empty strings so that concatenating onto 162 # them will always work. 
163 header_output = "" 164 decoder_output = "" 165 exec_output = "" 166 167 immTemplates = ( 168 MediaOpImmDeclare, 169 MediaOpImmConstructor, 170 MediaOpExecute) 171 172 regTemplates = ( 173 MediaOpRegDeclare, 174 MediaOpRegConstructor, 175 MediaOpExecute) 176 177 class MediaOpMeta(type): 178 def buildCppClasses(self, name, Name, suffix, code): 179 180 # Globals to stick the output in 181 global header_output 182 global decoder_output 183 global exec_output 184 185 # If op2 is used anywhere, make register and immediate versions 186 # of this code. 187 matcher = re.compile("(?<!\\w)(?P<prefix>s?)op2(?P<typeQual>\\.\\w+)?") 188 match = matcher.search(code) 189 if match: 190 typeQual = "" 191 if match.group("typeQual"): 192 typeQual = match.group("typeQual") 193 src2_name = "%sFpSrcReg2%s" % (match.group("prefix"), typeQual) 194 self.buildCppClasses(name, Name, suffix, 195 matcher.sub(src2_name, code)) 196 self.buildCppClasses(name + "i", Name, suffix + "Imm", 197 matcher.sub("imm8", code)) 198 return 199 200 base = "X86ISA::MediaOp" 201 202 # If imm8 shows up in the code, use the immediate templates, if 203 # not, hopefully the register ones will be correct. 204 matcher = re.compile("(?<!\w)imm8(?!\w)") 205 if matcher.search(code): 206 base += "Imm" 207 templates = immTemplates 208 else: 209 base += "Reg" 210 templates = regTemplates 211 212 # Get everything ready for the substitution 213 iop = InstObjParams(name, Name + suffix, base, {"code" : code}) 214 215 # Generate the actual code (finally!) 
216 header_output += templates[0].subst(iop) 217 decoder_output += templates[1].subst(iop) 218 exec_output += templates[2].subst(iop) 219 220 221 def __new__(mcls, Name, bases, dict): 222 abstract = False 223 name = Name.lower() 224 if "abstract" in dict: 225 abstract = dict['abstract'] 226 del dict['abstract'] 227 228 cls = super(MediaOpMeta, mcls).__new__(mcls, Name, bases, dict) 229 if not abstract: 230 cls.className = Name 231 cls.base_mnemonic = name 232 code = cls.code 233 234 # Set up the C++ classes 235 mcls.buildCppClasses(cls, name, Name, "", code) 236 237 # Hook into the microassembler dict 238 global microopClasses 239 microopClasses[name] = cls 240 241 # If op2 is used anywhere, make register and immediate versions 242 # of this code. 243 matcher = re.compile("op2(?P<typeQual>\\.\\w+)?") 244 if matcher.search(code): 245 microopClasses[name + 'i'] = cls 246 return cls 247 248 249 class MediaOp(X86Microop): 250 __metaclass__ = MediaOpMeta 251 # This class itself doesn't act as a microop 252 abstract = True 253 254 def __init__(self, dest, src1, op2, 255 size = None, destSize = None, srcSize = None, ext = None): 256 self.dest = dest 257 self.src1 = src1 258 self.op2 = op2 259 if size is not None: 260 self.srcSize = size 261 self.destSize = size 262 if srcSize is not None: 263 self.srcSize = srcSize 264 if destSize is not None: 265 self.destSize = destSize 266 if self.srcSize is None: 267 raise Exception, "Source size not set." 268 if self.destSize is None: 269 raise Exception, "Dest size not set." 
270 if ext is None: 271 self.ext = 0 272 else: 273 self.ext = ext 274 275 def getAllocator(self, *microFlags): 276 className = self.className 277 if self.mnemonic == self.base_mnemonic + 'i': 278 className += "Imm" 279 allocator = '''new %(class_name)s(machInst, macrocodeBlock 280 %(flags)s, %(src1)s, %(op2)s, %(dest)s, 281 %(srcSize)s, %(destSize)s, %(ext)s)''' % { 282 "class_name" : className, 283 "flags" : self.microFlagsText(microFlags), 284 "src1" : self.src1, "op2" : self.op2, 285 "dest" : self.dest, 286 "srcSize" : self.srcSize, 287 "destSize" : self.destSize, 288 "ext" : self.ext} 289 return allocator 290 291 class Mov2int(MediaOp): 292 def __init__(self, dest, src1, src2 = 0, \ 293 size = None, destSize = None, srcSize = None, ext = None): 294 super(Mov2int, self).__init__(dest, src1,\ 295 src2, size, destSize, srcSize, ext) 296 code = ''' 297 int items = sizeof(FloatRegBits) / srcSize; 298 int offset = imm8; 299 if (bits(src1, 0) && (ext & 0x1)) 300 offset -= items; 301 if (offset >= 0 && offset < items) { 302 uint64_t fpSrcReg1 = 303 bits(FpSrcReg1.uqw, 304 (offset + 1) * srcSize * 8 - 1, 305 (offset + 0) * srcSize * 8); 306 DestReg = merge(0, fpSrcReg1, destSize); 307 } else { 308 DestReg = DestReg; 309 } 310 ''' 311 312 class Mov2fp(MediaOp): 313 def __init__(self, dest, src1, src2 = 0, \ 314 size = None, destSize = None, srcSize = None, ext = None): 315 super(Mov2fp, self).__init__(dest, src1,\ 316 src2, size, destSize, srcSize, ext) 317 code = ''' 318 int items = sizeof(FloatRegBits) / destSize; 319 int offset = imm8; 320 if (bits(dest, 0) && (ext & 0x1)) 321 offset -= items; 322 if (offset >= 0 && offset < items) { 323 uint64_t srcReg1 = pick(SrcReg1, 0, srcSize); 324 FpDestReg.uqw = 325 insertBits(FpDestReg.uqw, 326 (offset + 1) * destSize * 8 - 1, 327 (offset + 0) * destSize * 8, srcReg1); 328 } else { 329 FpDestReg.uqw = FpDestReg.uqw; 330 } 331 ''' 332 333 class Movsign(MediaOp): 334 def __init__(self, dest, src, \ 335 size = None, destSize = 
None, srcSize = None, ext = None): 336 super(Movsign, self).__init__(dest, src,\ 337 "InstRegIndex(0)", size, destSize, srcSize, ext) 338 code = ''' 339 int items = sizeof(FloatRegBits) / srcSize; 340 uint64_t result = 0; 341 int offset = (ext & 0x1) ? items : 0; 342 for (int i = 0; i < items; i++) { 343 uint64_t picked = 344 bits(FpSrcReg1.uqw, (i + 1) * 8 * srcSize - 1); 345 result = insertBits(result, i + offset, i + offset, picked); 346 } 347 DestReg = DestReg | result; 348 ''' 349 350 class Unpack(MediaOp): 351 code = ''' 352 assert(srcSize == destSize); 353 int size = destSize; 354 int items = (sizeof(FloatRegBits) / size) / 2; 355 int offset = ext ? items : 0; 356 uint64_t result = 0; 357 for (int i = 0; i < items; i++) { 358 uint64_t pickedLow = 359 bits(FpSrcReg1.uqw, (i + offset + 1) * 8 * size - 1, 360 (i + offset) * 8 * size); 361 result = insertBits(result, 362 (2 * i + 1) * 8 * size - 1, 363 (2 * i + 0) * 8 * size, 364 pickedLow); 365 uint64_t pickedHigh = 366 bits(FpSrcReg2.uqw, (i + offset + 1) * 8 * size - 1, 367 (i + offset) * 8 * size); 368 result = insertBits(result, 369 (2 * i + 2) * 8 * size - 1, 370 (2 * i + 1) * 8 * size, 371 pickedHigh); 372 } 373 FpDestReg.uqw = result; 374 ''' 375 376 class Pack(MediaOp): 377 code = ''' 378 assert(srcSize == destSize * 2); 379 int items = (sizeof(FloatRegBits) / destSize); 380 int destBits = destSize * 8; 381 int srcBits = srcSize * 8; 382 uint64_t result = 0; 383 int i; 384 for (i = 0; i < items / 2; i++) { 385 uint64_t picked = 386 bits(FpSrcReg1.uqw, (i + 1) * srcBits - 1, 387 (i + 0) * srcBits); 388 unsigned signBit = bits(picked, srcBits - 1); 389 uint64_t overflow = bits(picked, srcBits - 1, destBits - 1); 390 391 // Handle saturation. 
392 if (signBit) { 393 if (overflow != mask(destBits - srcBits + 1)) { 394 if (ext & 0x1) 395 picked = (1 << (destBits - 1)); 396 else 397 picked = 0; 398 } 399 } else { 400 if (overflow != 0) { 401 if (ext & 0x1) 402 picked = mask(destBits - 1); 403 else 404 picked = mask(destBits); 405 } 406 } 407 result = insertBits(result, 408 (i + 1) * destBits - 1, 409 (i + 0) * destBits, 410 picked); 411 } 412 for (;i < items; i++) { 413 uint64_t picked = 414 bits(FpSrcReg2.uqw, (i - items + 1) * srcBits - 1, 415 (i - items + 0) * srcBits); 416 unsigned signBit = bits(picked, srcBits - 1); 417 uint64_t overflow = bits(picked, srcBits - 1, destBits - 1); 418 419 // Handle saturation. 420 if (signBit) { 421 if (overflow != mask(destBits - srcBits + 1)) { 422 if (ext & 0x1) 423 picked = (1 << (destBits - 1)); 424 else 425 picked = 0; 426 } 427 } else { 428 if (overflow != 0) { 429 if (ext & 0x1) 430 picked = mask(destBits - 1); 431 else 432 picked = mask(destBits); 433 } 434 } 435 result = insertBits(result, 436 (i + 1) * destBits - 1, 437 (i + 0) * destBits, 438 picked); 439 } 440 FpDestReg.uqw = result; 441 ''' 442 443 class Mxor(MediaOp): 444 def __init__(self, dest, src1, src2): 445 super(Mxor, self).__init__(dest, src1, src2, 1) 446 code = ''' 447 FpDestReg.uqw = FpSrcReg1.uqw ^ FpSrcReg2.uqw; 448 ''' 449 450 class Mor(MediaOp): 451 def __init__(self, dest, src1, src2): 452 super(Mor, self).__init__(dest, src1, src2, 1) 453 code = ''' 454 FpDestReg.uqw = FpSrcReg1.uqw | FpSrcReg2.uqw; 455 ''' 456 457 class Mand(MediaOp): 458 def __init__(self, dest, src1, src2): 459 super(Mand, self).__init__(dest, src1, src2, 1) 460 code = ''' 461 FpDestReg.uqw = FpSrcReg1.uqw & FpSrcReg2.uqw; 462 ''' 463 464 class Mandn(MediaOp): 465 def __init__(self, dest, src1, src2): 466 super(Mandn, self).__init__(dest, src1, src2, 1) 467 code = ''' 468 FpDestReg.uqw = ~FpSrcReg1.uqw & FpSrcReg2.uqw; 469 ''' 470 471 class Mminf(MediaOp): 472 code = ''' 473 union floatInt 474 { 475 float f; 476 
uint32_t i; 477 }; 478 union doubleInt 479 { 480 double d; 481 uint64_t i; 482 }; 483 484 assert(srcSize == destSize); 485 int size = srcSize; 486 int sizeBits = size * 8; 487 assert(srcSize == 4 || srcSize == 8); 488 int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size); 489 uint64_t result = FpDestReg.uqw; 490 491 for (int i = 0; i < items; i++) { 492 double arg1, arg2; 493 int hiIndex = (i + 1) * sizeBits - 1; 494 int loIndex = (i + 0) * sizeBits; 495 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); 496 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex); 497 498 if (size == 4) { 499 floatInt fi; 500 fi.i = arg1Bits; 501 arg1 = fi.f; 502 fi.i = arg2Bits; 503 arg2 = fi.f; 504 } else { 505 doubleInt di; 506 di.i = arg1Bits; 507 arg1 = di.d; 508 di.i = arg2Bits; 509 arg2 = di.d; 510 } 511 512 if (arg1 < arg2) { 513 result = insertBits(result, hiIndex, loIndex, arg1Bits); 514 } else { 515 result = insertBits(result, hiIndex, loIndex, arg2Bits); 516 } 517 } 518 FpDestReg.uqw = result; 519 ''' 520 521 class Mmaxf(MediaOp): 522 code = ''' 523 union floatInt 524 { 525 float f; 526 uint32_t i; 527 }; 528 union doubleInt 529 { 530 double d; 531 uint64_t i; 532 }; 533 534 assert(srcSize == destSize); 535 int size = srcSize; 536 int sizeBits = size * 8; 537 assert(srcSize == 4 || srcSize == 8); 538 int items = (ext & 0x1) ? 
1: (sizeof(FloatRegBits) / size); 539 uint64_t result = FpDestReg.uqw; 540 541 for (int i = 0; i < items; i++) { 542 double arg1, arg2; 543 int hiIndex = (i + 1) * sizeBits - 1; 544 int loIndex = (i + 0) * sizeBits; 545 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); 546 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex); 547 548 if (size == 4) { 549 floatInt fi; 550 fi.i = arg1Bits; 551 arg1 = fi.f; 552 fi.i = arg2Bits; 553 arg2 = fi.f; 554 } else { 555 doubleInt di; 556 di.i = arg1Bits; 557 arg1 = di.d; 558 di.i = arg2Bits; 559 arg2 = di.d; 560 } 561 562 if (arg1 > arg2) { 563 result = insertBits(result, hiIndex, loIndex, arg1Bits); 564 } else { 565 result = insertBits(result, hiIndex, loIndex, arg2Bits); 566 } 567 } 568 FpDestReg.uqw = result; 569 ''' 570 571 class Mmini(MediaOp): 572 code = ''' 573 574 assert(srcSize == destSize); 575 int size = srcSize; 576 int sizeBits = size * 8; 577 int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size); 578 uint64_t result = FpDestReg.uqw; 579 580 for (int i = 0; i < items; i++) { 581 int hiIndex = (i + 1) * sizeBits - 1; 582 int loIndex = (i + 0) * sizeBits; 583 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); 584 int64_t arg1 = arg1Bits | 585 (0 - (arg1Bits & (1 << (sizeBits - 1)))); 586 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex); 587 int64_t arg2 = arg2Bits | 588 (0 - (arg2Bits & (1 << (sizeBits - 1)))); 589 uint64_t resBits; 590 591 if (ext & 0x2) { 592 if (arg1 < arg2) { 593 resBits = arg1Bits; 594 } else { 595 resBits = arg2Bits; 596 } 597 } else { 598 if (arg1Bits < arg2Bits) { 599 resBits = arg1Bits; 600 } else { 601 resBits = arg2Bits; 602 } 603 } 604 result = insertBits(result, hiIndex, loIndex, resBits); 605 } 606 FpDestReg.uqw = result; 607 ''' 608 609 class Mmaxi(MediaOp): 610 code = ''' 611 612 assert(srcSize == destSize); 613 int size = srcSize; 614 int sizeBits = size * 8; 615 int items = (ext & 0x1) ? 
1: (sizeof(FloatRegBits) / size); 616 uint64_t result = FpDestReg.uqw; 617 618 for (int i = 0; i < items; i++) { 619 int hiIndex = (i + 1) * sizeBits - 1; 620 int loIndex = (i + 0) * sizeBits; 621 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); 622 int64_t arg1 = arg1Bits | 623 (0 - (arg1Bits & (1 << (sizeBits - 1)))); 624 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex); 625 int64_t arg2 = arg2Bits | 626 (0 - (arg2Bits & (1 << (sizeBits - 1)))); 627 uint64_t resBits; 628 629 if (ext & 0x2) { 630 if (arg1 > arg2) { 631 resBits = arg1Bits; 632 } else { 633 resBits = arg2Bits; 634 } 635 } else { 636 if (arg1Bits > arg2Bits) { 637 resBits = arg1Bits; 638 } else { 639 resBits = arg2Bits; 640 } 641 } 642 result = insertBits(result, hiIndex, loIndex, resBits); 643 } 644 FpDestReg.uqw = result; 645 ''' 646 647 class Msqrt(MediaOp): 648 def __init__(self, dest, src, \ 649 size = None, destSize = None, srcSize = None, ext = None): 650 super(Msqrt, self).__init__(dest, src,\ 651 "InstRegIndex(0)", size, destSize, srcSize, ext) 652 code = ''' 653 union floatInt 654 { 655 float f; 656 uint32_t i; 657 }; 658 union doubleInt 659 { 660 double d; 661 uint64_t i; 662 }; 663 664 assert(srcSize == destSize); 665 int size = srcSize; 666 int sizeBits = size * 8; 667 assert(srcSize == 4 || srcSize == 8); 668 int items = (ext & 0x1) ? 
1: (sizeof(FloatRegBits) / size); 669 uint64_t result = FpDestReg.uqw; 670 671 for (int i = 0; i < items; i++) { 672 int hiIndex = (i + 1) * sizeBits - 1; 673 int loIndex = (i + 0) * sizeBits; 674 uint64_t argBits = bits(FpSrcReg1.uqw, hiIndex, loIndex); 675 676 if (size == 4) { 677 floatInt fi; 678 fi.i = argBits; 679 fi.f = sqrt(fi.f); 680 argBits = fi.i; 681 } else { 682 doubleInt di; 683 di.i = argBits; 684 di.d = sqrt(di.d); 685 argBits = di.i; 686 } 687 result = insertBits(result, hiIndex, loIndex, argBits); 688 } 689 FpDestReg.uqw = result; 690 ''' 691 692 class Maddf(MediaOp): 693 code = ''' 694 union floatInt 695 { 696 float f; 697 uint32_t i; 698 }; 699 union doubleInt 700 { 701 double d; 702 uint64_t i; 703 }; 704 705 assert(srcSize == destSize); 706 int size = srcSize; 707 int sizeBits = size * 8; 708 assert(srcSize == 4 || srcSize == 8); 709 int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size); 710 uint64_t result = FpDestReg.uqw; 711 712 for (int i = 0; i < items; i++) { 713 int hiIndex = (i + 1) * sizeBits - 1; 714 int loIndex = (i + 0) * sizeBits; 715 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); 716 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex); 717 uint64_t resBits; 718 719 if (size == 4) { 720 floatInt arg1, arg2, res; 721 arg1.i = arg1Bits; 722 arg2.i = arg2Bits; 723 res.f = arg1.f + arg2.f; 724 resBits = res.i; 725 } else { 726 doubleInt arg1, arg2, res; 727 arg1.i = arg1Bits; 728 arg2.i = arg2Bits; 729 res.d = arg1.d + arg2.d; 730 resBits = res.i; 731 } 732 733 result = insertBits(result, hiIndex, loIndex, resBits); 734 } 735 FpDestReg.uqw = result; 736 ''' 737 738 class Msubf(MediaOp): 739 code = ''' 740 union floatInt 741 { 742 float f; 743 uint32_t i; 744 }; 745 union doubleInt 746 { 747 double d; 748 uint64_t i; 749 }; 750 751 assert(srcSize == destSize); 752 int size = srcSize; 753 int sizeBits = size * 8; 754 assert(srcSize == 4 || srcSize == 8); 755 int items = (ext & 0x1) ? 
1: (sizeof(FloatRegBits) / size); 756 uint64_t result = FpDestReg.uqw; 757 758 for (int i = 0; i < items; i++) { 759 int hiIndex = (i + 1) * sizeBits - 1; 760 int loIndex = (i + 0) * sizeBits; 761 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); 762 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex); 763 uint64_t resBits; 764 765 if (size == 4) { 766 floatInt arg1, arg2, res; 767 arg1.i = arg1Bits; 768 arg2.i = arg2Bits; 769 res.f = arg1.f - arg2.f; 770 resBits = res.i; 771 } else { 772 doubleInt arg1, arg2, res; 773 arg1.i = arg1Bits; 774 arg2.i = arg2Bits; 775 res.d = arg1.d - arg2.d; 776 resBits = res.i; 777 } 778 779 result = insertBits(result, hiIndex, loIndex, resBits); 780 } 781 FpDestReg.uqw = result; 782 ''' 783 784 class Mmulf(MediaOp): 785 code = ''' 786 union floatInt 787 { 788 float f; 789 uint32_t i; 790 }; 791 union doubleInt 792 { 793 double d; 794 uint64_t i; 795 }; 796 797 assert(srcSize == destSize); 798 int size = srcSize; 799 int sizeBits = size * 8; 800 assert(srcSize == 4 || srcSize == 8); 801 int items = (ext & 0x1) ? 
1: (sizeof(FloatRegBits) / size); 802 uint64_t result = FpDestReg.uqw; 803 804 for (int i = 0; i < items; i++) { 805 int hiIndex = (i + 1) * sizeBits - 1; 806 int loIndex = (i + 0) * sizeBits; 807 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); 808 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex); 809 uint64_t resBits; 810 811 if (size == 4) { 812 floatInt arg1, arg2, res; 813 arg1.i = arg1Bits; 814 arg2.i = arg2Bits; 815 res.f = arg1.f * arg2.f; 816 resBits = res.i; 817 } else { 818 doubleInt arg1, arg2, res; 819 arg1.i = arg1Bits; 820 arg2.i = arg2Bits; 821 res.d = arg1.d * arg2.d; 822 resBits = res.i; 823 } 824 825 result = insertBits(result, hiIndex, loIndex, resBits); 826 } 827 FpDestReg.uqw = result; 828 ''' 829 830 class Mdivf(MediaOp): 831 code = ''' 832 union floatInt 833 { 834 float f; 835 uint32_t i; 836 }; 837 union doubleInt 838 { 839 double d; 840 uint64_t i; 841 }; 842 843 assert(srcSize == destSize); 844 int size = srcSize; 845 int sizeBits = size * 8; 846 assert(srcSize == 4 || srcSize == 8); 847 int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size); 848 uint64_t result = FpDestReg.uqw; 849 850 for (int i = 0; i < items; i++) { 851 int hiIndex = (i + 1) * sizeBits - 1; 852 int loIndex = (i + 0) * sizeBits; 853 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); 854 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex); 855 uint64_t resBits; 856 857 if (size == 4) { 858 floatInt arg1, arg2, res; 859 arg1.i = arg1Bits; 860 arg2.i = arg2Bits; 861 res.f = arg1.f / arg2.f; 862 resBits = res.i; 863 } else { 864 doubleInt arg1, arg2, res; 865 arg1.i = arg1Bits; 866 arg2.i = arg2Bits; 867 res.d = arg1.d / arg2.d; 868 resBits = res.i; 869 } 870 871 result = insertBits(result, hiIndex, loIndex, resBits); 872 } 873 FpDestReg.uqw = result; 874 ''' 875 876 class Maddi(MediaOp): 877 code = ''' 878 assert(srcSize == destSize); 879 int size = srcSize; 880 int sizeBits = size * 8; 881 int items = (ext & 0x1) ? 
1: (sizeof(FloatRegBits) / size); 882 uint64_t result = FpDestReg.uqw; 883 884 for (int i = 0; i < items; i++) { 885 int hiIndex = (i + 1) * sizeBits - 1; 886 int loIndex = (i + 0) * sizeBits; 887 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); 888 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex); 889 uint64_t resBits = arg1Bits + arg2Bits; 890 891 if (ext & 0x2) { 892 if (findCarry(sizeBits, resBits, arg1Bits, arg2Bits)) 893 resBits = mask(sizeBits); 894 } else if (ext & 0x4) { 895 int arg1Sign = bits(arg1Bits, sizeBits - 1); 896 int arg2Sign = bits(arg2Bits, sizeBits - 1); 897 int resSign = bits(resBits, sizeBits - 1); 898 if ((arg1Sign == arg2Sign) && (arg1Sign != resSign)) { 899 if (resSign == 0) 900 resBits = (1 << (sizeBits - 1)); 901 else 902 resBits = mask(sizeBits - 1); 903 } 904 } 905 906 result = insertBits(result, hiIndex, loIndex, resBits); 907 } 908 FpDestReg.uqw = result; 909 ''' 910 911 class Msubi(MediaOp): 912 code = ''' 913 assert(srcSize == destSize); 914 int size = srcSize; 915 int sizeBits = size * 8; 916 int items = (ext & 0x1) ? 
1: (sizeof(FloatRegBits) / size); 917 uint64_t result = FpDestReg.uqw; 918 919 for (int i = 0; i < items; i++) { 920 int hiIndex = (i + 1) * sizeBits - 1; 921 int loIndex = (i + 0) * sizeBits; 922 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); 923 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex); 924 uint64_t resBits = arg1Bits - arg2Bits; 925 926 if (ext & 0x2) { 927 if (arg2Bits > arg1Bits) { 928 resBits = 0; 929 } else if (!findCarry(sizeBits, resBits, 930 arg1Bits, ~arg2Bits)) { 931 resBits = mask(sizeBits); 932 } 933 } else if (ext & 0x4) { 934 int arg1Sign = bits(arg1Bits, sizeBits - 1); 935 int arg2Sign = !bits(arg2Bits, sizeBits - 1); 936 int resSign = bits(resBits, sizeBits - 1); 937 if ((arg1Sign == arg2Sign) && (arg1Sign != resSign)) { 938 if (resSign == 0) 939 resBits = (1 << (sizeBits - 1)); 940 else 941 resBits = mask(sizeBits - 1); 942 } 943 } 944 945 result = insertBits(result, hiIndex, loIndex, resBits); 946 } 947 FpDestReg.uqw = result; 948 ''' 949 950 class Mmuli(MediaOp): 951 code = ''' 952 int srcBits = srcSize * 8; 953 int destBits = destSize * 8; 954 assert(destBits <= 64); 955 assert(destSize >= srcSize); 956 int items = (ext & 0x1) ? 
1: (sizeof(FloatRegBits) / destSize); 957 uint64_t result = FpDestReg.uqw; 958 959 for (int i = 0; i < items; i++) { 960 int offset = 0; 961 if (ext & 16) { 962 if (ext & 32) 963 offset = i * (destBits - srcBits); 964 else 965 offset = i * (destBits - srcBits) + srcBits; 966 } 967 int srcHiIndex = (i + 1) * srcBits - 1 + offset; 968 int srcLoIndex = (i + 0) * srcBits + offset; 969 uint64_t arg1Bits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex); 970 uint64_t arg2Bits = bits(FpSrcReg2.uqw, srcHiIndex, srcLoIndex); 971 uint64_t resBits; 972 973 if (ext & 0x2) { 974 int64_t arg1 = arg1Bits | 975 (0 - (arg1Bits & (1 << (srcBits - 1)))); 976 int64_t arg2 = arg2Bits | 977 (0 - (arg2Bits & (1 << (srcBits - 1)))); 978 resBits = (uint64_t)(arg1 * arg2); 979 } else { 980 resBits = arg1Bits * arg2Bits; 981 } 982 983 if (ext & 0x4) 984 resBits += (1 << (destBits - 1)); 985 986 if (ext & 0x8) 987 resBits >>= destBits; 988 989 int destHiIndex = (i + 1) * destBits - 1; 990 int destLoIndex = (i + 0) * destBits; 991 result = insertBits(result, destHiIndex, destLoIndex, resBits); 992 } 993 FpDestReg.uqw = result; 994 ''' 995 996 class Mavg(MediaOp): 997 code = ''' 998 assert(srcSize == destSize); 999 int size = srcSize; 1000 int sizeBits = size * 8; 1001 int items = (ext & 0x1) ? 
1: (sizeof(FloatRegBits) / size); 1002 uint64_t result = FpDestReg.uqw; 1003 1004 for (int i = 0; i < items; i++) { 1005 int hiIndex = (i + 1) * sizeBits - 1; 1006 int loIndex = (i + 0) * sizeBits; 1007 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); 1008 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex); 1009 uint64_t resBits = (arg1Bits + arg2Bits + 1) / 2; 1010 1011 result = insertBits(result, hiIndex, loIndex, resBits); 1012 } 1013 FpDestReg.uqw = result; 1014 ''' 1015 1016 class Msad(MediaOp): 1017 code = ''' 1018 int srcBits = srcSize * 8; 1019 int items = sizeof(FloatRegBits) / srcSize; 1020 1021 uint64_t sum = 0; 1022 for (int i = 0; i < items; i++) { 1023 int hiIndex = (i + 1) * srcBits - 1; 1024 int loIndex = (i + 0) * srcBits; 1025 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); 1026 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex); 1027 int64_t resBits = arg1Bits - arg2Bits; 1028 if (resBits < 0) 1029 resBits = -resBits; 1030 sum += resBits; 1031 } 1032 FpDestReg.uqw = sum & mask(destSize * 8); 1033 ''' 1034 1035 class Msrl(MediaOp): 1036 code = ''' 1037 1038 assert(srcSize == destSize); 1039 int size = srcSize; 1040 int sizeBits = size * 8; 1041 int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size); 1042 uint64_t shiftAmt = op2.uqw; 1043 uint64_t result = FpDestReg.uqw; 1044 1045 for (int i = 0; i < items; i++) { 1046 int hiIndex = (i + 1) * sizeBits - 1; 1047 int loIndex = (i + 0) * sizeBits; 1048 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); 1049 uint64_t resBits; 1050 if (shiftAmt >= sizeBits) { 1051 resBits = 0; 1052 } else { 1053 resBits = (arg1Bits >> shiftAmt) & 1054 mask(sizeBits - shiftAmt); 1055 } 1056 1057 result = insertBits(result, hiIndex, loIndex, resBits); 1058 } 1059 FpDestReg.uqw = result; 1060 ''' 1061 1062 class Msra(MediaOp): 1063 code = ''' 1064 1065 assert(srcSize == destSize); 1066 int size = srcSize; 1067 int sizeBits = size * 8; 1068 int items = (ext & 0x1) ? 
1: (sizeof(FloatRegBits) / size); 1069 uint64_t shiftAmt = op2.uqw; 1070 uint64_t result = FpDestReg.uqw; 1071 1072 for (int i = 0; i < items; i++) { 1073 int hiIndex = (i + 1) * sizeBits - 1; 1074 int loIndex = (i + 0) * sizeBits; 1075 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); 1076 uint64_t resBits; 1077 if (shiftAmt >= sizeBits) { 1078 if (bits(arg1Bits, sizeBits - 1)) 1079 resBits = mask(sizeBits); 1080 else 1081 resBits = 0; 1082 } else { 1083 resBits = (arg1Bits >> shiftAmt); 1084 resBits = resBits | 1085 (0 - (resBits & (1 << (sizeBits - 1 - shiftAmt)))); 1086 } 1087 1088 result = insertBits(result, hiIndex, loIndex, resBits); 1089 } 1090 FpDestReg.uqw = result; 1091 ''' 1092 1093 class Msll(MediaOp): 1094 code = ''' 1095 1096 assert(srcSize == destSize); 1097 int size = srcSize; 1098 int sizeBits = size * 8; 1099 int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size); 1100 uint64_t shiftAmt = op2.uqw; 1101 uint64_t result = FpDestReg.uqw; 1102 1103 for (int i = 0; i < items; i++) { 1104 int hiIndex = (i + 1) * sizeBits - 1; 1105 int loIndex = (i + 0) * sizeBits; 1106 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); 1107 uint64_t resBits; 1108 if (shiftAmt >= sizeBits) { 1109 resBits = 0; 1110 } else { 1111 resBits = (arg1Bits << shiftAmt); 1112 } 1113 1114 result = insertBits(result, hiIndex, loIndex, resBits); 1115 } 1116 FpDestReg.uqw = result; 1117 ''' 1118 1119 class Cvti2f(MediaOp): 1120 def __init__(self, dest, src, \ 1121 size = None, destSize = None, srcSize = None, ext = None): 1122 super(Cvti2f, self).__init__(dest, src,\ 1123 "InstRegIndex(0)", size, destSize, srcSize, ext) 1124 code = ''' 1125 union floatInt 1126 { 1127 float f; 1128 uint32_t i; 1129 }; 1130 union doubleInt 1131 { 1132 double d; 1133 uint64_t i; 1134 }; 1135 1136 assert(destSize == 4 || destSize == 8); 1137 assert(srcSize == 4 || srcSize == 8); 1138 int srcSizeBits = srcSize * 8; 1139 int destSizeBits = destSize * 8; 1140 int items; 1141 int 
srcStart = 0; 1142 int destStart = 0; 1143 if (srcSize == 2 * destSize) { 1144 items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / srcSize; 1145 if (ext & 0x2) 1146 destStart = destSizeBits * items; 1147 } else if (destSize == 2 * srcSize) { 1148 items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize; 1149 if (ext & 0x2) 1150 srcStart = srcSizeBits * items; 1151 } else { 1152 items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize; 1153 } 1154 uint64_t result = FpDestReg.uqw; 1155 1156 for (int i = 0; i < items; i++) { 1157 int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1; 1158 int srcLoIndex = srcStart + (i + 0) * srcSizeBits; 1159 uint64_t argBits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex); 1160 int64_t sArg = argBits | (0 - (argBits & (1 << srcHiIndex))); 1161 double arg = sArg; 1162 1163 if (destSize == 4) { 1164 floatInt fi; 1165 fi.f = arg; 1166 argBits = fi.i; 1167 } else { 1168 doubleInt di; 1169 di.d = arg; 1170 argBits = di.i; 1171 } 1172 int destHiIndex = destStart + (i + 1) * destSizeBits - 1; 1173 int destLoIndex = destStart + (i + 0) * destSizeBits; 1174 result = insertBits(result, destHiIndex, destLoIndex, argBits); 1175 } 1176 FpDestReg.uqw = result; 1177 ''' 1178 1179 class Cvtf2f(MediaOp): 1180 def __init__(self, dest, src, \ 1181 size = None, destSize = None, srcSize = None, ext = None): 1182 super(Cvtf2f, self).__init__(dest, src,\ 1183 "InstRegIndex(0)", size, destSize, srcSize, ext) 1184 code = ''' 1185 union floatInt 1186 { 1187 float f; 1188 uint32_t i; 1189 }; 1190 union doubleInt 1191 { 1192 double d; 1193 uint64_t i; 1194 }; 1195 1196 assert(destSize == 4 || destSize == 8); 1197 assert(srcSize == 4 || srcSize == 8); 1198 int srcSizeBits = srcSize * 8; 1199 int destSizeBits = destSize * 8; 1200 int items; 1201 int srcStart = 0; 1202 int destStart = 0; 1203 if (srcSize == 2 * destSize) { 1204 items = (ext & 0x1) ? 
1: sizeof(FloatRegBits) / srcSize; 1205 if (ext & 0x2) 1206 destStart = destSizeBits * items; 1207 } else if (destSize == 2 * srcSize) { 1208 items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize; 1209 if (ext & 0x2) 1210 srcStart = srcSizeBits * items; 1211 } else { 1212 items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize; 1213 } 1214 uint64_t result = FpDestReg.uqw; 1215 1216 for (int i = 0; i < items; i++) { 1217 int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1; 1218 int srcLoIndex = srcStart + (i + 0) * srcSizeBits; 1219 uint64_t argBits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex); 1220 double arg; 1221 1222 if (srcSize == 4) { 1223 floatInt fi; 1224 fi.i = argBits; 1225 arg = fi.f; 1226 } else { 1227 doubleInt di; 1228 di.i = argBits; 1229 arg = di.d; 1230 } 1231 if (destSize == 4) { 1232 floatInt fi; 1233 fi.f = arg; 1234 argBits = fi.i; 1235 } else { 1236 doubleInt di; 1237 di.d = arg; 1238 argBits = di.i; 1239 } 1240 int destHiIndex = destStart + (i + 1) * destSizeBits - 1; 1241 int destLoIndex = destStart + (i + 0) * destSizeBits; 1242 result = insertBits(result, destHiIndex, destLoIndex, argBits); 1243 } 1244 FpDestReg.uqw = result; 1245 ''' 1246 1247 class Mcmpi2r(MediaOp): 1248 code = ''' 1249 union floatInt 1250 { 1251 float f; 1252 uint32_t i; 1253 }; 1254 union doubleInt 1255 { 1256 double d; 1257 uint64_t i; 1258 }; 1259 1260 assert(srcSize == destSize); 1261 int size = srcSize; 1262 int sizeBits = size * 8; 1263 int items = (ext & 0x1) ? 
1: (sizeof(FloatRegBits) / size); 1264 uint64_t result = FpDestReg.uqw; 1265 1266 for (int i = 0; i < items; i++) { 1267 int hiIndex = (i + 1) * sizeBits - 1; 1268 int loIndex = (i + 0) * sizeBits; 1269 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); 1270 int64_t arg1 = arg1Bits | 1271 (0 - (arg1Bits & (1 << (sizeBits - 1)))); 1272 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex); 1273 int64_t arg2 = arg2Bits | 1274 (0 - (arg2Bits & (1 << (sizeBits - 1)))); 1275 1276 uint64_t resBits = 0; 1277 if ((ext & 0x2) == 0 && arg1 == arg2 || 1278 (ext & 0x2) == 0x2 && arg1 > arg2) 1279 resBits = mask(sizeBits); 1280 1281 result = insertBits(result, hiIndex, loIndex, resBits); 1282 } 1283 FpDestReg.uqw = result; 1284 ''' 1285}}; 1286