mediaop.isa revision 6594:a5dbea7ba3f9
// Copyright (c) 2009 The Regents of The University of Michigan
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met: redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer;
// redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution;
// neither the name of the copyright holders nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Authors: Gabe Black

// Execute method shared by every media microop: declare and read the
// operands, run the microop's code snippet, and write the results back
// only when no fault was raised.
def template MediaOpExecute {{
        Fault %(class_name)s::execute(%(CPU_exec_context)s *xc,
                Trace::InstRecord *traceData) const
        {
            Fault fault = NoFault;

            %(op_decl)s;
            %(op_rd)s;

            %(code)s;

            //Write the resulting state to the execution context
            if(fault == NoFault)
            {
                %(op_wb)s;
            }
            return fault;
        }
}};

// Class declaration for the register form (second operand is a register
// index). Two constructors are declared: one taking explicit micro-op
// flags and one without them.
def template MediaOpRegDeclare {{
    class %(class_name)s : public %(base_class)s
    {
      protected:
        void buildMe();

      public:
        %(class_name)s(ExtMachInst _machInst,
                const char * instMnem,
                bool isMicro, bool isDelayed, bool isFirst, bool isLast,
                InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
                uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);

        %(class_name)s(ExtMachInst _machInst,
                const char * instMnem,
                InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
                uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);

        %(BasicExecDeclare)s
    };
}};

// Class declaration for the immediate form (second operand is imm8).
def template MediaOpImmDeclare {{

    class %(class_name)s : public %(base_class)s
    {
      protected:
        void buildMe();

      public:
        %(class_name)s(ExtMachInst _machInst,
                const char * instMnem,
                bool isMicro, bool isDelayed, bool isFirst, bool isLast,
                InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
                uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);

        %(class_name)s(ExtMachInst _machInst,
                const char * instMnem,
                InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
                uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);

        %(BasicExecDeclare)s
    };
}};

// Constructor definitions for the register form. The constructor without
// flag arguments passes false for all four micro-op flags.
def template MediaOpRegConstructor {{

    inline void %(class_name)s::buildMe()
    {
        %(constructor)s;
    }

    inline %(class_name)s::%(class_name)s(
            ExtMachInst machInst, const char * instMnem,
            InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
            uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
        %(base_class)s(machInst, "%(mnemonic)s", instMnem,
                false, false, false, false,
                _src1, _src2, _dest, _srcSize, _destSize, _ext,
                %(op_class)s)
    {
        buildMe();
    }

    inline %(class_name)s::%(class_name)s(
            ExtMachInst machInst, const char * instMnem,
            bool isMicro, bool isDelayed, bool isFirst, bool isLast,
            InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
            uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
        %(base_class)s(machInst, "%(mnemonic)s", instMnem,
                isMicro, isDelayed, isFirst, isLast,
                _src1, _src2, _dest, _srcSize, _destSize, _ext,
                %(op_class)s)
    {
        buildMe();
    }
}};

// Constructor definitions for the immediate form.
def template MediaOpImmConstructor {{

    inline void %(class_name)s::buildMe()
    {
        %(constructor)s;
    }

    inline %(class_name)s::%(class_name)s(
            ExtMachInst machInst, const char * instMnem,
            InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
            uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
        %(base_class)s(machInst, "%(mnemonic)s", instMnem,
                false, false, false, false,
                _src1, _imm8, _dest, _srcSize, _destSize, _ext,
                %(op_class)s)
    {
        buildMe();
    }

    inline %(class_name)s::%(class_name)s(
            ExtMachInst machInst, const char * instMnem,
            bool isMicro, bool isDelayed, bool isFirst, bool isLast,
            InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
            uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
        %(base_class)s(machInst, "%(mnemonic)s", instMnem,
                isMicro, isDelayed, isFirst, isLast,
                _src1, _imm8, _dest, _srcSize, _destSize, _ext,
                %(op_class)s)
    {
        buildMe();
    }
}};

let {{
    # Make these empty strings so that concatenating onto
    # them will always work.
    header_output = ""
    decoder_output = ""
    exec_output = ""

    # (declaration, constructor, execute) template triples for the
    # immediate and register flavours of a media microop.
    immTemplates = (
        MediaOpImmDeclare,
        MediaOpImmConstructor,
        MediaOpExecute)

    regTemplates = (
        MediaOpRegDeclare,
        MediaOpRegConstructor,
        MediaOpExecute)

    # Metaclass that, for every non-abstract microop class, generates the
    # C++ classes from the class's "code" string and registers the class
    # with the microassembler.
    class MediaOpMeta(type):
        # Matches the generic second operand "op2" (optionally prefixed
        # with "s" and optionally followed by a ".type" qualifier) when it
        # is not the tail of a longer identifier. Shared by
        # buildCppClasses() and __new__() so both always agree on whether
        # an immediate variant exists.
        op2Matcher = re.compile(
                r"(?<!\w)(?P<prefix>s?)op2(?P<typeQual>\.\w+)?")
        # Matches the stand-alone identifier "imm8".
        imm8Matcher = re.compile(r"(?<!\w)imm8(?!\w)")

        # Generate the C++ for one microop.
        #   name   - lower case mnemonic
        #   Name   - C++ class name stem
        #   suffix - appended to Name to form the C++ class name
        #   code   - C++ snippet implementing the operation
        def buildCppClasses(self, name, Name, suffix, code):

            # Globals to stick the output in
            global header_output
            global decoder_output
            global exec_output

            # If op2 is used anywhere, make register and immediate versions
            # of this code.
            match = MediaOpMeta.op2Matcher.search(code)
            if match:
                typeQual = match.group("typeQual") or ""
                src2_name = "%sFpSrcReg2%s" % (match.group("prefix"), typeQual)
                self.buildCppClasses(name, Name, suffix,
                        MediaOpMeta.op2Matcher.sub(src2_name, code))
                self.buildCppClasses(name + "i", Name, suffix + "Imm",
                        MediaOpMeta.op2Matcher.sub("imm8", code))
                return

            base = "X86ISA::MediaOp"

            # If imm8 shows up in the code, use the immediate templates, if
            # not, hopefully the register ones will be correct.
            if MediaOpMeta.imm8Matcher.search(code):
                base += "Imm"
                templates = immTemplates
            else:
                base += "Reg"
                templates = regTemplates

            # Get everything ready for the substitution
            iop = InstObjParams(name, Name + suffix, base, {"code" : code})

            # Generate the actual code (finally!)
            header_output += templates[0].subst(iop)
            decoder_output += templates[1].subst(iop)
            exec_output += templates[2].subst(iop)


        # Create the Python class. Unless the class body sets a true
        # "abstract" entry, also emit its C++ and hook it into
        # microopClasses.
        def __new__(mcls, Name, bases, dict):
            name = Name.lower()
            # "abstract" is consumed here so subclasses do not inherit it.
            abstract = dict.pop('abstract', False)

            cls = super(MediaOpMeta, mcls).__new__(mcls, Name, bases, dict)
            if not abstract:
                cls.className = Name
                cls.base_mnemonic = name
                code = cls.code

                # Set up the C++ classes
                mcls.buildCppClasses(cls, name, Name, "", code)

                # Hook into the microassembler dict
                global microopClasses
                microopClasses[name] = cls

                # If op2 is used anywhere, an immediate version was
                # generated too; register it under the "i" mnemonic.
                if mcls.op2Matcher.search(code):
                    microopClasses[name + 'i'] = cls
            return cls


    # Common Python-side base of all media microops. It records the operands
    # and sizes and produces the C++ allocation expression.
    class MediaOp(X86Microop):
        __metaclass__ = MediaOpMeta
        # This class itself doesn't act as a microop
        abstract = True

        # "size" sets both the source and destination size; "srcSize" and
        # "destSize" override it individually. Raises Exception when either
        # size ends up unset.
        def __init__(self, dest, src1, op2,
                size = None, destSize = None, srcSize = None, ext = None):
            self.dest = dest
            self.src1 = src1
            self.op2 = op2
            if size is not None:
                self.srcSize = size
                self.destSize = size
            if srcSize is not None:
                self.srcSize = srcSize
            if destSize is not None:
                self.destSize = destSize
            # getattr: the attributes may not exist at all when no size was
            # passed in, and the intended error below should still be raised.
            if getattr(self, "srcSize", None) is None:
                raise Exception("Source size not set.")
            if getattr(self, "destSize", None) is None:
                raise Exception("Dest size not set.")
            if ext is None:
                self.ext = 0
            else:
                self.ext = ext

        # Return the C++ "new ..." expression that instantiates this
        # microop; the "Imm" class is used when the microop was assembled
        # under its "i" mnemonic.
        def getAllocator(self, *microFlags):
            className = self.className
            if self.mnemonic == self.base_mnemonic + 'i':
                className += "Imm"
            allocator = '''new %(class_name)s(machInst, macrocodeBlock
                    %(flags)s, %(src1)s, %(op2)s, %(dest)s,
                    %(srcSize)s, %(destSize)s, %(ext)s)''' % {
                "class_name" : className,
                "flags" : self.microFlagsText(microFlags),
                "src1" : self.src1, "op2" : self.op2,
                "dest" : self.dest,
                "srcSize" : self.srcSize,
                "destSize" : self.destSize,
                "ext" : self.ext}
            return allocator

    # Copy one srcSize-byte element, selected by imm8, from a floating
    # point register into an integer register.
    class Mov2int(MediaOp):
        def __init__(self, dest, src1, src2 = 0,
                size = None, destSize = None, srcSize = None, ext = None):
            super(Mov2int, self).__init__(dest, src1,
                    src2, size, destSize, srcSize, ext)
        code = '''
            int items = sizeof(FloatRegBits) / srcSize;
            int offset = imm8;
            if (bits(src1, 0) && (ext & 0x1))
                offset -= items;
            if (offset >= 0 && offset < items) {
                uint64_t fpSrcReg1 =
                    bits(FpSrcReg1.uqw,
                            (offset + 1) * srcSize * 8 - 1,
                            (offset + 0) * srcSize * 8);
                DestReg = merge(0, fpSrcReg1, destSize);
            } else {
                DestReg = DestReg;
            }
        '''

    # Insert the low srcSize bytes of an integer register into the
    # destSize-byte element of a floating point register selected by imm8.
    class Mov2fp(MediaOp):
        def __init__(self, dest, src1, src2 = 0,
                size = None, destSize = None, srcSize = None, ext = None):
            super(Mov2fp, self).__init__(dest, src1,
                    src2, size, destSize, srcSize, ext)
        code = '''
            int items = sizeof(FloatRegBits) / destSize;
            int offset = imm8;
            if (bits(dest, 0) && (ext & 0x1))
                offset -= items;
            if (offset >= 0 && offset < items) {
                uint64_t srcReg1 = pick(SrcReg1, 0, srcSize);
                FpDestReg.uqw =
                    insertBits(FpDestReg.uqw,
                            (offset + 1) * destSize * 8 - 1,
                            (offset + 0) * destSize * 8, srcReg1);
            } else {
                FpDestReg.uqw = FpDestReg.uqw;
            }
        '''

    # Gather the top bit of every srcSize-byte element into consecutive
    # bits of the integer destination; ext bit 0 shifts the bit positions
    # up by the element count.
    class Movsign(MediaOp):
        def __init__(self, dest, src,
                size = None, destSize = None, srcSize = None, ext = None):
            super(Movsign, self).__init__(dest, src,
                    "InstRegIndex(0)", size, destSize, srcSize, ext)
        code = '''
            int items = sizeof(FloatRegBits) / srcSize;
            uint64_t result = 0;
            int offset = (ext & 0x1) ? items : 0;
            for (int i = 0; i < items; i++) {
                uint64_t picked =
                    bits(FpSrcReg1.uqw, (i + 1) * 8 * srcSize - 1);
                result = insertBits(result, i + offset, i + offset, picked);
            }
            DestReg = DestReg | result;
        '''

    # Copy each element of the first source into the destination when the
    # top bit of the matching element of the second source is set.
    class Maskmov(MediaOp):
        code = '''
            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                if (bits(FpSrcReg2.uqw, hiIndex))
                    result = insertBits(result, hiIndex, loIndex, arg1Bits);
            }
            FpDestReg.uqw = result;
        '''

    # Interleave elements from the two sources; a non-zero ext selects the
    # upper half of each source instead of the lower half.
    class Unpack(MediaOp):
        code = '''
            assert(srcSize == destSize);
            int size = destSize;
            int items = (sizeof(FloatRegBits) / size) / 2;
            int offset = ext ? items : 0;
            uint64_t result = 0;
            for (int i = 0; i < items; i++) {
                uint64_t pickedLow =
                    bits(FpSrcReg1.uqw, (i + offset + 1) * 8 * size - 1,
                                        (i + offset) * 8 * size);
                result = insertBits(result,
                                    (2 * i + 1) * 8 * size - 1,
                                    (2 * i + 0) * 8 * size,
                                    pickedLow);
                uint64_t pickedHigh =
                    bits(FpSrcReg2.uqw, (i + offset + 1) * 8 * size - 1,
                                        (i + offset) * 8 * size);
                result = insertBits(result,
                                    (2 * i + 2) * 8 * size - 1,
                                    (2 * i + 1) * 8 * size,
                                    pickedHigh);
            }
            FpDestReg.uqw = result;
        '''

    # Narrow the elements of both sources to half their width with
    # saturation. The low half of the result comes from the first source
    # and the high half from the second. ext bit 0 selects the signed
    # saturation limits.
    # NOTE(review): the saturation conditions themselves are kept as they
    # were; only the element indexing, the mask width and the shift type
    # were corrected. The unsigned limits should be confirmed against the
    # instructions that use this microop.
    class Pack(MediaOp):
        code = '''
            assert(srcSize == destSize * 2);
            int items = (sizeof(FloatRegBits) / destSize);
            int half = items / 2;
            int destBits = destSize * 8;
            int srcBits = srcSize * 8;
            uint64_t result = 0;
            for (int i = 0; i < items; i++) {
                // Each source register only holds "half" wide elements, so
                // the second source is indexed relative to "half".
                uint64_t srcReg = (i < half) ? FpSrcReg1.uqw : FpSrcReg2.uqw;
                int srcIdx = (i < half) ? i : (i - half);
                uint64_t picked =
                    bits(srcReg, (srcIdx + 1) * srcBits - 1,
                                 (srcIdx + 0) * srcBits);
                unsigned signBit = bits(picked, srcBits - 1);
                uint64_t overflow = bits(picked, srcBits - 1, destBits - 1);

                // Handle saturation.
                if (signBit) {
                    // "overflow" is (srcBits - destBits + 1) bits wide; a
                    // negative value fits only if all of them are set.
                    if (overflow != mask(srcBits - destBits + 1)) {
                        if (ext & 0x1)
                            picked = ((uint64_t)1 << (destBits - 1));
                        else
                            picked = 0;
                    }
                } else {
                    if (overflow != 0) {
                        if (ext & 0x1)
                            picked = mask(destBits - 1);
                        else
                            picked = mask(destBits);
                    }
                }
                result = insertBits(result,
                                    (i + 1) * destBits - 1,
                                    (i + 0) * destBits,
                                    picked);
            }
            FpDestReg.uqw = result;
        '''

    # Bitwise XOR of the two sources (size is fixed to 1).
    class Mxor(MediaOp):
        def __init__(self, dest, src1, src2):
            super(Mxor, self).__init__(dest, src1, src2, 1)
        code = '''
            FpDestReg.uqw = FpSrcReg1.uqw ^ FpSrcReg2.uqw;
        '''

    # Bitwise OR of the two sources (size is fixed to 1).
    class Mor(MediaOp):
        def __init__(self, dest, src1, src2):
            super(Mor, self).__init__(dest, src1, src2, 1)
        code = '''
            FpDestReg.uqw = FpSrcReg1.uqw | FpSrcReg2.uqw;
        '''

    # Bitwise AND of the two sources (size is fixed to 1).
    class Mand(MediaOp):
        def __init__(self, dest, src1, src2):
            super(Mand, self).__init__(dest, src1, src2, 1)
        code = '''
            FpDestReg.uqw = FpSrcReg1.uqw & FpSrcReg2.uqw;
        '''

    # Bitwise AND of the complemented first source with the second source
    # (size is fixed to 1).
    class Mandn(MediaOp):
        def __init__(self, dest, src1, src2):
            super(Mandn, self).__init__(dest, src1, src2, 1)
        code = '''
            FpDestReg.uqw = ~FpSrcReg1.uqw & FpSrcReg2.uqw;
        '''

    # Element-wise floating point minimum. The second source is chosen
    # whenever "arg1 < arg2" is false. ext bit 0 restricts the operation
    # to the lowest element.
    class Mminf(MediaOp):
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            assert(srcSize == 4 || srcSize == 8);
            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                double arg1, arg2;
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);

                if (size == 4) {
                    floatInt fi;
                    fi.i = arg1Bits;
                    arg1 = fi.f;
                    fi.i = arg2Bits;
                    arg2 = fi.f;
                } else {
                    doubleInt di;
                    di.i = arg1Bits;
                    arg1 = di.d;
                    di.i = arg2Bits;
                    arg2 = di.d;
                }

                if (arg1 < arg2) {
                    result = insertBits(result, hiIndex, loIndex, arg1Bits);
                } else {
                    result = insertBits(result, hiIndex, loIndex, arg2Bits);
                }
            }
            FpDestReg.uqw = result;
        '''

    # Element-wise floating point maximum. The second source is chosen
    # whenever "arg1 > arg2" is false. ext bit 0 restricts the operation
    # to the lowest element.
    class Mmaxf(MediaOp):
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            assert(srcSize == 4 || srcSize == 8);
            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                double arg1, arg2;
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);

                if (size == 4) {
                    floatInt fi;
                    fi.i = arg1Bits;
                    arg1 = fi.f;
                    fi.i = arg2Bits;
                    arg2 = fi.f;
                } else {
                    doubleInt di;
                    di.i = arg1Bits;
                    arg1 = di.d;
                    di.i = arg2Bits;
                    arg2 = di.d;
                }

                if (arg1 > arg2) {
                    result = insertBits(result, hiIndex, loIndex, arg1Bits);
                } else {
                    result = insertBits(result, hiIndex, loIndex, arg2Bits);
                }
            }
            FpDestReg.uqw = result;
        '''

    # Element-wise integer minimum. ext bit 1 selects a signed comparison,
    # otherwise the raw element bits are compared as unsigned values.
    class Mmini(MediaOp):
        code = '''

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
            uint64_t result = FpDestReg.uqw;
            // 64 bit constant so the shift is valid for every element size.
            uint64_t signMask = (uint64_t)1 << (sizeBits - 1);

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                int64_t arg1 = arg1Bits | (0 - (arg1Bits & signMask));
                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
                int64_t arg2 = arg2Bits | (0 - (arg2Bits & signMask));
                uint64_t resBits;

                if (ext & 0x2) {
                    if (arg1 < arg2) {
                        resBits = arg1Bits;
                    } else {
                        resBits = arg2Bits;
                    }
                } else {
                    if (arg1Bits < arg2Bits) {
                        resBits = arg1Bits;
                    } else {
                        resBits = arg2Bits;
                    }
                }
                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg.uqw = result;
        '''

    # Element-wise integer maximum. ext bit 1 selects a signed comparison,
    # otherwise the raw element bits are compared as unsigned values.
    class Mmaxi(MediaOp):
        code = '''

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
            uint64_t result = FpDestReg.uqw;
            // 64 bit constant so the shift is valid for every element size.
            uint64_t signMask = (uint64_t)1 << (sizeBits - 1);

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                int64_t arg1 = arg1Bits | (0 - (arg1Bits & signMask));
                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
                int64_t arg2 = arg2Bits | (0 - (arg2Bits & signMask));
                uint64_t resBits;

                if (ext & 0x2) {
                    if (arg1 > arg2) {
                        resBits = arg1Bits;
                    } else {
                        resBits = arg2Bits;
                    }
                } else {
                    if (arg1Bits > arg2Bits) {
                        resBits = arg1Bits;
                    } else {
                        resBits = arg2Bits;
                    }
                }
                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg.uqw = result;
        '''

    # Element-wise floating point square root of the single source.
    class Msqrt(MediaOp):
        def __init__(self, dest, src,
                size = None, destSize = None, srcSize = None, ext = None):
            super(Msqrt, self).__init__(dest, src,
                    "InstRegIndex(0)", size, destSize, srcSize, ext)
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            assert(srcSize == 4 || srcSize == 8);
            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t argBits = bits(FpSrcReg1.uqw, hiIndex, loIndex);

                if (size == 4) {
                    floatInt fi;
                    fi.i = argBits;
                    fi.f = sqrt(fi.f);
                    argBits = fi.i;
                } else {
                    doubleInt di;
                    di.i = argBits;
                    di.d = sqrt(di.d);
                    argBits = di.i;
                }
                result = insertBits(result, hiIndex, loIndex, argBits);
            }
            FpDestReg.uqw = result;
        '''

    # Element-wise floating point addition.
    class Maddf(MediaOp):
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            assert(srcSize == 4 || srcSize == 8);
            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
                uint64_t resBits;

                if (size == 4) {
                    floatInt arg1, arg2, res;
                    arg1.i = arg1Bits;
                    arg2.i = arg2Bits;
                    res.f = arg1.f + arg2.f;
                    resBits = res.i;
                } else {
                    doubleInt arg1, arg2, res;
                    arg1.i = arg1Bits;
                    arg2.i = arg2Bits;
                    res.d = arg1.d + arg2.d;
                    resBits = res.i;
                }

                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg.uqw = result;
        '''

    # Element-wise floating point subtraction (first minus second source).
    class Msubf(MediaOp):
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            assert(srcSize == 4 || srcSize == 8);
            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
                uint64_t resBits;

                if (size == 4) {
                    floatInt arg1, arg2, res;
                    arg1.i = arg1Bits;
                    arg2.i = arg2Bits;
                    res.f = arg1.f - arg2.f;
                    resBits = res.i;
                } else {
                    doubleInt arg1, arg2, res;
                    arg1.i = arg1Bits;
                    arg2.i = arg2Bits;
                    res.d = arg1.d - arg2.d;
                    resBits = res.i;
                }

                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg.uqw = result;
        '''

    # Element-wise floating point multiplication.
    class Mmulf(MediaOp):
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            assert(srcSize == 4 || srcSize == 8);
            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
                uint64_t resBits;

                if (size == 4) {
                    floatInt arg1, arg2, res;
                    arg1.i = arg1Bits;
                    arg2.i = arg2Bits;
                    res.f = arg1.f * arg2.f;
                    resBits = res.i;
                } else {
                    doubleInt arg1, arg2, res;
                    arg1.i = arg1Bits;
                    arg2.i = arg2Bits;
                    res.d = arg1.d * arg2.d;
                    resBits = res.i;
                }

                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg.uqw = result;
        '''

    # Element-wise floating point division (first divided by second source).
    class Mdivf(MediaOp):
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            assert(srcSize == 4 || srcSize == 8);
            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
                uint64_t resBits;

                if (size == 4) {
                    floatInt arg1, arg2, res;
                    arg1.i = arg1Bits;
                    arg2.i = arg2Bits;
                    res.f = arg1.f / arg2.f;
                    resBits = res.i;
                } else {
                    doubleInt arg1, arg2, res;
                    arg1.i = arg1Bits;
                    arg2.i = arg2Bits;
                    res.d = arg1.d / arg2.d;
                    resBits = res.i;
                }

                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg.uqw = result;
        '''

    # Element-wise integer addition. ext bit 1 selects unsigned saturation
    # and, if that is clear, ext bit 2 selects signed saturation.
    class Maddi(MediaOp):
        code = '''
            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
                uint64_t resBits = arg1Bits + arg2Bits;

                if (ext & 0x2) {
                    if (findCarry(sizeBits, resBits, arg1Bits, arg2Bits))
                        resBits = mask(sizeBits);
                } else if (ext & 0x4) {
                    int arg1Sign = bits(arg1Bits, sizeBits - 1);
                    int arg2Sign = bits(arg2Bits, sizeBits - 1);
                    int resSign = bits(resBits, sizeBits - 1);
                    if ((arg1Sign == arg2Sign) && (arg1Sign != resSign)) {
                        // 64 bit shift: sizeBits may be as large as 64.
                        if (resSign == 0)
                            resBits = ((uint64_t)1 << (sizeBits - 1));
                        else
                            resBits = mask(sizeBits - 1);
                    }
                }

                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg.uqw = result;
        '''

    # Element-wise integer subtraction. ext bit 1 selects unsigned
    # saturation and, if that is clear, ext bit 2 selects signed saturation.
    class Msubi(MediaOp):
        code = '''
            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
                uint64_t resBits = arg1Bits - arg2Bits;

                if (ext & 0x2) {
                    if (arg2Bits > arg1Bits) {
                        resBits = 0;
                    } else if (!findCarry(sizeBits, resBits,
                                         arg1Bits, ~arg2Bits)) {
                        resBits = mask(sizeBits);
                    }
                } else if (ext & 0x4) {
                    int arg1Sign = bits(arg1Bits, sizeBits - 1);
                    int arg2Sign = !bits(arg2Bits, sizeBits - 1);
                    int resSign = bits(resBits, sizeBits - 1);
                    if ((arg1Sign == arg2Sign) && (arg1Sign != resSign)) {
                        // 64 bit shift: sizeBits may be as large as 64.
                        if (resSign == 0)
                            resBits = ((uint64_t)1 << (sizeBits - 1));
                        else
                            resBits = mask(sizeBits - 1);
                    }
                }

                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg.uqw = result;
        '''

    # Element-wise integer multiplication with destSize >= srcSize.
    # ext bit 1: sign extend the sources; bit 2: add a rounding constant;
    # bit 3: shift the product right by destBits; bits 4 and 5: choose which
    # source elements feed each destination element.
    class Mmuli(MediaOp):
        code = '''
            int srcBits = srcSize * 8;
            int destBits = destSize * 8;
            assert(destBits <= 64);
            assert(destSize >= srcSize);
            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / destSize);
            uint64_t result = FpDestReg.uqw;
            // 64 bit constant so the shift is valid for every element size.
            uint64_t srcSignMask = (uint64_t)1 << (srcBits - 1);

            for (int i = 0; i < items; i++) {
                int offset = 0;
                if (ext & 16) {
                    if (ext & 32)
                        offset = i * (destBits - srcBits);
                    else
                        offset = i * (destBits - srcBits) + srcBits;
                }
                int srcHiIndex = (i + 1) * srcBits - 1 + offset;
                int srcLoIndex = (i + 0) * srcBits + offset;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex);
                uint64_t arg2Bits = bits(FpSrcReg2.uqw, srcHiIndex, srcLoIndex);
                uint64_t resBits;

                if (ext & 0x2) {
                    int64_t arg1 = arg1Bits | (0 - (arg1Bits & srcSignMask));
                    int64_t arg2 = arg2Bits | (0 - (arg2Bits & srcSignMask));
                    resBits = (uint64_t)(arg1 * arg2);
                } else {
                    resBits = arg1Bits * arg2Bits;
                }

                if (ext & 0x4)
                    resBits += ((uint64_t)1 << (destBits - 1));

                // NOTE(review): with destBits == 64 this shift count equals
                // the operand width; confirm no user combines the two.
                if (ext & 0x8)
                    resBits >>= destBits;

                int destHiIndex = (i + 1) * destBits - 1;
                int destLoIndex = (i + 0) * destBits;
                result = insertBits(result, destHiIndex, destLoIndex, resBits);
            }
            FpDestReg.uqw = result;
        '''

    # Element-wise rounded average, (a + b + 1) / 2, of the raw element
    # bits.
    class Mavg(MediaOp):
        code = '''
            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
                uint64_t resBits = (arg1Bits + arg2Bits + 1) / 2;

                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg.uqw = result;
        '''

    # Sum of the absolute differences of all source elements, truncated to
    # destSize bytes.
    class Msad(MediaOp):
        code = '''
            int srcBits = srcSize * 8;
            int items = sizeof(FloatRegBits) / srcSize;

            uint64_t sum = 0;
            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * srcBits - 1;
                int loIndex = (i + 0) * srcBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
                int64_t resBits = arg1Bits - arg2Bits;
                if (resBits < 0)
                    resBits = -resBits;
                sum += resBits;
            }
            FpDestReg.uqw = sum & mask(destSize * 8);
        '''

    # Element-wise logical right shift by op2; a shift amount of at least
    # the element width yields zero.
    class Msrl(MediaOp):
        code = '''

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
            uint64_t shiftAmt = op2.uqw;
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                uint64_t resBits;
                if (shiftAmt >= sizeBits) {
                    resBits = 0;
                } else {
                    resBits = (arg1Bits >> shiftAmt) &
                        mask(sizeBits - shiftAmt);
                }

                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg.uqw = result;
        '''

    # Element-wise arithmetic right shift by op2; a shift amount of at
    # least the element width fills the element with its sign bit.
    class Msra(MediaOp):
        code = '''

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
            uint64_t shiftAmt = op2.uqw;
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                uint64_t resBits;
                if (shiftAmt >= sizeBits) {
                    if (bits(arg1Bits, sizeBits - 1))
                        resBits = mask(sizeBits);
                    else
                        resBits = 0;
                } else {
                    resBits = (arg1Bits >> shiftAmt);
                    // Replicate the shifted-down sign bit upwards. The
                    // constant is 64 bits wide so that the shift is valid
                    // for 64 bit elements too.
                    resBits = resBits |
                        (0 - (resBits &
                              ((uint64_t)1 << (sizeBits - 1 - shiftAmt))));
                }

                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg.uqw = result;
        '''

    # Element-wise left shift by op2; a shift amount of at least the
    # element width yields zero.
    class Msll(MediaOp):
        code = '''

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
            uint64_t shiftAmt = op2.uqw;
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                uint64_t resBits;
                if (shiftAmt >= sizeBits) {
                    resBits = 0;
                } else {
                    resBits = (arg1Bits << shiftAmt);
                }

                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg.uqw = result;
        '''

    # Convert signed integer elements to floating point. Source and
    # destination element sizes may differ by a factor of two; ext bit 1
    # then selects the upper half of the wider-packed register.
    class Cvti2f(MediaOp):
        def __init__(self, dest, src,
                size = None, destSize = None, srcSize = None, ext = None):
            super(Cvti2f, self).__init__(dest, src,
                    "InstRegIndex(0)", size, destSize, srcSize, ext)
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };

            assert(destSize == 4 || destSize == 8);
            assert(srcSize == 4 || srcSize == 8);
            int srcSizeBits = srcSize * 8;
            int destSizeBits = destSize * 8;
            int items;
            int srcStart = 0;
            int destStart = 0;
            if (srcSize == 2 * destSize) {
                items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / srcSize;
                if (ext & 0x2)
                    destStart = destSizeBits * items;
            } else if (destSize == 2 * srcSize) {
                items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize;
                if (ext & 0x2)
                    srcStart = srcSizeBits * items;
            } else {
                items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize;
            }
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
                int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
                uint64_t argBits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex);
                // argBits has already been shifted down to bit 0, so its
                // sign bit sits at srcSizeBits - 1, not at srcHiIndex.
                int64_t sArg = argBits |
                    (0 - (argBits & ((uint64_t)1 << (srcSizeBits - 1))));
                double arg = sArg;

                if (destSize == 4) {
                    floatInt fi;
                    fi.f = arg;
                    argBits = fi.i;
                } else {
                    doubleInt di;
                    di.d = arg;
                    argBits = di.i;
                }
                int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
                int destLoIndex = destStart + (i + 0) * destSizeBits;
                result = insertBits(result, destHiIndex, destLoIndex, argBits);
            }
            FpDestReg.uqw = result;
        '''

    # Convert floating point elements between single and double precision.
    class Cvtf2f(MediaOp):
        def __init__(self, dest, src,
                size = None, destSize = None, srcSize = None, ext = None):
            super(Cvtf2f, self).__init__(dest, src,
                    "InstRegIndex(0)", size, destSize, srcSize, ext)
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };

            assert(destSize == 4 || destSize == 8);
            assert(srcSize == 4 || srcSize == 8);
            int srcSizeBits = srcSize * 8;
            int destSizeBits = destSize * 8;
            int items;
            int srcStart = 0;
            int destStart = 0;
            if (srcSize == 2 * destSize) {
                items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / srcSize;
                if (ext & 0x2)
                    destStart = destSizeBits * items;
            } else if (destSize == 2 * srcSize) {
                items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize;
                if (ext & 0x2)
                    srcStart = srcSizeBits * items;
            } else {
                items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize;
            }
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
                int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
                uint64_t argBits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex);
                double arg;

                if (srcSize == 4) {
                    floatInt fi;
                    fi.i = argBits;
                    arg = fi.f;
                } else {
                    doubleInt di;
                    di.i = argBits;
                    arg = di.d;
                }
                if (destSize == 4) {
                    floatInt fi;
                    fi.f = arg;
                    argBits = fi.i;
                } else {
                    doubleInt di;
                    di.d = arg;
                    argBits = di.i;
                }
                int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
                int destLoIndex = destStart + (i + 0) * destSizeBits;
                result = insertBits(result, destHiIndex, destLoIndex, argBits);
            }
            FpDestReg.uqw = result;
        '''

    # Element-wise signed integer compare producing an all-ones or all-zeros
    # element: equality when ext bit 1 is clear, greater-than when it is
    # set.
    class Mcmpi2r(MediaOp):
        code = '''
            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
            uint64_t result = FpDestReg.uqw;
            // 64 bit constant so the shift is valid for every element size.
            uint64_t signMask = (uint64_t)1 << (sizeBits - 1);

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                int64_t arg1 = arg1Bits | (0 - (arg1Bits & signMask));
                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
                int64_t arg2 = arg2Bits | (0 - (arg2Bits & signMask));

                uint64_t resBits = 0;
                if (((ext & 0x2) == 0 && arg1 == arg2) ||
                    ((ext & 0x2) == 0x2 && arg1 > arg2))
                    resBits = mask(sizeBits);

                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg.uqw = result;
        '''
}};