149 } 150}}; 151 152let {{ 153 # Make these empty strings so that concatenating onto 154 # them will always work. 155 header_output = "" 156 decoder_output = "" 157 exec_output = "" 158 159 immTemplates = ( 160 MediaOpImmDeclare, 161 MediaOpImmConstructor, 162 MediaOpExecute) 163 164 regTemplates = ( 165 MediaOpRegDeclare, 166 MediaOpRegConstructor, 167 MediaOpExecute) 168 169 class MediaOpMeta(type): 170 def buildCppClasses(self, name, Name, suffix, code): 171 172 # Globals to stick the output in 173 global header_output 174 global decoder_output 175 global exec_output 176 177 # If op2 is used anywhere, make register and immediate versions 178 # of this code. 179 matcher = re.compile("(?<!\\w)(?P<prefix>s?)op2(?P<typeQual>\\.\\w+)?") 180 match = matcher.search(code) 181 if match: 182 typeQual = "" 183 if match.group("typeQual"): 184 typeQual = match.group("typeQual") 185 src2_name = "%sFpSrcReg2%s" % (match.group("prefix"), typeQual) 186 self.buildCppClasses(name, Name, suffix, 187 matcher.sub(src2_name, code)) 188 self.buildCppClasses(name + "i", Name, suffix + "Imm", 189 matcher.sub("imm8", code)) 190 return 191 192 base = "X86ISA::MediaOp" 193 194 # If imm8 shows up in the code, use the immediate templates, if 195 # not, hopefully the register ones will be correct. 196 matcher = re.compile("(?<!\w)imm8(?!\w)") 197 if matcher.search(code): 198 base += "Imm" 199 templates = immTemplates 200 else: 201 base += "Reg" 202 templates = regTemplates 203 204 # Get everything ready for the substitution 205 iop = InstObjParams(name, Name + suffix, base, {"code" : code}) 206 207 # Generate the actual code (finally!) 
208 header_output += templates[0].subst(iop) 209 decoder_output += templates[1].subst(iop) 210 exec_output += templates[2].subst(iop) 211 212 213 def __new__(mcls, Name, bases, dict): 214 abstract = False 215 name = Name.lower() 216 if "abstract" in dict: 217 abstract = dict['abstract'] 218 del dict['abstract'] 219 220 cls = super(MediaOpMeta, mcls).__new__(mcls, Name, bases, dict) 221 if not abstract: 222 cls.className = Name 223 cls.base_mnemonic = name 224 code = cls.code 225 226 # Set up the C++ classes 227 mcls.buildCppClasses(cls, name, Name, "", code) 228 229 # Hook into the microassembler dict 230 global microopClasses 231 microopClasses[name] = cls 232 233 # If op2 is used anywhere, make register and immediate versions 234 # of this code. 235 matcher = re.compile("op2(?P<typeQual>\\.\\w+)?") 236 if matcher.search(code): 237 microopClasses[name + 'i'] = cls 238 return cls 239 240 241 class MediaOp(X86Microop): 242 __metaclass__ = MediaOpMeta 243 # This class itself doesn't act as a microop 244 abstract = True 245 246 def __init__(self, dest, src1, op2, 247 size = None, destSize = None, srcSize = None, ext = None): 248 self.dest = dest 249 self.src1 = src1 250 self.op2 = op2 251 if size is not None: 252 self.srcSize = size 253 self.destSize = size 254 if srcSize is not None: 255 self.srcSize = srcSize 256 if destSize is not None: 257 self.destSize = destSize 258 if self.srcSize is None: 259 raise Exception, "Source size not set." 260 if self.destSize is None: 261 raise Exception, "Dest size not set." 
262 if ext is None: 263 self.ext = 0 264 else: 265 self.ext = ext 266 267 def getAllocator(self, microFlags): 268 className = self.className 269 if self.mnemonic == self.base_mnemonic + 'i': 270 className += "Imm" 271 allocator = '''new %(class_name)s(machInst, macrocodeBlock, 272 %(flags)s, %(src1)s, %(op2)s, %(dest)s, 273 %(srcSize)s, %(destSize)s, %(ext)s)''' % { 274 "class_name" : className, 275 "flags" : self.microFlagsText(microFlags), 276 "src1" : self.src1, "op2" : self.op2, 277 "dest" : self.dest, 278 "srcSize" : self.srcSize, 279 "destSize" : self.destSize, 280 "ext" : self.ext} 281 return allocator 282 283 class Mov2int(MediaOp): 284 def __init__(self, dest, src1, src2 = 0, \ 285 size = None, destSize = None, srcSize = None, ext = None): 286 super(Mov2int, self).__init__(dest, src1,\ 287 src2, size, destSize, srcSize, ext) 288 code = ''' 289 int items = sizeof(FloatRegBits) / srcSize; 290 int offset = imm8; 291 if (bits(src1, 0) && (ext & 0x1)) 292 offset -= items; 293 if (offset >= 0 && offset < items) { 294 uint64_t fpSrcReg1 = 295 bits(FpSrcReg1.uqw, 296 (offset + 1) * srcSize * 8 - 1, 297 (offset + 0) * srcSize * 8); 298 DestReg = merge(0, fpSrcReg1, destSize); 299 } else { 300 DestReg = DestReg; 301 } 302 ''' 303 304 class Mov2fp(MediaOp): 305 def __init__(self, dest, src1, src2 = 0, \ 306 size = None, destSize = None, srcSize = None, ext = None): 307 super(Mov2fp, self).__init__(dest, src1,\ 308 src2, size, destSize, srcSize, ext) 309 code = ''' 310 int items = sizeof(FloatRegBits) / destSize; 311 int offset = imm8; 312 if (bits(dest, 0) && (ext & 0x1)) 313 offset -= items; 314 if (offset >= 0 && offset < items) { 315 uint64_t srcReg1 = pick(SrcReg1, 0, srcSize); 316 FpDestReg.uqw = 317 insertBits(FpDestReg.uqw, 318 (offset + 1) * destSize * 8 - 1, 319 (offset + 0) * destSize * 8, srcReg1); 320 } else { 321 FpDestReg.uqw = FpDestReg.uqw; 322 } 323 ''' 324 325 class Movsign(MediaOp): 326 def __init__(self, dest, src, \ 327 size = None, destSize = 
None, srcSize = None, ext = None): 328 super(Movsign, self).__init__(dest, src,\ 329 "InstRegIndex(0)", size, destSize, srcSize, ext) 330 code = ''' 331 int items = sizeof(FloatRegBits) / srcSize; 332 uint64_t result = 0; 333 int offset = (ext & 0x1) ? items : 0; 334 for (int i = 0; i < items; i++) { 335 uint64_t picked = 336 bits(FpSrcReg1.uqw, (i + 1) * 8 * srcSize - 1); 337 result = insertBits(result, i + offset, i + offset, picked); 338 } 339 DestReg = DestReg | result; 340 ''' 341 342 class Maskmov(MediaOp): 343 code = ''' 344 assert(srcSize == destSize); 345 int size = srcSize; 346 int sizeBits = size * 8; 347 int items = numItems(size); 348 uint64_t result = FpDestReg.uqw; 349 350 for (int i = 0; i < items; i++) { 351 int hiIndex = (i + 1) * sizeBits - 1; 352 int loIndex = (i + 0) * sizeBits; 353 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); 354 if (bits(FpSrcReg2.uqw, hiIndex)) 355 result = insertBits(result, hiIndex, loIndex, arg1Bits); 356 } 357 FpDestReg.uqw = result; 358 ''' 359 360 class shuffle(MediaOp): 361 code = ''' 362 assert(srcSize == destSize); 363 int size = srcSize; 364 int sizeBits = size * 8; 365 int items = sizeof(FloatRegBits) / size; 366 int options; 367 int optionBits; 368 if (size == 8) { 369 options = 2; 370 optionBits = 1; 371 } else { 372 options = 4; 373 optionBits = 2; 374 } 375 376 uint64_t result = 0; 377 uint8_t sel = ext; 378 379 for (int i = 0; i < items; i++) { 380 uint64_t resBits; 381 uint8_t lsel = sel & mask(optionBits); 382 if (lsel * size >= sizeof(FloatRegBits)) { 383 lsel -= options / 2; 384 resBits = bits(FpSrcReg2.uqw, 385 (lsel + 1) * sizeBits - 1, 386 (lsel + 0) * sizeBits); 387 } else { 388 resBits = bits(FpSrcReg1.uqw, 389 (lsel + 1) * sizeBits - 1, 390 (lsel + 0) * sizeBits); 391 } 392 393 sel >>= optionBits; 394 395 int hiIndex = (i + 1) * sizeBits - 1; 396 int loIndex = (i + 0) * sizeBits; 397 result = insertBits(result, hiIndex, loIndex, resBits); 398 } 399 FpDestReg.uqw = result; 400 ''' 401 
# Interleaves items of the two sources: result item 2*i comes from
# FpSrcReg1 item (i + offset) and result item 2*i + 1 from FpSrcReg2 item
# (i + offset).  A non-zero ext selects the upper half of the sources
# (offset = items); otherwise the lower half is used.
class Unpack(MediaOp):
    code = '''
        assert(srcSize == destSize);
        int size = destSize;
        int items = (sizeof(FloatRegBits) / size) / 2;
        int offset = ext ? items : 0;
        uint64_t result = 0;
        for (int i = 0; i < items; i++) {
            uint64_t pickedLow =
                bits(FpSrcReg1.uqw, (i + offset + 1) * 8 * size - 1,
                                    (i + offset) * 8 * size);
            result = insertBits(result,
                                (2 * i + 1) * 8 * size - 1,
                                (2 * i + 0) * 8 * size,
                                pickedLow);
            uint64_t pickedHigh =
                bits(FpSrcReg2.uqw, (i + offset + 1) * 8 * size - 1,
                                    (i + offset) * 8 * size);
            result = insertBits(result,
                                (2 * i + 2) * 8 * size - 1,
                                (2 * i + 1) * 8 * size,
                                pickedHigh);
        }
        FpDestReg.uqw = result;
    '''

# Narrows srcSize-byte items to destSize-byte items (srcSize must be twice
# destSize).  The first half of the result is taken from FpSrcReg1 and the
# second half from FpSrcReg2; values that do not fit are replaced by the
# signed or unsigned limit depending on signedOp().
# NOTE(review): with srcSize == 2 * destSize, "destBits - srcBits + 1" is
# negative, while "overflow" is srcBits - destBits + 1 bits wide -- possibly
# mask(srcBits - destBits + 1) was intended; confirm against mask().
# NOTE(review): in the second loop i runs from items / 2 to items - 1, so
# "(i - items ...)" yields negative bit indices -- presumably
# (i - items / 2) was intended; confirm against bits().
class Pack(MediaOp):
    code = '''
        assert(srcSize == destSize * 2);
        int items = (sizeof(FloatRegBits) / destSize);
        int destBits = destSize * 8;
        int srcBits = srcSize * 8;
        uint64_t result = 0;
        int i;
        for (i = 0; i < items / 2; i++) {
            uint64_t picked =
                bits(FpSrcReg1.uqw, (i + 1) * srcBits - 1,
                                    (i + 0) * srcBits);
            unsigned signBit = bits(picked, srcBits - 1);
            uint64_t overflow = bits(picked, srcBits - 1, destBits - 1);

            // Handle saturation.
            if (signBit) {
                if (overflow != mask(destBits - srcBits + 1)) {
                    if (signedOp())
                        picked = (ULL(1) << (destBits - 1));
                    else
                        picked = 0;
                }
            } else {
                if (overflow != 0) {
                    if (signedOp())
                        picked = mask(destBits - 1);
                    else
                        picked = mask(destBits);
                }
            }
            result = insertBits(result,
                                (i + 1) * destBits - 1,
                                (i + 0) * destBits,
                                picked);
        }
        for (;i < items; i++) {
            uint64_t picked =
                bits(FpSrcReg2.uqw, (i - items + 1) * srcBits - 1,
                                    (i - items + 0) * srcBits);
            unsigned signBit = bits(picked, srcBits - 1);
            uint64_t overflow = bits(picked, srcBits - 1, destBits - 1);

            // Handle saturation.
            if (signBit) {
                if (overflow != mask(destBits - srcBits + 1)) {
                    if (signedOp())
                        picked = (ULL(1) << (destBits - 1));
                    else
                        picked = 0;
                }
            } else {
                if (overflow != 0) {
                    if (signedOp())
                        picked = mask(destBits - 1);
                    else
                        picked = mask(destBits);
                }
            }
            result = insertBits(result,
                                (i + 1) * destBits - 1,
                                (i + 0) * destBits,
                                picked);
        }
        FpDestReg.uqw = result;
    '''

# Bitwise XOR of FpSrcReg1.uqw and FpSrcReg2.uqw.  The constructor always
# passes a size of 1.
class Mxor(MediaOp):
    def __init__(self, dest, src1, src2):
        super(Mxor, self).__init__(dest, src1, src2, 1)
    code = '''
        FpDestReg.uqw = FpSrcReg1.uqw ^ FpSrcReg2.uqw;
    '''

# Bitwise OR of FpSrcReg1.uqw and FpSrcReg2.uqw.  The constructor always
# passes a size of 1.
class Mor(MediaOp):
    def __init__(self, dest, src1, src2):
        super(Mor, self).__init__(dest, src1, src2, 1)
    code = '''
        FpDestReg.uqw = FpSrcReg1.uqw | FpSrcReg2.uqw;
    '''

# Bitwise AND of FpSrcReg1.uqw and FpSrcReg2.uqw.  The constructor always
# passes a size of 1.
class Mand(MediaOp):
    def __init__(self, dest, src1, src2):
        super(Mand, self).__init__(dest, src1, src2, 1)
    code = '''
        FpDestReg.uqw = FpSrcReg1.uqw & FpSrcReg2.uqw;
    '''

# Bitwise AND of the complement of FpSrcReg1.uqw with FpSrcReg2.uqw.  The
# constructor always passes a size of 1.
class Mandn(MediaOp):
    def __init__(self, dest, src1, src2):
        super(Mandn, self).__init__(dest, src1, src2, 1)
    code = '''
        FpDestReg.uqw = ~FpSrcReg1.uqw & FpSrcReg2.uqw;
    '''

# Per-item floating point minimum for 4 or 8 byte items.  The FpSrcReg1
# item is chosen only when arg1 < arg2; in every other case the FpSrcReg2
# item is used.
class Mminf(MediaOp):
    code = '''
        union floatInt
        {
            float f;
            uint32_t i;
        };
        union doubleInt
        {
            double d;
            uint64_t i;
        };

        assert(srcSize == destSize);
        int size = srcSize;
        int sizeBits = size * 8;
        assert(srcSize == 4 || srcSize == 8);
        int items = numItems(size);
        uint64_t result = FpDestReg.uqw;

        for (int i = 0; i < items; i++) {
            double arg1, arg2;
            int hiIndex = (i + 1) * sizeBits - 1;
            int loIndex = (i + 0) * sizeBits;
            uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
            uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);

            if (size == 4) {
                floatInt fi;
                fi.i = arg1Bits;
                arg1 = fi.f;
                fi.i = arg2Bits;
                arg2 = fi.f;
            } else {
                doubleInt di;
                di.i = arg1Bits;
                arg1 = di.d;
                di.i = arg2Bits;
                arg2 = di.d;
            }

            if (arg1 < arg2) {
                result = insertBits(result, hiIndex, loIndex, arg1Bits);
            } else {
                result = insertBits(result, hiIndex, loIndex, arg2Bits);
            }
        }
        FpDestReg.uqw = result;
    '''

# Per-item floating point maximum for 4 or 8 byte items.  The FpSrcReg1
# item is chosen only when arg1 > arg2; in every other case the FpSrcReg2
# item is used.
class Mmaxf(MediaOp):
    code = '''
        union floatInt
        {
            float f;
            uint32_t i;
        };
        union doubleInt
        {
            double d;
            uint64_t i;
        };

        assert(srcSize == destSize);
        int size = srcSize;
        int sizeBits = size * 8;
        assert(srcSize == 4 || srcSize == 8);
        int items = numItems(size);
        uint64_t result = FpDestReg.uqw;

        for (int i = 0; i < items; i++) {
            double arg1, arg2;
            int hiIndex = (i + 1) * sizeBits - 1;
            int loIndex = (i + 0) * sizeBits;
            uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
            uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);

            if (size == 4) {
                floatInt fi;
                fi.i = arg1Bits;
                arg1 = fi.f;
                fi.i = arg2Bits;
                arg2 = fi.f;
            } else {
                doubleInt di;
                di.i = arg1Bits;
                arg1 = di.d;
                di.i = arg2Bits;
                arg2 = di.d;
            }

            if (arg1 > arg2) {
                result = insertBits(result, hiIndex, loIndex, arg1Bits);
            } else {
                result = insertBits(result, hiIndex, loIndex, arg2Bits);
            }
        }
        FpDestReg.uqw = result;
    '''

# Per-item integer minimum.  signedOp() selects between comparing the
# sign-extended values and comparing the raw item bits as unsigned.
class Mmini(MediaOp):
    code = '''

        assert(srcSize == destSize);
        int size = srcSize;
        int sizeBits = size * 8;
        int items = numItems(size);
        uint64_t result = FpDestReg.uqw;

        for (int i = 0; i < items; i++) {
            int hiIndex = (i + 1) * sizeBits - 1;
            int loIndex = (i + 0) * sizeBits;
            uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
            int64_t arg1 = arg1Bits |
                (0 - (arg1Bits & (ULL(1) << (sizeBits - 1))));
            uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
            int64_t arg2 = arg2Bits |
                (0 - (arg2Bits & (ULL(1) << (sizeBits - 1))));
            uint64_t resBits;

            if (signedOp()) {
                if (arg1 < arg2) {
                    resBits = arg1Bits;
                } else {
                    resBits = arg2Bits;
                }
            } else {
                if (arg1Bits < arg2Bits) {
                    resBits = arg1Bits;
                } else {
                    resBits = arg2Bits;
                }
            }
            result = insertBits(result, hiIndex, loIndex, resBits);
        }
        FpDestReg.uqw = result;
    '''

# Per-item integer maximum.  signedOp() selects between comparing the
# sign-extended values and comparing the raw item bits as unsigned.
class Mmaxi(MediaOp):
    code = '''

        assert(srcSize == destSize);
        int size = srcSize;
        int sizeBits = size * 8;
        int items = numItems(size);
        uint64_t result = FpDestReg.uqw;

        for (int i = 0; i < items; i++) {
            int hiIndex = (i + 1) * sizeBits - 1;
            int loIndex = (i + 0) * sizeBits;
            uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
            int64_t arg1 = arg1Bits |
                (0 - (arg1Bits & (ULL(1) << (sizeBits - 1))));
            uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
            int64_t arg2 = arg2Bits |
                (0 - (arg2Bits & (ULL(1) << (sizeBits - 1))));
            uint64_t resBits;

            if (signedOp()) {
                if (arg1 > arg2) {
                    resBits = arg1Bits;
                } else {
                    resBits = arg2Bits;
                }
            } else {
                if (arg1Bits > arg2Bits) {
                    resBits = arg1Bits;
                } else {
                    resBits = arg2Bits;
                }
            }
            result = insertBits(result, hiIndex, loIndex, resBits);
        }
        FpDestReg.uqw = result;
    '''

# Per-item floating point square root of FpSrcReg1 for 4 or 8 byte items.
# Takes a single source; op2 is fixed to "InstRegIndex(0)".
class Msqrt(MediaOp):
    def __init__(self, dest, src, \
            size = None, destSize = None, srcSize = None, ext = None):
        super(Msqrt, self).__init__(dest, src,\
                "InstRegIndex(0)", size, destSize, srcSize, ext)
    code = '''
        union floatInt
        {
            float f;
            uint32_t i;
        };
        union doubleInt
        {
            double d;
            uint64_t i;
        };

        assert(srcSize == destSize);
        int size = srcSize;
        int sizeBits = size * 8;
        assert(srcSize == 4 || srcSize == 8);
        int items = numItems(size);
        uint64_t result = FpDestReg.uqw;

        for (int i = 0; i < items; i++) {
            int hiIndex = (i + 1) * sizeBits - 1;
            int loIndex = (i + 0) * sizeBits;
            uint64_t argBits = bits(FpSrcReg1.uqw, hiIndex, loIndex);

            if (size == 4) {
                floatInt fi;
                fi.i = argBits;
                fi.f = sqrt(fi.f);
                argBits = fi.i;
            } else {
                doubleInt di;
                di.i = argBits;
                di.d = sqrt(di.d);
                argBits = di.i;
            }
            result = insertBits(result, hiIndex, loIndex, argBits);
        }
        FpDestReg.uqw = result;
    '''

# Per-item floating point addition (FpSrcReg1 + FpSrcReg2) for 4 or 8 byte
# items.
class Maddf(MediaOp):
    code = '''
        union floatInt
        {
            float f;
            uint32_t i;
        };
        union doubleInt
        {
            double d;
            uint64_t i;
        };

        assert(srcSize == destSize);
        int size = srcSize;
        int sizeBits = size * 8;
        assert(srcSize == 4 || srcSize == 8);
        int items = numItems(size);
        uint64_t result = FpDestReg.uqw;

        for (int i = 0; i < items; i++) {
            int hiIndex = (i + 1) * sizeBits - 1;
            int loIndex = (i + 0) * sizeBits;
            uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
            uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
            uint64_t resBits;

            if (size == 4) {
                floatInt arg1, arg2, res;
                arg1.i = arg1Bits;
                arg2.i = arg2Bits;
                res.f = arg1.f + arg2.f;
                resBits = res.i;
            } else {
                doubleInt arg1, arg2, res;
                arg1.i = arg1Bits;
                arg2.i = arg2Bits;
                res.d = arg1.d + arg2.d;
                resBits = res.i;
            }

            result = insertBits(result, hiIndex, loIndex, resBits);
        }
        FpDestReg.uqw = result;
    '''

# Per-item floating point subtraction (FpSrcReg1 - FpSrcReg2) for 4 or 8
# byte items.
class Msubf(MediaOp):
    code = '''
        union floatInt
        {
            float f;
            uint32_t i;
        };
        union doubleInt
        {
            double d;
            uint64_t i;
        };

        assert(srcSize == destSize);
        int size = srcSize;
        int sizeBits = size * 8;
        assert(srcSize == 4 || srcSize == 8);
        int items = numItems(size);
        uint64_t result = FpDestReg.uqw;

        for (int i = 0; i < items; i++) {
            int hiIndex = (i + 1) * sizeBits - 1;
            int loIndex = (i + 0) * sizeBits;
            uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
            uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
            uint64_t resBits;

            if (size == 4) {
                floatInt arg1, arg2, res;
                arg1.i = arg1Bits;
                arg2.i = arg2Bits;
                res.f = arg1.f - arg2.f;
                resBits = res.i;
            } else {
                doubleInt arg1, arg2, res;
                arg1.i = arg1Bits;
                arg2.i = arg2Bits;
                res.d = arg1.d - arg2.d;
                resBits = res.i;
            }

            result = insertBits(result, hiIndex, loIndex, resBits);
        }
        FpDestReg.uqw = result;
    '''

# Per-item floating point multiplication (FpSrcReg1 * FpSrcReg2) for 4 or
# 8 byte items.
class Mmulf(MediaOp):
    code = '''
        union floatInt
        {
            float f;
            uint32_t i;
        };
        union doubleInt
        {
            double d;
            uint64_t i;
        };

        assert(srcSize == destSize);
        int size = srcSize;
        int sizeBits = size * 8;
        assert(srcSize == 4 || srcSize == 8);
        int items = numItems(size);
        uint64_t result = FpDestReg.uqw;

        for (int i = 0; i < items; i++) {
            int hiIndex = (i + 1) * sizeBits - 1;
            int loIndex = (i + 0) * sizeBits;
            uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
            uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
            uint64_t resBits;

            if (size == 4) {
                floatInt arg1, arg2, res;
                arg1.i = arg1Bits;
                arg2.i = arg2Bits;
                res.f = arg1.f * arg2.f;
                resBits = res.i;
            } else {
                doubleInt arg1, arg2, res;
                arg1.i = arg1Bits;
                arg2.i = arg2Bits;
                res.d = arg1.d * arg2.d;
                resBits = res.i;
            }

            result = insertBits(result, hiIndex, loIndex, resBits);
        }
        FpDestReg.uqw = result;
    '''

# Per-item floating point division (FpSrcReg1 / FpSrcReg2) for 4 or 8 byte
# items.
class Mdivf(MediaOp):
    code = '''
        union floatInt
        {
            float f;
            uint32_t i;
        };
        union doubleInt
        {
            double d;
            uint64_t i;
        };

        assert(srcSize == destSize);
        int size = srcSize;
        int sizeBits = size * 8;
        assert(srcSize == 4 || srcSize == 8);
        int items = numItems(size);
        uint64_t result = FpDestReg.uqw;

        for (int i = 0; i < items; i++) {
            int hiIndex = (i + 1) * sizeBits - 1;
            int loIndex = (i + 0) * sizeBits;
            uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
            uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
            uint64_t resBits;

            if (size == 4) {
                floatInt arg1, arg2, res;
                arg1.i = arg1Bits;
                arg2.i = arg2Bits;
                res.f = arg1.f / arg2.f;
                resBits = res.i;
            } else {
                doubleInt arg1, arg2, res;
                arg1.i = arg1Bits;
                arg2.i = arg2Bits;
                res.d = arg1.d / arg2.d;
                resBits = res.i;
            }

            result = insertBits(result, hiIndex, loIndex, resBits);
        }
        FpDestReg.uqw = result;
    '''

# Per-item integer addition.  When ext & 0x2 is set the result saturates:
# to the signed limits when signedOp() is true, otherwise to all ones when
# findCarry() reports a carry.
class Maddi(MediaOp):
    code = '''
        assert(srcSize == destSize);
        int size = srcSize;
        int sizeBits = size * 8;
        int items = numItems(size);
        uint64_t result = FpDestReg.uqw;

        for (int i = 0; i < items; i++) {
            int hiIndex = (i + 1) * sizeBits - 1;
            int loIndex = (i + 0) * sizeBits;
            uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
            uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
            uint64_t resBits = arg1Bits + arg2Bits;

            if (ext & 0x2) {
                if (signedOp()) {
                    int arg1Sign = bits(arg1Bits, sizeBits - 1);
                    int arg2Sign = bits(arg2Bits, sizeBits - 1);
                    int resSign = bits(resBits, sizeBits - 1);
                    if ((arg1Sign == arg2Sign) && (arg1Sign != resSign)) {
                        if (resSign == 0)
                            resBits = (ULL(1) << (sizeBits - 1));
                        else
                            resBits = mask(sizeBits - 1);
                    }
                } else {
                    if (findCarry(sizeBits, resBits, arg1Bits, arg2Bits))
                        resBits = mask(sizeBits);
                }
            }

            result = insertBits(result, hiIndex, loIndex, resBits);
        }
        FpDestReg.uqw = result;
    '''

# Per-item integer subtraction (FpSrcReg1 - FpSrcReg2).  When ext & 0x2 is
# set the result saturates; the signed case treats the inverted sign of
# arg2 as the second operand's sign, and the unsigned case clamps to 0 when
# arg2 > arg1.
class Msubi(MediaOp):
    code = '''
        assert(srcSize == destSize);
        int size = srcSize;
        int sizeBits = size * 8;
        int items = numItems(size);
        uint64_t result = FpDestReg.uqw;

        for (int i = 0; i < items; i++) {
            int hiIndex = (i + 1) * sizeBits - 1;
            int loIndex = (i + 0) * sizeBits;
            uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
            uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
            uint64_t resBits = arg1Bits - arg2Bits;

            if (ext & 0x2) {
                if (signedOp()) {
                    int arg1Sign = bits(arg1Bits, sizeBits - 1);
                    int arg2Sign = !bits(arg2Bits, sizeBits - 1);
                    int resSign = bits(resBits, sizeBits - 1);
                    if ((arg1Sign == arg2Sign) && (arg1Sign != resSign)) {
                        if (resSign == 0)
                            resBits = (ULL(1) << (sizeBits - 1));
                        else
                            resBits = mask(sizeBits - 1);
                    }
                } else {
                    if (arg2Bits > arg1Bits) {
                        resBits = 0;
                    } else if (!findCarry(sizeBits, resBits,
                                         arg1Bits, ~arg2Bits)) {
                        resBits = mask(sizeBits);
                    }
                }
            }

            result = insertBits(result, hiIndex, loIndex, resBits);
        }
        FpDestReg.uqw = result;
    '''

# Per-item integer multiplication; the destination items may be wider than
# the source items.  ext & 16 and ext & 32 change which source bits are
# read, ext & 0x4 adds 1 << (destBits - 1) to the product, and multHi()
# shifts the product right by destBits before it is stored.
class Mmuli(MediaOp):
    code = '''
        int srcBits = srcSize * 8;
        int destBits = destSize * 8;
        assert(destBits <= 64);
        assert(destSize >= srcSize);
        int items = numItems(destSize);
        uint64_t result = FpDestReg.uqw;

        for (int i = 0; i < items; i++) {
            int offset = 0;
            if (ext & 16) {
                if (ext & 32)
                    offset = i * (destBits - srcBits);
                else
                    offset = i * (destBits - srcBits) + srcBits;
            }
            int srcHiIndex = (i + 1) * srcBits - 1 + offset;
            int srcLoIndex = (i + 0) * srcBits + offset;
            uint64_t arg1Bits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex);
            uint64_t arg2Bits = bits(FpSrcReg2.uqw, srcHiIndex, srcLoIndex);
            uint64_t resBits;

            if (signedOp()) {
                int64_t arg1 = arg1Bits |
                    (0 - (arg1Bits & (ULL(1) << (srcBits - 1))));
                int64_t arg2 = arg2Bits |
                    (0 - (arg2Bits & (ULL(1) << (srcBits - 1))));
                resBits = (uint64_t)(arg1 * arg2);
            } else {
                resBits = arg1Bits * arg2Bits;
            }

            if (ext & 0x4)
                resBits += (ULL(1) << (destBits - 1));

            if (multHi())
                resBits >>= destBits;

            int destHiIndex = (i + 1) * destBits - 1;
            int destLoIndex = (i + 0) * destBits;
            result = insertBits(result, destHiIndex, destLoIndex, resBits);
        }
        FpDestReg.uqw = result;
    '''

# Per-item average computed as (arg1 + arg2 + 1) / 2 on the raw item bits.
class Mavg(MediaOp):
    code = '''
        assert(srcSize == destSize);
        int size = srcSize;
        int sizeBits = size * 8;
        int items = numItems(size);
        uint64_t result = FpDestReg.uqw;

        for (int i = 0; i < items; i++) {
            int hiIndex = (i + 1) * sizeBits - 1;
            int loIndex = (i + 0) * sizeBits;
            uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
            uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
            uint64_t resBits = (arg1Bits + arg2Bits + 1) / 2;

            result = insertBits(result, hiIndex, loIndex, resBits);
        }
        FpDestReg.uqw = result;
    '''

# Sum of absolute differences: adds up |arg1 - arg2| over all srcSize-byte
# items and writes the sum, masked to destSize * 8 bits, to FpDestReg.uqw.
class Msad(MediaOp):
    code = '''
        int srcBits = srcSize * 8;
        int items = sizeof(FloatRegBits) / srcSize;

        uint64_t sum = 0;
        for (int i = 0; i < items; i++) {
            int hiIndex = (i + 1) * srcBits - 1;
            int loIndex = (i + 0) * srcBits;
            uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
            uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
            int64_t resBits = arg1Bits - arg2Bits;
            if (resBits < 0)
                resBits = -resBits;
            sum += resBits;
        }
        FpDestReg.uqw = sum & mask(destSize * 8);
    '''

# Per-item logical right shift of FpSrcReg1 by op2.uqw.  A shift amount of
# at least the item width produces 0.
class Msrl(MediaOp):
    code = '''

        assert(srcSize == destSize);
        int size = srcSize;
        int sizeBits = size * 8;
        int items = numItems(size);
        uint64_t shiftAmt = op2.uqw;
        uint64_t result = FpDestReg.uqw;

        for (int i = 0; i < items; i++) {
            int hiIndex = (i + 1) * sizeBits - 1;
            int loIndex = (i + 0) * sizeBits;
            uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
            uint64_t resBits;
            if (shiftAmt >= sizeBits) {
                resBits = 0;
            } else {
                resBits = (arg1Bits >> shiftAmt) &
                    mask(sizeBits - shiftAmt);
            }

            result = insertBits(result, hiIndex, loIndex, resBits);
        }
        FpDestReg.uqw = result;
    '''

# Per-item arithmetic right shift of FpSrcReg1 by op2.uqw.  A shift amount
# of at least the item width produces all ones when the item's top bit is
# set and 0 otherwise.
class Msra(MediaOp):
    code = '''

        assert(srcSize == destSize);
        int size = srcSize;
        int sizeBits = size * 8;
        int items = numItems(size);
        uint64_t shiftAmt = op2.uqw;
        uint64_t result = FpDestReg.uqw;

        for (int i = 0; i < items; i++) {
            int hiIndex = (i + 1) * sizeBits - 1;
            int loIndex = (i + 0) * sizeBits;
            uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
            uint64_t resBits;
            if (shiftAmt >= sizeBits) {
                if (bits(arg1Bits, sizeBits - 1))
                    resBits = mask(sizeBits);
                else
                    resBits = 0;
            } else {
                resBits = (arg1Bits >> shiftAmt);
                resBits = resBits |
                    (0 - (resBits & (ULL(1) << (sizeBits - 1 - shiftAmt))));
            }

            result = insertBits(result, hiIndex, loIndex, resBits);
        }
        FpDestReg.uqw = result;
    '''

# Per-item left shift of FpSrcReg1 by op2.uqw.  A shift amount of at least
# the item width produces 0.
class Msll(MediaOp):
    code = '''

        assert(srcSize == destSize);
        int size = srcSize;
        int sizeBits = size * 8;
        int items = numItems(size);
        uint64_t shiftAmt = op2.uqw;
        uint64_t result = FpDestReg.uqw;

        for (int i = 0; i < items; i++) {
            int hiIndex = (i + 1) * sizeBits - 1;
            int loIndex = (i + 0) * sizeBits;
            uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
            uint64_t resBits;
            if (shiftAmt >= sizeBits) {
                resBits = 0;
            } else {
                resBits = (arg1Bits << shiftAmt);
            }

            result = insertBits(result, hiIndex, loIndex, resBits);
        }
        FpDestReg.uqw = result;
    '''

# Converts 4 or 8 byte floating point items of FpSrcReg1 to 4 or 8 byte
# integers by a C cast.  When ext & 0x4 is set, 0.5 is first added to
# non-negative values and subtracted from negative ones.  When the sizes
# differ, ext & 0x2 selects the upper half of the narrower register (the
# destination when narrowing, the source when widening).  Takes a single
# source; op2 is fixed to "InstRegIndex(0)".
class Cvtf2i(MediaOp):
    def __init__(self, dest, src, \
            size = None, destSize = None, srcSize = None, ext = None):
        super(Cvtf2i, self).__init__(dest, src,\
                "InstRegIndex(0)", size, destSize, srcSize, ext)
    code = '''
        union floatInt
        {
            float f;
            uint32_t i;
        };
        union doubleInt
        {
            double d;
            uint64_t i;
        };

        assert(destSize == 4 || destSize == 8);
        assert(srcSize == 4 || srcSize == 8);
        int srcSizeBits = srcSize * 8;
        int destSizeBits = destSize * 8;
        int items;
        int srcStart = 0;
        int destStart = 0;
        if (srcSize == 2 * destSize) {
            items = numItems(srcSize);
            if (ext & 0x2)
                destStart = destSizeBits * items;
        } else if (destSize == 2 * srcSize) {
            items = numItems(destSize);
            if (ext & 0x2)
                srcStart = srcSizeBits * items;
        } else {
            items = numItems(destSize);
        }
        uint64_t result = FpDestReg.uqw;

        for (int i = 0; i < items; i++) {
            int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
            int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
            uint64_t argBits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex);
            double arg;

            if (srcSize == 4) {
                floatInt fi;
                fi.i = argBits;
                arg = fi.f;
            } else {
                doubleInt di;
                di.i = argBits;
                arg = di.d;
            }

            if (ext & 0x4) {
                if (arg >= 0)
                    arg += 0.5;
                else
                    arg -= 0.5;
            }

            if (destSize == 4) {
                argBits = (uint32_t)arg;
            } else {
                argBits = (uint64_t)arg;
            }
            int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
            int destLoIndex = destStart + (i + 0) * destSizeBits;
            result = insertBits(result, destHiIndex, destLoIndex, argBits);
        }
        FpDestReg.uqw = result;
    '''

# Converts 4 or 8 byte integer items of FpSrcReg1, sign-extended, to 4 or 8
# byte floating point values.  When the sizes differ, ext & 0x2 selects the
# upper half of the narrower register.  Takes a single source; op2 is fixed
# to "InstRegIndex(0)".
class Cvti2f(MediaOp):
    def __init__(self, dest, src, \
            size = None, destSize = None, srcSize = None, ext = None):
        super(Cvti2f, self).__init__(dest, src,\
                "InstRegIndex(0)", size, destSize, srcSize, ext)
    code = '''
        union floatInt
        {
            float f;
            uint32_t i;
        };
        union doubleInt
        {
            double d;
            uint64_t i;
        };

        assert(destSize == 4 || destSize == 8);
        assert(srcSize == 4 || srcSize == 8);
        int srcSizeBits = srcSize * 8;
        int destSizeBits = destSize * 8;
        int items;
        int srcStart = 0;
        int destStart = 0;
        if (srcSize == 2 * destSize) {
            items = numItems(srcSize);
            if (ext & 0x2)
                destStart = destSizeBits * items;
        } else if (destSize == 2 * srcSize) {
            items = numItems(destSize);
            if (ext & 0x2)
                srcStart = srcSizeBits * items;
        } else {
            items = numItems(destSize);
        }
        uint64_t result = FpDestReg.uqw;

        for (int i = 0; i < items; i++) {
            int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
            int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
            uint64_t argBits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex);

            int64_t sArg = argBits |
                (0 - (argBits & (ULL(1) << (srcSizeBits - 1))));
            double arg = sArg;

            if (destSize == 4) {
                floatInt fi;
                fi.f = arg;
                argBits = fi.i;
            } else {
                doubleInt di;
                di.d = arg;
                argBits = di.i;
            }
            int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
            int destLoIndex = destStart + (i + 0) * destSizeBits;
            result = insertBits(result, destHiIndex, destLoIndex, argBits);
        }
        FpDestReg.uqw = result;
    '''

# Converts floating point items of FpSrcReg1 between 4 and 8 byte formats,
# going through a double.  When the sizes differ, ext & 0x2 selects the
# upper half of the narrower register.  Takes a single source; op2 is fixed
# to "InstRegIndex(0)".
class Cvtf2f(MediaOp):
    def __init__(self, dest, src, \
            size = None, destSize = None, srcSize = None, ext = None):
        super(Cvtf2f, self).__init__(dest, src,\
                "InstRegIndex(0)", size, destSize, srcSize, ext)
    code = '''
        union floatInt
        {
            float f;
            uint32_t i;
        };
        union doubleInt
        {
            double d;
            uint64_t i;
        };

        assert(destSize == 4 || destSize == 8);
        assert(srcSize == 4 || srcSize == 8);
        int srcSizeBits = srcSize * 8;
        int destSizeBits = destSize * 8;
        int items;
        int srcStart = 0;
        int destStart = 0;
        if (srcSize == 2 * destSize) {
            items = numItems(srcSize);
            if (ext & 0x2)
                destStart = destSizeBits * items;
        } else if (destSize == 2 * srcSize) {
            items = numItems(destSize);
            if (ext & 0x2)
                srcStart = srcSizeBits * items;
        } else {
            items = numItems(destSize);
        }
        uint64_t result = FpDestReg.uqw;

        for (int i = 0; i < items; i++) {
            int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
            int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
            uint64_t argBits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex);
            double arg;

            if (srcSize == 4) {
                floatInt fi;
                fi.i = argBits;
                arg = fi.f;
            } else {
                doubleInt di;
                di.i = argBits;
                arg = di.d;
            }
            if (destSize == 4) {
                floatInt fi;
                fi.f = arg;
                argBits = fi.i;
            } else {
                doubleInt di;
                di.d = arg;
                argBits = di.i;
            }
            int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
            int destLoIndex = destStart + (i + 0) * destSizeBits;
            result = insertBits(result, destHiIndex, destLoIndex, argBits);
        }
        FpDestReg.uqw = result;
    '''

# Per-item signed integer compare.  With ext & 0x2 clear the test is
# arg1 == arg2; with it set the test is arg1 > arg2.  Items that pass are
# set to all ones, the others to 0.  The floatInt/doubleInt unions are
# declared but not used by the code below.
class Mcmpi2r(MediaOp):
    code = '''
        union floatInt
        {
            float f;
            uint32_t i;
        };
        union doubleInt
        {
            double d;
            uint64_t i;
        };

        assert(srcSize == destSize);
        int size = srcSize;
        int sizeBits = size * 8;
        int items = numItems(size);
        uint64_t result = FpDestReg.uqw;

        for (int i = 0; i < items; i++) {
            int hiIndex = (i + 1) * sizeBits - 1;
            int loIndex = (i + 0) * sizeBits;
            uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
            int64_t arg1 = arg1Bits |
                (0 - (arg1Bits & (ULL(1) << (sizeBits - 1))));
            uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
            int64_t arg2 = arg2Bits |
                (0 - (arg2Bits & (ULL(1) << (sizeBits - 1))));

            uint64_t resBits = 0;
            if (((ext & 0x2) == 0 && arg1 == arg2) ||
                ((ext & 0x2) == 0x2 && arg1 > arg2))
                resBits = mask(sizeBits);

            result = insertBits(result, hiIndex, loIndex, resBits);
        }
        FpDestReg.uqw = result;
    '''

# Per-item floating point compare.  ext & mask(3) selects the predicate:
# 0 equal, 1 less than, 2 less or equal, 3 either operand is NaN,
# 4 not equal, 5 not less than, 6 not less or equal, 7 neither operand is
# NaN.  Items that pass are set to all ones, the others to 0.
class Mcmpf2r(MediaOp):
    code = '''
        union floatInt
        {
            float f;
            uint32_t i;
        };
        union doubleInt
        {
            double d;
            uint64_t i;
        };

        assert(srcSize == destSize);
        int size = srcSize;
        int sizeBits = size * 8;
        int items = numItems(size);
        uint64_t result = FpDestReg.uqw;

        for (int i = 0; i < items; i++) {
            int hiIndex = (i + 1) * sizeBits - 1;
            int loIndex = (i + 0) * sizeBits;
            uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
            uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
            double arg1, arg2;

            if (size == 4) {
                floatInt fi;
                fi.i = arg1Bits;
                arg1 = fi.f;
                fi.i = arg2Bits;
                arg2 = fi.f;
            } else {
                doubleInt di;
                di.i = arg1Bits;
                arg1 = di.d;
                di.i = arg2Bits;
                arg2 = di.d;
            }

            uint64_t resBits = 0;
            bool nanop = isnan(arg1) || isnan(arg2);
            switch (ext & mask(3)) {
              case 0:
                if (arg1 == arg2 && !nanop)
                    resBits = mask(sizeBits);
                break;
              case 1:
                if (arg1 < arg2 && !nanop)
                    resBits = mask(sizeBits);
                break;
              case 2:
                if (arg1 <= arg2 && !nanop)
                    resBits = mask(sizeBits);
                break;
              case 3:
                if (nanop)
                    resBits = mask(sizeBits);
                break;
              case 4:
                if (arg1 != arg2 || nanop)
                    resBits = mask(sizeBits);
                break;
              case 5:
                if (!(arg1 < arg2) || nanop)
                    resBits = mask(sizeBits);
                break;
              case 6:
                if (!(arg1 <= arg2) || nanop)
                    resBits = mask(sizeBits);
                break;
              case 7:
                if (!nanop)
                    resBits = mask(sizeBits);
                break;
            };

            result = insertBits(result, hiIndex, loIndex, resBits);
        }
        FpDestReg.uqw = result;
    '''

class Mcmpf2rf(MediaOp):
    def __init__(self, src1, src2,\
            size = None, destSize = None, srcSize = None, ext = None):
        super(Mcmpf2rf, self).__init__("InstRegIndex(0)", src1,\
                src2, size, destSize, srcSize, ext)
    code = '''
1504 union floatInt 1505 { 1506 float f; 1507 uint32_t i; 1508 }; 1509 union doubleInt 1510 { 1511 double d; 1512 uint64_t i; 1513 }; 1514 1515 assert(srcSize == destSize); 1516 assert(srcSize == 4 || srcSize == 8); 1517 int size = srcSize; 1518 int sizeBits = size * 8; 1519 1520 double arg1, arg2; 1521 uint64_t arg1Bits = bits(FpSrcReg1.uqw, sizeBits - 1, 0); 1522 uint64_t arg2Bits = bits(FpSrcReg2.uqw, sizeBits - 1, 0); 1523 if (size == 4) { 1524 floatInt fi; 1525 fi.i = arg1Bits; 1526 arg1 = fi.f; 1527 fi.i = arg2Bits; 1528 arg2 = fi.f; 1529 } else { 1530 doubleInt di; 1531 di.i = arg1Bits; 1532 arg1 = di.d; 1533 di.i = arg2Bits; 1534 arg2 = di.d; 1535 } 1536 1537 // ZF PF CF 1538 // Unordered 1 1 1 1539 // Greater than 0 0 0 1540 // Less than 0 0 1 1541 // Equal 1 0 0 1542 // OF = SF = AF = 0 1543 ccFlagBits = ccFlagBits & ~(OFBit | SFBit | AFBit | 1544 ZFBit | PFBit | CFBit); 1545 if (isnan(arg1) || isnan(arg2)) 1546 ccFlagBits = ccFlagBits | (ZFBit | PFBit | CFBit); 1547 else if(arg1 < arg2) 1548 ccFlagBits = ccFlagBits | CFBit; 1549 else if(arg1 == arg2) 1550 ccFlagBits = ccFlagBits | ZFBit; 1551 ''' 1552}};
| 99 } 100}}; 101 102let {{ 103 # Make these empty strings so that concatenating onto 104 # them will always work. 105 header_output = "" 106 decoder_output = "" 107 exec_output = "" 108 109 immTemplates = ( 110 MediaOpImmDeclare, 111 MediaOpImmConstructor, 112 MediaOpExecute) 113 114 regTemplates = ( 115 MediaOpRegDeclare, 116 MediaOpRegConstructor, 117 MediaOpExecute) 118 119 class MediaOpMeta(type): 120 def buildCppClasses(self, name, Name, suffix, code): 121 122 # Globals to stick the output in 123 global header_output 124 global decoder_output 125 global exec_output 126 127 # If op2 is used anywhere, make register and immediate versions 128 # of this code. 129 matcher = re.compile("(?<!\\w)(?P<prefix>s?)op2(?P<typeQual>\\.\\w+)?") 130 match = matcher.search(code) 131 if match: 132 typeQual = "" 133 if match.group("typeQual"): 134 typeQual = match.group("typeQual") 135 src2_name = "%sFpSrcReg2%s" % (match.group("prefix"), typeQual) 136 self.buildCppClasses(name, Name, suffix, 137 matcher.sub(src2_name, code)) 138 self.buildCppClasses(name + "i", Name, suffix + "Imm", 139 matcher.sub("imm8", code)) 140 return 141 142 base = "X86ISA::MediaOp" 143 144 # If imm8 shows up in the code, use the immediate templates, if 145 # not, hopefully the register ones will be correct. 146 matcher = re.compile("(?<!\w)imm8(?!\w)") 147 if matcher.search(code): 148 base += "Imm" 149 templates = immTemplates 150 else: 151 base += "Reg" 152 templates = regTemplates 153 154 # Get everything ready for the substitution 155 iop = InstObjParams(name, Name + suffix, base, {"code" : code}) 156 157 # Generate the actual code (finally!) 
158 header_output += templates[0].subst(iop) 159 decoder_output += templates[1].subst(iop) 160 exec_output += templates[2].subst(iop) 161 162 163 def __new__(mcls, Name, bases, dict): 164 abstract = False 165 name = Name.lower() 166 if "abstract" in dict: 167 abstract = dict['abstract'] 168 del dict['abstract'] 169 170 cls = super(MediaOpMeta, mcls).__new__(mcls, Name, bases, dict) 171 if not abstract: 172 cls.className = Name 173 cls.base_mnemonic = name 174 code = cls.code 175 176 # Set up the C++ classes 177 mcls.buildCppClasses(cls, name, Name, "", code) 178 179 # Hook into the microassembler dict 180 global microopClasses 181 microopClasses[name] = cls 182 183 # If op2 is used anywhere, make register and immediate versions 184 # of this code. 185 matcher = re.compile("op2(?P<typeQual>\\.\\w+)?") 186 if matcher.search(code): 187 microopClasses[name + 'i'] = cls 188 return cls 189 190 191 class MediaOp(X86Microop): 192 __metaclass__ = MediaOpMeta 193 # This class itself doesn't act as a microop 194 abstract = True 195 196 def __init__(self, dest, src1, op2, 197 size = None, destSize = None, srcSize = None, ext = None): 198 self.dest = dest 199 self.src1 = src1 200 self.op2 = op2 201 if size is not None: 202 self.srcSize = size 203 self.destSize = size 204 if srcSize is not None: 205 self.srcSize = srcSize 206 if destSize is not None: 207 self.destSize = destSize 208 if self.srcSize is None: 209 raise Exception, "Source size not set." 210 if self.destSize is None: 211 raise Exception, "Dest size not set." 
212 if ext is None: 213 self.ext = 0 214 else: 215 self.ext = ext 216 217 def getAllocator(self, microFlags): 218 className = self.className 219 if self.mnemonic == self.base_mnemonic + 'i': 220 className += "Imm" 221 allocator = '''new %(class_name)s(machInst, macrocodeBlock, 222 %(flags)s, %(src1)s, %(op2)s, %(dest)s, 223 %(srcSize)s, %(destSize)s, %(ext)s)''' % { 224 "class_name" : className, 225 "flags" : self.microFlagsText(microFlags), 226 "src1" : self.src1, "op2" : self.op2, 227 "dest" : self.dest, 228 "srcSize" : self.srcSize, 229 "destSize" : self.destSize, 230 "ext" : self.ext} 231 return allocator 232 233 class Mov2int(MediaOp): 234 def __init__(self, dest, src1, src2 = 0, \ 235 size = None, destSize = None, srcSize = None, ext = None): 236 super(Mov2int, self).__init__(dest, src1,\ 237 src2, size, destSize, srcSize, ext) 238 code = ''' 239 int items = sizeof(FloatRegBits) / srcSize; 240 int offset = imm8; 241 if (bits(src1, 0) && (ext & 0x1)) 242 offset -= items; 243 if (offset >= 0 && offset < items) { 244 uint64_t fpSrcReg1 = 245 bits(FpSrcReg1.uqw, 246 (offset + 1) * srcSize * 8 - 1, 247 (offset + 0) * srcSize * 8); 248 DestReg = merge(0, fpSrcReg1, destSize); 249 } else { 250 DestReg = DestReg; 251 } 252 ''' 253 254 class Mov2fp(MediaOp): 255 def __init__(self, dest, src1, src2 = 0, \ 256 size = None, destSize = None, srcSize = None, ext = None): 257 super(Mov2fp, self).__init__(dest, src1,\ 258 src2, size, destSize, srcSize, ext) 259 code = ''' 260 int items = sizeof(FloatRegBits) / destSize; 261 int offset = imm8; 262 if (bits(dest, 0) && (ext & 0x1)) 263 offset -= items; 264 if (offset >= 0 && offset < items) { 265 uint64_t srcReg1 = pick(SrcReg1, 0, srcSize); 266 FpDestReg.uqw = 267 insertBits(FpDestReg.uqw, 268 (offset + 1) * destSize * 8 - 1, 269 (offset + 0) * destSize * 8, srcReg1); 270 } else { 271 FpDestReg.uqw = FpDestReg.uqw; 272 } 273 ''' 274 275 class Movsign(MediaOp): 276 def __init__(self, dest, src, \ 277 size = None, destSize = 
None, srcSize = None, ext = None): 278 super(Movsign, self).__init__(dest, src,\ 279 "InstRegIndex(0)", size, destSize, srcSize, ext) 280 code = ''' 281 int items = sizeof(FloatRegBits) / srcSize; 282 uint64_t result = 0; 283 int offset = (ext & 0x1) ? items : 0; 284 for (int i = 0; i < items; i++) { 285 uint64_t picked = 286 bits(FpSrcReg1.uqw, (i + 1) * 8 * srcSize - 1); 287 result = insertBits(result, i + offset, i + offset, picked); 288 } 289 DestReg = DestReg | result; 290 ''' 291 292 class Maskmov(MediaOp): 293 code = ''' 294 assert(srcSize == destSize); 295 int size = srcSize; 296 int sizeBits = size * 8; 297 int items = numItems(size); 298 uint64_t result = FpDestReg.uqw; 299 300 for (int i = 0; i < items; i++) { 301 int hiIndex = (i + 1) * sizeBits - 1; 302 int loIndex = (i + 0) * sizeBits; 303 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); 304 if (bits(FpSrcReg2.uqw, hiIndex)) 305 result = insertBits(result, hiIndex, loIndex, arg1Bits); 306 } 307 FpDestReg.uqw = result; 308 ''' 309 310 class shuffle(MediaOp): 311 code = ''' 312 assert(srcSize == destSize); 313 int size = srcSize; 314 int sizeBits = size * 8; 315 int items = sizeof(FloatRegBits) / size; 316 int options; 317 int optionBits; 318 if (size == 8) { 319 options = 2; 320 optionBits = 1; 321 } else { 322 options = 4; 323 optionBits = 2; 324 } 325 326 uint64_t result = 0; 327 uint8_t sel = ext; 328 329 for (int i = 0; i < items; i++) { 330 uint64_t resBits; 331 uint8_t lsel = sel & mask(optionBits); 332 if (lsel * size >= sizeof(FloatRegBits)) { 333 lsel -= options / 2; 334 resBits = bits(FpSrcReg2.uqw, 335 (lsel + 1) * sizeBits - 1, 336 (lsel + 0) * sizeBits); 337 } else { 338 resBits = bits(FpSrcReg1.uqw, 339 (lsel + 1) * sizeBits - 1, 340 (lsel + 0) * sizeBits); 341 } 342 343 sel >>= optionBits; 344 345 int hiIndex = (i + 1) * sizeBits - 1; 346 int loIndex = (i + 0) * sizeBits; 347 result = insertBits(result, hiIndex, loIndex, resBits); 348 } 349 FpDestReg.uqw = result; 350 ''' 351 
352 class Unpack(MediaOp): 353 code = ''' 354 assert(srcSize == destSize); 355 int size = destSize; 356 int items = (sizeof(FloatRegBits) / size) / 2; 357 int offset = ext ? items : 0; 358 uint64_t result = 0; 359 for (int i = 0; i < items; i++) { 360 uint64_t pickedLow = 361 bits(FpSrcReg1.uqw, (i + offset + 1) * 8 * size - 1, 362 (i + offset) * 8 * size); 363 result = insertBits(result, 364 (2 * i + 1) * 8 * size - 1, 365 (2 * i + 0) * 8 * size, 366 pickedLow); 367 uint64_t pickedHigh = 368 bits(FpSrcReg2.uqw, (i + offset + 1) * 8 * size - 1, 369 (i + offset) * 8 * size); 370 result = insertBits(result, 371 (2 * i + 2) * 8 * size - 1, 372 (2 * i + 1) * 8 * size, 373 pickedHigh); 374 } 375 FpDestReg.uqw = result; 376 ''' 377 378 class Pack(MediaOp): 379 code = ''' 380 assert(srcSize == destSize * 2); 381 int items = (sizeof(FloatRegBits) / destSize); 382 int destBits = destSize * 8; 383 int srcBits = srcSize * 8; 384 uint64_t result = 0; 385 int i; 386 for (i = 0; i < items / 2; i++) { 387 uint64_t picked = 388 bits(FpSrcReg1.uqw, (i + 1) * srcBits - 1, 389 (i + 0) * srcBits); 390 unsigned signBit = bits(picked, srcBits - 1); 391 uint64_t overflow = bits(picked, srcBits - 1, destBits - 1); 392 393 // Handle saturation. 394 if (signBit) { 395 if (overflow != mask(destBits - srcBits + 1)) { 396 if (signedOp()) 397 picked = (ULL(1) << (destBits - 1)); 398 else 399 picked = 0; 400 } 401 } else { 402 if (overflow != 0) { 403 if (signedOp()) 404 picked = mask(destBits - 1); 405 else 406 picked = mask(destBits); 407 } 408 } 409 result = insertBits(result, 410 (i + 1) * destBits - 1, 411 (i + 0) * destBits, 412 picked); 413 } 414 for (;i < items; i++) { 415 uint64_t picked = 416 bits(FpSrcReg2.uqw, (i - items + 1) * srcBits - 1, 417 (i - items + 0) * srcBits); 418 unsigned signBit = bits(picked, srcBits - 1); 419 uint64_t overflow = bits(picked, srcBits - 1, destBits - 1); 420 421 // Handle saturation. 
422 if (signBit) { 423 if (overflow != mask(destBits - srcBits + 1)) { 424 if (signedOp()) 425 picked = (ULL(1) << (destBits - 1)); 426 else 427 picked = 0; 428 } 429 } else { 430 if (overflow != 0) { 431 if (signedOp()) 432 picked = mask(destBits - 1); 433 else 434 picked = mask(destBits); 435 } 436 } 437 result = insertBits(result, 438 (i + 1) * destBits - 1, 439 (i + 0) * destBits, 440 picked); 441 } 442 FpDestReg.uqw = result; 443 ''' 444 445 class Mxor(MediaOp): 446 def __init__(self, dest, src1, src2): 447 super(Mxor, self).__init__(dest, src1, src2, 1) 448 code = ''' 449 FpDestReg.uqw = FpSrcReg1.uqw ^ FpSrcReg2.uqw; 450 ''' 451 452 class Mor(MediaOp): 453 def __init__(self, dest, src1, src2): 454 super(Mor, self).__init__(dest, src1, src2, 1) 455 code = ''' 456 FpDestReg.uqw = FpSrcReg1.uqw | FpSrcReg2.uqw; 457 ''' 458 459 class Mand(MediaOp): 460 def __init__(self, dest, src1, src2): 461 super(Mand, self).__init__(dest, src1, src2, 1) 462 code = ''' 463 FpDestReg.uqw = FpSrcReg1.uqw & FpSrcReg2.uqw; 464 ''' 465 466 class Mandn(MediaOp): 467 def __init__(self, dest, src1, src2): 468 super(Mandn, self).__init__(dest, src1, src2, 1) 469 code = ''' 470 FpDestReg.uqw = ~FpSrcReg1.uqw & FpSrcReg2.uqw; 471 ''' 472 473 class Mminf(MediaOp): 474 code = ''' 475 union floatInt 476 { 477 float f; 478 uint32_t i; 479 }; 480 union doubleInt 481 { 482 double d; 483 uint64_t i; 484 }; 485 486 assert(srcSize == destSize); 487 int size = srcSize; 488 int sizeBits = size * 8; 489 assert(srcSize == 4 || srcSize == 8); 490 int items = numItems(size); 491 uint64_t result = FpDestReg.uqw; 492 493 for (int i = 0; i < items; i++) { 494 double arg1, arg2; 495 int hiIndex = (i + 1) * sizeBits - 1; 496 int loIndex = (i + 0) * sizeBits; 497 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); 498 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex); 499 500 if (size == 4) { 501 floatInt fi; 502 fi.i = arg1Bits; 503 arg1 = fi.f; 504 fi.i = arg2Bits; 505 arg2 = fi.f; 506 } 
else { 507 doubleInt di; 508 di.i = arg1Bits; 509 arg1 = di.d; 510 di.i = arg2Bits; 511 arg2 = di.d; 512 } 513 514 if (arg1 < arg2) { 515 result = insertBits(result, hiIndex, loIndex, arg1Bits); 516 } else { 517 result = insertBits(result, hiIndex, loIndex, arg2Bits); 518 } 519 } 520 FpDestReg.uqw = result; 521 ''' 522 523 class Mmaxf(MediaOp): 524 code = ''' 525 union floatInt 526 { 527 float f; 528 uint32_t i; 529 }; 530 union doubleInt 531 { 532 double d; 533 uint64_t i; 534 }; 535 536 assert(srcSize == destSize); 537 int size = srcSize; 538 int sizeBits = size * 8; 539 assert(srcSize == 4 || srcSize == 8); 540 int items = numItems(size); 541 uint64_t result = FpDestReg.uqw; 542 543 for (int i = 0; i < items; i++) { 544 double arg1, arg2; 545 int hiIndex = (i + 1) * sizeBits - 1; 546 int loIndex = (i + 0) * sizeBits; 547 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); 548 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex); 549 550 if (size == 4) { 551 floatInt fi; 552 fi.i = arg1Bits; 553 arg1 = fi.f; 554 fi.i = arg2Bits; 555 arg2 = fi.f; 556 } else { 557 doubleInt di; 558 di.i = arg1Bits; 559 arg1 = di.d; 560 di.i = arg2Bits; 561 arg2 = di.d; 562 } 563 564 if (arg1 > arg2) { 565 result = insertBits(result, hiIndex, loIndex, arg1Bits); 566 } else { 567 result = insertBits(result, hiIndex, loIndex, arg2Bits); 568 } 569 } 570 FpDestReg.uqw = result; 571 ''' 572 573 class Mmini(MediaOp): 574 code = ''' 575 576 assert(srcSize == destSize); 577 int size = srcSize; 578 int sizeBits = size * 8; 579 int items = numItems(size); 580 uint64_t result = FpDestReg.uqw; 581 582 for (int i = 0; i < items; i++) { 583 int hiIndex = (i + 1) * sizeBits - 1; 584 int loIndex = (i + 0) * sizeBits; 585 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); 586 int64_t arg1 = arg1Bits | 587 (0 - (arg1Bits & (ULL(1) << (sizeBits - 1)))); 588 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex); 589 int64_t arg2 = arg2Bits | 590 (0 - (arg2Bits & (ULL(1) << 
(sizeBits - 1)))); 591 uint64_t resBits; 592 593 if (signedOp()) { 594 if (arg1 < arg2) { 595 resBits = arg1Bits; 596 } else { 597 resBits = arg2Bits; 598 } 599 } else { 600 if (arg1Bits < arg2Bits) { 601 resBits = arg1Bits; 602 } else { 603 resBits = arg2Bits; 604 } 605 } 606 result = insertBits(result, hiIndex, loIndex, resBits); 607 } 608 FpDestReg.uqw = result; 609 ''' 610 611 class Mmaxi(MediaOp): 612 code = ''' 613 614 assert(srcSize == destSize); 615 int size = srcSize; 616 int sizeBits = size * 8; 617 int items = numItems(size); 618 uint64_t result = FpDestReg.uqw; 619 620 for (int i = 0; i < items; i++) { 621 int hiIndex = (i + 1) * sizeBits - 1; 622 int loIndex = (i + 0) * sizeBits; 623 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); 624 int64_t arg1 = arg1Bits | 625 (0 - (arg1Bits & (ULL(1) << (sizeBits - 1)))); 626 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex); 627 int64_t arg2 = arg2Bits | 628 (0 - (arg2Bits & (ULL(1) << (sizeBits - 1)))); 629 uint64_t resBits; 630 631 if (signedOp()) { 632 if (arg1 > arg2) { 633 resBits = arg1Bits; 634 } else { 635 resBits = arg2Bits; 636 } 637 } else { 638 if (arg1Bits > arg2Bits) { 639 resBits = arg1Bits; 640 } else { 641 resBits = arg2Bits; 642 } 643 } 644 result = insertBits(result, hiIndex, loIndex, resBits); 645 } 646 FpDestReg.uqw = result; 647 ''' 648 649 class Msqrt(MediaOp): 650 def __init__(self, dest, src, \ 651 size = None, destSize = None, srcSize = None, ext = None): 652 super(Msqrt, self).__init__(dest, src,\ 653 "InstRegIndex(0)", size, destSize, srcSize, ext) 654 code = ''' 655 union floatInt 656 { 657 float f; 658 uint32_t i; 659 }; 660 union doubleInt 661 { 662 double d; 663 uint64_t i; 664 }; 665 666 assert(srcSize == destSize); 667 int size = srcSize; 668 int sizeBits = size * 8; 669 assert(srcSize == 4 || srcSize == 8); 670 int items = numItems(size); 671 uint64_t result = FpDestReg.uqw; 672 673 for (int i = 0; i < items; i++) { 674 int hiIndex = (i + 1) * sizeBits - 1; 
675 int loIndex = (i + 0) * sizeBits; 676 uint64_t argBits = bits(FpSrcReg1.uqw, hiIndex, loIndex); 677 678 if (size == 4) { 679 floatInt fi; 680 fi.i = argBits; 681 fi.f = sqrt(fi.f); 682 argBits = fi.i; 683 } else { 684 doubleInt di; 685 di.i = argBits; 686 di.d = sqrt(di.d); 687 argBits = di.i; 688 } 689 result = insertBits(result, hiIndex, loIndex, argBits); 690 } 691 FpDestReg.uqw = result; 692 ''' 693 694 class Maddf(MediaOp): 695 code = ''' 696 union floatInt 697 { 698 float f; 699 uint32_t i; 700 }; 701 union doubleInt 702 { 703 double d; 704 uint64_t i; 705 }; 706 707 assert(srcSize == destSize); 708 int size = srcSize; 709 int sizeBits = size * 8; 710 assert(srcSize == 4 || srcSize == 8); 711 int items = numItems(size); 712 uint64_t result = FpDestReg.uqw; 713 714 for (int i = 0; i < items; i++) { 715 int hiIndex = (i + 1) * sizeBits - 1; 716 int loIndex = (i + 0) * sizeBits; 717 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); 718 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex); 719 uint64_t resBits; 720 721 if (size == 4) { 722 floatInt arg1, arg2, res; 723 arg1.i = arg1Bits; 724 arg2.i = arg2Bits; 725 res.f = arg1.f + arg2.f; 726 resBits = res.i; 727 } else { 728 doubleInt arg1, arg2, res; 729 arg1.i = arg1Bits; 730 arg2.i = arg2Bits; 731 res.d = arg1.d + arg2.d; 732 resBits = res.i; 733 } 734 735 result = insertBits(result, hiIndex, loIndex, resBits); 736 } 737 FpDestReg.uqw = result; 738 ''' 739 740 class Msubf(MediaOp): 741 code = ''' 742 union floatInt 743 { 744 float f; 745 uint32_t i; 746 }; 747 union doubleInt 748 { 749 double d; 750 uint64_t i; 751 }; 752 753 assert(srcSize == destSize); 754 int size = srcSize; 755 int sizeBits = size * 8; 756 assert(srcSize == 4 || srcSize == 8); 757 int items = numItems(size); 758 uint64_t result = FpDestReg.uqw; 759 760 for (int i = 0; i < items; i++) { 761 int hiIndex = (i + 1) * sizeBits - 1; 762 int loIndex = (i + 0) * sizeBits; 763 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, 
loIndex); 764 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex); 765 uint64_t resBits; 766 767 if (size == 4) { 768 floatInt arg1, arg2, res; 769 arg1.i = arg1Bits; 770 arg2.i = arg2Bits; 771 res.f = arg1.f - arg2.f; 772 resBits = res.i; 773 } else { 774 doubleInt arg1, arg2, res; 775 arg1.i = arg1Bits; 776 arg2.i = arg2Bits; 777 res.d = arg1.d - arg2.d; 778 resBits = res.i; 779 } 780 781 result = insertBits(result, hiIndex, loIndex, resBits); 782 } 783 FpDestReg.uqw = result; 784 ''' 785 786 class Mmulf(MediaOp): 787 code = ''' 788 union floatInt 789 { 790 float f; 791 uint32_t i; 792 }; 793 union doubleInt 794 { 795 double d; 796 uint64_t i; 797 }; 798 799 assert(srcSize == destSize); 800 int size = srcSize; 801 int sizeBits = size * 8; 802 assert(srcSize == 4 || srcSize == 8); 803 int items = numItems(size); 804 uint64_t result = FpDestReg.uqw; 805 806 for (int i = 0; i < items; i++) { 807 int hiIndex = (i + 1) * sizeBits - 1; 808 int loIndex = (i + 0) * sizeBits; 809 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); 810 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex); 811 uint64_t resBits; 812 813 if (size == 4) { 814 floatInt arg1, arg2, res; 815 arg1.i = arg1Bits; 816 arg2.i = arg2Bits; 817 res.f = arg1.f * arg2.f; 818 resBits = res.i; 819 } else { 820 doubleInt arg1, arg2, res; 821 arg1.i = arg1Bits; 822 arg2.i = arg2Bits; 823 res.d = arg1.d * arg2.d; 824 resBits = res.i; 825 } 826 827 result = insertBits(result, hiIndex, loIndex, resBits); 828 } 829 FpDestReg.uqw = result; 830 ''' 831 832 class Mdivf(MediaOp): 833 code = ''' 834 union floatInt 835 { 836 float f; 837 uint32_t i; 838 }; 839 union doubleInt 840 { 841 double d; 842 uint64_t i; 843 }; 844 845 assert(srcSize == destSize); 846 int size = srcSize; 847 int sizeBits = size * 8; 848 assert(srcSize == 4 || srcSize == 8); 849 int items = numItems(size); 850 uint64_t result = FpDestReg.uqw; 851 852 for (int i = 0; i < items; i++) { 853 int hiIndex = (i + 1) * sizeBits - 1; 854 
int loIndex = (i + 0) * sizeBits; 855 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); 856 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex); 857 uint64_t resBits; 858 859 if (size == 4) { 860 floatInt arg1, arg2, res; 861 arg1.i = arg1Bits; 862 arg2.i = arg2Bits; 863 res.f = arg1.f / arg2.f; 864 resBits = res.i; 865 } else { 866 doubleInt arg1, arg2, res; 867 arg1.i = arg1Bits; 868 arg2.i = arg2Bits; 869 res.d = arg1.d / arg2.d; 870 resBits = res.i; 871 } 872 873 result = insertBits(result, hiIndex, loIndex, resBits); 874 } 875 FpDestReg.uqw = result; 876 ''' 877 878 class Maddi(MediaOp): 879 code = ''' 880 assert(srcSize == destSize); 881 int size = srcSize; 882 int sizeBits = size * 8; 883 int items = numItems(size); 884 uint64_t result = FpDestReg.uqw; 885 886 for (int i = 0; i < items; i++) { 887 int hiIndex = (i + 1) * sizeBits - 1; 888 int loIndex = (i + 0) * sizeBits; 889 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); 890 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex); 891 uint64_t resBits = arg1Bits + arg2Bits; 892 893 if (ext & 0x2) { 894 if (signedOp()) { 895 int arg1Sign = bits(arg1Bits, sizeBits - 1); 896 int arg2Sign = bits(arg2Bits, sizeBits - 1); 897 int resSign = bits(resBits, sizeBits - 1); 898 if ((arg1Sign == arg2Sign) && (arg1Sign != resSign)) { 899 if (resSign == 0) 900 resBits = (ULL(1) << (sizeBits - 1)); 901 else 902 resBits = mask(sizeBits - 1); 903 } 904 } else { 905 if (findCarry(sizeBits, resBits, arg1Bits, arg2Bits)) 906 resBits = mask(sizeBits); 907 } 908 } 909 910 result = insertBits(result, hiIndex, loIndex, resBits); 911 } 912 FpDestReg.uqw = result; 913 ''' 914 915 class Msubi(MediaOp): 916 code = ''' 917 assert(srcSize == destSize); 918 int size = srcSize; 919 int sizeBits = size * 8; 920 int items = numItems(size); 921 uint64_t result = FpDestReg.uqw; 922 923 for (int i = 0; i < items; i++) { 924 int hiIndex = (i + 1) * sizeBits - 1; 925 int loIndex = (i + 0) * sizeBits; 926 uint64_t 
arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); 927 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex); 928 uint64_t resBits = arg1Bits - arg2Bits; 929 930 if (ext & 0x2) { 931 if (signedOp()) { 932 int arg1Sign = bits(arg1Bits, sizeBits - 1); 933 int arg2Sign = !bits(arg2Bits, sizeBits - 1); 934 int resSign = bits(resBits, sizeBits - 1); 935 if ((arg1Sign == arg2Sign) && (arg1Sign != resSign)) { 936 if (resSign == 0) 937 resBits = (ULL(1) << (sizeBits - 1)); 938 else 939 resBits = mask(sizeBits - 1); 940 } 941 } else { 942 if (arg2Bits > arg1Bits) { 943 resBits = 0; 944 } else if (!findCarry(sizeBits, resBits, 945 arg1Bits, ~arg2Bits)) { 946 resBits = mask(sizeBits); 947 } 948 } 949 } 950 951 result = insertBits(result, hiIndex, loIndex, resBits); 952 } 953 FpDestReg.uqw = result; 954 ''' 955 956 class Mmuli(MediaOp): 957 code = ''' 958 int srcBits = srcSize * 8; 959 int destBits = destSize * 8; 960 assert(destBits <= 64); 961 assert(destSize >= srcSize); 962 int items = numItems(destSize); 963 uint64_t result = FpDestReg.uqw; 964 965 for (int i = 0; i < items; i++) { 966 int offset = 0; 967 if (ext & 16) { 968 if (ext & 32) 969 offset = i * (destBits - srcBits); 970 else 971 offset = i * (destBits - srcBits) + srcBits; 972 } 973 int srcHiIndex = (i + 1) * srcBits - 1 + offset; 974 int srcLoIndex = (i + 0) * srcBits + offset; 975 uint64_t arg1Bits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex); 976 uint64_t arg2Bits = bits(FpSrcReg2.uqw, srcHiIndex, srcLoIndex); 977 uint64_t resBits; 978 979 if (signedOp()) { 980 int64_t arg1 = arg1Bits | 981 (0 - (arg1Bits & (ULL(1) << (srcBits - 1)))); 982 int64_t arg2 = arg2Bits | 983 (0 - (arg2Bits & (ULL(1) << (srcBits - 1)))); 984 resBits = (uint64_t)(arg1 * arg2); 985 } else { 986 resBits = arg1Bits * arg2Bits; 987 } 988 989 if (ext & 0x4) 990 resBits += (ULL(1) << (destBits - 1)); 991 992 if (multHi()) 993 resBits >>= destBits; 994 995 int destHiIndex = (i + 1) * destBits - 1; 996 int destLoIndex = (i + 0) * 
destBits; 997 result = insertBits(result, destHiIndex, destLoIndex, resBits); 998 } 999 FpDestReg.uqw = result; 1000 ''' 1001 1002 class Mavg(MediaOp): 1003 code = ''' 1004 assert(srcSize == destSize); 1005 int size = srcSize; 1006 int sizeBits = size * 8; 1007 int items = numItems(size); 1008 uint64_t result = FpDestReg.uqw; 1009 1010 for (int i = 0; i < items; i++) { 1011 int hiIndex = (i + 1) * sizeBits - 1; 1012 int loIndex = (i + 0) * sizeBits; 1013 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); 1014 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex); 1015 uint64_t resBits = (arg1Bits + arg2Bits + 1) / 2; 1016 1017 result = insertBits(result, hiIndex, loIndex, resBits); 1018 } 1019 FpDestReg.uqw = result; 1020 ''' 1021 1022 class Msad(MediaOp): 1023 code = ''' 1024 int srcBits = srcSize * 8; 1025 int items = sizeof(FloatRegBits) / srcSize; 1026 1027 uint64_t sum = 0; 1028 for (int i = 0; i < items; i++) { 1029 int hiIndex = (i + 1) * srcBits - 1; 1030 int loIndex = (i + 0) * srcBits; 1031 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); 1032 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex); 1033 int64_t resBits = arg1Bits - arg2Bits; 1034 if (resBits < 0) 1035 resBits = -resBits; 1036 sum += resBits; 1037 } 1038 FpDestReg.uqw = sum & mask(destSize * 8); 1039 ''' 1040 1041 class Msrl(MediaOp): 1042 code = ''' 1043 1044 assert(srcSize == destSize); 1045 int size = srcSize; 1046 int sizeBits = size * 8; 1047 int items = numItems(size); 1048 uint64_t shiftAmt = op2.uqw; 1049 uint64_t result = FpDestReg.uqw; 1050 1051 for (int i = 0; i < items; i++) { 1052 int hiIndex = (i + 1) * sizeBits - 1; 1053 int loIndex = (i + 0) * sizeBits; 1054 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); 1055 uint64_t resBits; 1056 if (shiftAmt >= sizeBits) { 1057 resBits = 0; 1058 } else { 1059 resBits = (arg1Bits >> shiftAmt) & 1060 mask(sizeBits - shiftAmt); 1061 } 1062 1063 result = insertBits(result, hiIndex, loIndex, resBits); 1064 
} 1065 FpDestReg.uqw = result; 1066 ''' 1067 1068 class Msra(MediaOp): 1069 code = ''' 1070 1071 assert(srcSize == destSize); 1072 int size = srcSize; 1073 int sizeBits = size * 8; 1074 int items = numItems(size); 1075 uint64_t shiftAmt = op2.uqw; 1076 uint64_t result = FpDestReg.uqw; 1077 1078 for (int i = 0; i < items; i++) { 1079 int hiIndex = (i + 1) * sizeBits - 1; 1080 int loIndex = (i + 0) * sizeBits; 1081 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); 1082 uint64_t resBits; 1083 if (shiftAmt >= sizeBits) { 1084 if (bits(arg1Bits, sizeBits - 1)) 1085 resBits = mask(sizeBits); 1086 else 1087 resBits = 0; 1088 } else { 1089 resBits = (arg1Bits >> shiftAmt); 1090 resBits = resBits | 1091 (0 - (resBits & (ULL(1) << (sizeBits - 1 - shiftAmt)))); 1092 } 1093 1094 result = insertBits(result, hiIndex, loIndex, resBits); 1095 } 1096 FpDestReg.uqw = result; 1097 ''' 1098 1099 class Msll(MediaOp): 1100 code = ''' 1101 1102 assert(srcSize == destSize); 1103 int size = srcSize; 1104 int sizeBits = size * 8; 1105 int items = numItems(size); 1106 uint64_t shiftAmt = op2.uqw; 1107 uint64_t result = FpDestReg.uqw; 1108 1109 for (int i = 0; i < items; i++) { 1110 int hiIndex = (i + 1) * sizeBits - 1; 1111 int loIndex = (i + 0) * sizeBits; 1112 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); 1113 uint64_t resBits; 1114 if (shiftAmt >= sizeBits) { 1115 resBits = 0; 1116 } else { 1117 resBits = (arg1Bits << shiftAmt); 1118 } 1119 1120 result = insertBits(result, hiIndex, loIndex, resBits); 1121 } 1122 FpDestReg.uqw = result; 1123 ''' 1124 1125 class Cvtf2i(MediaOp): 1126 def __init__(self, dest, src, \ 1127 size = None, destSize = None, srcSize = None, ext = None): 1128 super(Cvtf2i, self).__init__(dest, src,\ 1129 "InstRegIndex(0)", size, destSize, srcSize, ext) 1130 code = ''' 1131 union floatInt 1132 { 1133 float f; 1134 uint32_t i; 1135 }; 1136 union doubleInt 1137 { 1138 double d; 1139 uint64_t i; 1140 }; 1141 1142 assert(destSize == 4 || destSize == 
8);
        assert(srcSize == 4 || srcSize == 8);
        int srcSizeBits = srcSize * 8;
        int destSizeBits = destSize * 8;
        int items;
        int srcStart = 0;
        int destStart = 0;
        if (srcSize == 2 * destSize) {
            items = numItems(srcSize);
            if (ext & 0x2)
                destStart = destSizeBits * items;
        } else if (destSize == 2 * srcSize) {
            items = numItems(destSize);
            if (ext & 0x2)
                srcStart = srcSizeBits * items;
        } else {
            items = numItems(destSize);
        }
        uint64_t result = FpDestReg.uqw;

        for (int i = 0; i < items; i++) {
            int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
            int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
            uint64_t argBits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex);
            double arg;

            if (srcSize == 4) {
                floatInt fi;
                fi.i = argBits;
                arg = fi.f;
            } else {
                doubleInt di;
                di.i = argBits;
                arg = di.d;
            }

            if (ext & 0x4) {
                if (arg >= 0)
                    arg += 0.5;
                else
                    arg -= 0.5;
            }

            if (destSize == 4) {
                argBits = (uint32_t)arg;
            } else {
                argBits = (uint64_t)arg;
            }
            int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
            int destLoIndex = destStart + (i + 0) * destSizeBits;
            result = insertBits(result, destHiIndex, destLoIndex, argBits);
        }
        FpDestReg.uqw = result;
    '''

    # Cvti2f: packed signed-integer to floating-point conversion microop.
    #
    # Every srcSize-byte element read from FpSrcReg1 is sign extended to
    # 64 bits, converted to a double, and then stored into FpDestReg as a
    # float bit pattern (destSize == 4) or a double bit pattern
    # (destSize == 8). Both sizes are asserted to be 4 or 8 bytes.
    #
    # Element placement:
    #   * srcSize == 2 * destSize: numItems(srcSize) elements are
    #     converted; if ext bit 0x2 is set the results are written
    #     starting destSizeBits * items bits into the destination.
    #   * destSize == 2 * srcSize: numItems(destSize) elements are
    #     converted; if ext bit 0x2 is set the inputs are read starting
    #     srcSizeBits * items bits into the source.
    #   * otherwise numItems(destSize) elements are converted in place.
    #
    # The result is built on top of the old FpDestReg.uqw value, so bits
    # outside the inserted ranges are presumably preserved (insertBits
    # and numItems are defined outside this snippet).
    #
    # The snippet mentions neither op2 nor imm8, so
    # MediaOpMeta.buildCppClasses only emits the register-template class
    # for it and no "i" (immediate) flavour is registered.
    class Cvti2f(MediaOp):
        # This microop has a single source operand; the op2 slot of
        # MediaOp.__init__ is filled with the fixed string
        # "InstRegIndex(0)".
        def __init__(self, dest, src, \
                size = None, destSize = None, srcSize = None, ext = None):
            super(Cvti2f, self).__init__(dest, src,\
                    "InstRegIndex(0)", size, destSize, srcSize, ext)
        # C++ body substituted into the microop templates by the metaclass.
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };

            assert(destSize == 4 || destSize == 8);
            assert(srcSize == 4 || srcSize == 8);
            int srcSizeBits = srcSize * 8;
            int destSizeBits = destSize * 8;
            int items;
            int srcStart = 0;
            int destStart = 0;
            if (srcSize == 2 * destSize) {
                items = numItems(srcSize);
                if (ext & 0x2)
                    destStart = destSizeBits * items;
            } else if (destSize == 2 * srcSize) {
                items = numItems(destSize);
                if (ext & 0x2)
                    srcStart = srcSizeBits * items;
            } else {
                items = numItems(destSize);
            }
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
                int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
                uint64_t argBits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex);

                int64_t sArg = argBits |
                    (0 - (argBits & (ULL(1) << (srcSizeBits - 1))));
                double arg = sArg;

                if (destSize == 4) {
                    floatInt fi;
                    fi.f = arg;
                    argBits = fi.i;
                } else {
                    doubleInt di;
                    di.d = arg;
                    argBits = di.i;
                }
                int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
                int destLoIndex = destStart + (i + 0) * destSizeBits;
                result = insertBits(result, destHiIndex, destLoIndex, argBits);
            }
            FpDestReg.uqw = result;
        '''

    # Cvtf2f: packed floating-point to floating-point conversion microop.
    #
    # Every srcSize-byte element of FpSrcReg1 is reinterpreted as a float
    # (srcSize == 4) or a double (srcSize == 8), carried through a C++
    # double, and written into FpDestReg as a float (destSize == 4) or a
    # double (destSize == 8) bit pattern. Both sizes are asserted to be
    # 4 or 8 bytes.
    #
    # The element count and the effect of ext bit 0x2 follow the same
    # three-way srcSize/destSize comparison used by Cvti2f: the bit
    # offsets the destination when narrowing and the source when
    # widening.
    class Cvtf2f(MediaOp):
        # Single-source microop; "InstRegIndex(0)" is passed in the op2
        # slot of MediaOp.__init__.
        def __init__(self, dest, src, \
                size = None, destSize = None, srcSize = None, ext = None):
            super(Cvtf2f, self).__init__(dest, src,\
                    "InstRegIndex(0)", size, destSize, srcSize, ext)
        # C++ body substituted into the microop templates by the metaclass.
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };

            assert(destSize == 4 || destSize == 8);
            assert(srcSize == 4 || srcSize == 8);
            int srcSizeBits = srcSize * 8;
            int destSizeBits = destSize * 8;
            int items;
            int srcStart = 0;
            int destStart = 0;
            if (srcSize == 2 * destSize) {
                items = numItems(srcSize);
                if (ext & 0x2)
                    destStart = destSizeBits * items;
            } else if (destSize == 2 * srcSize) {
                items = numItems(destSize);
                if (ext & 0x2)
                    srcStart = srcSizeBits * items;
            } else {
                items = numItems(destSize);
            }
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
                int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
                uint64_t argBits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex);
                double arg;

                if (srcSize == 4) {
                    floatInt fi;
                    fi.i = argBits;
                    arg = fi.f;
                } else {
                    doubleInt di;
                    di.i = argBits;
                    arg = di.d;
                }
                if (destSize == 4) {
                    floatInt fi;
                    fi.f = arg;
                    argBits = fi.i;
                } else {
                    doubleInt di;
                    di.d = arg;
                    argBits = di.i;
                }
                int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
                int destLoIndex = destStart + (i + 0) * destSizeBits;
                result = insertBits(result, destHiIndex, destLoIndex, argBits);
            }
            FpDestReg.uqw = result;
        '''

    # Mcmpi2r: element-wise signed integer compare of FpSrcReg1 against
    # FpSrcReg2 that writes a per-element mask into FpDestReg.
    #
    # srcSize is asserted to equal destSize. Each element of both sources
    # is sign extended to 64 bits before the comparison. A result element
    # becomes mask(sizeBits) when the selected test passes and 0
    # otherwise:
    #   (ext & 0x2) == 0    ->  arg1 == arg2
    #   (ext & 0x2) == 0x2  ->  arg1 >  arg2
    #
    # No constructor is defined here, so MediaOp.__init__ is used as is.
    # The snippet names FpSrcReg2 directly rather than op2, so only the
    # register-template class is generated for it.
    #
    # NOTE(review): floatInt and doubleInt are declared in the snippet
    # but nothing in it refers to them; they look like leftovers that
    # could be dropped - confirm before removing.
    class Mcmpi2r(MediaOp):
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            int items = numItems(size);
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                int64_t arg1 = arg1Bits |
                    (0 - (arg1Bits & (ULL(1) << (sizeBits - 1))));
                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
                int64_t arg2 = arg2Bits |
                    (0 - (arg2Bits & (ULL(1) << (sizeBits - 1))));

                uint64_t resBits = 0;
                if (((ext & 0x2) == 0 && arg1 == arg2) ||
                    ((ext & 0x2) == 0x2 && arg1 > arg2))
                    resBits = mask(sizeBits);

                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg.uqw = result;
        '''

    # Mcmpf2r: element-wise floating-point compare of FpSrcReg1 against
    # FpSrcReg2 that writes a per-element mask into FpDestReg.
    #
    # srcSize is asserted to equal destSize. Elements are reinterpreted
    # as float when size == 4 and as double otherwise. A result element
    # becomes mask(sizeBits) when the predicate chosen by ext & mask(3)
    # holds and 0 otherwise:
    #   0: arg1 == arg2 and neither argument is NaN
    #   1: arg1 <  arg2 and neither argument is NaN
    #   2: arg1 <= arg2 and neither argument is NaN
    #   3: at least one argument is NaN
    #   4: arg1 != arg2, or at least one argument is NaN
    #   5: !(arg1 <  arg2), or at least one argument is NaN
    #   6: !(arg1 <= arg2), or at least one argument is NaN
    #   7: neither argument is NaN
    #
    # No constructor is defined here, so MediaOp.__init__ is used as is.
    #
    # NOTE(review): unlike Mcmpf2rf, this snippet does not assert that
    # the size is 4 or 8; any size other than 4 takes the double path -
    # confirm callers only use 4 or 8.
    class Mcmpf2r(MediaOp):
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            int items = numItems(size);
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
                double arg1, arg2;

                if (size == 4) {
                    floatInt fi;
                    fi.i = arg1Bits;
                    arg1 = fi.f;
                    fi.i = arg2Bits;
                    arg2 = fi.f;
                } else {
                    doubleInt di;
                    di.i = arg1Bits;
                    arg1 = di.d;
                    di.i = arg2Bits;
                    arg2 = di.d;
                }

                uint64_t resBits = 0;
                bool nanop = isnan(arg1) || isnan(arg2);
                switch (ext & mask(3)) {
                  case 0:
                    if (arg1 == arg2 && !nanop)
                        resBits = mask(sizeBits);
                    break;
                  case 1:
                    if (arg1 < arg2 && !nanop)
                        resBits = mask(sizeBits);
                    break;
                  case 2:
                    if (arg1 <= arg2 && !nanop)
                        resBits = mask(sizeBits);
                    break;
                  case 3:
                    if (nanop)
                        resBits = mask(sizeBits);
                    break;
                  case 4:
                    if (arg1 != arg2 || nanop)
                        resBits = mask(sizeBits);
                    break;
                  case 5:
                    if (!(arg1 < arg2) || nanop)
                        resBits = mask(sizeBits);
                    break;
                  case 6:
                    if (!(arg1 <= arg2) || nanop)
                        resBits = mask(sizeBits);
                    break;
                  case 7:
                    if (!nanop)
                        resBits = mask(sizeBits);
                    break;
                };

                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg.uqw = result;
        '''

    # Mcmpf2rf: scalar floating-point compare that reports its outcome in
    # ccFlagBits instead of a register.
    #
    # Only the low sizeBits bits of FpSrcReg1 and FpSrcReg2 are compared,
    # as float when size == 4 and as double otherwise; srcSize must equal
    # destSize and be 4 or 8. OF, SF, AF, ZF, PF and CF are cleared
    # first, then ZF|PF|CF is set for an unordered (NaN) compare, CF for
    # arg1 < arg2 and ZF for arg1 == arg2. FpDestReg is not written by
    # this snippet.
    class Mcmpf2rf(MediaOp):
        # Takes two sources and no destination; the dest slot of
        # MediaOp.__init__ is filled with the fixed string
        # "InstRegIndex(0)".
        def __init__(self, src1, src2,\
                size = None, destSize = None, srcSize = None, ext = None):
            super(Mcmpf2rf, self).__init__("InstRegIndex(0)", src1,\
                    src2, size, destSize, srcSize, ext)
        # C++ body substituted into the microop templates by the metaclass.
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };

            assert(srcSize == destSize);
            assert(srcSize == 4 || srcSize == 8);
            int size = srcSize;
            int sizeBits = size * 8;

            double arg1, arg2;
            uint64_t arg1Bits = bits(FpSrcReg1.uqw, sizeBits - 1, 0);
            uint64_t arg2Bits = bits(FpSrcReg2.uqw, sizeBits - 1, 0);
            if (size == 4) {
                floatInt fi;
                fi.i = arg1Bits;
                arg1 = fi.f;
                fi.i = arg2Bits;
                arg2 = fi.f;
            } else {
                doubleInt di;
                di.i = arg1Bits;
                arg1 = di.d;
                di.i = arg2Bits;
                arg2 = di.d;
            }

            //               ZF PF CF
            // Unordered      1  1  1
            // Greater than   0  0  0
            // Less than      0  0  1
            // Equal          1  0  0
            //           OF = SF = AF = 0
            ccFlagBits = ccFlagBits & ~(OFBit | SFBit | AFBit |
                                        ZFBit | PFBit | CFBit);
            if (isnan(arg1) || isnan(arg2))
                ccFlagBits = ccFlagBits | (ZFBit | PFBit | CFBit);
            else if(arg1 < arg2)
                ccFlagBits = ccFlagBits | CFBit;
            else if(arg1 == arg2)
                ccFlagBits = ccFlagBits | ZFBit;
        '''
}};
|