280 %(flags)s, %(src1)s, %(op2)s, %(dest)s, 281 %(srcSize)s, %(destSize)s, %(ext)s)''' % { 282 "class_name" : className, 283 "flags" : self.microFlagsText(microFlags), 284 "src1" : self.src1, "op2" : self.op2, 285 "dest" : self.dest, 286 "srcSize" : self.srcSize, 287 "destSize" : self.destSize, 288 "ext" : self.ext} 289 return allocator 290 291 class Mov2int(MediaOp): 292 def __init__(self, dest, src1, src2 = 0, \ 293 size = None, destSize = None, srcSize = None, ext = None): 294 super(Mov2int, self).__init__(dest, src1,\ 295 src2, size, destSize, srcSize, ext) 296 code = ''' 297 int items = sizeof(FloatRegBits) / srcSize; 298 int offset = imm8; 299 if (bits(src1, 0) && (ext & 0x1)) 300 offset -= items; 301 if (offset >= 0 && offset < items) { 302 uint64_t fpSrcReg1 = 303 bits(FpSrcReg1.uqw, 304 (offset + 1) * srcSize * 8 - 1, 305 (offset + 0) * srcSize * 8); 306 DestReg = merge(0, fpSrcReg1, destSize); 307 } else { 308 DestReg = DestReg; 309 } 310 ''' 311 312 class Mov2fp(MediaOp): 313 def __init__(self, dest, src1, src2 = 0, \ 314 size = None, destSize = None, srcSize = None, ext = None): 315 super(Mov2fp, self).__init__(dest, src1,\ 316 src2, size, destSize, srcSize, ext) 317 code = ''' 318 int items = sizeof(FloatRegBits) / destSize; 319 int offset = imm8; 320 if (bits(dest, 0) && (ext & 0x1)) 321 offset -= items; 322 if (offset >= 0 && offset < items) { 323 uint64_t srcReg1 = pick(SrcReg1, 0, srcSize); 324 FpDestReg.uqw = 325 insertBits(FpDestReg.uqw, 326 (offset + 1) * destSize * 8 - 1, 327 (offset + 0) * destSize * 8, srcReg1); 328 } else { 329 FpDestReg.uqw = FpDestReg.uqw; 330 } 331 ''' 332 333 class Movsign(MediaOp): 334 def __init__(self, dest, src, \ 335 size = None, destSize = None, srcSize = None, ext = None): 336 super(Movsign, self).__init__(dest, src,\ 337 "InstRegIndex(0)", size, destSize, srcSize, ext) 338 code = ''' 339 int items = sizeof(FloatRegBits) / srcSize; 340 uint64_t result = 0; 341 int offset = (ext & 0x1) ? items : 0; 342 for (int i = 0; i < items; i++) { 343 uint64_t picked = 344 bits(FpSrcReg1.uqw, (i + 1) * 8 * srcSize - 1); 345 result = insertBits(result, i + offset, i + offset, picked); 346 } 347 DestReg = DestReg | result; 348 ''' 349 350 class Maskmov(MediaOp): 351 code = ''' 352 assert(srcSize == destSize); 353 int size = srcSize; 354 int sizeBits = size * 8; 355 int items = numItems(size); 356 uint64_t result = FpDestReg.uqw; 357 358 for (int i = 0; i < items; i++) { 359 int hiIndex = (i + 1) * sizeBits - 1; 360 int loIndex = (i + 0) * sizeBits; 361 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); 362 if (bits(FpSrcReg2.uqw, hiIndex)) 363 result = insertBits(result, hiIndex, loIndex, arg1Bits); 364 } 365 FpDestReg.uqw = result; 366 ''' 367 368 class shuffle(MediaOp): 369 code = ''' 370 assert(srcSize == destSize); 371 int size = srcSize; 372 int sizeBits = size * 8; 373 int items = sizeof(FloatRegBits) / size; 374 int options; 375 int optionBits; 376 if (size == 8) { 377 options = 2; 378 optionBits = 1; 379 } else { 380 options = 4; 381 optionBits = 2; 382 } 383 384 uint64_t result = 0; 385 uint8_t sel = ext; 386 387 for (int i = 0; i < items; i++) { 388 uint64_t resBits; 389 uint8_t lsel = sel & mask(optionBits); 390 if (lsel * size >= sizeof(FloatRegBits)) { 391 lsel -= options / 2; 392 resBits = bits(FpSrcReg2.uqw, 393 (lsel + 1) * sizeBits - 1, 394 (lsel + 0) * sizeBits); 395 } else { 396 resBits = bits(FpSrcReg1.uqw, 397 (lsel + 1) * sizeBits - 1, 398 (lsel + 0) * sizeBits); 399 } 400 401 sel >>= optionBits; 402 403 int hiIndex = (i + 1) * sizeBits - 1; 404 int loIndex = (i + 0) * sizeBits; 405 result = insertBits(result, hiIndex, loIndex, resBits); 406 } 407 FpDestReg.uqw = result; 408 ''' 409 410 class Unpack(MediaOp): 411 code = ''' 412 assert(srcSize == destSize); 413 int size = destSize; 414 int items = (sizeof(FloatRegBits) / size) / 2; 415 int offset = ext ? items : 0; 416 uint64_t result = 0; 417 for (int i = 0; i < items; i++) { 418 uint64_t pickedLow = 419 bits(FpSrcReg1.uqw, (i + offset + 1) * 8 * size - 1, 420 (i + offset) * 8 * size); 421 result = insertBits(result, 422 (2 * i + 1) * 8 * size - 1, 423 (2 * i + 0) * 8 * size, 424 pickedLow); 425 uint64_t pickedHigh = 426 bits(FpSrcReg2.uqw, (i + offset + 1) * 8 * size - 1, 427 (i + offset) * 8 * size); 428 result = insertBits(result, 429 (2 * i + 2) * 8 * size - 1, 430 (2 * i + 1) * 8 * size, 431 pickedHigh); 432 } 433 FpDestReg.uqw = result; 434 ''' 435 436 class Pack(MediaOp): 437 code = ''' 438 assert(srcSize == destSize * 2); 439 int items = (sizeof(FloatRegBits) / destSize); 440 int destBits = destSize * 8; 441 int srcBits = srcSize * 8; 442 uint64_t result = 0; 443 int i; 444 for (i = 0; i < items / 2; i++) { 445 uint64_t picked = 446 bits(FpSrcReg1.uqw, (i + 1) * srcBits - 1, 447 (i + 0) * srcBits); 448 unsigned signBit = bits(picked, srcBits - 1); 449 uint64_t overflow = bits(picked, srcBits - 1, destBits - 1); 450 451 // Handle saturation. 452 if (signBit) { 453 if (overflow != mask(destBits - srcBits + 1)) { 454 if (signedOp()) 455 picked = (ULL(1) << (destBits - 1)); 456 else 457 picked = 0; 458 } 459 } else { 460 if (overflow != 0) { 461 if (signedOp()) 462 picked = mask(destBits - 1); 463 else 464 picked = mask(destBits); 465 } 466 } 467 result = insertBits(result, 468 (i + 1) * destBits - 1, 469 (i + 0) * destBits, 470 picked); 471 } 472 for (;i < items; i++) { 473 uint64_t picked = 474 bits(FpSrcReg2.uqw, (i - items + 1) * srcBits - 1, 475 (i - items + 0) * srcBits); 476 unsigned signBit = bits(picked, srcBits - 1); 477 uint64_t overflow = bits(picked, srcBits - 1, destBits - 1); 478 479 // Handle saturation. 480 if (signBit) { 481 if (overflow != mask(destBits - srcBits + 1)) { 482 if (signedOp()) 483 picked = (ULL(1) << (destBits - 1)); 484 else 485 picked = 0; 486 } 487 } else { 488 if (overflow != 0) { 489 if (signedOp()) 490 picked = mask(destBits - 1); 491 else 492 picked = mask(destBits); 493 } 494 } 495 result = insertBits(result, 496 (i + 1) * destBits - 1, 497 (i + 0) * destBits, 498 picked); 499 } 500 FpDestReg.uqw = result; 501 ''' 502 503 class Mxor(MediaOp): 504 def __init__(self, dest, src1, src2): 505 super(Mxor, self).__init__(dest, src1, src2, 1) 506 code = ''' 507 FpDestReg.uqw = FpSrcReg1.uqw ^ FpSrcReg2.uqw; 508 ''' 509 510 class Mor(MediaOp): 511 def __init__(self, dest, src1, src2): 512 super(Mor, self).__init__(dest, src1, src2, 1) 513 code = ''' 514 FpDestReg.uqw = FpSrcReg1.uqw | FpSrcReg2.uqw; 515 ''' 516 517 class Mand(MediaOp): 518 def __init__(self, dest, src1, src2): 519 super(Mand, self).__init__(dest, src1, src2, 1) 520 code = ''' 521 FpDestReg.uqw = FpSrcReg1.uqw & FpSrcReg2.uqw; 522 ''' 523 524 class Mandn(MediaOp): 525 def __init__(self, dest, src1, src2): 526 super(Mandn, self).__init__(dest, src1, src2, 1) 527 code = ''' 528 FpDestReg.uqw = ~FpSrcReg1.uqw & FpSrcReg2.uqw; 529 ''' 530 531 class Mminf(MediaOp): 532 code = ''' 533 union floatInt 534 { 535 float f; 536 uint32_t i; 537 }; 538 union doubleInt 539 { 540 double d; 541 uint64_t i; 542 }; 543 544 assert(srcSize == destSize); 545 int size = srcSize; 546 int sizeBits = size * 8; 547 assert(srcSize == 4 || srcSize == 8); 548 int items = numItems(size); 549 uint64_t result = FpDestReg.uqw; 550 551 for (int i = 0; i < items; i++) { 552 double arg1, arg2; 553 int hiIndex = (i + 1) * sizeBits - 1; 554 int loIndex = (i + 0) * sizeBits; 555 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); 556 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex); 557 558 if (size == 4) { 559 floatInt fi; 560 fi.i = arg1Bits; 561 arg1 = fi.f; 562 fi.i = arg2Bits; 563 arg2 = fi.f; 564 } else { 565 doubleInt di; 566 di.i = arg1Bits; 567 arg1 = di.d; 568 di.i = arg2Bits; 569 arg2 = di.d; 570 } 571 572 if (arg1 < arg2) { 573 result = insertBits(result, hiIndex, loIndex, arg1Bits); 574 } else { 575 result = insertBits(result, hiIndex, loIndex, arg2Bits); 576 } 577 } 578 FpDestReg.uqw = result; 579 ''' 580 581 class Mmaxf(MediaOp): 582 code = ''' 583 union floatInt 584 { 585 float f; 586 uint32_t i; 587 }; 588 union doubleInt 589 { 590 double d; 591 uint64_t i; 592 }; 593 594 assert(srcSize == destSize); 595 int size = srcSize; 596 int sizeBits = size * 8; 597 assert(srcSize == 4 || srcSize == 8); 598 int items = numItems(size); 599 uint64_t result = FpDestReg.uqw; 600 601 for (int i = 0; i < items; i++) { 602 double arg1, arg2; 603 int hiIndex = (i + 1) * sizeBits - 1; 604 int loIndex = (i + 0) * sizeBits; 605 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); 606 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex); 607 608 if (size == 4) { 609 floatInt fi; 610 fi.i = arg1Bits; 611 arg1 = fi.f; 612 fi.i = arg2Bits; 613 arg2 = fi.f; 614 } else { 615 doubleInt di; 616 di.i = arg1Bits; 617 arg1 = di.d; 618 di.i = arg2Bits; 619 arg2 = di.d; 620 } 621 622 if (arg1 > arg2) { 623 result = insertBits(result, hiIndex, loIndex, arg1Bits); 624 } else { 625 result = insertBits(result, hiIndex, loIndex, arg2Bits); 626 } 627 } 628 FpDestReg.uqw = result; 629 ''' 630 631 class Mmini(MediaOp): 632 code = ''' 633 634 assert(srcSize == destSize); 635 int size = srcSize; 636 int sizeBits = size * 8; 637 int items = numItems(size); 638 uint64_t result = FpDestReg.uqw; 639 640 for (int i = 0; i < items; i++) { 641 int hiIndex = (i + 1) * sizeBits - 1; 642 int loIndex = (i + 0) * sizeBits; 643 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); 644 int64_t arg1 = arg1Bits | 645 (0 - (arg1Bits & (ULL(1) << (sizeBits - 1)))); 646 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex); 647 int64_t arg2 = arg2Bits | 648 (0 - (arg2Bits & (ULL(1) << (sizeBits - 1)))); 649 uint64_t resBits; 650 651 if (signedOp()) { 652 if (arg1 < arg2) { 653 resBits = arg1Bits; 654 } else { 655 resBits = arg2Bits; 656 } 657 } else { 658 if (arg1Bits < arg2Bits) { 659 resBits = arg1Bits; 660 } else { 661 resBits = arg2Bits; 662 } 663 } 664 result = insertBits(result, hiIndex, loIndex, resBits); 665 } 666 FpDestReg.uqw = result; 667 ''' 668 669 class Mmaxi(MediaOp): 670 code = ''' 671 672 assert(srcSize == destSize); 673 int size = srcSize; 674 int sizeBits = size * 8; 675 int items = numItems(size); 676 uint64_t result = FpDestReg.uqw; 677 678 for (int i = 0; i < items; i++) { 679 int hiIndex = (i + 1) * sizeBits - 1; 680 int loIndex = (i + 0) * sizeBits; 681 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); 682 int64_t arg1 = arg1Bits | 683 (0 - (arg1Bits & (ULL(1) << (sizeBits - 1)))); 684 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex); 685 int64_t arg2 = arg2Bits | 686 (0 - (arg2Bits & (ULL(1) << (sizeBits - 1)))); 687 uint64_t resBits; 688 689 if (signedOp()) { 690 if (arg1 > arg2) { 691 resBits = arg1Bits; 692 } else { 693 resBits = arg2Bits; 694 } 695 } else { 696 if (arg1Bits > arg2Bits) { 697 resBits = arg1Bits; 698 } else { 699 resBits = arg2Bits; 700 } 701 } 702 result = insertBits(result, hiIndex, loIndex, resBits); 703 } 704 FpDestReg.uqw = result; 705 ''' 706 707 class Msqrt(MediaOp): 708 def __init__(self, dest, src, \ 709 size = None, destSize = None, srcSize = None, ext = None): 710 super(Msqrt, self).__init__(dest, src,\ 711 "InstRegIndex(0)", size, destSize, srcSize, ext) 712 code = ''' 713 union floatInt 714 { 715 float f; 716 uint32_t i; 717 }; 718 union doubleInt 719 { 720 double d; 721 uint64_t i; 722 }; 723 724 assert(srcSize == destSize); 725 int size = srcSize; 726 int sizeBits = size * 8; 727 assert(srcSize == 4 || srcSize == 8); 728 int items = numItems(size); 729 uint64_t result = FpDestReg.uqw; 730 731 for (int i = 0; i < items; i++) { 732 int hiIndex = (i + 1) * sizeBits - 1; 733 int loIndex = (i + 0) * sizeBits; 734 uint64_t argBits = bits(FpSrcReg1.uqw, hiIndex, loIndex); 735 736 if (size == 4) { 737 floatInt fi; 738 fi.i = argBits; 739 fi.f = sqrt(fi.f); 740 argBits = fi.i; 741 } else { 742 doubleInt di; 743 di.i = argBits; 744 di.d = sqrt(di.d); 745 argBits = di.i; 746 } 747 result = insertBits(result, hiIndex, loIndex, argBits); 748 } 749 FpDestReg.uqw = result; 750 ''' 751 752 class Maddf(MediaOp): 753 code = ''' 754 union floatInt 755 { 756 float f; 757 uint32_t i; 758 }; 759 union doubleInt 760 { 761 double d; 762 uint64_t i; 763 }; 764 765 assert(srcSize == destSize); 766 int size = srcSize; 767 int sizeBits = size * 8; 768 assert(srcSize == 4 || srcSize == 8); 769 int items = numItems(size); 770 uint64_t result = FpDestReg.uqw; 771 772 for (int i = 0; i < items; i++) { 773 int hiIndex = (i + 1) * sizeBits - 1; 774 int loIndex = (i + 0) * sizeBits; 775 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); 776 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex); 777 uint64_t resBits; 778 779 if (size == 4) { 780 floatInt arg1, arg2, res; 781 arg1.i = arg1Bits; 782 arg2.i = arg2Bits; 783 res.f = arg1.f + arg2.f; 784 resBits = res.i; 785 } else { 786 doubleInt arg1, arg2, res; 787 arg1.i = arg1Bits; 788 arg2.i = arg2Bits; 789 res.d = arg1.d + arg2.d; 790 resBits = res.i; 791 } 792 793 result = insertBits(result, hiIndex, loIndex, resBits); 794 } 795 FpDestReg.uqw = result; 796 ''' 797 798 class Msubf(MediaOp): 799 code = ''' 800 union floatInt 801 { 802 float f; 803 uint32_t i; 804 }; 805 union doubleInt 806 { 807 double d; 808 uint64_t i; 809 }; 810 811 assert(srcSize == destSize); 812 int size = srcSize; 813 int sizeBits = size * 8; 814 assert(srcSize == 4 || srcSize == 8); 815 int items = numItems(size); 816 uint64_t result = FpDestReg.uqw; 817 818 for (int i = 0; i < items; i++) { 819 int hiIndex = (i + 1) * sizeBits - 1; 820 int loIndex = (i + 0) * sizeBits; 821 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); 822 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex); 823 uint64_t resBits; 824 825 if (size == 4) { 826 floatInt arg1, arg2, res; 827 arg1.i = arg1Bits; 828 arg2.i = arg2Bits; 829 res.f = arg1.f - arg2.f; 830 resBits = res.i; 831 } else { 832 doubleInt arg1, arg2, res; 833 arg1.i = arg1Bits; 834 arg2.i = arg2Bits; 835 res.d = arg1.d - arg2.d; 836 resBits = res.i; 837 } 838 839 result = insertBits(result, hiIndex, loIndex, resBits); 840 } 841 FpDestReg.uqw = result; 842 ''' 843 844 class Mmulf(MediaOp): 845 code = ''' 846 union floatInt 847 { 848 float f; 849 uint32_t i; 850 }; 851 union doubleInt 852 { 853 double d; 854 uint64_t i; 855 }; 856 857 assert(srcSize == destSize); 858 int size = srcSize; 859 int sizeBits = size * 8; 860 assert(srcSize == 4 || srcSize == 8); 861 int items = numItems(size); 862 uint64_t result = FpDestReg.uqw; 863 864 for (int i = 0; i < items; i++) { 865 int hiIndex = (i + 1) * sizeBits - 1; 866 int loIndex = (i + 0) * sizeBits; 867 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); 868 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex); 869 uint64_t resBits; 870 871 if (size == 4) { 872 floatInt arg1, arg2, res; 873 arg1.i = arg1Bits; 874 arg2.i = arg2Bits; 875 res.f = arg1.f * arg2.f; 876 resBits = res.i; 877 } else { 878 doubleInt arg1, arg2, res; 879 arg1.i = arg1Bits; 880 arg2.i = arg2Bits; 881 res.d = arg1.d * arg2.d; 882 resBits = res.i; 883 } 884 885 result = insertBits(result, hiIndex, loIndex, resBits); 886 } 887 FpDestReg.uqw = result; 888 ''' 889 890 class Mdivf(MediaOp): 891 code = ''' 892 union floatInt 893 { 894 float f; 895 uint32_t i; 896 }; 897 union doubleInt 898 { 899 double d; 900 uint64_t i; 901 }; 902 903 assert(srcSize == destSize); 904 int size = srcSize; 905 int sizeBits = size * 8; 906 assert(srcSize == 4 || srcSize == 8); 907 int items = numItems(size); 908 uint64_t result = FpDestReg.uqw; 909 910 for (int i = 0; i < items; i++) { 911 int hiIndex = (i + 1) * sizeBits - 1; 912 int loIndex = (i + 0) * sizeBits; 913 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); 914 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex); 915 uint64_t resBits; 916 917 if (size == 4) { 918 floatInt arg1, arg2, res; 919 arg1.i = arg1Bits; 920 arg2.i = arg2Bits; 921 res.f = arg1.f / arg2.f; 922 resBits = res.i; 923 } else { 924 doubleInt arg1, arg2, res; 925 arg1.i = arg1Bits; 926 arg2.i = arg2Bits; 927 res.d = arg1.d / arg2.d; 928 resBits = res.i; 929 } 930 931 result = insertBits(result, hiIndex, loIndex, resBits); 932 } 933 FpDestReg.uqw = result; 934 ''' 935 936 class Maddi(MediaOp): 937 code = ''' 938 assert(srcSize == destSize); 939 int size = srcSize; 940 int sizeBits = size * 8; 941 int items = numItems(size); 942 uint64_t result = FpDestReg.uqw; 943 944 for (int i = 0; i < items; i++) { 945 int hiIndex = (i + 1) * sizeBits - 1; 946 int loIndex = (i + 0) * sizeBits; 947 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); 948 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex); 949 uint64_t resBits = arg1Bits + arg2Bits; 950 951 if (ext & 0x2) { 952 if (signedOp()) { 953 int arg1Sign = bits(arg1Bits, sizeBits - 1); 954 int arg2Sign = bits(arg2Bits, sizeBits - 1); 955 int resSign = bits(resBits, sizeBits - 1); 956 if ((arg1Sign == arg2Sign) && (arg1Sign != resSign)) { 957 if (resSign == 0) 958 resBits = (ULL(1) << (sizeBits - 1)); 959 else 960 resBits = mask(sizeBits - 1); 961 } 962 } else { 963 if (findCarry(sizeBits, resBits, arg1Bits, arg2Bits)) 964 resBits = mask(sizeBits); 965 } 966 } 967 968 result = insertBits(result, hiIndex, loIndex, resBits); 969 } 970 FpDestReg.uqw = result; 971 ''' 972 973 class Msubi(MediaOp): 974 code = ''' 975 assert(srcSize == destSize); 976 int size = srcSize; 977 int sizeBits = size * 8; 978 int items = numItems(size); 979 uint64_t result = FpDestReg.uqw; 980 981 for (int i = 0; i < items; i++) { 982 int hiIndex = (i + 1) * sizeBits - 1; 983 int loIndex = (i + 0) * sizeBits; 984 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); 985 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex); 986 uint64_t resBits = arg1Bits - arg2Bits; 987 988 if (ext & 0x2) { 989 if (signedOp()) { 990 int arg1Sign = bits(arg1Bits, sizeBits - 1); 991 int arg2Sign = !bits(arg2Bits, sizeBits - 1); 992 int resSign = bits(resBits, sizeBits - 1); 993 if ((arg1Sign == arg2Sign) && (arg1Sign != resSign)) { 994 if (resSign == 0) 995 resBits = (ULL(1) << (sizeBits - 1)); 996 else 997 resBits = mask(sizeBits - 1); 998 } 999 } else { 1000 if (arg2Bits > arg1Bits) { 1001 resBits = 0; 1002 } else if (!findCarry(sizeBits, resBits, 1003 arg1Bits, ~arg2Bits)) { 1004 resBits = mask(sizeBits); 1005 } 1006 } 1007 } 1008 1009 result = insertBits(result, hiIndex, loIndex, resBits); 1010 } 1011 FpDestReg.uqw = result; 1012 ''' 1013 1014 class Mmuli(MediaOp): 1015 code = ''' 1016 int srcBits = srcSize * 8; 1017 int destBits = destSize * 8; 1018 assert(destBits <= 64); 1019 assert(destSize >= srcSize); 1020 int items = numItems(destSize); 1021 uint64_t result = FpDestReg.uqw; 1022 1023 for (int i = 0; i < items; i++) { 1024 int offset = 0; 1025 if (ext & 16) { 1026 if (ext & 32) 1027 offset = i * (destBits - srcBits); 1028 else 1029 offset = i * (destBits - srcBits) + srcBits; 1030 } 1031 int srcHiIndex = (i + 1) * srcBits - 1 + offset; 1032 int srcLoIndex = (i + 0) * srcBits + offset; 1033 uint64_t arg1Bits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex); 1034 uint64_t arg2Bits = bits(FpSrcReg2.uqw, srcHiIndex, srcLoIndex); 1035 uint64_t resBits; 1036 1037 if (signedOp()) { 1038 int64_t arg1 = arg1Bits | 1039 (0 - (arg1Bits & (ULL(1) << (srcBits - 1)))); 1040 int64_t arg2 = arg2Bits | 1041 (0 - (arg2Bits & (ULL(1) << (srcBits - 1)))); 1042 resBits = (uint64_t)(arg1 * arg2); 1043 } else { 1044 resBits = arg1Bits * arg2Bits; 1045 } 1046 1047 if (ext & 0x4) 1048 resBits += (ULL(1) << (destBits - 1)); 1049 1050 if (multHi()) 1051 resBits >>= destBits; 1052 1053 int destHiIndex = (i + 1) * destBits - 1; 1054 int destLoIndex = (i + 0) * destBits; 1055 result = insertBits(result, destHiIndex, destLoIndex, resBits); 1056 } 1057 FpDestReg.uqw = result; 1058 ''' 1059 1060 class Mavg(MediaOp): 1061 code = ''' 1062 assert(srcSize == destSize); 1063 int size = srcSize; 1064 int sizeBits = size * 8; 1065 int items = numItems(size); 1066 uint64_t result = FpDestReg.uqw; 1067 1068 for (int i = 0; i < items; i++) { 1069 int hiIndex = (i + 1) * sizeBits - 1; 1070 int loIndex = (i + 0) * sizeBits; 1071 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); 1072 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex); 1073 uint64_t resBits = (arg1Bits + arg2Bits + 1) / 2; 1074 1075 result = insertBits(result, hiIndex, loIndex, resBits); 1076 } 1077 FpDestReg.uqw = result; 1078 ''' 1079 1080 class Msad(MediaOp): 1081 code = ''' 1082 int srcBits = srcSize * 8; 1083 int items = sizeof(FloatRegBits) / srcSize; 1084 1085 uint64_t sum = 0; 1086 for (int i = 0; i < items; i++) { 1087 int hiIndex = (i + 1) * srcBits - 1; 1088 int loIndex = (i + 0) * srcBits; 1089 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); 1090 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex); 1091 int64_t resBits = arg1Bits - arg2Bits; 1092 if (resBits < 0) 1093 resBits = -resBits; 1094 sum += resBits; 1095 } 1096 FpDestReg.uqw = sum & mask(destSize * 8); 1097 ''' 1098 1099 class Msrl(MediaOp): 1100 code = ''' 1101 1102 assert(srcSize == destSize); 1103 int size = srcSize; 1104 int sizeBits = size * 8; 1105 int items = numItems(size); 1106 uint64_t shiftAmt = op2.uqw; 1107 uint64_t result = FpDestReg.uqw; 1108 1109 for (int i = 0; i < items; i++) { 1110 int hiIndex = (i + 1) * sizeBits - 1; 1111 int loIndex = (i + 0) * sizeBits; 1112 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); 1113 uint64_t resBits; 1114 if (shiftAmt >= sizeBits) { 1115 resBits = 0; 1116 } else { 1117 resBits = (arg1Bits >> shiftAmt) & 1118 mask(sizeBits - shiftAmt); 1119 } 1120 1121 result = insertBits(result, hiIndex, loIndex, resBits); 1122 } 1123 FpDestReg.uqw = result; 1124 ''' 1125 1126 class Msra(MediaOp): 1127 code = ''' 1128 1129 assert(srcSize == destSize); 1130 int size = srcSize; 1131 int sizeBits = size * 8; 1132 int items = numItems(size); 1133 uint64_t shiftAmt = op2.uqw; 1134 uint64_t result = FpDestReg.uqw; 1135 1136 for (int i = 0; i < items; i++) { 1137 int hiIndex = (i + 1) * sizeBits - 1; 1138 int loIndex = (i + 0) * sizeBits; 1139 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); 1140 uint64_t resBits; 1141 if (shiftAmt >= sizeBits) { 1142 if (bits(arg1Bits, sizeBits - 1)) 1143 resBits = mask(sizeBits); 1144 else 1145 resBits = 0; 1146 } else { 1147 resBits = (arg1Bits >> shiftAmt); 1148 resBits = resBits | 1149 (0 - (resBits & (ULL(1) << (sizeBits - 1 - shiftAmt)))); 1150 } 1151 1152 result = insertBits(result, hiIndex, loIndex, resBits); 1153 } 1154 FpDestReg.uqw = result; 1155 ''' 1156 1157 class Msll(MediaOp): 1158 code = ''' 1159 1160 assert(srcSize == destSize); 1161 int size = srcSize; 1162 int sizeBits = size * 8; 1163 int items = numItems(size); 1164 uint64_t shiftAmt = op2.uqw; 1165 uint64_t result = FpDestReg.uqw; 1166 1167 for (int i = 0; i < items; i++) { 1168 int hiIndex = (i + 1) * sizeBits - 1; 1169 int loIndex = (i + 0) * sizeBits; 1170 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); 1171 uint64_t resBits; 1172 if (shiftAmt >= sizeBits) { 1173 resBits = 0; 1174 } else { 1175 resBits = (arg1Bits << shiftAmt); 1176 } 1177 1178 result = insertBits(result, hiIndex, loIndex, resBits); 1179 } 1180 FpDestReg.uqw = result; 1181 ''' 1182 1183 class Cvtf2i(MediaOp): 1184 def __init__(self, dest, src, \ 1185 size = None, destSize = None, srcSize = None, ext = None): 1186 super(Cvtf2i, self).__init__(dest, src,\ 1187 "InstRegIndex(0)", size, destSize, srcSize, ext) 1188 code = ''' 1189 union floatInt 1190 { 1191 float f; 1192 uint32_t i; 1193 }; 1194 union doubleInt 1195 { 1196 double d; 1197 uint64_t i; 1198 }; 1199 1200 assert(destSize == 4 || destSize == 8); 1201 assert(srcSize == 4 || srcSize == 8); 1202 int srcSizeBits = srcSize * 8; 1203 int destSizeBits = destSize * 8; 1204 int items; 1205 int srcStart = 0; 1206 int destStart = 0; 1207 if (srcSize == 2 * destSize) { 1208 items = numItems(srcSize); 1209 if (ext & 0x2) 1210 destStart = destSizeBits * items; 1211 } else if (destSize == 2 * srcSize) { 1212 items = numItems(destSize); 1213 if (ext & 0x2) 1214 srcStart = srcSizeBits * items; 1215 } else { 1216 items = numItems(destSize); 1217 } 1218 uint64_t result = FpDestReg.uqw; 1219 1220 for (int i = 0; i < items; i++) { 1221 int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1; 1222 int srcLoIndex = srcStart + (i + 0) * srcSizeBits; 1223 uint64_t argBits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex); 1224 double arg; 1225 1226 if (srcSize == 4) { 1227 floatInt fi; 1228 fi.i = argBits; 1229 arg = fi.f; 1230 } else { 1231 doubleInt di; 1232 di.i = argBits; 1233 arg = di.d; 1234 } 1235 1236 if (ext & 0x4) { 1237 if (arg >= 0) 1238 arg += 0.5; 1239 else 1240 arg -= 0.5; 1241 } 1242 1243 if (destSize == 4) { 1244 argBits = (uint32_t)arg; 1245 } else { 1246 argBits = (uint64_t)arg; 1247 } 1248 int destHiIndex = destStart + (i + 1) * destSizeBits - 1; 1249 int destLoIndex = destStart + (i + 0) * destSizeBits; 1250 result = insertBits(result, destHiIndex, destLoIndex, argBits); 1251 } 1252 FpDestReg.uqw = result; 1253 ''' 1254 1255 class Cvti2f(MediaOp): 1256 def __init__(self, dest, src, \ 1257 size = None, destSize = None, srcSize = None, ext = None): 1258 super(Cvti2f, self).__init__(dest, src,\ 1259 "InstRegIndex(0)", size, destSize, srcSize, ext) 1260 code = ''' 1261 union floatInt 1262 { 1263 float f; 1264 uint32_t i; 1265 }; 1266 union doubleInt 1267 { 1268 double d; 1269 uint64_t i; 1270 }; 1271 1272 assert(destSize == 4 || destSize == 8); 1273 assert(srcSize == 4 || srcSize == 8); 1274 int srcSizeBits = srcSize * 8; 1275 int destSizeBits = destSize * 8; 1276 int items; 1277 int srcStart = 0; 1278 int destStart = 0; 1279 if (srcSize == 2 * destSize) { 1280 items = numItems(srcSize); 1281 if (ext & 0x2) 1282 destStart = destSizeBits * items; 1283 } else if (destSize == 2 * srcSize) { 1284 items = numItems(destSize); 1285 if (ext & 0x2) 1286 srcStart = srcSizeBits * items; 1287 } else { 1288 items = numItems(destSize); 1289 } 1290 uint64_t result = FpDestReg.uqw; 1291 1292 for (int i = 0; i < items; i++) { 1293 int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1; 1294 int srcLoIndex = srcStart + (i + 0) * srcSizeBits; 1295 uint64_t argBits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex); 1296 1297 int64_t sArg = argBits | 1298 (0 - (argBits & (ULL(1) << (srcSizeBits - 1)))); 1299 double arg = sArg; 1300 1301 if (destSize == 4) { 1302 floatInt fi; 1303 fi.f = arg; 1304 argBits = fi.i; 1305 } else { 1306 doubleInt di; 1307 di.d = arg; 1308 argBits = di.i; 1309 } 1310 int destHiIndex = destStart + (i + 1) * destSizeBits - 1; 1311 int destLoIndex = destStart + (i + 0) * destSizeBits; 1312 result = insertBits(result, destHiIndex, destLoIndex, argBits); 1313 } 1314 FpDestReg.uqw = result; 1315 ''' 1316 1317 class Cvtf2f(MediaOp): 1318 def __init__(self, dest, src, \ 1319 size = None, destSize = None, srcSize = None, ext = None): 1320 super(Cvtf2f, self).__init__(dest, src,\ 1321 "InstRegIndex(0)", size, destSize, srcSize, ext) 1322 code = ''' 1323 union floatInt 1324 { 1325 float f; 1326 uint32_t i; 1327 }; 1328 union doubleInt 1329 { 1330 double d; 1331 uint64_t i; 1332 }; 1333 1334 assert(destSize == 4 || destSize == 8); 1335 assert(srcSize == 4 || srcSize == 8); 1336 int srcSizeBits = srcSize * 8; 1337 int destSizeBits = destSize * 8; 1338 int items; 1339 int srcStart = 0; 1340 int destStart = 0; 1341 if (srcSize == 2 * destSize) { 1342 items = numItems(srcSize); 1343 if (ext & 0x2) 1344 destStart = destSizeBits * items; 1345 } else if (destSize == 2 * srcSize) { 1346 items = numItems(destSize); 1347 if (ext & 0x2) 1348 srcStart = srcSizeBits * items; 1349 } else { 1350 items = numItems(destSize); 1351 } 1352 uint64_t result = FpDestReg.uqw; 1353 1354 for (int i = 0; i < items; i++) { 1355 int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1; 1356 int srcLoIndex = srcStart + (i + 0) * srcSizeBits; 1357 uint64_t argBits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex); 1358 double arg; 1359 1360 if (srcSize == 4) { 1361 floatInt fi; 1362 fi.i = argBits; 1363 arg = fi.f; 1364 } else { 1365 doubleInt di; 1366 di.i = argBits; 1367 arg = di.d; 1368 } 1369 if (destSize == 4) { 1370 floatInt fi; 1371 fi.f = arg; 1372 argBits = fi.i; 1373 } else { 1374 doubleInt di; 1375 di.d = arg; 1376 argBits = di.i; 1377 } 1378 int destHiIndex = destStart + (i + 1) * destSizeBits - 1; 1379 int destLoIndex = destStart + (i + 0) * destSizeBits; 1380 result = insertBits(result, destHiIndex, destLoIndex, argBits); 1381 } 1382 FpDestReg.uqw = result; 1383 ''' 1384 1385 class Mcmpi2r(MediaOp): 1386 code = ''' 1387 union floatInt 1388 { 1389 float f; 1390 uint32_t i; 1391 }; 1392 union doubleInt 1393 { 1394 double d; 1395 uint64_t i; 1396 }; 1397 1398 assert(srcSize == destSize); 1399 int size = srcSize; 1400 int sizeBits = size * 8; 1401 int items = numItems(size); 1402 uint64_t result = FpDestReg.uqw; 1403 1404 for (int i = 0; i < items; i++) { 1405 int hiIndex = (i + 1) * sizeBits - 1; 1406 int loIndex = (i + 0) * sizeBits; 1407 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); 1408 int64_t arg1 = arg1Bits | 1409 (0 - (arg1Bits & (ULL(1) << (sizeBits - 1)))); 1410 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex); 1411 int64_t arg2 = arg2Bits | 1412 (0 - (arg2Bits & (ULL(1) << (sizeBits - 1)))); 1413 1414 uint64_t resBits = 0; 1415 if (((ext & 0x2) == 0 && arg1 == arg2) || 1416 ((ext & 0x2) == 0x2 && arg1 > arg2)) 1417 resBits = mask(sizeBits); 1418 1419 result = insertBits(result, hiIndex, loIndex, resBits); 1420 } 1421 FpDestReg.uqw = result; 1422 ''' 1423 1424 class Mcmpf2r(MediaOp): 1425 code = ''' 1426 union floatInt 1427 { 1428 float f; 1429 uint32_t i; 1430 }; 1431 union doubleInt 1432 { 1433 double d; 1434 uint64_t i; 1435 }; 1436 1437 assert(srcSize == destSize); 1438 int size = srcSize; 1439 int sizeBits = size * 8; 1440 int items = numItems(size); 1441 uint64_t result = FpDestReg.uqw; 1442 1443 for (int i = 0; i < items; i++) { 1444 int hiIndex = (i + 1) * sizeBits - 1; 1445 int loIndex = (i + 0) * sizeBits; 1446 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); 1447 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex); 1448 double arg1, arg2; 1449 1450 if (size == 4) { 1451 floatInt fi; 1452 fi.i = arg1Bits; 1453 arg1 = fi.f; 1454 fi.i = arg2Bits; 1455 arg2 = fi.f; 1456 } else { 1457 doubleInt di; 1458 di.i = arg1Bits; 1459 arg1 = di.d; 1460 di.i = arg2Bits; 1461 arg2 = di.d; 1462 } 1463 1464 uint64_t resBits = 0; 1465 bool nanop = isnan(arg1) || isnan(arg2); 1466 switch (ext & mask(3)) { 1467 case 0: 1468 if (arg1 == arg2 && !nanop) 1469 resBits = mask(sizeBits); 1470 break; 1471 case 1: 1472 if (arg1 < arg2 && !nanop) 1473 resBits = mask(sizeBits); 1474 break; 1475 case 2: 1476 if (arg1 <= arg2 && !nanop) 1477 resBits = mask(sizeBits); 1478 break; 1479 case 3: 1480 if (nanop) 1481 resBits = mask(sizeBits); 1482 break; 1483 case 4: 1484 if (arg1 != arg2 || nanop) 1485 resBits = mask(sizeBits); 1486 break; 1487 case 5: 1488 if (!(arg1 < arg2) || nanop) 1489 resBits = mask(sizeBits); 1490 break; 1491 case 6: 1492 if (!(arg1 <= arg2) || nanop) 1493 resBits = mask(sizeBits); 1494 break; 1495 case 7: 1496 if (!nanop) 1497 resBits = mask(sizeBits); 1498 break; 1499 }; 1500 1501 result = insertBits(result, hiIndex, loIndex, resBits); 1502 } 1503 FpDestReg.uqw = result; 1504 ''' 1505 1506 class Mcmpf2rf(MediaOp): 1507 def __init__(self, src1, src2,\ 1508 size = None, destSize = None, srcSize = None, ext = None): 1509 super(Mcmpf2rf, self).__init__("InstRegIndex(0)", src1,\ 1510 src2, size, destSize, srcSize, ext) 1511 code = ''' 1512 union floatInt 1513 { 1514 float f; 1515 uint32_t i; 1516 }; 1517 union doubleInt 1518 { 1519 double d; 1520 uint64_t i; 1521 }; 1522 1523 assert(srcSize == destSize); 1524 assert(srcSize == 4 || srcSize == 8); 1525 int size = srcSize; 1526 int sizeBits = size * 8; 1527 1528 double arg1, arg2; 1529 uint64_t arg1Bits = bits(FpSrcReg1.uqw, sizeBits - 1, 0); 1530 uint64_t arg2Bits = bits(FpSrcReg2.uqw, sizeBits - 1, 0); 1531 if (size == 4) { 1532 floatInt fi; 1533 fi.i = arg1Bits; 1534 arg1 = fi.f; 1535 fi.i = arg2Bits; 1536 arg2 = fi.f; 1537 } else { 1538 doubleInt di; 1539 di.i = arg1Bits; 1540 arg1 = di.d; 1541 di.i = arg2Bits; 1542 arg2 = di.d; 1543 } 1544 1545 // ZF PF CF 1546 // Unordered 1 1 1 1547 // Greater than 0 0 0 1548 // Less than 0 0 1 1549 // Equal 1 0 0 1550 // OF = SF = AF = 0 1551 ccFlagBits = ccFlagBits & ~(OFBit | SFBit | AFBit | 1552 ZFBit | PFBit | CFBit); 1553 if (isnan(arg1) || isnan(arg2)) 1554 ccFlagBits = ccFlagBits | (ZFBit | PFBit | CFBit); 1555 else if(arg1 < arg2) 1556 ccFlagBits = ccFlagBits | CFBit; 1557 else if(arg1 == arg2) 1558 ccFlagBits = ccFlagBits | ZFBit; 1559 ''' 1560}};
| 272 %(flags)s, %(src1)s, %(op2)s, %(dest)s, 273 %(srcSize)s, %(destSize)s, %(ext)s)''' % { 274 "class_name" : className, 275 "flags" : self.microFlagsText(microFlags), 276 "src1" : self.src1, "op2" : self.op2, 277 "dest" : self.dest, 278 "srcSize" : self.srcSize, 279 "destSize" : self.destSize, 280 "ext" : self.ext} 281 return allocator 282 283 class Mov2int(MediaOp): 284 def __init__(self, dest, src1, src2 = 0, \ 285 size = None, destSize = None, srcSize = None, ext = None): 286 super(Mov2int, self).__init__(dest, src1,\ 287 src2, size, destSize, srcSize, ext) 288 code = ''' 289 int items = sizeof(FloatRegBits) / srcSize; 290 int offset = imm8; 291 if (bits(src1, 0) && (ext & 0x1)) 292 offset -= items; 293 if (offset >= 0 && offset < items) { 294 uint64_t fpSrcReg1 = 295 bits(FpSrcReg1.uqw, 296 (offset + 1) * srcSize * 8 - 1, 297 (offset + 0) * srcSize * 8); 298 DestReg = merge(0, fpSrcReg1, destSize); 299 } else { 300 DestReg = DestReg; 301 } 302 ''' 303 304 class Mov2fp(MediaOp): 305 def __init__(self, dest, src1, src2 = 0, \ 306 size = None, destSize = None, srcSize = None, ext = None): 307 super(Mov2fp, self).__init__(dest, src1,\ 308 src2, size, destSize, srcSize, ext) 309 code = ''' 310 int items = sizeof(FloatRegBits) / destSize; 311 int offset = imm8; 312 if (bits(dest, 0) && (ext & 0x1)) 313 offset -= items; 314 if (offset >= 0 && offset < items) { 315 uint64_t srcReg1 = pick(SrcReg1, 0, srcSize); 316 FpDestReg.uqw = 317 insertBits(FpDestReg.uqw, 318 (offset + 1) * destSize * 8 - 1, 319 (offset + 0) * destSize * 8, srcReg1); 320 } else { 321 FpDestReg.uqw = FpDestReg.uqw; 322 } 323 ''' 324 325 class Movsign(MediaOp): 326 def __init__(self, dest, src, \ 327 size = None, destSize = None, srcSize = None, ext = None): 328 super(Movsign, self).__init__(dest, src,\ 329 "InstRegIndex(0)", size, destSize, srcSize, ext) 330 code = ''' 331 int items = sizeof(FloatRegBits) / srcSize; 332 uint64_t result = 0; 333 int offset = (ext & 0x1) ? items : 0; 334 for (int i = 0; i < items; i++) { 335 uint64_t picked = 336 bits(FpSrcReg1.uqw, (i + 1) * 8 * srcSize - 1); 337 result = insertBits(result, i + offset, i + offset, picked); 338 } 339 DestReg = DestReg | result; 340 ''' 341 342 class Maskmov(MediaOp): 343 code = ''' 344 assert(srcSize == destSize); 345 int size = srcSize; 346 int sizeBits = size * 8; 347 int items = numItems(size); 348 uint64_t result = FpDestReg.uqw; 349 350 for (int i = 0; i < items; i++) { 351 int hiIndex = (i + 1) * sizeBits - 1; 352 int loIndex = (i + 0) * sizeBits; 353 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); 354 if (bits(FpSrcReg2.uqw, hiIndex)) 355 result = insertBits(result, hiIndex, loIndex, arg1Bits); 356 } 357 FpDestReg.uqw = result; 358 ''' 359 360 class shuffle(MediaOp): 361 code = ''' 362 assert(srcSize == destSize); 363 int size = srcSize; 364 int sizeBits = size * 8; 365 int items = sizeof(FloatRegBits) / size; 366 int options; 367 int optionBits; 368 if (size == 8) { 369 options = 2; 370 optionBits = 1; 371 } else { 372 options = 4; 373 optionBits = 2; 374 } 375 376 uint64_t result = 0; 377 uint8_t sel = ext; 378 379 for (int i = 0; i < items; i++) { 380 uint64_t resBits; 381 uint8_t lsel = sel & mask(optionBits); 382 if (lsel * size >= sizeof(FloatRegBits)) { 383 lsel -= options / 2; 384 resBits = bits(FpSrcReg2.uqw, 385 (lsel + 1) * sizeBits - 1, 386 (lsel + 0) * sizeBits); 387 } else { 388 resBits = bits(FpSrcReg1.uqw, 389 (lsel + 1) * sizeBits - 1, 390 (lsel + 0) * sizeBits); 391 } 392 393 sel >>= optionBits; 394 395 int hiIndex = (i + 1) * sizeBits - 1; 396 int loIndex = (i + 0) * sizeBits; 397 result = insertBits(result, hiIndex, loIndex, resBits); 398 } 399 FpDestReg.uqw = result; 400 ''' 401 402 class Unpack(MediaOp): 403 code = ''' 404 assert(srcSize == destSize); 405 int size = destSize; 406 int items = (sizeof(FloatRegBits) / size) / 2; 407 int offset = ext ? items : 0; 408 uint64_t result = 0; 409 for (int i = 0; i < items; i++) { 410 uint64_t pickedLow = 411 bits(FpSrcReg1.uqw, (i + offset + 1) * 8 * size - 1, 412 (i + offset) * 8 * size); 413 result = insertBits(result, 414 (2 * i + 1) * 8 * size - 1, 415 (2 * i + 0) * 8 * size, 416 pickedLow); 417 uint64_t pickedHigh = 418 bits(FpSrcReg2.uqw, (i + offset + 1) * 8 * size - 1, 419 (i + offset) * 8 * size); 420 result = insertBits(result, 421 (2 * i + 2) * 8 * size - 1, 422 (2 * i + 1) * 8 * size, 423 pickedHigh); 424 } 425 FpDestReg.uqw = result; 426 ''' 427 428 class Pack(MediaOp): 429 code = ''' 430 assert(srcSize == destSize * 2); 431 int items = (sizeof(FloatRegBits) / destSize); 432 int destBits = destSize * 8; 433 int srcBits = srcSize * 8; 434 uint64_t result = 0; 435 int i; 436 for (i = 0; i < items / 2; i++) { 437 uint64_t picked = 438 bits(FpSrcReg1.uqw, (i + 1) * srcBits - 1, 439 (i + 0) * srcBits); 440 unsigned signBit = bits(picked, srcBits - 1); 441 uint64_t overflow = bits(picked, srcBits - 1, destBits - 1); 442 443 // Handle saturation. 444 if (signBit) { 445 if (overflow != mask(destBits - srcBits + 1)) { 446 if (signedOp()) 447 picked = (ULL(1) << (destBits - 1)); 448 else 449 picked = 0; 450 } 451 } else { 452 if (overflow != 0) { 453 if (signedOp()) 454 picked = mask(destBits - 1); 455 else 456 picked = mask(destBits); 457 } 458 } 459 result = insertBits(result, 460 (i + 1) * destBits - 1, 461 (i + 0) * destBits, 462 picked); 463 } 464 for (;i < items; i++) { 465 uint64_t picked = 466 bits(FpSrcReg2.uqw, (i - items + 1) * srcBits - 1, 467 (i - items + 0) * srcBits); 468 unsigned signBit = bits(picked, srcBits - 1); 469 uint64_t overflow = bits(picked, srcBits - 1, destBits - 1); 470 471 // Handle saturation. 472 if (signBit) { 473 if (overflow != mask(destBits - srcBits + 1)) { 474 if (signedOp()) 475 picked = (ULL(1) << (destBits - 1)); 476 else 477 picked = 0; 478 } 479 } else { 480 if (overflow != 0) { 481 if (signedOp()) 482 picked = mask(destBits - 1); 483 else 484 picked = mask(destBits); 485 } 486 } 487 result = insertBits(result, 488 (i + 1) * destBits - 1, 489 (i + 0) * destBits, 490 picked); 491 } 492 FpDestReg.uqw = result; 493 ''' 494 495 class Mxor(MediaOp): 496 def __init__(self, dest, src1, src2): 497 super(Mxor, self).__init__(dest, src1, src2, 1) 498 code = ''' 499 FpDestReg.uqw = FpSrcReg1.uqw ^ FpSrcReg2.uqw; 500 ''' 501 502 class Mor(MediaOp): 503 def __init__(self, dest, src1, src2): 504 super(Mor, self).__init__(dest, src1, src2, 1) 505 code = ''' 506 FpDestReg.uqw = FpSrcReg1.uqw | FpSrcReg2.uqw; 507 ''' 508 509 class Mand(MediaOp): 510 def __init__(self, dest, src1, src2): 511 super(Mand, self).__init__(dest, src1, src2, 1) 512 code = ''' 513 FpDestReg.uqw = FpSrcReg1.uqw & FpSrcReg2.uqw; 514 ''' 515 516 class Mandn(MediaOp): 517 def __init__(self, dest, src1, src2): 518 super(Mandn, self).__init__(dest, src1, src2, 1) 519 code = ''' 520 FpDestReg.uqw = ~FpSrcReg1.uqw & FpSrcReg2.uqw; 521 ''' 522 523 class Mminf(MediaOp): 524 code = ''' 525 union floatInt 526 { 527 float f; 528 uint32_t i; 529 }; 530 union doubleInt 531 { 532 double d; 533 uint64_t i; 534 }; 535 536 assert(srcSize == destSize); 537 int size = srcSize; 538 int sizeBits = size * 8; 539 assert(srcSize == 4 || srcSize == 8); 540 int items = numItems(size); 541 uint64_t result = FpDestReg.uqw; 542 543 for (int i = 0; i < items; i++) { 544 double arg1, arg2; 545 int hiIndex = (i + 1) * sizeBits - 1; 546 int loIndex = (i + 0) * sizeBits; 547 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); 548 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex); 549 550 if (size == 4) { 551 floatInt fi; 552 fi.i = arg1Bits; 553 arg1 = fi.f; 554 fi.i = arg2Bits; 555 arg2 = fi.f; 556 } else { 557 doubleInt di; 558 di.i = arg1Bits; 559 arg1 = di.d; 560 di.i = arg2Bits; 561 arg2 = di.d; 562 } 563 564 if (arg1 < arg2) { 565 result = insertBits(result, hiIndex, loIndex, arg1Bits); 566 } else { 567 result = insertBits(result, hiIndex, loIndex, arg2Bits); 568 } 569 } 570 FpDestReg.uqw = result; 571 ''' 572 573 class Mmaxf(MediaOp): 574 code = ''' 575 union floatInt 576 { 577 float f; 578 uint32_t i; 579 }; 580 union doubleInt 581 { 582 double d; 583 uint64_t i; 584 }; 585 586 assert(srcSize == destSize); 587 int size = srcSize; 588 int sizeBits = size * 8; 589 assert(srcSize == 4 || srcSize == 8); 590 int items = numItems(size); 591 uint64_t result = FpDestReg.uqw; 592 593 for (int i = 0; i < items; i++) { 594 double arg1, arg2; 595 int hiIndex = (i + 1) * sizeBits - 1; 596 int loIndex = (i + 0) * sizeBits; 597 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); 598 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex); 599 600 if (size == 4) { 601 floatInt fi; 602 fi.i = arg1Bits; 603 arg1 = fi.f; 604 fi.i = arg2Bits; 605 arg2 = fi.f; 606 } else { 607 doubleInt di; 608 di.i = arg1Bits; 609 arg1 = di.d; 610 di.i = arg2Bits; 611 arg2 = di.d; 612 } 613 614 if (arg1 > arg2) { 615 result = insertBits(result, hiIndex, loIndex, arg1Bits); 616 } else { 617 result = insertBits(result, hiIndex, loIndex, arg2Bits); 618 } 619 } 620 FpDestReg.uqw = result; 621 ''' 622 623 class Mmini(MediaOp): 624 code = ''' 625 626 assert(srcSize == destSize); 627 int size = srcSize; 628 int sizeBits = size * 8; 629 int items = numItems(size); 630 uint64_t result = FpDestReg.uqw; 631 632 for (int i = 0; i < items; i++) { 633 int hiIndex = (i + 1) * sizeBits - 1; 634 int loIndex = (i + 0) * sizeBits; 635 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); 636 int64_t arg1 = arg1Bits | 637 (0 - (arg1Bits & (ULL(1) << (sizeBits - 1)))); 638 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex); 639 int64_t arg2 = arg2Bits | 640 (0 - (arg2Bits & (ULL(1) << (sizeBits - 1)))); 641 uint64_t resBits; 642 643 if (signedOp()) { 644 if (arg1 < arg2) { 645 resBits = arg1Bits; 646 } else { 647 resBits = arg2Bits; 648 } 649 } else { 650 if (arg1Bits < arg2Bits) { 651 resBits = arg1Bits; 652 } else { 653 resBits = arg2Bits; 654 } 655 } 656 result = insertBits(result, hiIndex, loIndex, resBits); 657 } 658 FpDestReg.uqw = result; 659 ''' 660 661 class Mmaxi(MediaOp): 662 code = ''' 663 664 assert(srcSize == destSize); 665 int size = srcSize; 666 int sizeBits = size * 8; 667 int items = numItems(size); 668 uint64_t result = FpDestReg.uqw; 669 670 for (int i = 0; i < items; i++) { 671 int hiIndex = (i + 1) * sizeBits - 1; 672 int loIndex = (i + 0) * sizeBits; 673 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); 674 int64_t arg1 = arg1Bits | 675 (0 - (arg1Bits & (ULL(1) << (sizeBits - 1)))); 676 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex); 677 int64_t arg2 = arg2Bits | 678 (0 - (arg2Bits & (ULL(1) << (sizeBits - 1)))); 679 uint64_t resBits; 680 681 if (signedOp()) { 682 if (arg1 > arg2) { 683 resBits = arg1Bits; 684 } else { 685 resBits = arg2Bits; 686 } 687 } else { 688 if (arg1Bits > arg2Bits) { 689 resBits = arg1Bits; 690 } else { 691 resBits = arg2Bits; 692 } 693 } 694 result = insertBits(result, hiIndex, loIndex, resBits); 695 } 696 FpDestReg.uqw = result; 697 ''' 698 699 class Msqrt(MediaOp): 700 def __init__(self, dest, src, \ 701 size = None, destSize = None, srcSize = None, ext = None): 702 super(Msqrt, self).__init__(dest, src,\ 703 "InstRegIndex(0)", size, destSize, srcSize, ext) 704 code = ''' 705 union floatInt 706 { 707 float f; 708 uint32_t i; 709 }; 710 union doubleInt 711 { 712 double d; 713 uint64_t i; 714 }; 715 716 assert(srcSize == destSize); 717 int size = srcSize; 718 int sizeBits = size * 8; 719 assert(srcSize == 4 || srcSize == 8); 720 int items = numItems(size); 721 uint64_t result = FpDestReg.uqw; 722 723 for (int i = 0; i < items; i++) { 724 int hiIndex = (i + 1) * sizeBits - 1; 725 int loIndex = (i + 0) * sizeBits; 726 uint64_t argBits = bits(FpSrcReg1.uqw, hiIndex, loIndex); 727 728 if (size == 4) { 729 floatInt fi; 730 fi.i = argBits; 731 fi.f = sqrt(fi.f); 732 argBits = fi.i; 733 } else { 734 doubleInt di; 735 di.i = argBits; 736 di.d = sqrt(di.d); 737 argBits = di.i; 738 } 739 result = insertBits(result, hiIndex, loIndex, argBits); 740 } 741 FpDestReg.uqw = result; 742 ''' 743 744 class Maddf(MediaOp): 745 code = ''' 746 union floatInt 747 { 748 float f; 749 uint32_t i; 750 }; 751 union doubleInt 752 { 753 double d; 754 uint64_t i; 755 }; 756 757 assert(srcSize == destSize); 758 int size = srcSize; 759 int sizeBits = size * 8; 760 assert(srcSize == 4 || srcSize == 8); 761 int items = numItems(size); 762 uint64_t result = FpDestReg.uqw; 763 764 for (int i = 0; i < items; i++) { 765 int hiIndex = (i + 1) * sizeBits - 1; 766 int loIndex = (i + 0) * sizeBits; 767 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); 768 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex); 769 uint64_t resBits; 770 771 if (size == 4) { 772 floatInt arg1, arg2, res; 773 arg1.i = arg1Bits; 774 arg2.i = arg2Bits; 775 res.f = arg1.f + arg2.f; 776 resBits = res.i; 777 } else { 778 doubleInt arg1, arg2, res; 779 arg1.i = arg1Bits; 780 arg2.i = arg2Bits; 781 res.d = arg1.d + arg2.d; 782 resBits = res.i; 783 } 784 785 result = insertBits(result, hiIndex, loIndex, resBits); 786 } 787 FpDestReg.uqw = result; 788 ''' 789 790 class Msubf(MediaOp): 791 code = ''' 792 union floatInt 793 { 794 float f; 795 uint32_t i; 796 }; 797 union doubleInt 798 { 799 double d; 800 uint64_t i; 801 }; 802 803 assert(srcSize == destSize); 804 int size = srcSize; 805 int sizeBits = size * 8; 806 assert(srcSize == 4 || srcSize == 8); 807 int items = numItems(size); 808 uint64_t result = FpDestReg.uqw; 809 810 for (int i = 0; i < items; i++) { 811 int hiIndex = (i + 1) * sizeBits - 1; 812 int loIndex = (i + 0) * sizeBits; 813 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); 814 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex); 815 uint64_t resBits; 816 817 if (size == 4) { 818 floatInt arg1, arg2, res; 819 arg1.i = arg1Bits; 820 arg2.i = arg2Bits; 821 res.f = arg1.f - arg2.f; 822 resBits = res.i; 823 } else { 824 doubleInt arg1, arg2, res; 825 arg1.i = arg1Bits; 826 arg2.i = arg2Bits; 827 res.d = arg1.d - arg2.d; 828 resBits = res.i; 829 } 830 831 result = insertBits(result, hiIndex, loIndex, resBits); 832 } 833 FpDestReg.uqw = result; 834 ''' 835 836 class Mmulf(MediaOp): 837 code = ''' 838 union floatInt 839 { 840 float f; 841 uint32_t i; 842 }; 843 union doubleInt 844 { 845 double d; 846 uint64_t i; 847 }; 848 849 assert(srcSize == destSize); 850 int size = srcSize; 851 int sizeBits = size * 8; 852 assert(srcSize == 4 || srcSize == 8); 853 int items = numItems(size); 854 uint64_t result = FpDestReg.uqw; 855 856 for (int i = 0; i < items; i++) { 857 int hiIndex = (i + 1) * sizeBits - 1; 858 int loIndex = (i + 0) * sizeBits; 859 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); 860 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex); 861 uint64_t resBits; 862 863 if (size == 4) { 864 floatInt arg1, arg2, res; 865 arg1.i = arg1Bits; 866 arg2.i = arg2Bits; 867 res.f = arg1.f * arg2.f; 868 resBits = res.i; 869 } else { 870 doubleInt arg1, arg2, res; 871 arg1.i = arg1Bits; 872 arg2.i = arg2Bits; 873 res.d = arg1.d * arg2.d; 874 resBits = res.i; 875 } 876 877 result = insertBits(result, hiIndex, loIndex, resBits); 878 } 879 FpDestReg.uqw = result; 880 ''' 881 882 class Mdivf(MediaOp): 883 code = ''' 884 union floatInt 885 { 886 float f; 887 uint32_t i; 888 }; 889 union doubleInt 890 { 891 double d; 892 uint64_t i; 893 }; 894 895 assert(srcSize == destSize); 896 int size = srcSize; 897 int sizeBits = size * 8; 898 assert(srcSize == 4 || srcSize == 8); 899 int items = numItems(size); 900 uint64_t result = FpDestReg.uqw; 901 902 for (int i = 0; i < items; i++) { 903 int hiIndex = (i + 1) * sizeBits - 1; 904 int loIndex = (i + 0) * sizeBits; 905 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); 906 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex); 907 uint64_t resBits; 908 909 if (size == 4) { 910 floatInt arg1, arg2, res; 911 arg1.i = arg1Bits; 912 arg2.i = arg2Bits; 913 res.f = arg1.f / arg2.f; 914 resBits = res.i; 915 } else { 916 doubleInt arg1, arg2, res; 917 arg1.i = arg1Bits; 918 arg2.i = arg2Bits; 919 res.d = arg1.d / arg2.d; 920 resBits = res.i; 921 } 922 923 result = insertBits(result, hiIndex, loIndex, resBits); 924 } 925 FpDestReg.uqw = result; 926 ''' 927 928 class Maddi(MediaOp): 929 code = ''' 930 assert(srcSize == destSize); 931 int size = srcSize; 932 int sizeBits = size * 8; 933 int items = numItems(size); 934 uint64_t result = FpDestReg.uqw; 935 936 for (int i = 0; i < items; i++) { 937 int hiIndex = (i + 1) * sizeBits - 1; 938 int loIndex = (i + 0) * sizeBits; 939 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); 940 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex); 941 uint64_t resBits = arg1Bits + arg2Bits; 942 943 if (ext & 0x2) { 944 if (signedOp()) { 945 int arg1Sign = bits(arg1Bits, sizeBits - 1); 946 int arg2Sign = bits(arg2Bits, sizeBits - 1); 947 int resSign = bits(resBits, sizeBits - 1); 948 if ((arg1Sign == arg2Sign) && (arg1Sign != resSign)) { 949 if (resSign == 0) 950 resBits = (ULL(1) << (sizeBits - 1)); 951 else 952 resBits = mask(sizeBits - 1); 953 } 954 } else { 955 if (findCarry(sizeBits, resBits, arg1Bits, arg2Bits)) 956 resBits = mask(sizeBits); 957 } 958 } 959 960 result = insertBits(result, hiIndex, loIndex, resBits); 961 } 962 FpDestReg.uqw = result; 963 ''' 964 965 class Msubi(MediaOp): 966 code = ''' 967 assert(srcSize == destSize); 968 int size = srcSize; 969 int sizeBits = size * 8; 970 int items = numItems(size); 971 uint64_t result = FpDestReg.uqw; 972 973 for (int i = 0; i < items; i++) { 974 int hiIndex = (i + 1) * sizeBits - 1; 975 int loIndex = (i + 0) * sizeBits; 976 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); 977 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex); 978 uint64_t resBits = arg1Bits - arg2Bits; 979 980 if (ext & 0x2) { 981 if (signedOp()) { 982 int arg1Sign = bits(arg1Bits, sizeBits - 1); 983 int arg2Sign = !bits(arg2Bits, sizeBits - 1); 984 int resSign = bits(resBits, sizeBits - 1); 985 if ((arg1Sign == arg2Sign) && (arg1Sign != resSign)) { 986 if (resSign == 0) 987 resBits = (ULL(1) << (sizeBits - 1)); 988 else 989 resBits = mask(sizeBits - 1); 990 } 991 } else { 992 if (arg2Bits > arg1Bits) { 993 resBits = 0; 994 } else if (!findCarry(sizeBits, resBits, 995 arg1Bits, ~arg2Bits)) { 996 resBits = mask(sizeBits); 997 } 998 } 999 } 1000 1001 result = insertBits(result, hiIndex, loIndex, resBits); 1002 } 1003 FpDestReg.uqw = result; 1004 ''' 1005 1006 class Mmuli(MediaOp): 1007 code = ''' 1008 int srcBits = srcSize * 8; 1009 int destBits = destSize * 8; 1010 assert(destBits <= 64); 1011 assert(destSize >= srcSize); 1012 int items = numItems(destSize); 1013 uint64_t result = FpDestReg.uqw; 1014 1015 for (int i = 0; i < items; i++) { 1016 int offset = 0; 1017 if (ext & 16) { 1018 if (ext & 32) 1019 offset = i * (destBits - srcBits); 1020 else 1021 offset = i * (destBits - srcBits) + srcBits; 1022 } 1023 int srcHiIndex = (i + 1) * srcBits - 1 + offset; 1024 int srcLoIndex = (i + 0) * srcBits + offset; 1025 uint64_t arg1Bits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex); 1026 uint64_t arg2Bits = bits(FpSrcReg2.uqw, srcHiIndex, srcLoIndex); 1027 uint64_t resBits; 1028 1029 if (signedOp()) { 1030 int64_t arg1 = arg1Bits | 1031 (0 - (arg1Bits & (ULL(1) << (srcBits - 1)))); 1032 int64_t arg2 = arg2Bits | 1033 (0 - (arg2Bits & (ULL(1) << (srcBits - 1)))); 1034 resBits = (uint64_t)(arg1 * arg2); 1035 } else { 1036 resBits = arg1Bits * arg2Bits; 1037 } 1038 1039 if (ext & 0x4) 1040 resBits += (ULL(1) << (destBits - 1)); 1041 1042 if (multHi()) 1043 resBits >>= destBits; 1044 1045 int destHiIndex = (i + 1) * destBits - 1; 1046 int destLoIndex = (i + 0) * destBits; 1047 result = insertBits(result, destHiIndex, destLoIndex, resBits); 1048 } 1049 FpDestReg.uqw = result; 1050 ''' 1051 1052 class Mavg(MediaOp): 1053 code = ''' 1054 assert(srcSize == destSize); 1055 int size = srcSize; 1056 int sizeBits = size * 8; 1057 int items = numItems(size); 1058 uint64_t result = FpDestReg.uqw; 1059 1060 for (int i = 0; i < items; i++) { 1061 int hiIndex = (i + 1) * sizeBits - 1; 1062 int loIndex = (i + 0) * sizeBits; 1063 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); 1064 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex); 1065 uint64_t resBits = (arg1Bits + arg2Bits + 1) / 2; 1066 1067 result = insertBits(result, hiIndex, loIndex, resBits); 1068 } 1069 FpDestReg.uqw = result; 1070 ''' 1071 1072 class Msad(MediaOp): 1073 code = ''' 1074 int srcBits = srcSize * 8; 1075 int items = sizeof(FloatRegBits) / srcSize; 1076 1077 uint64_t sum = 0; 1078 for (int i = 0; i < items; i++) { 1079 int hiIndex = (i + 1) * srcBits - 1; 1080 int loIndex = (i + 0) * srcBits; 1081 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); 1082 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex); 1083 int64_t resBits = arg1Bits - arg2Bits; 1084 if (resBits < 0) 1085 resBits = -resBits; 1086 sum += resBits; 1087 } 1088 FpDestReg.uqw = sum & mask(destSize * 8); 1089 ''' 1090 1091 class Msrl(MediaOp): 1092 code = ''' 1093 1094 assert(srcSize == destSize); 1095 int size = srcSize; 1096 int sizeBits = size * 8; 1097 int items = numItems(size); 1098 uint64_t shiftAmt = op2.uqw; 1099 uint64_t result = FpDestReg.uqw; 1100 1101 for (int i = 0; i < items; i++) { 1102 int hiIndex = (i + 1) * sizeBits - 1; 1103 int loIndex = (i + 0) * sizeBits; 1104 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); 1105 uint64_t resBits; 1106 if (shiftAmt >= sizeBits) { 1107 resBits = 0; 1108 } else { 1109 resBits = (arg1Bits >> shiftAmt) & 1110 mask(sizeBits - shiftAmt); 1111 } 1112 1113 result = insertBits(result, hiIndex, loIndex, resBits); 1114 } 1115 FpDestReg.uqw = result; 1116 ''' 1117 1118 class Msra(MediaOp): 1119 code = ''' 1120 1121 assert(srcSize == destSize); 1122 int size = srcSize; 1123 int sizeBits = size * 8; 1124 int items = numItems(size); 1125 uint64_t shiftAmt = op2.uqw; 1126 uint64_t result = FpDestReg.uqw; 1127 1128 for (int i = 0; i < items; i++) { 1129 int hiIndex = (i + 1) * sizeBits - 1; 1130 int loIndex = (i + 0) * sizeBits; 1131 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); 1132 uint64_t resBits; 1133 if (shiftAmt >= sizeBits) { 1134 if (bits(arg1Bits, sizeBits - 1)) 1135 resBits = mask(sizeBits); 1136 else 1137 resBits = 0; 1138 } else { 1139 resBits = (arg1Bits >> shiftAmt); 1140 resBits = resBits | 1141 (0 - (resBits & (ULL(1) << (sizeBits - 1 - shiftAmt)))); 1142 } 1143 1144 result = insertBits(result, hiIndex, loIndex, resBits); 1145 } 1146 FpDestReg.uqw = result; 1147 ''' 1148 1149 class Msll(MediaOp): 1150 code = ''' 1151 1152 assert(srcSize == destSize); 1153 int size = srcSize; 1154 int sizeBits = size * 8; 1155 int items = numItems(size); 1156 uint64_t shiftAmt = op2.uqw; 1157 uint64_t result = FpDestReg.uqw; 1158 1159 for (int i = 0; i < items; i++) { 1160 int hiIndex = (i + 1) * sizeBits - 1; 1161 int loIndex = (i + 0) * sizeBits; 1162 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); 1163 uint64_t resBits; 1164 if (shiftAmt >= sizeBits) { 1165 resBits = 0; 1166 } else { 1167 resBits = (arg1Bits << shiftAmt); 1168 } 1169 1170 result = insertBits(result, hiIndex, loIndex, resBits); 1171 } 1172 FpDestReg.uqw = result; 1173 ''' 1174 1175 class Cvtf2i(MediaOp): 1176 def __init__(self, dest, src, \ 1177 size = None, destSize = None, srcSize = None, ext = None): 1178 super(Cvtf2i, self).__init__(dest, src,\ 1179 "InstRegIndex(0)", size, destSize, srcSize, ext) 1180 code = ''' 1181 union floatInt 1182 { 1183 float f; 1184 uint32_t i; 1185 }; 1186 union doubleInt 1187 { 1188 double d; 1189 uint64_t i; 1190 }; 1191 1192 assert(destSize == 4 || destSize == 8); 1193 assert(srcSize == 4 || srcSize == 8); 1194 int srcSizeBits = srcSize * 8; 1195 int destSizeBits = destSize * 8; 1196 int items; 1197 int srcStart = 0; 1198 int destStart = 0; 1199 if (srcSize == 2 * destSize) { 1200 items = numItems(srcSize); 1201 if (ext & 0x2) 1202 destStart = destSizeBits * items; 1203 } else if (destSize == 2 * srcSize) { 1204 items = numItems(destSize); 1205 if (ext & 0x2) 1206 srcStart = srcSizeBits * items; 1207 } else { 1208 items = numItems(destSize); 1209 } 1210 uint64_t result = FpDestReg.uqw; 1211 1212 for (int i = 0; i < items; i++) { 1213 int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1; 1214 int srcLoIndex = srcStart + (i + 0) * srcSizeBits; 1215 uint64_t argBits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex); 1216 double arg; 1217 1218 if (srcSize == 4) { 1219 floatInt fi; 1220 fi.i = argBits; 1221 arg = fi.f; 1222 } else { 1223 doubleInt di; 1224 di.i = argBits; 1225 arg = di.d; 1226 } 1227 1228 if (ext & 0x4) { 1229 if (arg >= 0) 1230 arg += 0.5; 1231 else 1232 arg -= 0.5; 1233 } 1234 1235 if (destSize == 4) { 1236 argBits = (uint32_t)arg; 1237 } else { 1238 argBits = (uint64_t)arg; 1239 } 1240 int destHiIndex = destStart + (i + 1) * destSizeBits - 1; 1241 int destLoIndex = destStart + (i + 0) * destSizeBits; 1242 result = insertBits(result, destHiIndex, destLoIndex, argBits); 1243 } 1244 FpDestReg.uqw = result; 1245 ''' 1246 1247 class Cvti2f(MediaOp): 1248 def __init__(self, dest, src, \ 1249 size = None, destSize = None, srcSize = None, ext = None): 1250 super(Cvti2f, self).__init__(dest, src,\ 1251 "InstRegIndex(0)", size, destSize, srcSize, ext) 1252 code = ''' 1253 union floatInt 1254 { 1255 float f; 1256 uint32_t i; 1257 }; 1258 union doubleInt 1259 { 1260 double d; 1261 uint64_t i; 1262 }; 1263 1264 assert(destSize == 4 || destSize == 8); 1265 assert(srcSize == 4 || srcSize == 8); 1266 int srcSizeBits = srcSize * 8; 1267 int destSizeBits = destSize * 8; 1268 int items; 1269 int srcStart = 0; 1270 int destStart = 0; 1271 if (srcSize == 2 * destSize) { 1272 items = numItems(srcSize); 1273 if (ext & 0x2) 1274 destStart = destSizeBits * items; 1275 } else if (destSize == 2 * srcSize) { 1276 items = numItems(destSize); 1277 if (ext & 0x2) 1278 srcStart = srcSizeBits * items; 1279 } else { 1280 items = numItems(destSize); 1281 } 1282 uint64_t result = FpDestReg.uqw; 1283 1284 for (int i = 0; i < items; i++) { 1285 int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1; 1286 int srcLoIndex = srcStart + (i + 0) * srcSizeBits; 1287 uint64_t argBits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex); 1288 1289 int64_t sArg = argBits | 1290 (0 - (argBits & (ULL(1) << (srcSizeBits - 1)))); 1291 double arg = sArg; 1292 1293 if (destSize == 4) { 1294 floatInt fi; 1295 fi.f = arg; 1296 argBits = fi.i; 1297 } else { 1298 doubleInt di; 1299 di.d = arg; 1300 argBits = di.i; 1301 } 1302 int destHiIndex = destStart + (i + 1) * destSizeBits - 1; 1303 int destLoIndex = destStart + (i + 0) * destSizeBits; 1304 result = insertBits(result, destHiIndex, destLoIndex, argBits); 1305 } 1306 FpDestReg.uqw = result; 1307 ''' 1308 1309 class Cvtf2f(MediaOp): 1310 def __init__(self, dest, src, \ 1311 size = None, destSize = None, srcSize = None, ext = None): 1312 super(Cvtf2f, self).__init__(dest, src,\ 1313 "InstRegIndex(0)", size, destSize, srcSize, ext) 1314 code = ''' 1315 union floatInt 1316 { 1317 float f; 1318 uint32_t i; 1319 }; 1320 union doubleInt 1321 { 1322 double d; 1323 uint64_t i; 1324 }; 1325 1326 assert(destSize == 4 || destSize == 8); 1327 assert(srcSize == 4 || srcSize == 8); 1328 int srcSizeBits = srcSize * 8; 1329 int destSizeBits = destSize * 8; 1330 int items; 1331 int srcStart = 0; 1332 int destStart = 0; 1333 if (srcSize == 2 * destSize) { 1334 items = numItems(srcSize); 1335 if (ext & 0x2) 1336 destStart = destSizeBits * items; 1337 } else if (destSize == 2 * srcSize) { 1338 items = numItems(destSize); 1339 if (ext & 0x2) 1340 srcStart = srcSizeBits * items; 1341 } else { 1342 items = numItems(destSize); 1343 } 1344 uint64_t result = FpDestReg.uqw; 1345 1346 for (int i = 0; i < items; i++) { 1347 int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1; 1348 int srcLoIndex = srcStart + (i + 0) * srcSizeBits; 1349 uint64_t argBits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex); 1350 double arg; 1351 1352 if (srcSize == 4) { 1353 floatInt fi; 1354 fi.i = argBits; 1355 arg = fi.f; 1356 } else { 1357 doubleInt di; 1358 di.i = argBits; 1359 arg = di.d; 1360 } 1361 if (destSize == 4) { 1362 floatInt fi; 1363 fi.f = arg; 1364 argBits = fi.i; 1365 } else { 1366 doubleInt di; 1367 di.d = arg; 1368 argBits = di.i; 1369 } 1370 int destHiIndex = destStart + (i + 1) * destSizeBits - 1; 1371 int destLoIndex = destStart + (i + 0) * destSizeBits; 1372 result = insertBits(result, destHiIndex, destLoIndex, argBits); 1373 } 1374 FpDestReg.uqw = result; 1375 ''' 1376 1377 class Mcmpi2r(MediaOp): 1378 code = ''' 1379 union floatInt 1380 { 1381 float f; 1382 uint32_t i; 1383 }; 1384 union doubleInt 1385 { 1386 double d; 1387 uint64_t i; 1388 }; 1389 1390 assert(srcSize == destSize); 1391 int size = srcSize; 1392 int sizeBits = size * 8; 1393 int items = numItems(size); 1394 uint64_t result = FpDestReg.uqw; 1395 1396 for (int i = 0; i < items; i++) { 1397 int hiIndex = (i + 1) * sizeBits - 1; 1398 int loIndex = (i + 0) * sizeBits; 1399 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); 1400 int64_t arg1 = arg1Bits | 1401 (0 - (arg1Bits & (ULL(1) << (sizeBits - 1)))); 1402 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex); 1403 int64_t arg2 = arg2Bits | 1404 (0 - (arg2Bits & (ULL(1) << (sizeBits - 1)))); 1405 1406 uint64_t resBits = 0; 1407 if (((ext & 0x2) == 0 && arg1 == arg2) || 1408 ((ext & 0x2) == 0x2 && arg1 > arg2)) 1409 resBits = mask(sizeBits); 1410 1411 result = insertBits(result, hiIndex, loIndex, resBits); 1412 } 1413 FpDestReg.uqw = result; 1414 ''' 1415 1416 class Mcmpf2r(MediaOp): 1417 code = ''' 1418 union floatInt 1419 { 1420 float f; 1421 uint32_t i; 1422 }; 1423 union doubleInt 1424 { 1425 double d; 1426 uint64_t i; 1427 }; 1428 1429 assert(srcSize == destSize); 1430 int size = srcSize; 1431 int sizeBits = size * 8; 1432 int items = numItems(size); 1433 uint64_t result = FpDestReg.uqw; 1434 1435 for (int i = 0; i < items; i++) { 1436 int hiIndex = (i + 1) * sizeBits - 1; 1437 int loIndex = (i + 0) * sizeBits; 1438 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); 1439 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex); 1440 double arg1, arg2; 1441 1442 if (size == 4) { 1443 floatInt fi; 1444 fi.i = arg1Bits; 1445 arg1 = fi.f; 1446 fi.i = arg2Bits; 1447 arg2 = fi.f; 1448 } else { 1449 doubleInt di; 1450 di.i = arg1Bits; 1451 arg1 = di.d; 1452 di.i = arg2Bits; 1453 arg2 = di.d; 1454 } 1455 1456 uint64_t resBits = 0; 1457 bool nanop = isnan(arg1) || isnan(arg2); 1458 switch (ext & mask(3)) { 1459 case 0: 1460 if (arg1 == arg2 && !nanop) 1461 resBits = mask(sizeBits); 1462 break; 1463 case 1: 1464 if (arg1 < arg2 && !nanop) 1465 resBits = mask(sizeBits); 1466 break; 1467 case 2: 1468 if (arg1 <= arg2 && !nanop) 1469 resBits = mask(sizeBits); 1470 break; 1471 case 3: 1472 if (nanop) 1473 resBits = mask(sizeBits); 1474 break; 1475 case 4: 1476 if (arg1 != arg2 || nanop) 1477 resBits = mask(sizeBits); 1478 break; 1479 case 5: 1480 if (!(arg1 < arg2) || nanop) 1481 resBits = mask(sizeBits); 1482 break; 1483 case 6: 1484 if (!(arg1 <= arg2) || nanop) 1485 resBits = mask(sizeBits); 1486 break; 1487 case 7: 1488 if (!nanop) 1489 resBits = mask(sizeBits); 1490 break; 1491 }; 1492 1493 result = insertBits(result, hiIndex, loIndex, resBits); 1494 } 1495 FpDestReg.uqw = result; 1496 ''' 1497 1498 class Mcmpf2rf(MediaOp): 1499 def __init__(self, src1, src2,\ 1500 size = None, destSize = None, srcSize = None, ext = None): 1501 super(Mcmpf2rf, self).__init__("InstRegIndex(0)", src1,\ 1502 src2, size, destSize, srcSize, ext) 1503 code = ''' 1504 union floatInt 1505 { 1506 float f; 1507 uint32_t i; 1508 }; 1509 union doubleInt 1510 { 1511 double d; 1512 uint64_t i; 1513 }; 1514 1515 assert(srcSize == destSize); 1516 assert(srcSize == 4 || srcSize == 8); 1517 int size = srcSize; 1518 int sizeBits = size * 8; 1519 1520 double arg1, arg2; 1521 uint64_t arg1Bits = bits(FpSrcReg1.uqw, sizeBits - 1, 0); 1522 uint64_t arg2Bits = bits(FpSrcReg2.uqw, sizeBits - 1, 0); 1523 if (size == 4) { 1524 floatInt fi; 1525 fi.i = arg1Bits; 1526 arg1 = fi.f; 1527 fi.i = arg2Bits; 1528 arg2 = fi.f; 1529 } else { 1530 doubleInt di; 1531 di.i = arg1Bits; 1532 arg1 = di.d; 1533 di.i = arg2Bits; 1534 arg2 = di.d; 1535 } 1536 1537 // ZF PF CF 1538 // Unordered 1 1 1 1539 // Greater than 0 0 0 1540 // Less than 0 0 1 1541 // Equal 1 0 0 1542 // OF = SF = AF = 0 1543 ccFlagBits = ccFlagBits & ~(OFBit | SFBit | AFBit | 1544 ZFBit | PFBit | CFBit); 1545 if (isnan(arg1) || isnan(arg2)) 1546 ccFlagBits = ccFlagBits | (ZFBit | PFBit | CFBit); 1547 else if(arg1 < arg2) 1548 ccFlagBits = ccFlagBits | CFBit; 1549 else if(arg1 == arg2) 1550 ccFlagBits = ccFlagBits | ZFBit; 1551 ''' 1552}};
|