}

// NEON VLDn (multiple structures) macro-op. Decomposes the load into:
//   1. one or two bulk load microops (16-byte loads, plus an 8-byte load
//      when an odd register is left over),
//   2. an optional base-register writeback microop (rm == 15 encodes "no
//      writeback"; rm == 13 selects an immediate post-increment of
//      regs * 8 bytes; any other rm adds that register), and
//   3. deinterleave microops when elems > 1, which move data from the
//      temporary microarchitectural registers at NumFloatV7ArchRegs into
//      the architectural destination vd.
VldMultOp::VldMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
                     unsigned elems, RegIndex rn, RegIndex vd, unsigned regs,
                     unsigned inc, uint32_t size, uint32_t align, RegIndex rm) :
    PredMacroOp(mnem, machInst, __opClass)
{
    assert(regs > 0 && regs <= 4);
    assert(regs % elems == 0);

    numMicroops = (regs > 2) ? 2 : 1;
    bool wb = (rm != 15);
    bool deinterleave = (elems > 1);

    if (wb) numMicroops++;
    if (deinterleave) numMicroops += (regs / elems);
    microOps = new StaticInstPtr[numMicroops];

    // When deinterleaving, load into scratch registers first; otherwise load
    // straight into the destination.
    RegIndex rMid = deinterleave ? NumFloatV7ArchRegs : vd * 2;

    // Only the first memory microop carries the architectural alignment
    // check; the second one must not re-check it.
    uint32_t noAlign = TLB::MustBeOne;

    unsigned uopIdx = 0;
    switch (regs) {
      case 4:
        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
                size, machInst, rMid, rn, 0, align);
        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
                size, machInst, rMid + 4, rn, 16, noAlign);
        break;
      case 3:
        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
                size, machInst, rMid, rn, 0, align);
        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon8Uop>(
                size, machInst, rMid + 4, rn, 16, noAlign);
        break;
      case 2:
        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
                size, machInst, rMid, rn, 0, align);
        break;
      case 1:
        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon8Uop>(
                size, machInst, rMid, rn, 0, align);
        break;
      default:
        // Unknown number of registers
        microOps[uopIdx++] = new Unknown(machInst);
    }
    if (wb) {
        if (rm != 15 && rm != 13) {
            microOps[uopIdx++] =
                new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
        } else {
            microOps[uopIdx++] =
                new MicroAddiUop(machInst, rn, rn, regs * 8);
        }
    }
    if (deinterleave) {
        switch (elems) {
          case 4:
            assert(regs == 4);
            microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon8Uop>(
                    size, machInst, vd * 2, rMid, inc * 2);
            break;
          case 3:
            assert(regs == 3);
            microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon6Uop>(
                    size, machInst, vd * 2, rMid, inc * 2);
            break;
          case 2:
            assert(regs == 4 || regs == 2);
            if (regs == 4) {
                microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
                        size, machInst, vd * 2, rMid, inc * 2);
                microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
                        size, machInst, vd * 2 + 2, rMid + 4, inc * 2);
            } else {
                microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
                        size, machInst, vd * 2, rMid, inc * 2);
            }
            break;
          default:
            // Bad number of elements to deinterleave
            microOps[uopIdx++] = new Unknown(machInst);
        }
    }
    assert(uopIdx == numMicroops);

    // All but the last microop commit together with the macro-op.
    for (unsigned i = 0; i < numMicroops - 1; i++) {
        MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
        assert(uopPtr);
        uopPtr->setDelayedCommit();
    }
    microOps[numMicroops - 1]->setLastMicroop();
}

// NEON VLDn (single structure to one lane, or VLDn "all lanes" when
// all == true) macro-op: one load microop sized by eBytes * elems into a
// scratch register, optional writeback, then unpack microops that insert
// the loaded element(s) into lane `lane` (or replicate across all lanes)
// of the destination register(s).
VldSingleOp::VldSingleOp(const char *mnem, ExtMachInst machInst,
                         OpClass __opClass, bool all, unsigned elems,
                         RegIndex rn, RegIndex vd, unsigned regs,
                         unsigned inc, uint32_t size, uint32_t align,
                         RegIndex rm, unsigned lane) :
    PredMacroOp(mnem, machInst, __opClass)
{
    assert(regs > 0 && regs <= 4);
    assert(regs % elems == 0);

    unsigned eBytes = (1 << size);
    unsigned loadSize = eBytes * elems;
    // loadRegs is only consulted by the asserts below.
    unsigned loadRegs M5_VAR_USED = (loadSize + sizeof(FloatRegBits) - 1) /
                                    sizeof(FloatRegBits);

    assert(loadRegs > 0 && loadRegs <= 4);

    numMicroops = 1;
    bool wb = (rm != 15);

    if (wb) numMicroops++;
    numMicroops += (regs / elems);
    microOps = new StaticInstPtr[numMicroops];

    // Scratch register the memory microop loads into before unpacking.
    RegIndex ufp0 = NumFloatV7ArchRegs;

    unsigned uopIdx = 0;
    switch (loadSize) {
      case 1:
        microOps[uopIdx++] = new MicroLdrNeon1Uop<uint8_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 2:
        if (eBytes == 2) {
            microOps[uopIdx++] = new MicroLdrNeon2Uop<uint16_t>(
                    machInst, ufp0, rn, 0, align);
        } else {
            microOps[uopIdx++] = new MicroLdrNeon2Uop<uint8_t>(
                    machInst, ufp0, rn, 0, align);
        }
        break;
      case 3:
        microOps[uopIdx++] = new MicroLdrNeon3Uop<uint8_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 4:
        switch (eBytes) {
          case 1:
            microOps[uopIdx++] = new MicroLdrNeon4Uop<uint8_t>(
                    machInst, ufp0, rn, 0, align);
            break;
          case 2:
            microOps[uopIdx++] = new MicroLdrNeon4Uop<uint16_t>(
                    machInst, ufp0, rn, 0, align);
            break;
          case 4:
            microOps[uopIdx++] = new MicroLdrNeon4Uop<uint32_t>(
                    machInst, ufp0, rn, 0, align);
            break;
        }
        break;
      case 6:
        microOps[uopIdx++] = new MicroLdrNeon6Uop<uint16_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 8:
        switch (eBytes) {
          case 2:
            microOps[uopIdx++] = new MicroLdrNeon8Uop<uint16_t>(
                    machInst, ufp0, rn, 0, align);
            break;
          case 4:
            microOps[uopIdx++] = new MicroLdrNeon8Uop<uint32_t>(
                    machInst, ufp0, rn, 0, align);
            break;
        }
        break;
      case 12:
        microOps[uopIdx++] = new MicroLdrNeon12Uop<uint32_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 16:
        microOps[uopIdx++] = new MicroLdrNeon16Uop<uint32_t>(
                machInst, ufp0, rn, 0, align);
        break;
      default:
        // Unrecognized load size
        microOps[uopIdx++] = new Unknown(machInst);
    }
    if (wb) {
        if (rm != 15 && rm != 13) {
            microOps[uopIdx++] =
                new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
        } else {
            microOps[uopIdx++] =
                new MicroAddiUop(machInst, rn, rn, loadSize);
        }
    }
    switch (elems) {
      case 4:
        assert(regs == 4);
        switch (size) {
          case 0:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to8Uop<uint8_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to8Uop<uint8_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          case 1:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to8Uop<uint16_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to8Uop<uint16_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          case 2:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon4to8Uop<uint32_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon4to8Uop<uint32_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          default:
            // Bad size
            microOps[uopIdx++] = new Unknown(machInst);
            break;
        }
        break;
      case 3:
        assert(regs == 3);
        switch (size) {
          case 0:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to6Uop<uint8_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to6Uop<uint8_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          case 1:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to6Uop<uint16_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to6Uop<uint16_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          case 2:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon4to6Uop<uint32_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon4to6Uop<uint32_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          default:
            // Bad size
            microOps[uopIdx++] = new Unknown(machInst);
            break;
        }
        break;
      case 2:
        assert(regs == 2);
        assert(loadRegs <= 2);
        switch (size) {
          case 0:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint8_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint8_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          case 1:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint16_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint16_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          case 2:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint32_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint32_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          default:
            // Bad size
            microOps[uopIdx++] = new Unknown(machInst);
            break;
        }
        break;
      case 1:
        // VLD1 "to all lanes" may target two consecutive registers.
        assert(regs == 1 || (all && regs == 2));
        assert(loadRegs <= 2);
        for (unsigned offset = 0; offset < regs; offset++) {
            switch (size) {
              case 0:
                if (all) {
                    microOps[uopIdx++] =
                        new MicroUnpackAllNeon2to2Uop<uint8_t>(
                            machInst, (vd + offset) * 2, ufp0, inc * 2);
                } else {
                    microOps[uopIdx++] =
                        new MicroUnpackNeon2to2Uop<uint8_t>(
                            machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
                }
                break;
              case 1:
                if (all) {
                    microOps[uopIdx++] =
                        new MicroUnpackAllNeon2to2Uop<uint16_t>(
                            machInst, (vd + offset) * 2, ufp0, inc * 2);
                } else {
                    microOps[uopIdx++] =
                        new MicroUnpackNeon2to2Uop<uint16_t>(
                            machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
                }
                break;
              case 2:
                if (all) {
                    microOps[uopIdx++] =
                        new MicroUnpackAllNeon2to2Uop<uint32_t>(
                            machInst, (vd + offset) * 2, ufp0, inc * 2);
                } else {
                    microOps[uopIdx++] =
                        new MicroUnpackNeon2to2Uop<uint32_t>(
                            machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
                }
                break;
              default:
                // Bad size
                microOps[uopIdx++] = new Unknown(machInst);
                break;
            }
        }
        break;
      default:
        // Bad number of elements to unpack
        microOps[uopIdx++] = new Unknown(machInst);
    }
    assert(uopIdx == numMicroops);

    for (unsigned i = 0; i < numMicroops - 1; i++) {
        MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
        assert(uopPtr);
        uopPtr->setDelayedCommit();
    }
    microOps[numMicroops - 1]->setLastMicroop();
}

// NEON VSTn (multiple structures) macro-op: the mirror image of VldMultOp.
// Interleave microops (when elems > 1) run first to gather the data into
// scratch registers, then the store microops write it out, then the
// optional writeback updates rn.
VstMultOp::VstMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
                     unsigned elems, RegIndex rn, RegIndex vd, unsigned regs,
                     unsigned inc, uint32_t size, uint32_t align, RegIndex rm) :
    PredMacroOp(mnem, machInst, __opClass)
{
    assert(regs > 0 && regs <= 4);
    assert(regs % elems == 0);

    numMicroops = (regs > 2) ? 2 : 1;
    bool wb = (rm != 15);
    bool interleave = (elems > 1);

    if (wb) numMicroops++;
    if (interleave) numMicroops += (regs / elems);
    microOps = new StaticInstPtr[numMicroops];

    uint32_t noAlign = TLB::MustBeOne;

    // Store from scratch registers when interleaving, otherwise straight
    // from the source registers.
    RegIndex rMid = interleave ? NumFloatV7ArchRegs : vd * 2;

    unsigned uopIdx = 0;
    if (interleave) {
        switch (elems) {
          case 4:
            assert(regs == 4);
            microOps[uopIdx++] = newNeonMixInst<MicroInterNeon8Uop>(
                    size, machInst, rMid, vd * 2, inc * 2);
            break;
          case 3:
            assert(regs == 3);
            microOps[uopIdx++] = newNeonMixInst<MicroInterNeon6Uop>(
                    size, machInst, rMid, vd * 2, inc * 2);
            break;
          case 2:
            assert(regs == 4 || regs == 2);
            if (regs == 4) {
                microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
                        size, machInst, rMid, vd * 2, inc * 2);
                microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
                        size, machInst, rMid + 4, vd * 2 + 2, inc * 2);
            } else {
                microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
                        size, machInst, rMid, vd * 2, inc * 2);
            }
            break;
          default:
            // Bad number of elements to interleave
            microOps[uopIdx++] = new Unknown(machInst);
        }
    }
    switch (regs) {
      case 4:
        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
                size, machInst, rMid, rn, 0, align);
        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
                size, machInst, rMid + 4, rn, 16, noAlign);
        break;
      case 3:
        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
                size, machInst, rMid, rn, 0, align);
        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon8Uop>(
                size, machInst, rMid + 4, rn, 16, noAlign);
        break;
      case 2:
        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
                size, machInst, rMid, rn, 0, align);
        break;
      case 1:
        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon8Uop>(
                size, machInst, rMid, rn, 0, align);
        break;
      default:
        // Unknown number of registers
        microOps[uopIdx++] = new Unknown(machInst);
    }
    if (wb) {
        if (rm != 15 && rm != 13) {
            microOps[uopIdx++] =
                new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
        } else {
            microOps[uopIdx++] =
                new MicroAddiUop(machInst, rn, rn, regs * 8);
        }
    }
    assert(uopIdx == numMicroops);

    for (unsigned i = 0; i < numMicroops - 1; i++) {
        MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
        assert(uopPtr);
        uopPtr->setDelayedCommit();
    }
    microOps[numMicroops - 1]->setLastMicroop();
}

// NEON VSTn (single structure from one lane) macro-op: pack microops
// extract lane `lane` from the source register(s) into a scratch register,
// then one store microop writes it out, then the optional writeback runs.
// Note `all` is asserted false here — "store all lanes" does not exist.
VstSingleOp::VstSingleOp(const char *mnem, ExtMachInst machInst,
                         OpClass __opClass, bool all, unsigned elems,
                         RegIndex rn, RegIndex vd, unsigned regs,
                         unsigned inc, uint32_t size, uint32_t align,
                         RegIndex rm, unsigned lane) :
    PredMacroOp(mnem, machInst, __opClass)
{
    assert(!all);
    assert(regs > 0 && regs <= 4);
    assert(regs % elems == 0);

    unsigned eBytes = (1 << size);
    unsigned storeSize = eBytes * elems;
    // storeRegs is only consulted by the asserts below.
    unsigned storeRegs M5_VAR_USED = (storeSize + sizeof(FloatRegBits) - 1) /
                                     sizeof(FloatRegBits);

    assert(storeRegs > 0 && storeRegs <= 4);

    numMicroops = 1;
    bool wb = (rm != 15);

    if (wb) numMicroops++;
    numMicroops += (regs / elems);
    microOps = new StaticInstPtr[numMicroops];

    RegIndex ufp0 = NumFloatV7ArchRegs;

    unsigned uopIdx = 0;
    switch (elems) {
      case 4:
        assert(regs == 4);
        switch (size) {
          case 0:
            microOps[uopIdx++] = new MicroPackNeon8to2Uop<uint8_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          case 1:
            microOps[uopIdx++] = new MicroPackNeon8to2Uop<uint16_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          case 2:
            microOps[uopIdx++] = new MicroPackNeon8to4Uop<uint32_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          default:
            // Bad size
            microOps[uopIdx++] = new Unknown(machInst);
            break;
        }
        break;
      case 3:
        assert(regs == 3);
        switch (size) {
          case 0:
            microOps[uopIdx++] = new MicroPackNeon6to2Uop<uint8_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          case 1:
            microOps[uopIdx++] = new MicroPackNeon6to2Uop<uint16_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          case 2:
            microOps[uopIdx++] = new MicroPackNeon6to4Uop<uint32_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          default:
            // Bad size
            microOps[uopIdx++] = new Unknown(machInst);
            break;
        }
        break;
      case 2:
        assert(regs == 2);
        assert(storeRegs <= 2);
        switch (size) {
          case 0:
            microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint8_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          case 1:
            microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint16_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          case 2:
            microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint32_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          default:
            // Bad size
            microOps[uopIdx++] = new Unknown(machInst);
            break;
        }
        break;
      case 1:
        // NOTE(review): the (all && regs == 2) arm is unreachable given
        // assert(!all) above; kept as-is to mirror VldSingleOp.
        assert(regs == 1 || (all && regs == 2));
        assert(storeRegs <= 2);
        for (unsigned offset = 0; offset < regs; offset++) {
            switch (size) {
              case 0:
                microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint8_t>(
                        machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
                break;
              case 1:
                microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint16_t>(
                        machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
                break;
              case 2:
                microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint32_t>(
                        machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
                break;
              default:
                // Bad size
                microOps[uopIdx++] = new Unknown(machInst);
                break;
            }
        }
        break;
      default:
        // Bad number of elements to unpack
        microOps[uopIdx++] = new Unknown(machInst);
    }
    switch (storeSize) {
      case 1:
        microOps[uopIdx++] = new MicroStrNeon1Uop<uint8_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 2:
        if (eBytes == 2) {
            microOps[uopIdx++] = new MicroStrNeon2Uop<uint16_t>(
                    machInst, ufp0, rn, 0, align);
        } else {
            microOps[uopIdx++] = new MicroStrNeon2Uop<uint8_t>(
                    machInst, ufp0, rn, 0, align);
        }
        break;
      case 3:
        microOps[uopIdx++] = new MicroStrNeon3Uop<uint8_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 4:
        switch (eBytes) {
          case 1:
            microOps[uopIdx++] = new MicroStrNeon4Uop<uint8_t>(
                    machInst, ufp0, rn, 0, align);
            break;
          case 2:
            microOps[uopIdx++] = new MicroStrNeon4Uop<uint16_t>(
                    machInst, ufp0, rn, 0, align);
            break;
          case 4:
            microOps[uopIdx++] = new MicroStrNeon4Uop<uint32_t>(
                    machInst, ufp0, rn, 0, align);
            break;
        }
        break;
      case 6:
        microOps[uopIdx++] = new MicroStrNeon6Uop<uint16_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 8:
        switch (eBytes) {
          case 2:
            microOps[uopIdx++] = new MicroStrNeon8Uop<uint16_t>(
                    machInst, ufp0, rn, 0, align);
            break;
          case 4:
            microOps[uopIdx++] = new MicroStrNeon8Uop<uint32_t>(
                    machInst, ufp0, rn, 0, align);
            break;
        }
        break;
      case 12:
        microOps[uopIdx++] = new MicroStrNeon12Uop<uint32_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 16:
        microOps[uopIdx++] = new MicroStrNeon16Uop<uint32_t>(
                machInst, ufp0, rn, 0, align);
        break;
      default:
        // Bad store size
        microOps[uopIdx++] = new Unknown(machInst);
    }
    if (wb) {
        if (rm != 15 && rm != 13) {
            microOps[uopIdx++] =
                new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
        } else {
            microOps[uopIdx++] =
                new MicroAddiUop(machInst, rn, rn, storeSize);
        }
    }
    assert(uopIdx == numMicroops);

    for (unsigned i = 0; i < numMicroops - 1; i++) {
        MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
        assert(uopPtr);
        uopPtr->setDelayedCommit();
    }
    microOps[numMicroops - 1]->setLastMicroop();
}

// AArch64 LD1/LD2/LD3/LD4 (multiple structures) macro-op: bulk loads into
// scratch registers (at most 16 bytes per memory microop), optional
// writeback, then deinterleave ("marshal") microops into vd.
VldMultOp64::VldMultOp64(const char *mnem, ExtMachInst machInst,
                         OpClass __opClass, RegIndex rn, RegIndex vd,
                         RegIndex rm, uint8_t eSize, uint8_t dataSize,
                         uint8_t numStructElems, uint8_t numRegs, bool wb) :
    PredMacroOp(mnem, machInst, __opClass)
{
    RegIndex vx = NumFloatV8ArchRegs / 4;
    RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
    bool baseIsSP = isSP((IntRegIndex) rnsp);

    numMicroops = wb ? 1 : 0;

    int totNumBytes = numRegs * dataSize / 8;
    assert(totNumBytes <= 64);

    // The guiding principle here is that no more than 16 bytes can be
    // transferred at a time
    int numMemMicroops = totNumBytes / 16;
    int residuum = totNumBytes % 16;
    if (residuum)
        ++numMemMicroops;
    numMicroops += numMemMicroops;

    // One marshal microop handles two registers.
    int numMarshalMicroops = numRegs / 2 + (numRegs % 2 ? 1 : 0);
    numMicroops += numMarshalMicroops;

    microOps = new StaticInstPtr[numMicroops];
    unsigned uopIdx = 0;
    uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize |
            TLB::AllowUnaligned;

    int i = 0;
    for (; i < numMemMicroops - 1; ++i) {
        microOps[uopIdx++] = new MicroNeonLoad64(
            machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
            baseIsSP, 16 /* accSize */, eSize);
    }
    // Final memory microop carries the leftover (< 16 byte) access size.
    microOps[uopIdx++] = new MicroNeonLoad64(
        machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
        residuum ? residuum : 16 /* accSize */, eSize);

    // Writeback microop: the post-increment amount is encoded in "Rm": a
    // 64-bit general register OR as '11111' for an immediate value equal to
    // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
    if (wb) {
        if (rm != ((RegIndex) INTREG_X31)) {
            microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
                                                      UXTX, 0);
        } else {
            microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
                                                   totNumBytes);
        }
    }

    for (int i = 0; i < numMarshalMicroops; ++i) {
        switch (numRegs) {
            case 1: microOps[uopIdx++] = new MicroDeintNeon64_1Reg(
                        machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
                        numStructElems, 1, i /* step */);
                    break;
            case 2: microOps[uopIdx++] = new MicroDeintNeon64_2Reg(
                        machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
                        numStructElems, 2, i /* step */);
                    break;
            case 3: microOps[uopIdx++] = new MicroDeintNeon64_3Reg(
                        machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
                        numStructElems, 3, i /* step */);
                    break;
            case 4: microOps[uopIdx++] = new MicroDeintNeon64_4Reg(
                        machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
                        numStructElems, 4, i /* step */);
                    break;
            default: panic("Invalid number of registers");
        }

    }

    assert(uopIdx == numMicroops);

    for (int i = 0; i < numMicroops - 1; ++i) {
        microOps[i]->setDelayedCommit();
    }
    microOps[numMicroops - 1]->setLastMicroop();
}

// AArch64 ST1/ST2/ST3/ST4 (multiple structures) macro-op: interleave
// ("marshal") microops gather vd into scratch registers first, then bulk
// store microops (max 16 bytes each) write out, then optional writeback.
VstMultOp64::VstMultOp64(const char *mnem, ExtMachInst machInst,
                         OpClass __opClass, RegIndex rn, RegIndex vd,
                         RegIndex rm, uint8_t eSize, uint8_t dataSize,
                         uint8_t numStructElems, uint8_t numRegs, bool wb) :
    PredMacroOp(mnem, machInst, __opClass)
{
    RegIndex vx = NumFloatV8ArchRegs / 4;
    RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
    bool baseIsSP = isSP((IntRegIndex) rnsp);

    numMicroops = wb ? 1 : 0;

    int totNumBytes = numRegs * dataSize / 8;
    assert(totNumBytes <= 64);

    // The guiding principle here is that no more than 16 bytes can be
    // transferred at a time
    int numMemMicroops = totNumBytes / 16;
    int residuum = totNumBytes % 16;
    if (residuum)
        ++numMemMicroops;
    numMicroops += numMemMicroops;

    int numMarshalMicroops = totNumBytes > 32 ? 2 : 1;
    numMicroops += numMarshalMicroops;

    microOps = new StaticInstPtr[numMicroops];
    unsigned uopIdx = 0;

    for (int i = 0; i < numMarshalMicroops; ++i) {
        switch (numRegs) {
            case 1: microOps[uopIdx++] = new MicroIntNeon64_1Reg(
                        machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
                        numStructElems, 1, i /* step */);
                    break;
            case 2: microOps[uopIdx++] = new MicroIntNeon64_2Reg(
                        machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
                        numStructElems, 2, i /* step */);
                    break;
            case 3: microOps[uopIdx++] = new MicroIntNeon64_3Reg(
                        machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
                        numStructElems, 3, i /* step */);
                    break;
            case 4: microOps[uopIdx++] = new MicroIntNeon64_4Reg(
                        machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
                        numStructElems, 4, i /* step */);
                    break;
            default: panic("Invalid number of registers");
        }
    }

    uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize |
            TLB::AllowUnaligned;

    int i = 0;
    for (; i < numMemMicroops - 1; ++i) {
        microOps[uopIdx++] = new MicroNeonStore64(
            machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
            baseIsSP, 16 /* accSize */, eSize);
    }
    // Final memory microop carries the leftover (< 16 byte) access size.
    microOps[uopIdx++] = new MicroNeonStore64(
        machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
        residuum ? residuum : 16 /* accSize */, eSize);

    // Writeback microop: the post-increment amount is encoded in "Rm": a
    // 64-bit general register OR as '11111' for an immediate value equal to
    // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
    if (wb) {
        if (rm != ((RegIndex) INTREG_X31)) {
            microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
                                                      UXTX, 0);
        } else {
            microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
                                                   totNumBytes);
        }
    }

    assert(uopIdx == numMicroops);

    for (int i = 0; i < numMicroops - 1; i++) {
        microOps[i]->setDelayedCommit();
    }
    microOps[numMicroops - 1]->setLastMicroop();
}

// AArch64 LDn (single structure) macro-op: load microops into scratch
// registers, optional writeback, then unpack microops insert the element
// at `index` (or replicate it when `replicate` is set) into vd.
VldSingleOp64::VldSingleOp64(const char *mnem, ExtMachInst machInst,
                             OpClass __opClass, RegIndex rn, RegIndex vd,
                             RegIndex rm, uint8_t eSize, uint8_t dataSize,
                             uint8_t numStructElems, uint8_t index, bool wb,
                             bool replicate) :
    PredMacroOp(mnem, machInst, __opClass)
{
    RegIndex vx = NumFloatV8ArchRegs / 4;
    RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
    bool baseIsSP = isSP((IntRegIndex) rnsp);

    numMicroops = wb ? 1 : 0;

    int eSizeBytes = 1 << eSize;
    int totNumBytes = numStructElems * eSizeBytes;
    assert(totNumBytes <= 64);

    // The guiding principle here is that no more than 16 bytes can be
    // transferred at a time
    int numMemMicroops = totNumBytes / 16;
    int residuum = totNumBytes % 16;
    if (residuum)
        ++numMemMicroops;
    numMicroops += numMemMicroops;

    int numMarshalMicroops = numStructElems / 2 + (numStructElems % 2 ? 1 : 0);
    numMicroops += numMarshalMicroops;

    microOps = new StaticInstPtr[numMicroops];
    unsigned uopIdx = 0;

    uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize |
            TLB::AllowUnaligned;

    int i = 0;
    for (; i < numMemMicroops - 1; ++i) {
        microOps[uopIdx++] = new MicroNeonLoad64(
            machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
            baseIsSP, 16 /* accSize */, eSize);
    }
    // Final memory microop carries the leftover (< 16 byte) access size.
    microOps[uopIdx++] = new MicroNeonLoad64(
        machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
        residuum ? residuum : 16 /* accSize */, eSize);

    // Writeback microop: the post-increment amount is encoded in "Rm": a
    // 64-bit general register OR as '11111' for an immediate value equal to
    // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
    if (wb) {
        if (rm != ((RegIndex) INTREG_X31)) {
            microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
                                                      UXTX, 0);
        } else {
            microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
                                                   totNumBytes);
        }
    }

    for (int i = 0; i < numMarshalMicroops; ++i) {
        microOps[uopIdx++] = new MicroUnpackNeon64(
            machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
            numStructElems, index, i /* step */, replicate);
    }

    assert(uopIdx == numMicroops);

    for (int i = 0; i < numMicroops - 1; i++) {
        microOps[i]->setDelayedCommit();
    }
    microOps[numMicroops - 1]->setLastMicroop();
}

// AArch64 STn (single structure) macro-op: pack microops extract the
// element at `index` from vd into scratch registers, then store microops
// write it out, then the optional writeback runs.
VstSingleOp64::VstSingleOp64(const char *mnem, ExtMachInst machInst,
                             OpClass __opClass, RegIndex rn, RegIndex vd,
                             RegIndex rm, uint8_t eSize, uint8_t dataSize,
                             uint8_t numStructElems, uint8_t index, bool wb,
                             bool replicate) :
    PredMacroOp(mnem, machInst, __opClass)
{
    RegIndex vx = NumFloatV8ArchRegs / 4;
    RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
    bool baseIsSP = isSP((IntRegIndex) rnsp);

    numMicroops = wb ? 1 : 0;

    int eSizeBytes = 1 << eSize;
    int totNumBytes = numStructElems * eSizeBytes;
    assert(totNumBytes <= 64);

    // The guiding principle here is that no more than 16 bytes can be
    // transferred at a time
    int numMemMicroops = totNumBytes / 16;
    int residuum = totNumBytes % 16;
    if (residuum)
        ++numMemMicroops;
    numMicroops += numMemMicroops;

    int numMarshalMicroops = totNumBytes > 32 ? 2 : 1;
    numMicroops += numMarshalMicroops;

    microOps = new StaticInstPtr[numMicroops];
    unsigned uopIdx = 0;

    for (int i = 0; i < numMarshalMicroops; ++i) {
        microOps[uopIdx++] = new MicroPackNeon64(
            machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
            numStructElems, index, i /* step */, replicate);
    }

    uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize |
            TLB::AllowUnaligned;

    int i = 0;
    for (; i < numMemMicroops - 1; ++i) {
        microOps[uopIdx++] = new MicroNeonStore64(
            machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
            baseIsSP, 16 /* accSize */, eSize);
    }
    // Final memory microop carries the leftover (< 16 byte) access size.
    microOps[uopIdx++] = new MicroNeonStore64(
        machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
        residuum ? residuum : 16 /* accSize */, eSize);

    // Writeback microop: the post-increment amount is encoded in "Rm": a
    // 64-bit general register OR as '11111' for an immediate value equal to
    // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
    if (wb) {
        if (rm != ((RegIndex) INTREG_X31)) {
            microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
                                                      UXTX, 0);
        } else {
            microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
                                                   totNumBytes);
        }
    }

    assert(uopIdx == numMicroops);

    for (int i = 0; i < numMicroops - 1; i++) {
        microOps[i]->setDelayedCommit();
    }
    microOps[numMicroops - 1]->setLastMicroop();
}

// VFP VLDM/VSTM (and VPUSH/VPOP-style) macro-op: one (single-precision) or
// two (double-precision halves) load/store microops per register, plus an
// optional add/subtract writeback microop on the base register.
MacroVFPMemOp::MacroVFPMemOp(const char *mnem, ExtMachInst machInst,
                             OpClass __opClass, IntRegIndex rn,
                             RegIndex vd, bool single, bool up,
                             bool writeback, bool load, uint32_t offset) :
    PredMacroOp(mnem, machInst, __opClass)
{
    int i = 0;

    // The lowest order bit selects fldmx (set) or fldmd (clear). These seem
    // to be functionally identical except that fldmx is deprecated. For now
    // we'll assume they're otherwise interchangeable.
    int count = (single ? offset : (offset / 2));
    if (count == 0 || count > NumFloatV7ArchRegs)
        warn_once("Bad offset field for VFP load/store multiple.\n");
    if (count == 0) {
        // Force there to be at least one microop so the macroop makes sense.
        writeback = true;
    }
    if (count > NumFloatV7ArchRegs)
        count = NumFloatV7ArchRegs;

    numMicroops = count * (single ? 1 : 2) + (writeback ? 1 : 0);
    microOps = new StaticInstPtr[numMicroops];

    int64_t addr = 0;

    // For a descending transfer, start at the top and work down.
    if (!up)
        addr = 4 * offset;

    bool tempUp = up;
    for (int j = 0; j < count; j++) {
        if (load) {
            if (single) {
                microOps[i++] = new MicroLdrFpUop(machInst, vd++, rn,
                                                  tempUp, addr);
            } else {
                microOps[i++] = new MicroLdrDBFpUop(machInst, vd++, rn,
                                                    tempUp, addr);
                microOps[i++] = new MicroLdrDTFpUop(machInst, vd++, rn, tempUp,
                                                    addr + (up ? 4 : -4));
            }
        } else {
            if (single) {
                microOps[i++] = new MicroStrFpUop(machInst, vd++, rn,
                                                  tempUp, addr);
            } else {
                microOps[i++] = new MicroStrDBFpUop(machInst, vd++, rn,
                                                    tempUp, addr);
                microOps[i++] = new MicroStrDTFpUop(machInst, vd++, rn, tempUp,
                                                    addr + (up ? 4 : -4));
            }
        }
        if (!tempUp) {
            addr -= (single ? 4 : 8);
            // The microops don't handle negative displacement, so when the
            // running address reaches zero, flip polarity and start adding.
            if (addr <= 0) {
                tempUp = true;
                addr = -addr;
            }
        } else {
            addr += (single ? 4 : 8);
        }
    }

    if (writeback) {
        if (up) {
            microOps[i++] =
                new MicroAddiUop(machInst, rn, rn, 4 * offset);
        } else {
            microOps[i++] =
                new MicroSubiUop(machInst, rn, rn, 4 * offset);
        }
    }

    assert(numMicroops == i);
    microOps[numMicroops - 1]->setLastMicroop();

    for (StaticInstPtr *curUop = microOps;
         !(*curUop)->isLastMicroop(); curUop++) {
        MicroOp * uopPtr = dynamic_cast<MicroOp *>(curUop->get());
        assert(uopPtr);
        uopPtr->setDelayedCommit();
    }
}

// Disassembly: "<mnem> ura, urb, #imm".
std::string
MicroIntImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
{
    std::stringstream ss;
    printMnemonic(ss);
    printReg(ss, ura);
    ss << ", ";
    printReg(ss, urb);
    ss << ", ";
    ccprintf(ss, "#%d", imm);
    return ss.str();
}

// Disassembly: "<mnem> ura, urb, #imm" (64-bit immediate variant).
std::string
MicroIntImmXOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
{
    std::stringstream ss;
    printMnemonic(ss);
    printReg(ss, ura);
    ss << ", ";
    printReg(ss, urb);
    ss << ", ";
    ccprintf(ss, "#%d", imm);
    return ss.str();
}

// Disassembly: "<mnem> [PC,CPSR]".
std::string
MicroSetPCCPSR::generateDisassembly(Addr pc, const SymbolTable *symtab) const
{
    std::stringstream ss;
    printMnemonic(ss);
    ss << "[PC,CPSR]";
    return ss.str();
}

// Disassembly: "<mnem> ura, urb, urc <extend> #shiftAmt".
std::string
MicroIntRegXOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
{
    std::stringstream ss;
    printMnemonic(ss);
    printReg(ss, ura);
    ccprintf(ss, ", ");
    printReg(ss, urb);
    printExtendOperand(false, ss, (IntRegIndex)urc, type, shiftAmt);
    return ss.str();
}

// Disassembly: "<mnem> ura, urb".
std::string
MicroIntMov::generateDisassembly(Addr pc, const SymbolTable *symtab) const
{
    std::stringstream ss;
    printMnemonic(ss);
    printReg(ss, ura);
    ss << ", ";
    printReg(ss, urb);
    return ss.str();
}

// Disassembly: "<mnem> ura, urb, urc".
std::string
MicroIntOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
{
    std::stringstream ss;
    printMnemonic(ss);
    printReg(ss, ura);
    ss << ", ";
    printReg(ss, urb);
    ss << ", ";
    printReg(ss, urc);
    return ss.str();
}

// Disassembly: "<mnem> ura, [urb, #imm]"; floating microops print ura in
// the FP register namespace.
std::string
MicroMemOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
{
    std::stringstream ss;
    printMnemonic(ss);
    if (isFloating())
        printReg(ss, ura + FP_Reg_Base);
    else
        printReg(ss, ura);
    ss << ", [";
    printReg(ss, urb);
    ss << ", ";
    ccprintf(ss, "#%d", imm);
    ss << "]";
    return ss.str();
}

}

// Expand a NEON VLDn (multiple n-element structures) macro-op: one or two
// wide load micro-ops, an optional base-register writeback, then (when the
// structures are interleaved in memory) deinterleave micro-ops that scatter
// the elements into the destination registers.
VldMultOp::VldMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
                     unsigned elems, RegIndex rn, RegIndex vd, unsigned regs,
                     unsigned inc, uint32_t size, uint32_t align, RegIndex rm) :
    PredMacroOp(mnem, machInst, __opClass)
{
    assert(regs > 0 && regs <= 4);
    assert(regs % elems == 0);

    // Up to 32 bytes are loaded: one 16-byte micro-op, plus a second for
    // more than two registers.
    numMicroops = (regs > 2) ? 2 : 1;
    // Per the ARM encoding, Rm == 15 (PC) means no writeback.
    bool wb = (rm != 15);
    bool deinterleave = (elems > 1);

    if (wb) numMicroops++;
    if (deinterleave) numMicroops += (regs / elems);
    microOps = new StaticInstPtr[numMicroops];

    // When deinterleaving is needed, load into scratch registers just past
    // the architectural v7 FP file; otherwise load straight into vd.
    RegIndex rMid = deinterleave ? NumFloatV7ArchRegs : vd * 2;

    uint32_t noAlign = TLB::MustBeOne;

    unsigned uopIdx = 0;
    switch (regs) {
      case 4:
        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
                size, machInst, rMid, rn, 0, align);
        // Only the first access carries the alignment constraint.
        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
                size, machInst, rMid + 4, rn, 16, noAlign);
        break;
      case 3:
        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
                size, machInst, rMid, rn, 0, align);
        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon8Uop>(
                size, machInst, rMid + 4, rn, 16, noAlign);
        break;
      case 2:
        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
                size, machInst, rMid, rn, 0, align);
        break;
      case 1:
        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon8Uop>(
                size, machInst, rMid, rn, 0, align);
        break;
      default:
        // Unknown number of registers
        microOps[uopIdx++] = new Unknown(machInst);
    }
    if (wb) {
        if (rm != 15 && rm != 13) {
            // Post-index by register Rm.
            microOps[uopIdx++] =
                new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
        } else {
            // Rm == 13 encodes post-increment by the transfer size.
            microOps[uopIdx++] =
                new MicroAddiUop(machInst, rn, rn, regs * 8);
        }
    }
    if (deinterleave) {
        switch (elems) {
          case 4:
            assert(regs == 4);
            microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon8Uop>(
                    size, machInst, vd * 2, rMid, inc * 2);
            break;
          case 3:
            assert(regs == 3);
            microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon6Uop>(
                    size, machInst, vd * 2, rMid, inc * 2);
            break;
          case 2:
            assert(regs == 4 || regs == 2);
            if (regs == 4) {
                // Two pair-deinterleaves cover the four registers.
                microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
                        size, machInst, vd * 2, rMid, inc * 2);
                microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
                        size, machInst, vd * 2 + 2, rMid + 4, inc * 2);
            } else {
                microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
                        size, machInst, vd * 2, rMid, inc * 2);
            }
            break;
          default:
            // Bad number of elements to deinterleave
            microOps[uopIdx++] = new Unknown(machInst);
        }
    }
    assert(uopIdx == numMicroops);

    for (unsigned i = 0; i < numMicroops - 1; i++) {
        MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
        assert(uopPtr);
        uopPtr->setDelayedCommit();
    }
    microOps[numMicroops - 1]->setLastMicroop();
}

// Expand a NEON VLDn (single structure, to one lane or all lanes): load the
// structure into a scratch register, optionally write back the base, then
// unpack the elements into the destination lane(s).
VldSingleOp::VldSingleOp(const char *mnem, ExtMachInst machInst,
                         OpClass __opClass, bool all, unsigned elems,
                         RegIndex rn, RegIndex vd, unsigned regs,
                         unsigned inc, uint32_t size, uint32_t align,
                         RegIndex rm, unsigned lane) :
    PredMacroOp(mnem, machInst, __opClass)
{
    assert(regs > 0 && regs <= 4);
    assert(regs % elems == 0);

    unsigned eBytes = (1 << size);
    unsigned loadSize = eBytes * elems;
    // Number of 32-bit scratch registers the loaded bytes occupy (only used
    // by the asserts below).
    unsigned loadRegs M5_VAR_USED = (loadSize + sizeof(FloatRegBits) - 1) /
                                    sizeof(FloatRegBits);

    assert(loadRegs > 0 && loadRegs <= 4);

    numMicroops = 1;
    // Rm == 15 (PC) encodes no writeback.
    bool wb = (rm != 15);

    if (wb) numMicroops++;
    numMicroops += (regs / elems);
    microOps = new StaticInstPtr[numMicroops];

    // Scratch register just past the architectural v7 FP file.
    RegIndex ufp0 = NumFloatV7ArchRegs;

    // Pick a load micro-op sized for the whole structure; the element type
    // parameter tracks eBytes.
    unsigned uopIdx = 0;
    switch (loadSize) {
      case 1:
        microOps[uopIdx++] = new MicroLdrNeon1Uop<uint8_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 2:
        if (eBytes == 2) {
            microOps[uopIdx++] = new MicroLdrNeon2Uop<uint16_t>(
machInst, ufp0, rn, 0, align);
        } else {
            microOps[uopIdx++] = new MicroLdrNeon2Uop<uint8_t>(
                    machInst, ufp0, rn, 0, align);
        }
        break;
      case 3:
        microOps[uopIdx++] = new MicroLdrNeon3Uop<uint8_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 4:
        switch (eBytes) {
          case 1:
            microOps[uopIdx++] = new MicroLdrNeon4Uop<uint8_t>(
                    machInst, ufp0, rn, 0, align);
            break;
          case 2:
            microOps[uopIdx++] = new MicroLdrNeon4Uop<uint16_t>(
                    machInst, ufp0, rn, 0, align);
            break;
          case 4:
            microOps[uopIdx++] = new MicroLdrNeon4Uop<uint32_t>(
                    machInst, ufp0, rn, 0, align);
            break;
        }
        break;
      case 6:
        microOps[uopIdx++] = new MicroLdrNeon6Uop<uint16_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 8:
        switch (eBytes) {
          case 2:
            microOps[uopIdx++] = new MicroLdrNeon8Uop<uint16_t>(
                    machInst, ufp0, rn, 0, align);
            break;
          case 4:
            microOps[uopIdx++] = new MicroLdrNeon8Uop<uint32_t>(
                    machInst, ufp0, rn, 0, align);
            break;
        }
        break;
      case 12:
        microOps[uopIdx++] = new MicroLdrNeon12Uop<uint32_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 16:
        microOps[uopIdx++] = new MicroLdrNeon16Uop<uint32_t>(
                machInst, ufp0, rn, 0, align);
        break;
      default:
        // Unrecognized load size
        microOps[uopIdx++] = new Unknown(machInst);
    }
    if (wb) {
        if (rm != 15 && rm != 13) {
            // Post-index by register Rm.
            microOps[uopIdx++] =
                new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
        } else {
            // Rm == 13 encodes post-increment by the transfer size.
            microOps[uopIdx++] =
                new MicroAddiUop(machInst, rn, rn, loadSize);
        }
    }
    // Unpack the loaded structure into the destination lane(s); "all"
    // selects the replicate-to-all-lanes micro-ops.
    switch (elems) {
      case 4:
        assert(regs == 4);
        switch (size) {
          case 0:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to8Uop<uint8_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to8Uop<uint8_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          case 1:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to8Uop<uint16_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to8Uop<uint16_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          case 2:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon4to8Uop<uint32_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon4to8Uop<uint32_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          default:
            // Bad size
            microOps[uopIdx++] = new Unknown(machInst);
            break;
        }
        break;
      case 3:
        assert(regs == 3);
        switch (size) {
          case 0:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to6Uop<uint8_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to6Uop<uint8_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          case 1:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to6Uop<uint16_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to6Uop<uint16_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          case 2:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon4to6Uop<uint32_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon4to6Uop<uint32_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          default:
            // Bad size
            microOps[uopIdx++] = new Unknown(machInst);
            break;
        }
        break;
      case 2:
        assert(regs == 2);
        assert(loadRegs <= 2);
        switch (size) {
          case 0:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint8_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint8_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          case 1:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint16_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint16_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          case 2:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint32_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint32_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          default:
            // Bad size
            microOps[uopIdx++] = new Unknown(machInst);
            break;
        }
        break;
      case 1:
        assert(regs == 1 || (all && regs == 2));
        assert(loadRegs <= 2);
        for (unsigned offset = 0; offset < regs; offset++) {
            switch (size) {
              case 0:
                if (all) {
                    microOps[uopIdx++] =
                        new MicroUnpackAllNeon2to2Uop<uint8_t>(
                            machInst, (vd + offset) * 2, ufp0, inc * 2);
                } else {
                    microOps[uopIdx++] =
                        new MicroUnpackNeon2to2Uop<uint8_t>(
                            machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
                }
                break;
              case 1:
                if (all) {
                    microOps[uopIdx++] =
                        new MicroUnpackAllNeon2to2Uop<uint16_t>(
                            machInst, (vd + offset) * 2, ufp0, inc * 2);
                } else {
                    microOps[uopIdx++] =
                        new MicroUnpackNeon2to2Uop<uint16_t>(
                            machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
                }
                break;
              case 2:
                if (all) {
                    microOps[uopIdx++] =
                        new MicroUnpackAllNeon2to2Uop<uint32_t>(
                            machInst, (vd + offset) * 2, ufp0, inc * 2);
                } else {
                    microOps[uopIdx++] =
                        new MicroUnpackNeon2to2Uop<uint32_t>(
                            machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
                }
                break;
              default:
                // Bad size
                microOps[uopIdx++] = new Unknown(machInst);
                break;
            }
        }
        break;
      default:
        // Bad number of elements to unpack
        microOps[uopIdx++] = new Unknown(machInst);
    }
    assert(uopIdx == numMicroops);

    for (unsigned i = 0; i < numMicroops - 1; i++) {
        MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
        assert(uopPtr);
        uopPtr->setDelayedCommit();
    }
    microOps[numMicroops - 1]->setLastMicroop();
}

// Expand a NEON VSTn (multiple n-element structures) macro-op: mirror image
// of VldMultOp — interleave micro-ops first (when needed), then one or two
// wide store micro-ops, then an optional base-register writeback.
VstMultOp::VstMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
                     unsigned elems, RegIndex rn, RegIndex vd, unsigned regs,
                     unsigned inc, uint32_t size, uint32_t align, RegIndex rm) :
    PredMacroOp(mnem, machInst, __opClass)
{
    assert(regs > 0 && regs <= 4);
    assert(regs % elems == 0);

    numMicroops = (regs > 2) ? 2 : 1;
    // Rm == 15 (PC) encodes no writeback.
    bool wb = (rm != 15);
    bool interleave = (elems > 1);

    if (wb) numMicroops++;
    if (interleave) numMicroops += (regs / elems);
    microOps = new StaticInstPtr[numMicroops];

    uint32_t noAlign = TLB::MustBeOne;

    // Interleave into scratch registers before storing; otherwise store
    // straight from vd.
    RegIndex rMid = interleave ? NumFloatV7ArchRegs : vd * 2;

    unsigned uopIdx = 0;
    if (interleave) {
        switch (elems) {
          case 4:
            assert(regs == 4);
            microOps[uopIdx++] = newNeonMixInst<MicroInterNeon8Uop>(
                    size, machInst, rMid, vd * 2, inc * 2);
            break;
          case 3:
            assert(regs == 3);
            microOps[uopIdx++] = newNeonMixInst<MicroInterNeon6Uop>(
                    size, machInst, rMid, vd * 2, inc * 2);
            break;
          case 2:
            assert(regs == 4 || regs == 2);
            if (regs == 4) {
                microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
                        size, machInst, rMid, vd * 2, inc * 2);
                microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
                        size, machInst, rMid + 4, vd * 2 + 2, inc * 2);
            } else {
                microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
                        size, machInst, rMid, vd * 2, inc * 2);
            }
            break;
          default:
            // Bad number of elements to interleave
            microOps[uopIdx++] = new Unknown(machInst);
        }
    }
    switch (regs) {
      case 4:
        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
                size, machInst, rMid, rn, 0, align);
        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
                size, machInst, rMid + 4, rn, 16, noAlign);
        break;
      case 3:
microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
                size, machInst, rMid, rn, 0, align);
        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon8Uop>(
                size, machInst, rMid + 4, rn, 16, noAlign);
        break;
      case 2:
        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
                size, machInst, rMid, rn, 0, align);
        break;
      case 1:
        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon8Uop>(
                size, machInst, rMid, rn, 0, align);
        break;
      default:
        // Unknown number of registers
        microOps[uopIdx++] = new Unknown(machInst);
    }
    if (wb) {
        if (rm != 15 && rm != 13) {
            // Post-index by register Rm.
            microOps[uopIdx++] =
                new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
        } else {
            // Rm == 13 encodes post-increment by the transfer size.
            microOps[uopIdx++] =
                new MicroAddiUop(machInst, rn, rn, regs * 8);
        }
    }
    assert(uopIdx == numMicroops);

    for (unsigned i = 0; i < numMicroops - 1; i++) {
        MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
        assert(uopPtr);
        uopPtr->setDelayedCommit();
    }
    microOps[numMicroops - 1]->setLastMicroop();
}

// Expand a NEON VSTn (single structure from one lane): pack the selected
// lane's elements into a scratch register, store the structure, then
// optionally write back the base register.
VstSingleOp::VstSingleOp(const char *mnem, ExtMachInst machInst,
                         OpClass __opClass, bool all, unsigned elems,
                         RegIndex rn, RegIndex vd, unsigned regs,
                         unsigned inc, uint32_t size, uint32_t align,
                         RegIndex rm, unsigned lane) :
    PredMacroOp(mnem, machInst, __opClass)
{
    // Store-to-all-lanes does not exist; "all" is only meaningful for loads.
    assert(!all);
    assert(regs > 0 && regs <= 4);
    assert(regs % elems == 0);

    unsigned eBytes = (1 << size);
    unsigned storeSize = eBytes * elems;
    // Number of 32-bit scratch registers the stored bytes occupy (only used
    // by the asserts below).
    unsigned storeRegs M5_VAR_USED = (storeSize + sizeof(FloatRegBits) - 1) /
                                     sizeof(FloatRegBits);

    assert(storeRegs > 0 && storeRegs <= 4);

    numMicroops = 1;
    // Rm == 15 (PC) encodes no writeback.
    bool wb = (rm != 15);

    if (wb) numMicroops++;
    numMicroops += (regs / elems);
    microOps = new StaticInstPtr[numMicroops];

    // Scratch register just past the architectural v7 FP file.
    RegIndex ufp0 = NumFloatV7ArchRegs;

    unsigned uopIdx = 0;
    switch (elems) {
      case 4:
        assert(regs == 4);
        switch (size) {
          case 0:
            microOps[uopIdx++] = new MicroPackNeon8to2Uop<uint8_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          case 1:
            microOps[uopIdx++] = new MicroPackNeon8to2Uop<uint16_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          case 2:
            microOps[uopIdx++] = new MicroPackNeon8to4Uop<uint32_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          default:
            // Bad size
            microOps[uopIdx++] = new Unknown(machInst);
            break;
        }
        break;
      case 3:
        assert(regs == 3);
        switch (size) {
          case 0:
            microOps[uopIdx++] = new MicroPackNeon6to2Uop<uint8_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          case 1:
            microOps[uopIdx++] = new MicroPackNeon6to2Uop<uint16_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          case 2:
            microOps[uopIdx++] = new MicroPackNeon6to4Uop<uint32_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          default:
            // Bad size
            microOps[uopIdx++] = new Unknown(machInst);
            break;
        }
        break;
      case 2:
        assert(regs == 2);
        assert(storeRegs <= 2);
        switch (size) {
          case 0:
            microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint8_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          case 1:
            microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint16_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          case 2:
            microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint32_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          default:
            // Bad size
            microOps[uopIdx++] = new Unknown(machInst);
            break;
        }
        break;
      case 1:
        assert(regs == 1 || (all && regs == 2));
        assert(storeRegs <= 2);
        for (unsigned offset = 0; offset < regs; offset++) {
            switch (size) {
              case 0:
                microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint8_t>(
                        machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
                break;
              case 1:
                microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint16_t>(
                        machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
                break;
              case 2:
                microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint32_t>(
                        machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
                break;
              default:
                // Bad size
                microOps[uopIdx++] = new Unknown(machInst);
                break;
            }
        }
        break;
      default:
        // Bad number of elements to unpack
        microOps[uopIdx++] = new Unknown(machInst);
    }
    // Store micro-op sized for the whole packed structure.
    switch (storeSize) {
      case 1:
        microOps[uopIdx++] = new MicroStrNeon1Uop<uint8_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 2:
        if (eBytes == 2) {
            microOps[uopIdx++] = new MicroStrNeon2Uop<uint16_t>(
                    machInst, ufp0, rn, 0, align);
        } else {
            microOps[uopIdx++] = new MicroStrNeon2Uop<uint8_t>(
                    machInst, ufp0, rn, 0, align);
        }
        break;
      case 3:
        microOps[uopIdx++] = new MicroStrNeon3Uop<uint8_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 4:
        switch (eBytes) {
          case 1:
            microOps[uopIdx++] = new MicroStrNeon4Uop<uint8_t>(
                    machInst, ufp0, rn, 0, align);
            break;
          case 2:
            microOps[uopIdx++] = new MicroStrNeon4Uop<uint16_t>(
                    machInst, ufp0, rn, 0, align);
            break;
          case 4:
            microOps[uopIdx++] = new MicroStrNeon4Uop<uint32_t>(
                    machInst, ufp0, rn, 0, align);
            break;
        }
        break;
      case 6:
        microOps[uopIdx++] = new MicroStrNeon6Uop<uint16_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 8:
        switch (eBytes) {
          case 2:
            microOps[uopIdx++] = new MicroStrNeon8Uop<uint16_t>(
                    machInst, ufp0, rn, 0, align);
            break;
          case 4:
            microOps[uopIdx++] = new MicroStrNeon8Uop<uint32_t>(
                    machInst, ufp0, rn, 0, align);
            break;
        }
        break;
      case 12:
        microOps[uopIdx++] = new MicroStrNeon12Uop<uint32_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 16:
        microOps[uopIdx++] = new MicroStrNeon16Uop<uint32_t>(
                machInst, ufp0, rn, 0, align);
        break;
      default:
        // Bad store size
        microOps[uopIdx++] = new Unknown(machInst);
    }
    if (wb) {
        if (rm != 15 && rm != 13) {
            // Post-index by register Rm.
            microOps[uopIdx++] =
                new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
        } else {
            // Rm == 13 encodes post-increment by the transfer size.
            microOps[uopIdx++] =
                new MicroAddiUop(machInst, rn, rn, storeSize);
        }
    }
    assert(uopIdx == numMicroops);

    for (unsigned i = 0; i < numMicroops - 1; i++) {
        MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
        assert(uopPtr);
        uopPtr->setDelayedCommit();
    }
    microOps[numMicroops - 1]->setLastMicroop();
}

// Expand an AArch64 LD1/LD2/LD3/LD4 (multiple structures) macro-op: wide
// loads into scratch registers, optional writeback, then deinterleave
// ("marshal") micro-ops into the destination registers.
VldMultOp64::VldMultOp64(const char *mnem, ExtMachInst machInst,
                         OpClass __opClass, RegIndex rn, RegIndex vd,
                         RegIndex rm, uint8_t eSize, uint8_t dataSize,
                         uint8_t numStructElems, uint8_t numRegs, bool wb) :
    PredMacroOp(mnem, machInst, __opClass)
{
    // Scratch registers start a quarter of the way into the v8 FP file.
    RegIndex vx = NumFloatV8ArchRegs / 4;
    RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
    bool baseIsSP = isSP((IntRegIndex) rnsp);

    numMicroops = wb ? 1 : 0;

    int totNumBytes = numRegs * dataSize / 8;
    assert(totNumBytes <= 64);

    // The guiding principle here is that no more than 16 bytes can be
    // transferred at a time
    int numMemMicroops = totNumBytes / 16;
    int residuum = totNumBytes % 16;
    if (residuum)
        ++numMemMicroops;
    numMicroops += numMemMicroops;

    // One marshal micro-op per register pair (rounded up).
    int numMarshalMicroops = numRegs / 2 + (numRegs % 2 ?
1 : 0);
    numMicroops += numMarshalMicroops;

    microOps = new StaticInstPtr[numMicroops];
    unsigned uopIdx = 0;
    uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize |
        TLB::AllowUnaligned;

    // Full 16-byte loads, then one final load for the remainder (or a full
    // 16 bytes when the total is a multiple of 16).
    int i = 0;
    for (; i < numMemMicroops - 1; ++i) {
        microOps[uopIdx++] = new MicroNeonLoad64(
            machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
            baseIsSP, 16 /* accSize */, eSize);
    }
    microOps[uopIdx++] = new MicroNeonLoad64(
        machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
        residuum ? residuum : 16 /* accSize */, eSize);

    // Writeback microop: the post-increment amount is encoded in "Rm": a
    // 64-bit general register OR as '11111' for an immediate value equal to
    // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
    if (wb) {
        if (rm != ((RegIndex) INTREG_X31)) {
            microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
                                                      UXTX, 0);
        } else {
            microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
                                                   totNumBytes);
        }
    }

    // Deinterleave the scratch registers into the destination registers.
    for (int i = 0; i < numMarshalMicroops; ++i) {
        switch(numRegs) {
        case 1: microOps[uopIdx++] = new MicroDeintNeon64_1Reg(
                    machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
                    numStructElems, 1, i /* step */);
            break;
        case 2: microOps[uopIdx++] = new MicroDeintNeon64_2Reg(
                    machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
                    numStructElems, 2, i /* step */);
            break;
        case 3: microOps[uopIdx++] = new MicroDeintNeon64_3Reg(
                    machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
                    numStructElems, 3, i /* step */);
            break;
        case 4: microOps[uopIdx++] = new MicroDeintNeon64_4Reg(
                    machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
                    numStructElems, 4, i /* step */);
            break;
        default: panic("Invalid number of registers");
        }

    }

    assert(uopIdx == numMicroops);

    for (int i = 0; i < numMicroops - 1; ++i) {
        microOps[i]->setDelayedCommit();
    }
    microOps[numMicroops - 1]->setLastMicroop();
}

// Expand an AArch64 ST1/ST2/ST3/ST4 (multiple structures) macro-op: mirror
// of VldMultOp64 — interleave ("marshal") micro-ops first, then wide stores,
// then an optional writeback.
VstMultOp64::VstMultOp64(const char *mnem, ExtMachInst machInst,
                         OpClass __opClass, RegIndex rn, RegIndex vd,
                         RegIndex rm, uint8_t eSize, uint8_t dataSize,
                         uint8_t numStructElems, uint8_t numRegs, bool wb) :
    PredMacroOp(mnem, machInst, __opClass)
{
    // Scratch registers start a quarter of the way into the v8 FP file.
    RegIndex vx = NumFloatV8ArchRegs / 4;
    RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
    bool baseIsSP = isSP((IntRegIndex) rnsp);

    numMicroops = wb ? 1 : 0;

    int totNumBytes = numRegs * dataSize / 8;
    assert(totNumBytes <= 64);

    // The guiding principle here is that no more than 16 bytes can be
    // transferred at a time
    int numMemMicroops = totNumBytes / 16;
    int residuum = totNumBytes % 16;
    if (residuum)
        ++numMemMicroops;
    numMicroops += numMemMicroops;

    int numMarshalMicroops = totNumBytes > 32 ? 2 : 1;
    numMicroops += numMarshalMicroops;

    microOps = new StaticInstPtr[numMicroops];
    unsigned uopIdx = 0;

    // Interleave the source registers into the scratch registers.
    for(int i = 0; i < numMarshalMicroops; ++i) {
        switch (numRegs) {
        case 1: microOps[uopIdx++] = new MicroIntNeon64_1Reg(
                    machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
                    numStructElems, 1, i /* step */);
            break;
        case 2: microOps[uopIdx++] = new MicroIntNeon64_2Reg(
                    machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
                    numStructElems, 2, i /* step */);
            break;
        case 3: microOps[uopIdx++] = new MicroIntNeon64_3Reg(
                    machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
                    numStructElems, 3, i /* step */);
            break;
        case 4: microOps[uopIdx++] = new MicroIntNeon64_4Reg(
                    machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
                    numStructElems, 4, i /* step */);
            break;
        default: panic("Invalid number of registers");
        }
    }

    uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize |
        TLB::AllowUnaligned;

    // Full 16-byte stores, then one final store for the remainder (or a
    // full 16 bytes when the total is a multiple of 16).
    int i = 0;
    for(; i < numMemMicroops - 1; ++i) {
        microOps[uopIdx++] = new MicroNeonStore64(
            machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
            baseIsSP, 16 /* accSize */, eSize);
    }
    microOps[uopIdx++] = new MicroNeonStore64(
        machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
        residuum ? residuum : 16 /* accSize */, eSize);

    // Writeback microop: the post-increment amount is encoded in "Rm": a
    // 64-bit general register OR as '11111' for an immediate value equal to
    // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
    if (wb) {
        if (rm != ((RegIndex) INTREG_X31)) {
            microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
                                                      UXTX, 0);
        } else {
            microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
                                                   totNumBytes);
        }
    }

    assert(uopIdx == numMicroops);

    for (int i = 0; i < numMicroops - 1; i++) {
        microOps[i]->setDelayedCommit();
    }
    microOps[numMicroops - 1]->setLastMicroop();
}

// Expand an AArch64 LD1/LD2/LD3/LD4 (single structure, one lane or
// replicate-to-all) macro-op: loads into scratch registers, optional
// writeback, then unpack micro-ops into the destination lane(s).
VldSingleOp64::VldSingleOp64(const char *mnem, ExtMachInst machInst,
                             OpClass __opClass, RegIndex rn, RegIndex vd,
                             RegIndex rm, uint8_t eSize, uint8_t dataSize,
                             uint8_t numStructElems, uint8_t index, bool wb,
                             bool replicate) :
    PredMacroOp(mnem, machInst, __opClass)
{
    // Scratch registers start a quarter of the way into the v8 FP file.
    RegIndex vx = NumFloatV8ArchRegs / 4;
    RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
    bool baseIsSP = isSP((IntRegIndex) rnsp);

    numMicroops = wb ? 1 : 0;

    int eSizeBytes = 1 << eSize;
    int totNumBytes = numStructElems * eSizeBytes;
    assert(totNumBytes <= 64);

    // The guiding principle here is that no more than 16 bytes can be
    // transferred at a time
    int numMemMicroops = totNumBytes / 16;
    int residuum = totNumBytes % 16;
    if (residuum)
        ++numMemMicroops;
    numMicroops += numMemMicroops;

    // One unpack micro-op per structure-element pair (rounded up).
    int numMarshalMicroops = numStructElems / 2 + (numStructElems % 2 ?
1 : 0);
    numMicroops += numMarshalMicroops;

    microOps = new StaticInstPtr[numMicroops];
    unsigned uopIdx = 0;

    uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize |
        TLB::AllowUnaligned;

    // Full 16-byte loads, then one final load for the remainder (or a full
    // 16 bytes when the total is a multiple of 16).
    int i = 0;
    for (; i < numMemMicroops - 1; ++i) {
        microOps[uopIdx++] = new MicroNeonLoad64(
            machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
            baseIsSP, 16 /* accSize */, eSize);
    }
    microOps[uopIdx++] = new MicroNeonLoad64(
        machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
        residuum ? residuum : 16 /* accSize */, eSize);

    // Writeback microop: the post-increment amount is encoded in "Rm": a
    // 64-bit general register OR as '11111' for an immediate value equal to
    // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
    if (wb) {
        if (rm != ((RegIndex) INTREG_X31)) {
            microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
                                                      UXTX, 0);
        } else {
            microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
                                                   totNumBytes);
        }
    }

    // Unpack the loaded elements into the chosen lane (or all lanes when
    // replicate is set).
    for(int i = 0; i < numMarshalMicroops; ++i) {
        microOps[uopIdx++] = new MicroUnpackNeon64(
            machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
            numStructElems, index, i /* step */, replicate);
    }

    assert(uopIdx == numMicroops);

    for (int i = 0; i < numMicroops - 1; i++) {
        microOps[i]->setDelayedCommit();
    }
    microOps[numMicroops - 1]->setLastMicroop();
}

// Expand an AArch64 ST1/ST2/ST3/ST4 (single structure from one lane)
// macro-op: pack the lane's elements into scratch registers, store them,
// then optionally write back the base register.
VstSingleOp64::VstSingleOp64(const char *mnem, ExtMachInst machInst,
                             OpClass __opClass, RegIndex rn, RegIndex vd,
                             RegIndex rm, uint8_t eSize, uint8_t dataSize,
                             uint8_t numStructElems, uint8_t index, bool wb,
                             bool replicate) :
    PredMacroOp(mnem, machInst, __opClass)
{
    // Scratch registers start a quarter of the way into the v8 FP file.
    RegIndex vx = NumFloatV8ArchRegs / 4;
    RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
    bool baseIsSP = isSP((IntRegIndex) rnsp);

    numMicroops = wb ? 1 : 0;

    int eSizeBytes = 1 << eSize;
    int totNumBytes = numStructElems * eSizeBytes;
    assert(totNumBytes <= 64);

    // The guiding principle here is that no more than 16 bytes can be
    // transferred at a time
    int numMemMicroops = totNumBytes / 16;
    int residuum = totNumBytes % 16;
    if (residuum)
        ++numMemMicroops;
    numMicroops += numMemMicroops;

    int numMarshalMicroops = totNumBytes > 32 ? 2 : 1;
    numMicroops += numMarshalMicroops;

    microOps = new StaticInstPtr[numMicroops];
    unsigned uopIdx = 0;

    // Pack the selected lane into the scratch registers before storing.
    for(int i = 0; i < numMarshalMicroops; ++i) {
        microOps[uopIdx++] = new MicroPackNeon64(
            machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
            numStructElems, index, i /* step */, replicate);
    }

    uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize |
        TLB::AllowUnaligned;

    // Full 16-byte stores, then one final store for the remainder (or a
    // full 16 bytes when the total is a multiple of 16).
    int i = 0;
    for(; i < numMemMicroops - 1; ++i) {
        microOps[uopIdx++] = new MicroNeonStore64(
            machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
            baseIsSP, 16 /* accSize */, eSize);
    }
    microOps[uopIdx++] = new MicroNeonStore64(
        machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
        residuum ? residuum : 16 /* accSize */, eSize);

    // Writeback microop: the post-increment amount is encoded in "Rm": a
    // 64-bit general register OR as '11111' for an immediate value equal to
    // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
    if (wb) {
        if (rm != ((RegIndex) INTREG_X31)) {
            microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
                                                      UXTX, 0);
        } else {
            microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
                                                   totNumBytes);
        }
    }

    assert(uopIdx == numMicroops);

    for (int i = 0; i < numMicroops - 1; i++) {
        microOps[i]->setDelayedCommit();
    }
    microOps[numMicroops - 1]->setLastMicroop();
}

// Expand a VFP load/store multiple (FLDM*/FSTM*-style) macro-op into one
// transfer micro-op per single-precision register (two word transfers per
// double), followed by an optional base-register writeback micro-op.
MacroVFPMemOp::MacroVFPMemOp(const char *mnem, ExtMachInst machInst,
                             OpClass __opClass, IntRegIndex rn,
                             RegIndex vd, bool single, bool up,
                             bool writeback, bool load, uint32_t offset) :
    PredMacroOp(mnem, machInst, __opClass)
{
    int i = 0;

    // The lowest order bit selects fldmx (set) or fldmd (clear). These seem
    // to be functionally identical except that fldmx is deprecated. For now
    // we'll assume they're otherwise interchangable.
    int count = (single ? offset : (offset / 2));
    if (count == 0 || count > NumFloatV7ArchRegs)
        warn_once("Bad offset field for VFP load/store multiple.\n");
    if (count == 0) {
        // Force there to be at least one microop so the macroop makes sense.
        writeback = true;
    }
    if (count > NumFloatV7ArchRegs)
        count = NumFloatV7ArchRegs;

    numMicroops = count * (single ? 1 : 2) + (writeback ? 1 : 0);
    microOps = new StaticInstPtr[numMicroops];

    int64_t addr = 0;

    // Descending transfers start at the highest offset and work down.
    if (!up)
        addr = 4 * offset;

    bool tempUp = up;
    for (int j = 0; j < count; j++) {
        if (load) {
            if (single) {
                microOps[i++] = new MicroLdrFpUop(machInst, vd++, rn,
                                                  tempUp, addr);
            } else {
                // Doubles are moved as bottom/top word pairs.
                microOps[i++] = new MicroLdrDBFpUop(machInst, vd++, rn,
                                                    tempUp, addr);
                microOps[i++] = new MicroLdrDTFpUop(machInst, vd++, rn, tempUp,
                                                    addr + (up ?
4 : -4)); 1461 } 1462 } else { 1463 if (single) { 1464 microOps[i++] = new MicroStrFpUop(machInst, vd++, rn, 1465 tempUp, addr); 1466 } else { 1467 microOps[i++] = new MicroStrDBFpUop(machInst, vd++, rn, 1468 tempUp, addr); 1469 microOps[i++] = new MicroStrDTFpUop(machInst, vd++, rn, tempUp, 1470 addr + (up ? 4 : -4)); 1471 } 1472 } 1473 if (!tempUp) { 1474 addr -= (single ? 4 : 8); 1475 // The microops don't handle negative displacement, so turn if we 1476 // hit zero, flip polarity and start adding. 1477 if (addr <= 0) { 1478 tempUp = true; 1479 addr = -addr; 1480 } 1481 } else { 1482 addr += (single ? 4 : 8); 1483 } 1484 } 1485 1486 if (writeback) { 1487 if (up) { 1488 microOps[i++] = 1489 new MicroAddiUop(machInst, rn, rn, 4 * offset); 1490 } else { 1491 microOps[i++] = 1492 new MicroSubiUop(machInst, rn, rn, 4 * offset); 1493 } 1494 } 1495 1496 assert(numMicroops == i); 1497 microOps[numMicroops - 1]->setLastMicroop(); 1498 1499 for (StaticInstPtr *curUop = microOps; 1500 !(*curUop)->isLastMicroop(); curUop++) { 1501 MicroOp * uopPtr = dynamic_cast<MicroOp *>(curUop->get()); 1502 assert(uopPtr); 1503 uopPtr->setDelayedCommit(); 1504 } 1505} 1506 1507std::string 1508MicroIntImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const 1509{ 1510 std::stringstream ss; 1511 printMnemonic(ss); 1512 printReg(ss, ura); 1513 ss << ", "; 1514 printReg(ss, urb); 1515 ss << ", "; 1516 ccprintf(ss, "#%d", imm); 1517 return ss.str(); 1518} 1519 1520std::string 1521MicroIntImmXOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const 1522{ 1523 std::stringstream ss; 1524 printMnemonic(ss); 1525 printReg(ss, ura); 1526 ss << ", "; 1527 printReg(ss, urb); 1528 ss << ", "; 1529 ccprintf(ss, "#%d", imm); 1530 return ss.str(); 1531} 1532 1533std::string 1534MicroSetPCCPSR::generateDisassembly(Addr pc, const SymbolTable *symtab) const 1535{ 1536 std::stringstream ss; 1537 printMnemonic(ss); 1538 ss << "[PC,CPSR]"; 1539 return ss.str(); 1540} 1541 1542std::string 
1543MicroIntRegXOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const 1544{ 1545 std::stringstream ss; 1546 printMnemonic(ss); 1547 printReg(ss, ura); 1548 ccprintf(ss, ", "); 1549 printReg(ss, urb); 1550 printExtendOperand(false, ss, (IntRegIndex)urc, type, shiftAmt); 1551 return ss.str(); 1552} 1553 1554std::string 1555MicroIntMov::generateDisassembly(Addr pc, const SymbolTable *symtab) const 1556{ 1557 std::stringstream ss; 1558 printMnemonic(ss); 1559 printReg(ss, ura); 1560 ss << ", "; 1561 printReg(ss, urb); 1562 return ss.str(); 1563} 1564 1565std::string 1566MicroIntOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const 1567{ 1568 std::stringstream ss; 1569 printMnemonic(ss); 1570 printReg(ss, ura); 1571 ss << ", "; 1572 printReg(ss, urb); 1573 ss << ", "; 1574 printReg(ss, urc); 1575 return ss.str(); 1576} 1577 1578std::string 1579MicroMemOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const 1580{ 1581 std::stringstream ss; 1582 printMnemonic(ss); 1583 if (isFloating()) 1584 printReg(ss, ura + FP_Reg_Base); 1585 else 1586 printReg(ss, ura); 1587 ss << ", ["; 1588 printReg(ss, urb); 1589 ss << ", "; 1590 ccprintf(ss, "#%d", imm); 1591 ss << "]"; 1592 return ss.str(); 1593} 1594
|