neon.isa (7644:62873d5c2bfc) neon.isa (7760:e93e7e0caae1)
1// -*- mode:c++ -*-
2
3// Copyright (c) 2010 ARM Limited
4// All rights reserved
5//
6// The license below extends only to copyright in the software and shall
7// not be construed as granting a license to any other intellectual
8// property including but not limited to intellectual property relating

--- 617 unchanged lines hidden (view full) ---

626
627 smallUnsignedTypes = ("uint8_t", "uint16_t", "uint32_t")
628 unsignedTypes = smallUnsignedTypes + ("uint64_t",)
629 smallSignedTypes = ("int8_t", "int16_t", "int32_t")
630 signedTypes = smallSignedTypes + ("int64_t",)
631 smallTypes = smallUnsignedTypes + smallSignedTypes
632 allTypes = unsignedTypes + signedTypes
633
1// -*- mode:c++ -*-
2
3// Copyright (c) 2010 ARM Limited
4// All rights reserved
5//
6// The license below extends only to copyright in the software and shall
7// not be construed as granting a license to any other intellectual
8// property including but not limited to intellectual property relating

--- 617 unchanged lines hidden (view full) ---

626
627 smallUnsignedTypes = ("uint8_t", "uint16_t", "uint32_t")
628 unsignedTypes = smallUnsignedTypes + ("uint64_t",)
629 smallSignedTypes = ("int8_t", "int16_t", "int32_t")
630 signedTypes = smallSignedTypes + ("int64_t",)
631 smallTypes = smallUnsignedTypes + smallSignedTypes
632 allTypes = unsignedTypes + signedTypes
633
634 def threeEqualRegInst(name, Name, types, rCount, op,
634 def threeEqualRegInst(name, Name, opClass, types, rCount, op,
635 readDest=False, pairwise=False):
636 global header_output, exec_output
637 eWalkCode = simdEnabledCheckCode + '''
638 RegVect srcReg1, srcReg2, destReg;
639 '''
640 for reg in range(rCount):
641 eWalkCode += '''
642 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);

--- 35 unchanged lines hidden (view full) ---

678 for reg in range(rCount):
679 eWalkCode += '''
680 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
681 ''' % { "reg" : reg }
682 iop = InstObjParams(name, Name,
683 "RegRegRegOp",
684 { "code": eWalkCode,
685 "r_count": rCount,
635 readDest=False, pairwise=False):
636 global header_output, exec_output
637 eWalkCode = simdEnabledCheckCode + '''
638 RegVect srcReg1, srcReg2, destReg;
639 '''
640 for reg in range(rCount):
641 eWalkCode += '''
642 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);

--- 35 unchanged lines hidden (view full) ---

678 for reg in range(rCount):
679 eWalkCode += '''
680 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
681 ''' % { "reg" : reg }
682 iop = InstObjParams(name, Name,
683 "RegRegRegOp",
684 { "code": eWalkCode,
685 "r_count": rCount,
686 "predicate_test": predicateTest }, [])
686 "predicate_test": predicateTest,
687 "op_class": opClass }, [])
687 header_output += NeonRegRegRegOpDeclare.subst(iop)
688 exec_output += NeonEqualRegExecute.subst(iop)
689 for type in types:
690 substDict = { "targs" : type,
691 "class_name" : Name }
692 exec_output += NeonExecDeclare.subst(substDict)
693
688 header_output += NeonRegRegRegOpDeclare.subst(iop)
689 exec_output += NeonEqualRegExecute.subst(iop)
690 for type in types:
691 substDict = { "targs" : type,
692 "class_name" : Name }
693 exec_output += NeonExecDeclare.subst(substDict)
694
694 def threeEqualRegInstFp(name, Name, types, rCount, op,
695 def threeEqualRegInstFp(name, Name, opClass, types, rCount, op,
695 readDest=False, pairwise=False, toInt=False):
696 global header_output, exec_output
697 eWalkCode = simdEnabledCheckCode + '''
698 typedef FloatReg FloatVect[rCount];
699 FloatVect srcRegs1, srcRegs2;
700 '''
701 if toInt:
702 eWalkCode += 'RegVect destRegs;\n'

--- 59 unchanged lines hidden (view full) ---

762 else:
763 eWalkCode += '''
764 FpDestP%(reg)d = destRegs[%(reg)d];
765 ''' % { "reg" : reg }
766 iop = InstObjParams(name, Name,
767 "FpRegRegRegOp",
768 { "code": eWalkCode,
769 "r_count": rCount,
696 readDest=False, pairwise=False, toInt=False):
697 global header_output, exec_output
698 eWalkCode = simdEnabledCheckCode + '''
699 typedef FloatReg FloatVect[rCount];
700 FloatVect srcRegs1, srcRegs2;
701 '''
702 if toInt:
703 eWalkCode += 'RegVect destRegs;\n'

--- 59 unchanged lines hidden (view full) ---

763 else:
764 eWalkCode += '''
765 FpDestP%(reg)d = destRegs[%(reg)d];
766 ''' % { "reg" : reg }
767 iop = InstObjParams(name, Name,
768 "FpRegRegRegOp",
769 { "code": eWalkCode,
770 "r_count": rCount,
770 "predicate_test": predicateTest }, [])
771 "predicate_test": predicateTest,
772 "op_class": opClass }, [])
771 header_output += NeonRegRegRegOpDeclare.subst(iop)
772 exec_output += NeonEqualRegExecute.subst(iop)
773 for type in types:
774 substDict = { "targs" : type,
775 "class_name" : Name }
776 exec_output += NeonExecDeclare.subst(substDict)
777
773 header_output += NeonRegRegRegOpDeclare.subst(iop)
774 exec_output += NeonEqualRegExecute.subst(iop)
775 for type in types:
776 substDict = { "targs" : type,
777 "class_name" : Name }
778 exec_output += NeonExecDeclare.subst(substDict)
779
778 def threeUnequalRegInst(name, Name, types, op,
780 def threeUnequalRegInst(name, Name, opClass, types, op,
779 bigSrc1, bigSrc2, bigDest, readDest):
780 global header_output, exec_output
781 src1Cnt = src2Cnt = destCnt = 2
782 src1Prefix = src2Prefix = destPrefix = ''
783 if bigSrc1:
784 src1Cnt = 4
785 src1Prefix = 'Big'
786 if bigSrc2:

--- 38 unchanged lines hidden (view full) ---

825 for reg in range(destCnt):
826 eWalkCode += '''
827 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
828 ''' % { "reg" : reg }
829 iop = InstObjParams(name, Name,
830 "RegRegRegOp",
831 { "code": eWalkCode,
832 "r_count": 2,
781 bigSrc1, bigSrc2, bigDest, readDest):
782 global header_output, exec_output
783 src1Cnt = src2Cnt = destCnt = 2
784 src1Prefix = src2Prefix = destPrefix = ''
785 if bigSrc1:
786 src1Cnt = 4
787 src1Prefix = 'Big'
788 if bigSrc2:

--- 38 unchanged lines hidden (view full) ---

827 for reg in range(destCnt):
828 eWalkCode += '''
829 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
830 ''' % { "reg" : reg }
831 iop = InstObjParams(name, Name,
832 "RegRegRegOp",
833 { "code": eWalkCode,
834 "r_count": 2,
833 "predicate_test": predicateTest }, [])
835 "predicate_test": predicateTest,
836 "op_class": opClass }, [])
834 header_output += NeonRegRegRegOpDeclare.subst(iop)
835 exec_output += NeonUnequalRegExecute.subst(iop)
836 for type in types:
837 substDict = { "targs" : type,
838 "class_name" : Name }
839 exec_output += NeonExecDeclare.subst(substDict)
840
837 header_output += NeonRegRegRegOpDeclare.subst(iop)
838 exec_output += NeonUnequalRegExecute.subst(iop)
839 for type in types:
840 substDict = { "targs" : type,
841 "class_name" : Name }
842 exec_output += NeonExecDeclare.subst(substDict)
843
841 def threeRegNarrowInst(name, Name, types, op, readDest=False):
842 threeUnequalRegInst(name, Name, types, op,
844 def threeRegNarrowInst(name, Name, opClass, types, op, readDest=False):
845 threeUnequalRegInst(name, Name, opClass, types, op,
843 True, True, False, readDest)
844
846 True, True, False, readDest)
847
845 def threeRegLongInst(name, Name, types, op, readDest=False):
846 threeUnequalRegInst(name, Name, types, op,
848 def threeRegLongInst(name, Name, opClass, types, op, readDest=False):
849 threeUnequalRegInst(name, Name, opClass, types, op,
847 False, False, True, readDest)
848
850 False, False, True, readDest)
851
849 def threeRegWideInst(name, Name, types, op, readDest=False):
850 threeUnequalRegInst(name, Name, types, op,
852 def threeRegWideInst(name, Name, opClass, types, op, readDest=False):
853 threeUnequalRegInst(name, Name, opClass, types, op,
851 True, False, True, readDest)
852
854 True, False, True, readDest)
855
853 def twoEqualRegInst(name, Name, types, rCount, op, readDest=False):
856 def twoEqualRegInst(name, Name, opClass, types, rCount, op, readDest=False):
854 global header_output, exec_output
855 eWalkCode = simdEnabledCheckCode + '''
856 RegVect srcReg1, srcReg2, destReg;
857 '''
858 for reg in range(rCount):
859 eWalkCode += '''
860 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
861 srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d.uw);

--- 19 unchanged lines hidden (view full) ---

881 for reg in range(rCount):
882 eWalkCode += '''
883 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
884 ''' % { "reg" : reg }
885 iop = InstObjParams(name, Name,
886 "RegRegRegImmOp",
887 { "code": eWalkCode,
888 "r_count": rCount,
857 global header_output, exec_output
858 eWalkCode = simdEnabledCheckCode + '''
859 RegVect srcReg1, srcReg2, destReg;
860 '''
861 for reg in range(rCount):
862 eWalkCode += '''
863 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
864 srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d.uw);

--- 19 unchanged lines hidden (view full) ---

884 for reg in range(rCount):
885 eWalkCode += '''
886 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
887 ''' % { "reg" : reg }
888 iop = InstObjParams(name, Name,
889 "RegRegRegImmOp",
890 { "code": eWalkCode,
891 "r_count": rCount,
889 "predicate_test": predicateTest }, [])
892 "predicate_test": predicateTest,
893 "op_class": opClass }, [])
890 header_output += NeonRegRegRegImmOpDeclare.subst(iop)
891 exec_output += NeonEqualRegExecute.subst(iop)
892 for type in types:
893 substDict = { "targs" : type,
894 "class_name" : Name }
895 exec_output += NeonExecDeclare.subst(substDict)
896
894 header_output += NeonRegRegRegImmOpDeclare.subst(iop)
895 exec_output += NeonEqualRegExecute.subst(iop)
896 for type in types:
897 substDict = { "targs" : type,
898 "class_name" : Name }
899 exec_output += NeonExecDeclare.subst(substDict)
900
897 def twoRegLongInst(name, Name, types, op, readDest=False):
901 def twoRegLongInst(name, Name, opClass, types, op, readDest=False):
898 global header_output, exec_output
899 rCount = 2
900 eWalkCode = simdEnabledCheckCode + '''
901 RegVect srcReg1, srcReg2;
902 BigRegVect destReg;
903 '''
904 for reg in range(rCount):
905 eWalkCode += '''

--- 22 unchanged lines hidden (view full) ---

928 for reg in range(2 * rCount):
929 eWalkCode += '''
930 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
931 ''' % { "reg" : reg }
932 iop = InstObjParams(name, Name,
933 "RegRegRegImmOp",
934 { "code": eWalkCode,
935 "r_count": rCount,
902 global header_output, exec_output
903 rCount = 2
904 eWalkCode = simdEnabledCheckCode + '''
905 RegVect srcReg1, srcReg2;
906 BigRegVect destReg;
907 '''
908 for reg in range(rCount):
909 eWalkCode += '''

--- 22 unchanged lines hidden (view full) ---

932 for reg in range(2 * rCount):
933 eWalkCode += '''
934 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
935 ''' % { "reg" : reg }
936 iop = InstObjParams(name, Name,
937 "RegRegRegImmOp",
938 { "code": eWalkCode,
939 "r_count": rCount,
936 "predicate_test": predicateTest }, [])
940 "predicate_test": predicateTest,
941 "op_class": opClass }, [])
937 header_output += NeonRegRegRegImmOpDeclare.subst(iop)
938 exec_output += NeonUnequalRegExecute.subst(iop)
939 for type in types:
940 substDict = { "targs" : type,
941 "class_name" : Name }
942 exec_output += NeonExecDeclare.subst(substDict)
943
942 header_output += NeonRegRegRegImmOpDeclare.subst(iop)
943 exec_output += NeonUnequalRegExecute.subst(iop)
944 for type in types:
945 substDict = { "targs" : type,
946 "class_name" : Name }
947 exec_output += NeonExecDeclare.subst(substDict)
948
944 def twoEqualRegInstFp(name, Name, types, rCount, op, readDest=False):
949 def twoEqualRegInstFp(name, Name, opClass, types, rCount, op, readDest=False):
945 global header_output, exec_output
946 eWalkCode = simdEnabledCheckCode + '''
947 typedef FloatReg FloatVect[rCount];
948 FloatVect srcRegs1, srcRegs2, destRegs;
949 '''
950 for reg in range(rCount):
951 eWalkCode += '''
952 srcRegs1[%(reg)d] = FpOp1P%(reg)d;

--- 20 unchanged lines hidden (view full) ---

973 for reg in range(rCount):
974 eWalkCode += '''
975 FpDestP%(reg)d = destRegs[%(reg)d];
976 ''' % { "reg" : reg }
977 iop = InstObjParams(name, Name,
978 "FpRegRegRegImmOp",
979 { "code": eWalkCode,
980 "r_count": rCount,
950 global header_output, exec_output
951 eWalkCode = simdEnabledCheckCode + '''
952 typedef FloatReg FloatVect[rCount];
953 FloatVect srcRegs1, srcRegs2, destRegs;
954 '''
955 for reg in range(rCount):
956 eWalkCode += '''
957 srcRegs1[%(reg)d] = FpOp1P%(reg)d;

--- 20 unchanged lines hidden (view full) ---

978 for reg in range(rCount):
979 eWalkCode += '''
980 FpDestP%(reg)d = destRegs[%(reg)d];
981 ''' % { "reg" : reg }
982 iop = InstObjParams(name, Name,
983 "FpRegRegRegImmOp",
984 { "code": eWalkCode,
985 "r_count": rCount,
981 "predicate_test": predicateTest }, [])
986 "predicate_test": predicateTest,
987 "op_class": opClass }, [])
982 header_output += NeonRegRegRegImmOpDeclare.subst(iop)
983 exec_output += NeonEqualRegExecute.subst(iop)
984 for type in types:
985 substDict = { "targs" : type,
986 "class_name" : Name }
987 exec_output += NeonExecDeclare.subst(substDict)
988
988 header_output += NeonRegRegRegImmOpDeclare.subst(iop)
989 exec_output += NeonEqualRegExecute.subst(iop)
990 for type in types:
991 substDict = { "targs" : type,
992 "class_name" : Name }
993 exec_output += NeonExecDeclare.subst(substDict)
994
989 def twoRegShiftInst(name, Name, types, rCount, op,
995 def twoRegShiftInst(name, Name, opClass, types, rCount, op,
990 readDest=False, toInt=False, fromInt=False):
991 global header_output, exec_output
992 eWalkCode = simdEnabledCheckCode + '''
993 RegVect srcRegs1, destRegs;
994 '''
995 for reg in range(rCount):
996 eWalkCode += '''
997 srcRegs1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);

--- 31 unchanged lines hidden (view full) ---

1029 for reg in range(rCount):
1030 eWalkCode += '''
1031 FpDestP%(reg)d.uw = gtoh(destRegs.regs[%(reg)d]);
1032 ''' % { "reg" : reg }
1033 iop = InstObjParams(name, Name,
1034 "RegRegImmOp",
1035 { "code": eWalkCode,
1036 "r_count": rCount,
996 readDest=False, toInt=False, fromInt=False):
997 global header_output, exec_output
998 eWalkCode = simdEnabledCheckCode + '''
999 RegVect srcRegs1, destRegs;
1000 '''
1001 for reg in range(rCount):
1002 eWalkCode += '''
1003 srcRegs1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);

--- 31 unchanged lines hidden (view full) ---

1035 for reg in range(rCount):
1036 eWalkCode += '''
1037 FpDestP%(reg)d.uw = gtoh(destRegs.regs[%(reg)d]);
1038 ''' % { "reg" : reg }
1039 iop = InstObjParams(name, Name,
1040 "RegRegImmOp",
1041 { "code": eWalkCode,
1042 "r_count": rCount,
1037 "predicate_test": predicateTest }, [])
1043 "predicate_test": predicateTest,
1044 "op_class": opClass }, [])
1038 header_output += NeonRegRegImmOpDeclare.subst(iop)
1039 exec_output += NeonEqualRegExecute.subst(iop)
1040 for type in types:
1041 substDict = { "targs" : type,
1042 "class_name" : Name }
1043 exec_output += NeonExecDeclare.subst(substDict)
1044
1045 header_output += NeonRegRegImmOpDeclare.subst(iop)
1046 exec_output += NeonEqualRegExecute.subst(iop)
1047 for type in types:
1048 substDict = { "targs" : type,
1049 "class_name" : Name }
1050 exec_output += NeonExecDeclare.subst(substDict)
1051
1045 def twoRegNarrowShiftInst(name, Name, types, op, readDest=False):
1052 def twoRegNarrowShiftInst(name, Name, opClass, types, op, readDest=False):
1046 global header_output, exec_output
1047 eWalkCode = simdEnabledCheckCode + '''
1048 BigRegVect srcReg1;
1049 RegVect destReg;
1050 '''
1051 for reg in range(4):
1052 eWalkCode += '''
1053 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);

--- 18 unchanged lines hidden (view full) ---

1072 for reg in range(2):
1073 eWalkCode += '''
1074 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
1075 ''' % { "reg" : reg }
1076 iop = InstObjParams(name, Name,
1077 "RegRegImmOp",
1078 { "code": eWalkCode,
1079 "r_count": 2,
1053 global header_output, exec_output
1054 eWalkCode = simdEnabledCheckCode + '''
1055 BigRegVect srcReg1;
1056 RegVect destReg;
1057 '''
1058 for reg in range(4):
1059 eWalkCode += '''
1060 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);

--- 18 unchanged lines hidden (view full) ---

1079 for reg in range(2):
1080 eWalkCode += '''
1081 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
1082 ''' % { "reg" : reg }
1083 iop = InstObjParams(name, Name,
1084 "RegRegImmOp",
1085 { "code": eWalkCode,
1086 "r_count": 2,
1080 "predicate_test": predicateTest }, [])
1087 "predicate_test": predicateTest,
1088 "op_class": opClass }, [])
1081 header_output += NeonRegRegImmOpDeclare.subst(iop)
1082 exec_output += NeonUnequalRegExecute.subst(iop)
1083 for type in types:
1084 substDict = { "targs" : type,
1085 "class_name" : Name }
1086 exec_output += NeonExecDeclare.subst(substDict)
1087
1089 header_output += NeonRegRegImmOpDeclare.subst(iop)
1090 exec_output += NeonUnequalRegExecute.subst(iop)
1091 for type in types:
1092 substDict = { "targs" : type,
1093 "class_name" : Name }
1094 exec_output += NeonExecDeclare.subst(substDict)
1095
1088 def twoRegLongShiftInst(name, Name, types, op, readDest=False):
1096 def twoRegLongShiftInst(name, Name, opClass, types, op, readDest=False):
1089 global header_output, exec_output
1090 eWalkCode = simdEnabledCheckCode + '''
1091 RegVect srcReg1;
1092 BigRegVect destReg;
1093 '''
1094 for reg in range(2):
1095 eWalkCode += '''
1096 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);

--- 18 unchanged lines hidden (view full) ---

1115 for reg in range(4):
1116 eWalkCode += '''
1117 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
1118 ''' % { "reg" : reg }
1119 iop = InstObjParams(name, Name,
1120 "RegRegImmOp",
1121 { "code": eWalkCode,
1122 "r_count": 2,
1097 global header_output, exec_output
1098 eWalkCode = simdEnabledCheckCode + '''
1099 RegVect srcReg1;
1100 BigRegVect destReg;
1101 '''
1102 for reg in range(2):
1103 eWalkCode += '''
1104 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);

--- 18 unchanged lines hidden (view full) ---

1123 for reg in range(4):
1124 eWalkCode += '''
1125 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
1126 ''' % { "reg" : reg }
1127 iop = InstObjParams(name, Name,
1128 "RegRegImmOp",
1129 { "code": eWalkCode,
1130 "r_count": 2,
1123 "predicate_test": predicateTest }, [])
1131 "predicate_test": predicateTest,
1132 "op_class": opClass }, [])
1124 header_output += NeonRegRegImmOpDeclare.subst(iop)
1125 exec_output += NeonUnequalRegExecute.subst(iop)
1126 for type in types:
1127 substDict = { "targs" : type,
1128 "class_name" : Name }
1129 exec_output += NeonExecDeclare.subst(substDict)
1130
1133 header_output += NeonRegRegImmOpDeclare.subst(iop)
1134 exec_output += NeonUnequalRegExecute.subst(iop)
1135 for type in types:
1136 substDict = { "targs" : type,
1137 "class_name" : Name }
1138 exec_output += NeonExecDeclare.subst(substDict)
1139
1131 def twoRegMiscInst(name, Name, types, rCount, op, readDest=False):
1140 def twoRegMiscInst(name, Name, opClass, types, rCount, op, readDest=False):
1132 global header_output, exec_output
1133 eWalkCode = simdEnabledCheckCode + '''
1134 RegVect srcReg1, destReg;
1135 '''
1136 for reg in range(rCount):
1137 eWalkCode += '''
1138 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
1139 ''' % { "reg" : reg }

--- 17 unchanged lines hidden (view full) ---

1157 for reg in range(rCount):
1158 eWalkCode += '''
1159 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
1160 ''' % { "reg" : reg }
1161 iop = InstObjParams(name, Name,
1162 "RegRegOp",
1163 { "code": eWalkCode,
1164 "r_count": rCount,
1141 global header_output, exec_output
1142 eWalkCode = simdEnabledCheckCode + '''
1143 RegVect srcReg1, destReg;
1144 '''
1145 for reg in range(rCount):
1146 eWalkCode += '''
1147 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
1148 ''' % { "reg" : reg }

--- 17 unchanged lines hidden (view full) ---

1166 for reg in range(rCount):
1167 eWalkCode += '''
1168 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
1169 ''' % { "reg" : reg }
1170 iop = InstObjParams(name, Name,
1171 "RegRegOp",
1172 { "code": eWalkCode,
1173 "r_count": rCount,
1165 "predicate_test": predicateTest }, [])
1174 "predicate_test": predicateTest,
1175 "op_class": opClass }, [])
1166 header_output += NeonRegRegOpDeclare.subst(iop)
1167 exec_output += NeonEqualRegExecute.subst(iop)
1168 for type in types:
1169 substDict = { "targs" : type,
1170 "class_name" : Name }
1171 exec_output += NeonExecDeclare.subst(substDict)
1172
1176 header_output += NeonRegRegOpDeclare.subst(iop)
1177 exec_output += NeonEqualRegExecute.subst(iop)
1178 for type in types:
1179 substDict = { "targs" : type,
1180 "class_name" : Name }
1181 exec_output += NeonExecDeclare.subst(substDict)
1182
1173 def twoRegMiscScInst(name, Name, types, rCount, op, readDest=False):
1183 def twoRegMiscScInst(name, Name, opClass, types, rCount, op, readDest=False):
1174 global header_output, exec_output
1175 eWalkCode = simdEnabledCheckCode + '''
1176 RegVect srcReg1, destReg;
1177 '''
1178 for reg in range(rCount):
1179 eWalkCode += '''
1180 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
1181 ''' % { "reg" : reg }

--- 16 unchanged lines hidden (view full) ---

1198 for reg in range(rCount):
1199 eWalkCode += '''
1200 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
1201 ''' % { "reg" : reg }
1202 iop = InstObjParams(name, Name,
1203 "RegRegImmOp",
1204 { "code": eWalkCode,
1205 "r_count": rCount,
1184 global header_output, exec_output
1185 eWalkCode = simdEnabledCheckCode + '''
1186 RegVect srcReg1, destReg;
1187 '''
1188 for reg in range(rCount):
1189 eWalkCode += '''
1190 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
1191 ''' % { "reg" : reg }

--- 16 unchanged lines hidden (view full) ---

1208 for reg in range(rCount):
1209 eWalkCode += '''
1210 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
1211 ''' % { "reg" : reg }
1212 iop = InstObjParams(name, Name,
1213 "RegRegImmOp",
1214 { "code": eWalkCode,
1215 "r_count": rCount,
1206 "predicate_test": predicateTest }, [])
1216 "predicate_test": predicateTest,
1217 "op_class": opClass }, [])
1207 header_output += NeonRegRegImmOpDeclare.subst(iop)
1208 exec_output += NeonEqualRegExecute.subst(iop)
1209 for type in types:
1210 substDict = { "targs" : type,
1211 "class_name" : Name }
1212 exec_output += NeonExecDeclare.subst(substDict)
1213
1218 header_output += NeonRegRegImmOpDeclare.subst(iop)
1219 exec_output += NeonEqualRegExecute.subst(iop)
1220 for type in types:
1221 substDict = { "targs" : type,
1222 "class_name" : Name }
1223 exec_output += NeonExecDeclare.subst(substDict)
1224
1214 def twoRegMiscScramble(name, Name, types, rCount, op, readDest=False):
1225 def twoRegMiscScramble(name, Name, opClass, types, rCount, op, readDest=False):
1215 global header_output, exec_output
1216 eWalkCode = simdEnabledCheckCode + '''
1217 RegVect srcReg1, destReg;
1218 '''
1219 for reg in range(rCount):
1220 eWalkCode += '''
1221 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
1222 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);

--- 9 unchanged lines hidden (view full) ---

1232 eWalkCode += '''
1233 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
1234 FpOp1P%(reg)d.uw = gtoh(srcReg1.regs[%(reg)d]);
1235 ''' % { "reg" : reg }
1236 iop = InstObjParams(name, Name,
1237 "RegRegOp",
1238 { "code": eWalkCode,
1239 "r_count": rCount,
1226 global header_output, exec_output
1227 eWalkCode = simdEnabledCheckCode + '''
1228 RegVect srcReg1, destReg;
1229 '''
1230 for reg in range(rCount):
1231 eWalkCode += '''
1232 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
1233 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);

--- 9 unchanged lines hidden (view full) ---

1243 eWalkCode += '''
1244 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
1245 FpOp1P%(reg)d.uw = gtoh(srcReg1.regs[%(reg)d]);
1246 ''' % { "reg" : reg }
1247 iop = InstObjParams(name, Name,
1248 "RegRegOp",
1249 { "code": eWalkCode,
1250 "r_count": rCount,
1240 "predicate_test": predicateTest }, [])
1251 "predicate_test": predicateTest,
1252 "op_class": opClass }, [])
1241 header_output += NeonRegRegOpDeclare.subst(iop)
1242 exec_output += NeonEqualRegExecute.subst(iop)
1243 for type in types:
1244 substDict = { "targs" : type,
1245 "class_name" : Name }
1246 exec_output += NeonExecDeclare.subst(substDict)
1247
1253 header_output += NeonRegRegOpDeclare.subst(iop)
1254 exec_output += NeonEqualRegExecute.subst(iop)
1255 for type in types:
1256 substDict = { "targs" : type,
1257 "class_name" : Name }
1258 exec_output += NeonExecDeclare.subst(substDict)
1259
1248 def twoRegMiscInstFp(name, Name, types, rCount, op,
1260 def twoRegMiscInstFp(name, Name, opClass, types, rCount, op,
1249 readDest=False, toInt=False):
1250 global header_output, exec_output
1251 eWalkCode = simdEnabledCheckCode + '''
1252 typedef FloatReg FloatVect[rCount];
1253 FloatVect srcRegs1;
1254 '''
1255 if toInt:
1256 eWalkCode += 'RegVect destRegs;\n'

--- 40 unchanged lines hidden (view full) ---

1297 else:
1298 eWalkCode += '''
1299 FpDestP%(reg)d = destRegs[%(reg)d];
1300 ''' % { "reg" : reg }
1301 iop = InstObjParams(name, Name,
1302 "FpRegRegOp",
1303 { "code": eWalkCode,
1304 "r_count": rCount,
1261 readDest=False, toInt=False):
1262 global header_output, exec_output
1263 eWalkCode = simdEnabledCheckCode + '''
1264 typedef FloatReg FloatVect[rCount];
1265 FloatVect srcRegs1;
1266 '''
1267 if toInt:
1268 eWalkCode += 'RegVect destRegs;\n'

--- 40 unchanged lines hidden (view full) ---

1309 else:
1310 eWalkCode += '''
1311 FpDestP%(reg)d = destRegs[%(reg)d];
1312 ''' % { "reg" : reg }
1313 iop = InstObjParams(name, Name,
1314 "FpRegRegOp",
1315 { "code": eWalkCode,
1316 "r_count": rCount,
1305 "predicate_test": predicateTest }, [])
1317 "predicate_test": predicateTest,
1318 "op_class": opClass }, [])
1306 header_output += NeonRegRegOpDeclare.subst(iop)
1307 exec_output += NeonEqualRegExecute.subst(iop)
1308 for type in types:
1309 substDict = { "targs" : type,
1310 "class_name" : Name }
1311 exec_output += NeonExecDeclare.subst(substDict)
1312
1319 header_output += NeonRegRegOpDeclare.subst(iop)
1320 exec_output += NeonEqualRegExecute.subst(iop)
1321 for type in types:
1322 substDict = { "targs" : type,
1323 "class_name" : Name }
1324 exec_output += NeonExecDeclare.subst(substDict)
1325
1313 def twoRegCondenseInst(name, Name, types, rCount, op, readDest=False):
1326 def twoRegCondenseInst(name, Name, opClass, types, rCount, op, readDest=False):
1314 global header_output, exec_output
1315 eWalkCode = simdEnabledCheckCode + '''
1316 RegVect srcRegs;
1317 BigRegVect destReg;
1318 '''
1319 for reg in range(rCount):
1320 eWalkCode += '''
1321 srcRegs.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);

--- 18 unchanged lines hidden (view full) ---

1340 for reg in range(rCount):
1341 eWalkCode += '''
1342 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
1343 ''' % { "reg" : reg }
1344 iop = InstObjParams(name, Name,
1345 "RegRegOp",
1346 { "code": eWalkCode,
1347 "r_count": rCount,
1327 global header_output, exec_output
1328 eWalkCode = simdEnabledCheckCode + '''
1329 RegVect srcRegs;
1330 BigRegVect destReg;
1331 '''
1332 for reg in range(rCount):
1333 eWalkCode += '''
1334 srcRegs.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);

--- 18 unchanged lines hidden (view full) ---

1353 for reg in range(rCount):
1354 eWalkCode += '''
1355 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
1356 ''' % { "reg" : reg }
1357 iop = InstObjParams(name, Name,
1358 "RegRegOp",
1359 { "code": eWalkCode,
1360 "r_count": rCount,
1348 "predicate_test": predicateTest }, [])
1361 "predicate_test": predicateTest,
1362 "op_class": opClass }, [])
1349 header_output += NeonRegRegOpDeclare.subst(iop)
1350 exec_output += NeonUnequalRegExecute.subst(iop)
1351 for type in types:
1352 substDict = { "targs" : type,
1353 "class_name" : Name }
1354 exec_output += NeonExecDeclare.subst(substDict)
1355
1363 header_output += NeonRegRegOpDeclare.subst(iop)
1364 exec_output += NeonUnequalRegExecute.subst(iop)
1365 for type in types:
1366 substDict = { "targs" : type,
1367 "class_name" : Name }
1368 exec_output += NeonExecDeclare.subst(substDict)
1369
1356 def twoRegNarrowMiscInst(name, Name, types, op, readDest=False):
1370 def twoRegNarrowMiscInst(name, Name, opClass, types, op, readDest=False):
1357 global header_output, exec_output
1358 eWalkCode = simdEnabledCheckCode + '''
1359 BigRegVect srcReg1;
1360 RegVect destReg;
1361 '''
1362 for reg in range(4):
1363 eWalkCode += '''
1364 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);

--- 18 unchanged lines hidden (view full) ---

1383 for reg in range(2):
1384 eWalkCode += '''
1385 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
1386 ''' % { "reg" : reg }
1387 iop = InstObjParams(name, Name,
1388 "RegRegOp",
1389 { "code": eWalkCode,
1390 "r_count": 2,
1371 global header_output, exec_output
1372 eWalkCode = simdEnabledCheckCode + '''
1373 BigRegVect srcReg1;
1374 RegVect destReg;
1375 '''
1376 for reg in range(4):
1377 eWalkCode += '''
1378 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);

--- 18 unchanged lines hidden (view full) ---

1397 for reg in range(2):
1398 eWalkCode += '''
1399 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
1400 ''' % { "reg" : reg }
1401 iop = InstObjParams(name, Name,
1402 "RegRegOp",
1403 { "code": eWalkCode,
1404 "r_count": 2,
1391 "predicate_test": predicateTest }, [])
1405 "predicate_test": predicateTest,
1406 "op_class": opClass }, [])
1392 header_output += NeonRegRegOpDeclare.subst(iop)
1393 exec_output += NeonUnequalRegExecute.subst(iop)
1394 for type in types:
1395 substDict = { "targs" : type,
1396 "class_name" : Name }
1397 exec_output += NeonExecDeclare.subst(substDict)
1398
1407 header_output += NeonRegRegOpDeclare.subst(iop)
1408 exec_output += NeonUnequalRegExecute.subst(iop)
1409 for type in types:
1410 substDict = { "targs" : type,
1411 "class_name" : Name }
1412 exec_output += NeonExecDeclare.subst(substDict)
1413
1399 def oneRegImmInst(name, Name, types, rCount, op, readDest=False):
1414 def oneRegImmInst(name, Name, opClass, types, rCount, op, readDest=False):
1400 global header_output, exec_output
1401 eWalkCode = simdEnabledCheckCode + '''
1402 RegVect destReg;
1403 '''
1404 if readDest:
1405 for reg in range(rCount):
1406 eWalkCode += '''
1407 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);

--- 12 unchanged lines hidden (view full) ---

1420 for reg in range(rCount):
1421 eWalkCode += '''
1422 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
1423 ''' % { "reg" : reg }
1424 iop = InstObjParams(name, Name,
1425 "RegImmOp",
1426 { "code": eWalkCode,
1427 "r_count": rCount,
1415 global header_output, exec_output
1416 eWalkCode = simdEnabledCheckCode + '''
1417 RegVect destReg;
1418 '''
1419 if readDest:
1420 for reg in range(rCount):
1421 eWalkCode += '''
1422 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);

--- 12 unchanged lines hidden (view full) ---

1435 for reg in range(rCount):
1436 eWalkCode += '''
1437 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
1438 ''' % { "reg" : reg }
1439 iop = InstObjParams(name, Name,
1440 "RegImmOp",
1441 { "code": eWalkCode,
1442 "r_count": rCount,
1428 "predicate_test": predicateTest }, [])
1443 "predicate_test": predicateTest,
1444 "op_class": opClass }, [])
1429 header_output += NeonRegImmOpDeclare.subst(iop)
1430 exec_output += NeonEqualRegExecute.subst(iop)
1431 for type in types:
1432 substDict = { "targs" : type,
1433 "class_name" : Name }
1434 exec_output += NeonExecDeclare.subst(substDict)
1435
1445 header_output += NeonRegImmOpDeclare.subst(iop)
1446 exec_output += NeonEqualRegExecute.subst(iop)
1447 for type in types:
1448 substDict = { "targs" : type,
1449 "class_name" : Name }
1450 exec_output += NeonExecDeclare.subst(substDict)
1451
1436 def twoRegLongMiscInst(name, Name, types, op, readDest=False):
1452 def twoRegLongMiscInst(name, Name, opClass, types, op, readDest=False):
1437 global header_output, exec_output
1438 eWalkCode = simdEnabledCheckCode + '''
1439 RegVect srcReg1;
1440 BigRegVect destReg;
1441 '''
1442 for reg in range(2):
1443 eWalkCode += '''
1444 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);

--- 18 unchanged lines hidden (view full) ---

1463 for reg in range(4):
1464 eWalkCode += '''
1465 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
1466 ''' % { "reg" : reg }
1467 iop = InstObjParams(name, Name,
1468 "RegRegOp",
1469 { "code": eWalkCode,
1470 "r_count": 2,
1453 global header_output, exec_output
1454 eWalkCode = simdEnabledCheckCode + '''
1455 RegVect srcReg1;
1456 BigRegVect destReg;
1457 '''
1458 for reg in range(2):
1459 eWalkCode += '''
1460 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);

--- 18 unchanged lines hidden (view full) ---

1479 for reg in range(4):
1480 eWalkCode += '''
1481 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
1482 ''' % { "reg" : reg }
1483 iop = InstObjParams(name, Name,
1484 "RegRegOp",
1485 { "code": eWalkCode,
1486 "r_count": 2,
1471 "predicate_test": predicateTest }, [])
1487 "predicate_test": predicateTest,
1488 "op_class": opClass }, [])
1472 header_output += NeonRegRegOpDeclare.subst(iop)
1473 exec_output += NeonUnequalRegExecute.subst(iop)
1474 for type in types:
1475 substDict = { "targs" : type,
1476 "class_name" : Name }
1477 exec_output += NeonExecDeclare.subst(substDict)
1478
1479 vhaddCode = '''
1480 Element carryBit =
1481 (((unsigned)srcElem1 & 0x1) +
1482 ((unsigned)srcElem2 & 0x1)) >> 1;
1483 // Use division instead of a shift to ensure the sign extension works
1484 // right. The compiler will figure out if it can be a shift. Mask the
1485 // inputs so they get truncated correctly.
1486 destElem = (((srcElem1 & ~(Element)1) / 2) +
1487 ((srcElem2 & ~(Element)1) / 2)) + carryBit;
1488 '''
1489 header_output += NeonRegRegOpDeclare.subst(iop)
1490 exec_output += NeonUnequalRegExecute.subst(iop)
1491 for type in types:
1492 substDict = { "targs" : type,
1493 "class_name" : Name }
1494 exec_output += NeonExecDeclare.subst(substDict)
1495
1496 vhaddCode = '''
1497 Element carryBit =
1498 (((unsigned)srcElem1 & 0x1) +
1499 ((unsigned)srcElem2 & 0x1)) >> 1;
1500 // Use division instead of a shift to ensure the sign extension works
1501 // right. The compiler will figure out if it can be a shift. Mask the
1502 // inputs so they get truncated correctly.
1503 destElem = (((srcElem1 & ~(Element)1) / 2) +
1504 ((srcElem2 & ~(Element)1) / 2)) + carryBit;
1505 '''
1489 threeEqualRegInst("vhadd", "VhaddD", allTypes, 2, vhaddCode)
1490 threeEqualRegInst("vhadd", "VhaddQ", allTypes, 4, vhaddCode)
1506 threeEqualRegInst("vhadd", "VhaddD", "SimdAddOp", allTypes, 2, vhaddCode)
1507 threeEqualRegInst("vhadd", "VhaddQ", "SimdAddOp", allTypes, 4, vhaddCode)
1491
1492 vrhaddCode = '''
1493 Element carryBit =
1494 (((unsigned)srcElem1 & 0x1) +
1495 ((unsigned)srcElem2 & 0x1) + 1) >> 1;
1496 // Use division instead of a shift to ensure the sign extension works
1497 // right. The compiler will figure out if it can be a shift. Mask the
1498 // inputs so they get truncated correctly.
1499 destElem = (((srcElem1 & ~(Element)1) / 2) +
1500 ((srcElem2 & ~(Element)1) / 2)) + carryBit;
1501 '''
1508
1509 vrhaddCode = '''
1510 Element carryBit =
1511 (((unsigned)srcElem1 & 0x1) +
1512 ((unsigned)srcElem2 & 0x1) + 1) >> 1;
1513 // Use division instead of a shift to ensure the sign extension works
1514 // right. The compiler will figure out if it can be a shift. Mask the
1515 // inputs so they get truncated correctly.
1516 destElem = (((srcElem1 & ~(Element)1) / 2) +
1517 ((srcElem2 & ~(Element)1) / 2)) + carryBit;
1518 '''
1502 threeEqualRegInst("vrhadd", "VrhaddD", allTypes, 2, vrhaddCode)
1503 threeEqualRegInst("vrhadd", "VrhaddQ", allTypes, 4, vrhaddCode)
1519 threeEqualRegInst("vrhadd", "VrhaddD", "SimdAddOp", allTypes, 2, vrhaddCode)
1520 threeEqualRegInst("vrhadd", "VrhaddQ", "SimdAddOp", allTypes, 4, vrhaddCode)
1504
1505 vhsubCode = '''
1506 Element barrowBit =
1507 (((srcElem1 & 0x1) - (srcElem2 & 0x1)) >> 1) & 0x1;
1508 // Use division instead of a shift to ensure the sign extension works
1509 // right. The compiler will figure out if it can be a shift. Mask the
1510 // inputs so they get truncated correctly.
1511 destElem = (((srcElem1 & ~(Element)1) / 2) -
1512 ((srcElem2 & ~(Element)1) / 2)) - barrowBit;
1513 '''
1521
1522 vhsubCode = '''
1523 Element barrowBit =
1524 (((srcElem1 & 0x1) - (srcElem2 & 0x1)) >> 1) & 0x1;
1525 // Use division instead of a shift to ensure the sign extension works
1526 // right. The compiler will figure out if it can be a shift. Mask the
1527 // inputs so they get truncated correctly.
1528 destElem = (((srcElem1 & ~(Element)1) / 2) -
1529 ((srcElem2 & ~(Element)1) / 2)) - barrowBit;
1530 '''
1514 threeEqualRegInst("vhsub", "VhsubD", allTypes, 2, vhsubCode)
1515 threeEqualRegInst("vhsub", "VhsubQ", allTypes, 4, vhsubCode)
1531 threeEqualRegInst("vhsub", "VhsubD", "SimdAddOp", allTypes, 2, vhsubCode)
1532 threeEqualRegInst("vhsub", "VhsubQ", "SimdAddOp", allTypes, 4, vhsubCode)
1516
1517 vandCode = '''
1518 destElem = srcElem1 & srcElem2;
1519 '''
1533
1534 vandCode = '''
1535 destElem = srcElem1 & srcElem2;
1536 '''
1520 threeEqualRegInst("vand", "VandD", unsignedTypes, 2, vandCode)
1521 threeEqualRegInst("vand", "VandQ", unsignedTypes, 4, vandCode)
1537 threeEqualRegInst("vand", "VandD", "SimdAluOp", unsignedTypes, 2, vandCode)
1538 threeEqualRegInst("vand", "VandQ", "SimdAluOp", unsignedTypes, 4, vandCode)
1522
1523 vbicCode = '''
1524 destElem = srcElem1 & ~srcElem2;
1525 '''
1539
1540 vbicCode = '''
1541 destElem = srcElem1 & ~srcElem2;
1542 '''
1526 threeEqualRegInst("vbic", "VbicD", unsignedTypes, 2, vbicCode)
1527 threeEqualRegInst("vbic", "VbicQ", unsignedTypes, 4, vbicCode)
1543 threeEqualRegInst("vbic", "VbicD", "SimdAluOp", unsignedTypes, 2, vbicCode)
1544 threeEqualRegInst("vbic", "VbicQ", "SimdAluOp", unsignedTypes, 4, vbicCode)
1528
1529 vorrCode = '''
1530 destElem = srcElem1 | srcElem2;
1531 '''
1545
1546 vorrCode = '''
1547 destElem = srcElem1 | srcElem2;
1548 '''
1532 threeEqualRegInst("vorr", "VorrD", unsignedTypes, 2, vorrCode)
1533 threeEqualRegInst("vorr", "VorrQ", unsignedTypes, 4, vorrCode)
1549 threeEqualRegInst("vorr", "VorrD", "SimdAluOp", unsignedTypes, 2, vorrCode)
1550 threeEqualRegInst("vorr", "VorrQ", "SimdAluOp", unsignedTypes, 4, vorrCode)
1534
1551
1535 threeEqualRegInst("vmov", "VmovD", unsignedTypes, 2, vorrCode)
1536 threeEqualRegInst("vmov", "VmovQ", unsignedTypes, 4, vorrCode)
1552 threeEqualRegInst("vmov", "VmovD", "SimdMiscOp", unsignedTypes, 2, vorrCode)
1553 threeEqualRegInst("vmov", "VmovQ", "SimdMiscOp", unsignedTypes, 4, vorrCode)
1537
1538 vornCode = '''
1539 destElem = srcElem1 | ~srcElem2;
1540 '''
1554
1555 vornCode = '''
1556 destElem = srcElem1 | ~srcElem2;
1557 '''
1541 threeEqualRegInst("vorn", "VornD", unsignedTypes, 2, vornCode)
1542 threeEqualRegInst("vorn", "VornQ", unsignedTypes, 4, vornCode)
1558 threeEqualRegInst("vorn", "VornD", "SimdAluOp", unsignedTypes, 2, vornCode)
1559 threeEqualRegInst("vorn", "VornQ", "SimdAluOp", unsignedTypes, 4, vornCode)
1543
1544 veorCode = '''
1545 destElem = srcElem1 ^ srcElem2;
1546 '''
1560
1561 veorCode = '''
1562 destElem = srcElem1 ^ srcElem2;
1563 '''
1547 threeEqualRegInst("veor", "VeorD", unsignedTypes, 2, veorCode)
1548 threeEqualRegInst("veor", "VeorQ", unsignedTypes, 4, veorCode)
1564 threeEqualRegInst("veor", "VeorD", "SimdAluOp", unsignedTypes, 2, veorCode)
1565 threeEqualRegInst("veor", "VeorQ", "SimdAluOp", unsignedTypes, 4, veorCode)
1549
1550 vbifCode = '''
1551 destElem = (destElem & srcElem2) | (srcElem1 & ~srcElem2);
1552 '''
1566
1567 vbifCode = '''
1568 destElem = (destElem & srcElem2) | (srcElem1 & ~srcElem2);
1569 '''
1553 threeEqualRegInst("vbif", "VbifD", unsignedTypes, 2, vbifCode, True)
1554 threeEqualRegInst("vbif", "VbifQ", unsignedTypes, 4, vbifCode, True)
1570 threeEqualRegInst("vbif", "VbifD", "SimdAluOp", unsignedTypes, 2, vbifCode, True)
1571 threeEqualRegInst("vbif", "VbifQ", "SimdAluOp", unsignedTypes, 4, vbifCode, True)
1555 vbitCode = '''
1556 destElem = (srcElem1 & srcElem2) | (destElem & ~srcElem2);
1557 '''
1572 vbitCode = '''
1573 destElem = (srcElem1 & srcElem2) | (destElem & ~srcElem2);
1574 '''
1558 threeEqualRegInst("vbit", "VbitD", unsignedTypes, 2, vbitCode, True)
1559 threeEqualRegInst("vbit", "VbitQ", unsignedTypes, 4, vbitCode, True)
1575 threeEqualRegInst("vbit", "VbitD", "SimdAluOp", unsignedTypes, 2, vbitCode, True)
1576 threeEqualRegInst("vbit", "VbitQ", "SimdAluOp", unsignedTypes, 4, vbitCode, True)
1560 vbslCode = '''
1561 destElem = (srcElem1 & destElem) | (srcElem2 & ~destElem);
1562 '''
1577 vbslCode = '''
1578 destElem = (srcElem1 & destElem) | (srcElem2 & ~destElem);
1579 '''
1563 threeEqualRegInst("vbsl", "VbslD", unsignedTypes, 2, vbslCode, True)
1564 threeEqualRegInst("vbsl", "VbslQ", unsignedTypes, 4, vbslCode, True)
1580 threeEqualRegInst("vbsl", "VbslD", "SimdAluOp", unsignedTypes, 2, vbslCode, True)
1581 threeEqualRegInst("vbsl", "VbslQ", "SimdAluOp", unsignedTypes, 4, vbslCode, True)
1565
1566 vmaxCode = '''
1567 destElem = (srcElem1 > srcElem2) ? srcElem1 : srcElem2;
1568 '''
1582
1583 vmaxCode = '''
1584 destElem = (srcElem1 > srcElem2) ? srcElem1 : srcElem2;
1585 '''
1569 threeEqualRegInst("vmax", "VmaxD", allTypes, 2, vmaxCode)
1570 threeEqualRegInst("vmax", "VmaxQ", allTypes, 4, vmaxCode)
1586 threeEqualRegInst("vmax", "VmaxD", "SimdCmpOp", allTypes, 2, vmaxCode)
1587 threeEqualRegInst("vmax", "VmaxQ", "SimdCmpOp", allTypes, 4, vmaxCode)
1571
1572 vminCode = '''
1573 destElem = (srcElem1 < srcElem2) ? srcElem1 : srcElem2;
1574 '''
1588
1589 vminCode = '''
1590 destElem = (srcElem1 < srcElem2) ? srcElem1 : srcElem2;
1591 '''
1575 threeEqualRegInst("vmin", "VminD", allTypes, 2, vminCode)
1576 threeEqualRegInst("vmin", "VminQ", allTypes, 4, vminCode)
1592 threeEqualRegInst("vmin", "VminD", "SimdCmpOp", allTypes, 2, vminCode)
1593 threeEqualRegInst("vmin", "VminQ", "SimdCmpOp", allTypes, 4, vminCode)
1577
1578 vaddCode = '''
1579 destElem = srcElem1 + srcElem2;
1580 '''
1594
1595 vaddCode = '''
1596 destElem = srcElem1 + srcElem2;
1597 '''
1581 threeEqualRegInst("vadd", "NVaddD", unsignedTypes, 2, vaddCode)
1582 threeEqualRegInst("vadd", "NVaddQ", unsignedTypes, 4, vaddCode)
1598 threeEqualRegInst("vadd", "NVaddD", "SimdAddOp", unsignedTypes, 2, vaddCode)
1599 threeEqualRegInst("vadd", "NVaddQ", "SimdAddOp", unsignedTypes, 4, vaddCode)
1583
1600
1584 threeEqualRegInst("vpadd", "NVpaddD", unsignedTypes,
1601 threeEqualRegInst("vpadd", "NVpaddD", "SimdAddOp", unsignedTypes,
1585 2, vaddCode, pairwise=True)
1602 2, vaddCode, pairwise=True)
1586 threeEqualRegInst("vpadd", "NVpaddQ", unsignedTypes,
1603 threeEqualRegInst("vpadd", "NVpaddQ", "SimdAddOp", unsignedTypes,
1587 4, vaddCode, pairwise=True)
1588 vaddlwCode = '''
1589 destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
1590 '''
1604 4, vaddCode, pairwise=True)
1605 vaddlwCode = '''
1606 destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
1607 '''
1591 threeRegLongInst("vaddl", "Vaddl", smallTypes, vaddlwCode)
1592 threeRegWideInst("vaddw", "Vaddw", smallTypes, vaddlwCode)
1608 threeRegLongInst("vaddl", "Vaddl", "SimdAddOp", smallTypes, vaddlwCode)
1609 threeRegWideInst("vaddw", "Vaddw", "SimdAddOp", smallTypes, vaddlwCode)
1593 vaddhnCode = '''
1594 destElem = ((BigElement)srcElem1 + (BigElement)srcElem2) >>
1595 (sizeof(Element) * 8);
1596 '''
1610 vaddhnCode = '''
1611 destElem = ((BigElement)srcElem1 + (BigElement)srcElem2) >>
1612 (sizeof(Element) * 8);
1613 '''
1597 threeRegNarrowInst("vaddhn", "Vaddhn", smallTypes, vaddhnCode)
1614 threeRegNarrowInst("vaddhn", "Vaddhn", "SimdAddOp", smallTypes, vaddhnCode)
1598 vraddhnCode = '''
1599 destElem = ((BigElement)srcElem1 + (BigElement)srcElem2 +
1600 ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
1601 (sizeof(Element) * 8);
1602 '''
1615 vraddhnCode = '''
1616 destElem = ((BigElement)srcElem1 + (BigElement)srcElem2 +
1617 ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
1618 (sizeof(Element) * 8);
1619 '''
1603 threeRegNarrowInst("vraddhn", "Vraddhn", smallTypes, vraddhnCode)
1620 threeRegNarrowInst("vraddhn", "Vraddhn", "SimdAddOp", smallTypes, vraddhnCode)
1604
1605 vsubCode = '''
1606 destElem = srcElem1 - srcElem2;
1607 '''
1621
1622 vsubCode = '''
1623 destElem = srcElem1 - srcElem2;
1624 '''
1608 threeEqualRegInst("vsub", "NVsubD", unsignedTypes, 2, vsubCode)
1609 threeEqualRegInst("vsub", "NVsubQ", unsignedTypes, 4, vsubCode)
1625 threeEqualRegInst("vsub", "NVsubD", "SimdAddOp", unsignedTypes, 2, vsubCode)
1626 threeEqualRegInst("vsub", "NVsubQ", "SimdAddOp", unsignedTypes, 4, vsubCode)
1610 vsublwCode = '''
1611 destElem = (BigElement)srcElem1 - (BigElement)srcElem2;
1612 '''
1627 vsublwCode = '''
1628 destElem = (BigElement)srcElem1 - (BigElement)srcElem2;
1629 '''
1613 threeRegLongInst("vsubl", "Vsubl", smallTypes, vsublwCode)
1614 threeRegWideInst("vsubw", "Vsubw", smallTypes, vsublwCode)
1630 threeRegLongInst("vsubl", "Vsubl", "SimdAddOp", smallTypes, vsublwCode)
1631 threeRegWideInst("vsubw", "Vsubw", "SimdAddOp", smallTypes, vsublwCode)
1615
1616 vqaddUCode = '''
1617 destElem = srcElem1 + srcElem2;
1618 FPSCR fpscr = (FPSCR)Fpscr;
1619 if (destElem < srcElem1 || destElem < srcElem2) {
1620 destElem = (Element)(-1);
1621 fpscr.qc = 1;
1622 }
1623 Fpscr = fpscr;
1624 '''
1632
1633 vqaddUCode = '''
1634 destElem = srcElem1 + srcElem2;
1635 FPSCR fpscr = (FPSCR)Fpscr;
1636 if (destElem < srcElem1 || destElem < srcElem2) {
1637 destElem = (Element)(-1);
1638 fpscr.qc = 1;
1639 }
1640 Fpscr = fpscr;
1641 '''
1625 threeEqualRegInst("vqadd", "VqaddUD", unsignedTypes, 2, vqaddUCode)
1626 threeEqualRegInst("vqadd", "VqaddUQ", unsignedTypes, 4, vqaddUCode)
1642 threeEqualRegInst("vqadd", "VqaddUD", "SimdAddOp", unsignedTypes, 2, vqaddUCode)
1643 threeEqualRegInst("vqadd", "VqaddUQ", "SimdAddOp", unsignedTypes, 4, vqaddUCode)
1627 vsubhnCode = '''
1628 destElem = ((BigElement)srcElem1 - (BigElement)srcElem2) >>
1629 (sizeof(Element) * 8);
1630 '''
1644 vsubhnCode = '''
1645 destElem = ((BigElement)srcElem1 - (BigElement)srcElem2) >>
1646 (sizeof(Element) * 8);
1647 '''
1631 threeRegNarrowInst("vsubhn", "Vsubhn", smallTypes, vsubhnCode)
1648 threeRegNarrowInst("vsubhn", "Vsubhn", "SimdAddOp", smallTypes, vsubhnCode)
1632 vrsubhnCode = '''
1633 destElem = ((BigElement)srcElem1 - (BigElement)srcElem2 +
1634 ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
1635 (sizeof(Element) * 8);
1636 '''
1649 vrsubhnCode = '''
1650 destElem = ((BigElement)srcElem1 - (BigElement)srcElem2 +
1651 ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
1652 (sizeof(Element) * 8);
1653 '''
1637 threeRegNarrowInst("vrsubhn", "Vrsubhn", smallTypes, vrsubhnCode)
1654 threeRegNarrowInst("vrsubhn", "Vrsubhn", "SimdAddOp", smallTypes, vrsubhnCode)
1638
1639 vqaddSCode = '''
1640 destElem = srcElem1 + srcElem2;
1641 FPSCR fpscr = (FPSCR)Fpscr;
1642 bool negDest = (destElem < 0);
1643 bool negSrc1 = (srcElem1 < 0);
1644 bool negSrc2 = (srcElem2 < 0);
1645 if ((negDest != negSrc1) && (negSrc1 == negSrc2)) {
1646 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
1647 if (negDest)
1648 destElem -= 1;
1649 fpscr.qc = 1;
1650 }
1651 Fpscr = fpscr;
1652 '''
1655
1656 vqaddSCode = '''
1657 destElem = srcElem1 + srcElem2;
1658 FPSCR fpscr = (FPSCR)Fpscr;
1659 bool negDest = (destElem < 0);
1660 bool negSrc1 = (srcElem1 < 0);
1661 bool negSrc2 = (srcElem2 < 0);
1662 if ((negDest != negSrc1) && (negSrc1 == negSrc2)) {
1663 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
1664 if (negDest)
1665 destElem -= 1;
1666 fpscr.qc = 1;
1667 }
1668 Fpscr = fpscr;
1669 '''
1653 threeEqualRegInst("vqadd", "VqaddSD", signedTypes, 2, vqaddSCode)
1654 threeEqualRegInst("vqadd", "VqaddSQ", signedTypes, 4, vqaddSCode)
1670 threeEqualRegInst("vqadd", "VqaddSD", "SimdAddOp", signedTypes, 2, vqaddSCode)
1671 threeEqualRegInst("vqadd", "VqaddSQ", "SimdAddOp", signedTypes, 4, vqaddSCode)
1655
1656 vqsubUCode = '''
1657 destElem = srcElem1 - srcElem2;
1658 FPSCR fpscr = (FPSCR)Fpscr;
1659 if (destElem > srcElem1) {
1660 destElem = 0;
1661 fpscr.qc = 1;
1662 }
1663 Fpscr = fpscr;
1664 '''
1672
1673 vqsubUCode = '''
1674 destElem = srcElem1 - srcElem2;
1675 FPSCR fpscr = (FPSCR)Fpscr;
1676 if (destElem > srcElem1) {
1677 destElem = 0;
1678 fpscr.qc = 1;
1679 }
1680 Fpscr = fpscr;
1681 '''
1665 threeEqualRegInst("vqsub", "VqsubUD", unsignedTypes, 2, vqsubUCode)
1666 threeEqualRegInst("vqsub", "VqsubUQ", unsignedTypes, 4, vqsubUCode)
1682 threeEqualRegInst("vqsub", "VqsubUD", "SimdAddOp", unsignedTypes, 2, vqsubUCode)
1683 threeEqualRegInst("vqsub", "VqsubUQ", "SimdAddOp", unsignedTypes, 4, vqsubUCode)
1667
1668 vqsubSCode = '''
1669 destElem = srcElem1 - srcElem2;
1670 FPSCR fpscr = (FPSCR)Fpscr;
1671 bool negDest = (destElem < 0);
1672 bool negSrc1 = (srcElem1 < 0);
1673 bool posSrc2 = (srcElem2 >= 0);
1674 if ((negDest != negSrc1) && (negSrc1 == posSrc2)) {
1675 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
1676 if (negDest)
1677 destElem -= 1;
1678 fpscr.qc = 1;
1679 }
1680 Fpscr = fpscr;
1681 '''
1684
1685 vqsubSCode = '''
1686 destElem = srcElem1 - srcElem2;
1687 FPSCR fpscr = (FPSCR)Fpscr;
1688 bool negDest = (destElem < 0);
1689 bool negSrc1 = (srcElem1 < 0);
1690 bool posSrc2 = (srcElem2 >= 0);
1691 if ((negDest != negSrc1) && (negSrc1 == posSrc2)) {
1692 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
1693 if (negDest)
1694 destElem -= 1;
1695 fpscr.qc = 1;
1696 }
1697 Fpscr = fpscr;
1698 '''
1682 threeEqualRegInst("vqsub", "VqsubSD", signedTypes, 2, vqsubSCode)
1683 threeEqualRegInst("vqsub", "VqsubSQ", signedTypes, 4, vqsubSCode)
1699 threeEqualRegInst("vqsub", "VqsubSD", "SimdAddOp", signedTypes, 2, vqsubSCode)
1700 threeEqualRegInst("vqsub", "VqsubSQ", "SimdAddOp", signedTypes, 4, vqsubSCode)
1684
1685 vcgtCode = '''
1686 destElem = (srcElem1 > srcElem2) ? (Element)(-1) : 0;
1687 '''
1701
1702 vcgtCode = '''
1703 destElem = (srcElem1 > srcElem2) ? (Element)(-1) : 0;
1704 '''
1688 threeEqualRegInst("vcgt", "VcgtD", allTypes, 2, vcgtCode)
1689 threeEqualRegInst("vcgt", "VcgtQ", allTypes, 4, vcgtCode)
1705 threeEqualRegInst("vcgt", "VcgtD", "SimdCmpOp", allTypes, 2, vcgtCode)
1706 threeEqualRegInst("vcgt", "VcgtQ", "SimdCmpOp", allTypes, 4, vcgtCode)
1690
1691 vcgeCode = '''
1692 destElem = (srcElem1 >= srcElem2) ? (Element)(-1) : 0;
1693 '''
1707
1708 vcgeCode = '''
1709 destElem = (srcElem1 >= srcElem2) ? (Element)(-1) : 0;
1710 '''
1694 threeEqualRegInst("vcge", "VcgeD", allTypes, 2, vcgeCode)
1695 threeEqualRegInst("vcge", "VcgeQ", allTypes, 4, vcgeCode)
1711 threeEqualRegInst("vcge", "VcgeD", "SimdCmpOp", allTypes, 2, vcgeCode)
1712 threeEqualRegInst("vcge", "VcgeQ", "SimdCmpOp", allTypes, 4, vcgeCode)
1696
1697 vceqCode = '''
1698 destElem = (srcElem1 == srcElem2) ? (Element)(-1) : 0;
1699 '''
1713
1714 vceqCode = '''
1715 destElem = (srcElem1 == srcElem2) ? (Element)(-1) : 0;
1716 '''
1700 threeEqualRegInst("vceq", "VceqD", unsignedTypes, 2, vceqCode)
1701 threeEqualRegInst("vceq", "VceqQ", unsignedTypes, 4, vceqCode)
1717 threeEqualRegInst("vceq", "VceqD", "SimdCmpOp", unsignedTypes, 2, vceqCode)
1718 threeEqualRegInst("vceq", "VceqQ", "SimdCmpOp", unsignedTypes, 4, vceqCode)
1702
1703 vshlCode = '''
1704 int16_t shiftAmt = (int8_t)srcElem2;
1705 if (shiftAmt < 0) {
1706 shiftAmt = -shiftAmt;
1707 if (shiftAmt >= sizeof(Element) * 8) {
1708 shiftAmt = sizeof(Element) * 8 - 1;
1709 destElem = 0;

--- 8 unchanged lines hidden (view full) ---

1718 } else {
1719 if (shiftAmt >= sizeof(Element) * 8) {
1720 destElem = 0;
1721 } else {
1722 destElem = srcElem1 << shiftAmt;
1723 }
1724 }
1725 '''
1719
1720 vshlCode = '''
1721 int16_t shiftAmt = (int8_t)srcElem2;
1722 if (shiftAmt < 0) {
1723 shiftAmt = -shiftAmt;
1724 if (shiftAmt >= sizeof(Element) * 8) {
1725 shiftAmt = sizeof(Element) * 8 - 1;
1726 destElem = 0;

--- 8 unchanged lines hidden (view full) ---

1735 } else {
1736 if (shiftAmt >= sizeof(Element) * 8) {
1737 destElem = 0;
1738 } else {
1739 destElem = srcElem1 << shiftAmt;
1740 }
1741 }
1742 '''
1726 threeEqualRegInst("vshl", "VshlD", allTypes, 2, vshlCode)
1727 threeEqualRegInst("vshl", "VshlQ", allTypes, 4, vshlCode)
1743 threeEqualRegInst("vshl", "VshlD", "SimdAluOp", allTypes, 2, vshlCode)
1744 threeEqualRegInst("vshl", "VshlQ", "SimdAluOp", allTypes, 4, vshlCode)
1728
1729 vrshlCode = '''
1730 int16_t shiftAmt = (int8_t)srcElem2;
1731 if (shiftAmt < 0) {
1732 shiftAmt = -shiftAmt;
1733 Element rBit = 0;
1734 if (shiftAmt <= sizeof(Element) * 8)
1735 rBit = bits(srcElem1, shiftAmt - 1);

--- 16 unchanged lines hidden (view full) ---

1752 destElem = 0;
1753 } else {
1754 destElem = srcElem1 << shiftAmt;
1755 }
1756 } else {
1757 destElem = srcElem1;
1758 }
1759 '''
1745
1746 vrshlCode = '''
1747 int16_t shiftAmt = (int8_t)srcElem2;
1748 if (shiftAmt < 0) {
1749 shiftAmt = -shiftAmt;
1750 Element rBit = 0;
1751 if (shiftAmt <= sizeof(Element) * 8)
1752 rBit = bits(srcElem1, shiftAmt - 1);

--- 16 unchanged lines hidden (view full) ---

1769 destElem = 0;
1770 } else {
1771 destElem = srcElem1 << shiftAmt;
1772 }
1773 } else {
1774 destElem = srcElem1;
1775 }
1776 '''
1760 threeEqualRegInst("vrshl", "VrshlD", allTypes, 2, vrshlCode)
1761 threeEqualRegInst("vrshl", "VrshlQ", allTypes, 4, vrshlCode)
1777 threeEqualRegInst("vrshl", "VrshlD", "SimdAluOp", allTypes, 2, vrshlCode)
1778 threeEqualRegInst("vrshl", "VrshlQ", "SimdAluOp", allTypes, 4, vrshlCode)
1762
1763 vqshlUCode = '''
1764 int16_t shiftAmt = (int8_t)srcElem2;
1765 FPSCR fpscr = (FPSCR)Fpscr;
1766 if (shiftAmt < 0) {
1767 shiftAmt = -shiftAmt;
1768 if (shiftAmt >= sizeof(Element) * 8) {
1769 shiftAmt = sizeof(Element) * 8 - 1;

--- 18 unchanged lines hidden (view full) ---

1788 destElem = srcElem1 << shiftAmt;
1789 }
1790 }
1791 } else {
1792 destElem = srcElem1;
1793 }
1794 Fpscr = fpscr;
1795 '''
1779
1780 vqshlUCode = '''
1781 int16_t shiftAmt = (int8_t)srcElem2;
1782 FPSCR fpscr = (FPSCR)Fpscr;
1783 if (shiftAmt < 0) {
1784 shiftAmt = -shiftAmt;
1785 if (shiftAmt >= sizeof(Element) * 8) {
1786 shiftAmt = sizeof(Element) * 8 - 1;

--- 18 unchanged lines hidden (view full) ---

1805 destElem = srcElem1 << shiftAmt;
1806 }
1807 }
1808 } else {
1809 destElem = srcElem1;
1810 }
1811 Fpscr = fpscr;
1812 '''
1796 threeEqualRegInst("vqshl", "VqshlUD", unsignedTypes, 2, vqshlUCode)
1797 threeEqualRegInst("vqshl", "VqshlUQ", unsignedTypes, 4, vqshlUCode)
1813 threeEqualRegInst("vqshl", "VqshlUD", "SimdAluOp", unsignedTypes, 2, vqshlUCode)
1814 threeEqualRegInst("vqshl", "VqshlUQ", "SimdAluOp", unsignedTypes, 4, vqshlUCode)
1798
1799 vqshlSCode = '''
1800 int16_t shiftAmt = (int8_t)srcElem2;
1801 FPSCR fpscr = (FPSCR)Fpscr;
1802 if (shiftAmt < 0) {
1803 shiftAmt = -shiftAmt;
1804 if (shiftAmt >= sizeof(Element) * 8) {
1805 shiftAmt = sizeof(Element) * 8 - 1;

--- 28 unchanged lines hidden (view full) ---

1834 if (srcElem1 < 0)
1835 destElem = ~destElem;
1836 }
1837 } else {
1838 destElem = srcElem1;
1839 }
1840 Fpscr = fpscr;
1841 '''
1815
1816 vqshlSCode = '''
1817 int16_t shiftAmt = (int8_t)srcElem2;
1818 FPSCR fpscr = (FPSCR)Fpscr;
1819 if (shiftAmt < 0) {
1820 shiftAmt = -shiftAmt;
1821 if (shiftAmt >= sizeof(Element) * 8) {
1822 shiftAmt = sizeof(Element) * 8 - 1;

--- 28 unchanged lines hidden (view full) ---

1851 if (srcElem1 < 0)
1852 destElem = ~destElem;
1853 }
1854 } else {
1855 destElem = srcElem1;
1856 }
1857 Fpscr = fpscr;
1858 '''
1842 threeEqualRegInst("vqshl", "VqshlSD", signedTypes, 2, vqshlSCode)
1843 threeEqualRegInst("vqshl", "VqshlSQ", signedTypes, 4, vqshlSCode)
1859 threeEqualRegInst("vqshl", "VqshlSD", "SimdCmpOp", signedTypes, 2, vqshlSCode)
1860 threeEqualRegInst("vqshl", "VqshlSQ", "SimdCmpOp", signedTypes, 4, vqshlSCode)
1844
1845 vqrshlUCode = '''
1846 int16_t shiftAmt = (int8_t)srcElem2;
1847 FPSCR fpscr = (FPSCR)Fpscr;
1848 if (shiftAmt < 0) {
1849 shiftAmt = -shiftAmt;
1850 Element rBit = 0;
1851 if (shiftAmt <= sizeof(Element) * 8)

--- 20 unchanged lines hidden (view full) ---

1872 fpscr.qc = 1;
1873 } else {
1874 destElem = srcElem1 << shiftAmt;
1875 }
1876 }
1877 }
1878 Fpscr = fpscr;
1879 '''
1861
1862 vqrshlUCode = '''
1863 int16_t shiftAmt = (int8_t)srcElem2;
1864 FPSCR fpscr = (FPSCR)Fpscr;
1865 if (shiftAmt < 0) {
1866 shiftAmt = -shiftAmt;
1867 Element rBit = 0;
1868 if (shiftAmt <= sizeof(Element) * 8)

--- 20 unchanged lines hidden (view full) ---

1889 fpscr.qc = 1;
1890 } else {
1891 destElem = srcElem1 << shiftAmt;
1892 }
1893 }
1894 }
1895 Fpscr = fpscr;
1896 '''
1880 threeEqualRegInst("vqrshl", "VqrshlUD", unsignedTypes, 2, vqrshlUCode)
1881 threeEqualRegInst("vqrshl", "VqrshlUQ", unsignedTypes, 4, vqrshlUCode)
1897 threeEqualRegInst("vqrshl", "VqrshlUD", "SimdCmpOp", unsignedTypes, 2, vqrshlUCode)
1898 threeEqualRegInst("vqrshl", "VqrshlUQ", "SimdCmpOp", unsignedTypes, 4, vqrshlUCode)
1882
1883 vqrshlSCode = '''
1884 int16_t shiftAmt = (int8_t)srcElem2;
1885 FPSCR fpscr = (FPSCR)Fpscr;
1886 if (shiftAmt < 0) {
1887 shiftAmt = -shiftAmt;
1888 Element rBit = 0;
1889 if (shiftAmt <= sizeof(Element) * 8)

--- 34 unchanged lines hidden (view full) ---

1924 if (srcElem1 < 0)
1925 destElem = ~destElem;
1926 }
1927 } else {
1928 destElem = srcElem1;
1929 }
1930 Fpscr = fpscr;
1931 '''
1899
1900 vqrshlSCode = '''
1901 int16_t shiftAmt = (int8_t)srcElem2;
1902 FPSCR fpscr = (FPSCR)Fpscr;
1903 if (shiftAmt < 0) {
1904 shiftAmt = -shiftAmt;
1905 Element rBit = 0;
1906 if (shiftAmt <= sizeof(Element) * 8)

--- 34 unchanged lines hidden (view full) ---

1941 if (srcElem1 < 0)
1942 destElem = ~destElem;
1943 }
1944 } else {
1945 destElem = srcElem1;
1946 }
1947 Fpscr = fpscr;
1948 '''
1932 threeEqualRegInst("vqrshl", "VqrshlSD", signedTypes, 2, vqrshlSCode)
1933 threeEqualRegInst("vqrshl", "VqrshlSQ", signedTypes, 4, vqrshlSCode)
1949 threeEqualRegInst("vqrshl", "VqrshlSD", "SimdCmpOp", signedTypes, 2, vqrshlSCode)
1950 threeEqualRegInst("vqrshl", "VqrshlSQ", "SimdCmpOp", signedTypes, 4, vqrshlSCode)
1934
1935 vabaCode = '''
1936 destElem += (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
1937 (srcElem2 - srcElem1);
1938 '''
1951
1952 vabaCode = '''
1953 destElem += (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
1954 (srcElem2 - srcElem1);
1955 '''
1939 threeEqualRegInst("vaba", "VabaD", allTypes, 2, vabaCode, True)
1940 threeEqualRegInst("vaba", "VabaQ", allTypes, 4, vabaCode, True)
1956 threeEqualRegInst("vaba", "VabaD", "SimdAddAccOp", allTypes, 2, vabaCode, True)
1957 threeEqualRegInst("vaba", "VabaQ", "SimdAddAccOp", allTypes, 4, vabaCode, True)
1941 vabalCode = '''
1942 destElem += (srcElem1 > srcElem2) ?
1943 ((BigElement)srcElem1 - (BigElement)srcElem2) :
1944 ((BigElement)srcElem2 - (BigElement)srcElem1);
1945 '''
1958 vabalCode = '''
1959 destElem += (srcElem1 > srcElem2) ?
1960 ((BigElement)srcElem1 - (BigElement)srcElem2) :
1961 ((BigElement)srcElem2 - (BigElement)srcElem1);
1962 '''
1946 threeRegLongInst("vabal", "Vabal", smallTypes, vabalCode, True)
1963 threeRegLongInst("vabal", "Vabal", "SimdAddAccOp", smallTypes, vabalCode, True)
1947
1948 vabdCode = '''
1949 destElem = (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
1950 (srcElem2 - srcElem1);
1951 '''
1964
1965 vabdCode = '''
1966 destElem = (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
1967 (srcElem2 - srcElem1);
1968 '''
1952 threeEqualRegInst("vabd", "VabdD", allTypes, 2, vabdCode)
1953 threeEqualRegInst("vabd", "VabdQ", allTypes, 4, vabdCode)
1969 threeEqualRegInst("vabd", "VabdD", "SimdAddOp", allTypes, 2, vabdCode)
1970 threeEqualRegInst("vabd", "VabdQ", "SimdAddOp", allTypes, 4, vabdCode)
1954 vabdlCode = '''
1955 destElem = (srcElem1 > srcElem2) ?
1956 ((BigElement)srcElem1 - (BigElement)srcElem2) :
1957 ((BigElement)srcElem2 - (BigElement)srcElem1);
1958 '''
1971 vabdlCode = '''
1972 destElem = (srcElem1 > srcElem2) ?
1973 ((BigElement)srcElem1 - (BigElement)srcElem2) :
1974 ((BigElement)srcElem2 - (BigElement)srcElem1);
1975 '''
1959 threeRegLongInst("vabdl", "Vabdl", smallTypes, vabdlCode)
1976 threeRegLongInst("vabdl", "Vabdl", "SimdAddOp", smallTypes, vabdlCode)
1960
1961 vtstCode = '''
1962 destElem = (srcElem1 & srcElem2) ? (Element)(-1) : 0;
1963 '''
1977
1978 vtstCode = '''
1979 destElem = (srcElem1 & srcElem2) ? (Element)(-1) : 0;
1980 '''
1964 threeEqualRegInst("vtst", "VtstD", unsignedTypes, 2, vtstCode)
1965 threeEqualRegInst("vtst", "VtstQ", unsignedTypes, 4, vtstCode)
1981 threeEqualRegInst("vtst", "VtstD", "SimdAluOp", unsignedTypes, 2, vtstCode)
1982 threeEqualRegInst("vtst", "VtstQ", "SimdAluOp", unsignedTypes, 4, vtstCode)
1966
1967 vmulCode = '''
1968 destElem = srcElem1 * srcElem2;
1969 '''
1983
1984 vmulCode = '''
1985 destElem = srcElem1 * srcElem2;
1986 '''
1970 threeEqualRegInst("vmul", "NVmulD", allTypes, 2, vmulCode)
1971 threeEqualRegInst("vmul", "NVmulQ", allTypes, 4, vmulCode)
1987 threeEqualRegInst("vmul", "NVmulD", "SimdMultOp", allTypes, 2, vmulCode)
1988 threeEqualRegInst("vmul", "NVmulQ", "SimdMultOp", allTypes, 4, vmulCode)
1972 vmullCode = '''
1973 destElem = (BigElement)srcElem1 * (BigElement)srcElem2;
1974 '''
1989 vmullCode = '''
1990 destElem = (BigElement)srcElem1 * (BigElement)srcElem2;
1991 '''
1975 threeRegLongInst("vmull", "Vmull", smallTypes, vmullCode)
1992 threeRegLongInst("vmull", "Vmull", "SimdMultOp", smallTypes, vmullCode)
1976
1977 vmlaCode = '''
1978 destElem = destElem + srcElem1 * srcElem2;
1979 '''
1993
1994 vmlaCode = '''
1995 destElem = destElem + srcElem1 * srcElem2;
1996 '''
1980 threeEqualRegInst("vmla", "NVmlaD", allTypes, 2, vmlaCode, True)
1981 threeEqualRegInst("vmla", "NVmlaQ", allTypes, 4, vmlaCode, True)
1997 threeEqualRegInst("vmla", "NVmlaD", "SimdMultAccOp", allTypes, 2, vmlaCode, True)
1998 threeEqualRegInst("vmla", "NVmlaQ", "SimdMultAccOp", allTypes, 4, vmlaCode, True)
1982 vmlalCode = '''
1983 destElem = destElem + (BigElement)srcElem1 * (BigElement)srcElem2;
1984 '''
1999 vmlalCode = '''
2000 destElem = destElem + (BigElement)srcElem1 * (BigElement)srcElem2;
2001 '''
1985 threeRegLongInst("vmlal", "Vmlal", smallTypes, vmlalCode, True)
2002 threeRegLongInst("vmlal", "Vmlal", "SimdMultAccOp", smallTypes, vmlalCode, True)
1986
1987 vqdmlalCode = '''
1988 FPSCR fpscr = (FPSCR)Fpscr;
1989 BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
1990 Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
1991 Element halfNeg = maxNeg / 2;
1992 if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
1993 (srcElem1 == halfNeg && srcElem2 == maxNeg) ||

--- 8 unchanged lines hidden (view full) ---

2002 if (negPreDest == negMid && negMid != negDest) {
2003 destElem = mask(sizeof(BigElement) * 8 - 1);
2004 if (negPreDest)
2005 destElem = ~destElem;
2006 fpscr.qc = 1;
2007 }
2008 Fpscr = fpscr;
2009 '''
2003
2004 vqdmlalCode = '''
2005 FPSCR fpscr = (FPSCR)Fpscr;
2006 BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2007 Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
2008 Element halfNeg = maxNeg / 2;
2009 if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2010 (srcElem1 == halfNeg && srcElem2 == maxNeg) ||

--- 8 unchanged lines hidden (view full) ---

2019 if (negPreDest == negMid && negMid != negDest) {
2020 destElem = mask(sizeof(BigElement) * 8 - 1);
2021 if (negPreDest)
2022 destElem = ~destElem;
2023 fpscr.qc = 1;
2024 }
2025 Fpscr = fpscr;
2026 '''
2010 threeRegLongInst("vqdmlal", "Vqdmlal", smallTypes, vqdmlalCode, True)
2027 threeRegLongInst("vqdmlal", "Vqdmlal", "SimdMultAccOp", smallTypes, vqdmlalCode, True)
2011
2012 vqdmlslCode = '''
2013 FPSCR fpscr = (FPSCR)Fpscr;
2014 BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2015 Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
2016 Element halfNeg = maxNeg / 2;
2017 if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2018 (srcElem1 == halfNeg && srcElem2 == maxNeg) ||

--- 8 unchanged lines hidden (view full) ---

2027 if (negPreDest == posMid && posMid != negDest) {
2028 destElem = mask(sizeof(BigElement) * 8 - 1);
2029 if (negPreDest)
2030 destElem = ~destElem;
2031 fpscr.qc = 1;
2032 }
2033 Fpscr = fpscr;
2034 '''
2028
2029 vqdmlslCode = '''
2030 FPSCR fpscr = (FPSCR)Fpscr;
2031 BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2032 Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
2033 Element halfNeg = maxNeg / 2;
2034 if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2035 (srcElem1 == halfNeg && srcElem2 == maxNeg) ||

--- 8 unchanged lines hidden (view full) ---

2044 if (negPreDest == posMid && posMid != negDest) {
2045 destElem = mask(sizeof(BigElement) * 8 - 1);
2046 if (negPreDest)
2047 destElem = ~destElem;
2048 fpscr.qc = 1;
2049 }
2050 Fpscr = fpscr;
2051 '''
2035 threeRegLongInst("vqdmlsl", "Vqdmlsl", smallTypes, vqdmlslCode, True)
2052 threeRegLongInst("vqdmlsl", "Vqdmlsl", "SimdMultAccOp", smallTypes, vqdmlslCode, True)
2036
2037 vqdmullCode = '''
2038 FPSCR fpscr = (FPSCR)Fpscr;
2039 destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2040 if (srcElem1 == srcElem2 &&
2041 srcElem1 == (Element)((Element)1 <<
2042 (Element)(sizeof(Element) * 8 - 1))) {
2043 destElem = ~((BigElement)srcElem1 << (sizeof(Element) * 8));
2044 fpscr.qc = 1;
2045 }
2046 Fpscr = fpscr;
2047 '''
2053
2054 vqdmullCode = '''
2055 FPSCR fpscr = (FPSCR)Fpscr;
2056 destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2057 if (srcElem1 == srcElem2 &&
2058 srcElem1 == (Element)((Element)1 <<
2059 (Element)(sizeof(Element) * 8 - 1))) {
2060 destElem = ~((BigElement)srcElem1 << (sizeof(Element) * 8));
2061 fpscr.qc = 1;
2062 }
2063 Fpscr = fpscr;
2064 '''
2048 threeRegLongInst("vqdmull", "Vqdmull", smallTypes, vqdmullCode)
2065 threeRegLongInst("vqdmull", "Vqdmull", "SimdMultAccOp", smallTypes, vqdmullCode)
2049
2050 vmlsCode = '''
2051 destElem = destElem - srcElem1 * srcElem2;
2052 '''
2066
2067 vmlsCode = '''
2068 destElem = destElem - srcElem1 * srcElem2;
2069 '''
2053 threeEqualRegInst("vmls", "NVmlsD", allTypes, 2, vmlsCode, True)
2054 threeEqualRegInst("vmls", "NVmlsQ", allTypes, 4, vmlsCode, True)
2070 threeEqualRegInst("vmls", "NVmlsD", "SimdMultAccOp", allTypes, 2, vmlsCode, True)
2071 threeEqualRegInst("vmls", "NVmlsQ", "SimdMultAccOp", allTypes, 4, vmlsCode, True)
2055 vmlslCode = '''
2056 destElem = destElem - (BigElement)srcElem1 * (BigElement)srcElem2;
2057 '''
2072 vmlslCode = '''
2073 destElem = destElem - (BigElement)srcElem1 * (BigElement)srcElem2;
2074 '''
2058 threeRegLongInst("vmlsl", "Vmlsl", smallTypes, vmlslCode, True)
2075 threeRegLongInst("vmlsl", "Vmlsl", "SimdMultAccOp", smallTypes, vmlslCode, True)
2059
2060 vmulpCode = '''
2061 destElem = 0;
2062 for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
2063 if (bits(srcElem2, j))
2064 destElem ^= srcElem1 << j;
2065 }
2066 '''
2076
2077 vmulpCode = '''
2078 destElem = 0;
2079 for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
2080 if (bits(srcElem2, j))
2081 destElem ^= srcElem1 << j;
2082 }
2083 '''
2067 threeEqualRegInst("vmul", "NVmulpD", unsignedTypes, 2, vmulpCode)
2068 threeEqualRegInst("vmul", "NVmulpQ", unsignedTypes, 4, vmulpCode)
2084 threeEqualRegInst("vmul", "NVmulpD", "SimdMultOp", unsignedTypes, 2, vmulpCode)
2085 threeEqualRegInst("vmul", "NVmulpQ", "SimdMultOp", unsignedTypes, 4, vmulpCode)
2069 vmullpCode = '''
2070 destElem = 0;
2071 for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
2072 if (bits(srcElem2, j))
2073 destElem ^= (BigElement)srcElem1 << j;
2074 }
2075 '''
2086 vmullpCode = '''
2087 destElem = 0;
2088 for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
2089 if (bits(srcElem2, j))
2090 destElem ^= (BigElement)srcElem1 << j;
2091 }
2092 '''
2076 threeRegLongInst("vmull", "Vmullp", smallUnsignedTypes, vmullpCode)
2093 threeRegLongInst("vmull", "Vmullp", "SimdMultOp", smallUnsignedTypes, vmullpCode)
2077
2094
2078 threeEqualRegInst("vpmax", "VpmaxD", allTypes, 2, vmaxCode, pairwise=True)
2079 threeEqualRegInst("vpmax", "VpmaxQ", allTypes, 4, vmaxCode, pairwise=True)
2095 threeEqualRegInst("vpmax", "VpmaxD", "SimdCmpOp", allTypes, 2, vmaxCode, pairwise=True)
2096 threeEqualRegInst("vpmax", "VpmaxQ", "SimdCmpOp", allTypes, 4, vmaxCode, pairwise=True)
2080
2097
2081 threeEqualRegInst("vpmin", "VpminD", allTypes, 2, vminCode, pairwise=True)
2082 threeEqualRegInst("vpmin", "VpminQ", allTypes, 4, vminCode, pairwise=True)
2098 threeEqualRegInst("vpmin", "VpminD", "SimdCmpOp", allTypes, 2, vminCode, pairwise=True)
2099 threeEqualRegInst("vpmin", "VpminQ", "SimdCmpOp", allTypes, 4, vminCode, pairwise=True)
2083
2084 vqdmulhCode = '''
2085 FPSCR fpscr = (FPSCR)Fpscr;
2086 destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2) >>
2087 (sizeof(Element) * 8);
2088 if (srcElem1 == srcElem2 &&
2089 srcElem1 == (Element)((Element)1 <<
2090 (sizeof(Element) * 8 - 1))) {
2091 destElem = ~srcElem1;
2092 fpscr.qc = 1;
2093 }
2094 Fpscr = fpscr;
2095 '''
2100
2101 vqdmulhCode = '''
2102 FPSCR fpscr = (FPSCR)Fpscr;
2103 destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2) >>
2104 (sizeof(Element) * 8);
2105 if (srcElem1 == srcElem2 &&
2106 srcElem1 == (Element)((Element)1 <<
2107 (sizeof(Element) * 8 - 1))) {
2108 destElem = ~srcElem1;
2109 fpscr.qc = 1;
2110 }
2111 Fpscr = fpscr;
2112 '''
2096 threeEqualRegInst("vqdmulh", "VqdmulhD", smallSignedTypes, 2, vqdmulhCode)
2097 threeEqualRegInst("vqdmulh", "VqdmulhQ", smallSignedTypes, 4, vqdmulhCode)
2113 threeEqualRegInst("vqdmulh", "VqdmulhD", "SimdMultOp", smallSignedTypes, 2, vqdmulhCode)
2114 threeEqualRegInst("vqdmulh", "VqdmulhQ", "SimdMultOp", smallSignedTypes, 4, vqdmulhCode)
2098
2099 vqrdmulhCode = '''
2100 FPSCR fpscr = (FPSCR)Fpscr;
2101 destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2 +
2102 ((int64_t)1 << (sizeof(Element) * 8 - 1))) >>
2103 (sizeof(Element) * 8);
2104 Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
2105 Element halfNeg = maxNeg / 2;

--- 5 unchanged lines hidden (view full) ---

2111 } else {
2112 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2113 }
2114 fpscr.qc = 1;
2115 }
2116 Fpscr = fpscr;
2117 '''
2118 threeEqualRegInst("vqrdmulh", "VqrdmulhD",
2115
2116 vqrdmulhCode = '''
2117 FPSCR fpscr = (FPSCR)Fpscr;
2118 destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2 +
2119 ((int64_t)1 << (sizeof(Element) * 8 - 1))) >>
2120 (sizeof(Element) * 8);
2121 Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
2122 Element halfNeg = maxNeg / 2;

--- 5 unchanged lines hidden (view full) ---

2128 } else {
2129 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2130 }
2131 fpscr.qc = 1;
2132 }
2133 Fpscr = fpscr;
2134 '''
2135 threeEqualRegInst("vqrdmulh", "VqrdmulhD",
2119 smallSignedTypes, 2, vqrdmulhCode)
2136 "SimdMultOp", smallSignedTypes, 2, vqrdmulhCode)
2120 threeEqualRegInst("vqrdmulh", "VqrdmulhQ",
2137 threeEqualRegInst("vqrdmulh", "VqrdmulhQ",
2121 smallSignedTypes, 4, vqrdmulhCode)
2138 "SimdMultOp", smallSignedTypes, 4, vqrdmulhCode)
2122
2123 vmaxfpCode = '''
2124 FPSCR fpscr = (FPSCR)Fpscr;
2125 bool done;
2126 destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
2127 if (!done) {
2128 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMaxS,
2129 true, true, VfpRoundNearest);
2130 } else if (flushToZero(srcReg1, srcReg2)) {
2131 fpscr.idc = 1;
2132 }
2133 Fpscr = fpscr;
2134 '''
2139
2140 vmaxfpCode = '''
2141 FPSCR fpscr = (FPSCR)Fpscr;
2142 bool done;
2143 destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
2144 if (!done) {
2145 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMaxS,
2146 true, true, VfpRoundNearest);
2147 } else if (flushToZero(srcReg1, srcReg2)) {
2148 fpscr.idc = 1;
2149 }
2150 Fpscr = fpscr;
2151 '''
2135 threeEqualRegInstFp("vmax", "VmaxDFp", ("float",), 2, vmaxfpCode)
2136 threeEqualRegInstFp("vmax", "VmaxQFp", ("float",), 4, vmaxfpCode)
2152 threeEqualRegInstFp("vmax", "VmaxDFp", "SimdFloatCmpOp", ("float",), 2, vmaxfpCode)
2153 threeEqualRegInstFp("vmax", "VmaxQFp", "SimdFloatCmpOp", ("float",), 4, vmaxfpCode)
2137
2138 vminfpCode = '''
2139 FPSCR fpscr = (FPSCR)Fpscr;
2140 bool done;
2141 destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
2142 if (!done) {
2143 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMinS,
2144 true, true, VfpRoundNearest);
2145 } else if (flushToZero(srcReg1, srcReg2)) {
2146 fpscr.idc = 1;
2147 }
2148 Fpscr = fpscr;
2149 '''
2154
2155 vminfpCode = '''
2156 FPSCR fpscr = (FPSCR)Fpscr;
2157 bool done;
2158 destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
2159 if (!done) {
2160 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMinS,
2161 true, true, VfpRoundNearest);
2162 } else if (flushToZero(srcReg1, srcReg2)) {
2163 fpscr.idc = 1;
2164 }
2165 Fpscr = fpscr;
2166 '''
2150 threeEqualRegInstFp("vmin", "VminDFp", ("float",), 2, vminfpCode)
2151 threeEqualRegInstFp("vmin", "VminQFp", ("float",), 4, vminfpCode)
2167 threeEqualRegInstFp("vmin", "VminDFp", "SimdFloatCmpOp", ("float",), 2, vminfpCode)
2168 threeEqualRegInstFp("vmin", "VminQFp", "SimdFloatCmpOp", ("float",), 4, vminfpCode)
2152
2169
2153 threeEqualRegInstFp("vpmax", "VpmaxDFp", ("float",),
2170 threeEqualRegInstFp("vpmax", "VpmaxDFp", "SimdFloatCmpOp", ("float",),
2154 2, vmaxfpCode, pairwise=True)
2171 2, vmaxfpCode, pairwise=True)
2155 threeEqualRegInstFp("vpmax", "VpmaxQFp", ("float",),
2172 threeEqualRegInstFp("vpmax", "VpmaxQFp", "SimdFloatCmpOp", ("float",),
2156 4, vmaxfpCode, pairwise=True)
2157
2173 4, vmaxfpCode, pairwise=True)
2174
2158 threeEqualRegInstFp("vpmin", "VpminDFp", ("float",),
2175 threeEqualRegInstFp("vpmin", "VpminDFp", "SimdFloatCmpOp", ("float",),
2159 2, vminfpCode, pairwise=True)
2176 2, vminfpCode, pairwise=True)
2160 threeEqualRegInstFp("vpmin", "VpminQFp", ("float",),
2177 threeEqualRegInstFp("vpmin", "VpminQFp", "SimdFloatCmpOp", ("float",),
2161 4, vminfpCode, pairwise=True)
2162
2163 vaddfpCode = '''
2164 FPSCR fpscr = Fpscr;
2165 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpAddS,
2166 true, true, VfpRoundNearest);
2167 Fpscr = fpscr;
2168 '''
2178 4, vminfpCode, pairwise=True)
2179
2180 vaddfpCode = '''
2181 FPSCR fpscr = Fpscr;
2182 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpAddS,
2183 true, true, VfpRoundNearest);
2184 Fpscr = fpscr;
2185 '''
2169 threeEqualRegInstFp("vadd", "VaddDFp", ("float",), 2, vaddfpCode)
2170 threeEqualRegInstFp("vadd", "VaddQFp", ("float",), 4, vaddfpCode)
2186 threeEqualRegInstFp("vadd", "VaddDFp", "SimdFloatAddOp", ("float",), 2, vaddfpCode)
2187 threeEqualRegInstFp("vadd", "VaddQFp", "SimdFloatAddOp", ("float",), 4, vaddfpCode)
2171
2188
2172 threeEqualRegInstFp("vpadd", "VpaddDFp", ("float",),
2189 threeEqualRegInstFp("vpadd", "VpaddDFp", "SimdFloatAddOp", ("float",),
2173 2, vaddfpCode, pairwise=True)
2190 2, vaddfpCode, pairwise=True)
2174 threeEqualRegInstFp("vpadd", "VpaddQFp", ("float",),
2191 threeEqualRegInstFp("vpadd", "VpaddQFp", "SimdFloatAddOp", ("float",),
2175 4, vaddfpCode, pairwise=True)
2176
2177 vsubfpCode = '''
2178 FPSCR fpscr = Fpscr;
2179 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
2180 true, true, VfpRoundNearest);
2181 Fpscr = fpscr;
2182 '''
2192 4, vaddfpCode, pairwise=True)
2193
2194 vsubfpCode = '''
2195 FPSCR fpscr = Fpscr;
2196 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
2197 true, true, VfpRoundNearest);
2198 Fpscr = fpscr;
2199 '''
2183 threeEqualRegInstFp("vsub", "VsubDFp", ("float",), 2, vsubfpCode)
2184 threeEqualRegInstFp("vsub", "VsubQFp", ("float",), 4, vsubfpCode)
2200 threeEqualRegInstFp("vsub", "VsubDFp", "SimdFloatAddOp", ("float",), 2, vsubfpCode)
2201 threeEqualRegInstFp("vsub", "VsubQFp", "SimdFloatAddOp", ("float",), 4, vsubfpCode)
2185
2186 vmulfpCode = '''
2187 FPSCR fpscr = Fpscr;
2188 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2189 true, true, VfpRoundNearest);
2190 Fpscr = fpscr;
2191 '''
2202
2203 vmulfpCode = '''
2204 FPSCR fpscr = Fpscr;
2205 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2206 true, true, VfpRoundNearest);
2207 Fpscr = fpscr;
2208 '''
2192 threeEqualRegInstFp("vmul", "NVmulDFp", ("float",), 2, vmulfpCode)
2193 threeEqualRegInstFp("vmul", "NVmulQFp", ("float",), 4, vmulfpCode)
2209 threeEqualRegInstFp("vmul", "NVmulDFp", "SimdFloatMultOp", ("float",), 2, vmulfpCode)
2210 threeEqualRegInstFp("vmul", "NVmulQFp", "SimdFloatMultOp", ("float",), 4, vmulfpCode)
2194
2195 vmlafpCode = '''
2196 FPSCR fpscr = Fpscr;
2197 float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2198 true, true, VfpRoundNearest);
2199 destReg = binaryOp(fpscr, mid, destReg, fpAddS,
2200 true, true, VfpRoundNearest);
2201 Fpscr = fpscr;
2202 '''
2211
2212 vmlafpCode = '''
2213 FPSCR fpscr = Fpscr;
2214 float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2215 true, true, VfpRoundNearest);
2216 destReg = binaryOp(fpscr, mid, destReg, fpAddS,
2217 true, true, VfpRoundNearest);
2218 Fpscr = fpscr;
2219 '''
2203 threeEqualRegInstFp("vmla", "NVmlaDFp", ("float",), 2, vmlafpCode, True)
2204 threeEqualRegInstFp("vmla", "NVmlaQFp", ("float",), 4, vmlafpCode, True)
2220 threeEqualRegInstFp("vmla", "NVmlaDFp", "SimdFloatMultAccOp", ("float",), 2, vmlafpCode, True)
2221 threeEqualRegInstFp("vmla", "NVmlaQFp", "SimdFloatMultAccOp", ("float",), 4, vmlafpCode, True)
2205
2206 vmlsfpCode = '''
2207 FPSCR fpscr = Fpscr;
2208 float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2209 true, true, VfpRoundNearest);
2210 destReg = binaryOp(fpscr, destReg, mid, fpSubS,
2211 true, true, VfpRoundNearest);
2212 Fpscr = fpscr;
2213 '''
2222
2223 vmlsfpCode = '''
2224 FPSCR fpscr = Fpscr;
2225 float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2226 true, true, VfpRoundNearest);
2227 destReg = binaryOp(fpscr, destReg, mid, fpSubS,
2228 true, true, VfpRoundNearest);
2229 Fpscr = fpscr;
2230 '''
2214 threeEqualRegInstFp("vmls", "NVmlsDFp", ("float",), 2, vmlsfpCode, True)
2215 threeEqualRegInstFp("vmls", "NVmlsQFp", ("float",), 4, vmlsfpCode, True)
2231 threeEqualRegInstFp("vmls", "NVmlsDFp", "SimdFloatMultAccOp", ("float",), 2, vmlsfpCode, True)
2232 threeEqualRegInstFp("vmls", "NVmlsQFp", "SimdFloatMultAccOp", ("float",), 4, vmlsfpCode, True)
2216
2217 vcgtfpCode = '''
2218 FPSCR fpscr = (FPSCR)Fpscr;
2219 float res = binaryOp(fpscr, srcReg1, srcReg2, vcgtFunc,
2220 true, true, VfpRoundNearest);
2221 destReg = (res == 0) ? -1 : 0;
2222 if (res == 2.0)
2223 fpscr.ioc = 1;
2224 Fpscr = fpscr;
2225 '''
2233
2234 vcgtfpCode = '''
2235 FPSCR fpscr = (FPSCR)Fpscr;
2236 float res = binaryOp(fpscr, srcReg1, srcReg2, vcgtFunc,
2237 true, true, VfpRoundNearest);
2238 destReg = (res == 0) ? -1 : 0;
2239 if (res == 2.0)
2240 fpscr.ioc = 1;
2241 Fpscr = fpscr;
2242 '''
2226 threeEqualRegInstFp("vcgt", "VcgtDFp", ("float",),
2243 threeEqualRegInstFp("vcgt", "VcgtDFp", "SimdFloatCmpOp", ("float",),
2227 2, vcgtfpCode, toInt = True)
2244 2, vcgtfpCode, toInt = True)
2228 threeEqualRegInstFp("vcgt", "VcgtQFp", ("float",),
2245 threeEqualRegInstFp("vcgt", "VcgtQFp", "SimdFloatCmpOp", ("float",),
2229 4, vcgtfpCode, toInt = True)
2230
2231 vcgefpCode = '''
2232 FPSCR fpscr = (FPSCR)Fpscr;
2233 float res = binaryOp(fpscr, srcReg1, srcReg2, vcgeFunc,
2234 true, true, VfpRoundNearest);
2235 destReg = (res == 0) ? -1 : 0;
2236 if (res == 2.0)
2237 fpscr.ioc = 1;
2238 Fpscr = fpscr;
2239 '''
2246 4, vcgtfpCode, toInt = True)
2247
2248 vcgefpCode = '''
2249 FPSCR fpscr = (FPSCR)Fpscr;
2250 float res = binaryOp(fpscr, srcReg1, srcReg2, vcgeFunc,
2251 true, true, VfpRoundNearest);
2252 destReg = (res == 0) ? -1 : 0;
2253 if (res == 2.0)
2254 fpscr.ioc = 1;
2255 Fpscr = fpscr;
2256 '''
2240 threeEqualRegInstFp("vcge", "VcgeDFp", ("float",),
2257 threeEqualRegInstFp("vcge", "VcgeDFp", "SimdFloatCmpOp", ("float",),
2241 2, vcgefpCode, toInt = True)
2258 2, vcgefpCode, toInt = True)
2242 threeEqualRegInstFp("vcge", "VcgeQFp", ("float",),
2259 threeEqualRegInstFp("vcge", "VcgeQFp", "SimdFloatCmpOp", ("float",),
2243 4, vcgefpCode, toInt = True)
2244
2245 vacgtfpCode = '''
2246 FPSCR fpscr = (FPSCR)Fpscr;
2247 float res = binaryOp(fpscr, srcReg1, srcReg2, vacgtFunc,
2248 true, true, VfpRoundNearest);
2249 destReg = (res == 0) ? -1 : 0;
2250 if (res == 2.0)
2251 fpscr.ioc = 1;
2252 Fpscr = fpscr;
2253 '''
2260 4, vcgefpCode, toInt = True)
2261
2262 vacgtfpCode = '''
2263 FPSCR fpscr = (FPSCR)Fpscr;
2264 float res = binaryOp(fpscr, srcReg1, srcReg2, vacgtFunc,
2265 true, true, VfpRoundNearest);
2266 destReg = (res == 0) ? -1 : 0;
2267 if (res == 2.0)
2268 fpscr.ioc = 1;
2269 Fpscr = fpscr;
2270 '''
2254 threeEqualRegInstFp("vacgt", "VacgtDFp", ("float",),
2271 threeEqualRegInstFp("vacgt", "VacgtDFp", "SimdFloatCmpOp", ("float",),
2255 2, vacgtfpCode, toInt = True)
2272 2, vacgtfpCode, toInt = True)
2256 threeEqualRegInstFp("vacgt", "VacgtQFp", ("float",),
2273 threeEqualRegInstFp("vacgt", "VacgtQFp", "SimdFloatCmpOp", ("float",),
2257 4, vacgtfpCode, toInt = True)
2258
2259 vacgefpCode = '''
2260 FPSCR fpscr = (FPSCR)Fpscr;
2261 float res = binaryOp(fpscr, srcReg1, srcReg2, vacgeFunc,
2262 true, true, VfpRoundNearest);
2263 destReg = (res == 0) ? -1 : 0;
2264 if (res == 2.0)
2265 fpscr.ioc = 1;
2266 Fpscr = fpscr;
2267 '''
2274 4, vacgtfpCode, toInt = True)
2275
2276 vacgefpCode = '''
2277 FPSCR fpscr = (FPSCR)Fpscr;
2278 float res = binaryOp(fpscr, srcReg1, srcReg2, vacgeFunc,
2279 true, true, VfpRoundNearest);
2280 destReg = (res == 0) ? -1 : 0;
2281 if (res == 2.0)
2282 fpscr.ioc = 1;
2283 Fpscr = fpscr;
2284 '''
2268 threeEqualRegInstFp("vacge", "VacgeDFp", ("float",),
2285 threeEqualRegInstFp("vacge", "VacgeDFp", "SimdFloatCmpOp", ("float",),
2269 2, vacgefpCode, toInt = True)
2286 2, vacgefpCode, toInt = True)
2270 threeEqualRegInstFp("vacge", "VacgeQFp", ("float",),
2287 threeEqualRegInstFp("vacge", "VacgeQFp", "SimdFloatCmpOp", ("float",),
2271 4, vacgefpCode, toInt = True)
2272
2273 vceqfpCode = '''
2274 FPSCR fpscr = (FPSCR)Fpscr;
2275 float res = binaryOp(fpscr, srcReg1, srcReg2, vceqFunc,
2276 true, true, VfpRoundNearest);
2277 destReg = (res == 0) ? -1 : 0;
2278 if (res == 2.0)
2279 fpscr.ioc = 1;
2280 Fpscr = fpscr;
2281 '''
2288 4, vacgefpCode, toInt = True)
2289
2290 vceqfpCode = '''
2291 FPSCR fpscr = (FPSCR)Fpscr;
2292 float res = binaryOp(fpscr, srcReg1, srcReg2, vceqFunc,
2293 true, true, VfpRoundNearest);
2294 destReg = (res == 0) ? -1 : 0;
2295 if (res == 2.0)
2296 fpscr.ioc = 1;
2297 Fpscr = fpscr;
2298 '''
2282 threeEqualRegInstFp("vceq", "VceqDFp", ("float",),
2299 threeEqualRegInstFp("vceq", "VceqDFp", "SimdFloatCmpOp", ("float",),
2283 2, vceqfpCode, toInt = True)
2300 2, vceqfpCode, toInt = True)
2284 threeEqualRegInstFp("vceq", "VceqQFp", ("float",),
2301 threeEqualRegInstFp("vceq", "VceqQFp", "SimdFloatCmpOp", ("float",),
2285 4, vceqfpCode, toInt = True)
2286
2287 vrecpsCode = '''
2288 FPSCR fpscr = Fpscr;
2289 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRecpsS,
2290 true, true, VfpRoundNearest);
2291 Fpscr = fpscr;
2292 '''
2302 4, vceqfpCode, toInt = True)
2303
2304 vrecpsCode = '''
2305 FPSCR fpscr = Fpscr;
2306 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRecpsS,
2307 true, true, VfpRoundNearest);
2308 Fpscr = fpscr;
2309 '''
2293 threeEqualRegInstFp("vrecps", "VrecpsDFp", ("float",), 2, vrecpsCode)
2294 threeEqualRegInstFp("vrecps", "VrecpsQFp", ("float",), 4, vrecpsCode)
2310 threeEqualRegInstFp("vrecps", "VrecpsDFp", "SimdFloatMultAccOp", ("float",), 2, vrecpsCode)
2311 threeEqualRegInstFp("vrecps", "VrecpsQFp", "SimdFloatMultAccOp", ("float",), 4, vrecpsCode)
2295
2296 vrsqrtsCode = '''
2297 FPSCR fpscr = Fpscr;
2298 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRSqrtsS,
2299 true, true, VfpRoundNearest);
2300 Fpscr = fpscr;
2301 '''
2312
2313 vrsqrtsCode = '''
2314 FPSCR fpscr = Fpscr;
2315 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRSqrtsS,
2316 true, true, VfpRoundNearest);
2317 Fpscr = fpscr;
2318 '''
2302 threeEqualRegInstFp("vrsqrts", "VrsqrtsDFp", ("float",), 2, vrsqrtsCode)
2303 threeEqualRegInstFp("vrsqrts", "VrsqrtsQFp", ("float",), 4, vrsqrtsCode)
2319 threeEqualRegInstFp("vrsqrts", "VrsqrtsDFp", "SimdFloatMiscOp", ("float",), 2, vrsqrtsCode)
2320 threeEqualRegInstFp("vrsqrts", "VrsqrtsQFp", "SimdFloatMiscOp", ("float",), 4, vrsqrtsCode)
2304
2305 vabdfpCode = '''
2306 FPSCR fpscr = Fpscr;
2307 float mid = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
2308 true, true, VfpRoundNearest);
2309 destReg = fabs(mid);
2310 Fpscr = fpscr;
2311 '''
2321
2322 vabdfpCode = '''
2323 FPSCR fpscr = Fpscr;
2324 float mid = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
2325 true, true, VfpRoundNearest);
2326 destReg = fabs(mid);
2327 Fpscr = fpscr;
2328 '''
2312 threeEqualRegInstFp("vabd", "VabdDFp", ("float",), 2, vabdfpCode)
2313 threeEqualRegInstFp("vabd", "VabdQFp", ("float",), 4, vabdfpCode)
2329 threeEqualRegInstFp("vabd", "VabdDFp", "SimdFloatAddOp", ("float",), 2, vabdfpCode)
2330 threeEqualRegInstFp("vabd", "VabdQFp", "SimdFloatAddOp", ("float",), 4, vabdfpCode)
2314
2331
2315 twoEqualRegInst("vmla", "VmlasD", unsignedTypes, 2, vmlaCode, True)
2316 twoEqualRegInst("vmla", "VmlasQ", unsignedTypes, 4, vmlaCode, True)
2317 twoEqualRegInstFp("vmla", "VmlasDFp", ("float",), 2, vmlafpCode, True)
2318 twoEqualRegInstFp("vmla", "VmlasQFp", ("float",), 4, vmlafpCode, True)
2319 twoRegLongInst("vmlal", "Vmlals", smallTypes, vmlalCode, True)
2332 twoEqualRegInst("vmla", "VmlasD", "SimdMultAccOp", unsignedTypes, 2, vmlaCode, True)
2333 twoEqualRegInst("vmla", "VmlasQ", "SimdMultAccOp", unsignedTypes, 4, vmlaCode, True)
2334 twoEqualRegInstFp("vmla", "VmlasDFp", "SimdFloatMultAccOp", ("float",), 2, vmlafpCode, True)
2335 twoEqualRegInstFp("vmla", "VmlasQFp", "SimdFloatMultAccOp", ("float",), 4, vmlafpCode, True)
2336 twoRegLongInst("vmlal", "Vmlals", "SimdMultAccOp", smallTypes, vmlalCode, True)
2320
2337
2321 twoEqualRegInst("vmls", "VmlssD", allTypes, 2, vmlsCode, True)
2322 twoEqualRegInst("vmls", "VmlssQ", allTypes, 4, vmlsCode, True)
2323 twoEqualRegInstFp("vmls", "VmlssDFp", ("float",), 2, vmlsfpCode, True)
2324 twoEqualRegInstFp("vmls", "VmlssQFp", ("float",), 4, vmlsfpCode, True)
2325 twoRegLongInst("vmlsl", "Vmlsls", smallTypes, vmlslCode, True)
2338 twoEqualRegInst("vmls", "VmlssD", "SimdMultAccOp", allTypes, 2, vmlsCode, True)
2339 twoEqualRegInst("vmls", "VmlssQ", "SimdMultAccOp", allTypes, 4, vmlsCode, True)
2340 twoEqualRegInstFp("vmls", "VmlssDFp", "SimdFloatMultAccOp", ("float",), 2, vmlsfpCode, True)
2341 twoEqualRegInstFp("vmls", "VmlssQFp", "SimdFloatMultAccOp", ("float",), 4, vmlsfpCode, True)
2342 twoRegLongInst("vmlsl", "Vmlsls", "SimdMultAccOp", smallTypes, vmlslCode, True)
2326
2343
2327 twoEqualRegInst("vmul", "VmulsD", allTypes, 2, vmulCode)
2328 twoEqualRegInst("vmul", "VmulsQ", allTypes, 4, vmulCode)
2329 twoEqualRegInstFp("vmul", "VmulsDFp", ("float",), 2, vmulfpCode)
2330 twoEqualRegInstFp("vmul", "VmulsQFp", ("float",), 4, vmulfpCode)
2331 twoRegLongInst("vmull", "Vmulls", smallTypes, vmullCode)
2344 twoEqualRegInst("vmul", "VmulsD", "SimdMultOp", allTypes, 2, vmulCode)
2345 twoEqualRegInst("vmul", "VmulsQ", "SimdMultOp", allTypes, 4, vmulCode)
2346 twoEqualRegInstFp("vmul", "VmulsDFp", "SimdFloatMultOp", ("float",), 2, vmulfpCode)
2347 twoEqualRegInstFp("vmul", "VmulsQFp", "SimdFloatMultOp", ("float",), 4, vmulfpCode)
2348 twoRegLongInst("vmull", "Vmulls", "SimdMultOp", smallTypes, vmullCode)
2332
2349
2333 twoRegLongInst("vqdmull", "Vqdmulls", smallTypes, vqdmullCode)
2334 twoRegLongInst("vqdmlal", "Vqdmlals", smallTypes, vqdmlalCode, True)
2335 twoRegLongInst("vqdmlsl", "Vqdmlsls", smallTypes, vqdmlslCode, True)
2336 twoEqualRegInst("vqdmulh", "VqdmulhsD", smallSignedTypes, 2, vqdmulhCode)
2337 twoEqualRegInst("vqdmulh", "VqdmulhsQ", smallSignedTypes, 4, vqdmulhCode)
2350 twoRegLongInst("vqdmull", "Vqdmulls", "SimdMultOp", smallTypes, vqdmullCode)
2351 twoRegLongInst("vqdmlal", "Vqdmlals", "SimdMultAccOp", smallTypes, vqdmlalCode, True)
2352 twoRegLongInst("vqdmlsl", "Vqdmlsls", "SimdMultAccOp", smallTypes, vqdmlslCode, True)
2353 twoEqualRegInst("vqdmulh", "VqdmulhsD", "SimdMultOp", smallSignedTypes, 2, vqdmulhCode)
2354 twoEqualRegInst("vqdmulh", "VqdmulhsQ", "SimdMultOp", smallSignedTypes, 4, vqdmulhCode)
2338 twoEqualRegInst("vqrdmulh", "VqrdmulhsD",
2355 twoEqualRegInst("vqrdmulh", "VqrdmulhsD",
2339 smallSignedTypes, 2, vqrdmulhCode)
2356 "SimdMultOp", smallSignedTypes, 2, vqrdmulhCode)
2340 twoEqualRegInst("vqrdmulh", "VqrdmulhsQ",
2357 twoEqualRegInst("vqrdmulh", "VqrdmulhsQ",
2341 smallSignedTypes, 4, vqrdmulhCode)
2358 "SimdMultOp", smallSignedTypes, 4, vqrdmulhCode)
2342
2343 vshrCode = '''
2344 if (imm >= sizeof(srcElem1) * 8) {
2345 if (ltz(srcElem1))
2346 destElem = -1;
2347 else
2348 destElem = 0;
2349 } else {
2350 destElem = srcElem1 >> imm;
2351 }
2352 '''
2359
2360 vshrCode = '''
2361 if (imm >= sizeof(srcElem1) * 8) {
2362 if (ltz(srcElem1))
2363 destElem = -1;
2364 else
2365 destElem = 0;
2366 } else {
2367 destElem = srcElem1 >> imm;
2368 }
2369 '''
2353 twoRegShiftInst("vshr", "NVshrD", allTypes, 2, vshrCode)
2354 twoRegShiftInst("vshr", "NVshrQ", allTypes, 4, vshrCode)
2370 twoRegShiftInst("vshr", "NVshrD", "SimdShiftOp", allTypes, 2, vshrCode)
2371 twoRegShiftInst("vshr", "NVshrQ", "SimdShiftOp", allTypes, 4, vshrCode)
2355
2356 vsraCode = '''
2357 Element mid;;
2358 if (imm >= sizeof(srcElem1) * 8) {
2359 mid = ltz(srcElem1) ? -1 : 0;
2360 } else {
2361 mid = srcElem1 >> imm;
2362 if (ltz(srcElem1) && !ltz(mid)) {
2363 mid |= -(mid & ((Element)1 <<
2364 (sizeof(Element) * 8 - 1 - imm)));
2365 }
2366 }
2367 destElem += mid;
2368 '''
2372
2373 vsraCode = '''
2374 Element mid;;
2375 if (imm >= sizeof(srcElem1) * 8) {
2376 mid = ltz(srcElem1) ? -1 : 0;
2377 } else {
2378 mid = srcElem1 >> imm;
2379 if (ltz(srcElem1) && !ltz(mid)) {
2380 mid |= -(mid & ((Element)1 <<
2381 (sizeof(Element) * 8 - 1 - imm)));
2382 }
2383 }
2384 destElem += mid;
2385 '''
2369 twoRegShiftInst("vsra", "NVsraD", allTypes, 2, vsraCode, True)
2370 twoRegShiftInst("vsra", "NVsraQ", allTypes, 4, vsraCode, True)
2386 twoRegShiftInst("vsra", "NVsraD", "SimdShiftAccOp", allTypes, 2, vsraCode, True)
2387 twoRegShiftInst("vsra", "NVsraQ", "SimdShiftAccOp", allTypes, 4, vsraCode, True)
2371
2372 vrshrCode = '''
2373 if (imm > sizeof(srcElem1) * 8) {
2374 destElem = 0;
2375 } else if (imm) {
2376 Element rBit = bits(srcElem1, imm - 1);
2377 destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2378 } else {
2379 destElem = srcElem1;
2380 }
2381 '''
2388
2389 vrshrCode = '''
2390 if (imm > sizeof(srcElem1) * 8) {
2391 destElem = 0;
2392 } else if (imm) {
2393 Element rBit = bits(srcElem1, imm - 1);
2394 destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2395 } else {
2396 destElem = srcElem1;
2397 }
2398 '''
2382 twoRegShiftInst("vrshr", "NVrshrD", allTypes, 2, vrshrCode)
2383 twoRegShiftInst("vrshr", "NVrshrQ", allTypes, 4, vrshrCode)
2399 twoRegShiftInst("vrshr", "NVrshrD", "SimdShiftOp", allTypes, 2, vrshrCode)
2400 twoRegShiftInst("vrshr", "NVrshrQ", "SimdShiftOp", allTypes, 4, vrshrCode)
2384
2385 vrsraCode = '''
2386 if (imm > sizeof(srcElem1) * 8) {
2387 destElem += 0;
2388 } else if (imm) {
2389 Element rBit = bits(srcElem1, imm - 1);
2390 destElem += ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2391 } else {
2392 destElem += srcElem1;
2393 }
2394 '''
2401
2402 vrsraCode = '''
2403 if (imm > sizeof(srcElem1) * 8) {
2404 destElem += 0;
2405 } else if (imm) {
2406 Element rBit = bits(srcElem1, imm - 1);
2407 destElem += ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2408 } else {
2409 destElem += srcElem1;
2410 }
2411 '''
2395 twoRegShiftInst("vrsra", "NVrsraD", allTypes, 2, vrsraCode, True)
2396 twoRegShiftInst("vrsra", "NVrsraQ", allTypes, 4, vrsraCode, True)
2412 twoRegShiftInst("vrsra", "NVrsraD", "SimdShiftAccOp", allTypes, 2, vrsraCode, True)
2413 twoRegShiftInst("vrsra", "NVrsraQ", "SimdShiftAccOp", allTypes, 4, vrsraCode, True)
2397
2398 vsriCode = '''
2399 if (imm >= sizeof(Element) * 8)
2400 destElem = destElem;
2401 else
2402 destElem = (srcElem1 >> imm) |
2403 (destElem & ~mask(sizeof(Element) * 8 - imm));
2404 '''
2414
2415 vsriCode = '''
2416 if (imm >= sizeof(Element) * 8)
2417 destElem = destElem;
2418 else
2419 destElem = (srcElem1 >> imm) |
2420 (destElem & ~mask(sizeof(Element) * 8 - imm));
2421 '''
2405 twoRegShiftInst("vsri", "NVsriD", unsignedTypes, 2, vsriCode, True)
2406 twoRegShiftInst("vsri", "NVsriQ", unsignedTypes, 4, vsriCode, True)
2422 twoRegShiftInst("vsri", "NVsriD", "SimdShiftOp", unsignedTypes, 2, vsriCode, True)
2423 twoRegShiftInst("vsri", "NVsriQ", "SimdShiftOp", unsignedTypes, 4, vsriCode, True)
2407
2408 vshlCode = '''
2409 if (imm >= sizeof(Element) * 8)
2410 destElem = (srcElem1 << (sizeof(Element) * 8 - 1)) << 1;
2411 else
2412 destElem = srcElem1 << imm;
2413 '''
2424
2425 vshlCode = '''
2426 if (imm >= sizeof(Element) * 8)
2427 destElem = (srcElem1 << (sizeof(Element) * 8 - 1)) << 1;
2428 else
2429 destElem = srcElem1 << imm;
2430 '''
2414 twoRegShiftInst("vshl", "NVshlD", unsignedTypes, 2, vshlCode)
2415 twoRegShiftInst("vshl", "NVshlQ", unsignedTypes, 4, vshlCode)
2431 twoRegShiftInst("vshl", "NVshlD", "SimdShiftOp", unsignedTypes, 2, vshlCode)
2432 twoRegShiftInst("vshl", "NVshlQ", "SimdShiftOp", unsignedTypes, 4, vshlCode)
2416
2417 vsliCode = '''
2418 if (imm >= sizeof(Element) * 8)
2419 destElem = destElem;
2420 else
2421 destElem = (srcElem1 << imm) | (destElem & mask(imm));
2422 '''
2433
2434 vsliCode = '''
2435 if (imm >= sizeof(Element) * 8)
2436 destElem = destElem;
2437 else
2438 destElem = (srcElem1 << imm) | (destElem & mask(imm));
2439 '''
2423 twoRegShiftInst("vsli", "NVsliD", unsignedTypes, 2, vsliCode, True)
2424 twoRegShiftInst("vsli", "NVsliQ", unsignedTypes, 4, vsliCode, True)
2440 twoRegShiftInst("vsli", "NVsliD", "SimdShiftOp", unsignedTypes, 2, vsliCode, True)
2441 twoRegShiftInst("vsli", "NVsliQ", "SimdShiftOp", unsignedTypes, 4, vsliCode, True)
2425
2426 vqshlCode = '''
2427 FPSCR fpscr = (FPSCR)Fpscr;
2428 if (imm >= sizeof(Element) * 8) {
2429 if (srcElem1 != 0) {
2430 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2431 if (srcElem1 > 0)
2432 destElem = ~destElem;

--- 12 unchanged lines hidden (view full) ---

2445 destElem = ~destElem;
2446 fpscr.qc = 1;
2447 }
2448 } else {
2449 destElem = srcElem1;
2450 }
2451 Fpscr = fpscr;
2452 '''
2442
2443 vqshlCode = '''
2444 FPSCR fpscr = (FPSCR)Fpscr;
2445 if (imm >= sizeof(Element) * 8) {
2446 if (srcElem1 != 0) {
2447 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2448 if (srcElem1 > 0)
2449 destElem = ~destElem;

--- 12 unchanged lines hidden (view full) ---

2462 destElem = ~destElem;
2463 fpscr.qc = 1;
2464 }
2465 } else {
2466 destElem = srcElem1;
2467 }
2468 Fpscr = fpscr;
2469 '''
2453 twoRegShiftInst("vqshl", "NVqshlD", signedTypes, 2, vqshlCode)
2454 twoRegShiftInst("vqshl", "NVqshlQ", signedTypes, 4, vqshlCode)
2470 twoRegShiftInst("vqshl", "NVqshlD", "SimdShiftOp", signedTypes, 2, vqshlCode)
2471 twoRegShiftInst("vqshl", "NVqshlQ", "SimdShiftOp", signedTypes, 4, vqshlCode)
2455
2456 vqshluCode = '''
2457 FPSCR fpscr = (FPSCR)Fpscr;
2458 if (imm >= sizeof(Element) * 8) {
2459 if (srcElem1 != 0) {
2460 destElem = mask(sizeof(Element) * 8);
2461 fpscr.qc = 1;
2462 } else {

--- 8 unchanged lines hidden (view full) ---

2471 destElem = mask(sizeof(Element) * 8);
2472 fpscr.qc = 1;
2473 }
2474 } else {
2475 destElem = srcElem1;
2476 }
2477 Fpscr = fpscr;
2478 '''
2472
2473 vqshluCode = '''
2474 FPSCR fpscr = (FPSCR)Fpscr;
2475 if (imm >= sizeof(Element) * 8) {
2476 if (srcElem1 != 0) {
2477 destElem = mask(sizeof(Element) * 8);
2478 fpscr.qc = 1;
2479 } else {

--- 8 unchanged lines hidden (view full) ---

2488 destElem = mask(sizeof(Element) * 8);
2489 fpscr.qc = 1;
2490 }
2491 } else {
2492 destElem = srcElem1;
2493 }
2494 Fpscr = fpscr;
2495 '''
2479 twoRegShiftInst("vqshlu", "NVqshluD", unsignedTypes, 2, vqshluCode)
2480 twoRegShiftInst("vqshlu", "NVqshluQ", unsignedTypes, 4, vqshluCode)
2496 twoRegShiftInst("vqshlu", "NVqshluD", "SimdShiftOp", unsignedTypes, 2, vqshluCode)
2497 twoRegShiftInst("vqshlu", "NVqshluQ", "SimdShiftOp", unsignedTypes, 4, vqshluCode)
2481
2482 vqshlusCode = '''
2483 FPSCR fpscr = (FPSCR)Fpscr;
2484 if (imm >= sizeof(Element) * 8) {
2485 if (srcElem1 < 0) {
2486 destElem = 0;
2487 fpscr.qc = 1;
2488 } else if (srcElem1 > 0) {

--- 19 unchanged lines hidden (view full) ---

2508 fpscr.qc = 1;
2509 destElem = 0;
2510 } else {
2511 destElem = srcElem1;
2512 }
2513 }
2514 Fpscr = fpscr;
2515 '''
2498
2499 vqshlusCode = '''
2500 FPSCR fpscr = (FPSCR)Fpscr;
2501 if (imm >= sizeof(Element) * 8) {
2502 if (srcElem1 < 0) {
2503 destElem = 0;
2504 fpscr.qc = 1;
2505 } else if (srcElem1 > 0) {

--- 19 unchanged lines hidden (view full) ---

2525 fpscr.qc = 1;
2526 destElem = 0;
2527 } else {
2528 destElem = srcElem1;
2529 }
2530 }
2531 Fpscr = fpscr;
2532 '''
2516 twoRegShiftInst("vqshlus", "NVqshlusD", signedTypes, 2, vqshlusCode)
2517 twoRegShiftInst("vqshlus", "NVqshlusQ", signedTypes, 4, vqshlusCode)
2533 twoRegShiftInst("vqshlus", "NVqshlusD", "SimdShiftOp", signedTypes, 2, vqshlusCode)
2534 twoRegShiftInst("vqshlus", "NVqshlusQ", "SimdShiftOp", signedTypes, 4, vqshlusCode)
2518
2519 vshrnCode = '''
2520 if (imm >= sizeof(srcElem1) * 8) {
2521 destElem = 0;
2522 } else {
2523 destElem = srcElem1 >> imm;
2524 }
2525 '''
2535
2536 vshrnCode = '''
2537 if (imm >= sizeof(srcElem1) * 8) {
2538 destElem = 0;
2539 } else {
2540 destElem = srcElem1 >> imm;
2541 }
2542 '''
2526 twoRegNarrowShiftInst("vshrn", "NVshrn", smallUnsignedTypes, vshrnCode)
2543 twoRegNarrowShiftInst("vshrn", "NVshrn", "SimdShiftOp", smallUnsignedTypes, vshrnCode)
2527
2528 vrshrnCode = '''
2529 if (imm > sizeof(srcElem1) * 8) {
2530 destElem = 0;
2531 } else if (imm) {
2532 Element rBit = bits(srcElem1, imm - 1);
2533 destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2534 } else {
2535 destElem = srcElem1;
2536 }
2537 '''
2544
2545 vrshrnCode = '''
2546 if (imm > sizeof(srcElem1) * 8) {
2547 destElem = 0;
2548 } else if (imm) {
2549 Element rBit = bits(srcElem1, imm - 1);
2550 destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2551 } else {
2552 destElem = srcElem1;
2553 }
2554 '''
2538 twoRegNarrowShiftInst("vrshrn", "NVrshrn", smallUnsignedTypes, vrshrnCode)
2555 twoRegNarrowShiftInst("vrshrn", "NVrshrn", "SimdShiftOp", smallUnsignedTypes, vrshrnCode)
2539
2540 vqshrnCode = '''
2541 FPSCR fpscr = (FPSCR)Fpscr;
2542 if (imm > sizeof(srcElem1) * 8) {
2543 if (srcElem1 != 0 && srcElem1 != -1)
2544 fpscr.qc = 1;
2545 destElem = 0;
2546 } else if (imm) {

--- 8 unchanged lines hidden (view full) ---

2555 } else {
2556 destElem = mid;
2557 }
2558 } else {
2559 destElem = srcElem1;
2560 }
2561 Fpscr = fpscr;
2562 '''
2556
2557 vqshrnCode = '''
2558 FPSCR fpscr = (FPSCR)Fpscr;
2559 if (imm > sizeof(srcElem1) * 8) {
2560 if (srcElem1 != 0 && srcElem1 != -1)
2561 fpscr.qc = 1;
2562 destElem = 0;
2563 } else if (imm) {

--- 8 unchanged lines hidden (view full) ---

2572 } else {
2573 destElem = mid;
2574 }
2575 } else {
2576 destElem = srcElem1;
2577 }
2578 Fpscr = fpscr;
2579 '''
2563 twoRegNarrowShiftInst("vqshrn", "NVqshrn", smallSignedTypes, vqshrnCode)
2580 twoRegNarrowShiftInst("vqshrn", "NVqshrn", "SimdShiftOp", smallSignedTypes, vqshrnCode)
2564
2565 vqshrunCode = '''
2566 FPSCR fpscr = (FPSCR)Fpscr;
2567 if (imm > sizeof(srcElem1) * 8) {
2568 if (srcElem1 != 0)
2569 fpscr.qc = 1;
2570 destElem = 0;
2571 } else if (imm) {

--- 5 unchanged lines hidden (view full) ---

2577 destElem = mid;
2578 }
2579 } else {
2580 destElem = srcElem1;
2581 }
2582 Fpscr = fpscr;
2583 '''
2584 twoRegNarrowShiftInst("vqshrun", "NVqshrun",
2581
2582 vqshrunCode = '''
2583 FPSCR fpscr = (FPSCR)Fpscr;
2584 if (imm > sizeof(srcElem1) * 8) {
2585 if (srcElem1 != 0)
2586 fpscr.qc = 1;
2587 destElem = 0;
2588 } else if (imm) {

--- 5 unchanged lines hidden (view full) ---

2594 destElem = mid;
2595 }
2596 } else {
2597 destElem = srcElem1;
2598 }
2599 Fpscr = fpscr;
2600 '''
2601 twoRegNarrowShiftInst("vqshrun", "NVqshrun",
2585 smallUnsignedTypes, vqshrunCode)
2602 "SimdShiftOp", smallUnsignedTypes, vqshrunCode)
2586
2587 vqshrunsCode = '''
2588 FPSCR fpscr = (FPSCR)Fpscr;
2589 if (imm > sizeof(srcElem1) * 8) {
2590 if (srcElem1 != 0)
2591 fpscr.qc = 1;
2592 destElem = 0;
2593 } else if (imm) {

--- 10 unchanged lines hidden (view full) ---

2604 destElem = mid;
2605 }
2606 } else {
2607 destElem = srcElem1;
2608 }
2609 Fpscr = fpscr;
2610 '''
2611 twoRegNarrowShiftInst("vqshrun", "NVqshruns",
2603
2604 vqshrunsCode = '''
2605 FPSCR fpscr = (FPSCR)Fpscr;
2606 if (imm > sizeof(srcElem1) * 8) {
2607 if (srcElem1 != 0)
2608 fpscr.qc = 1;
2609 destElem = 0;
2610 } else if (imm) {

--- 10 unchanged lines hidden (view full) ---

2621 destElem = mid;
2622 }
2623 } else {
2624 destElem = srcElem1;
2625 }
2626 Fpscr = fpscr;
2627 '''
2628 twoRegNarrowShiftInst("vqshrun", "NVqshruns",
2612 smallSignedTypes, vqshrunsCode)
2629 "SimdShiftOp", smallSignedTypes, vqshrunsCode)
2613
2614 vqrshrnCode = '''
2615 FPSCR fpscr = (FPSCR)Fpscr;
2616 if (imm > sizeof(srcElem1) * 8) {
2617 if (srcElem1 != 0 && srcElem1 != -1)
2618 fpscr.qc = 1;
2619 destElem = 0;
2620 } else if (imm) {

--- 19 unchanged lines hidden (view full) ---

2640 fpscr.qc = 1;
2641 } else {
2642 destElem = srcElem1;
2643 }
2644 }
2645 Fpscr = fpscr;
2646 '''
2647 twoRegNarrowShiftInst("vqrshrn", "NVqrshrn",
2630
2631 vqrshrnCode = '''
2632 FPSCR fpscr = (FPSCR)Fpscr;
2633 if (imm > sizeof(srcElem1) * 8) {
2634 if (srcElem1 != 0 && srcElem1 != -1)
2635 fpscr.qc = 1;
2636 destElem = 0;
2637 } else if (imm) {

--- 19 unchanged lines hidden (view full) ---

2657 fpscr.qc = 1;
2658 } else {
2659 destElem = srcElem1;
2660 }
2661 }
2662 Fpscr = fpscr;
2663 '''
2664 twoRegNarrowShiftInst("vqrshrn", "NVqrshrn",
2648 smallSignedTypes, vqrshrnCode)
2665 "SimdShiftOp", smallSignedTypes, vqrshrnCode)
2649
2650 vqrshrunCode = '''
2651 FPSCR fpscr = (FPSCR)Fpscr;
2652 if (imm > sizeof(srcElem1) * 8) {
2653 if (srcElem1 != 0)
2654 fpscr.qc = 1;
2655 destElem = 0;
2656 } else if (imm) {

--- 13 unchanged lines hidden (view full) ---

2670 fpscr.qc = 1;
2671 } else {
2672 destElem = srcElem1;
2673 }
2674 }
2675 Fpscr = fpscr;
2676 '''
2677 twoRegNarrowShiftInst("vqrshrun", "NVqrshrun",
2666
2667 vqrshrunCode = '''
2668 FPSCR fpscr = (FPSCR)Fpscr;
2669 if (imm > sizeof(srcElem1) * 8) {
2670 if (srcElem1 != 0)
2671 fpscr.qc = 1;
2672 destElem = 0;
2673 } else if (imm) {

--- 13 unchanged lines hidden (view full) ---

2687 fpscr.qc = 1;
2688 } else {
2689 destElem = srcElem1;
2690 }
2691 }
2692 Fpscr = fpscr;
2693 '''
2694 twoRegNarrowShiftInst("vqrshrun", "NVqrshrun",
2678 smallUnsignedTypes, vqrshrunCode)
2695 "SimdShiftOp", smallUnsignedTypes, vqrshrunCode)
2679
2680 vqrshrunsCode = '''
2681 FPSCR fpscr = (FPSCR)Fpscr;
2682 if (imm > sizeof(srcElem1) * 8) {
2683 if (srcElem1 != 0)
2684 fpscr.qc = 1;
2685 destElem = 0;
2686 } else if (imm) {

--- 20 unchanged lines hidden (view full) ---

2707 destElem = 0;
2708 } else {
2709 destElem = srcElem1;
2710 }
2711 }
2712 Fpscr = fpscr;
2713 '''
2714 twoRegNarrowShiftInst("vqrshrun", "NVqrshruns",
2696
2697 vqrshrunsCode = '''
2698 FPSCR fpscr = (FPSCR)Fpscr;
2699 if (imm > sizeof(srcElem1) * 8) {
2700 if (srcElem1 != 0)
2701 fpscr.qc = 1;
2702 destElem = 0;
2703 } else if (imm) {

--- 20 unchanged lines hidden (view full) ---

2724 destElem = 0;
2725 } else {
2726 destElem = srcElem1;
2727 }
2728 }
2729 Fpscr = fpscr;
2730 '''
2731 twoRegNarrowShiftInst("vqrshrun", "NVqrshruns",
2715 smallSignedTypes, vqrshrunsCode)
2732 "SimdShiftOp", smallSignedTypes, vqrshrunsCode)
2716
2717 vshllCode = '''
2718 if (imm >= sizeof(destElem) * 8) {
2719 destElem = 0;
2720 } else {
2721 destElem = (BigElement)srcElem1 << imm;
2722 }
2723 '''
2733
2734 vshllCode = '''
2735 if (imm >= sizeof(destElem) * 8) {
2736 destElem = 0;
2737 } else {
2738 destElem = (BigElement)srcElem1 << imm;
2739 }
2740 '''
2724 twoRegLongShiftInst("vshll", "NVshll", smallTypes, vshllCode)
2741 twoRegLongShiftInst("vshll", "NVshll", "SimdShiftOp", smallTypes, vshllCode)
2725
2726 vmovlCode = '''
2727 destElem = srcElem1;
2728 '''
2742
2743 vmovlCode = '''
2744 destElem = srcElem1;
2745 '''
2729 twoRegLongShiftInst("vmovl", "NVmovl", smallTypes, vmovlCode)
2746 twoRegLongShiftInst("vmovl", "NVmovl", "SimdMiscOp", smallTypes, vmovlCode)
2730
2731 vcvt2ufxCode = '''
2732 FPSCR fpscr = Fpscr;
2733 if (flushToZero(srcElem1))
2734 fpscr.idc = 1;
2735 VfpSavedState state = prepFpState(VfpRoundNearest);
2736 __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
2737 destReg = vfpFpSToFixed(srcElem1, false, false, imm);
2738 __asm__ __volatile__("" :: "m" (destReg));
2739 finishVfp(fpscr, state, true);
2740 Fpscr = fpscr;
2741 '''
2747
2748 vcvt2ufxCode = '''
2749 FPSCR fpscr = Fpscr;
2750 if (flushToZero(srcElem1))
2751 fpscr.idc = 1;
2752 VfpSavedState state = prepFpState(VfpRoundNearest);
2753 __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
2754 destReg = vfpFpSToFixed(srcElem1, false, false, imm);
2755 __asm__ __volatile__("" :: "m" (destReg));
2756 finishVfp(fpscr, state, true);
2757 Fpscr = fpscr;
2758 '''
2742 twoRegShiftInst("vcvt", "NVcvt2ufxD", ("float",),
2759 twoRegShiftInst("vcvt", "NVcvt2ufxD", "SimdCvtOp", ("float",),
2743 2, vcvt2ufxCode, toInt = True)
2760 2, vcvt2ufxCode, toInt = True)
2744 twoRegShiftInst("vcvt", "NVcvt2ufxQ", ("float",),
2761 twoRegShiftInst("vcvt", "NVcvt2ufxQ", "SimdCvtOp", ("float",),
2745 4, vcvt2ufxCode, toInt = True)
2746
2747 vcvt2sfxCode = '''
2748 FPSCR fpscr = Fpscr;
2749 if (flushToZero(srcElem1))
2750 fpscr.idc = 1;
2751 VfpSavedState state = prepFpState(VfpRoundNearest);
2752 __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
2753 destReg = vfpFpSToFixed(srcElem1, true, false, imm);
2754 __asm__ __volatile__("" :: "m" (destReg));
2755 finishVfp(fpscr, state, true);
2756 Fpscr = fpscr;
2757 '''
2762 4, vcvt2ufxCode, toInt = True)
2763
2764 vcvt2sfxCode = '''
2765 FPSCR fpscr = Fpscr;
2766 if (flushToZero(srcElem1))
2767 fpscr.idc = 1;
2768 VfpSavedState state = prepFpState(VfpRoundNearest);
2769 __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
2770 destReg = vfpFpSToFixed(srcElem1, true, false, imm);
2771 __asm__ __volatile__("" :: "m" (destReg));
2772 finishVfp(fpscr, state, true);
2773 Fpscr = fpscr;
2774 '''
2758 twoRegShiftInst("vcvt", "NVcvt2sfxD", ("float",),
2775 twoRegShiftInst("vcvt", "NVcvt2sfxD", "SimdCvtOp", ("float",),
2759 2, vcvt2sfxCode, toInt = True)
2776 2, vcvt2sfxCode, toInt = True)
2760 twoRegShiftInst("vcvt", "NVcvt2sfxQ", ("float",),
2777 twoRegShiftInst("vcvt", "NVcvt2sfxQ", "SimdCvtOp", ("float",),
2761 4, vcvt2sfxCode, toInt = True)
2762
2763 vcvtu2fpCode = '''
2764 FPSCR fpscr = Fpscr;
2765 VfpSavedState state = prepFpState(VfpRoundNearest);
2766 __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
2767 destElem = vfpUFixedToFpS(true, true, srcReg1, false, imm);
2768 __asm__ __volatile__("" :: "m" (destElem));
2769 finishVfp(fpscr, state, true);
2770 Fpscr = fpscr;
2771 '''
2778 4, vcvt2sfxCode, toInt = True)
2779
2780 vcvtu2fpCode = '''
2781 FPSCR fpscr = Fpscr;
2782 VfpSavedState state = prepFpState(VfpRoundNearest);
2783 __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
2784 destElem = vfpUFixedToFpS(true, true, srcReg1, false, imm);
2785 __asm__ __volatile__("" :: "m" (destElem));
2786 finishVfp(fpscr, state, true);
2787 Fpscr = fpscr;
2788 '''
2772 twoRegShiftInst("vcvt", "NVcvtu2fpD", ("float",),
2789 twoRegShiftInst("vcvt", "NVcvtu2fpD", "SimdCvtOp", ("float",),
2773 2, vcvtu2fpCode, fromInt = True)
2790 2, vcvtu2fpCode, fromInt = True)
2774 twoRegShiftInst("vcvt", "NVcvtu2fpQ", ("float",),
2791 twoRegShiftInst("vcvt", "NVcvtu2fpQ", "SimdCvtOp", ("float",),
2775 4, vcvtu2fpCode, fromInt = True)
2776
2777 vcvts2fpCode = '''
2778 FPSCR fpscr = Fpscr;
2779 VfpSavedState state = prepFpState(VfpRoundNearest);
2780 __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
2781 destElem = vfpSFixedToFpS(true, true, srcReg1, false, imm);
2782 __asm__ __volatile__("" :: "m" (destElem));
2783 finishVfp(fpscr, state, true);
2784 Fpscr = fpscr;
2785 '''
2792 4, vcvtu2fpCode, fromInt = True)
2793
2794 vcvts2fpCode = '''
2795 FPSCR fpscr = Fpscr;
2796 VfpSavedState state = prepFpState(VfpRoundNearest);
2797 __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
2798 destElem = vfpSFixedToFpS(true, true, srcReg1, false, imm);
2799 __asm__ __volatile__("" :: "m" (destElem));
2800 finishVfp(fpscr, state, true);
2801 Fpscr = fpscr;
2802 '''
2786 twoRegShiftInst("vcvt", "NVcvts2fpD", ("float",),
2803 twoRegShiftInst("vcvt", "NVcvts2fpD", "SimdCvtOp", ("float",),
2787 2, vcvts2fpCode, fromInt = True)
2804 2, vcvts2fpCode, fromInt = True)
2788 twoRegShiftInst("vcvt", "NVcvts2fpQ", ("float",),
2805 twoRegShiftInst("vcvt", "NVcvts2fpQ", "SimdCvtOp", ("float",),
2789 4, vcvts2fpCode, fromInt = True)
2790
2791 vcvts2hCode = '''
2792 FPSCR fpscr = Fpscr;
2793 float srcFp1 = bitsToFp(srcElem1, (float)0.0);
2794 if (flushToZero(srcFp1))
2795 fpscr.idc = 1;
2796 VfpSavedState state = prepFpState(VfpRoundNearest);
2797 __asm__ __volatile__("" : "=m" (srcFp1), "=m" (destElem)
2798 : "m" (srcFp1), "m" (destElem));
2799 destElem = vcvtFpSFpH(fpscr, true, true, VfpRoundNearest,
2800 fpscr.ahp, srcFp1);
2801 __asm__ __volatile__("" :: "m" (destElem));
2802 finishVfp(fpscr, state, true);
2803 Fpscr = fpscr;
2804 '''
2806 4, vcvts2fpCode, fromInt = True)
2807
2808 vcvts2hCode = '''
2809 FPSCR fpscr = Fpscr;
2810 float srcFp1 = bitsToFp(srcElem1, (float)0.0);
2811 if (flushToZero(srcFp1))
2812 fpscr.idc = 1;
2813 VfpSavedState state = prepFpState(VfpRoundNearest);
2814 __asm__ __volatile__("" : "=m" (srcFp1), "=m" (destElem)
2815 : "m" (srcFp1), "m" (destElem));
2816 destElem = vcvtFpSFpH(fpscr, true, true, VfpRoundNearest,
2817 fpscr.ahp, srcFp1);
2818 __asm__ __volatile__("" :: "m" (destElem));
2819 finishVfp(fpscr, state, true);
2820 Fpscr = fpscr;
2821 '''
2805 twoRegNarrowMiscInst("vcvt", "NVcvts2h", ("uint16_t",), vcvts2hCode)
2822 twoRegNarrowMiscInst("vcvt", "NVcvts2h", "SimdCvtOp", ("uint16_t",), vcvts2hCode)
2806
2807 vcvth2sCode = '''
2808 FPSCR fpscr = Fpscr;
2809 VfpSavedState state = prepFpState(VfpRoundNearest);
2810 __asm__ __volatile__("" : "=m" (srcElem1), "=m" (destElem)
2811 : "m" (srcElem1), "m" (destElem));
2812 destElem = fpToBits(vcvtFpHFpS(fpscr, true, fpscr.ahp, srcElem1));
2813 __asm__ __volatile__("" :: "m" (destElem));
2814 finishVfp(fpscr, state, true);
2815 Fpscr = fpscr;
2816 '''
2823
2824 vcvth2sCode = '''
2825 FPSCR fpscr = Fpscr;
2826 VfpSavedState state = prepFpState(VfpRoundNearest);
2827 __asm__ __volatile__("" : "=m" (srcElem1), "=m" (destElem)
2828 : "m" (srcElem1), "m" (destElem));
2829 destElem = fpToBits(vcvtFpHFpS(fpscr, true, fpscr.ahp, srcElem1));
2830 __asm__ __volatile__("" :: "m" (destElem));
2831 finishVfp(fpscr, state, true);
2832 Fpscr = fpscr;
2833 '''
2817 twoRegLongMiscInst("vcvt", "NVcvth2s", ("uint16_t",), vcvth2sCode)
2834 twoRegLongMiscInst("vcvt", "NVcvth2s", "SimdCvtOp", ("uint16_t",), vcvth2sCode)
2818
2819 vrsqrteCode = '''
2820 destElem = unsignedRSqrtEstimate(srcElem1);
2821 '''
2835
2836 vrsqrteCode = '''
2837 destElem = unsignedRSqrtEstimate(srcElem1);
2838 '''
2822 twoRegMiscInst("vrsqrte", "NVrsqrteD", ("uint32_t",), 2, vrsqrteCode)
2823 twoRegMiscInst("vrsqrte", "NVrsqrteQ", ("uint32_t",), 4, vrsqrteCode)
2839 twoRegMiscInst("vrsqrte", "NVrsqrteD", "SimdSqrtOp", ("uint32_t",), 2, vrsqrteCode)
2840 twoRegMiscInst("vrsqrte", "NVrsqrteQ", "SimdSqrtOp", ("uint32_t",), 4, vrsqrteCode)
2824
2825 vrsqrtefpCode = '''
2826 FPSCR fpscr = Fpscr;
2827 if (flushToZero(srcReg1))
2828 fpscr.idc = 1;
2829 destReg = fprSqrtEstimate(fpscr, srcReg1);
2830 Fpscr = fpscr;
2831 '''
2841
2842 vrsqrtefpCode = '''
2843 FPSCR fpscr = Fpscr;
2844 if (flushToZero(srcReg1))
2845 fpscr.idc = 1;
2846 destReg = fprSqrtEstimate(fpscr, srcReg1);
2847 Fpscr = fpscr;
2848 '''
2832 twoRegMiscInstFp("vrsqrte", "NVrsqrteDFp", ("float",), 2, vrsqrtefpCode)
2833 twoRegMiscInstFp("vrsqrte", "NVrsqrteQFp", ("float",), 4, vrsqrtefpCode)
2849 twoRegMiscInstFp("vrsqrte", "NVrsqrteDFp", "SimdFloatSqrtOp", ("float",), 2, vrsqrtefpCode)
2850 twoRegMiscInstFp("vrsqrte", "NVrsqrteQFp", "SimdFloatSqrtOp", ("float",), 4, vrsqrtefpCode)
2834
2835 vrecpeCode = '''
2836 destElem = unsignedRecipEstimate(srcElem1);
2837 '''
2851
2852 vrecpeCode = '''
2853 destElem = unsignedRecipEstimate(srcElem1);
2854 '''
2838 twoRegMiscInst("vrecpe", "NVrecpeD", ("uint32_t",), 2, vrecpeCode)
2839 twoRegMiscInst("vrecpe", "NVrecpeQ", ("uint32_t",), 4, vrecpeCode)
2855 twoRegMiscInst("vrecpe", "NVrecpeD", "SimdMultAccOp", ("uint32_t",), 2, vrecpeCode)
2856 twoRegMiscInst("vrecpe", "NVrecpeQ", "SimdMultAccOp", ("uint32_t",), 4, vrecpeCode)
2840
2841 vrecpefpCode = '''
2842 FPSCR fpscr = Fpscr;
2843 if (flushToZero(srcReg1))
2844 fpscr.idc = 1;
2845 destReg = fpRecipEstimate(fpscr, srcReg1);
2846 Fpscr = fpscr;
2847 '''
2857
2858 vrecpefpCode = '''
2859 FPSCR fpscr = Fpscr;
2860 if (flushToZero(srcReg1))
2861 fpscr.idc = 1;
2862 destReg = fpRecipEstimate(fpscr, srcReg1);
2863 Fpscr = fpscr;
2864 '''
2848 twoRegMiscInstFp("vrecpe", "NVrecpeDFp", ("float",), 2, vrecpefpCode)
2849 twoRegMiscInstFp("vrecpe", "NVrecpeQFp", ("float",), 4, vrecpefpCode)
2865 twoRegMiscInstFp("vrecpe", "NVrecpeDFp", "SimdFloatMultAccOp", ("float",), 2, vrecpefpCode)
2866 twoRegMiscInstFp("vrecpe", "NVrecpeQFp", "SimdFloatMultAccOp", ("float",), 4, vrecpefpCode)
2850
2851 vrev16Code = '''
2852 destElem = srcElem1;
2853 unsigned groupSize = ((1 << 1) / sizeof(Element));
2854 unsigned reverseMask = (groupSize - 1);
2855 j = i ^ reverseMask;
2856 '''
2867
2868 vrev16Code = '''
2869 destElem = srcElem1;
2870 unsigned groupSize = ((1 << 1) / sizeof(Element));
2871 unsigned reverseMask = (groupSize - 1);
2872 j = i ^ reverseMask;
2873 '''
2857 twoRegMiscInst("vrev16", "NVrev16D", ("uint8_t",), 2, vrev16Code)
2858 twoRegMiscInst("vrev16", "NVrev16Q", ("uint8_t",), 4, vrev16Code)
2874 twoRegMiscInst("vrev16", "NVrev16D", "SimdAluOp", ("uint8_t",), 2, vrev16Code)
2875 twoRegMiscInst("vrev16", "NVrev16Q", "SimdAluOp", ("uint8_t",), 4, vrev16Code)
2859 vrev32Code = '''
2860 destElem = srcElem1;
2861 unsigned groupSize = ((1 << 2) / sizeof(Element));
2862 unsigned reverseMask = (groupSize - 1);
2863 j = i ^ reverseMask;
2864 '''
2865 twoRegMiscInst("vrev32", "NVrev32D",
2876 vrev32Code = '''
2877 destElem = srcElem1;
2878 unsigned groupSize = ((1 << 2) / sizeof(Element));
2879 unsigned reverseMask = (groupSize - 1);
2880 j = i ^ reverseMask;
2881 '''
2882 twoRegMiscInst("vrev32", "NVrev32D",
2866 ("uint8_t", "uint16_t"), 2, vrev32Code)
2883 "SimdAluOp", ("uint8_t", "uint16_t"), 2, vrev32Code)
2867 twoRegMiscInst("vrev32", "NVrev32Q",
2884 twoRegMiscInst("vrev32", "NVrev32Q",
2868 ("uint8_t", "uint16_t"), 4, vrev32Code)
2885 "SimdAluOp", ("uint8_t", "uint16_t"), 4, vrev32Code)
2869 vrev64Code = '''
2870 destElem = srcElem1;
2871 unsigned groupSize = ((1 << 3) / sizeof(Element));
2872 unsigned reverseMask = (groupSize - 1);
2873 j = i ^ reverseMask;
2874 '''
2886 vrev64Code = '''
2887 destElem = srcElem1;
2888 unsigned groupSize = ((1 << 3) / sizeof(Element));
2889 unsigned reverseMask = (groupSize - 1);
2890 j = i ^ reverseMask;
2891 '''
2875 twoRegMiscInst("vrev64", "NVrev64D", smallUnsignedTypes, 2, vrev64Code)
2876 twoRegMiscInst("vrev64", "NVrev64Q", smallUnsignedTypes, 4, vrev64Code)
2892 twoRegMiscInst("vrev64", "NVrev64D", "SimdAluOp", smallUnsignedTypes, 2, vrev64Code)
2893 twoRegMiscInst("vrev64", "NVrev64Q", "SimdAluOp", smallUnsignedTypes, 4, vrev64Code)
2877
2878 vpaddlCode = '''
2879 destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
2880 '''
2894
2895 vpaddlCode = '''
2896 destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
2897 '''
2881 twoRegCondenseInst("vpaddl", "NVpaddlD", smallTypes, 2, vpaddlCode)
2882 twoRegCondenseInst("vpaddl", "NVpaddlQ", smallTypes, 4, vpaddlCode)
2898 twoRegCondenseInst("vpaddl", "NVpaddlD", "SimdAddOp", smallTypes, 2, vpaddlCode)
2899 twoRegCondenseInst("vpaddl", "NVpaddlQ", "SimdAddOp", smallTypes, 4, vpaddlCode)
2883
2884 vpadalCode = '''
2885 destElem += (BigElement)srcElem1 + (BigElement)srcElem2;
2886 '''
2900
2901 vpadalCode = '''
2902 destElem += (BigElement)srcElem1 + (BigElement)srcElem2;
2903 '''
2887 twoRegCondenseInst("vpadal", "NVpadalD", smallTypes, 2, vpadalCode, True)
2888 twoRegCondenseInst("vpadal", "NVpadalQ", smallTypes, 4, vpadalCode, True)
2904 twoRegCondenseInst("vpadal", "NVpadalD", "SimdAddAccOp", smallTypes, 2, vpadalCode, True)
2905 twoRegCondenseInst("vpadal", "NVpadalQ", "SimdAddAccOp", smallTypes, 4, vpadalCode, True)
2889
2890 vclsCode = '''
2891 unsigned count = 0;
2892 if (srcElem1 < 0) {
2893 srcElem1 <<= 1;
2894 while (srcElem1 < 0 && count < sizeof(Element) * 8 - 1) {
2895 count++;
2896 srcElem1 <<= 1;
2897 }
2898 } else {
2899 srcElem1 <<= 1;
2900 while (srcElem1 >= 0 && count < sizeof(Element) * 8 - 1) {
2901 count++;
2902 srcElem1 <<= 1;
2903 }
2904 }
2905 destElem = count;
2906 '''
2906
2907 vclsCode = '''
2908 unsigned count = 0;
2909 if (srcElem1 < 0) {
2910 srcElem1 <<= 1;
2911 while (srcElem1 < 0 && count < sizeof(Element) * 8 - 1) {
2912 count++;
2913 srcElem1 <<= 1;
2914 }
2915 } else {
2916 srcElem1 <<= 1;
2917 while (srcElem1 >= 0 && count < sizeof(Element) * 8 - 1) {
2918 count++;
2919 srcElem1 <<= 1;
2920 }
2921 }
2922 destElem = count;
2923 '''
2907 twoRegMiscInst("vcls", "NVclsD", signedTypes, 2, vclsCode)
2908 twoRegMiscInst("vcls", "NVclsQ", signedTypes, 4, vclsCode)
2924 twoRegMiscInst("vcls", "NVclsD", "SimdAluOp", signedTypes, 2, vclsCode)
2925 twoRegMiscInst("vcls", "NVclsQ", "SimdAluOp", signedTypes, 4, vclsCode)
2909
2910 vclzCode = '''
2911 unsigned count = 0;
2912 while (srcElem1 >= 0 && count < sizeof(Element) * 8) {
2913 count++;
2914 srcElem1 <<= 1;
2915 }
2916 destElem = count;
2917 '''
2926
2927 vclzCode = '''
2928 unsigned count = 0;
2929 while (srcElem1 >= 0 && count < sizeof(Element) * 8) {
2930 count++;
2931 srcElem1 <<= 1;
2932 }
2933 destElem = count;
2934 '''
2918 twoRegMiscInst("vclz", "NVclzD", signedTypes, 2, vclzCode)
2919 twoRegMiscInst("vclz", "NVclzQ", signedTypes, 4, vclzCode)
2935 twoRegMiscInst("vclz", "NVclzD", "SimdAluOp", signedTypes, 2, vclzCode)
2936 twoRegMiscInst("vclz", "NVclzQ", "SimdAluOp", signedTypes, 4, vclzCode)
2920
2921 vcntCode = '''
2922 unsigned count = 0;
2923 while (srcElem1 && count < sizeof(Element) * 8) {
2924 count += srcElem1 & 0x1;
2925 srcElem1 >>= 1;
2926 }
2927 destElem = count;
2928 '''
2937
2938 vcntCode = '''
2939 unsigned count = 0;
2940 while (srcElem1 && count < sizeof(Element) * 8) {
2941 count += srcElem1 & 0x1;
2942 srcElem1 >>= 1;
2943 }
2944 destElem = count;
2945 '''
2929 twoRegMiscInst("vcnt", "NVcntD", unsignedTypes, 2, vcntCode)
2930 twoRegMiscInst("vcnt", "NVcntQ", unsignedTypes, 4, vcntCode)
2931
2946
2947 twoRegMiscInst("vcnt", "NVcntD", "SimdAluOp", unsignedTypes, 2, vcntCode)
2948 twoRegMiscInst("vcnt", "NVcntQ", "SimdAluOp", unsignedTypes, 4, vcntCode)
2949
2932 vmvnCode = '''
2933 destElem = ~srcElem1;
2934 '''
2950 vmvnCode = '''
2951 destElem = ~srcElem1;
2952 '''
2935 twoRegMiscInst("vmvn", "NVmvnD", ("uint64_t",), 2, vmvnCode)
2936 twoRegMiscInst("vmvn", "NVmvnQ", ("uint64_t",), 4, vmvnCode)
2953 twoRegMiscInst("vmvn", "NVmvnD", "SimdAluOp", ("uint64_t",), 2, vmvnCode)
2954 twoRegMiscInst("vmvn", "NVmvnQ", "SimdAluOp", ("uint64_t",), 4, vmvnCode)
2937
2938 vqabsCode = '''
2939 FPSCR fpscr = (FPSCR)Fpscr;
2940 if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) {
2941 fpscr.qc = 1;
2942 destElem = ~srcElem1;
2943 } else if (srcElem1 < 0) {
2944 destElem = -srcElem1;
2945 } else {
2946 destElem = srcElem1;
2947 }
2948 Fpscr = fpscr;
2949 '''
2955
2956 vqabsCode = '''
2957 FPSCR fpscr = (FPSCR)Fpscr;
2958 if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) {
2959 fpscr.qc = 1;
2960 destElem = ~srcElem1;
2961 } else if (srcElem1 < 0) {
2962 destElem = -srcElem1;
2963 } else {
2964 destElem = srcElem1;
2965 }
2966 Fpscr = fpscr;
2967 '''
2950 twoRegMiscInst("vqabs", "NVqabsD", signedTypes, 2, vqabsCode)
2951 twoRegMiscInst("vqabs", "NVqabsQ", signedTypes, 4, vqabsCode)
2968 twoRegMiscInst("vqabs", "NVqabsD", "SimdAluOp", signedTypes, 2, vqabsCode)
2969 twoRegMiscInst("vqabs", "NVqabsQ", "SimdAluOp", signedTypes, 4, vqabsCode)
2952
2953 vqnegCode = '''
2954 FPSCR fpscr = (FPSCR)Fpscr;
2955 if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) {
2956 fpscr.qc = 1;
2957 destElem = ~srcElem1;
2958 } else {
2959 destElem = -srcElem1;
2960 }
2961 Fpscr = fpscr;
2962 '''
2970
2971 vqnegCode = '''
2972 FPSCR fpscr = (FPSCR)Fpscr;
2973 if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) {
2974 fpscr.qc = 1;
2975 destElem = ~srcElem1;
2976 } else {
2977 destElem = -srcElem1;
2978 }
2979 Fpscr = fpscr;
2980 '''
2963 twoRegMiscInst("vqneg", "NVqnegD", signedTypes, 2, vqnegCode)
2964 twoRegMiscInst("vqneg", "NVqnegQ", signedTypes, 4, vqnegCode)
2981 twoRegMiscInst("vqneg", "NVqnegD", "SimdAluOp", signedTypes, 2, vqnegCode)
2982 twoRegMiscInst("vqneg", "NVqnegQ", "SimdAluOp", signedTypes, 4, vqnegCode)
2965
2966 vabsCode = '''
2967 if (srcElem1 < 0) {
2968 destElem = -srcElem1;
2969 } else {
2970 destElem = srcElem1;
2971 }
2972 '''
2983
2984 vabsCode = '''
2985 if (srcElem1 < 0) {
2986 destElem = -srcElem1;
2987 } else {
2988 destElem = srcElem1;
2989 }
2990 '''
2973 twoRegMiscInst("vabs", "NVabsD", signedTypes, 2, vabsCode)
2974 twoRegMiscInst("vabs", "NVabsQ", signedTypes, 4, vabsCode)
2991
2992 twoRegMiscInst("vabs", "NVabsD", "SimdAluOp", signedTypes, 2, vabsCode)
2993 twoRegMiscInst("vabs", "NVabsQ", "SimdAluOp", signedTypes, 4, vabsCode)
2975 vabsfpCode = '''
2976 union
2977 {
2978 uint32_t i;
2979 float f;
2980 } cStruct;
2981 cStruct.f = srcReg1;
2982 cStruct.i &= mask(sizeof(Element) * 8 - 1);
2983 destReg = cStruct.f;
2984 '''
2994 vabsfpCode = '''
2995 union
2996 {
2997 uint32_t i;
2998 float f;
2999 } cStruct;
3000 cStruct.f = srcReg1;
3001 cStruct.i &= mask(sizeof(Element) * 8 - 1);
3002 destReg = cStruct.f;
3003 '''
2985 twoRegMiscInstFp("vabs", "NVabsDFp", ("float",), 2, vabsfpCode)
2986 twoRegMiscInstFp("vabs", "NVabsQFp", ("float",), 4, vabsfpCode)
3004 twoRegMiscInstFp("vabs", "NVabsDFp", "SimdFloatAluOp", ("float",), 2, vabsfpCode)
3005 twoRegMiscInstFp("vabs", "NVabsQFp", "SimdFloatAluOp", ("float",), 4, vabsfpCode)
2987
2988 vnegCode = '''
2989 destElem = -srcElem1;
2990 '''
3006
3007 vnegCode = '''
3008 destElem = -srcElem1;
3009 '''
2991 twoRegMiscInst("vneg", "NVnegD", signedTypes, 2, vnegCode)
2992 twoRegMiscInst("vneg", "NVnegQ", signedTypes, 4, vnegCode)
3010 twoRegMiscInst("vneg", "NVnegD", "SimdAluOp", signedTypes, 2, vnegCode)
3011 twoRegMiscInst("vneg", "NVnegQ", "SimdAluOp", signedTypes, 4, vnegCode)
2993 vnegfpCode = '''
2994 destReg = -srcReg1;
2995 '''
3012 vnegfpCode = '''
3013 destReg = -srcReg1;
3014 '''
2996 twoRegMiscInstFp("vneg", "NVnegDFp", ("float",), 2, vnegfpCode)
2997 twoRegMiscInstFp("vneg", "NVnegQFp", ("float",), 4, vnegfpCode)
3015 twoRegMiscInstFp("vneg", "NVnegDFp", "SimdFloatAluOp", ("float",), 2, vnegfpCode)
3016 twoRegMiscInstFp("vneg", "NVnegQFp", "SimdFloatAluOp", ("float",), 4, vnegfpCode)
2998
2999 vcgtCode = 'destElem = (srcElem1 > 0) ? mask(sizeof(Element) * 8) : 0;'
3017
3018 vcgtCode = 'destElem = (srcElem1 > 0) ? mask(sizeof(Element) * 8) : 0;'
3000 twoRegMiscInst("vcgt", "NVcgtD", signedTypes, 2, vcgtCode)
3001 twoRegMiscInst("vcgt", "NVcgtQ", signedTypes, 4, vcgtCode)
3019 twoRegMiscInst("vcgt", "NVcgtD", "SimdCmpOp", signedTypes, 2, vcgtCode)
3020 twoRegMiscInst("vcgt", "NVcgtQ", "SimdCmpOp", signedTypes, 4, vcgtCode)
3002 vcgtfpCode = '''
3003 FPSCR fpscr = (FPSCR)Fpscr;
3004 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgtFunc,
3005 true, true, VfpRoundNearest);
3006 destReg = (res == 0) ? -1 : 0;
3007 if (res == 2.0)
3008 fpscr.ioc = 1;
3009 Fpscr = fpscr;
3010 '''
3021 vcgtfpCode = '''
3022 FPSCR fpscr = (FPSCR)Fpscr;
3023 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgtFunc,
3024 true, true, VfpRoundNearest);
3025 destReg = (res == 0) ? -1 : 0;
3026 if (res == 2.0)
3027 fpscr.ioc = 1;
3028 Fpscr = fpscr;
3029 '''
3011 twoRegMiscInstFp("vcgt", "NVcgtDFp", ("float",),
3030 twoRegMiscInstFp("vcgt", "NVcgtDFp", "SimdFloatCmpOp", ("float",),
3012 2, vcgtfpCode, toInt = True)
3031 2, vcgtfpCode, toInt = True)
3013 twoRegMiscInstFp("vcgt", "NVcgtQFp", ("float",),
3032 twoRegMiscInstFp("vcgt", "NVcgtQFp", "SimdFloatCmpOp", ("float",),
3014 4, vcgtfpCode, toInt = True)
3015
3016 vcgeCode = 'destElem = (srcElem1 >= 0) ? mask(sizeof(Element) * 8) : 0;'
3033 4, vcgtfpCode, toInt = True)
3034
3035 vcgeCode = 'destElem = (srcElem1 >= 0) ? mask(sizeof(Element) * 8) : 0;'
3017 twoRegMiscInst("vcge", "NVcgeD", signedTypes, 2, vcgeCode)
3018 twoRegMiscInst("vcge", "NVcgeQ", signedTypes, 4, vcgeCode)
3036 twoRegMiscInst("vcge", "NVcgeD", "SimdCmpOp", signedTypes, 2, vcgeCode)
3037 twoRegMiscInst("vcge", "NVcgeQ", "SimdCmpOp", signedTypes, 4, vcgeCode)
3019 vcgefpCode = '''
3020 FPSCR fpscr = (FPSCR)Fpscr;
3021 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgeFunc,
3022 true, true, VfpRoundNearest);
3023 destReg = (res == 0) ? -1 : 0;
3024 if (res == 2.0)
3025 fpscr.ioc = 1;
3026 Fpscr = fpscr;
3027 '''
3038 vcgefpCode = '''
3039 FPSCR fpscr = (FPSCR)Fpscr;
3040 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgeFunc,
3041 true, true, VfpRoundNearest);
3042 destReg = (res == 0) ? -1 : 0;
3043 if (res == 2.0)
3044 fpscr.ioc = 1;
3045 Fpscr = fpscr;
3046 '''
3028 twoRegMiscInstFp("vcge", "NVcgeDFp", ("float",),
3047 twoRegMiscInstFp("vcge", "NVcgeDFp", "SimdFloatCmpOp", ("float",),
3029 2, vcgefpCode, toInt = True)
3048 2, vcgefpCode, toInt = True)
3030 twoRegMiscInstFp("vcge", "NVcgeQFp", ("float",),
3049 twoRegMiscInstFp("vcge", "NVcgeQFp", "SimdFloatCmpOp", ("float",),
3031 4, vcgefpCode, toInt = True)
3032
3033 vceqCode = 'destElem = (srcElem1 == 0) ? mask(sizeof(Element) * 8) : 0;'
3050 4, vcgefpCode, toInt = True)
3051
3052 vceqCode = 'destElem = (srcElem1 == 0) ? mask(sizeof(Element) * 8) : 0;'
3034 twoRegMiscInst("vceq", "NVceqD", signedTypes, 2, vceqCode)
3035 twoRegMiscInst("vceq", "NVceqQ", signedTypes, 4, vceqCode)
3053 twoRegMiscInst("vceq", "NVceqD", "SimdCmpOp", signedTypes, 2, vceqCode)
3054 twoRegMiscInst("vceq", "NVceqQ", "SimdCmpOp", signedTypes, 4, vceqCode)
3036 vceqfpCode = '''
3037 FPSCR fpscr = (FPSCR)Fpscr;
3038 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vceqFunc,
3039 true, true, VfpRoundNearest);
3040 destReg = (res == 0) ? -1 : 0;
3041 if (res == 2.0)
3042 fpscr.ioc = 1;
3043 Fpscr = fpscr;
3044 '''
3055 vceqfpCode = '''
3056 FPSCR fpscr = (FPSCR)Fpscr;
3057 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vceqFunc,
3058 true, true, VfpRoundNearest);
3059 destReg = (res == 0) ? -1 : 0;
3060 if (res == 2.0)
3061 fpscr.ioc = 1;
3062 Fpscr = fpscr;
3063 '''
3045 twoRegMiscInstFp("vceq", "NVceqDFp", ("float",),
3064 twoRegMiscInstFp("vceq", "NVceqDFp", "SimdFloatCmpOp", ("float",),
3046 2, vceqfpCode, toInt = True)
3065 2, vceqfpCode, toInt = True)
3047 twoRegMiscInstFp("vceq", "NVceqQFp", ("float",),
3066 twoRegMiscInstFp("vceq", "NVceqQFp", "SimdFloatCmpOp", ("float",),
3048 4, vceqfpCode, toInt = True)
3049
3050 vcleCode = 'destElem = (srcElem1 <= 0) ? mask(sizeof(Element) * 8) : 0;'
3067 4, vceqfpCode, toInt = True)
3068
3069 vcleCode = 'destElem = (srcElem1 <= 0) ? mask(sizeof(Element) * 8) : 0;'
3051 twoRegMiscInst("vcle", "NVcleD", signedTypes, 2, vcleCode)
3052 twoRegMiscInst("vcle", "NVcleQ", signedTypes, 4, vcleCode)
3070 twoRegMiscInst("vcle", "NVcleD", "SimdCmpOp", signedTypes, 2, vcleCode)
3071 twoRegMiscInst("vcle", "NVcleQ", "SimdCmpOp", signedTypes, 4, vcleCode)
3053 vclefpCode = '''
3054 FPSCR fpscr = (FPSCR)Fpscr;
3055 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcleFunc,
3056 true, true, VfpRoundNearest);
3057 destReg = (res == 0) ? -1 : 0;
3058 if (res == 2.0)
3059 fpscr.ioc = 1;
3060 Fpscr = fpscr;
3061 '''
3072 vclefpCode = '''
3073 FPSCR fpscr = (FPSCR)Fpscr;
3074 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcleFunc,
3075 true, true, VfpRoundNearest);
3076 destReg = (res == 0) ? -1 : 0;
3077 if (res == 2.0)
3078 fpscr.ioc = 1;
3079 Fpscr = fpscr;
3080 '''
3062 twoRegMiscInstFp("vcle", "NVcleDFp", ("float",),
3081 twoRegMiscInstFp("vcle", "NVcleDFp", "SimdFloatCmpOp", ("float",),
3063 2, vclefpCode, toInt = True)
3082 2, vclefpCode, toInt = True)
3064 twoRegMiscInstFp("vcle", "NVcleQFp", ("float",),
3083 twoRegMiscInstFp("vcle", "NVcleQFp", "SimdFloatCmpOp", ("float",),
3065 4, vclefpCode, toInt = True)
3066
3067 vcltCode = 'destElem = (srcElem1 < 0) ? mask(sizeof(Element) * 8) : 0;'
3084 4, vclefpCode, toInt = True)
3085
3086 vcltCode = 'destElem = (srcElem1 < 0) ? mask(sizeof(Element) * 8) : 0;'
3068 twoRegMiscInst("vclt", "NVcltD", signedTypes, 2, vcltCode)
3069 twoRegMiscInst("vclt", "NVcltQ", signedTypes, 4, vcltCode)
3087 twoRegMiscInst("vclt", "NVcltD", "SimdCmpOp", signedTypes, 2, vcltCode)
3088 twoRegMiscInst("vclt", "NVcltQ", "SimdCmpOp", signedTypes, 4, vcltCode)
3070 vcltfpCode = '''
3071 FPSCR fpscr = (FPSCR)Fpscr;
3072 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcltFunc,
3073 true, true, VfpRoundNearest);
3074 destReg = (res == 0) ? -1 : 0;
3075 if (res == 2.0)
3076 fpscr.ioc = 1;
3077 Fpscr = fpscr;
3078 '''
3089 vcltfpCode = '''
3090 FPSCR fpscr = (FPSCR)Fpscr;
3091 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcltFunc,
3092 true, true, VfpRoundNearest);
3093 destReg = (res == 0) ? -1 : 0;
3094 if (res == 2.0)
3095 fpscr.ioc = 1;
3096 Fpscr = fpscr;
3097 '''
3079 twoRegMiscInstFp("vclt", "NVcltDFp", ("float",),
3098 twoRegMiscInstFp("vclt", "NVcltDFp", "SimdFloatCmpOp", ("float",),
3080 2, vcltfpCode, toInt = True)
3099 2, vcltfpCode, toInt = True)
3081 twoRegMiscInstFp("vclt", "NVcltQFp", ("float",),
3100 twoRegMiscInstFp("vclt", "NVcltQFp", "SimdFloatCmpOp", ("float",),
3082 4, vcltfpCode, toInt = True)
3083
3084 vswpCode = '''
3085 FloatRegBits mid;
3086 for (unsigned r = 0; r < rCount; r++) {
3087 mid = srcReg1.regs[r];
3088 srcReg1.regs[r] = destReg.regs[r];
3089 destReg.regs[r] = mid;
3090 }
3091 '''
3101 4, vcltfpCode, toInt = True)
3102
3103 vswpCode = '''
3104 FloatRegBits mid;
3105 for (unsigned r = 0; r < rCount; r++) {
3106 mid = srcReg1.regs[r];
3107 srcReg1.regs[r] = destReg.regs[r];
3108 destReg.regs[r] = mid;
3109 }
3110 '''
3092 twoRegMiscScramble("vswp", "NVswpD", ("uint64_t",), 2, vswpCode)
3093 twoRegMiscScramble("vswp", "NVswpQ", ("uint64_t",), 4, vswpCode)
3111 twoRegMiscScramble("vswp", "NVswpD", "SimdAluOp", ("uint64_t",), 2, vswpCode)
3112 twoRegMiscScramble("vswp", "NVswpQ", "SimdAluOp", ("uint64_t",), 4, vswpCode)
3094
3095 vtrnCode = '''
3096 Element mid;
3097 for (unsigned i = 0; i < eCount; i += 2) {
3098 mid = srcReg1.elements[i];
3099 srcReg1.elements[i] = destReg.elements[i + 1];
3100 destReg.elements[i + 1] = mid;
3101 }
3102 '''
3113
3114 vtrnCode = '''
3115 Element mid;
3116 for (unsigned i = 0; i < eCount; i += 2) {
3117 mid = srcReg1.elements[i];
3118 srcReg1.elements[i] = destReg.elements[i + 1];
3119 destReg.elements[i + 1] = mid;
3120 }
3121 '''
3103 twoRegMiscScramble("vtrn", "NVtrnD", unsignedTypes, 2, vtrnCode)
3104 twoRegMiscScramble("vtrn", "NVtrnQ", unsignedTypes, 4, vtrnCode)
3122 twoRegMiscScramble("vtrn", "NVtrnD", "SimdAluOp", unsignedTypes, 2, vtrnCode)
3123 twoRegMiscScramble("vtrn", "NVtrnQ", "SimdAluOp", unsignedTypes, 4, vtrnCode)
3105
3106 vuzpCode = '''
3107 Element mid[eCount];
3108 memcpy(&mid, &srcReg1, sizeof(srcReg1));
3109 for (unsigned i = 0; i < eCount / 2; i++) {
3110 srcReg1.elements[i] = destReg.elements[2 * i + 1];
3111 srcReg1.elements[eCount / 2 + i] = mid[2 * i + 1];
3112 destReg.elements[i] = destReg.elements[2 * i];
3113 }
3114 for (unsigned i = 0; i < eCount / 2; i++) {
3115 destReg.elements[eCount / 2 + i] = mid[2 * i];
3116 }
3117 '''
3124
3125 vuzpCode = '''
3126 Element mid[eCount];
3127 memcpy(&mid, &srcReg1, sizeof(srcReg1));
3128 for (unsigned i = 0; i < eCount / 2; i++) {
3129 srcReg1.elements[i] = destReg.elements[2 * i + 1];
3130 srcReg1.elements[eCount / 2 + i] = mid[2 * i + 1];
3131 destReg.elements[i] = destReg.elements[2 * i];
3132 }
3133 for (unsigned i = 0; i < eCount / 2; i++) {
3134 destReg.elements[eCount / 2 + i] = mid[2 * i];
3135 }
3136 '''
3118 twoRegMiscScramble("vuzp", "NVuzpD", unsignedTypes, 2, vuzpCode)
3119 twoRegMiscScramble("vuzp", "NVuzpQ", unsignedTypes, 4, vuzpCode)
3137 twoRegMiscScramble("vuzp", "NVuzpD", "SimdAluOp", unsignedTypes, 2, vuzpCode)
3138 twoRegMiscScramble("vuzp", "NVuzpQ", "SimdAluOp", unsignedTypes, 4, vuzpCode)
3120
3121 vzipCode = '''
3122 Element mid[eCount];
3123 memcpy(&mid, &destReg, sizeof(destReg));
3124 for (unsigned i = 0; i < eCount / 2; i++) {
3125 destReg.elements[2 * i] = mid[i];
3126 destReg.elements[2 * i + 1] = srcReg1.elements[i];
3127 }
3128 for (int i = 0; i < eCount / 2; i++) {
3129 srcReg1.elements[2 * i] = mid[eCount / 2 + i];
3130 srcReg1.elements[2 * i + 1] = srcReg1.elements[eCount / 2 + i];
3131 }
3132 '''
3139
3140 vzipCode = '''
3141 Element mid[eCount];
3142 memcpy(&mid, &destReg, sizeof(destReg));
3143 for (unsigned i = 0; i < eCount / 2; i++) {
3144 destReg.elements[2 * i] = mid[i];
3145 destReg.elements[2 * i + 1] = srcReg1.elements[i];
3146 }
3147 for (int i = 0; i < eCount / 2; i++) {
3148 srcReg1.elements[2 * i] = mid[eCount / 2 + i];
3149 srcReg1.elements[2 * i + 1] = srcReg1.elements[eCount / 2 + i];
3150 }
3151 '''
3133 twoRegMiscScramble("vzip", "NVzipD", unsignedTypes, 2, vzipCode)
3134 twoRegMiscScramble("vzip", "NVzipQ", unsignedTypes, 4, vzipCode)
3152 twoRegMiscScramble("vzip", "NVzipD", "SimdAluOp", unsignedTypes, 2, vzipCode)
3153 twoRegMiscScramble("vzip", "NVzipQ", "SimdAluOp", unsignedTypes, 4, vzipCode)
3135
3136 vmovnCode = 'destElem = srcElem1;'
3154
3155 vmovnCode = 'destElem = srcElem1;'
3137 twoRegNarrowMiscInst("vmovn", "NVmovn", smallUnsignedTypes, vmovnCode)
3156 twoRegNarrowMiscInst("vmovn", "NVmovn", "SimdMiscOp", smallUnsignedTypes, vmovnCode)
3138
3139 vdupCode = 'destElem = srcElem1;'
3157
3158 vdupCode = 'destElem = srcElem1;'
3140 twoRegMiscScInst("vdup", "NVdupD", smallUnsignedTypes, 2, vdupCode)
3141 twoRegMiscScInst("vdup", "NVdupQ", smallUnsignedTypes, 4, vdupCode)
3159 twoRegMiscScInst("vdup", "NVdupD", "SimdAluOp", smallUnsignedTypes, 2, vdupCode)
3160 twoRegMiscScInst("vdup", "NVdupQ", "SimdAluOp", smallUnsignedTypes, 4, vdupCode)
3142
3161
3143 def vdupGprInst(name, Name, types, rCount):
3162 def vdupGprInst(name, Name, opClass, types, rCount):
3144 global header_output, exec_output
3145 eWalkCode = '''
3146 RegVect destReg;
3147 for (unsigned i = 0; i < eCount; i++) {
3148 destReg.elements[i] = htog((Element)Op1);
3149 }
3150 '''
3151 for reg in range(rCount):
3152 eWalkCode += '''
3153 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
3154 ''' % { "reg" : reg }
3155 iop = InstObjParams(name, Name,
3156 "RegRegOp",
3157 { "code": eWalkCode,
3158 "r_count": rCount,
3163 global header_output, exec_output
3164 eWalkCode = '''
3165 RegVect destReg;
3166 for (unsigned i = 0; i < eCount; i++) {
3167 destReg.elements[i] = htog((Element)Op1);
3168 }
3169 '''
3170 for reg in range(rCount):
3171 eWalkCode += '''
3172 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
3173 ''' % { "reg" : reg }
3174 iop = InstObjParams(name, Name,
3175 "RegRegOp",
3176 { "code": eWalkCode,
3177 "r_count": rCount,
3159 "predicate_test": predicateTest }, [])
3178 "predicate_test": predicateTest,
3179 "op_class": opClass }, [])
3160 header_output += NeonRegRegOpDeclare.subst(iop)
3161 exec_output += NeonEqualRegExecute.subst(iop)
3162 for type in types:
3163 substDict = { "targs" : type,
3164 "class_name" : Name }
3165 exec_output += NeonExecDeclare.subst(substDict)
3180 header_output += NeonRegRegOpDeclare.subst(iop)
3181 exec_output += NeonEqualRegExecute.subst(iop)
3182 for type in types:
3183 substDict = { "targs" : type,
3184 "class_name" : Name }
3185 exec_output += NeonExecDeclare.subst(substDict)
3166 vdupGprInst("vdup", "NVdupDGpr", smallUnsignedTypes, 2)
3167 vdupGprInst("vdup", "NVdupQGpr", smallUnsignedTypes, 4)
3186 vdupGprInst("vdup", "NVdupDGpr", "SimdAluOp", smallUnsignedTypes, 2)
3187 vdupGprInst("vdup", "NVdupQGpr", "SimdAluOp", smallUnsignedTypes, 4)
3168
3169 vmovCode = 'destElem = imm;'
3188
3189 vmovCode = 'destElem = imm;'
3170 oneRegImmInst("vmov", "NVmoviD", ("uint64_t",), 2, vmovCode)
3171 oneRegImmInst("vmov", "NVmoviQ", ("uint64_t",), 4, vmovCode)
3190 oneRegImmInst("vmov", "NVmoviD", "SimdMiscOp", ("uint64_t",), 2, vmovCode)
3191 oneRegImmInst("vmov", "NVmoviQ", "SimdMiscOp", ("uint64_t",), 4, vmovCode)
3172
3173 vorrCode = 'destElem |= imm;'
3192
3193 vorrCode = 'destElem |= imm;'
3174 oneRegImmInst("vorr", "NVorriD", ("uint64_t",), 2, vorrCode, True)
3175 oneRegImmInst("vorr", "NVorriQ", ("uint64_t",), 4, vorrCode, True)
3194 oneRegImmInst("vorr", "NVorriD", "SimdAluOp", ("uint64_t",), 2, vorrCode, True)
3195 oneRegImmInst("vorr", "NVorriQ", "SimdAluOp", ("uint64_t",), 4, vorrCode, True)
3176
3177 vmvnCode = 'destElem = ~imm;'
3196
3197 vmvnCode = 'destElem = ~imm;'
3178 oneRegImmInst("vmvn", "NVmvniD", ("uint64_t",), 2, vmvnCode)
3179 oneRegImmInst("vmvn", "NVmvniQ", ("uint64_t",), 4, vmvnCode)
3198 oneRegImmInst("vmvn", "NVmvniD", "SimdAluOp", ("uint64_t",), 2, vmvnCode)
3199 oneRegImmInst("vmvn", "NVmvniQ", "SimdAluOp", ("uint64_t",), 4, vmvnCode)
3180
3181 vbicCode = 'destElem &= ~imm;'
3200
3201 vbicCode = 'destElem &= ~imm;'
3182 oneRegImmInst("vbic", "NVbiciD", ("uint64_t",), 2, vbicCode, True)
3183 oneRegImmInst("vbic", "NVbiciQ", ("uint64_t",), 4, vbicCode, True)
3202 oneRegImmInst("vbic", "NVbiciD", "SimdAluOp", ("uint64_t",), 2, vbicCode, True)
3203 oneRegImmInst("vbic", "NVbiciQ", "SimdAluOp", ("uint64_t",), 4, vbicCode, True)
3184
3185 vqmovnCode = '''
3186 FPSCR fpscr = (FPSCR)Fpscr;
3187 destElem = srcElem1;
3188 if ((BigElement)destElem != srcElem1) {
3189 fpscr.qc = 1;
3190 destElem = mask(sizeof(Element) * 8 - 1);
3191 if (srcElem1 < 0)
3192 destElem = ~destElem;
3193 }
3194 Fpscr = fpscr;
3195 '''
3204
3205 vqmovnCode = '''
3206 FPSCR fpscr = (FPSCR)Fpscr;
3207 destElem = srcElem1;
3208 if ((BigElement)destElem != srcElem1) {
3209 fpscr.qc = 1;
3210 destElem = mask(sizeof(Element) * 8 - 1);
3211 if (srcElem1 < 0)
3212 destElem = ~destElem;
3213 }
3214 Fpscr = fpscr;
3215 '''
3196 twoRegNarrowMiscInst("vqmovn", "NVqmovn", smallSignedTypes, vqmovnCode)
3216 twoRegNarrowMiscInst("vqmovn", "NVqmovn", "SimdMiscOp", smallSignedTypes, vqmovnCode)
3197
3198 vqmovunCode = '''
3199 FPSCR fpscr = (FPSCR)Fpscr;
3200 destElem = srcElem1;
3201 if ((BigElement)destElem != srcElem1) {
3202 fpscr.qc = 1;
3203 destElem = mask(sizeof(Element) * 8);
3204 }
3205 Fpscr = fpscr;
3206 '''
3207 twoRegNarrowMiscInst("vqmovun", "NVqmovun",
3217
3218 vqmovunCode = '''
3219 FPSCR fpscr = (FPSCR)Fpscr;
3220 destElem = srcElem1;
3221 if ((BigElement)destElem != srcElem1) {
3222 fpscr.qc = 1;
3223 destElem = mask(sizeof(Element) * 8);
3224 }
3225 Fpscr = fpscr;
3226 '''
3227 twoRegNarrowMiscInst("vqmovun", "NVqmovun",
3208 smallUnsignedTypes, vqmovunCode)
3228 "SimdMiscOp", smallUnsignedTypes, vqmovunCode)
3209
3210 vqmovunsCode = '''
3211 FPSCR fpscr = (FPSCR)Fpscr;
3212 destElem = srcElem1;
3213 if (srcElem1 < 0 ||
3214 ((BigElement)destElem & mask(sizeof(Element) * 8)) != srcElem1) {
3215 fpscr.qc = 1;
3216 destElem = mask(sizeof(Element) * 8);
3217 if (srcElem1 < 0)
3218 destElem = ~destElem;
3219 }
3220 Fpscr = fpscr;
3221 '''
3222 twoRegNarrowMiscInst("vqmovun", "NVqmovuns",
3229
3230 vqmovunsCode = '''
3231 FPSCR fpscr = (FPSCR)Fpscr;
3232 destElem = srcElem1;
3233 if (srcElem1 < 0 ||
3234 ((BigElement)destElem & mask(sizeof(Element) * 8)) != srcElem1) {
3235 fpscr.qc = 1;
3236 destElem = mask(sizeof(Element) * 8);
3237 if (srcElem1 < 0)
3238 destElem = ~destElem;
3239 }
3240 Fpscr = fpscr;
3241 '''
3242 twoRegNarrowMiscInst("vqmovun", "NVqmovuns",
3223 smallSignedTypes, vqmovunsCode)
3243 "SimdMiscOp", smallSignedTypes, vqmovunsCode)
3224
3244
3225 def buildVext(name, Name, types, rCount, op):
3245 def buildVext(name, Name, opClass, types, rCount, op):
3226 global header_output, exec_output
3227 eWalkCode = '''
3228 RegVect srcReg1, srcReg2, destReg;
3229 '''
3230 for reg in range(rCount):
3231 eWalkCode += simdEnabledCheckCode + '''
3232 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
3233 srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d.uw);
3234 ''' % { "reg" : reg }
3235 eWalkCode += op
3236 for reg in range(rCount):
3237 eWalkCode += '''
3238 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
3239 ''' % { "reg" : reg }
3240 iop = InstObjParams(name, Name,
3241 "RegRegRegImmOp",
3242 { "code": eWalkCode,
3243 "r_count": rCount,
3246 global header_output, exec_output
3247 eWalkCode = '''
3248 RegVect srcReg1, srcReg2, destReg;
3249 '''
3250 for reg in range(rCount):
3251 eWalkCode += simdEnabledCheckCode + '''
3252 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
3253 srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d.uw);
3254 ''' % { "reg" : reg }
3255 eWalkCode += op
3256 for reg in range(rCount):
3257 eWalkCode += '''
3258 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
3259 ''' % { "reg" : reg }
3260 iop = InstObjParams(name, Name,
3261 "RegRegRegImmOp",
3262 { "code": eWalkCode,
3263 "r_count": rCount,
3244 "predicate_test": predicateTest }, [])
3264 "predicate_test": predicateTest,
3265 "op_class": opClass }, [])
3245 header_output += NeonRegRegRegImmOpDeclare.subst(iop)
3246 exec_output += NeonEqualRegExecute.subst(iop)
3247 for type in types:
3248 substDict = { "targs" : type,
3249 "class_name" : Name }
3250 exec_output += NeonExecDeclare.subst(substDict)
3251
3252 vextCode = '''
3253 for (unsigned i = 0; i < eCount; i++) {
3254 unsigned index = i + imm;
3255 if (index < eCount) {
3256 destReg.elements[i] = srcReg1.elements[index];
3257 } else {
3258 index -= eCount;
3259 assert(index < eCount);
3260 destReg.elements[i] = srcReg2.elements[index];
3261 }
3262 }
3263 '''
3266 header_output += NeonRegRegRegImmOpDeclare.subst(iop)
3267 exec_output += NeonEqualRegExecute.subst(iop)
3268 for type in types:
3269 substDict = { "targs" : type,
3270 "class_name" : Name }
3271 exec_output += NeonExecDeclare.subst(substDict)
3272
3273 vextCode = '''
3274 for (unsigned i = 0; i < eCount; i++) {
3275 unsigned index = i + imm;
3276 if (index < eCount) {
3277 destReg.elements[i] = srcReg1.elements[index];
3278 } else {
3279 index -= eCount;
3280 assert(index < eCount);
3281 destReg.elements[i] = srcReg2.elements[index];
3282 }
3283 }
3284 '''
3264 buildVext("vext", "NVextD", ("uint8_t",), 2, vextCode)
3265 buildVext("vext", "NVextQ", ("uint8_t",), 4, vextCode)
3285 buildVext("vext", "NVextD", "SimdAluOp", ("uint8_t",), 2, vextCode)
3286 buildVext("vext", "NVextQ", "SimdAluOp", ("uint8_t",), 4, vextCode)
3266
3287
3267 def buildVtbxl(name, Name, length, isVtbl):
3288 def buildVtbxl(name, Name, opClass, length, isVtbl):
3268 global header_output, decoder_output, exec_output
3269 code = '''
3270 union
3271 {
3272 uint8_t bytes[32];
3273 FloatRegBits regs[8];
3274 } table;
3275

--- 31 unchanged lines hidden (view full) ---

3307 }
3308
3309 FpDestP0.uw = gtoh(destReg.regs[0]);
3310 FpDestP1.uw = gtoh(destReg.regs[1]);
3311 '''
3312 iop = InstObjParams(name, Name,
3313 "RegRegRegOp",
3314 { "code": code,
3289 global header_output, decoder_output, exec_output
3290 code = '''
3291 union
3292 {
3293 uint8_t bytes[32];
3294 FloatRegBits regs[8];
3295 } table;
3296

--- 31 unchanged lines hidden (view full) ---

3328 }
3329
3330 FpDestP0.uw = gtoh(destReg.regs[0]);
3331 FpDestP1.uw = gtoh(destReg.regs[1]);
3332 '''
3333 iop = InstObjParams(name, Name,
3334 "RegRegRegOp",
3335 { "code": code,
3315 "predicate_test": predicateTest }, [])
3336 "predicate_test": predicateTest,
3337 "op_class": opClass }, [])
3316 header_output += RegRegRegOpDeclare.subst(iop)
3317 decoder_output += RegRegRegOpConstructor.subst(iop)
3318 exec_output += PredOpExecute.subst(iop)
3319
3338 header_output += RegRegRegOpDeclare.subst(iop)
3339 decoder_output += RegRegRegOpConstructor.subst(iop)
3340 exec_output += PredOpExecute.subst(iop)
3341
3320 buildVtbxl("vtbl", "NVtbl1", 1, "true")
3321 buildVtbxl("vtbl", "NVtbl2", 2, "true")
3322 buildVtbxl("vtbl", "NVtbl3", 3, "true")
3323 buildVtbxl("vtbl", "NVtbl4", 4, "true")
3342 buildVtbxl("vtbl", "NVtbl1", "SimdAluOp", 1, "true")
3343 buildVtbxl("vtbl", "NVtbl2", "SimdAluOp", 2, "true")
3344 buildVtbxl("vtbl", "NVtbl3", "SimdAluOp", 3, "true")
3345 buildVtbxl("vtbl", "NVtbl4", "SimdAluOp", 4, "true")
3324
3346
3325 buildVtbxl("vtbx", "NVtbx1", 1, "false")
3326 buildVtbxl("vtbx", "NVtbx2", 2, "false")
3327 buildVtbxl("vtbx", "NVtbx3", 3, "false")
3328 buildVtbxl("vtbx", "NVtbx4", 4, "false")
3347 buildVtbxl("vtbx", "NVtbx1", "SimdAluOp", 1, "false")
3348 buildVtbxl("vtbx", "NVtbx2", "SimdAluOp", 2, "false")
3349 buildVtbxl("vtbx", "NVtbx3", "SimdAluOp", 3, "false")
3350 buildVtbxl("vtbx", "NVtbx4", "SimdAluOp", 4, "false")
3329}};
3351}};