// neon64.isa revision 13544:0b4e5446167c
12632Sstever@eecs.umich.edu// -*- mode: c++ -*- 22632Sstever@eecs.umich.edu 32632Sstever@eecs.umich.edu// Copyright (c) 2012-2013, 2015-2018 ARM Limited 42632Sstever@eecs.umich.edu// All rights reserved 52632Sstever@eecs.umich.edu// 62632Sstever@eecs.umich.edu// The license below extends only to copyright in the software and shall 72632Sstever@eecs.umich.edu// not be construed as granting a license to any other intellectual 82632Sstever@eecs.umich.edu// property including but not limited to intellectual property relating 92632Sstever@eecs.umich.edu// to a hardware implementation of the functionality of the software 102632Sstever@eecs.umich.edu// licensed hereunder. You may use the software subject to the license 112632Sstever@eecs.umich.edu// terms below provided that you ensure that this notice is replicated 122632Sstever@eecs.umich.edu// unmodified and in its entirety in all distributions of the software, 132632Sstever@eecs.umich.edu// modified or unmodified, in source code or in binary form. 
142632Sstever@eecs.umich.edu// 152632Sstever@eecs.umich.edu// Redistribution and use in source and binary forms, with or without 162632Sstever@eecs.umich.edu// modification, are permitted provided that the following conditions are 172632Sstever@eecs.umich.edu// met: redistributions of source code must retain the above copyright 182632Sstever@eecs.umich.edu// notice, this list of conditions and the following disclaimer; 192632Sstever@eecs.umich.edu// redistributions in binary form must reproduce the above copyright 202632Sstever@eecs.umich.edu// notice, this list of conditions and the following disclaimer in the 212632Sstever@eecs.umich.edu// documentation and/or other materials provided with the distribution; 222632Sstever@eecs.umich.edu// neither the name of the copyright holders nor the names of its 232632Sstever@eecs.umich.edu// contributors may be used to endorse or promote products derived from 242632Sstever@eecs.umich.edu// this software without specific prior written permission. 252632Sstever@eecs.umich.edu// 262632Sstever@eecs.umich.edu// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 272632Sstever@eecs.umich.edu// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 282632Sstever@eecs.umich.edu// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 292632Sstever@eecs.umich.edu// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 302632Sstever@eecs.umich.edu// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 312022SN/A// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 322022SN/A// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 332022SN/A// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 342022SN/A// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 352022SN/A// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 362469SN/A// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 372469SN/A// 382469SN/A// Authors: Giacomo Gabrielli 392469SN/A// Mbou Eyole 402516SN/A 412516SN/Alet {{ 422944Sgblack@eecs.umich.edu 432482SN/A header_output = "" 443598Sgblack@eecs.umich.edu exec_output = "" 453056Sgblack@eecs.umich.edu decoders = { 'Generic' : {} } 462469SN/A 473056Sgblack@eecs.umich.edu # FP types (FP operations always work with unsigned representations) 483056Sgblack@eecs.umich.edu floatTypes = ("uint16_t", "uint32_t", "uint64_t") 493056Sgblack@eecs.umich.edu smallFloatTypes = ("uint32_t",) 503598Sgblack@eecs.umich.edu 512516SN/A def threeEqualRegInstX(name, Name, opClass, types, rCount, op, 523056Sgblack@eecs.umich.edu readDest=False, pairwise=False, scalar=False, 533598Sgblack@eecs.umich.edu byElem=False, decoder='Generic'): 543056Sgblack@eecs.umich.edu assert (not pairwise) or ((not byElem) and (not scalar)) 553056Sgblack@eecs.umich.edu global header_output, exec_output, decoders 563056Sgblack@eecs.umich.edu eWalkCode = simd64EnabledCheckCode + ''' 573056Sgblack@eecs.umich.edu RegVect srcReg1, destReg; 583056Sgblack@eecs.umich.edu ''' 593056Sgblack@eecs.umich.edu if byElem: 603056Sgblack@eecs.umich.edu # 2nd register operand has to be read fully 613598Sgblack@eecs.umich.edu eWalkCode += ''' 623056Sgblack@eecs.umich.edu FullRegVect srcReg2; 633056Sgblack@eecs.umich.edu ''' 643598Sgblack@eecs.umich.edu else: 
653056Sgblack@eecs.umich.edu eWalkCode += ''' 663056Sgblack@eecs.umich.edu RegVect srcReg2; 673056Sgblack@eecs.umich.edu ''' 683056Sgblack@eecs.umich.edu for reg in range(rCount): 693056Sgblack@eecs.umich.edu eWalkCode += ''' 703056Sgblack@eecs.umich.edu srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw); 713056Sgblack@eecs.umich.edu srcReg2.regs[%(reg)d] = htog(AA64FpOp2P%(reg)d_uw); 723056Sgblack@eecs.umich.edu ''' % { "reg" : reg } 733056Sgblack@eecs.umich.edu if readDest: 743056Sgblack@eecs.umich.edu eWalkCode += ''' 753056Sgblack@eecs.umich.edu destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw); 763056Sgblack@eecs.umich.edu ''' % { "reg" : reg } 773056Sgblack@eecs.umich.edu if byElem: 783056Sgblack@eecs.umich.edu # 2nd operand has to be read fully 793056Sgblack@eecs.umich.edu for reg in range(rCount, 4): 803056Sgblack@eecs.umich.edu eWalkCode += ''' 813056Sgblack@eecs.umich.edu srcReg2.regs[%(reg)d] = htog(AA64FpOp2P%(reg)d_uw); 823056Sgblack@eecs.umich.edu ''' % { "reg" : reg } 833056Sgblack@eecs.umich.edu readDestCode = '' 842482SN/A if readDest: 853598Sgblack@eecs.umich.edu readDestCode = 'destElem = gtoh(destReg.elements[i]);' 863598Sgblack@eecs.umich.edu if pairwise: 873598Sgblack@eecs.umich.edu eWalkCode += ''' 883598Sgblack@eecs.umich.edu for (unsigned i = 0; i < eCount; i++) { 893598Sgblack@eecs.umich.edu Element srcElem1 = gtoh(2 * i < eCount ? 903598Sgblack@eecs.umich.edu srcReg1.elements[2 * i] : 913598Sgblack@eecs.umich.edu srcReg2.elements[2 * i - eCount]); 923598Sgblack@eecs.umich.edu Element srcElem2 = gtoh(2 * i < eCount ? 
933598Sgblack@eecs.umich.edu srcReg1.elements[2 * i + 1] : 943598Sgblack@eecs.umich.edu srcReg2.elements[2 * i + 1 - eCount]); 953598Sgblack@eecs.umich.edu Element destElem; 963598Sgblack@eecs.umich.edu %(readDest)s 973598Sgblack@eecs.umich.edu %(op)s 983598Sgblack@eecs.umich.edu destReg.elements[i] = htog(destElem); 993598Sgblack@eecs.umich.edu } 1003598Sgblack@eecs.umich.edu ''' % { "op" : op, "readDest" : readDestCode } 1013598Sgblack@eecs.umich.edu else: 1023598Sgblack@eecs.umich.edu scalarCheck = ''' 1033598Sgblack@eecs.umich.edu if (i != 0) { 1043598Sgblack@eecs.umich.edu destReg.elements[i] = 0; 1053598Sgblack@eecs.umich.edu continue; 1063598Sgblack@eecs.umich.edu } 1073598Sgblack@eecs.umich.edu ''' 1083598Sgblack@eecs.umich.edu eWalkCode += ''' 1093598Sgblack@eecs.umich.edu for (unsigned i = 0; i < eCount; i++) { 1103598Sgblack@eecs.umich.edu %(scalarCheck)s 1113598Sgblack@eecs.umich.edu Element srcElem1 = gtoh(srcReg1.elements[i]); 1123598Sgblack@eecs.umich.edu Element srcElem2 = gtoh(srcReg2.elements[%(src2Index)s]); 1133598Sgblack@eecs.umich.edu Element destElem; 1143598Sgblack@eecs.umich.edu %(readDest)s 1153598Sgblack@eecs.umich.edu %(op)s 1163598Sgblack@eecs.umich.edu destReg.elements[i] = htog(destElem); 1172516SN/A } 1182516SN/A ''' % { "op" : op, "readDest" : readDestCode, 1192516SN/A "scalarCheck" : scalarCheck if scalar else "", 1202516SN/A "src2Index" : "imm" if byElem else "i" } 1212482SN/A for reg in range(rCount): 1222482SN/A eWalkCode += ''' 1232591SN/A AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 1242516SN/A ''' % { "reg" : reg } 1252580SN/A if rCount < 4: # zero upper half 1262580SN/A for reg in range(rCount, 4): 1272482SN/A eWalkCode += ''' 1282482SN/A AA64FpDestP%(reg)d_uw = 0; 1292591SN/A ''' % { "reg" : reg } 1302516SN/A iop = InstObjParams(name, Name, 1312580SN/A "DataX2RegImmOp" if byElem else "DataX2RegOp", 1322580SN/A { "code": eWalkCode, 1332482SN/A "r_count": rCount, 1342482SN/A "op_class": opClass }, []) 1352591SN/A if 
byElem: 1362516SN/A header_output += NeonX2RegImmOpDeclare.subst(iop) 1372580SN/A else: 1382580SN/A header_output += NeonX2RegOpDeclare.subst(iop) 1392482SN/A exec_output += NeonXEqualRegOpExecute.subst(iop) 1402482SN/A for type in types: 1412591SN/A substDict = { "targs" : type, 1422516SN/A "class_name" : Name } 1432580SN/A exec_output += NeonXExecDeclare.subst(substDict) 1442580SN/A 1452482SN/A def threeUnequalRegInstX(name, Name, opClass, types, op, 1462482SN/A bigSrc1, bigSrc2, bigDest, readDest, scalar=False, 1472591SN/A byElem=False, hi=False): 1482516SN/A assert not (scalar and hi) 1492580SN/A global header_output, exec_output 1502580SN/A src1Cnt = src2Cnt = destCnt = 2 1512482SN/A src1Prefix = src2Prefix = destPrefix = '' 1522482SN/A if bigSrc1: 1532591SN/A src1Cnt = 4 1542516SN/A src1Prefix = 'Big' 1552580SN/A if bigSrc2: 1562580SN/A src2Cnt = 4 1572482SN/A src2Prefix = 'Big' 1582469SN/A if bigDest: 1592482SN/A destCnt = 4 1602516SN/A destPrefix = 'Big' 1613042Sgblack@eecs.umich.edu if byElem: 1622516SN/A src2Prefix = 'Full' 1632516SN/A eWalkCode = simd64EnabledCheckCode + ''' 1642469SN/A %sRegVect srcReg1; 1652944Sgblack@eecs.umich.edu %sRegVect srcReg2; 1662516SN/A %sRegVect destReg; 1672516SN/A ''' % (src1Prefix, src2Prefix, destPrefix) 1682469SN/A srcReg1 = 0 1692469SN/A if hi and not bigSrc1: # long/widening operations 1702482SN/A srcReg1 = 2 1712482SN/A for reg in range(src1Cnt): 1722974Sgblack@eecs.umich.edu eWalkCode += ''' 1732974Sgblack@eecs.umich.edu srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(srcReg1)d_uw); 1742974Sgblack@eecs.umich.edu ''' % { "reg" : reg, "srcReg1" : srcReg1 } 1752526SN/A srcReg1 += 1 1762974Sgblack@eecs.umich.edu srcReg2 = 0 1772974Sgblack@eecs.umich.edu if (not byElem) and (hi and not bigSrc2): # long/widening operations 1782974Sgblack@eecs.umich.edu srcReg2 = 2 1792646Ssaidi@eecs.umich.edu for reg in range(src2Cnt): 1802974Sgblack@eecs.umich.edu eWalkCode += ''' 1812469SN/A srcReg2.regs[%(reg)d] = 
htog(AA64FpOp2P%(srcReg2)d_uw); 1822516SN/A ''' % { "reg" : reg, "srcReg2" : srcReg2 } 1832646Ssaidi@eecs.umich.edu srcReg2 += 1 1842482SN/A if byElem: 1852469SN/A # 2nd operand has to be read fully 1862516SN/A for reg in range(src2Cnt, 4): 1872646Ssaidi@eecs.umich.edu eWalkCode += ''' 1882482SN/A srcReg2.regs[%(reg)d] = htog(AA64FpOp2P%(reg)d_uw); 1892954Sgblack@eecs.umich.edu ''' % { "reg" : reg } 1902469SN/A if readDest: 1912516SN/A for reg in range(destCnt): 1922516SN/A eWalkCode += ''' 1932482SN/A destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw); 1942469SN/A ''' % { "reg" : reg } 1952516SN/A readDestCode = '' 1962482SN/A if readDest: 1972482SN/A readDestCode = 'destElem = gtoh(destReg.elements[i]);' 1982646Ssaidi@eecs.umich.edu scalarCheck = ''' 1992482SN/A if (i != 0) { 2002482SN/A destReg.elements[i] = 0; 2012482SN/A continue; 2022482SN/A } 2032482SN/A ''' 2042615SN/A eWalkCode += ''' 2052469SN/A for (unsigned i = 0; i < eCount; i++) { 2062469SN/A %(scalarCheck)s 2072482SN/A %(src1Prefix)sElement srcElem1 = gtoh(srcReg1.elements[i]); 2082646Ssaidi@eecs.umich.edu %(src1Prefix)sElement srcElem2 = gtoh(srcReg2.elements[%(src2Index)s]); 2092482SN/A %(destPrefix)sElement destElem; 2102482SN/A %(readDest)s 2112482SN/A %(op)s 2122588SN/A destReg.elements[i] = htog(destElem); 2132482SN/A } 2142526SN/A ''' % { "op" : op, "readDest" : readDestCode, 2152469SN/A "src1Prefix" : src1Prefix, "src2Prefix" : src2Prefix, 2162482SN/A "destPrefix" : destPrefix, 2172469SN/A "scalarCheck" : scalarCheck if scalar else "", 2182516SN/A "src2Index" : "imm" if byElem else "i" } 2192469SN/A destReg = 0 2202580SN/A if hi and not bigDest: 2212469SN/A # narrowing operations 2222580SN/A destReg = 2 2232469SN/A for reg in range(destCnt): 2242526SN/A eWalkCode += ''' 2252482SN/A AA64FpDestP%(destReg)d_uw = gtoh(destReg.regs[%(reg)d]); 2262482SN/A ''' % { "reg" : reg, "destReg": destReg } 2272482SN/A destReg += 1 2282469SN/A if destCnt < 4: 2292580SN/A if hi: # Explicitly merge with lower 
half 2302580SN/A for reg in range(0, destCnt): 2312580SN/A eWalkCode += ''' 2322580SN/A AA64FpDestP%(reg)d_uw = AA64FpDestP%(reg)d_uw;''' % { "reg" : reg } 2332580SN/A else: # zero upper half 2342580SN/A for reg in range(destCnt, 4): 2352580SN/A eWalkCode += ''' 2362526SN/A AA64FpDestP%(reg)d_uw = 0;''' % { "reg" : reg } 2372482SN/A 2382482SN/A iop = InstObjParams(name, Name, 2392482SN/A "DataX2RegImmOp" if byElem else "DataX2RegOp", 2402469SN/A { "code": eWalkCode, 2412516SN/A "r_count": 2, 2422646Ssaidi@eecs.umich.edu "op_class": opClass }, []) 2432469SN/A if byElem: 2442580SN/A header_output += NeonX2RegImmOpDeclare.subst(iop) 2452469SN/A else: 2462580SN/A header_output += NeonX2RegOpDeclare.subst(iop) 2472580SN/A exec_output += NeonXUnequalRegOpExecute.subst(iop) 2482469SN/A for type in types: 2492526SN/A substDict = { "targs" : type, 2502469SN/A "class_name" : Name } 2512615SN/A exec_output += NeonXExecDeclare.subst(substDict) 2522615SN/A 2532646Ssaidi@eecs.umich.edu def threeRegNarrowInstX(name, Name, opClass, types, op, readDest=False, 2542526SN/A scalar=False, byElem=False, hi=False): 2552469SN/A assert not byElem 2562615SN/A threeUnequalRegInstX(name, Name, opClass, types, op, 2572615SN/A True, True, False, readDest, scalar, byElem, hi) 2582646Ssaidi@eecs.umich.edu 2592526SN/A def threeRegLongInstX(name, Name, opClass, types, op, readDest=False, 2602469SN/A scalar=False, byElem=False, hi=False): 2612516SN/A threeUnequalRegInstX(name, Name, opClass, types, op, 2622646Ssaidi@eecs.umich.edu False, False, True, readDest, scalar, byElem, hi) 2632954Sgblack@eecs.umich.edu 2642580SN/A def threeRegWideInstX(name, Name, opClass, types, op, readDest=False, 2652469SN/A scalar=False, byElem=False, hi=False): 2662580SN/A assert not byElem 2672469SN/A threeUnequalRegInstX(name, Name, opClass, types, op, 2682526SN/A True, False, True, readDest, scalar, byElem, hi) 2692469SN/A 2702615SN/A def twoEqualRegInstX(name, Name, opClass, types, rCount, op, 2712615SN/A 
readDest=False, scalar=False, byElem=False, 2722526SN/A hasImm=False, isDup=False): 2732469SN/A global header_output, exec_output 2742615SN/A assert (not isDup) or byElem 2752989Ssaidi@eecs.umich.edu if byElem: 2762469SN/A hasImm = True 2772469SN/A if isDup: 2782224SN/A eWalkCode = simd64EnabledCheckCode + ''' 2792646Ssaidi@eecs.umich.edu FullRegVect srcReg1; 2802516SN/A RegVect destReg; 2812516SN/A ''' 2822516SN/A else: 2832469SN/A eWalkCode = simd64EnabledCheckCode + ''' 2842469SN/A RegVect srcReg1, destReg; 2852469SN/A ''' 2862469SN/A for reg in range(4 if isDup else rCount): 2872469SN/A eWalkCode += ''' 2882526SN/A srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw); 2892469SN/A ''' % { "reg" : reg } 2902996Sgblack@eecs.umich.edu if readDest: 2912996Sgblack@eecs.umich.edu eWalkCode += ''' 2922469SN/A destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw); 2932469SN/A ''' % { "reg" : reg } 2942469SN/A readDestCode = '' 2952996Sgblack@eecs.umich.edu if readDest: 2962996Sgblack@eecs.umich.edu readDestCode = 'destElem = gtoh(destReg.elements[i]);' 2972996Sgblack@eecs.umich.edu scalarCheck = ''' 2982996Sgblack@eecs.umich.edu if (i != 0) { 2992996Sgblack@eecs.umich.edu destReg.elements[i] = 0; 3002469SN/A continue; 3012469SN/A } 3022469SN/A ''' 3032469SN/A eWalkCode += ''' 3042469SN/A for (unsigned i = 0; i < eCount; i++) { 3052526SN/A %(scalarCheck)s 3062469SN/A unsigned j = i; 3072516SN/A Element srcElem1 = gtoh(srcReg1.elements[%(src1Index)s]); 3082469SN/A Element destElem; 3092469SN/A %(readDest)s 3103753Sgblack@eecs.umich.edu %(op)s 3112469SN/A destReg.elements[j] = htog(destElem); 3122469SN/A } 3132469SN/A ''' % { "op" : op, "readDest" : readDestCode, 3142526SN/A "scalarCheck" : scalarCheck if scalar else "", 3152469SN/A "src1Index" : "imm" if byElem else "i" } 3162516SN/A for reg in range(rCount): 3172469SN/A eWalkCode += ''' 3182469SN/A AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 3193753Sgblack@eecs.umich.edu ''' % { "reg" : reg } 3202469SN/A if rCount < 
4: # zero upper half 3212469SN/A for reg in range(rCount, 4): 3222469SN/A eWalkCode += ''' 3232526SN/A AA64FpDestP%(reg)d_uw = 0; 3242469SN/A ''' % { "reg" : reg } 3252996Sgblack@eecs.umich.edu iop = InstObjParams(name, Name, 3262996Sgblack@eecs.umich.edu "DataX1RegImmOp" if hasImm else "DataX1RegOp", 3272954Sgblack@eecs.umich.edu { "code": eWalkCode, 3282954Sgblack@eecs.umich.edu "r_count": rCount, 3292469SN/A "op_class": opClass }, []) 3303753Sgblack@eecs.umich.edu if hasImm: 3312469SN/A header_output += NeonX1RegImmOpDeclare.subst(iop) 3322469SN/A else: 3332996Sgblack@eecs.umich.edu header_output += NeonX1RegOpDeclare.subst(iop) 3342526SN/A exec_output += NeonXEqualRegOpExecute.subst(iop) 3352469SN/A for type in types: 3362516SN/A substDict = { "targs" : type, 3372469SN/A "class_name" : Name } 3382469SN/A exec_output += NeonXExecDeclare.subst(substDict) 3392469SN/A 3403753Sgblack@eecs.umich.edu def twoRegLongInstX(name, Name, opClass, types, op, readDest=False, 3412469SN/A hi=False, hasImm=False): 3422469SN/A global header_output, exec_output 3432469SN/A eWalkCode = simd64EnabledCheckCode + ''' 3442526SN/A RegVect srcReg1; 3452469SN/A BigRegVect destReg; 3462516SN/A ''' 3472469SN/A destReg = 0 if not hi else 2 3482469SN/A for reg in range(2): 3492516SN/A eWalkCode += ''' 3503753Sgblack@eecs.umich.edu srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(destReg)d_uw); 3512646Ssaidi@eecs.umich.edu ''' % { "reg" : reg, "destReg": destReg } 3522469SN/A destReg += 1 3532469SN/A destReg = 0 if not hi else 2 3542646Ssaidi@eecs.umich.edu if readDest: 3553753Sgblack@eecs.umich.edu for reg in range(4): 3562469SN/A eWalkCode += ''' 3572469SN/A destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw); 3582469SN/A ''' % { "reg" : reg } 3592526SN/A destReg += 1 3602526SN/A readDestCode = '' 3612526SN/A if readDest: 3622526SN/A readDestCode = 'destReg = gtoh(destReg.elements[i]);' 3632526SN/A eWalkCode += ''' 3642526SN/A for (unsigned i = 0; i < eCount; i++) { 3652526SN/A Element srcElem1 = 
gtoh(srcReg1.elements[i]); 3662469SN/A BigElement destElem; 3672526SN/A %(readDest)s 3682526SN/A %(op)s 3692526SN/A destReg.elements[i] = htog(destElem); 3702526SN/A } 3712526SN/A ''' % { "op" : op, "readDest" : readDestCode } 3722526SN/A for reg in range(4): 3732526SN/A eWalkCode += ''' 3742526SN/A AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 3752954Sgblack@eecs.umich.edu ''' % { "reg" : reg } 3763587Sgblack@eecs.umich.edu iop = InstObjParams(name, Name, 3773587Sgblack@eecs.umich.edu "DataX1RegImmOp" if hasImm else "DataX1RegOp", 3783587Sgblack@eecs.umich.edu { "code": eWalkCode, 3793587Sgblack@eecs.umich.edu "r_count": 2, 3803823Ssaidi@eecs.umich.edu "op_class": opClass }, []) 3813587Sgblack@eecs.umich.edu if hasImm: 3823587Sgblack@eecs.umich.edu header_output += NeonX1RegImmOpDeclare.subst(iop) 3833587Sgblack@eecs.umich.edu else: 3843587Sgblack@eecs.umich.edu header_output += NeonX1RegOpDeclare.subst(iop) 3853587Sgblack@eecs.umich.edu exec_output += NeonXUnequalRegOpExecute.subst(iop) 3863587Sgblack@eecs.umich.edu for type in types: 3873587Sgblack@eecs.umich.edu substDict = { "targs" : type, 3883587Sgblack@eecs.umich.edu "class_name" : Name } 3893587Sgblack@eecs.umich.edu exec_output += NeonXExecDeclare.subst(substDict) 3903587Sgblack@eecs.umich.edu 3913587Sgblack@eecs.umich.edu def twoRegNarrowInstX(name, Name, opClass, types, op, readDest=False, 3922954Sgblack@eecs.umich.edu scalar=False, hi=False, hasImm=False): 3932954Sgblack@eecs.umich.edu global header_output, exec_output 3942954Sgblack@eecs.umich.edu eWalkCode = simd64EnabledCheckCode + ''' 3953587Sgblack@eecs.umich.edu BigRegVect srcReg1; 3963587Sgblack@eecs.umich.edu RegVect destReg; 3973587Sgblack@eecs.umich.edu ''' 3983587Sgblack@eecs.umich.edu for reg in range(4): 3993587Sgblack@eecs.umich.edu eWalkCode += ''' 4003587Sgblack@eecs.umich.edu srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw); 4013587Sgblack@eecs.umich.edu ''' % { "reg" : reg } 4023587Sgblack@eecs.umich.edu if readDest: 
4032954Sgblack@eecs.umich.edu for reg in range(2): 4043587Sgblack@eecs.umich.edu eWalkCode += ''' 4053587Sgblack@eecs.umich.edu destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw); 4063823Ssaidi@eecs.umich.edu ''' % { "reg" : reg } 4073823Ssaidi@eecs.umich.edu else: 4083823Ssaidi@eecs.umich.edu eWalkCode += ''' 4093598Sgblack@eecs.umich.edu destReg.elements[0] = 0; 4103598Sgblack@eecs.umich.edu ''' % { "reg" : reg } 4113598Sgblack@eecs.umich.edu readDestCode = '' 4123598Sgblack@eecs.umich.edu if readDest: 4133598Sgblack@eecs.umich.edu readDestCode = 'destElem = gtoh(destReg.elements[i]);' 4143598Sgblack@eecs.umich.edu scalarCheck = ''' 4153598Sgblack@eecs.umich.edu if (i != 0) { 4163598Sgblack@eecs.umich.edu destReg.elements[i] = 0; 4173598Sgblack@eecs.umich.edu continue; 4182954Sgblack@eecs.umich.edu } 4193587Sgblack@eecs.umich.edu ''' 4203587Sgblack@eecs.umich.edu eWalkCode += ''' 4213587Sgblack@eecs.umich.edu for (unsigned i = 0; i < eCount; i++) { 4223587Sgblack@eecs.umich.edu %(scalarCheck)s 4233587Sgblack@eecs.umich.edu BigElement srcElem1 = gtoh(srcReg1.elements[i]); 4243587Sgblack@eecs.umich.edu Element destElem; 4253587Sgblack@eecs.umich.edu %(readDest)s 4263587Sgblack@eecs.umich.edu %(op)s 4273587Sgblack@eecs.umich.edu destReg.elements[i] = htog(destElem); 4283587Sgblack@eecs.umich.edu } 4293587Sgblack@eecs.umich.edu ''' % { "op" : op, "readDest" : readDestCode, 4303587Sgblack@eecs.umich.edu "scalarCheck" : scalarCheck if scalar else "" } 4313587Sgblack@eecs.umich.edu destReg = 0 if not hi else 2 4323823Ssaidi@eecs.umich.edu for reg in range(2): 4333587Sgblack@eecs.umich.edu eWalkCode += ''' 4343587Sgblack@eecs.umich.edu AA64FpDestP%(destReg)d_uw = gtoh(destReg.regs[%(reg)d]); 4353587Sgblack@eecs.umich.edu ''' % { "reg" : reg, "destReg": destReg } 4363587Sgblack@eecs.umich.edu destReg += 1 4373587Sgblack@eecs.umich.edu if hi: 4383587Sgblack@eecs.umich.edu for reg in range(0, 2): # Explicitly merge with the lower half 4393587Sgblack@eecs.umich.edu 
eWalkCode += ''' 4403587Sgblack@eecs.umich.edu AA64FpDestP%(reg)d_uw = AA64FpDestP%(reg)d_uw;''' % { "reg" : reg } 4413587Sgblack@eecs.umich.edu else: 4423587Sgblack@eecs.umich.edu for reg in range(2, 4): # zero upper half 4433587Sgblack@eecs.umich.edu eWalkCode += ''' 4443587Sgblack@eecs.umich.edu AA64FpDestP%(reg)d_uw = 0; 4453587Sgblack@eecs.umich.edu ''' % { "reg" : reg } 4463587Sgblack@eecs.umich.edu 4473587Sgblack@eecs.umich.edu iop = InstObjParams(name, Name, 4483587Sgblack@eecs.umich.edu "DataX1RegImmOp" if hasImm else "DataX1RegOp", 4493587Sgblack@eecs.umich.edu { "code": eWalkCode, 4503587Sgblack@eecs.umich.edu "r_count": 2, 4513587Sgblack@eecs.umich.edu "op_class": opClass }, []) 4523587Sgblack@eecs.umich.edu if hasImm: 4533587Sgblack@eecs.umich.edu header_output += NeonX1RegImmOpDeclare.subst(iop) 4543587Sgblack@eecs.umich.edu else: 4553823Ssaidi@eecs.umich.edu header_output += NeonX1RegOpDeclare.subst(iop) 4563587Sgblack@eecs.umich.edu exec_output += NeonXUnequalRegOpExecute.subst(iop) 4573587Sgblack@eecs.umich.edu for type in types: 4583587Sgblack@eecs.umich.edu substDict = { "targs" : type, 4593587Sgblack@eecs.umich.edu "class_name" : Name } 4603587Sgblack@eecs.umich.edu exec_output += NeonXExecDeclare.subst(substDict) 4613587Sgblack@eecs.umich.edu 4623587Sgblack@eecs.umich.edu def threeRegScrambleInstX(name, Name, opClass, types, rCount, op): 4633587Sgblack@eecs.umich.edu global header_output, exec_output 4643587Sgblack@eecs.umich.edu eWalkCode = simd64EnabledCheckCode + ''' 4653587Sgblack@eecs.umich.edu RegVect srcReg1, srcReg2, destReg; 4663587Sgblack@eecs.umich.edu ''' 4673587Sgblack@eecs.umich.edu for reg in range(rCount): 4683587Sgblack@eecs.umich.edu eWalkCode += ''' 4693587Sgblack@eecs.umich.edu srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw); 4702526SN/A srcReg2.regs[%(reg)d] = htog(AA64FpOp2P%(reg)d_uw); 4712526SN/A ''' % { "reg" : reg } 4722526SN/A eWalkCode += op 4732526SN/A for reg in range(rCount): 4742646Ssaidi@eecs.umich.edu 
eWalkCode += ''' 4752526SN/A AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 4762646Ssaidi@eecs.umich.edu ''' % { "reg" : reg } 4772526SN/A if rCount < 4: 4782526SN/A for reg in range(rCount, 4): 4792526SN/A eWalkCode += ''' 4802469SN/A AA64FpDestP%(reg)d_uw = 0; 4812526SN/A ''' % { "reg" : reg } 4822526SN/A iop = InstObjParams(name, Name, 4832526SN/A "DataX2RegOp", 4842526SN/A { "code": eWalkCode, 4852646Ssaidi@eecs.umich.edu "r_count": rCount, 4862591SN/A "op_class": opClass }, []) 4872591SN/A header_output += NeonX2RegOpDeclare.subst(iop) 4882591SN/A exec_output += NeonXEqualRegOpExecute.subst(iop) 4892526SN/A for type in types: 4902526SN/A substDict = { "targs" : type, 4912646Ssaidi@eecs.umich.edu "class_name" : Name } 4922591SN/A exec_output += NeonXExecDeclare.subst(substDict) 4932591SN/A 4942591SN/A def insFromVecElemInstX(name, Name, opClass, types, rCount): 4952526SN/A global header_output, exec_output 4962224SN/A eWalkCode = simd64EnabledCheckCode + ''' 4972526SN/A FullRegVect srcReg1; 4982526SN/A RegVect destReg; 4992615SN/A ''' 5002615SN/A for reg in range(4): 5012526SN/A eWalkCode += ''' 5022526SN/A srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw); 5032526SN/A ''' % { "reg" : reg } 5042526SN/A for reg in range(rCount): 5052526SN/A eWalkCode += ''' 5062526SN/A destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw); 5072526SN/A ''' % { "reg" : reg } 5082526SN/A eWalkCode += ''' 5092469SN/A Element srcElem1 = gtoh(srcReg1.elements[imm2]); 5102526SN/A Element destElem = srcElem1; 5112526SN/A destReg.elements[imm1] = htog(destElem); 5122516SN/A ''' 5132591SN/A for reg in range(rCount): 5142516SN/A eWalkCode += ''' 5152526SN/A AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 5162526SN/A ''' % { "reg" : reg } 5172526SN/A iop = InstObjParams(name, Name, 5182615SN/A "DataX1Reg2ImmOp", 5192615SN/A { "code": eWalkCode, 5202615SN/A "r_count": rCount, 5212615SN/A "op_class": opClass }, []) 5222615SN/A header_output += NeonX1Reg2ImmOpDeclare.subst(iop) 
5232615SN/A exec_output += NeonXEqualRegOpExecute.subst(iop) 5242526SN/A for type in types: 5253587Sgblack@eecs.umich.edu substDict = { "targs" : type, 5263587Sgblack@eecs.umich.edu "class_name" : Name } 5273587Sgblack@eecs.umich.edu exec_output += NeonXExecDeclare.subst(substDict) 5283587Sgblack@eecs.umich.edu 5293826Ssaidi@eecs.umich.edu def twoRegPairwiseScInstX(name, Name, opClass, types, rCount, op): 5303587Sgblack@eecs.umich.edu global header_output, exec_output 5313587Sgblack@eecs.umich.edu eWalkCode = simd64EnabledCheckCode + ''' 5323587Sgblack@eecs.umich.edu RegVect srcReg1, destReg; 5333587Sgblack@eecs.umich.edu ''' 5343587Sgblack@eecs.umich.edu for reg in range(rCount): 5353587Sgblack@eecs.umich.edu eWalkCode += ''' 5363587Sgblack@eecs.umich.edu srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw); 5373587Sgblack@eecs.umich.edu ''' % { "reg" : reg } 5383587Sgblack@eecs.umich.edu eWalkCode += ''' 5393587Sgblack@eecs.umich.edu Element srcElem1 = gtoh(srcReg1.elements[0]); 5403587Sgblack@eecs.umich.edu Element srcElem2 = gtoh(srcReg1.elements[1]); 5413587Sgblack@eecs.umich.edu Element destElem; 5423587Sgblack@eecs.umich.edu %(op)s 5433587Sgblack@eecs.umich.edu destReg.elements[0] = htog(destElem); 5443587Sgblack@eecs.umich.edu ''' % { "op" : op } 5453823Ssaidi@eecs.umich.edu destCnt = rCount / 2 5463587Sgblack@eecs.umich.edu for reg in range(destCnt): 5473587Sgblack@eecs.umich.edu eWalkCode += ''' 5483587Sgblack@eecs.umich.edu AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 5493823Ssaidi@eecs.umich.edu ''' % { "reg" : reg } 5503587Sgblack@eecs.umich.edu for reg in range(destCnt, 4): # zero upper half 5513823Ssaidi@eecs.umich.edu eWalkCode += ''' 5523598Sgblack@eecs.umich.edu AA64FpDestP%(reg)d_uw = 0; 5533598Sgblack@eecs.umich.edu ''' % { "reg" : reg } 5543598Sgblack@eecs.umich.edu iop = InstObjParams(name, Name, 5553598Sgblack@eecs.umich.edu "DataX1RegOp", 5563598Sgblack@eecs.umich.edu { "code": eWalkCode, 5573598Sgblack@eecs.umich.edu "r_count": 
rCount, 5583598Sgblack@eecs.umich.edu "op_class": opClass }, []) 5593598Sgblack@eecs.umich.edu header_output += NeonX1RegOpDeclare.subst(iop) 5603598Sgblack@eecs.umich.edu exec_output += NeonXEqualRegOpExecute.subst(iop) 5613598Sgblack@eecs.umich.edu for type in types: 5623587Sgblack@eecs.umich.edu substDict = { "targs" : type, 5632526SN/A "class_name" : Name } 5643417Sgblack@eecs.umich.edu exec_output += NeonXExecDeclare.subst(substDict) 5653417Sgblack@eecs.umich.edu 5663417Sgblack@eecs.umich.edu def twoRegAcrossInstX(name, Name, opClass, types, rCount, op, 5673417Sgblack@eecs.umich.edu doubleDest=False, long=False): 5683417Sgblack@eecs.umich.edu global header_output, exec_output 5693417Sgblack@eecs.umich.edu destPrefix = "Big" if long else "" 5703417Sgblack@eecs.umich.edu eWalkCode = simd64EnabledCheckCode + ''' 5713417Sgblack@eecs.umich.edu RegVect srcReg1; 5723417Sgblack@eecs.umich.edu %sRegVect destReg; 5733598Sgblack@eecs.umich.edu ''' % destPrefix 5743417Sgblack@eecs.umich.edu for reg in range(rCount): 5753417Sgblack@eecs.umich.edu eWalkCode += ''' 5763417Sgblack@eecs.umich.edu srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw); 5773417Sgblack@eecs.umich.edu ''' % { "reg" : reg } 5783417Sgblack@eecs.umich.edu eWalkCode += ''' 5793417Sgblack@eecs.umich.edu destReg.regs[0] = 0; 5803417Sgblack@eecs.umich.edu %(destPrefix)sElement destElem = 0; 5813417Sgblack@eecs.umich.edu for (unsigned i = 0; i < eCount; i++) { 5822526SN/A Element srcElem1 = gtoh(srcReg1.elements[i]); 5833587Sgblack@eecs.umich.edu if (i == 0) { 5843587Sgblack@eecs.umich.edu destElem = srcElem1; 5853587Sgblack@eecs.umich.edu } else { 5863587Sgblack@eecs.umich.edu %(op)s 5873587Sgblack@eecs.umich.edu } 5883587Sgblack@eecs.umich.edu } 5893587Sgblack@eecs.umich.edu destReg.elements[0] = htog(destElem); 5903587Sgblack@eecs.umich.edu ''' % { "op" : op, "destPrefix" : destPrefix } 5913587Sgblack@eecs.umich.edu destCnt = 2 if doubleDest else 1 5923587Sgblack@eecs.umich.edu for reg in range(destCnt): 
5933587Sgblack@eecs.umich.edu eWalkCode += ''' 5943587Sgblack@eecs.umich.edu AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 5953587Sgblack@eecs.umich.edu ''' % { "reg" : reg } 5963587Sgblack@eecs.umich.edu for reg in range(destCnt, 4): # zero upper half 5973587Sgblack@eecs.umich.edu eWalkCode += ''' 5983587Sgblack@eecs.umich.edu AA64FpDestP%(reg)d_uw = 0; 5993587Sgblack@eecs.umich.edu ''' % { "reg" : reg } 6003587Sgblack@eecs.umich.edu iop = InstObjParams(name, Name, 6013587Sgblack@eecs.umich.edu "DataX1RegOp", 6023587Sgblack@eecs.umich.edu { "code": eWalkCode, 6033587Sgblack@eecs.umich.edu "r_count": rCount, 6043587Sgblack@eecs.umich.edu "op_class": opClass }, []) 6053587Sgblack@eecs.umich.edu header_output += NeonX1RegOpDeclare.subst(iop) 6063587Sgblack@eecs.umich.edu if long: 6073587Sgblack@eecs.umich.edu exec_output += NeonXUnequalRegOpExecute.subst(iop) 6083823Ssaidi@eecs.umich.edu else: 6093587Sgblack@eecs.umich.edu exec_output += NeonXEqualRegOpExecute.subst(iop) 6103587Sgblack@eecs.umich.edu for type in types: 6113587Sgblack@eecs.umich.edu substDict = { "targs" : type, 6123587Sgblack@eecs.umich.edu "class_name" : Name } 6133587Sgblack@eecs.umich.edu exec_output += NeonXExecDeclare.subst(substDict) 6143587Sgblack@eecs.umich.edu 6153587Sgblack@eecs.umich.edu def twoRegCondenseInstX(name, Name, opClass, types, rCount, op, 6163587Sgblack@eecs.umich.edu readDest=False): 6173587Sgblack@eecs.umich.edu global header_output, exec_output 6183587Sgblack@eecs.umich.edu eWalkCode = simd64EnabledCheckCode + ''' 6193587Sgblack@eecs.umich.edu RegVect srcRegs; 6203587Sgblack@eecs.umich.edu BigRegVect destReg; 6213587Sgblack@eecs.umich.edu ''' 6223587Sgblack@eecs.umich.edu for reg in range(rCount): 6233587Sgblack@eecs.umich.edu eWalkCode += ''' 6243587Sgblack@eecs.umich.edu srcRegs.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw); 6253587Sgblack@eecs.umich.edu ''' % { "reg" : reg } 6263587Sgblack@eecs.umich.edu if readDest: 6273587Sgblack@eecs.umich.edu eWalkCode += ''' 
6283587Sgblack@eecs.umich.edu destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw); 6293587Sgblack@eecs.umich.edu ''' % { "reg" : reg } 6303587Sgblack@eecs.umich.edu readDestCode = '' 6313587Sgblack@eecs.umich.edu if readDest: 6323587Sgblack@eecs.umich.edu readDestCode = 'destElem = gtoh(destReg.elements[i]);' 6333587Sgblack@eecs.umich.edu eWalkCode += ''' 6343587Sgblack@eecs.umich.edu for (unsigned i = 0; i < eCount / 2; i++) { 6353587Sgblack@eecs.umich.edu Element srcElem1 = gtoh(srcRegs.elements[2 * i]); 6363587Sgblack@eecs.umich.edu Element srcElem2 = gtoh(srcRegs.elements[2 * i + 1]); 6373587Sgblack@eecs.umich.edu BigElement destElem; 6383587Sgblack@eecs.umich.edu %(readDest)s 6393587Sgblack@eecs.umich.edu %(op)s 6403587Sgblack@eecs.umich.edu destReg.elements[i] = htog(destElem); 6413587Sgblack@eecs.umich.edu } 6423587Sgblack@eecs.umich.edu ''' % { "op" : op, "readDest" : readDestCode } 6433587Sgblack@eecs.umich.edu for reg in range(rCount): 6443587Sgblack@eecs.umich.edu eWalkCode += ''' 6453823Ssaidi@eecs.umich.edu AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 6463587Sgblack@eecs.umich.edu ''' % { "reg" : reg } 6472954Sgblack@eecs.umich.edu if rCount < 4: # zero upper half 6482963Sgblack@eecs.umich.edu for reg in range(rCount, 4): 6492963Sgblack@eecs.umich.edu eWalkCode += ''' 6503279Sgblack@eecs.umich.edu AA64FpDestP%(reg)d_uw = 0; 6512963Sgblack@eecs.umich.edu ''' % { "reg" : reg } 6522963Sgblack@eecs.umich.edu iop = InstObjParams(name, Name, 6532963Sgblack@eecs.umich.edu "DataX1RegOp", 6542963Sgblack@eecs.umich.edu { "code": eWalkCode, 6552963Sgblack@eecs.umich.edu "r_count": rCount, 6563057Sgblack@eecs.umich.edu "op_class": opClass }, []) 6572963Sgblack@eecs.umich.edu header_output += NeonX1RegOpDeclare.subst(iop) 6582963Sgblack@eecs.umich.edu exec_output += NeonXUnequalRegOpExecute.subst(iop) 6592963Sgblack@eecs.umich.edu for type in types: 6602963Sgblack@eecs.umich.edu substDict = { "targs" : type, 6612963Sgblack@eecs.umich.edu "class_name" : Name 
} 6622963Sgblack@eecs.umich.edu exec_output += NeonXExecDeclare.subst(substDict) 6633279Sgblack@eecs.umich.edu 6642963Sgblack@eecs.umich.edu def oneRegImmInstX(name, Name, opClass, types, rCount, op, readDest=False): 6652963Sgblack@eecs.umich.edu global header_output, exec_output 6662963Sgblack@eecs.umich.edu eWalkCode = simd64EnabledCheckCode + ''' 6672963Sgblack@eecs.umich.edu RegVect destReg; 6682963Sgblack@eecs.umich.edu ''' 6693057Sgblack@eecs.umich.edu if readDest: 6702963Sgblack@eecs.umich.edu for reg in range(rCount): 6712963Sgblack@eecs.umich.edu eWalkCode += ''' 6722963Sgblack@eecs.umich.edu destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw); 6732963Sgblack@eecs.umich.edu ''' % { "reg" : reg } 6742963Sgblack@eecs.umich.edu readDestCode = '' 6752963Sgblack@eecs.umich.edu if readDest: 6763279Sgblack@eecs.umich.edu readDestCode = 'destElem = gtoh(destReg.elements[i]);' 6772963Sgblack@eecs.umich.edu eWalkCode += ''' 6782963Sgblack@eecs.umich.edu for (unsigned i = 0; i < eCount; i++) { 6792963Sgblack@eecs.umich.edu Element destElem; 6802963Sgblack@eecs.umich.edu %(readDest)s 6812963Sgblack@eecs.umich.edu %(op)s 6823057Sgblack@eecs.umich.edu destReg.elements[i] = htog(destElem); 6832963Sgblack@eecs.umich.edu } 6842963Sgblack@eecs.umich.edu ''' % { "op" : op, "readDest" : readDestCode } 6852963Sgblack@eecs.umich.edu for reg in range(rCount): 6862963Sgblack@eecs.umich.edu eWalkCode += ''' 6872963Sgblack@eecs.umich.edu AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 6883279Sgblack@eecs.umich.edu ''' % { "reg" : reg } 6892963Sgblack@eecs.umich.edu if rCount < 4: # zero upper half 6902963Sgblack@eecs.umich.edu for reg in range(rCount, 4): 6913279Sgblack@eecs.umich.edu eWalkCode += ''' 6922963Sgblack@eecs.umich.edu AA64FpDestP%(reg)d_uw = 0; 6932963Sgblack@eecs.umich.edu ''' % { "reg" : reg } 6943279Sgblack@eecs.umich.edu iop = InstObjParams(name, Name, 6952963Sgblack@eecs.umich.edu "DataXImmOnlyOp", 6962963Sgblack@eecs.umich.edu { "code": eWalkCode, 
6973279Sgblack@eecs.umich.edu "r_count": rCount, 6982963Sgblack@eecs.umich.edu "op_class": opClass }, []) 6992963Sgblack@eecs.umich.edu header_output += NeonX1RegImmOnlyOpDeclare.subst(iop) 7003279Sgblack@eecs.umich.edu exec_output += NeonXEqualRegOpExecute.subst(iop) 7012963Sgblack@eecs.umich.edu for type in types: 7022963Sgblack@eecs.umich.edu substDict = { "targs" : type, 7033279Sgblack@eecs.umich.edu "class_name" : Name } 7042963Sgblack@eecs.umich.edu exec_output += NeonXExecDeclare.subst(substDict) 7052963Sgblack@eecs.umich.edu 7063279Sgblack@eecs.umich.edu def dupGprInstX(name, Name, opClass, types, rCount, gprSpec): 7072963Sgblack@eecs.umich.edu global header_output, exec_output 7082963Sgblack@eecs.umich.edu eWalkCode = simd64EnabledCheckCode + ''' 7092963Sgblack@eecs.umich.edu RegVect destReg; 7102963Sgblack@eecs.umich.edu for (unsigned i = 0; i < eCount; i++) { 7112963Sgblack@eecs.umich.edu destReg.elements[i] = htog((Element) %sOp1); 7122963Sgblack@eecs.umich.edu } 7133279Sgblack@eecs.umich.edu ''' % gprSpec 7142963Sgblack@eecs.umich.edu for reg in range(rCount): 7152963Sgblack@eecs.umich.edu eWalkCode += ''' 7162963Sgblack@eecs.umich.edu AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 7172963Sgblack@eecs.umich.edu ''' % { "reg" : reg } 7182963Sgblack@eecs.umich.edu if rCount < 4: # zero upper half 7192963Sgblack@eecs.umich.edu for reg in range(rCount, 4): 7203279Sgblack@eecs.umich.edu eWalkCode += ''' 7212963Sgblack@eecs.umich.edu AA64FpDestP%(reg)d_uw = 0; 7223279Sgblack@eecs.umich.edu ''' % { "reg" : reg } 7232963Sgblack@eecs.umich.edu iop = InstObjParams(name, Name, 7242963Sgblack@eecs.umich.edu "DataX1RegOp", 7253279Sgblack@eecs.umich.edu { "code": eWalkCode, 7262963Sgblack@eecs.umich.edu "r_count": rCount, 7273279Sgblack@eecs.umich.edu "op_class": opClass }, []) 7282963Sgblack@eecs.umich.edu header_output += NeonX1RegOpDeclare.subst(iop) 7292963Sgblack@eecs.umich.edu exec_output += NeonXEqualRegOpExecute.subst(iop) 7302963Sgblack@eecs.umich.edu 
for type in types: 7312963Sgblack@eecs.umich.edu substDict = { "targs" : type, 7322963Sgblack@eecs.umich.edu "class_name" : Name } 7333279Sgblack@eecs.umich.edu exec_output += NeonXExecDeclare.subst(substDict) 7342963Sgblack@eecs.umich.edu 7352963Sgblack@eecs.umich.edu def extInstX(name, Name, opClass, types, rCount, op): 7363279Sgblack@eecs.umich.edu global header_output, exec_output 7372963Sgblack@eecs.umich.edu eWalkCode = simd64EnabledCheckCode + ''' 7382963Sgblack@eecs.umich.edu RegVect srcReg1, srcReg2, destReg; 7392963Sgblack@eecs.umich.edu ''' 7402963Sgblack@eecs.umich.edu for reg in range(rCount): 7412954Sgblack@eecs.umich.edu eWalkCode += ''' 7422526SN/A srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw); 7432954Sgblack@eecs.umich.edu srcReg2.regs[%(reg)d] = htog(AA64FpOp2P%(reg)d_uw); 7442954Sgblack@eecs.umich.edu ''' % { "reg" : reg } 7452954Sgblack@eecs.umich.edu eWalkCode += op 7462954Sgblack@eecs.umich.edu for reg in range(rCount): 7472954Sgblack@eecs.umich.edu eWalkCode += ''' 7482954Sgblack@eecs.umich.edu AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 7492954Sgblack@eecs.umich.edu ''' % { "reg" : reg } 7502954Sgblack@eecs.umich.edu if rCount < 4: # zero upper half 7512954Sgblack@eecs.umich.edu for reg in range(rCount, 4): 7522954Sgblack@eecs.umich.edu eWalkCode += ''' 7532954Sgblack@eecs.umich.edu AA64FpDestP%(reg)d_uw = 0; 7542954Sgblack@eecs.umich.edu ''' % { "reg" : reg } 7552954Sgblack@eecs.umich.edu iop = InstObjParams(name, Name, 7562954Sgblack@eecs.umich.edu "DataX2RegImmOp", 7572954Sgblack@eecs.umich.edu { "code": eWalkCode, 7582954Sgblack@eecs.umich.edu "r_count": rCount, 7592954Sgblack@eecs.umich.edu "op_class": opClass }, []) 7602954Sgblack@eecs.umich.edu header_output += NeonX2RegImmOpDeclare.subst(iop) 7613042Sgblack@eecs.umich.edu exec_output += NeonXEqualRegOpExecute.subst(iop) 7622963Sgblack@eecs.umich.edu for type in types: 7633042Sgblack@eecs.umich.edu substDict = { "targs" : type, 7642963Sgblack@eecs.umich.edu "class_name" 
: Name } 7652963Sgblack@eecs.umich.edu exec_output += NeonXExecDeclare.subst(substDict) 7662954Sgblack@eecs.umich.edu 7672963Sgblack@eecs.umich.edu def insFromGprInstX(name, Name, opClass, types, rCount, gprSpec): 7682963Sgblack@eecs.umich.edu global header_output, exec_output 7693042Sgblack@eecs.umich.edu eWalkCode = simd64EnabledCheckCode + ''' 7702963Sgblack@eecs.umich.edu RegVect destReg; 7712963Sgblack@eecs.umich.edu ''' 7722954Sgblack@eecs.umich.edu for reg in range(rCount): 7732954Sgblack@eecs.umich.edu eWalkCode += ''' 7742954Sgblack@eecs.umich.edu destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw); 7752954Sgblack@eecs.umich.edu ''' % { "reg" : reg } 7762954Sgblack@eecs.umich.edu eWalkCode += ''' 7772954Sgblack@eecs.umich.edu destReg.elements[imm] = htog((Element) %sOp1); 7782954Sgblack@eecs.umich.edu ''' % gprSpec 7792954Sgblack@eecs.umich.edu for reg in range(rCount): 7802954Sgblack@eecs.umich.edu eWalkCode += ''' 7812954Sgblack@eecs.umich.edu AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 7822954Sgblack@eecs.umich.edu ''' % { "reg" : reg } 7832954Sgblack@eecs.umich.edu iop = InstObjParams(name, Name, 7842954Sgblack@eecs.umich.edu "DataX1RegImmOp", 7852954Sgblack@eecs.umich.edu { "code": eWalkCode, 7862954Sgblack@eecs.umich.edu "r_count": rCount, 7872954Sgblack@eecs.umich.edu "op_class": opClass }, []) 7882954Sgblack@eecs.umich.edu header_output += NeonX1RegImmOpDeclare.subst(iop) 7892954Sgblack@eecs.umich.edu exec_output += NeonXEqualRegOpExecute.subst(iop) 7902954Sgblack@eecs.umich.edu for type in types: 7912963Sgblack@eecs.umich.edu substDict = { "targs" : type, 7923057Sgblack@eecs.umich.edu "class_name" : Name } 7933057Sgblack@eecs.umich.edu exec_output += NeonXExecDeclare.subst(substDict) 7943057Sgblack@eecs.umich.edu 7953057Sgblack@eecs.umich.edu def insToGprInstX(name, Name, opClass, types, rCount, gprSpec, 7963057Sgblack@eecs.umich.edu signExt=False): 7973057Sgblack@eecs.umich.edu global header_output, exec_output 
7983057Sgblack@eecs.umich.edu eWalkCode = simd64EnabledCheckCode + ''' 7993057Sgblack@eecs.umich.edu FullRegVect srcReg; 8003057Sgblack@eecs.umich.edu ''' 8013057Sgblack@eecs.umich.edu for reg in range(4): 8023057Sgblack@eecs.umich.edu eWalkCode += ''' 8033057Sgblack@eecs.umich.edu srcReg.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw); 8043057Sgblack@eecs.umich.edu ''' % { "reg" : reg } 8053057Sgblack@eecs.umich.edu if signExt: 8063057Sgblack@eecs.umich.edu eWalkCode += ''' 8073057Sgblack@eecs.umich.edu %sDest = sext<sizeof(Element) * 8>(srcReg.elements[imm]); 8083057Sgblack@eecs.umich.edu ''' % gprSpec 8093057Sgblack@eecs.umich.edu else: 8103057Sgblack@eecs.umich.edu eWalkCode += ''' 8113057Sgblack@eecs.umich.edu %sDest = srcReg.elements[imm]; 8123057Sgblack@eecs.umich.edu ''' % gprSpec 8133057Sgblack@eecs.umich.edu iop = InstObjParams(name, Name, 8143057Sgblack@eecs.umich.edu "DataX1RegImmOp", 8153057Sgblack@eecs.umich.edu { "code": eWalkCode, 8162963Sgblack@eecs.umich.edu "r_count": rCount, 8172954Sgblack@eecs.umich.edu "op_class": opClass }, []) 8182954Sgblack@eecs.umich.edu header_output += NeonX1RegImmOpDeclare.subst(iop) 8192954Sgblack@eecs.umich.edu exec_output += NeonXEqualRegOpExecute.subst(iop) 8202954Sgblack@eecs.umich.edu for type in types: 8212954Sgblack@eecs.umich.edu substDict = { "targs" : type, 8222954Sgblack@eecs.umich.edu "class_name" : Name } 8232954Sgblack@eecs.umich.edu exec_output += NeonXExecDeclare.subst(substDict) 8242954Sgblack@eecs.umich.edu 8252954Sgblack@eecs.umich.edu def tbxTblInstX(name, Name, opClass, types, length, isTbl, rCount): 8262954Sgblack@eecs.umich.edu global header_output, decoder_output, exec_output 8272954Sgblack@eecs.umich.edu code = simd64EnabledCheckCode + ''' 8282963Sgblack@eecs.umich.edu union 8293279Sgblack@eecs.umich.edu { 8302954Sgblack@eecs.umich.edu uint8_t bytes[64]; 8312954Sgblack@eecs.umich.edu uint32_t regs[16]; 8322954Sgblack@eecs.umich.edu } table; 8332954Sgblack@eecs.umich.edu 8342963Sgblack@eecs.umich.edu 
union 8352963Sgblack@eecs.umich.edu { 8362963Sgblack@eecs.umich.edu uint8_t bytes[%(rCount)d * 4]; 8372963Sgblack@eecs.umich.edu uint32_t regs[%(rCount)d]; 8383279Sgblack@eecs.umich.edu } destReg, srcReg2; 8392963Sgblack@eecs.umich.edu 8402954Sgblack@eecs.umich.edu const unsigned length = %(length)d; 8412954Sgblack@eecs.umich.edu const bool isTbl = %(isTbl)s; 8422963Sgblack@eecs.umich.edu ''' % { "rCount" : rCount, "length" : length, "isTbl" : isTbl } 8432963Sgblack@eecs.umich.edu for reg in range(rCount): 8442963Sgblack@eecs.umich.edu code += ''' 8452963Sgblack@eecs.umich.edu srcReg2.regs[%(reg)d] = htog(AA64FpOp2P%(reg)d_uw); 8463279Sgblack@eecs.umich.edu destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw); 8472963Sgblack@eecs.umich.edu ''' % { "reg" : reg } 8482954Sgblack@eecs.umich.edu for reg in range(16): 8492954Sgblack@eecs.umich.edu if reg < length * 4: 8502954Sgblack@eecs.umich.edu code += ''' 8512954Sgblack@eecs.umich.edu table.regs[%(reg)d] = htog(AA64FpOp1P%(p)dV%(v)dS_uw); 8522954Sgblack@eecs.umich.edu ''' % { "reg" : reg, "p" : reg % 4, "v" : reg / 4 } 8532954Sgblack@eecs.umich.edu else: 8542954Sgblack@eecs.umich.edu code += ''' 8552954Sgblack@eecs.umich.edu table.regs[%(reg)d] = 0; 8563057Sgblack@eecs.umich.edu ''' % { "reg" : reg } 8573057Sgblack@eecs.umich.edu code += ''' 8582954Sgblack@eecs.umich.edu for (unsigned i = 0; i < sizeof(destReg); i++) { 8592954Sgblack@eecs.umich.edu uint8_t index = srcReg2.bytes[i]; 8603057Sgblack@eecs.umich.edu if (index < 16 * length) { 8613057Sgblack@eecs.umich.edu destReg.bytes[i] = table.bytes[index]; 8622954Sgblack@eecs.umich.edu } else { 8632954Sgblack@eecs.umich.edu if (isTbl) 8642954Sgblack@eecs.umich.edu destReg.bytes[i] = 0; 8652954Sgblack@eecs.umich.edu // else destReg.bytes[i] unchanged 8662954Sgblack@eecs.umich.edu } 8672954Sgblack@eecs.umich.edu } 8682954Sgblack@eecs.umich.edu ''' 8692954Sgblack@eecs.umich.edu for reg in range(rCount): 8702954Sgblack@eecs.umich.edu code += ''' 
8712954Sgblack@eecs.umich.edu AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 8722526SN/A ''' % { "reg" : reg } 8732526SN/A if rCount < 4: # zero upper half 8742526SN/A for reg in range(rCount, 4): 8752526SN/A code += ''' 8762526SN/A AA64FpDestP%(reg)d_uw = 0; 8772526SN/A ''' % { "reg" : reg } 8782526SN/A iop = InstObjParams(name, Name, 8792526SN/A "DataX2RegOp", 8802526SN/A { "code": code, 8812526SN/A "r_count": rCount, 8822526SN/A "op_class": opClass }, []) 8832561SN/A header_output += NeonX2RegOpDeclare.subst(iop) 8842561SN/A exec_output += NeonXEqualRegOpExecute.subst(iop) 8852561SN/A for type in types: 8862526SN/A substDict = { "targs" : type, 8872526SN/A "class_name" : Name } 8882526SN/A exec_output += NeonXExecDeclare.subst(substDict) 8892526SN/A 8902526SN/A # ABS 8912561SN/A absCode = ''' 8922561SN/A if (srcElem1 < 0) { 8932561SN/A destElem = -srcElem1; 8942561SN/A } else { 8952561SN/A destElem = srcElem1; 8962646Ssaidi@eecs.umich.edu } 8972561SN/A ''' 8982646Ssaidi@eecs.umich.edu twoEqualRegInstX("abs", "AbsDX", "SimdAluOp", signedTypes, 2, absCode) 8992561SN/A twoEqualRegInstX("abs", "AbsQX", "SimdAluOp", signedTypes, 4, absCode) 9002561SN/A # ADD 9012561SN/A addCode = "destElem = srcElem1 + srcElem2;" 9023417Sgblack@eecs.umich.edu threeEqualRegInstX("add", "AddDX", "SimdAddOp", unsignedTypes, 2, addCode) 9033417Sgblack@eecs.umich.edu threeEqualRegInstX("add", "AddQX", "SimdAddOp", unsignedTypes, 4, addCode) 9043417Sgblack@eecs.umich.edu # ADDHN, ADDHN2 9053417Sgblack@eecs.umich.edu addhnCode = ''' 9062561SN/A destElem = ((BigElement)srcElem1 + (BigElement)srcElem2) >> 9072561SN/A (sizeof(Element) * 8); 9083417Sgblack@eecs.umich.edu ''' 9093417Sgblack@eecs.umich.edu threeRegNarrowInstX("addhn", "AddhnX", "SimdAddOp", smallUnsignedTypes, 9103417Sgblack@eecs.umich.edu addhnCode) 9113417Sgblack@eecs.umich.edu threeRegNarrowInstX("addhn2", "Addhn2X", "SimdAddOp", smallUnsignedTypes, 9122561SN/A addhnCode, hi=True) 9132561SN/A # ADDP (scalar) 9142526SN/A 
twoRegPairwiseScInstX(
    "addp", "AddpScQX", "SimdAddOp", ("uint64_t",), 4, addCode)

# ADDP (vector): addCode applied pairwise
threeEqualRegInstX(
    "addp", "AddpDX", "SimdAddOp", smallUnsignedTypes, 2, addCode,
    pairwise=True)
threeEqualRegInstX(
    "addp", "AddpQX", "SimdAddOp", unsignedTypes, 4, addCode,
    pairwise=True)

# ADDV: accumulate every source element into destElem
# Note: SimdAddOp can be a bit optimistic here
addAcrossCode = "destElem += srcElem1;"
twoRegAcrossInstX(
    "addv", "AddvDX", "SimdAddOp", ("uint8_t", "uint16_t"), 2,
    addAcrossCode)
twoRegAcrossInstX(
    "addv", "AddvQX", "SimdAddOp", smallUnsignedTypes, 4, addAcrossCode)

# AND (bitwise, so only the uint64_t instantiation is generated)
andCode = "destElem = srcElem1 & srcElem2;"
threeEqualRegInstX(
    "and", "AndDX", "SimdAluOp", ("uint64_t",), 2, andCode)
threeEqualRegInstX(
    "and", "AndQX", "SimdAluOp", ("uint64_t",), 4, andCode)

# BIC (immediate): reads the old destination, hence readDest
bicImmCode = "destElem &= ~imm;"
oneRegImmInstX(
    "bic", "BicImmDX", "SimdAluOp", ("uint64_t",), 2, bicImmCode,
    readDest=True)
oneRegImmInstX(
    "bic", "BicImmQX", "SimdAluOp", ("uint64_t",), 4, bicImmCode,
    readDest=True)

# BIC (register)
bicCode = "destElem = srcElem1 & ~srcElem2;"
threeEqualRegInstX(
    "bic", "BicDX", "SimdAluOp", ("uint64_t",), 2, bicCode)
threeEqualRegInstX(
    "bic", "BicQX", "SimdAluOp", ("uint64_t",), 4, bicCode)

# BIF: keep destination bits where srcElem2 is set, take srcElem1 elsewhere
bifCode = "destElem = (destElem & srcElem2) | (srcElem1 & ~srcElem2);"
threeEqualRegInstX(
    "bif", "BifDX", "SimdAluOp", ("uint64_t",), 2, bifCode, True)
threeEqualRegInstX(
    "bif", "BifQX", "SimdAluOp", ("uint64_t",), 4, bifCode, True)

# BIT: take srcElem1 bits where srcElem2 is set, keep destination elsewhere
bitCode = "destElem = (srcElem1 & srcElem2) | (destElem & ~srcElem2);"
threeEqualRegInstX(
    "bit", "BitDX", "SimdAluOp", ("uint64_t",), 2, bitCode, True)
threeEqualRegInstX(
    "bit", "BitQX", "SimdAluOp", ("uint64_t",), 4, bitCode, True)

# BSL: the old destination selects between srcElem1 and srcElem2
bslCode = "destElem = (srcElem1 & destElem) | (srcElem2 & ~destElem);"
threeEqualRegInstX(
    "bsl", "BslDX", "SimdAluOp", ("uint64_t",), 2, bslCode, True)
threeEqualRegInstX(
    "bsl", "BslQX", "SimdAluOp", ("uint64_t",), 4, bslCode, True)

# CLS: count the bits after the top bit that repeat the sign
clsCode = '''
    unsigned count = 0;
    if (srcElem1 < 0) {
        srcElem1 <<= 1;
        while (srcElem1 < 0 && count < sizeof(Element) * 8 - 1) {
            count++;
            srcElem1 <<= 1;
        }
    } else {
        srcElem1 <<= 1;
        while (srcElem1 >= 0 && count < sizeof(Element) * 8 - 1) {
            count++;
            srcElem1 <<= 1;
        }
    }
    destElem = count;
'''
twoEqualRegInstX(
    "cls", "ClsDX", "SimdAluOp", smallSignedTypes, 2, clsCode)
twoEqualRegInstX(
    "cls", "ClsQX", "SimdAluOp", smallSignedTypes, 4, clsCode)

# CLZ: signed element types are used so ">= 0" tests the top bit
clzCode = '''
    unsigned count = 0;
    while (srcElem1 >= 0 && count < sizeof(Element) * 8) {
        count++;
        srcElem1 <<= 1;
    }
    destElem = count;
'''
twoEqualRegInstX(
    "clz", "ClzDX", "SimdAluOp", smallSignedTypes, 2, clzCode)
twoEqualRegInstX(
    "clz", "ClzQX", "SimdAluOp", smallSignedTypes, 4, clzCode)

# CMEQ (register)
# The integer compares below all produce an all-ones element,
# (Element)(-1), when the condition holds and 0 otherwise.
cmeqCode = "destElem = (srcElem1 == srcElem2) ? (Element)(-1) : 0;"
threeEqualRegInstX(
    "cmeq", "CmeqDX", "SimdCmpOp", unsignedTypes, 2, cmeqCode)
threeEqualRegInstX(
    "cmeq", "CmeqQX", "SimdCmpOp", unsignedTypes, 4, cmeqCode)

# CMEQ (zero)
cmeqZeroCode = "destElem = (srcElem1 == 0) ? (Element)(-1) : 0;"
twoEqualRegInstX(
    "cmeq", "CmeqZeroDX", "SimdCmpOp", signedTypes, 2, cmeqZeroCode)
twoEqualRegInstX(
    "cmeq", "CmeqZeroQX", "SimdCmpOp", signedTypes, 4, cmeqZeroCode)

# CMGE (register)
cmgeCode = "destElem = (srcElem1 >= srcElem2) ? (Element)(-1) : 0;"
threeEqualRegInstX(
    "cmge", "CmgeDX", "SimdCmpOp", signedTypes, 2, cmgeCode)
threeEqualRegInstX(
    "cmge", "CmgeQX", "SimdCmpOp", signedTypes, 4, cmgeCode)

# CMGE (zero)
cmgeZeroCode = "destElem = (srcElem1 >= 0) ? (Element)(-1) : 0;"
twoEqualRegInstX(
    "cmge", "CmgeZeroDX", "SimdCmpOp", signedTypes, 2, cmgeZeroCode)
twoEqualRegInstX(
    "cmge", "CmgeZeroQX", "SimdCmpOp", signedTypes, 4, cmgeZeroCode)

# CMGT (register)
cmgtCode = "destElem = (srcElem1 > srcElem2) ? (Element)(-1) : 0;"
threeEqualRegInstX(
    "cmgt", "CmgtDX", "SimdCmpOp", signedTypes, 2, cmgtCode)
threeEqualRegInstX(
    "cmgt", "CmgtQX", "SimdCmpOp", signedTypes, 4, cmgtCode)

# CMGT (zero)
cmgtZeroCode = "destElem = (srcElem1 > 0) ? (Element)(-1) : 0;"
twoEqualRegInstX(
    "cmgt", "CmgtZeroDX", "SimdCmpOp", signedTypes, 2, cmgtZeroCode)
twoEqualRegInstX(
    "cmgt", "CmgtZeroQX", "SimdCmpOp", signedTypes, 4, cmgtZeroCode)

# CMHI (register): the ">" snippet instantiated for unsigned elements
threeEqualRegInstX(
    "cmhi", "CmhiDX", "SimdCmpOp", unsignedTypes, 2, cmgtCode)
threeEqualRegInstX(
    "cmhi", "CmhiQX", "SimdCmpOp", unsignedTypes, 4, cmgtCode)

# CMHS (register): the ">=" snippet instantiated for unsigned elements
threeEqualRegInstX(
    "cmhs", "CmhsDX", "SimdCmpOp", unsignedTypes, 2, cmgeCode)
threeEqualRegInstX(
    "cmhs", "CmhsQX", "SimdCmpOp", unsignedTypes, 4, cmgeCode)

# CMLE (zero)
cmleZeroCode = "destElem = (srcElem1 <= 0) ? (Element)(-1) : 0;"
twoEqualRegInstX(
    "cmle", "CmleZeroDX", "SimdCmpOp", signedTypes, 2, cmleZeroCode)
twoEqualRegInstX(
    "cmle", "CmleZeroQX", "SimdCmpOp", signedTypes, 4, cmleZeroCode)

# CMLT (zero)
cmltZeroCode = "destElem = (srcElem1 < 0) ? (Element)(-1) : 0;"
twoEqualRegInstX(
    "cmlt", "CmltZeroDX", "SimdCmpOp", signedTypes, 2, cmltZeroCode)
twoEqualRegInstX(
    "cmlt", "CmltZeroQX", "SimdCmpOp", signedTypes, 4, cmltZeroCode)

# CMTST (register): set when the operands share at least one set bit
tstCode = "destElem = (srcElem1 & srcElem2) ? (Element)(-1) : 0;"
threeEqualRegInstX(
    "cmtst", "CmtstDX", "SimdAluOp", unsignedTypes, 2, tstCode)
threeEqualRegInstX(
    "cmtst", "CmtstQX", "SimdAluOp", unsignedTypes, 4, tstCode)

# CNT: number of set bits in each byte
cntCode = '''
    unsigned count = 0;
    while (srcElem1 && count < sizeof(Element) * 8) {
        count += srcElem1 & 0x1;
        srcElem1 >>= 1;
    }
    destElem = count;
'''
twoEqualRegInstX(
    "cnt", "CntDX", "SimdAluOp", ("uint8_t",), 2, cntCode)
twoEqualRegInstX(
    "cnt", "CntQX", "SimdAluOp", ("uint8_t",), 4, cntCode)

# DUP (element)
dupCode = "destElem = srcElem1;"
twoEqualRegInstX(
    "dup", "DupElemDX", "SimdMiscOp", smallUnsignedTypes, 2, dupCode,
    isDup=True, byElem=True)
twoEqualRegInstX(
    "dup", "DupElemQX", "SimdMiscOp", unsignedTypes, 4, dupCode,
    isDup=True, byElem=True)
twoEqualRegInstX(
    "dup", "DupElemScX", "SimdMiscOp", unsignedTypes, 4, dupCode,
    isDup=True, byElem=True, scalar=True)

# DUP (general register): 'W' / 'X' pick the source operand prefix
dupGprInstX(
    "dup", "DupGprWDX", "SimdMiscOp", smallUnsignedTypes, 2, 'W')
dupGprInstX(
    "dup", "DupGprWQX", "SimdMiscOp", smallUnsignedTypes, 4, 'W')
dupGprInstX(
    "dup", "DupGprXQX", "SimdMiscOp", ("uint64_t",), 4, 'X')

# EOR
eorCode = "destElem = srcElem1 ^ srcElem2;"
threeEqualRegInstX("eor", "EorDX", "SimdAluOp", ("uint64_t",), 2, eorCode)
threeEqualRegInstX("eor", "EorQX", "SimdAluOp", ("uint64_t",), 4, eorCode)

# EXT: element i of the result is element (i + imm) of the
# concatenation srcReg1:srcReg2; an index past both sources raises
# an UndefinedInstruction fault.
extCode = '''
    for (unsigned i = 0; i < eCount; i++) {
        unsigned index = i + imm;
        if (index < eCount) {
            destReg.elements[i] = srcReg1.elements[index];
        } else {
            index -= eCount;
            if (index >= eCount) {
                fault = std::make_shared<UndefinedInstruction>(
                            machInst, false, mnemonic);
            } else {
                destReg.elements[i] = srcReg2.elements[index];
            }
        }
    }
'''
# The mnemonic is lower case like every other definition in this file
# (it was previously registered as "Ext").
extInstX("ext", "ExtDX", "SimdMiscOp", ("uint8_t",), 2, extCode)
extInstX("ext", "ExtQX", "SimdMiscOp", ("uint8_t",), 4, extCode)

# Wrapper shared by the floating-point definitions: the expression
# substituted for %s sees a local copy of FpscrExc named fpscr, which
# is stored back afterwards.
fpOp = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    destElem = %s;
    FpscrExc = fpscr;
'''

# FABD
fabdCode = fpOp % "fplibAbs<Element>(fplibSub(srcElem1, srcElem2, fpscr))"
threeEqualRegInstX("fabd", "FabdDX", "SimdFloatAddOp", smallFloatTypes, 2,
                   fabdCode)
threeEqualRegInstX("fabd", "FabdQX", "SimdFloatAddOp", floatTypes, 4,
                   fabdCode)
threeEqualRegInstX("fabd", "FabdScX", "SimdFloatAddOp", floatTypes, 4,
                   fabdCode, scalar=True)

# FABS
# The mnemonic is "fabs"; it was previously registered as "Abs", which
# is neither lower case nor the floating-point instruction's name.
fabsCode = fpOp % "fplibAbs<Element>(srcElem1)"
twoEqualRegInstX("fabs", "FabsDX", "SimdFloatAluOp", smallFloatTypes, 2,
                 fabsCode)
twoEqualRegInstX("fabs", "FabsQX", "SimdFloatAluOp", floatTypes, 4,
                 fabsCode)

# FACGE: compare absolute values; the remaining %s selects the
# fplibCompare<cond> variant.
fpCmpAbsOp = fpOp % ("fplibCompare%s<Element>(fplibAbs<Element>(srcElem1),"
                     " fplibAbs<Element>(srcElem2), fpscr) ? -1 : 0")
facgeCode = fpCmpAbsOp % "GE"
threeEqualRegInstX("facge", "FacgeDX", "SimdFloatCmpOp", smallFloatTypes,
                   2, facgeCode)
threeEqualRegInstX("facge", "FacgeQX", "SimdFloatCmpOp", floatTypes, 4,
                   facgeCode)
threeEqualRegInstX("facge", "FacgeScX", "SimdFloatCmpOp", floatTypes, 4,
                   facgeCode, scalar=True)

# FACGT
facgtCode = fpCmpAbsOp % "GT"
threeEqualRegInstX("facgt", "FacgtDX", "SimdFloatCmpOp", smallFloatTypes,
                   2, facgtCode)
threeEqualRegInstX("facgt", "FacgtQX", "SimdFloatCmpOp", floatTypes, 4,
                   facgtCode)
threeEqualRegInstX("facgt", "FacgtScX", "SimdFloatCmpOp", floatTypes, 4,
                   facgtCode, scalar=True)

# FADD: fpBinOp keeps one %s for the fplib function name
fpBinOp = fpOp % "fplib%s<Element>(srcElem1, srcElem2, fpscr)"
faddCode = fpBinOp % "Add"
threeEqualRegInstX("fadd", "FaddDX", "SimdFloatAddOp", smallFloatTypes, 2,
                   faddCode)
threeEqualRegInstX(
    "fadd", "FaddQX", "SimdFloatAddOp", floatTypes, 4, faddCode)

# FADDP (scalar)
twoRegPairwiseScInstX(
    "faddp", "FaddpScDX", "SimdFloatAddOp", ("uint32_t",), 2, faddCode)
twoRegPairwiseScInstX(
    "faddp", "FaddpScQX", "SimdFloatAddOp", ("uint64_t",), 4, faddCode)

# FADDP (vector)
threeEqualRegInstX(
    "faddp", "FaddpDX", "SimdFloatAddOp", smallFloatTypes, 2, faddCode,
    pairwise=True)
threeEqualRegInstX(
    "faddp", "FaddpQX", "SimdFloatAddOp", floatTypes, 4, faddCode,
    pairwise=True)

# FCMEQ (register): fpCmpOp keeps one %s for the compare condition
fpCmpOp = fpOp % ("fplibCompare%s<Element>(srcElem1, srcElem2, fpscr) ?"
                  " -1 : 0")
fcmeqCode = fpCmpOp % "EQ"
threeEqualRegInstX(
    "fcmeq", "FcmeqDX", "SimdFloatCmpOp", smallFloatTypes, 2, fcmeqCode)
threeEqualRegInstX(
    "fcmeq", "FcmeqQX", "SimdFloatCmpOp", floatTypes, 4, fcmeqCode)
threeEqualRegInstX(
    "fcmeq", "FcmeqScX", "SimdFloatCmpOp", floatTypes, 4, fcmeqCode,
    scalar=True)

# FCMEQ (zero): same compare with the second operand fixed to 0
fpCmpZeroOp = fpOp % "fplibCompare%s<Element>(srcElem1, 0, fpscr) ? -1 : 0"
fcmeqZeroCode = fpCmpZeroOp % "EQ"
twoEqualRegInstX(
    "fcmeq", "FcmeqZeroDX", "SimdFloatCmpOp", smallFloatTypes, 2,
    fcmeqZeroCode)
twoEqualRegInstX(
    "fcmeq", "FcmeqZeroQX", "SimdFloatCmpOp", floatTypes, 4,
    fcmeqZeroCode)
twoEqualRegInstX(
    "fcmeq", "FcmeqZeroScX", "SimdFloatCmpOp", floatTypes, 4,
    fcmeqZeroCode, scalar=True)

# FCMGE (register)
fcmgeCode = fpCmpOp % "GE"
threeEqualRegInstX(
    "fcmge", "FcmgeDX", "SimdFloatCmpOp", smallFloatTypes, 2, fcmgeCode)
threeEqualRegInstX(
    "fcmge", "FcmgeQX", "SimdFloatCmpOp", floatTypes, 4, fcmgeCode)
threeEqualRegInstX(
    "fcmge", "FcmgeScX", "SimdFloatCmpOp", floatTypes, 4, fcmgeCode,
    scalar=True)

# FCMGE (zero)
fcmgeZeroCode = fpCmpZeroOp % "GE"
twoEqualRegInstX(
    "fcmge", "FcmgeZeroDX", "SimdFloatCmpOp", smallFloatTypes, 2,
    fcmgeZeroCode)
twoEqualRegInstX(
    "fcmge", "FcmgeZeroQX", "SimdFloatCmpOp", floatTypes, 4,
    fcmgeZeroCode)
twoEqualRegInstX(
    "fcmge", "FcmgeZeroScX", "SimdFloatCmpOp", floatTypes, 4,
    fcmgeZeroCode, scalar=True)

# FCMGT (register)
fcmgtCode = fpCmpOp % "GT"
threeEqualRegInstX(
    "fcmgt", "FcmgtDX", "SimdFloatCmpOp", smallFloatTypes, 2, fcmgtCode)
# FCMGT (register), continued: the QX and scalar (ScX) variants.  fcmgtCode
# is assigned immediately before this point.
#
# Argument order used by every call in this section: mnemonic string,
# class-name string, op-class string, tuple of C++ element type names,
# 2 or 4, then the per-element C++ snippet.
# NOTE(review): the *InstX helpers and the fpOp / fpCmpZeroOp templates are
# defined outside this span; the argument meanings above are inferred from
# usage -- confirm against the helper definitions.
threeEqualRegInstX("fcmgt", "FcmgtQX", "SimdFloatCmpOp", floatTypes, 4,
                   fcmgtCode)
threeEqualRegInstX("fcmgt", "FcmgtScX", "SimdFloatCmpOp", floatTypes, 4,
                   fcmgtCode, scalar=True)
# FCMGT (zero)
fcmgtZeroCode = fpCmpZeroOp % "GT"
twoEqualRegInstX("fcmgt", "FcmgtZeroDX", "SimdFloatCmpOp", smallFloatTypes,
                 2, fcmgtZeroCode)
twoEqualRegInstX("fcmgt", "FcmgtZeroQX", "SimdFloatCmpOp", floatTypes, 4,
                 fcmgtZeroCode)
twoEqualRegInstX("fcmgt", "FcmgtZeroScX", "SimdFloatCmpOp", floatTypes, 4,
                 fcmgtZeroCode, scalar=True)
# FCMLE (zero)
# Reversed-operand compare against zero: the literal 0 is the first operand,
# so "x <= 0" is expressed as GE(0, x) and "x < 0" as GT(0, x) below.
fpCmpRevZeroOp = fpOp % ("fplibCompare%s<Element>(0, srcElem1, fpscr) ?"
                         " -1 : 0")
fcmleZeroCode = fpCmpRevZeroOp % "GE"
twoEqualRegInstX("fcmle", "FcmleZeroDX", "SimdFloatCmpOp", smallFloatTypes,
                 2, fcmleZeroCode)
twoEqualRegInstX("fcmle", "FcmleZeroQX", "SimdFloatCmpOp", floatTypes, 4,
                 fcmleZeroCode)
twoEqualRegInstX("fcmle", "FcmleZeroScX", "SimdFloatCmpOp", floatTypes, 4,
                 fcmleZeroCode, scalar=True)
# FCMLT (zero)
fcmltZeroCode = fpCmpRevZeroOp % "GT"
twoEqualRegInstX("fcmlt", "FcmltZeroDX", "SimdFloatCmpOp", smallFloatTypes,
                 2, fcmltZeroCode)
twoEqualRegInstX("fcmlt", "FcmltZeroQX", "SimdFloatCmpOp", floatTypes, 4,
                 fcmltZeroCode)
twoEqualRegInstX("fcmlt", "FcmltZeroScX", "SimdFloatCmpOp", floatTypes, 4,
                 fcmltZeroCode, scalar=True)
# FCVTAS
# Shared template for all FCVT?S / FCVT?U entries.  The three %s slots are
# filled per instruction with ("0" or "imm", "false" or "true", rounding
# mode); the *S entries pass "false" and the *U entries pass "true".
# NOTE(review): presumably (fraction bits, is-unsigned, rounding) -- confirm
# against fplibFPToFixed's declaration.
fcvtCode = fpOp % ("fplibFPToFixed<Element, Element>("
                   "srcElem1, %s, %s, %s, fpscr)")
fcvtasCode = fcvtCode % ("0", "false", "FPRounding_TIEAWAY")
twoEqualRegInstX("fcvtas", "FcvtasDX", "SimdCvtOp", smallFloatTypes, 2,
                 fcvtasCode)
twoEqualRegInstX("fcvtas", "FcvtasQX", "SimdCvtOp", floatTypes, 4,
                 fcvtasCode)
twoEqualRegInstX("fcvtas", "FcvtasScX", "SimdCvtOp", floatTypes, 4,
                 fcvtasCode, scalar=True)
# FCVTAU
fcvtauCode = fcvtCode % ("0", "true", "FPRounding_TIEAWAY")
twoEqualRegInstX("fcvtau", "FcvtauDX", "SimdCvtOp", smallFloatTypes, 2,
                 fcvtauCode)
twoEqualRegInstX("fcvtau", "FcvtauQX", "SimdCvtOp", floatTypes, 4,
                 fcvtauCode)
twoEqualRegInstX("fcvtau", "FcvtauScX", "SimdCvtOp", floatTypes, 4,
                 fcvtauCode, scalar=True)
# FCVTL, FCVTL2
fcvtlCode = fpOp % ("fplibConvert<Element, BigElement>("
                    "srcElem1, FPCRRounding(fpscr), fpscr)")
twoRegLongInstX("fcvtl", "FcvtlX", "SimdCvtOp", ("uint16_t", "uint32_t"),
                fcvtlCode)
twoRegLongInstX("fcvtl", "Fcvtl2X", "SimdCvtOp", ("uint16_t", "uint32_t"),
                fcvtlCode, hi=True)
# FCVTMS
fcvtmsCode = fcvtCode % ("0", "false", "FPRounding_NEGINF")
twoEqualRegInstX("fcvtms", "FcvtmsDX", "SimdCvtOp", smallFloatTypes, 2,
                 fcvtmsCode)
twoEqualRegInstX("fcvtms", "FcvtmsQX", "SimdCvtOp", floatTypes, 4,
                 fcvtmsCode)
twoEqualRegInstX("fcvtms", "FcvtmsScX", "SimdCvtOp", floatTypes, 4,
                 fcvtmsCode, scalar=True)
# FCVTMU
fcvtmuCode = fcvtCode % ("0", "true", "FPRounding_NEGINF")
twoEqualRegInstX("fcvtmu", "FcvtmuDX", "SimdCvtOp", smallFloatTypes, 2,
                 fcvtmuCode)
twoEqualRegInstX("fcvtmu", "FcvtmuQX", "SimdCvtOp", floatTypes, 4,
                 fcvtmuCode)
# FCVTMU, continued: the scalar (ScX) variant.  fcvtmuCode and the fcvtCode
# template it is built from are assigned just before this point.
#
# Argument order used by every call in this section: mnemonic string,
# class-name string, op-class string, tuple of C++ element type names,
# (for the helpers that take one) 2 or 4, then the per-element C++ snippet.
# NOTE(review): the *InstX helpers and the fpOp / fpBinOp / fcvtCode
# templates are defined outside this span; the argument meanings above are
# inferred from usage -- confirm against the helper definitions.
twoEqualRegInstX("fcvtmu", "FcvtmuScX", "SimdCvtOp", floatTypes, 4,
                 fcvtmuCode, scalar=True)
# FCVTN, FCVTN2
fcvtnCode = fpOp % ("fplibConvert<BigElement, Element>("
                    "srcElem1, FPCRRounding(fpscr), fpscr)")
twoRegNarrowInstX("fcvtn", "FcvtnX", "SimdCvtOp",
                  ("uint16_t", "uint32_t"), fcvtnCode)
twoRegNarrowInstX("fcvtn", "Fcvtn2X", "SimdCvtOp",
                  ("uint16_t", "uint32_t"), fcvtnCode, hi=True)
# FCVTNS
fcvtnsCode = fcvtCode % ("0", "false", "FPRounding_TIEEVEN")
twoEqualRegInstX("fcvtns", "FcvtnsDX", "SimdCvtOp", smallFloatTypes, 2,
                 fcvtnsCode)
twoEqualRegInstX("fcvtns", "FcvtnsQX", "SimdCvtOp", floatTypes, 4,
                 fcvtnsCode)
twoEqualRegInstX("fcvtns", "FcvtnsScX", "SimdCvtOp", floatTypes, 4,
                 fcvtnsCode, scalar=True)
# FCVTNU
fcvtnuCode = fcvtCode % ("0", "true", "FPRounding_TIEEVEN")
twoEqualRegInstX("fcvtnu", "FcvtnuDX", "SimdCvtOp", smallFloatTypes, 2,
                 fcvtnuCode)
twoEqualRegInstX("fcvtnu", "FcvtnuQX", "SimdCvtOp", floatTypes, 4,
                 fcvtnuCode)
twoEqualRegInstX("fcvtnu", "FcvtnuScX", "SimdCvtOp", floatTypes, 4,
                 fcvtnuCode, scalar=True)
# FCVTPS
fcvtpsCode = fcvtCode % ("0", "false", "FPRounding_POSINF")
twoEqualRegInstX("fcvtps", "FcvtpsDX", "SimdCvtOp", smallFloatTypes, 2,
                 fcvtpsCode)
twoEqualRegInstX("fcvtps", "FcvtpsQX", "SimdCvtOp", floatTypes, 4,
                 fcvtpsCode)
twoEqualRegInstX("fcvtps", "FcvtpsScX", "SimdCvtOp", floatTypes, 4,
                 fcvtpsCode, scalar=True)
# FCVTPU
fcvtpuCode = fcvtCode % ("0", "true", "FPRounding_POSINF")
twoEqualRegInstX("fcvtpu", "FcvtpuDX", "SimdCvtOp", smallFloatTypes, 2,
                 fcvtpuCode)
twoEqualRegInstX("fcvtpu", "FcvtpuQX", "SimdCvtOp", floatTypes, 4,
                 fcvtpuCode)
twoEqualRegInstX("fcvtpu", "FcvtpuScX", "SimdCvtOp", floatTypes, 4,
                 fcvtpuCode, scalar=True)
# FCVTXN, FCVTXN2
# Same narrowing conversion as FCVTN but with the rounding mode fixed to
# FPRounding_ODD instead of taken from the FPCR.
fcvtxnCode = fpOp % ("fplibConvert<BigElement, Element>("
                     "srcElem1, FPRounding_ODD, fpscr)")
twoRegNarrowInstX("fcvtxn", "FcvtxnX", "SimdCvtOp", smallFloatTypes,
                  fcvtxnCode)
twoRegNarrowInstX("fcvtxn", "Fcvtxn2X", "SimdCvtOp", smallFloatTypes,
                  fcvtxnCode, hi=True)
twoRegNarrowInstX("fcvtxn", "FcvtxnScX", "SimdCvtOp", smallFloatTypes,
                  fcvtxnCode, scalar=True)
# FCVTZS (fixed-point)
# The fixed-point forms pass "imm" in the first template slot and set
# hasImm=True; the integer forms below pass the literal "0" instead.
fcvtzsCode = fcvtCode % ("imm", "false", "FPRounding_ZERO")
twoEqualRegInstX("fcvtzs", "FcvtzsFixedDX", "SimdCvtOp", smallFloatTypes,
                 2, fcvtzsCode, hasImm=True)
twoEqualRegInstX("fcvtzs", "FcvtzsFixedQX", "SimdCvtOp", floatTypes, 4,
                 fcvtzsCode, hasImm=True)
twoEqualRegInstX("fcvtzs", "FcvtzsFixedScX", "SimdCvtOp", floatTypes, 4,
                 fcvtzsCode, hasImm=True, scalar=True)
# FCVTZS (integer)
fcvtzsIntCode = fcvtCode % ("0", "false", "FPRounding_ZERO")
twoEqualRegInstX("fcvtzs", "FcvtzsIntDX", "SimdCvtOp", smallFloatTypes,
                 2, fcvtzsIntCode)
twoEqualRegInstX("fcvtzs", "FcvtzsIntQX", "SimdCvtOp", floatTypes, 4,
                 fcvtzsIntCode)
twoEqualRegInstX("fcvtzs", "FcvtzsIntScX", "SimdCvtOp", floatTypes, 4,
                 fcvtzsIntCode, scalar=True)
# FCVTZU (fixed-point)
fcvtzuCode = fcvtCode % ("imm", "true", "FPRounding_ZERO")
twoEqualRegInstX("fcvtzu", "FcvtzuFixedDX", "SimdCvtOp", smallFloatTypes,
                 2, fcvtzuCode, hasImm=True)
twoEqualRegInstX("fcvtzu", "FcvtzuFixedQX", "SimdCvtOp", floatTypes, 4,
                 fcvtzuCode, hasImm=True)
twoEqualRegInstX("fcvtzu", "FcvtzuFixedScX", "SimdCvtOp", floatTypes, 4,
                 fcvtzuCode, hasImm=True, scalar=True)
# FCVTZU (integer)
fcvtzuIntCode = fcvtCode % ("0", "true", "FPRounding_ZERO")
twoEqualRegInstX("fcvtzu", "FcvtzuIntDX", "SimdCvtOp", smallFloatTypes, 2,
                 fcvtzuIntCode)
twoEqualRegInstX("fcvtzu", "FcvtzuIntQX", "SimdCvtOp", floatTypes, 4,
                 fcvtzuIntCode)
twoEqualRegInstX("fcvtzu", "FcvtzuIntScX", "SimdCvtOp", floatTypes, 4,
                 fcvtzuIntCode, scalar=True)
# FDIV
fdivCode = fpBinOp % "Div"
threeEqualRegInstX("fdiv", "FdivDX", "SimdFloatDivOp", smallFloatTypes, 2,
                   fdivCode)
threeEqualRegInstX("fdiv", "FdivQX", "SimdFloatDivOp", floatTypes, 4,
                   fdivCode)
# FMAX
fmaxCode = fpBinOp % "Max"
threeEqualRegInstX("fmax", "FmaxDX", "SimdFloatCmpOp", smallFloatTypes, 2,
                   fmaxCode)
threeEqualRegInstX("fmax", "FmaxQX", "SimdFloatCmpOp", floatTypes, 4,
                   fmaxCode)
# FMAXNM
fmaxnmCode = fpBinOp % "MaxNum"
threeEqualRegInstX("fmaxnm", "FmaxnmDX", "SimdFloatCmpOp", smallFloatTypes,
                   2, fmaxnmCode)
threeEqualRegInstX("fmaxnm", "FmaxnmQX", "SimdFloatCmpOp", floatTypes, 4,
                   fmaxnmCode)
# FMAXNMP (scalar)
twoRegPairwiseScInstX("fmaxnmp", "FmaxnmpScDX", "SimdFloatCmpOp",
                      ("uint32_t",), 2, fmaxnmCode)
twoRegPairwiseScInstX("fmaxnmp", "FmaxnmpScQX", "SimdFloatCmpOp",
                      ("uint64_t",), 4, fmaxnmCode)
# FMAXNMP (vector)
threeEqualRegInstX("fmaxnmp", "FmaxnmpDX", "SimdFloatCmpOp",
                   smallFloatTypes, 2, fmaxnmCode, pairwise=True)
threeEqualRegInstX("fmaxnmp", "FmaxnmpQX", "SimdFloatCmpOp", floatTypes, 4,
                   fmaxnmCode, pairwise=True)
# FMAXNMV
# Note: SimdFloatCmpOp can be a bit optimistic here
# Template for the across-lanes reductions: folds srcElem1 into destElem
# with the fplib function whose name suffix is substituted for %s.
fpAcrossOp = fpOp % "fplib%s<Element>(destElem, srcElem1, fpscr)"
# FMAXNMV, continued: fpAcrossOp is assigned immediately before this point.
#
# Argument order used by every call in this section: mnemonic string,
# class-name string, op-class string, tuple of C++ element type names,
# (for the helpers that take one) 2 or 4, then the per-element C++ snippet.
# NOTE(review): the *InstX helpers and the fpOp / fpBinOp templates are
# defined outside this span; the argument meanings above are inferred from
# usage -- confirm against the helper definitions.
fmaxnmAcrossCode = fpAcrossOp % "MaxNum"
twoRegAcrossInstX("fmaxnmv", "FmaxnmvQX", "SimdFloatCmpOp", ("uint32_t",),
                  4, fmaxnmAcrossCode)
# FMAXP (scalar)
twoRegPairwiseScInstX("fmaxp", "FmaxpScDX", "SimdFloatCmpOp",
                      ("uint32_t",), 2, fmaxCode)
twoRegPairwiseScInstX("fmaxp", "FmaxpScQX", "SimdFloatCmpOp",
                      ("uint64_t",), 4, fmaxCode)
# FMAXP (vector)
threeEqualRegInstX("fmaxp", "FmaxpDX", "SimdFloatCmpOp", smallFloatTypes,
                   2, fmaxCode, pairwise=True)
threeEqualRegInstX("fmaxp", "FmaxpQX", "SimdFloatCmpOp", floatTypes, 4,
                   fmaxCode, pairwise=True)
# FMAXV
# Note: SimdFloatCmpOp can be a bit optimistic here
fmaxAcrossCode = fpAcrossOp % "Max"
twoRegAcrossInstX("fmaxv", "FmaxvQX", "SimdFloatCmpOp", ("uint32_t",), 4,
                  fmaxAcrossCode)
# FMIN
fminCode = fpBinOp % "Min"
threeEqualRegInstX("fmin", "FminDX", "SimdFloatCmpOp", smallFloatTypes, 2,
                   fminCode)
threeEqualRegInstX("fmin", "FminQX", "SimdFloatCmpOp", floatTypes, 4,
                   fminCode)
# FMINNM
fminnmCode = fpBinOp % "MinNum"
threeEqualRegInstX("fminnm", "FminnmDX", "SimdFloatCmpOp", smallFloatTypes,
                   2, fminnmCode)
threeEqualRegInstX("fminnm", "FminnmQX", "SimdFloatCmpOp", floatTypes, 4,
                   fminnmCode)
# FMINNMP (scalar)
twoRegPairwiseScInstX("fminnmp", "FminnmpScDX", "SimdFloatCmpOp",
                      ("uint32_t",), 2, fminnmCode)
twoRegPairwiseScInstX("fminnmp", "FminnmpScQX", "SimdFloatCmpOp",
                      ("uint64_t",), 4, fminnmCode)
# FMINNMP (vector)
threeEqualRegInstX("fminnmp", "FminnmpDX", "SimdFloatCmpOp",
                   smallFloatTypes, 2, fminnmCode, pairwise=True)
threeEqualRegInstX("fminnmp", "FminnmpQX", "SimdFloatCmpOp", floatTypes, 4,
                   fminnmCode, pairwise=True)
# FMINNMV
# Note: SimdFloatCmpOp can be a bit optimistic here
fminnmAcrossCode = fpAcrossOp % "MinNum"
twoRegAcrossInstX("fminnmv", "FminnmvQX", "SimdFloatCmpOp", ("uint32_t",),
                  4, fminnmAcrossCode)
# FMINP (scalar)
twoRegPairwiseScInstX("fminp", "FminpScDX", "SimdFloatCmpOp",
                      ("uint32_t",), 2, fminCode)
twoRegPairwiseScInstX("fminp", "FminpScQX", "SimdFloatCmpOp",
                      ("uint64_t",), 4, fminCode)
# FMINP (vector)
threeEqualRegInstX("fminp", "FminpDX", "SimdFloatCmpOp", smallFloatTypes,
                   2, fminCode, pairwise=True)
threeEqualRegInstX("fminp", "FminpQX", "SimdFloatCmpOp", floatTypes, 4,
                   fminCode, pairwise=True)
# FMINV
# Note: SimdFloatCmpOp can be a bit optimistic here
fminAcrossCode = fpAcrossOp % "Min"
twoRegAcrossInstX("fminv", "FminvQX", "SimdFloatCmpOp", ("uint32_t",), 4,
                  fminAcrossCode)
# FMLA (by element)
# The snippet reads destElem as the addend, and every FMLA/FMLS call passes
# an extra positional True after the snippet.
fmlaCode = fpOp % ("fplibMulAdd<Element>("
                   "destElem, srcElem1, srcElem2, fpscr)")
threeEqualRegInstX("fmla", "FmlaElemDX", "SimdFloatMultAccOp",
                   smallFloatTypes, 2, fmlaCode, True, byElem=True)
threeEqualRegInstX("fmla", "FmlaElemQX", "SimdFloatMultAccOp", floatTypes,
                   4, fmlaCode, True, byElem=True)
threeEqualRegInstX("fmla", "FmlaElemScX", "SimdFloatMultAccOp", floatTypes,
                   4, fmlaCode, True, byElem=True, scalar=True)
# FMLA (vector)
threeEqualRegInstX("fmla", "FmlaDX", "SimdFloatMultAccOp", smallFloatTypes,
                   2, fmlaCode, True)
threeEqualRegInstX("fmla", "FmlaQX", "SimdFloatMultAccOp", floatTypes, 4,
                   fmlaCode, True)
# FMLS (by element)
# Same fused multiply-add as FMLA with the first multiplicand negated.
fmlsCode = fpOp % ("fplibMulAdd<Element>(destElem,"
                   " fplibNeg<Element>(srcElem1), srcElem2, fpscr)")
threeEqualRegInstX("fmls", "FmlsElemDX", "SimdFloatMultAccOp",
                   smallFloatTypes, 2, fmlsCode, True, byElem=True)
threeEqualRegInstX("fmls", "FmlsElemQX", "SimdFloatMultAccOp", floatTypes,
                   4, fmlsCode, True, byElem=True)
threeEqualRegInstX("fmls", "FmlsElemScX", "SimdFloatMultAccOp", floatTypes,
                   4, fmlsCode, True, byElem=True, scalar=True)
# FMLS (vector)
threeEqualRegInstX("fmls", "FmlsDX", "SimdFloatMultAccOp", smallFloatTypes,
                   2, fmlsCode, True)
threeEqualRegInstX("fmls", "FmlsQX", "SimdFloatMultAccOp", floatTypes, 4,
                   fmlsCode, True)
# FMOV
fmovCode = 'destElem = imm;'
oneRegImmInstX("fmov", "FmovDX", "SimdMiscOp", smallFloatTypes, 2,
               fmovCode)
oneRegImmInstX("fmov", "FmovQX", "SimdMiscOp", floatTypes, 4, fmovCode)
# FMUL (by element)
fmulCode = fpBinOp % "Mul"
threeEqualRegInstX("fmul", "FmulElemDX", "SimdFloatMultOp",
                   smallFloatTypes, 2, fmulCode, byElem=True)
threeEqualRegInstX("fmul", "FmulElemQX", "SimdFloatMultOp", floatTypes, 4,
                   fmulCode, byElem=True)
threeEqualRegInstX("fmul", "FmulElemScX", "SimdFloatMultOp", floatTypes, 4,
                   fmulCode, byElem=True, scalar=True)
# FMUL (vector)
threeEqualRegInstX("fmul", "FmulDX", "SimdFloatMultOp", smallFloatTypes, 2,
                   fmulCode)
threeEqualRegInstX("fmul", "FmulQX", "SimdFloatMultOp", floatTypes, 4,
                   fmulCode)
# FMULX
fmulxCode = fpBinOp % "MulX"
threeEqualRegInstX("fmulx", "FmulxDX", "SimdFloatMultOp", smallFloatTypes,
                   2, fmulxCode)
threeEqualRegInstX("fmulx", "FmulxQX", "SimdFloatMultOp", floatTypes, 4,
                   fmulxCode)
threeEqualRegInstX("fmulx", "FmulxScX", "SimdFloatMultOp", floatTypes, 4,
                   fmulxCode, scalar=True)
# FMULX (by element)
threeEqualRegInstX("fmulx", "FmulxElemDX", "SimdFloatMultOp",
                   smallFloatTypes, 2, fmulxCode, byElem=True)
threeEqualRegInstX("fmulx", "FmulxElemQX", "SimdFloatMultOp", floatTypes,
                   4, fmulxCode, byElem=True)
threeEqualRegInstX("fmulx", "FmulxElemScX", "SimdFloatMultOp", floatTypes,
                   4, fmulxCode, byElem=True, scalar=True)
# FNEG
# The first argument is the assembly mnemonic; it was "Neg" here, unlike
# every other entry, which passes the lowercase instruction name.
fnegCode = fpOp % "fplibNeg<Element>(srcElem1)"
twoEqualRegInstX("fneg", "FnegDX", "SimdFloatAluOp", smallFloatTypes, 2,
                 fnegCode)
twoEqualRegInstX("fneg", "FnegQX", "SimdFloatAluOp", floatTypes, 4,
                 fnegCode)
# FRECPE
frecpeCode = fpOp % "fplibRecipEstimate<Element>(srcElem1, fpscr)"
twoEqualRegInstX("frecpe", "FrecpeDX", "SimdFloatMultAccOp",
                 smallFloatTypes, 2, frecpeCode)
twoEqualRegInstX("frecpe", "FrecpeQX", "SimdFloatMultAccOp", floatTypes, 4,
                 frecpeCode)
twoEqualRegInstX("frecpe", "FrecpeScX", "SimdFloatMultAccOp", floatTypes,
                 4, frecpeCode, scalar=True)
# FRECPS
frecpsCode = fpBinOp % "RecipStepFused"
threeEqualRegInstX("frecps", "FrecpsDX", "SimdFloatMultAccOp",
                   smallFloatTypes, 2, frecpsCode)
threeEqualRegInstX("frecps", "FrecpsQX", "SimdFloatMultAccOp", floatTypes,
                   4, frecpsCode)
threeEqualRegInstX("frecps", "FrecpsScX", "SimdFloatMultAccOp", floatTypes,
                   4, frecpsCode, scalar=True)
# FRECPX
frecpxCode = fpOp % "fplibRecpX<Element>(srcElem1, fpscr)"
twoEqualRegInstX("frecpx", "FrecpxX", "SimdFloatMultAccOp", floatTypes, 4,
                 frecpxCode, scalar=True)
# FRINTA
# Shared template for all FRINT* entries; the two %s slots are filled with
# (rounding mode, "false" or "true").  Only FRINTX passes "true".
# NOTE(review): presumably the second slot is the "exact" flag -- confirm
# against fplibRoundInt's declaration.
frintCode = fpOp % "fplibRoundInt<Element>(srcElem1, %s, %s, fpscr)"
frintaCode = frintCode % ("FPRounding_TIEAWAY", "false")
twoEqualRegInstX("frinta", "FrintaDX", "SimdCvtOp", smallFloatTypes, 2,
                 frintaCode)
twoEqualRegInstX("frinta", "FrintaQX", "SimdCvtOp", floatTypes, 4,
                 frintaCode)
# FRINTI
frintiCode = frintCode % ("FPCRRounding(fpscr)", "false")
twoEqualRegInstX("frinti", "FrintiDX", "SimdCvtOp", smallFloatTypes, 2,
                 frintiCode)
twoEqualRegInstX("frinti", "FrintiQX", "SimdCvtOp", floatTypes, 4,
                 frintiCode)
# FRINTM
frintmCode = frintCode % ("FPRounding_NEGINF", "false")
twoEqualRegInstX("frintm", "FrintmDX", "SimdCvtOp", smallFloatTypes, 2,
                 frintmCode)
twoEqualRegInstX("frintm", "FrintmQX", "SimdCvtOp", floatTypes, 4,
                 frintmCode)
# FRINTN
frintnCode = frintCode % ("FPRounding_TIEEVEN", "false")
twoEqualRegInstX("frintn", "FrintnDX", "SimdCvtOp", smallFloatTypes, 2,
                 frintnCode)
twoEqualRegInstX("frintn", "FrintnQX", "SimdCvtOp", floatTypes, 4,
                 frintnCode)
# FRINTP
frintpCode = frintCode % ("FPRounding_POSINF", "false")
twoEqualRegInstX("frintp", "FrintpDX", "SimdCvtOp", smallFloatTypes, 2,
                 frintpCode)
twoEqualRegInstX("frintp", "FrintpQX", "SimdCvtOp", floatTypes, 4,
                 frintpCode)
# FRINTX
frintxCode = frintCode % ("FPCRRounding(fpscr)", "true")
twoEqualRegInstX("frintx", "FrintxDX", "SimdCvtOp", smallFloatTypes, 2,
                 frintxCode)
twoEqualRegInstX("frintx", "FrintxQX", "SimdCvtOp", floatTypes, 4,
                 frintxCode)
# FRINTZ
frintzCode = frintCode % ("FPRounding_ZERO", "false")
twoEqualRegInstX("frintz", "FrintzDX", "SimdCvtOp", smallFloatTypes, 2,
                 frintzCode)
twoEqualRegInstX("frintz", "FrintzQX", "SimdCvtOp", floatTypes, 4,
                 frintzCode)
# FRSQRTE
frsqrteCode = fpOp % "fplibRSqrtEstimate<Element>(srcElem1, fpscr)"
twoEqualRegInstX("frsqrte", "FrsqrteDX", "SimdFloatSqrtOp",
                 smallFloatTypes, 2, frsqrteCode)
twoEqualRegInstX("frsqrte", "FrsqrteQX", "SimdFloatSqrtOp", floatTypes, 4,
                 frsqrteCode)
twoEqualRegInstX("frsqrte", "FrsqrteScX", "SimdFloatSqrtOp", floatTypes, 4,
                 frsqrteCode, scalar=True)
# FRSQRTS
frsqrtsCode = fpBinOp % "RSqrtStepFused"
threeEqualRegInstX("frsqrts", "FrsqrtsDX", "SimdFloatMiscOp",
                   smallFloatTypes, 2, frsqrtsCode)
threeEqualRegInstX("frsqrts", "FrsqrtsQX", "SimdFloatMiscOp", floatTypes,
                   4, frsqrtsCode)
threeEqualRegInstX("frsqrts", "FrsqrtsScX", "SimdFloatMiscOp", floatTypes,
                   4, frsqrtsCode, scalar=True)
# FSQRT
fsqrtCode = fpOp % "fplibSqrt<Element>(srcElem1, fpscr)"
twoEqualRegInstX("fsqrt", "FsqrtDX", "SimdFloatSqrtOp", smallFloatTypes, 2,
                 fsqrtCode)
twoEqualRegInstX("fsqrt", "FsqrtQX", "SimdFloatSqrtOp", floatTypes, 4,
                 fsqrtCode)
# FSUB
fsubCode = fpBinOp % "Sub"
threeEqualRegInstX("fsub", "FsubDX", "SimdFloatAddOp", smallFloatTypes, 2,
                   fsubCode)
threeEqualRegInstX("fsub", "FsubQX", "SimdFloatAddOp", floatTypes, 4,
                   fsubCode)
# INS (element)
insFromVecElemInstX("ins", "InsElemX", "SimdMiscOp", unsignedTypes, 4)
# INS (general register)
insFromGprInstX("ins", "InsGprWX", "SimdMiscOp", smallUnsignedTypes, 4,
                'W')
insFromGprInstX("ins", "InsGprXX", "SimdMiscOp", unsignedTypes, 4, 'X')
# MLA (by element)
mlaCode = "destElem += srcElem1 * srcElem2;"
threeEqualRegInstX("mla", "MlaElemDX", "SimdMultAccOp",
                   ("uint16_t", "uint32_t"), 2, mlaCode, True, byElem=True)
threeEqualRegInstX("mla", "MlaElemQX", "SimdMultAccOp",
                   ("uint16_t", "uint32_t"), 4, mlaCode, True, byElem=True)
# MLA (vector)
threeEqualRegInstX("mla", "MlaDX", "SimdMultAccOp", smallUnsignedTypes, 2,
                   mlaCode, True)
threeEqualRegInstX("mla", "MlaQX", "SimdMultAccOp", smallUnsignedTypes, 4,
                   mlaCode, True)
# MLS (by element)
mlsCode = "destElem -= srcElem1 * srcElem2;"
threeEqualRegInstX("mls", "MlsElemDX", "SimdMultAccOp",
                   ("uint16_t", "uint32_t"), 2, mlsCode, True, byElem=True)
threeEqualRegInstX("mls", "MlsElemQX", "SimdMultAccOp",
                   ("uint16_t", "uint32_t"), 4, mlsCode, True, byElem=True)
# MLS (vector)
threeEqualRegInstX("mls", "MlsDX", "SimdMultAccOp", smallUnsignedTypes, 2,
                   mlsCode, True)
threeEqualRegInstX("mls", "MlsQX", "SimdMultAccOp", smallUnsignedTypes, 4,
                   mlsCode, True)
# MOV (element) -> alias to INS (element)
# MOV (from general) -> alias to INS (general register)
# MOV (scalar) -> alias to DUP (element)
# MOV (to general) -> alias to UMOV
# MOV (vector) -> alias to ORR (register)
# MOVI
movImmCode = "destElem = imm;"
oneRegImmInstX("movi", "MoviDX", "SimdMiscOp", ("uint64_t",), 2,
               movImmCode)
oneRegImmInstX("movi", "MoviQX", "SimdMiscOp", ("uint64_t",), 4,
               movImmCode)
# MUL (by element)
mulCode = "destElem = srcElem1 * srcElem2;"
threeEqualRegInstX("mul", "MulElemDX", "SimdMultOp",
                   ("uint16_t", "uint32_t"), 2, mulCode, byElem=True)
threeEqualRegInstX("mul", "MulElemQX", "SimdMultOp",
                   ("uint16_t", "uint32_t"), 4, mulCode, byElem=True)
# MUL (vector)
threeEqualRegInstX("mul", "MulDX", "SimdMultOp", smallUnsignedTypes, 2,
                   mulCode)
threeEqualRegInstX("mul", "MulQX", "SimdMultOp", smallUnsignedTypes, 4,
                   mulCode)
# MVN
mvnCode = "destElem = ~srcElem1;"
twoEqualRegInstX("mvn", "MvnDX", "SimdAluOp", ("uint64_t",), 2, mvnCode)
twoEqualRegInstX("mvn", "MvnQX", "SimdAluOp", ("uint64_t",), 4, mvnCode)
# MVNI
mvniCode = "destElem = ~imm;"
oneRegImmInstX("mvni", "MvniDX", "SimdAluOp", ("uint64_t",), 2, mvniCode)
oneRegImmInstX("mvni", "MvniQX", "SimdAluOp", ("uint64_t",), 4, mvniCode)
# NEG
negCode = "destElem = -srcElem1;"
twoEqualRegInstX("neg", "NegDX", "SimdAluOp", signedTypes, 2, negCode)
twoEqualRegInstX("neg", "NegQX", "SimdAluOp", signedTypes, 4, negCode)
# NOT -> alias to MVN
# ORN
ornCode = "destElem = srcElem1 | ~srcElem2;"
threeEqualRegInstX("orn", "OrnDX", "SimdAluOp", ("uint64_t",), 2, ornCode)
threeEqualRegInstX("orn", "OrnQX", "SimdAluOp", ("uint64_t",), 4, ornCode)
# ORR (immediate)
orrImmCode = "destElem |= imm;"
oneRegImmInstX("orr", "OrrImmDX", "SimdAluOp", ("uint64_t",), 2,
               orrImmCode, True)
oneRegImmInstX("orr", "OrrImmQX", "SimdAluOp", ("uint64_t",), 4,
               orrImmCode, True)
# ORR (register)
orrCode = "destElem = srcElem1 | srcElem2;"
threeEqualRegInstX("orr", "OrrDX", "SimdAluOp", ("uint64_t",), 2, orrCode)
threeEqualRegInstX("orr", "OrrQX", "SimdAluOp", ("uint64_t",), 4, orrCode)
# PMUL
# Carry-less (XOR-accumulated) multiply: one shifted copy of srcElem1 is
# XORed in for every set bit of srcElem2.
pmulCode = '''
    destElem = 0;
    for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
        if (bits(srcElem2, j))
            destElem ^= srcElem1 << j;
    }
'''
threeEqualRegInstX("pmul", "PmulDX", "SimdMultOp", ("uint8_t",), 2,
                   pmulCode)
threeEqualRegInstX("pmul", "PmulQX", "SimdMultOp", ("uint8_t",), 4,
                   pmulCode)
# PMULL, PMULL2
# Note: 64-bit PMULL is not available (Crypto. Extension)
# Same as PMUL but srcElem1 is widened to BigElement before shifting so the
# high product bits are kept.
pmullCode = '''
    destElem = 0;
    for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
        if (bits(srcElem2, j))
            destElem ^= (BigElement)srcElem1 << j;
    }
'''
threeRegLongInstX("pmull", "PmullX", "SimdMultOp", ("uint8_t",), pmullCode)
threeRegLongInstX("pmull", "Pmull2X", "SimdMultOp", ("uint8_t",),
                  pmullCode, hi=True)
# RADDHN, RADDHN2
# Adds 1 << (narrow width - 1) as the rounding constant before shifting
# right by sizeof(Element) * 8.
raddhnCode = '''
    destElem = ((BigElement)srcElem1 + (BigElement)srcElem2 +
                ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
               (sizeof(Element) * 8);
'''
threeRegNarrowInstX("raddhn", "RaddhnX", "SimdAddOp", smallUnsignedTypes,
                    raddhnCode)
threeRegNarrowInstX("raddhn2", "Raddhn2X", "SimdAddOp", smallUnsignedTypes,
                    raddhnCode, hi=True)
# RBIT
rbitCode = '''
    destElem = 0;
    Element temp = srcElem1;
    for (int i = 0; i < 8 * sizeof(Element); i++) {
        destElem = destElem | ((temp & 0x1) <<
                                (8 * sizeof(Element) - 1 - i));
        temp >>= 1;
    }
'''
twoEqualRegInstX("rbit", "RbitDX", "SimdAluOp", ("uint8_t",), 2, rbitCode)
twoEqualRegInstX("rbit", "RbitQX", "SimdAluOp", ("uint8_t",), 4, rbitCode)
# REV16
# The REV* snippets copy the element unchanged and set j to i with its low
# bits flipped (i ^ (groupSize - 1)); groupSize is the number of elements
# per (1 << n)-byte group.
# NOTE(review): i and j are not declared in the snippet -- presumably the
# source and destination element indices supplied by the surrounding
# template; confirm against twoEqualRegInstX.
rev16Code = '''
    destElem = srcElem1;
    unsigned groupSize = ((1 << 1) / sizeof(Element));
    unsigned reverseMask = (groupSize - 1);
    j = i ^ reverseMask;
'''
twoEqualRegInstX("rev16", "Rev16DX", "SimdAluOp", ("uint8_t",), 2,
                 rev16Code)
twoEqualRegInstX("rev16", "Rev16QX", "SimdAluOp", ("uint8_t",), 4,
                 rev16Code)
# REV32
rev32Code = '''
    destElem = srcElem1;
    unsigned groupSize = ((1 << 2) / sizeof(Element));
    unsigned reverseMask = (groupSize - 1);
    j = i ^ reverseMask;
'''
twoEqualRegInstX("rev32", "Rev32DX", "SimdAluOp", ("uint8_t", "uint16_t"),
                 2, rev32Code)
twoEqualRegInstX("rev32", "Rev32QX", "SimdAluOp", ("uint8_t", "uint16_t"),
                 4, rev32Code)
# REV64
rev64Code = '''
    destElem = srcElem1;
    unsigned groupSize = ((1 << 3) / sizeof(Element));
    unsigned reverseMask = (groupSize - 1);
    j = i ^ reverseMask;
'''
twoEqualRegInstX("rev64", "Rev64DX", "SimdAluOp", smallUnsignedTypes, 2,
                 rev64Code)
twoEqualRegInstX("rev64", "Rev64QX", "SimdAluOp", smallUnsignedTypes, 4,
                 rev64Code)
# RSHRN, RSHRN2
# The first test is '>' rather than '>=': when imm equals the source width
# the rounding branch still applies and leaves only the rounding bit.  The
# shift is split into (imm - 1) followed by 1 so the snippet never shifts
# by the full width in one step.
rshrnCode = '''
    if (imm > sizeof(srcElem1) * 8) {
        destElem = 0;
    } else if (imm) {
        Element rBit = bits(srcElem1, imm - 1);
        destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
    } else {
        destElem = srcElem1;
    }
'''
twoRegNarrowInstX("rshrn", "RshrnX", "SimdShiftOp", smallUnsignedTypes,
                  rshrnCode, hasImm=True)
twoRegNarrowInstX("rshrn2", "Rshrn2X", "SimdShiftOp", smallUnsignedTypes,
                  rshrnCode, hasImm=True, hi=True)
# RSUBHN, RSUBHN2
# NOTE(review): this pair passes smallTypes while RADDHN/RADDHN2 pass
# smallUnsignedTypes -- confirm whether the difference is intentional.
rsubhnCode = '''
    destElem = ((BigElement)srcElem1 - (BigElement)srcElem2 +
                ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
               (sizeof(Element) * 8);
'''
threeRegNarrowInstX("rsubhn", "RsubhnX", "SimdAddOp", smallTypes,
                    rsubhnCode)
threeRegNarrowInstX("rsubhn2", "Rsubhn2X", "SimdAddOp", smallTypes,
                    rsubhnCode, hi=True)
# SABA
abaCode = '''
    destElem += (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
                                        (srcElem2 - srcElem1);
'''
threeEqualRegInstX("saba", "SabaDX", "SimdAddAccOp", smallSignedTypes, 2,
                   abaCode, True)
threeEqualRegInstX("saba", "SabaQX", "SimdAddAccOp", smallSignedTypes, 4,
                   abaCode, True)
# SABAL, SABAL2
abalCode = '''
    destElem += (srcElem1 > srcElem2) ?
        ((BigElement)srcElem1 - (BigElement)srcElem2) :
        ((BigElement)srcElem2 - (BigElement)srcElem1);
'''
threeRegLongInstX("sabal", "SabalX", "SimdAddAccOp", smallSignedTypes,
                  abalCode, True)
threeRegLongInstX("sabal2", "Sabal2X", "SimdAddAccOp", smallSignedTypes,
                  abalCode, True, hi=True)
# SABD
abdCode = '''
    destElem = (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
                                       (srcElem2 - srcElem1);
'''
threeEqualRegInstX("sabd", "SabdDX", "SimdAddOp", smallSignedTypes, 2,
                   abdCode)
threeEqualRegInstX("sabd", "SabdQX", "SimdAddOp", smallSignedTypes, 4,
                   abdCode)
# SABDL, SABDL2
# NOTE(review): abdlCode overwrites destElem ('=' rather than '+='), yet
# these calls pass the same positional True that the accumulating SABAL
# calls pass -- confirm against threeRegLongInstX whether it is needed.
abdlCode = '''
    destElem = (srcElem1 > srcElem2) ?
        ((BigElement)srcElem1 - (BigElement)srcElem2) :
        ((BigElement)srcElem2 - (BigElement)srcElem1);
'''
threeRegLongInstX("sabdl", "SabdlX", "SimdAddAccOp", smallSignedTypes,
                  abdlCode, True)
threeRegLongInstX("sabdl2", "Sabdl2X", "SimdAddAccOp", smallSignedTypes,
                  abdlCode, True, hi=True)
# SADALP
adalpCode = "destElem += (BigElement)srcElem1 + (BigElement)srcElem2;"
twoRegCondenseInstX("sadalp", "SadalpDX", "SimdAddOp", smallSignedTypes, 2,
                    adalpCode, True)
twoRegCondenseInstX("sadalp", "SadalpQX", "SimdAddOp", smallSignedTypes, 4,
                    adalpCode, True)
# SADDL, SADDL2
# addlwCode is shared by SADDL, SADDLP and SADDW below.
addlwCode = "destElem = (BigElement)srcElem1 + (BigElement)srcElem2;"
threeRegLongInstX("saddl", "SaddlX", "SimdAddAccOp", smallSignedTypes,
                  addlwCode)
threeRegLongInstX("saddl2", "Saddl2X", "SimdAddAccOp", smallSignedTypes,
                  addlwCode, hi=True)
# SADDLP
twoRegCondenseInstX("saddlp", "SaddlpDX", "SimdAddOp", smallSignedTypes, 2,
                    addlwCode)
twoRegCondenseInstX("saddlp", "SaddlpQX", "SimdAddOp", smallSignedTypes, 4,
                    addlwCode)
# SADDLV
# Note: SimdAddOp can be a bit optimistic here
addAcrossLongCode = "destElem += (BigElement)srcElem1;"
twoRegAcrossInstX("saddlv", "SaddlvDX", "SimdAddOp", ("int8_t", "int16_t"),
                  2, addAcrossLongCode, long=True)
twoRegAcrossInstX("saddlv", "SaddlvQX", "SimdAddOp", ("int8_t", "int16_t"),
                  4, addAcrossLongCode, long=True)
twoRegAcrossInstX("saddlv", "SaddlvBQX", "SimdAddOp", ("int32_t",), 4,
                  addAcrossLongCode, doubleDest=True, long=True)
# SADDW, SADDW2
threeRegWideInstX("saddw", "SaddwX", "SimdAddAccOp", smallSignedTypes,
                  addlwCode)
threeRegWideInstX("saddw2", "Saddw2X", "SimdAddAccOp", smallSignedTypes,
                  addlwCode, hi=True)
# SCVTF (fixed-point)
# The template keeps one %d slot after the fpOp substitution; each call
# fills it with 32 or 64 to pick the signed integer type srcElem1 is cast
# to.
scvtfFixedCode = fpOp % ("fplibFixedToFP<Element>((int%d_t) srcElem1, imm,"
                         " false, FPCRRounding(fpscr), fpscr)")
twoEqualRegInstX("scvtf", "ScvtfFixedDX", "SimdCvtOp", smallFloatTypes, 2,
                 scvtfFixedCode % 32, hasImm=True)
twoEqualRegInstX("scvtf", "ScvtfFixedSQX", "SimdCvtOp", smallFloatTypes, 4,
                 scvtfFixedCode % 32, hasImm=True)
twoEqualRegInstX("scvtf", "ScvtfFixedDQX", "SimdCvtOp", ("uint64_t",), 4,
                 scvtfFixedCode % 64, hasImm=True)
twoEqualRegInstX("scvtf", "ScvtfFixedScSX", "SimdCvtOp", smallFloatTypes,
                 4, scvtfFixedCode % 32, hasImm=True, scalar=True)
twoEqualRegInstX("scvtf", "ScvtfFixedScDX", "SimdCvtOp", ("uint64_t",), 4,
                 scvtfFixedCode % 64, hasImm=True, scalar=True)
# SCVTF (integer)
scvtfIntCode = fpOp % ("fplibFixedToFP<Element>((int%d_t) srcElem1, 0,"
                       " false, FPCRRounding(fpscr), fpscr)")
twoEqualRegInstX("scvtf", "ScvtfIntDX", "SimdCvtOp", smallFloatTypes, 2,
                 scvtfIntCode % 32)
twoEqualRegInstX("scvtf", "ScvtfIntSQX", "SimdCvtOp", smallFloatTypes, 4,
                 scvtfIntCode % 32)
twoEqualRegInstX("scvtf", "ScvtfIntDQX", "SimdCvtOp", ("uint64_t",), 4,
                 scvtfIntCode % 64)
twoEqualRegInstX("scvtf", "ScvtfIntScSX", "SimdCvtOp", smallFloatTypes, 4,
                 scvtfIntCode % 32, scalar=True)
twoEqualRegInstX("scvtf", "ScvtfIntScDX", "SimdCvtOp", ("uint64_t",), 4,
                 scvtfIntCode % 64, scalar=True)
# ---------------------------------------------------------------------------
# Advanced SIMD instruction definitions, SHADD through UHSUB.
#
# Pattern used throughout: a C++ fragment is stored in a Python string and
# handed to one of the generator helpers (threeEqualRegInstX,
# twoEqualRegInstX, twoRegNarrowInstX, threeRegLongInstX, ...).  None of
# those helpers is defined in this section.  Most call sites pass, in order:
# mnemonic, generated class name, op-class name, a tuple of element types
# and the fragment; several helpers also take a 2 or a 4 before the fragment.
#
# NOTE(review): the 2/4 argument presumably selects the D (64-bit) or
# Q (128-bit) register form, matching the DX/QX class-name suffixes --
# confirm against the helper definitions.
# NOTE(review): the bare positional True that follows the fragment in
# several calls (SLI, SMLAL, SRSRA, ...) presumably asks the helper to read
# the destination before executing the fragment -- confirm.
# NOTE(review): some hi=True variants are registered under the base
# mnemonic ("shll", "smlal", "sqxtn", "sqrshrun", ...) while others use the
# "2"-suffixed one ("shrn2", "sqrshrn2", "ssubl2", ...).  Left exactly as
# found; confirm whether the difference is intentional.
#
# The fragments read srcElem1 / srcElem2 / imm and write destElem.  The
# saturating fragments set fpscr.qc = 1 when they clamp a result and write
# the flag back through FpscrQc.
# ---------------------------------------------------------------------------

# SHADD
haddCode = '''
    Element carryBit =
        (((unsigned)srcElem1 & 0x1) +
         ((unsigned)srcElem2 & 0x1)) >> 1;
    // Use division instead of a shift to ensure the sign extension works
    // right. The compiler will figure out if it can be a shift. Mask the
    // inputs so they get truncated correctly.
    destElem = (((srcElem1 & ~(Element)1) / 2) +
                ((srcElem2 & ~(Element)1) / 2)) + carryBit;
'''
threeEqualRegInstX("shadd", "ShaddDX", "SimdAddOp", smallSignedTypes, 2,
                   haddCode)
threeEqualRegInstX("shadd", "ShaddQX", "SimdAddOp", smallSignedTypes, 4,
                   haddCode)
# SHL
shlCode = '''
    if (imm >= sizeof(Element) * 8)
        destElem = (srcElem1 << (sizeof(Element) * 8 - 1)) << 1;
    else
        destElem = srcElem1 << imm;
'''
twoEqualRegInstX("shl", "ShlDX", "SimdShiftOp", unsignedTypes, 2, shlCode,
                 hasImm=True)
twoEqualRegInstX("shl", "ShlQX", "SimdShiftOp", unsignedTypes, 4, shlCode,
                 hasImm=True)
# SHLL, SHLL2
shllCode = "destElem = ((BigElement)srcElem1) << (sizeof(Element) * 8);"
twoRegLongInstX("shll", "ShllX", "SimdShiftOp", smallTypes, shllCode)
twoRegLongInstX("shll", "Shll2X", "SimdShiftOp", smallTypes, shllCode,
                hi=True)
# SHRN, SHRN2
shrnCode = '''
    if (imm >= sizeof(srcElem1) * 8) {
        destElem = 0;
    } else {
        destElem = srcElem1 >> imm;
    }
'''
twoRegNarrowInstX("shrn", "ShrnX", "SimdShiftOp", smallUnsignedTypes,
                  shrnCode, hasImm=True)
twoRegNarrowInstX("shrn2", "Shrn2X", "SimdShiftOp", smallUnsignedTypes,
                  shrnCode, hasImm=True, hi=True)
# SHSUB
hsubCode = '''
    Element borrowBit =
        (((srcElem1 & 0x1) - (srcElem2 & 0x1)) >> 1) & 0x1;
    // Use division instead of a shift to ensure the sign extension works
    // right. The compiler will figure out if it can be a shift. Mask the
    // inputs so they get truncated correctly.
    destElem = (((srcElem1 & ~(Element)1) / 2) -
                ((srcElem2 & ~(Element)1) / 2)) - borrowBit;
'''
threeEqualRegInstX("shsub", "ShsubDX", "SimdAddOp", smallSignedTypes, 2,
                   hsubCode)
threeEqualRegInstX("shsub", "ShsubQX", "SimdAddOp", smallSignedTypes, 4,
                   hsubCode)
# SLI
# For imm >= element width the fragment leaves destElem as it was (the
# self-assignment is an explicit no-op).
sliCode = '''
    if (imm >= sizeof(Element) * 8)
        destElem = destElem;
    else
        destElem = (srcElem1 << imm) | (destElem & mask(imm));
'''
twoEqualRegInstX("sli", "SliDX", "SimdShiftOp", unsignedTypes, 2, sliCode,
                 True, hasImm=True)
twoEqualRegInstX("sli", "SliQX", "SimdShiftOp", unsignedTypes, 4, sliCode,
                 True, hasImm=True)
# SMAX
maxCode = "destElem = (srcElem1 > srcElem2) ? srcElem1 : srcElem2;"
threeEqualRegInstX("smax", "SmaxDX", "SimdCmpOp", smallSignedTypes, 2,
                   maxCode)
threeEqualRegInstX("smax", "SmaxQX", "SimdCmpOp", smallSignedTypes, 4,
                   maxCode)
# SMAXP
threeEqualRegInstX("smaxp", "SmaxpDX", "SimdCmpOp", smallSignedTypes, 2,
                   maxCode, pairwise=True)
threeEqualRegInstX("smaxp", "SmaxpQX", "SimdCmpOp", smallSignedTypes, 4,
                   maxCode, pairwise=True)
# SMAXV
maxAcrossCode = '''
    if (i == 0 || srcElem1 > destElem)
        destElem = srcElem1;
'''
twoRegAcrossInstX("smaxv", "SmaxvDX", "SimdCmpOp", ("int8_t", "int16_t"),
                  2, maxAcrossCode)
twoRegAcrossInstX("smaxv", "SmaxvQX", "SimdCmpOp", smallSignedTypes, 4,
                  maxAcrossCode)
# SMIN
minCode = "destElem = (srcElem1 < srcElem2) ? srcElem1 : srcElem2;"
threeEqualRegInstX("smin", "SminDX", "SimdCmpOp", smallSignedTypes, 2,
                   minCode)
threeEqualRegInstX("smin", "SminQX", "SimdCmpOp", smallSignedTypes, 4,
                   minCode)
# SMINP
threeEqualRegInstX("sminp", "SminpDX", "SimdCmpOp", smallSignedTypes, 2,
                   minCode, pairwise=True)
threeEqualRegInstX("sminp", "SminpQX", "SimdCmpOp", smallSignedTypes, 4,
                   minCode, pairwise=True)
# SMINV
minAcrossCode = '''
    if (i == 0 || srcElem1 < destElem)
        destElem = srcElem1;
'''
twoRegAcrossInstX("sminv", "SminvDX", "SimdCmpOp", ("int8_t", "int16_t"),
                  2, minAcrossCode)
twoRegAcrossInstX("sminv", "SminvQX", "SimdCmpOp", smallSignedTypes, 4,
                  minAcrossCode)

split('exec')

# SMLAL, SMLAL2 (by element)
mlalCode = "destElem += (BigElement)srcElem1 * (BigElement)srcElem2;"
threeRegLongInstX("smlal", "SmlalElemX", "SimdMultAccOp",
                  ("int16_t", "int32_t"), mlalCode, True, byElem=True)
threeRegLongInstX("smlal", "SmlalElem2X", "SimdMultAccOp",
                  ("int16_t", "int32_t"), mlalCode, True, byElem=True,
                  hi=True)
# SMLAL, SMLAL2 (vector)
threeRegLongInstX("smlal", "SmlalX", "SimdMultAccOp", smallSignedTypes,
                  mlalCode, True)
threeRegLongInstX("smlal", "Smlal2X", "SimdMultAccOp", smallSignedTypes,
                  mlalCode, True, hi=True)
# SMLSL, SMLSL2 (by element)
mlslCode = "destElem -= (BigElement)srcElem1 * (BigElement)srcElem2;"
threeRegLongInstX("smlsl", "SmlslElemX", "SimdMultAccOp", smallSignedTypes,
                  mlslCode, True, byElem=True)
threeRegLongInstX("smlsl", "SmlslElem2X", "SimdMultAccOp",
                  smallSignedTypes, mlslCode, True, byElem=True, hi=True)
# SMLSL, SMLSL2 (vector)
threeRegLongInstX("smlsl", "SmlslX", "SimdMultAccOp", smallSignedTypes,
                  mlslCode, True)
threeRegLongInstX("smlsl", "Smlsl2X", "SimdMultAccOp", smallSignedTypes,
                  mlslCode, True, hi=True)
# SMOV
insToGprInstX("smov", "SmovWX", "SimdMiscOp", ("int8_t", "int16_t"), 4,
              'W', True)
insToGprInstX("smov", "SmovXX", "SimdMiscOp", smallSignedTypes, 4, 'X',
              True)
# SMULL, SMULL2 (by element)
mullCode = "destElem = (BigElement)srcElem1 * (BigElement)srcElem2;"
threeRegLongInstX("smull", "SmullElemX", "SimdMultOp", smallSignedTypes,
                  mullCode, byElem=True)
threeRegLongInstX("smull", "SmullElem2X", "SimdMultOp", smallSignedTypes,
                  mullCode, byElem=True, hi=True)
# SMULL, SMULL2 (vector)
threeRegLongInstX("smull", "SmullX", "SimdMultOp", smallSignedTypes,
                  mullCode)
threeRegLongInstX("smull", "Smull2X", "SimdMultOp", smallSignedTypes,
                  mullCode, hi=True)
# SQABS
sqabsCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (srcElem1 == (Element)(std::numeric_limits<Element>::min())) {
        fpscr.qc = 1;
        destElem = ~srcElem1;
    } else if (srcElem1 < 0) {
        destElem = -srcElem1;
    } else {
        destElem = srcElem1;
    }
    FpscrQc = fpscr;
'''
twoEqualRegInstX("sqabs", "SqabsDX", "SimdAluOp", smallSignedTypes, 2,
                 sqabsCode)
twoEqualRegInstX("sqabs", "SqabsQX", "SimdAluOp", signedTypes, 4,
                 sqabsCode)
twoEqualRegInstX("sqabs", "SqabsScX", "SimdAluOp", signedTypes, 4,
                 sqabsCode, scalar=True)
# SQADD
sqaddCode = '''
    destElem = srcElem1 + srcElem2;
    FPSCR fpscr = (FPSCR) FpscrQc;
    bool negDest = (destElem < 0);
    bool negSrc1 = (srcElem1 < 0);
    bool negSrc2 = (srcElem2 < 0);
    if ((negDest != negSrc1) && (negSrc1 == negSrc2)) {
        destElem = std::numeric_limits<Element>::min();
        if (negDest)
            destElem -= 1;
        fpscr.qc = 1;
    }
    FpscrQc = fpscr;
'''
threeEqualRegInstX("sqadd", "SqaddDX", "SimdAddOp", smallSignedTypes, 2,
                   sqaddCode)
threeEqualRegInstX("sqadd", "SqaddQX", "SimdAddOp", signedTypes, 4,
                   sqaddCode)
threeEqualRegInstX("sqadd", "SqaddScX", "SimdAddOp", signedTypes, 4,
                   sqaddCode, scalar=True)
# SQDMLAL, SQDMLAL2 (by element)
qdmlalCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
    Element maxNeg = std::numeric_limits<Element>::min();
    Element halfNeg = maxNeg / 2;
    if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
        (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
        (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
        midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
        fpscr.qc = 1;
    }
    bool negPreDest = ltz(destElem);
    destElem += midElem;
    bool negDest = ltz(destElem);
    bool negMid = ltz(midElem);
    if (negPreDest == negMid && negMid != negDest) {
        destElem = mask(sizeof(BigElement) * 8 - 1);
        if (negPreDest)
            destElem = ~destElem;
        fpscr.qc = 1;
    }
    FpscrQc = fpscr;
'''
threeRegLongInstX("sqdmlal", "SqdmlalElemX", "SimdMultAccOp",
                  ("int16_t", "int32_t"), qdmlalCode, True, byElem=True)
threeRegLongInstX("sqdmlal", "SqdmlalElem2X", "SimdMultAccOp",
                  ("int16_t", "int32_t"), qdmlalCode, True, byElem=True,
                  hi=True)
threeRegLongInstX("sqdmlal", "SqdmlalElemScX", "SimdMultAccOp",
                  ("int16_t", "int32_t"), qdmlalCode, True, byElem=True,
                  scalar=True)
# SQDMLAL, SQDMLAL2 (vector)
threeRegLongInstX("sqdmlal", "SqdmlalX", "SimdMultAccOp",
                  ("int16_t", "int32_t"), qdmlalCode, True)
threeRegLongInstX("sqdmlal", "Sqdmlal2X", "SimdMultAccOp",
                  ("int16_t", "int32_t"), qdmlalCode, True, hi=True)
threeRegLongInstX("sqdmlal", "SqdmlalScX", "SimdMultAccOp",
                  ("int16_t", "int32_t"), qdmlalCode, True, scalar=True)
# SQDMLSL, SQDMLSL2 (by element)
qdmlslCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
    Element maxNeg = std::numeric_limits<Element>::min();
    Element halfNeg = maxNeg / 2;
    if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
        (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
        (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
        midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
        fpscr.qc = 1;
    }
    bool negPreDest = ltz(destElem);
    destElem -= midElem;
    bool negDest = ltz(destElem);
    bool posMid = ltz((BigElement)-midElem);
    if (negPreDest == posMid && posMid != negDest) {
        destElem = mask(sizeof(BigElement) * 8 - 1);
        if (negPreDest)
            destElem = ~destElem;
        fpscr.qc = 1;
    }
    FpscrQc = fpscr;
'''
threeRegLongInstX("sqdmlsl", "SqdmlslElemX", "SimdMultAccOp",
                  ("int16_t", "int32_t"), qdmlslCode, True, byElem=True)
threeRegLongInstX("sqdmlsl", "SqdmlslElem2X", "SimdMultAccOp",
                  ("int16_t", "int32_t"), qdmlslCode, True, byElem=True,
                  hi=True)
threeRegLongInstX("sqdmlsl", "SqdmlslElemScX", "SimdMultAccOp",
                  ("int16_t", "int32_t"), qdmlslCode, True, byElem=True,
                  scalar=True)
# SQDMLSL, SQDMLSL2 (vector)
threeRegLongInstX("sqdmlsl", "SqdmlslX", "SimdMultAccOp",
                  ("int16_t", "int32_t"), qdmlslCode, True)
threeRegLongInstX("sqdmlsl", "Sqdmlsl2X", "SimdMultAccOp",
                  ("int16_t", "int32_t"), qdmlslCode, True, hi=True)
threeRegLongInstX("sqdmlsl", "SqdmlslScX", "SimdMultAccOp",
                  ("int16_t", "int32_t"), qdmlslCode, True, scalar=True)
# SQDMULH (by element)
sqdmulhCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2) >>
               (sizeof(Element) * 8);
    if (srcElem1 == srcElem2 &&
            srcElem1 == (Element)((Element)1 <<
                (sizeof(Element) * 8 - 1))) {
        destElem = ~srcElem1;
        fpscr.qc = 1;
    }
    FpscrQc = fpscr;
'''
threeEqualRegInstX("sqdmulh", "SqdmulhElemDX", "SimdMultOp",
                   ("int16_t", "int32_t"), 2, sqdmulhCode, byElem=True)
threeEqualRegInstX("sqdmulh", "SqdmulhElemQX", "SimdMultOp",
                   ("int16_t", "int32_t"), 4, sqdmulhCode, byElem=True)
threeEqualRegInstX("sqdmulh", "SqdmulhElemScX", "SimdMultOp",
                   ("int16_t", "int32_t"), 4, sqdmulhCode, byElem=True,
                   scalar=True)
# SQDMULH (vector)
threeEqualRegInstX("sqdmulh", "SqdmulhDX", "SimdMultOp",
                   ("int16_t", "int32_t"), 2, sqdmulhCode)
threeEqualRegInstX("sqdmulh", "SqdmulhQX", "SimdMultOp",
                   ("int16_t", "int32_t"), 4, sqdmulhCode)
threeEqualRegInstX("sqdmulh", "SqdmulhScX", "SimdMultOp",
                   ("int16_t", "int32_t"), 4, sqdmulhCode, scalar=True)
# SQDMULL, SQDMULL2 (by element)
qdmullCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
    if (srcElem1 == srcElem2 &&
            srcElem1 == (Element)((Element)1 <<
                (Element)(sizeof(Element) * 8 - 1))) {
        destElem = ~((BigElement)srcElem1 << (sizeof(Element) * 8));
        fpscr.qc = 1;
    }
    FpscrQc = fpscr;
'''
threeRegLongInstX("sqdmull", "SqdmullElemX", "SimdMultOp",
                  ("int16_t", "int32_t"), qdmullCode, True, byElem=True)
threeRegLongInstX("sqdmull", "SqdmullElem2X", "SimdMultOp",
                  ("int16_t", "int32_t"), qdmullCode, True, byElem=True,
                  hi=True)
threeRegLongInstX("sqdmull", "SqdmullElemScX", "SimdMultOp",
                  ("int16_t", "int32_t"), qdmullCode, True, byElem=True,
                  scalar=True)
# SQDMULL, SQDMULL2 (vector)
threeRegLongInstX("sqdmull", "SqdmullX", "SimdMultOp",
                  ("int16_t", "int32_t"), qdmullCode, True)
threeRegLongInstX("sqdmull", "Sqdmull2X", "SimdMultOp",
                  ("int16_t", "int32_t"), qdmullCode, True, hi=True)
threeRegLongInstX("sqdmull", "SqdmullScX", "SimdMultOp",
                  ("int16_t", "int32_t"), qdmullCode, True, scalar=True)
# SQNEG
sqnegCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (srcElem1 == (Element)(std::numeric_limits<Element>::min())) {
        fpscr.qc = 1;
        destElem = ~srcElem1;
    } else {
        destElem = -srcElem1;
    }
    FpscrQc = fpscr;
'''
twoEqualRegInstX("sqneg", "SqnegDX", "SimdAluOp", smallSignedTypes, 2,
                 sqnegCode)
twoEqualRegInstX("sqneg", "SqnegQX", "SimdAluOp", signedTypes, 4,
                 sqnegCode)
twoEqualRegInstX("sqneg", "SqnegScX", "SimdAluOp", signedTypes, 4,
                 sqnegCode, scalar=True)
# SQRDMULH (by element)
sqrdmulhCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2 +
                ((int64_t)1 << (sizeof(Element) * 8 - 1))) >>
               (sizeof(Element) * 8);
    Element maxNeg = std::numeric_limits<Element>::min();
    Element halfNeg = maxNeg / 2;
    if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
        (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
        (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
        if (destElem < 0) {
            destElem = mask(sizeof(Element) * 8 - 1);
        } else {
            destElem = std::numeric_limits<Element>::min();
        }
        fpscr.qc = 1;
    }
    FpscrQc = fpscr;
'''
threeEqualRegInstX("sqrdmulh", "SqrdmulhElemDX", "SimdMultOp",
                   ("int16_t", "int32_t"), 2, sqrdmulhCode, byElem=True)
threeEqualRegInstX("sqrdmulh", "SqrdmulhElemQX", "SimdMultOp",
                   ("int16_t", "int32_t"), 4, sqrdmulhCode, byElem=True)
threeEqualRegInstX("sqrdmulh", "SqrdmulhElemScX", "SimdMultOp",
                   ("int16_t", "int32_t"), 4, sqrdmulhCode, byElem=True,
                   scalar=True)
# SQRDMULH (vector)
threeEqualRegInstX("sqrdmulh", "SqrdmulhDX", "SimdMultOp",
                   ("int16_t", "int32_t"), 2, sqrdmulhCode)
threeEqualRegInstX("sqrdmulh", "SqrdmulhQX", "SimdMultOp",
                   ("int16_t", "int32_t"), 4, sqrdmulhCode)
threeEqualRegInstX("sqrdmulh", "SqrdmulhScX", "SimdMultOp",
                   ("int16_t", "int32_t"), 4, sqrdmulhCode, scalar=True)
# SQRSHL
sqrshlCode = '''
    int16_t shiftAmt = (int8_t)srcElem2;
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (shiftAmt < 0) {
        shiftAmt = -shiftAmt;
        Element rBit = 0;
        if (shiftAmt <= sizeof(Element) * 8)
            rBit = bits(srcElem1, shiftAmt - 1);
        if (shiftAmt > sizeof(Element) * 8 && srcElem1 < 0)
            rBit = 1;
        if (shiftAmt >= sizeof(Element) * 8) {
            shiftAmt = sizeof(Element) * 8 - 1;
            destElem = 0;
        } else {
            destElem = (srcElem1 >> shiftAmt);
        }
        // Make sure the right shift sign extended when it should.
        if (srcElem1 < 0 && destElem >= 0) {
            destElem |= -((Element)1 << (sizeof(Element) * 8 -
                                         1 - shiftAmt));
        }
        destElem += rBit;
    } else if (shiftAmt > 0) {
        bool sat = false;
        if (shiftAmt >= sizeof(Element) * 8) {
            if (srcElem1 != 0)
                sat = true;
            else
                destElem = 0;
        } else {
            if (bits((uint64_t) srcElem1, sizeof(Element) * 8 - 1,
                        sizeof(Element) * 8 - 1 - shiftAmt) !=
                    ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
                sat = true;
            } else {
                destElem = srcElem1 << shiftAmt;
            }
        }
        if (sat) {
            fpscr.qc = 1;
            destElem = mask(sizeof(Element) * 8 - 1);
            if (srcElem1 < 0)
                destElem = ~destElem;
        }
    } else {
        destElem = srcElem1;
    }
    FpscrQc = fpscr;
'''
threeEqualRegInstX("sqrshl", "SqrshlDX", "SimdCmpOp", smallSignedTypes, 2,
                   sqrshlCode)
threeEqualRegInstX("sqrshl", "SqrshlQX", "SimdCmpOp", signedTypes, 4,
                   sqrshlCode)
threeEqualRegInstX("sqrshl", "SqrshlScX", "SimdCmpOp", signedTypes, 4,
                   sqrshlCode, scalar=True)
# SQRSHRN, SQRSHRN2
sqrshrnCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (imm > sizeof(srcElem1) * 8) {
        if (srcElem1 != 0 && srcElem1 != -1)
            fpscr.qc = 1;
        destElem = 0;
    } else if (imm) {
        BigElement mid = (srcElem1 >> (imm - 1));
        uint64_t rBit = mid & 0x1;
        mid >>= 1;
        mid |= -(mid & ((BigElement)1 <<
                    (sizeof(BigElement) * 8 - 1 - imm)));
        mid += rBit;
        if (mid != (Element)mid) {
            destElem = mask(sizeof(Element) * 8 - 1);
            if (srcElem1 < 0)
                destElem = ~destElem;
            fpscr.qc = 1;
        } else {
            destElem = mid;
        }
    } else {
        if (srcElem1 != (Element)srcElem1) {
            destElem = mask(sizeof(Element) * 8 - 1);
            if (srcElem1 < 0)
                destElem = ~destElem;
            fpscr.qc = 1;
        } else {
            destElem = srcElem1;
        }
    }
    FpscrQc = fpscr;
'''
twoRegNarrowInstX("sqrshrn", "SqrshrnX", "SimdShiftOp", smallSignedTypes,
                  sqrshrnCode, hasImm=True)
twoRegNarrowInstX("sqrshrn2", "Sqrshrn2X", "SimdShiftOp", smallSignedTypes,
                  sqrshrnCode, hasImm=True, hi=True)
twoRegNarrowInstX("sqrshrn", "SqrshrnScX", "SimdShiftOp", smallSignedTypes,
                  sqrshrnCode, hasImm=True, scalar=True)
# SQRSHRUN, SQRSHRUN2
sqrshrunCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (imm > sizeof(srcElem1) * 8) {
        if (srcElem1 != 0)
            fpscr.qc = 1;
        destElem = 0;
    } else if (imm) {
        BigElement mid = (srcElem1 >> (imm - 1));
        uint64_t rBit = mid & 0x1;
        mid >>= 1;
        mid |= -(mid & ((BigElement)1 <<
                        (sizeof(BigElement) * 8 - 1 - imm)));
        mid += rBit;
        if (bits(mid, sizeof(BigElement) * 8 - 1,
                      sizeof(Element) * 8) != 0) {
            if (srcElem1 < 0) {
                destElem = 0;
            } else {
                destElem = mask(sizeof(Element) * 8);
            }
            fpscr.qc = 1;
        } else {
            destElem = mid;
        }
    } else {
        if (srcElem1 < 0) {
            fpscr.qc = 1;
            destElem = 0;
        } else {
            destElem = srcElem1;
        }
    }
    FpscrQc = fpscr;
'''
twoRegNarrowInstX("sqrshrun", "SqrshrunX", "SimdShiftOp", smallSignedTypes,
                  sqrshrunCode, hasImm=True)
twoRegNarrowInstX("sqrshrun", "Sqrshrun2X", "SimdShiftOp",
                  smallSignedTypes, sqrshrunCode, hasImm=True, hi=True)
twoRegNarrowInstX("sqrshrun", "SqrshrunScX", "SimdShiftOp",
                  smallSignedTypes, sqrshrunCode, hasImm=True, scalar=True)
# SQSHL (immediate)
sqshlImmCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (imm >= sizeof(Element) * 8) {
        if (srcElem1 != 0) {
            destElem = std::numeric_limits<Element>::min();
            if (srcElem1 > 0)
                destElem = ~destElem;
            fpscr.qc = 1;
        } else {
            destElem = 0;
        }
    } else if (imm) {
        destElem = (srcElem1 << imm);
        uint64_t topBits = bits((uint64_t)srcElem1,
                                sizeof(Element) * 8 - 1,
                                sizeof(Element) * 8 - 1 - imm);
        if (topBits != 0 && topBits != mask(imm + 1)) {
            destElem = std::numeric_limits<Element>::min();
            if (srcElem1 > 0)
                destElem = ~destElem;
            fpscr.qc = 1;
        }
    } else {
        destElem = srcElem1;
    }
    FpscrQc = fpscr;
'''
twoEqualRegInstX("sqshl", "SqshlImmDX", "SimdAluOp", smallSignedTypes, 2,
                 sqshlImmCode, hasImm=True)
twoEqualRegInstX("sqshl", "SqshlImmQX", "SimdAluOp", signedTypes, 4,
                 sqshlImmCode, hasImm=True)
twoEqualRegInstX("sqshl", "SqshlImmScX", "SimdAluOp", signedTypes, 4,
                 sqshlImmCode, hasImm=True, scalar=True)
# SQSHL (register)
sqshlCode = '''
    int16_t shiftAmt = (int8_t)srcElem2;
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (shiftAmt < 0) {
        shiftAmt = -shiftAmt;
        if (shiftAmt >= sizeof(Element) * 8) {
            shiftAmt = sizeof(Element) * 8 - 1;
            destElem = 0;
        } else {
            destElem = (srcElem1 >> shiftAmt);
        }
        // Make sure the right shift sign extended when it should.
        if (srcElem1 < 0 && destElem >= 0) {
            destElem |= -((Element)1 << (sizeof(Element) * 8 -
                                         1 - shiftAmt));
        }
    } else if (shiftAmt > 0) {
        bool sat = false;
        if (shiftAmt >= sizeof(Element) * 8) {
            if (srcElem1 != 0)
                sat = true;
            else
                destElem = 0;
        } else {
            if (bits((uint64_t) srcElem1, sizeof(Element) * 8 - 1,
                        sizeof(Element) * 8 - 1 - shiftAmt) !=
                    ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
                sat = true;
            } else {
                destElem = srcElem1 << shiftAmt;
            }
        }
        if (sat) {
            fpscr.qc = 1;
            destElem = mask(sizeof(Element) * 8 - 1);
            if (srcElem1 < 0)
                destElem = ~destElem;
        }
    } else {
        destElem = srcElem1;
    }
    FpscrQc = fpscr;
'''
threeEqualRegInstX("sqshl", "SqshlDX", "SimdAluOp", smallSignedTypes, 2,
                   sqshlCode)
threeEqualRegInstX("sqshl", "SqshlQX", "SimdAluOp", signedTypes, 4,
                   sqshlCode)
threeEqualRegInstX("sqshl", "SqshlScX", "SimdAluOp", signedTypes, 4,
                   sqshlCode, scalar=True)
# SQSHLU
sqshluCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (imm >= sizeof(Element) * 8) {
        if (srcElem1 < 0) {
            destElem = 0;
            fpscr.qc = 1;
        } else if (srcElem1 > 0) {
            destElem = mask(sizeof(Element) * 8);
            fpscr.qc = 1;
        } else {
            destElem = 0;
        }
    } else if (imm) {
        destElem = (srcElem1 << imm);
        uint64_t topBits = bits((uint64_t)srcElem1,
                                sizeof(Element) * 8 - 1,
                                sizeof(Element) * 8 - imm);
        if (srcElem1 < 0) {
            destElem = 0;
            fpscr.qc = 1;
        } else if (topBits != 0) {
            destElem = mask(sizeof(Element) * 8);
            fpscr.qc = 1;
        }
    } else {
        if (srcElem1 < 0) {
            fpscr.qc = 1;
            destElem = 0;
        } else {
            destElem = srcElem1;
        }
    }
    FpscrQc = fpscr;
'''
twoEqualRegInstX("sqshlu", "SqshluDX", "SimdAluOp", smallSignedTypes, 2,
                 sqshluCode, hasImm=True)
twoEqualRegInstX("sqshlu", "SqshluQX", "SimdAluOp", signedTypes, 4,
                 sqshluCode, hasImm=True)
twoEqualRegInstX("sqshlu", "SqshluScX", "SimdAluOp", signedTypes, 4,
                 sqshluCode, hasImm=True, scalar=True)
# SQSHRN, SQSHRN2
sqshrnCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (imm > sizeof(srcElem1) * 8) {
        if (srcElem1 != 0 && srcElem1 != -1)
            fpscr.qc = 1;
        destElem = 0;
    } else if (imm) {
        BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
        mid |= -(mid & ((BigElement)1 <<
                    (sizeof(BigElement) * 8 - 1 - imm)));
        if (mid != (Element)mid) {
            destElem = mask(sizeof(Element) * 8 - 1);
            if (srcElem1 < 0)
                destElem = ~destElem;
            fpscr.qc = 1;
        } else {
            destElem = mid;
        }
    } else {
        destElem = srcElem1;
    }
    FpscrQc = fpscr;
'''
twoRegNarrowInstX("sqshrn", "SqshrnX", "SimdShiftOp", smallSignedTypes,
                  sqshrnCode, hasImm=True)
twoRegNarrowInstX("sqshrn2", "Sqshrn2X", "SimdShiftOp", smallSignedTypes,
                  sqshrnCode, hasImm=True, hi=True)
twoRegNarrowInstX("sqshrn", "SqshrnScX", "SimdShiftOp", smallSignedTypes,
                  sqshrnCode, hasImm=True, scalar=True)
# SQSHRUN, SQSHRUN2
sqshrunCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (imm > sizeof(srcElem1) * 8) {
        if (srcElem1 != 0)
            fpscr.qc = 1;
        destElem = 0;
    } else if (imm) {
        BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
        if (bits(mid, sizeof(BigElement) * 8 - 1,
                      sizeof(Element) * 8) != 0) {
            if (srcElem1 < 0) {
                destElem = 0;
            } else {
                destElem = mask(sizeof(Element) * 8);
            }
            fpscr.qc = 1;
        } else {
            destElem = mid;
        }
    } else {
        destElem = srcElem1;
    }
    FpscrQc = fpscr;
'''
twoRegNarrowInstX("sqshrun", "SqshrunX", "SimdShiftOp", smallSignedTypes,
                  sqshrunCode, hasImm=True)
twoRegNarrowInstX("sqshrun", "Sqshrun2X", "SimdShiftOp", smallSignedTypes,
                  sqshrunCode, hasImm=True, hi=True)
twoRegNarrowInstX("sqshrun", "SqshrunScX", "SimdShiftOp", smallSignedTypes,
                  sqshrunCode, hasImm=True, scalar=True)
# SQSUB
sqsubCode = '''
    destElem = srcElem1 - srcElem2;
    FPSCR fpscr = (FPSCR) FpscrQc;
    bool negDest = (destElem < 0);
    bool negSrc1 = (srcElem1 < 0);
    bool posSrc2 = (srcElem2 >= 0);
    if ((negDest != negSrc1) && (negSrc1 == posSrc2)) {
        destElem = std::numeric_limits<Element>::min();
        if (negDest)
            destElem -= 1;
        fpscr.qc = 1;
    }
    FpscrQc = fpscr;
'''
threeEqualRegInstX("sqsub", "SqsubDX", "SimdAddOp", smallSignedTypes, 2,
                   sqsubCode)
threeEqualRegInstX("sqsub", "SqsubQX", "SimdAddOp", signedTypes, 4,
                   sqsubCode)
threeEqualRegInstX("sqsub", "SqsubScX", "SimdAddOp", signedTypes, 4,
                   sqsubCode, scalar=True)
# SQXTN, SQXTN2
sqxtnCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    destElem = srcElem1;
    if ((BigElement)destElem != srcElem1) {
        fpscr.qc = 1;
        destElem = mask(sizeof(Element) * 8 - 1);
        if (srcElem1 < 0)
            destElem = ~destElem;
    }
    FpscrQc = fpscr;
'''
twoRegNarrowInstX("sqxtn", "SqxtnX", "SimdMiscOp", smallSignedTypes,
                  sqxtnCode)
twoRegNarrowInstX("sqxtn", "Sqxtn2X", "SimdMiscOp", smallSignedTypes,
                  sqxtnCode, hi=True)
twoRegNarrowInstX("sqxtn", "SqxtnScX", "SimdMiscOp", smallSignedTypes,
                  sqxtnCode, scalar=True)
# SQXTUN, SQXTUN2
sqxtunCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    destElem = srcElem1;
    if (srcElem1 < 0 ||
            ((BigElement)destElem & mask(sizeof(Element) * 8)) != srcElem1) {
        fpscr.qc = 1;
        destElem = mask(sizeof(Element) * 8);
        if (srcElem1 < 0)
            destElem = ~destElem;
    }
    FpscrQc = fpscr;
'''
twoRegNarrowInstX("sqxtun", "SqxtunX", "SimdMiscOp", smallSignedTypes,
                  sqxtunCode)
twoRegNarrowInstX("sqxtun", "Sqxtun2X", "SimdMiscOp", smallSignedTypes,
                  sqxtunCode, hi=True)
twoRegNarrowInstX("sqxtun", "SqxtunScX", "SimdMiscOp", smallSignedTypes,
                  sqxtunCode, scalar=True)
# SRHADD
rhaddCode = '''
    Element carryBit =
        (((unsigned)srcElem1 & 0x1) +
         ((unsigned)srcElem2 & 0x1) + 1) >> 1;
    // Use division instead of a shift to ensure the sign extension works
    // right. The compiler will figure out if it can be a shift. Mask the
    // inputs so they get truncated correctly.
    destElem = (((srcElem1 & ~(Element)1) / 2) +
                ((srcElem2 & ~(Element)1) / 2)) + carryBit;
'''
threeEqualRegInstX("srhadd", "SrhaddDX", "SimdAddOp", smallSignedTypes, 2,
                   rhaddCode)
threeEqualRegInstX("srhadd", "SrhaddQX", "SimdAddOp", smallSignedTypes, 4,
                   rhaddCode)
# SRI
# As with SLI, imm >= element width leaves destElem as it was.
sriCode = '''
    if (imm >= sizeof(Element) * 8)
        destElem = destElem;
    else
        destElem = (srcElem1 >> imm) |
            (destElem & ~mask(sizeof(Element) * 8 - imm));
'''
twoEqualRegInstX("sri", "SriDX", "SimdShiftOp", unsignedTypes, 2, sriCode,
                 True, hasImm=True)
twoEqualRegInstX("sri", "SriQX", "SimdShiftOp", unsignedTypes, 4, sriCode,
                 True, hasImm=True)
# SRSHL
rshlCode = '''
    int16_t shiftAmt = (int8_t)srcElem2;
    if (shiftAmt < 0) {
        shiftAmt = -shiftAmt;
        Element rBit = 0;
        if (shiftAmt <= sizeof(Element) * 8)
            rBit = bits(srcElem1, shiftAmt - 1);
        if (shiftAmt > sizeof(Element) * 8 && ltz(srcElem1))
            rBit = 1;
        if (shiftAmt >= sizeof(Element) * 8) {
            shiftAmt = sizeof(Element) * 8 - 1;
            destElem = 0;
        } else {
            destElem = (srcElem1 >> shiftAmt);
        }
        // Make sure the right shift sign extended when it should.
        if (ltz(srcElem1) && !ltz(destElem)) {
            destElem |= -((Element)1 << (sizeof(Element) * 8 -
                                         1 - shiftAmt));
        }
        destElem += rBit;
    } else if (shiftAmt > 0) {
        if (shiftAmt >= sizeof(Element) * 8) {
            destElem = 0;
        } else {
            destElem = srcElem1 << shiftAmt;
        }
    } else {
        destElem = srcElem1;
    }
'''
threeEqualRegInstX("srshl", "SrshlDX", "SimdShiftOp", signedTypes, 2,
                   rshlCode)
threeEqualRegInstX("srshl", "SrshlQX", "SimdShiftOp", signedTypes, 4,
                   rshlCode)
# SRSHR
rshrCode = '''
    if (imm > sizeof(srcElem1) * 8) {
        destElem = 0;
    } else if (imm) {
        Element rBit = bits(srcElem1, imm - 1);
        destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
    } else {
        destElem = srcElem1;
    }
'''
twoEqualRegInstX("srshr", "SrshrDX", "SimdShiftOp", signedTypes, 2,
                 rshrCode, hasImm=True)
twoEqualRegInstX("srshr", "SrshrQX", "SimdShiftOp", signedTypes, 4,
                 rshrCode, hasImm=True)
# SRSRA
rsraCode = '''
    if (imm > sizeof(srcElem1) * 8) {
        destElem += 0;
    } else if (imm) {
        Element rBit = bits(srcElem1, imm - 1);
        destElem += ((srcElem1 >> (imm - 1)) >> 1) + rBit;
    } else {
        destElem += srcElem1;
    }
'''
twoEqualRegInstX("srsra", "SrsraDX", "SimdShiftOp", signedTypes, 2,
                 rsraCode, True, hasImm=True)
twoEqualRegInstX("srsra", "SrsraQX", "SimdShiftOp", signedTypes, 4,
                 rsraCode, True, hasImm=True)
# SSHL
# Rebinds shlCode: the SHL entries above were already registered with the
# previous (immediate-shift) fragment, so they are unaffected.
shlCode = '''
    int16_t shiftAmt = (int8_t)srcElem2;
    if (shiftAmt < 0) {
        shiftAmt = -shiftAmt;
        if (shiftAmt >= sizeof(Element) * 8) {
            shiftAmt = sizeof(Element) * 8 - 1;
            destElem = 0;
        } else {
            destElem = (srcElem1 >> shiftAmt);
        }
        // Make sure the right shift sign extended when it should.
        if (ltz(srcElem1) && !ltz(destElem)) {
            destElem |= -((Element)1 << (sizeof(Element) * 8 -
                                         1 - shiftAmt));
        }
    } else {
        if (shiftAmt >= sizeof(Element) * 8) {
            destElem = 0;
        } else {
            destElem = srcElem1 << shiftAmt;
        }
    }
'''
threeEqualRegInstX("sshl", "SshlDX", "SimdShiftOp", signedTypes, 2,
                   shlCode)
threeEqualRegInstX("sshl", "SshlQX", "SimdShiftOp", signedTypes, 4,
                   shlCode)
# SSHLL, SSHLL2
# Rebinds shllCode: the SHLL entries above keep the fragment they were
# registered with.
shllCode = '''
    if (imm >= sizeof(destElem) * 8) {
        destElem = 0;
    } else {
        destElem = (BigElement)srcElem1 << imm;
    }
'''
twoRegLongInstX("sshll", "SshllX", "SimdShiftOp", smallSignedTypes,
                shllCode, hasImm=True)
twoRegLongInstX("sshll", "Sshll2X", "SimdShiftOp", smallSignedTypes,
                shllCode, hasImm=True, hi=True)
# SSHR
shrCode = '''
    if (imm >= sizeof(srcElem1) * 8) {
        if (ltz(srcElem1))
            destElem = -1;
        else
            destElem = 0;
    } else {
        destElem = srcElem1 >> imm;
    }
'''
twoEqualRegInstX("sshr", "SshrDX", "SimdShiftOp", signedTypes, 2, shrCode,
                 hasImm=True)
twoEqualRegInstX("sshr", "SshrQX", "SimdShiftOp", signedTypes, 4, shrCode,
                 hasImm=True)
# SSRA
sraCode = '''
    Element mid;
    if (imm >= sizeof(srcElem1) * 8) {
        mid = ltz(srcElem1) ? -1 : 0;
    } else {
        mid = srcElem1 >> imm;
        if (ltz(srcElem1) && !ltz(mid)) {
            mid |= -(mid & ((Element)1 <<
                            (sizeof(Element) * 8 - 1 - imm)));
        }
    }
    destElem += mid;
'''
twoEqualRegInstX("ssra", "SsraDX", "SimdShiftOp", signedTypes, 2, sraCode,
                 True, hasImm=True)
twoEqualRegInstX("ssra", "SsraQX", "SimdShiftOp", signedTypes, 4, sraCode,
                 True, hasImm=True)
# SSUBL
sublwCode = "destElem = (BigElement)srcElem1 - (BigElement)srcElem2;"
threeRegLongInstX("ssubl", "SsublX", "SimdAddOp", smallSignedTypes,
                  sublwCode)
threeRegLongInstX("ssubl2", "Ssubl2X", "SimdAddOp", smallSignedTypes,
                  sublwCode, hi=True)
# SSUBW
threeRegWideInstX("ssubw", "SsubwX", "SimdAddOp", smallSignedTypes,
                  sublwCode)
threeRegWideInstX("ssubw2", "Ssubw2X", "SimdAddOp", smallSignedTypes,
                  sublwCode, hi=True)
# SUB
subCode = "destElem = srcElem1 - srcElem2;"
threeEqualRegInstX("sub", "SubDX", "SimdAddOp", unsignedTypes, 2, subCode)
threeEqualRegInstX("sub", "SubQX", "SimdAddOp", unsignedTypes, 4, subCode)
# SUBHN, SUBHN2
subhnCode = '''
    destElem = ((BigElement)srcElem1 - (BigElement)srcElem2) >>
                (sizeof(Element) * 8);
'''
threeRegNarrowInstX("subhn", "SubhnX", "SimdAddOp", smallUnsignedTypes,
                    subhnCode)
threeRegNarrowInstX("subhn2", "Subhn2X", "SimdAddOp", smallUnsignedTypes,
                    subhnCode, hi=True)
# SUQADD
suqaddCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    Element tmp = destElem + srcElem1;
    if (bits(destElem, sizeof(Element) * 8 - 1) == 0) {
        if (bits(tmp, sizeof(Element) * 8 - 1) == 1 ||
                tmp < srcElem1 || tmp < destElem) {
            destElem = (((Element) 1) << (sizeof(Element) * 8 - 1)) - 1;
            fpscr.qc = 1;
        } else {
            destElem = tmp;
        }
    } else {
        Element absDestElem = (~destElem) + 1;
        if (absDestElem < srcElem1) {
            // Still check for positive sat., no need to check for negative sat.
            if (bits(tmp, sizeof(Element) * 8 - 1) == 1) {
                destElem = (((Element) 1) << (sizeof(Element) * 8 - 1)) - 1;
                fpscr.qc = 1;
            } else {
                destElem = tmp;
            }
        } else {
            destElem = tmp;
        }
    }
    FpscrQc = fpscr;
'''
twoEqualRegInstX("suqadd", "SuqaddDX", "SimdAddOp", smallUnsignedTypes, 2,
                 suqaddCode, True)
twoEqualRegInstX("suqadd", "SuqaddQX", "SimdAddOp", unsignedTypes, 4,
                 suqaddCode, True)
twoEqualRegInstX("suqadd", "SuqaddScX", "SimdAddOp", unsignedTypes, 4,
                 suqaddCode, True, scalar=True)
# SXTL -> alias to SSHLL
# TBL
tbxTblInstX("tbl", "Tbl1DX", "SimdMiscOp", ("uint8_t",), 1, "true", 2)
tbxTblInstX("tbl", "Tbl1QX", "SimdMiscOp", ("uint8_t",), 1, "true", 4)
tbxTblInstX("tbl", "Tbl2DX", "SimdMiscOp", ("uint8_t",), 2, "true", 2)
tbxTblInstX("tbl", "Tbl2QX", "SimdMiscOp", ("uint8_t",), 2, "true", 4)
tbxTblInstX("tbl", "Tbl3DX", "SimdMiscOp", ("uint8_t",), 3, "true", 2)
tbxTblInstX("tbl", "Tbl3QX", "SimdMiscOp", ("uint8_t",), 3, "true", 4)
tbxTblInstX("tbl", "Tbl4DX", "SimdMiscOp", ("uint8_t",), 4, "true", 2)
tbxTblInstX("tbl", "Tbl4QX", "SimdMiscOp", ("uint8_t",), 4, "true", 4)
# TBX
tbxTblInstX("tbx", "Tbx1DX", "SimdMiscOp", ("uint8_t",), 1, "false", 2)
tbxTblInstX("tbx", "Tbx1QX", "SimdMiscOp", ("uint8_t",), 1, "false", 4)
tbxTblInstX("tbx", "Tbx2DX", "SimdMiscOp", ("uint8_t",), 2, "false", 2)
tbxTblInstX("tbx", "Tbx2QX", "SimdMiscOp", ("uint8_t",), 2, "false", 4)
tbxTblInstX("tbx", "Tbx3DX", "SimdMiscOp", ("uint8_t",), 3, "false", 2)
tbxTblInstX("tbx", "Tbx3QX", "SimdMiscOp", ("uint8_t",), 3, "false", 4)
tbxTblInstX("tbx", "Tbx4DX", "SimdMiscOp", ("uint8_t",), 4, "false", 2)
tbxTblInstX("tbx", "Tbx4QX", "SimdMiscOp", ("uint8_t",), 4, "false", 4)
# TRN1
# trnCode is a %-template: the single %s is filled with "0" for TRN1 and
# "1" for TRN2 and becomes the element offset `part`.
trnCode = '''
    unsigned part = %s;
    for (unsigned i = 0; i < eCount / 2; i++) {
        destReg.elements[2 * i] = srcReg1.elements[2 * i + part];
        destReg.elements[2 * i + 1] = srcReg2.elements[2 * i + part];
    }
'''
threeRegScrambleInstX("trn1", "Trn1DX", "SimdAluOp", smallUnsignedTypes, 2,
                      trnCode % "0")
threeRegScrambleInstX("trn1", "Trn1QX", "SimdAluOp", unsignedTypes, 4,
                      trnCode % "0")
# TRN2
threeRegScrambleInstX("trn2", "Trn2DX", "SimdAluOp", smallUnsignedTypes, 2,
                      trnCode % "1")
threeRegScrambleInstX("trn2", "Trn2QX", "SimdAluOp", unsignedTypes, 4,
                      trnCode % "1")
# UABA
threeEqualRegInstX("uaba", "UabaDX", "SimdAddAccOp", smallUnsignedTypes, 2,
                   abaCode, True)
threeEqualRegInstX("uaba", "UabaQX", "SimdAddAccOp", smallUnsignedTypes, 4,
                   abaCode, True)
# UABAL, UABAL2
threeRegLongInstX("uabal", "UabalX", "SimdAddAccOp", smallUnsignedTypes,
                  abalCode, True)
threeRegLongInstX("uabal2", "Uabal2X", "SimdAddAccOp", smallUnsignedTypes,
                  abalCode, True, hi=True)
# UABD
threeEqualRegInstX("uabd", "UabdDX", "SimdAddOp", smallUnsignedTypes, 2,
                   abdCode)
threeEqualRegInstX("uabd", "UabdQX", "SimdAddOp", smallUnsignedTypes, 4,
                   abdCode)
# UABDL, UABDL2
threeRegLongInstX("uabdl", "UabdlX", "SimdAddAccOp", smallUnsignedTypes,
                  abdlCode, True)
threeRegLongInstX("uabdl2", "Uabdl2X", "SimdAddAccOp", smallUnsignedTypes,
                  abdlCode, True, hi=True)
# UADALP
twoRegCondenseInstX("uadalp", "UadalpDX", "SimdAddOp", smallUnsignedTypes,
                    2, adalpCode, True)
twoRegCondenseInstX("uadalp", "UadalpQX", "SimdAddOp", smallUnsignedTypes,
                    4, adalpCode, True)
# UADDL, UADDL2
threeRegLongInstX("uaddl", "UaddlX", "SimdAddAccOp", smallUnsignedTypes,
                  addlwCode)
threeRegLongInstX("uaddl2", "Uaddl2X", "SimdAddAccOp", smallUnsignedTypes,
                  addlwCode, hi=True)
# UADDLP
twoRegCondenseInstX("uaddlp", "UaddlpDX", "SimdAddOp", smallUnsignedTypes,
                    2, addlwCode)
twoRegCondenseInstX("uaddlp", "UaddlpQX", "SimdAddOp", smallUnsignedTypes,
                    4, addlwCode)
# UADDLV
twoRegAcrossInstX("uaddlv", "UaddlvDX", "SimdAddOp",
                  ("uint8_t", "uint16_t"), 2, addAcrossLongCode, long=True)
twoRegAcrossInstX("uaddlv", "UaddlvQX", "SimdAddOp",
                  ("uint8_t", "uint16_t"), 4, addAcrossLongCode, long=True)
twoRegAcrossInstX("uaddlv", "UaddlvBQX", "SimdAddOp", ("uint32_t",), 4,
                  addAcrossLongCode, doubleDest=True, long=True)
# UADDW
threeRegWideInstX("uaddw", "UaddwX", "SimdAddAccOp", smallUnsignedTypes,
                  addlwCode)
threeRegWideInstX("uaddw2", "Uaddw2X", "SimdAddAccOp", smallUnsignedTypes,
                  addlwCode, hi=True)
# UCVTF (fixed-point)
ucvtfFixedCode = fpOp % ("fplibFixedToFP<Element>(srcElem1, imm, true,"
                         " FPCRRounding(fpscr), fpscr)")
twoEqualRegInstX("ucvtf", "UcvtfFixedDX", "SimdCvtOp", smallFloatTypes, 2,
                 ucvtfFixedCode, hasImm=True)
twoEqualRegInstX("ucvtf", "UcvtfFixedQX", "SimdCvtOp", floatTypes, 4,
                 ucvtfFixedCode, hasImm=True)
twoEqualRegInstX("ucvtf", "UcvtfFixedScX", "SimdCvtOp", floatTypes, 4,
                 ucvtfFixedCode, hasImm=True, scalar=True)
# UCVTF (integer)
ucvtfIntCode = fpOp % ("fplibFixedToFP<Element>(srcElem1, 0, true,"
                       " FPCRRounding(fpscr), fpscr)")
twoEqualRegInstX("ucvtf", "UcvtfIntDX", "SimdCvtOp", smallFloatTypes, 2,
                 ucvtfIntCode)
twoEqualRegInstX("ucvtf", "UcvtfIntQX", "SimdCvtOp", floatTypes, 4,
                 ucvtfIntCode)
twoEqualRegInstX("ucvtf", "UcvtfIntScX", "SimdCvtOp", floatTypes, 4,
                 ucvtfIntCode, scalar=True)
# UHADD
threeEqualRegInstX("uhadd", "UhaddDX", "SimdAddOp", smallUnsignedTypes, 2,
                   haddCode)
threeEqualRegInstX("uhadd", "UhaddQX", "SimdAddOp", smallUnsignedTypes, 4,
                   haddCode)
# UHSUB
threeEqualRegInstX("uhsub", "UhsubDX", "SimdAddOp", smallUnsignedTypes, 2,
                   hsubCode)
threeEqualRegInstX("uhsub", "UhsubQX", "SimdAddOp", smallUnsignedTypes, 4,
                   hsubCode)
# UMAX
threeEqualRegInstX("umax", "UmaxDX", "SimdCmpOp", smallUnsignedTypes, 2, 2957 maxCode) 2958 threeEqualRegInstX("umax", "UmaxQX", "SimdCmpOp", smallUnsignedTypes, 4, 2959 maxCode) 2960 # UMAXP 2961 threeEqualRegInstX("umaxp", "UmaxpDX", "SimdCmpOp", smallUnsignedTypes, 2, 2962 maxCode, pairwise=True) 2963 threeEqualRegInstX("umaxp", "UmaxpQX", "SimdCmpOp", smallUnsignedTypes, 4, 2964 maxCode, pairwise=True) 2965 # UMAXV 2966 twoRegAcrossInstX("umaxv", "UmaxvDX", "SimdCmpOp", ("uint8_t", "uint16_t"), 2967 2, maxAcrossCode) 2968 twoRegAcrossInstX("umaxv", "UmaxvQX", "SimdCmpOp", smallUnsignedTypes, 4, 2969 maxAcrossCode) 2970 # UMIN 2971 threeEqualRegInstX("umin", "UminDX", "SimdCmpOp", smallUnsignedTypes, 2, 2972 minCode) 2973 threeEqualRegInstX("umin", "UminQX", "SimdCmpOp", smallUnsignedTypes, 4, 2974 minCode) 2975 # UMINP 2976 threeEqualRegInstX("uminp", "UminpDX", "SimdCmpOp", smallUnsignedTypes, 2, 2977 minCode, pairwise=True) 2978 threeEqualRegInstX("uminp", "UminpQX", "SimdCmpOp", smallUnsignedTypes, 4, 2979 minCode, pairwise=True) 2980 # UMINV 2981 twoRegAcrossInstX("uminv", "UminvDX", "SimdCmpOp", ("uint8_t", "uint16_t"), 2982 2, minAcrossCode) 2983 twoRegAcrossInstX("uminv", "UminvQX", "SimdCmpOp", smallUnsignedTypes, 4, 2984 minAcrossCode) 2985 # UMLAL (by element) 2986 threeRegLongInstX("umlal", "UmlalElemX", "SimdMultAccOp", 2987 smallUnsignedTypes, mlalCode, True, byElem=True) 2988 threeRegLongInstX("umlal", "UmlalElem2X", "SimdMultAccOp", 2989 smallUnsignedTypes, mlalCode, True, byElem=True, hi=True) 2990 # UMLAL (vector) 2991 threeRegLongInstX("umlal", "UmlalX", "SimdMultAccOp", smallUnsignedTypes, 2992 mlalCode, True) 2993 threeRegLongInstX("umlal", "Umlal2X", "SimdMultAccOp", smallUnsignedTypes, 2994 mlalCode, True, hi=True) 2995 # UMLSL (by element) 2996 threeRegLongInstX("umlsl", "UmlslElemX", "SimdMultAccOp", 2997 smallUnsignedTypes, mlslCode, True, byElem=True) 2998 threeRegLongInstX("umlsl", "UmlslElem2X", "SimdMultAccOp", 2999 
smallUnsignedTypes, mlslCode, True, byElem=True, hi=True) 3000 # UMLSL (vector) 3001 threeRegLongInstX("umlsl", "UmlslX", "SimdMultAccOp", smallUnsignedTypes, 3002 mlslCode, True) 3003 threeRegLongInstX("umlsl", "Umlsl2X", "SimdMultAccOp", smallUnsignedTypes, 3004 mlslCode, True, hi=True) 3005 # UMOV 3006 insToGprInstX("umov", "UmovWX", "SimdMiscOp", smallUnsignedTypes, 4, 'W') 3007 insToGprInstX("umov", "UmovXX", "SimdMiscOp", ("uint64_t",), 4, 'X') 3008 # UMULL, UMULL2 (by element) 3009 threeRegLongInstX("umull", "UmullElemX", "SimdMultOp", smallUnsignedTypes, 3010 mullCode, byElem=True) 3011 threeRegLongInstX("umull", "UmullElem2X", "SimdMultOp", smallUnsignedTypes, 3012 mullCode, byElem=True, hi=True) 3013 # UMULL, UMULL2 (vector) 3014 threeRegLongInstX("umull", "UmullX", "SimdMultOp", smallUnsignedTypes, 3015 mullCode) 3016 threeRegLongInstX("umull", "Umull2X", "SimdMultOp", smallUnsignedTypes, 3017 mullCode, hi=True) 3018 # UQADD 3019 uqaddCode = ''' 3020 destElem = srcElem1 + srcElem2; 3021 FPSCR fpscr = (FPSCR) FpscrQc; 3022 if (destElem < srcElem1 || destElem < srcElem2) { 3023 destElem = (Element)(-1); 3024 fpscr.qc = 1; 3025 } 3026 FpscrQc = fpscr; 3027 ''' 3028 threeEqualRegInstX("uqadd", "UqaddDX", "SimdAddOp", smallUnsignedTypes, 2, 3029 uqaddCode) 3030 threeEqualRegInstX("uqadd", "UqaddQX", "SimdAddOp", unsignedTypes, 4, 3031 uqaddCode) 3032 threeEqualRegInstX("uqadd", "UqaddScX", "SimdAddOp", unsignedTypes, 4, 3033 uqaddCode, scalar=True) 3034 # UQRSHL 3035 uqrshlCode = ''' 3036 int16_t shiftAmt = (int8_t)srcElem2; 3037 FPSCR fpscr = (FPSCR) FpscrQc; 3038 if (shiftAmt < 0) { 3039 shiftAmt = -shiftAmt; 3040 Element rBit = 0; 3041 if (shiftAmt <= sizeof(Element) * 8) 3042 rBit = bits(srcElem1, shiftAmt - 1); 3043 if (shiftAmt >= sizeof(Element) * 8) { 3044 shiftAmt = sizeof(Element) * 8 - 1; 3045 destElem = 0; 3046 } else { 3047 destElem = (srcElem1 >> shiftAmt); 3048 } 3049 destElem += rBit; 3050 } else { 3051 if (shiftAmt >= sizeof(Element) * 8) { 
3052 if (srcElem1 != 0) { 3053 destElem = mask(sizeof(Element) * 8); 3054 fpscr.qc = 1; 3055 } else { 3056 destElem = 0; 3057 } 3058 } else { 3059 if (bits(srcElem1, sizeof(Element) * 8 - 1, 3060 sizeof(Element) * 8 - shiftAmt)) { 3061 destElem = mask(sizeof(Element) * 8); 3062 fpscr.qc = 1; 3063 } else { 3064 destElem = srcElem1 << shiftAmt; 3065 } 3066 } 3067 } 3068 FpscrQc = fpscr; 3069 ''' 3070 threeEqualRegInstX("uqrshl", "UqrshlDX", "SimdCmpOp", smallUnsignedTypes, 3071 2, uqrshlCode) 3072 threeEqualRegInstX("uqrshl", "UqrshlQX", "SimdCmpOp", unsignedTypes, 4, 3073 uqrshlCode) 3074 threeEqualRegInstX("uqrshl", "UqrshlScX", "SimdCmpOp", unsignedTypes, 4, 3075 uqrshlCode, scalar=True) 3076 # UQRSHRN 3077 uqrshrnCode = ''' 3078 FPSCR fpscr = (FPSCR) FpscrQc; 3079 if (imm > sizeof(srcElem1) * 8) { 3080 if (srcElem1 != 0) 3081 fpscr.qc = 1; 3082 destElem = 0; 3083 } else if (imm) { 3084 BigElement mid = (srcElem1 >> (imm - 1)); 3085 uint64_t rBit = mid & 0x1; 3086 mid >>= 1; 3087 mid += rBit; 3088 if (mid != (Element)mid) { 3089 destElem = mask(sizeof(Element) * 8); 3090 fpscr.qc = 1; 3091 } else { 3092 destElem = mid; 3093 } 3094 } else { 3095 if (srcElem1 != (Element)srcElem1) { 3096 destElem = mask(sizeof(Element) * 8 - 1); 3097 fpscr.qc = 1; 3098 } else { 3099 destElem = srcElem1; 3100 } 3101 } 3102 FpscrQc = fpscr; 3103 ''' 3104 twoRegNarrowInstX("uqrshrn", "UqrshrnX", "SimdShiftOp", smallUnsignedTypes, 3105 uqrshrnCode, hasImm=True) 3106 twoRegNarrowInstX("uqrshrn2", "Uqrshrn2X", "SimdShiftOp", 3107 smallUnsignedTypes, uqrshrnCode, hasImm=True, hi=True) 3108 twoRegNarrowInstX("uqrshrn", "UqrshrnScX", "SimdShiftOp", 3109 smallUnsignedTypes, uqrshrnCode, hasImm=True, 3110 scalar=True) 3111 # UQSHL (immediate) 3112 uqshlImmCode = ''' 3113 FPSCR fpscr = (FPSCR) FpscrQc; 3114 if (imm >= sizeof(Element) * 8) { 3115 if (srcElem1 != 0) { 3116 destElem = mask(sizeof(Element) * 8); 3117 fpscr.qc = 1; 3118 } else { 3119 destElem = 0; 3120 } 3121 } else if (imm) { 3122 
destElem = (srcElem1 << imm); 3123 uint64_t topBits = bits((uint64_t)srcElem1, 3124 sizeof(Element) * 8 - 1, 3125 sizeof(Element) * 8 - imm); 3126 if (topBits != 0) { 3127 destElem = mask(sizeof(Element) * 8); 3128 fpscr.qc = 1; 3129 } 3130 } else { 3131 destElem = srcElem1; 3132 } 3133 FpscrQc = fpscr; 3134 ''' 3135 twoEqualRegInstX("uqshl", "UqshlImmDX", "SimdAluOp", smallUnsignedTypes, 2, 3136 uqshlImmCode, hasImm=True) 3137 twoEqualRegInstX("uqshl", "UqshlImmQX", "SimdAluOp", unsignedTypes, 4, 3138 uqshlImmCode, hasImm=True) 3139 twoEqualRegInstX("uqshl", "UqshlImmScX", "SimdAluOp", unsignedTypes, 4, 3140 uqshlImmCode, hasImm=True, scalar=True) 3141 # UQSHL (register) 3142 uqshlCode = ''' 3143 int16_t shiftAmt = (int8_t)srcElem2; 3144 FPSCR fpscr = (FPSCR) FpscrQc; 3145 if (shiftAmt < 0) { 3146 shiftAmt = -shiftAmt; 3147 if (shiftAmt >= sizeof(Element) * 8) { 3148 shiftAmt = sizeof(Element) * 8 - 1; 3149 destElem = 0; 3150 } else { 3151 destElem = (srcElem1 >> shiftAmt); 3152 } 3153 } else if (shiftAmt > 0) { 3154 if (shiftAmt >= sizeof(Element) * 8) { 3155 if (srcElem1 != 0) { 3156 destElem = mask(sizeof(Element) * 8); 3157 fpscr.qc = 1; 3158 } else { 3159 destElem = 0; 3160 } 3161 } else { 3162 if (bits(srcElem1, sizeof(Element) * 8 - 1, 3163 sizeof(Element) * 8 - shiftAmt)) { 3164 destElem = mask(sizeof(Element) * 8); 3165 fpscr.qc = 1; 3166 } else { 3167 destElem = srcElem1 << shiftAmt; 3168 } 3169 } 3170 } else { 3171 destElem = srcElem1; 3172 } 3173 FpscrQc = fpscr; 3174 ''' 3175 threeEqualRegInstX("uqshl", "UqshlDX", "SimdAluOp", smallUnsignedTypes, 2, 3176 uqshlCode) 3177 threeEqualRegInstX("uqshl", "UqshlQX", "SimdAluOp", unsignedTypes, 4, 3178 uqshlCode) 3179 threeEqualRegInstX("uqshl", "UqshlScX", "SimdAluOp", unsignedTypes, 4, 3180 uqshlCode, scalar=True) 3181 # UQSHRN, UQSHRN2 3182 uqshrnCode = ''' 3183 FPSCR fpscr = (FPSCR) FpscrQc; 3184 if (imm > sizeof(srcElem1) * 8) { 3185 if (srcElem1 != 0) 3186 fpscr.qc = 1; 3187 destElem = 0; 3188 } else if 
(imm) { 3189 BigElement mid = ((srcElem1 >> (imm - 1)) >> 1); 3190 if (mid != (Element)mid) { 3191 destElem = mask(sizeof(Element) * 8); 3192 fpscr.qc = 1; 3193 } else { 3194 destElem = mid; 3195 } 3196 } else { 3197 destElem = srcElem1; 3198 } 3199 FpscrQc = fpscr; 3200 ''' 3201 twoRegNarrowInstX("uqshrn", "UqshrnX", "SimdShiftOp", smallUnsignedTypes, 3202 uqshrnCode, hasImm=True) 3203 twoRegNarrowInstX("uqshrn2", "Uqshrn2X", "SimdShiftOp", smallUnsignedTypes, 3204 uqshrnCode, hasImm=True, hi=True) 3205 twoRegNarrowInstX("uqshrn", "UqshrnScX", "SimdShiftOp", smallUnsignedTypes, 3206 uqshrnCode, hasImm=True, scalar=True) 3207 # UQSUB 3208 uqsubCode = ''' 3209 destElem = srcElem1 - srcElem2; 3210 FPSCR fpscr = (FPSCR) FpscrQc; 3211 if (destElem > srcElem1) { 3212 destElem = 0; 3213 fpscr.qc = 1; 3214 } 3215 FpscrQc = fpscr; 3216 ''' 3217 threeEqualRegInstX("uqsub", "UqsubDX", "SimdAddOp", smallUnsignedTypes, 2, 3218 uqsubCode) 3219 threeEqualRegInstX("uqsub", "UqsubQX", "SimdAddOp", unsignedTypes, 4, 3220 uqsubCode) 3221 threeEqualRegInstX("uqsub", "UqsubScX", "SimdAddOp", unsignedTypes, 4, 3222 uqsubCode, scalar=True) 3223 # UQXTN 3224 uqxtnCode = ''' 3225 FPSCR fpscr = (FPSCR) FpscrQc; 3226 destElem = srcElem1; 3227 if ((BigElement)destElem != srcElem1) { 3228 fpscr.qc = 1; 3229 destElem = mask(sizeof(Element) * 8); 3230 } 3231 FpscrQc = fpscr; 3232 ''' 3233 twoRegNarrowInstX("uqxtn", "UqxtnX", "SimdMiscOp", smallUnsignedTypes, 3234 uqxtnCode) 3235 twoRegNarrowInstX("uqxtn", "Uqxtn2X", "SimdMiscOp", smallUnsignedTypes, 3236 uqxtnCode, hi=True) 3237 twoRegNarrowInstX("uqxtn", "UqxtnScX", "SimdMiscOp", smallUnsignedTypes, 3238 uqxtnCode, scalar=True) 3239 # URECPE 3240 urecpeCode = "destElem = unsignedRecipEstimate(srcElem1);" 3241 twoEqualRegInstX("urecpe", "UrecpeDX", "SimdMultAccOp", ("uint32_t",), 2, 3242 urecpeCode) 3243 twoEqualRegInstX("urecpe", "UrecpeQX", "SimdMultAccOp", ("uint32_t",), 4, 3244 urecpeCode) 3245 # URHADD 3246 threeEqualRegInstX("urhadd", 
"UrhaddDX", "SimdAddOp", smallUnsignedTypes, 3247 2, rhaddCode) 3248 threeEqualRegInstX("urhadd", "UrhaddQX", "SimdAddOp", smallUnsignedTypes, 3249 4, rhaddCode) 3250 # URSHL 3251 threeEqualRegInstX("urshl", "UrshlDX", "SimdShiftOp", unsignedTypes, 2, 3252 rshlCode) 3253 threeEqualRegInstX("urshl", "UrshlQX", "SimdShiftOp", unsignedTypes, 4, 3254 rshlCode) 3255 # URSHR 3256 twoEqualRegInstX("urshr", "UrshrDX", "SimdShiftOp", unsignedTypes, 2, 3257 rshrCode, hasImm=True) 3258 twoEqualRegInstX("urshr", "UrshrQX", "SimdShiftOp", unsignedTypes, 4, 3259 rshrCode, hasImm=True) 3260 # URSQRTE 3261 ursqrteCode = "destElem = unsignedRSqrtEstimate(srcElem1);" 3262 twoEqualRegInstX("ursqrte", "UrsqrteDX", "SimdSqrtOp", ("uint32_t",), 2, 3263 ursqrteCode) 3264 twoEqualRegInstX("ursqrte", "UrsqrteQX", "SimdSqrtOp", ("uint32_t",), 4, 3265 ursqrteCode) 3266 # URSRA 3267 twoEqualRegInstX("ursra", "UrsraDX", "SimdShiftOp", unsignedTypes, 2, 3268 rsraCode, True, hasImm=True) 3269 twoEqualRegInstX("ursra", "UrsraQX", "SimdShiftOp", unsignedTypes, 4, 3270 rsraCode, True, hasImm=True) 3271 # USHL 3272 threeEqualRegInstX("ushl", "UshlDX", "SimdShiftOp", unsignedTypes, 2, 3273 shlCode) 3274 threeEqualRegInstX("ushl", "UshlQX", "SimdShiftOp", unsignedTypes, 4, 3275 shlCode) 3276 # USHLL, USHLL2 3277 twoRegLongInstX("ushll", "UshllX", "SimdShiftOp", smallUnsignedTypes, 3278 shllCode, hasImm=True) 3279 twoRegLongInstX("ushll", "Ushll2X", "SimdShiftOp", smallUnsignedTypes, 3280 shllCode, hi=True, hasImm=True) 3281 # USHR 3282 twoEqualRegInstX("ushr", "UshrDX", "SimdShiftOp", unsignedTypes, 2, 3283 shrCode, hasImm=True) 3284 twoEqualRegInstX("ushr", "UshrQX", "SimdShiftOp", unsignedTypes, 4, 3285 shrCode, hasImm=True) 3286 # USQADD 3287 usqaddCode = ''' 3288 FPSCR fpscr = (FPSCR) FpscrQc; 3289 Element tmp = destElem + srcElem1; 3290 if (bits(srcElem1, sizeof(Element) * 8 - 1) == 0) { 3291 if (tmp < srcElem1 || tmp < destElem) { 3292 destElem = (Element)(-1); 3293 fpscr.qc = 1; 3294 } else { 
3295 destElem = tmp; 3296 } 3297 } else { 3298 Element absSrcElem1 = (~srcElem1) + 1; 3299 if (absSrcElem1 > destElem) { 3300 destElem = 0; 3301 fpscr.qc = 1; 3302 } else { 3303 destElem = tmp; 3304 } 3305 } 3306 FpscrQc = fpscr; 3307 ''' 3308 twoEqualRegInstX("usqadd", "UsqaddDX", "SimdAddOp", smallUnsignedTypes, 2, 3309 usqaddCode, True) 3310 twoEqualRegInstX("usqadd", "UsqaddQX", "SimdAddOp", unsignedTypes, 4, 3311 usqaddCode, True) 3312 twoEqualRegInstX("usqadd", "UsqaddScX", "SimdAddOp", unsignedTypes, 4, 3313 usqaddCode, True, scalar=True) 3314 # USRA 3315 twoEqualRegInstX("usra", "UsraDX", "SimdShiftOp", unsignedTypes, 2, 3316 sraCode, True, hasImm=True) 3317 twoEqualRegInstX("usra", "UsraQX", "SimdShiftOp", unsignedTypes, 4, 3318 sraCode, True, hasImm=True) 3319 # USUBL 3320 threeRegLongInstX("usubl", "UsublX", "SimdAddOp", smallUnsignedTypes, 3321 sublwCode) 3322 threeRegLongInstX("usubl2", "Usubl2X", "SimdAddOp", smallUnsignedTypes, 3323 sublwCode, hi=True) 3324 # USUBW 3325 threeRegWideInstX("usubw", "UsubwX", "SimdAddOp", smallUnsignedTypes, 3326 sublwCode) 3327 threeRegWideInstX("usubw2", "Usubw2X", "SimdAddOp", smallUnsignedTypes, 3328 sublwCode, hi=True) 3329 # UXTL -> alias to USHLL 3330 # UZP1 3331 uzpCode = ''' 3332 unsigned part = %s; 3333 for (unsigned i = 0; i < eCount / 2; i++) { 3334 destReg.elements[i] = srcReg1.elements[2 * i + part]; 3335 destReg.elements[eCount / 2 + i] = srcReg2.elements[2 * i + part]; 3336 } 3337 ''' 3338 threeRegScrambleInstX("Uzp1", "Uzp1DX", "SimdAluOp", smallUnsignedTypes, 2, 3339 uzpCode % "0") 3340 threeRegScrambleInstX("Uzp1", "Uzp1QX", "SimdAluOp", unsignedTypes, 4, 3341 uzpCode % "0") 3342 # UZP2 3343 threeRegScrambleInstX("Uzp2", "Uzp2DX", "SimdAluOp", smallUnsignedTypes, 2, 3344 uzpCode % "1") 3345 threeRegScrambleInstX("Uzp2", "Uzp2QX", "SimdAluOp", unsignedTypes, 4, 3346 uzpCode % "1") 3347 # XTN, XTN2 3348 xtnCode = "destElem = srcElem1;" 3349 twoRegNarrowInstX("Xtn", "XtnX", "SimdMiscOp", 
smallUnsignedTypes, xtnCode) 3350 twoRegNarrowInstX("Xtn", "Xtn2X", "SimdMiscOp", smallUnsignedTypes, 3351 xtnCode, hi=True) 3352 # ZIP1 3353 zipCode = ''' 3354 unsigned base = %s; 3355 for (unsigned i = 0; i < eCount / 2; i++) { 3356 destReg.elements[2 * i] = srcReg1.elements[base + i]; 3357 destReg.elements[2 * i + 1] = srcReg2.elements[base + i]; 3358 } 3359 ''' 3360 threeRegScrambleInstX("zip1", "Zip1DX", "SimdAluOp", smallUnsignedTypes, 2, 3361 zipCode % "0") 3362 threeRegScrambleInstX("zip1", "Zip1QX", "SimdAluOp", unsignedTypes, 4, 3363 zipCode % "0") 3364 # ZIP2 3365 threeRegScrambleInstX("zip2", "Zip2DX", "SimdAluOp", smallUnsignedTypes, 2, 3366 zipCode % "eCount / 2") 3367 threeRegScrambleInstX("zip2", "Zip2QX", "SimdAluOp", unsignedTypes, 4, 3368 zipCode % "eCount / 2") 3369 3370 for decoderFlavour, type_dict in decoders.iteritems(): 3371 header_output += ''' 3372 class %(decoder_flavour)sDecoder { 3373 public: 3374 ''' % { "decoder_flavour" : decoderFlavour } 3375 for type,name in type_dict.iteritems(): 3376 header_output += ''' 3377 template<typename Elem> using %(type)s = %(new_name)s<Elem>;''' % { 3378 "type" : type, "new_name" : name 3379 } 3380 header_output += ''' 3381 };''' 3382}}; 3383