neon.isa (8588:ef28ed90449d) | neon.isa (8607:5fb918115c07) |
---|---|
1// -*- mode:c++ -*- 2 3// Copyright (c) 2010 ARM Limited 4// All rights reserved 5// 6// The license below extends only to copyright in the software and shall 7// not be construed as granting a license to any other intellectual 8// property including but not limited to intellectual property relating --- 1605 unchanged lines hidden (view full) --- 1614 threeEqualRegInst("vmin", "VminQ", "SimdCmpOp", allTypes, 4, vminCode) 1615 1616 vaddCode = ''' 1617 destElem = srcElem1 + srcElem2; 1618 ''' 1619 threeEqualRegInst("vadd", "NVaddD", "SimdAddOp", unsignedTypes, 2, vaddCode) 1620 threeEqualRegInst("vadd", "NVaddQ", "SimdAddOp", unsignedTypes, 4, vaddCode) 1621 | 1// -*- mode:c++ -*- 2 3// Copyright (c) 2010 ARM Limited 4// All rights reserved 5// 6// The license below extends only to copyright in the software and shall 7// not be construed as granting a license to any other intellectual 8// property including but not limited to intellectual property relating --- 1605 unchanged lines hidden (view full) --- 1614 threeEqualRegInst("vmin", "VminQ", "SimdCmpOp", allTypes, 4, vminCode) 1615 1616 vaddCode = ''' 1617 destElem = srcElem1 + srcElem2; 1618 ''' 1619 threeEqualRegInst("vadd", "NVaddD", "SimdAddOp", unsignedTypes, 2, vaddCode) 1620 threeEqualRegInst("vadd", "NVaddQ", "SimdAddOp", unsignedTypes, 4, vaddCode) 1621 |
1622 threeEqualRegInst("vpadd", "NVpaddD", "SimdAddOp", unsignedTypes, | 1622 threeEqualRegInst("vpadd", "NVpaddD", "SimdAddOp", smallUnsignedTypes, |
1623 2, vaddCode, pairwise=True) | 1623 2, vaddCode, pairwise=True) |
1624 threeEqualRegInst("vpadd", "NVpaddQ", "SimdAddOp", unsignedTypes, 1625 4, vaddCode, pairwise=True) | |
1626 vaddlwCode = ''' 1627 destElem = (BigElement)srcElem1 + (BigElement)srcElem2; 1628 ''' 1629 threeRegLongInst("vaddl", "Vaddl", "SimdAddOp", smallTypes, vaddlwCode) 1630 threeRegWideInst("vaddw", "Vaddw", "SimdAddOp", smallTypes, vaddlwCode) 1631 vaddhnCode = ''' 1632 destElem = ((BigElement)srcElem1 + (BigElement)srcElem2) >> 1633 (sizeof(Element) * 8); --- 474 unchanged lines hidden (view full) --- 2108 destElem = 0; 2109 for (unsigned j = 0; j < sizeof(Element) * 8; j++) { 2110 if (bits(srcElem2, j)) 2111 destElem ^= (BigElement)srcElem1 << j; 2112 } 2113 ''' 2114 threeRegLongInst("vmull", "Vmullp", "SimdMultOp", smallUnsignedTypes, vmullpCode) 2115 | 1624 vaddlwCode = ''' 1625 destElem = (BigElement)srcElem1 + (BigElement)srcElem2; 1626 ''' 1627 threeRegLongInst("vaddl", "Vaddl", "SimdAddOp", smallTypes, vaddlwCode) 1628 threeRegWideInst("vaddw", "Vaddw", "SimdAddOp", smallTypes, vaddlwCode) 1629 vaddhnCode = ''' 1630 destElem = ((BigElement)srcElem1 + (BigElement)srcElem2) >> 1631 (sizeof(Element) * 8); --- 474 unchanged lines hidden (view full) --- 2106 destElem = 0; 2107 for (unsigned j = 0; j < sizeof(Element) * 8; j++) { 2108 if (bits(srcElem2, j)) 2109 destElem ^= (BigElement)srcElem1 << j; 2110 } 2111 ''' 2112 threeRegLongInst("vmull", "Vmullp", "SimdMultOp", smallUnsignedTypes, vmullpCode) 2113 |
2116 threeEqualRegInst("vpmax", "VpmaxD", "SimdCmpOp", allTypes, 2, vmaxCode, pairwise=True) 2117 threeEqualRegInst("vpmax", "VpmaxQ", "SimdCmpOp", allTypes, 4, vmaxCode, pairwise=True) | 2114 threeEqualRegInst("vpmax", "VpmaxD", "SimdCmpOp", smallTypes, 2, vmaxCode, pairwise=True) |
2118 | 2115 |
2119 threeEqualRegInst("vpmin", "VpminD", "SimdCmpOp", allTypes, 2, vminCode, pairwise=True) 2120 threeEqualRegInst("vpmin", "VpminQ", "SimdCmpOp", allTypes, 4, vminCode, pairwise=True) | 2116 threeEqualRegInst("vpmin", "VpminD", "SimdCmpOp", smallTypes, 2, vminCode, pairwise=True) |
2121 2122 vqdmulhCode = ''' 2123 FPSCR fpscr = (FPSCR) FpscrQc; 2124 destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2) >> 2125 (sizeof(Element) * 8); 2126 if (srcElem1 == srcElem2 && 2127 srcElem1 == (Element)((Element)1 << 2128 (sizeof(Element) * 8 - 1))) { --- 1006 unchanged lines hidden (view full) --- 3135 vtrnCode = ''' 3136 Element mid; 3137 for (unsigned i = 0; i < eCount; i += 2) { 3138 mid = srcReg1.elements[i]; 3139 srcReg1.elements[i] = destReg.elements[i + 1]; 3140 destReg.elements[i + 1] = mid; 3141 } 3142 ''' | 2117 2118 vqdmulhCode = ''' 2119 FPSCR fpscr = (FPSCR) FpscrQc; 2120 destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2) >> 2121 (sizeof(Element) * 8); 2122 if (srcElem1 == srcElem2 && 2123 srcElem1 == (Element)((Element)1 << 2124 (sizeof(Element) * 8 - 1))) { --- 1006 unchanged lines hidden (view full) --- 3131 vtrnCode = ''' 3132 Element mid; 3133 for (unsigned i = 0; i < eCount; i += 2) { 3134 mid = srcReg1.elements[i]; 3135 srcReg1.elements[i] = destReg.elements[i + 1]; 3136 destReg.elements[i + 1] = mid; 3137 } 3138 ''' |
3143 twoRegMiscScramble("vtrn", "NVtrnD", "SimdAluOp", unsignedTypes, 2, vtrnCode) 3144 twoRegMiscScramble("vtrn", "NVtrnQ", "SimdAluOp", unsignedTypes, 4, vtrnCode) | 3139 twoRegMiscScramble("vtrn", "NVtrnD", "SimdAluOp", 3140 smallUnsignedTypes, 2, vtrnCode) 3141 twoRegMiscScramble("vtrn", "NVtrnQ", "SimdAluOp", 3142 smallUnsignedTypes, 4, vtrnCode) |
3145 3146 vuzpCode = ''' 3147 Element mid[eCount]; 3148 memcpy(&mid, &srcReg1, sizeof(srcReg1)); 3149 for (unsigned i = 0; i < eCount / 2; i++) { 3150 srcReg1.elements[i] = destReg.elements[2 * i + 1]; 3151 srcReg1.elements[eCount / 2 + i] = mid[2 * i + 1]; 3152 destReg.elements[i] = destReg.elements[2 * i]; --- 226 unchanged lines hidden --- | 3143 3144 vuzpCode = ''' 3145 Element mid[eCount]; 3146 memcpy(&mid, &srcReg1, sizeof(srcReg1)); 3147 for (unsigned i = 0; i < eCount / 2; i++) { 3148 srcReg1.elements[i] = destReg.elements[2 * i + 1]; 3149 srcReg1.elements[eCount / 2 + i] = mid[2 * i + 1]; 3150 destReg.elements[i] = destReg.elements[2 * i]; --- 226 unchanged lines hidden --- |