neon.isa (8782:10c9297e14d5) neon.isa (8795:0909f8ed7aa0)
1// -*- mode:c++ -*-
2
3// Copyright (c) 2010 ARM Limited
4// All rights reserved
5//
6// The license below extends only to copyright in the software and shall
7// not be construed as granting a license to any other intellectual
8// property including but not limited to intellectual property relating

--- 1602 unchanged lines hidden (view full) ---

1611 threeEqualRegInst("vmin", "VminQ", "SimdCmpOp", allTypes, 4, vminCode)
1612
1613 vaddCode = '''
1614 destElem = srcElem1 + srcElem2;
1615 '''
1616 threeEqualRegInst("vadd", "NVaddD", "SimdAddOp", unsignedTypes, 2, vaddCode)
1617 threeEqualRegInst("vadd", "NVaddQ", "SimdAddOp", unsignedTypes, 4, vaddCode)
1618
1// -*- mode:c++ -*-
2
3// Copyright (c) 2010 ARM Limited
4// All rights reserved
5//
6// The license below extends only to copyright in the software and shall
7// not be construed as granting a license to any other intellectual
8// property including but not limited to intellectual property relating

--- 1602 unchanged lines hidden (view full) ---

1611 threeEqualRegInst("vmin", "VminQ", "SimdCmpOp", allTypes, 4, vminCode)
1612
1613 vaddCode = '''
1614 destElem = srcElem1 + srcElem2;
1615 '''
1616 threeEqualRegInst("vadd", "NVaddD", "SimdAddOp", unsignedTypes, 2, vaddCode)
1617 threeEqualRegInst("vadd", "NVaddQ", "SimdAddOp", unsignedTypes, 4, vaddCode)
1618
1619 threeEqualRegInst("vpadd", "NVpaddD", "SimdAddOp", unsignedTypes,
1619 threeEqualRegInst("vpadd", "NVpaddD", "SimdAddOp", smallUnsignedTypes,
1620 2, vaddCode, pairwise=True)
1620 2, vaddCode, pairwise=True)
1621 threeEqualRegInst("vpadd", "NVpaddQ", "SimdAddOp", unsignedTypes,
1622 4, vaddCode, pairwise=True)
1623 vaddlwCode = '''
1624 destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
1625 '''
1626 threeRegLongInst("vaddl", "Vaddl", "SimdAddOp", smallTypes, vaddlwCode)
1627 threeRegWideInst("vaddw", "Vaddw", "SimdAddOp", smallTypes, vaddlwCode)
1628 vaddhnCode = '''
1629 destElem = ((BigElement)srcElem1 + (BigElement)srcElem2) >>
1630 (sizeof(Element) * 8);

--- 474 unchanged lines hidden (view full) ---

2105 destElem = 0;
2106 for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
2107 if (bits(srcElem2, j))
2108 destElem ^= (BigElement)srcElem1 << j;
2109 }
2110 '''
2111 threeRegLongInst("vmull", "Vmullp", "SimdMultOp", smallUnsignedTypes, vmullpCode)
2112
1621 vaddlwCode = '''
1622 destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
1623 '''
1624 threeRegLongInst("vaddl", "Vaddl", "SimdAddOp", smallTypes, vaddlwCode)
1625 threeRegWideInst("vaddw", "Vaddw", "SimdAddOp", smallTypes, vaddlwCode)
1626 vaddhnCode = '''
1627 destElem = ((BigElement)srcElem1 + (BigElement)srcElem2) >>
1628 (sizeof(Element) * 8);

--- 474 unchanged lines hidden (view full) ---

2103 destElem = 0;
2104 for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
2105 if (bits(srcElem2, j))
2106 destElem ^= (BigElement)srcElem1 << j;
2107 }
2108 '''
2109 threeRegLongInst("vmull", "Vmullp", "SimdMultOp", smallUnsignedTypes, vmullpCode)
2110
2113 threeEqualRegInst("vpmax", "VpmaxD", "SimdCmpOp", allTypes, 2, vmaxCode, pairwise=True)
2114 threeEqualRegInst("vpmax", "VpmaxQ", "SimdCmpOp", allTypes, 4, vmaxCode, pairwise=True)
2111 threeEqualRegInst("vpmax", "VpmaxD", "SimdCmpOp", smallTypes, 2, vmaxCode, pairwise=True)
2115
2112
2116 threeEqualRegInst("vpmin", "VpminD", "SimdCmpOp", allTypes, 2, vminCode, pairwise=True)
2117 threeEqualRegInst("vpmin", "VpminQ", "SimdCmpOp", allTypes, 4, vminCode, pairwise=True)
2113 threeEqualRegInst("vpmin", "VpminD", "SimdCmpOp", smallTypes, 2, vminCode, pairwise=True)
2118
2119 vqdmulhCode = '''
2120 FPSCR fpscr = (FPSCR) FpscrQc;
2121 destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2) >>
2122 (sizeof(Element) * 8);
2123 if (srcElem1 == srcElem2 &&
2124 srcElem1 == (Element)((Element)1 <<
2125 (sizeof(Element) * 8 - 1))) {

--- 1006 unchanged lines hidden (view full) ---

3132 vtrnCode = '''
3133 Element mid;
3134 for (unsigned i = 0; i < eCount; i += 2) {
3135 mid = srcReg1.elements[i];
3136 srcReg1.elements[i] = destReg.elements[i + 1];
3137 destReg.elements[i + 1] = mid;
3138 }
3139 '''
2114
2115 vqdmulhCode = '''
2116 FPSCR fpscr = (FPSCR) FpscrQc;
2117 destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2) >>
2118 (sizeof(Element) * 8);
2119 if (srcElem1 == srcElem2 &&
2120 srcElem1 == (Element)((Element)1 <<
2121 (sizeof(Element) * 8 - 1))) {

--- 1006 unchanged lines hidden (view full) ---

3128 vtrnCode = '''
3129 Element mid;
3130 for (unsigned i = 0; i < eCount; i += 2) {
3131 mid = srcReg1.elements[i];
3132 srcReg1.elements[i] = destReg.elements[i + 1];
3133 destReg.elements[i + 1] = mid;
3134 }
3135 '''
3140 twoRegMiscScramble("vtrn", "NVtrnD", "SimdAluOp", unsignedTypes, 2, vtrnCode)
3141 twoRegMiscScramble("vtrn", "NVtrnQ", "SimdAluOp", unsignedTypes, 4, vtrnCode)
3136 twoRegMiscScramble("vtrn", "NVtrnD", "SimdAluOp",
3137 smallUnsignedTypes, 2, vtrnCode)
3138 twoRegMiscScramble("vtrn", "NVtrnQ", "SimdAluOp",
3139 smallUnsignedTypes, 4, vtrnCode)
3142
3143 vuzpCode = '''
3144 Element mid[eCount];
3145 memcpy(&mid, &srcReg1, sizeof(srcReg1));
3146 for (unsigned i = 0; i < eCount / 2; i++) {
3147 srcReg1.elements[i] = destReg.elements[2 * i + 1];
3148 srcReg1.elements[eCount / 2 + i] = mid[2 * i + 1];
3149 destReg.elements[i] = destReg.elements[2 * i];

--- 226 unchanged lines hidden ---
3140
3141 vuzpCode = '''
3142 Element mid[eCount];
3143 memcpy(&mid, &srcReg1, sizeof(srcReg1));
3144 for (unsigned i = 0; i < eCount / 2; i++) {
3145 srcReg1.elements[i] = destReg.elements[2 * i + 1];
3146 srcReg1.elements[eCount / 2 + i] = mid[2 * i + 1];
3147 destReg.elements[i] = destReg.elements[2 * i];

--- 226 unchanged lines hidden ---