neon.isa (8588:ef28ed90449d) neon.isa (8607:5fb918115c07)
1// -*- mode:c++ -*-
2
3// Copyright (c) 2010 ARM Limited
4// All rights reserved
5//
6// The license below extends only to copyright in the software and shall
7// not be construed as granting a license to any other intellectual
8// property including but not limited to intellectual property relating

--- 1605 unchanged lines hidden (view full) ---

1614 threeEqualRegInst("vmin", "VminQ", "SimdCmpOp", allTypes, 4, vminCode)
1615
1616 vaddCode = '''
1617 destElem = srcElem1 + srcElem2;
1618 '''
1619 threeEqualRegInst("vadd", "NVaddD", "SimdAddOp", unsignedTypes, 2, vaddCode)
1620 threeEqualRegInst("vadd", "NVaddQ", "SimdAddOp", unsignedTypes, 4, vaddCode)
1621
1// -*- mode:c++ -*-
2
3// Copyright (c) 2010 ARM Limited
4// All rights reserved
5//
6// The license below extends only to copyright in the software and shall
7// not be construed as granting a license to any other intellectual
8// property including but not limited to intellectual property relating

--- 1605 unchanged lines hidden (view full) ---

1614 threeEqualRegInst("vmin", "VminQ", "SimdCmpOp", allTypes, 4, vminCode)
1615
1616 vaddCode = '''
1617 destElem = srcElem1 + srcElem2;
1618 '''
1619 threeEqualRegInst("vadd", "NVaddD", "SimdAddOp", unsignedTypes, 2, vaddCode)
1620 threeEqualRegInst("vadd", "NVaddQ", "SimdAddOp", unsignedTypes, 4, vaddCode)
1621
1622 threeEqualRegInst("vpadd", "NVpaddD", "SimdAddOp", unsignedTypes,
1622 threeEqualRegInst("vpadd", "NVpaddD", "SimdAddOp", smallUnsignedTypes,
1623 2, vaddCode, pairwise=True)
1623 2, vaddCode, pairwise=True)
1624 threeEqualRegInst("vpadd", "NVpaddQ", "SimdAddOp", unsignedTypes,
1625 4, vaddCode, pairwise=True)
1626 vaddlwCode = '''
1627 destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
1628 '''
1629 threeRegLongInst("vaddl", "Vaddl", "SimdAddOp", smallTypes, vaddlwCode)
1630 threeRegWideInst("vaddw", "Vaddw", "SimdAddOp", smallTypes, vaddlwCode)
1631 vaddhnCode = '''
1632 destElem = ((BigElement)srcElem1 + (BigElement)srcElem2) >>
1633 (sizeof(Element) * 8);

--- 474 unchanged lines hidden (view full) ---

2108 destElem = 0;
2109 for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
2110 if (bits(srcElem2, j))
2111 destElem ^= (BigElement)srcElem1 << j;
2112 }
2113 '''
2114 threeRegLongInst("vmull", "Vmullp", "SimdMultOp", smallUnsignedTypes, vmullpCode)
2115
1624 vaddlwCode = '''
1625 destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
1626 '''
1627 threeRegLongInst("vaddl", "Vaddl", "SimdAddOp", smallTypes, vaddlwCode)
1628 threeRegWideInst("vaddw", "Vaddw", "SimdAddOp", smallTypes, vaddlwCode)
1629 vaddhnCode = '''
1630 destElem = ((BigElement)srcElem1 + (BigElement)srcElem2) >>
1631 (sizeof(Element) * 8);

--- 474 unchanged lines hidden (view full) ---

2106 destElem = 0;
2107 for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
2108 if (bits(srcElem2, j))
2109 destElem ^= (BigElement)srcElem1 << j;
2110 }
2111 '''
2112 threeRegLongInst("vmull", "Vmullp", "SimdMultOp", smallUnsignedTypes, vmullpCode)
2113
2116 threeEqualRegInst("vpmax", "VpmaxD", "SimdCmpOp", allTypes, 2, vmaxCode, pairwise=True)
2117 threeEqualRegInst("vpmax", "VpmaxQ", "SimdCmpOp", allTypes, 4, vmaxCode, pairwise=True)
2114 threeEqualRegInst("vpmax", "VpmaxD", "SimdCmpOp", smallTypes, 2, vmaxCode, pairwise=True)
2118
2115
2119 threeEqualRegInst("vpmin", "VpminD", "SimdCmpOp", allTypes, 2, vminCode, pairwise=True)
2120 threeEqualRegInst("vpmin", "VpminQ", "SimdCmpOp", allTypes, 4, vminCode, pairwise=True)
2116 threeEqualRegInst("vpmin", "VpminD", "SimdCmpOp", smallTypes, 2, vminCode, pairwise=True)
2121
2122 vqdmulhCode = '''
2123 FPSCR fpscr = (FPSCR) FpscrQc;
2124 destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2) >>
2125 (sizeof(Element) * 8);
2126 if (srcElem1 == srcElem2 &&
2127 srcElem1 == (Element)((Element)1 <<
2128 (sizeof(Element) * 8 - 1))) {

--- 1006 unchanged lines hidden (view full) ---

3135 vtrnCode = '''
3136 Element mid;
3137 for (unsigned i = 0; i < eCount; i += 2) {
3138 mid = srcReg1.elements[i];
3139 srcReg1.elements[i] = destReg.elements[i + 1];
3140 destReg.elements[i + 1] = mid;
3141 }
3142 '''
2117
2118 vqdmulhCode = '''
2119 FPSCR fpscr = (FPSCR) FpscrQc;
2120 destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2) >>
2121 (sizeof(Element) * 8);
2122 if (srcElem1 == srcElem2 &&
2123 srcElem1 == (Element)((Element)1 <<
2124 (sizeof(Element) * 8 - 1))) {

--- 1006 unchanged lines hidden (view full) ---

3131 vtrnCode = '''
3132 Element mid;
3133 for (unsigned i = 0; i < eCount; i += 2) {
3134 mid = srcReg1.elements[i];
3135 srcReg1.elements[i] = destReg.elements[i + 1];
3136 destReg.elements[i + 1] = mid;
3137 }
3138 '''
3143 twoRegMiscScramble("vtrn", "NVtrnD", "SimdAluOp", unsignedTypes, 2, vtrnCode)
3144 twoRegMiscScramble("vtrn", "NVtrnQ", "SimdAluOp", unsignedTypes, 4, vtrnCode)
3139 twoRegMiscScramble("vtrn", "NVtrnD", "SimdAluOp",
3140 smallUnsignedTypes, 2, vtrnCode)
3141 twoRegMiscScramble("vtrn", "NVtrnQ", "SimdAluOp",
3142 smallUnsignedTypes, 4, vtrnCode)
3145
3146 vuzpCode = '''
3147 Element mid[eCount];
3148 memcpy(&mid, &srcReg1, sizeof(srcReg1));
3149 for (unsigned i = 0; i < eCount / 2; i++) {
3150 srcReg1.elements[i] = destReg.elements[2 * i + 1];
3151 srcReg1.elements[eCount / 2 + i] = mid[2 * i + 1];
3152 destReg.elements[i] = destReg.elements[2 * i];

--- 226 unchanged lines hidden ---
3143
3144 vuzpCode = '''
3145 Element mid[eCount];
3146 memcpy(&mid, &srcReg1, sizeof(srcReg1));
3147 for (unsigned i = 0; i < eCount / 2; i++) {
3148 srcReg1.elements[i] = destReg.elements[2 * i + 1];
3149 srcReg1.elements[eCount / 2 + i] = mid[2 * i + 1];
3150 destReg.elements[i] = destReg.elements[2 * i];

--- 226 unchanged lines hidden ---