neon64.isa (11165:d90aec9435bd) neon64.isa (12038:619bc4100aa8)
1// -*- mode: c++ -*-
2
3// Copyright (c) 2012-2013, 2015 ARM Limited
4// All rights reserved
5//
6// The license below extends only to copyright in the software and shall
7// not be construed as granting a license to any other intellectual
8// property including but not limited to intellectual property relating

--- 1992 unchanged lines hidden (view full) ---

2001 # SMULL, SMULL2 (vector)
2002 threeRegLongInstX("smull", "SmullX", "SimdMultOp", smallSignedTypes,
2003 mullCode)
2004 threeRegLongInstX("smull", "Smull2X", "SimdMultOp", smallSignedTypes,
2005 mullCode, hi=True)
2006 # SQABS
2007 sqabsCode = '''
2008 FPSCR fpscr = (FPSCR) FpscrQc;
1// -*- mode: c++ -*-
2
3// Copyright (c) 2012-2013, 2015 ARM Limited
4// All rights reserved
5//
6// The license below extends only to copyright in the software and shall
7// not be construed as granting a license to any other intellectual
8// property including but not limited to intellectual property relating

--- 1992 unchanged lines hidden (view full) ---

2001 # SMULL, SMULL2 (vector)
2002 threeRegLongInstX("smull", "SmullX", "SimdMultOp", smallSignedTypes,
2003 mullCode)
2004 threeRegLongInstX("smull", "Smull2X", "SimdMultOp", smallSignedTypes,
2005 mullCode, hi=True)
2006 # SQABS
2007 sqabsCode = '''
2008 FPSCR fpscr = (FPSCR) FpscrQc;
2009 if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) {
2009 if (srcElem1 == (Element)(std::numeric_limits<Element>::min())) {
2010 fpscr.qc = 1;
2011 destElem = ~srcElem1;
2012 } else if (srcElem1 < 0) {
2013 destElem = -srcElem1;
2014 } else {
2015 destElem = srcElem1;
2016 }
2017 FpscrQc = fpscr;

--- 7 unchanged lines hidden (view full) ---

2025 # SQADD
2026 sqaddCode = '''
2027 destElem = srcElem1 + srcElem2;
2028 FPSCR fpscr = (FPSCR) FpscrQc;
2029 bool negDest = (destElem < 0);
2030 bool negSrc1 = (srcElem1 < 0);
2031 bool negSrc2 = (srcElem2 < 0);
2032 if ((negDest != negSrc1) && (negSrc1 == negSrc2)) {
2010 fpscr.qc = 1;
2011 destElem = ~srcElem1;
2012 } else if (srcElem1 < 0) {
2013 destElem = -srcElem1;
2014 } else {
2015 destElem = srcElem1;
2016 }
2017 FpscrQc = fpscr;

--- 7 unchanged lines hidden (view full) ---

2025 # SQADD
2026 sqaddCode = '''
2027 destElem = srcElem1 + srcElem2;
2028 FPSCR fpscr = (FPSCR) FpscrQc;
2029 bool negDest = (destElem < 0);
2030 bool negSrc1 = (srcElem1 < 0);
2031 bool negSrc2 = (srcElem2 < 0);
2032 if ((negDest != negSrc1) && (negSrc1 == negSrc2)) {
2033 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2033 destElem = std::numeric_limits<Element>::min();
2034 if (negDest)
2035 destElem -= 1;
2036 fpscr.qc = 1;
2037 }
2038 FpscrQc = fpscr;
2039 '''
2040 threeEqualRegInstX("sqadd", "SqaddDX", "SimdAddOp", smallSignedTypes, 2,
2041 sqaddCode)
2042 threeEqualRegInstX("sqadd", "SqaddQX", "SimdAddOp", signedTypes, 4,
2043 sqaddCode)
2044 threeEqualRegInstX("sqadd", "SqaddScX", "SimdAddOp", signedTypes, 4,
2045 sqaddCode, scalar=True)
2046 # SQDMLAL, SQDMLAL2 (by element)
2047 qdmlalCode = '''
2048 FPSCR fpscr = (FPSCR) FpscrQc;
2049 BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2034 if (negDest)
2035 destElem -= 1;
2036 fpscr.qc = 1;
2037 }
2038 FpscrQc = fpscr;
2039 '''
2040 threeEqualRegInstX("sqadd", "SqaddDX", "SimdAddOp", smallSignedTypes, 2,
2041 sqaddCode)
2042 threeEqualRegInstX("sqadd", "SqaddQX", "SimdAddOp", signedTypes, 4,
2043 sqaddCode)
2044 threeEqualRegInstX("sqadd", "SqaddScX", "SimdAddOp", signedTypes, 4,
2045 sqaddCode, scalar=True)
2046 # SQDMLAL, SQDMLAL2 (by element)
2047 qdmlalCode = '''
2048 FPSCR fpscr = (FPSCR) FpscrQc;
2049 BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2050 Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
2050 Element maxNeg = std::numeric_limits<Element>::min();
2051 Element halfNeg = maxNeg / 2;
2052 if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2053 (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2054 (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2055 midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
2056 fpscr.qc = 1;
2057 }
2058 bool negPreDest = ltz(destElem);

--- 22 unchanged lines hidden (view full) ---

2081 threeRegLongInstX("sqdmlal", "Sqdmlal2X", "SimdMultAccOp",
2082 ("int16_t", "int32_t"), qdmlalCode, True, hi=True)
2083 threeRegLongInstX("sqdmlal", "SqdmlalScX", "SimdMultAccOp",
2084 ("int16_t", "int32_t"), qdmlalCode, True, scalar=True)
2085 # SQDMLSL, SQDMLSL2 (by element)
2086 qdmlslCode = '''
2087 FPSCR fpscr = (FPSCR) FpscrQc;
2088 BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2051 Element halfNeg = maxNeg / 2;
2052 if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2053 (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2054 (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2055 midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
2056 fpscr.qc = 1;
2057 }
2058 bool negPreDest = ltz(destElem);

--- 22 unchanged lines hidden (view full) ---

2081 threeRegLongInstX("sqdmlal", "Sqdmlal2X", "SimdMultAccOp",
2082 ("int16_t", "int32_t"), qdmlalCode, True, hi=True)
2083 threeRegLongInstX("sqdmlal", "SqdmlalScX", "SimdMultAccOp",
2084 ("int16_t", "int32_t"), qdmlalCode, True, scalar=True)
2085 # SQDMLSL, SQDMLSL2 (by element)
2086 qdmlslCode = '''
2087 FPSCR fpscr = (FPSCR) FpscrQc;
2088 BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2089 Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
2089 Element maxNeg = std::numeric_limits<Element>::min();
2090 Element halfNeg = maxNeg / 2;
2091 if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2092 (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2093 (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2094 midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
2095 fpscr.qc = 1;
2096 }
2097 bool negPreDest = ltz(destElem);

--- 75 unchanged lines hidden (view full) ---

2173 ("int16_t", "int32_t"), qdmullCode, True)
2174 threeRegLongInstX("sqdmull", "Sqdmull2X", "SimdMultOp",
2175 ("int16_t", "int32_t"), qdmullCode, True, hi=True)
2176 threeRegLongInstX("sqdmull", "SqdmullScX", "SimdMultOp",
2177 ("int16_t", "int32_t"), qdmullCode, True, scalar=True)
2178 # SQNEG
2179 sqnegCode = '''
2180 FPSCR fpscr = (FPSCR) FpscrQc;
2090 Element halfNeg = maxNeg / 2;
2091 if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2092 (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2093 (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2094 midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
2095 fpscr.qc = 1;
2096 }
2097 bool negPreDest = ltz(destElem);

--- 75 unchanged lines hidden (view full) ---

2173 ("int16_t", "int32_t"), qdmullCode, True)
2174 threeRegLongInstX("sqdmull", "Sqdmull2X", "SimdMultOp",
2175 ("int16_t", "int32_t"), qdmullCode, True, hi=True)
2176 threeRegLongInstX("sqdmull", "SqdmullScX", "SimdMultOp",
2177 ("int16_t", "int32_t"), qdmullCode, True, scalar=True)
2178 # SQNEG
2179 sqnegCode = '''
2180 FPSCR fpscr = (FPSCR) FpscrQc;
2181 if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) {
2181 if (srcElem1 == (Element)(std::numeric_limits<Element>::min())) {
2182 fpscr.qc = 1;
2183 destElem = ~srcElem1;
2184 } else {
2185 destElem = -srcElem1;
2186 }
2187 FpscrQc = fpscr;
2188 '''
2189 twoEqualRegInstX("sqneg", "SqnegDX", "SimdAluOp", smallSignedTypes, 2,
2190 sqnegCode)
2191 twoEqualRegInstX("sqneg", "SqnegQX", "SimdAluOp", signedTypes, 4,
2192 sqnegCode)
2193 twoEqualRegInstX("sqneg", "SqnegScX", "SimdAluOp", signedTypes, 4,
2194 sqnegCode, scalar=True)
2195 # SQRDMULH (by element)
2196 sqrdmulhCode = '''
2197 FPSCR fpscr = (FPSCR) FpscrQc;
2198 destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2 +
2199 ((int64_t)1 << (sizeof(Element) * 8 - 1))) >>
2200 (sizeof(Element) * 8);
2182 fpscr.qc = 1;
2183 destElem = ~srcElem1;
2184 } else {
2185 destElem = -srcElem1;
2186 }
2187 FpscrQc = fpscr;
2188 '''
2189 twoEqualRegInstX("sqneg", "SqnegDX", "SimdAluOp", smallSignedTypes, 2,
2190 sqnegCode)
2191 twoEqualRegInstX("sqneg", "SqnegQX", "SimdAluOp", signedTypes, 4,
2192 sqnegCode)
2193 twoEqualRegInstX("sqneg", "SqnegScX", "SimdAluOp", signedTypes, 4,
2194 sqnegCode, scalar=True)
2195 # SQRDMULH (by element)
2196 sqrdmulhCode = '''
2197 FPSCR fpscr = (FPSCR) FpscrQc;
2198 destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2 +
2199 ((int64_t)1 << (sizeof(Element) * 8 - 1))) >>
2200 (sizeof(Element) * 8);
2201 Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
2201 Element maxNeg = std::numeric_limits<Element>::min();
2202 Element halfNeg = maxNeg / 2;
2203 if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2204 (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2205 (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2206 if (destElem < 0) {
2207 destElem = mask(sizeof(Element) * 8 - 1);
2208 } else {
2202 Element halfNeg = maxNeg / 2;
2203 if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2204 (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2205 (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2206 if (destElem < 0) {
2207 destElem = mask(sizeof(Element) * 8 - 1);
2208 } else {
2209 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2209 destElem = std::numeric_limits<Element>::min();
2210 }
2211 fpscr.qc = 1;
2212 }
2213 FpscrQc = fpscr;
2214 '''
2215 threeEqualRegInstX("sqrdmulh", "SqrdmulhElemDX", "SimdMultOp",
2216 ("int16_t", "int32_t"), 2, sqrdmulhCode, byElem=True)
2217 threeEqualRegInstX("sqrdmulh", "SqrdmulhElemQX", "SimdMultOp",

--- 145 unchanged lines hidden (view full) ---

2363 smallSignedTypes, sqrshrunCode, hasImm=True, hi=True)
2364 twoRegNarrowInstX("sqrshrun", "SqrshrunScX", "SimdShiftOp",
2365 smallSignedTypes, sqrshrunCode, hasImm=True, scalar=True)
2366 # SQSHL (immediate)
2367 sqshlImmCode = '''
2368 FPSCR fpscr = (FPSCR) FpscrQc;
2369 if (imm >= sizeof(Element) * 8) {
2370 if (srcElem1 != 0) {
2210 }
2211 fpscr.qc = 1;
2212 }
2213 FpscrQc = fpscr;
2214 '''
2215 threeEqualRegInstX("sqrdmulh", "SqrdmulhElemDX", "SimdMultOp",
2216 ("int16_t", "int32_t"), 2, sqrdmulhCode, byElem=True)
2217 threeEqualRegInstX("sqrdmulh", "SqrdmulhElemQX", "SimdMultOp",

--- 145 unchanged lines hidden (view full) ---

2363 smallSignedTypes, sqrshrunCode, hasImm=True, hi=True)
2364 twoRegNarrowInstX("sqrshrun", "SqrshrunScX", "SimdShiftOp",
2365 smallSignedTypes, sqrshrunCode, hasImm=True, scalar=True)
2366 # SQSHL (immediate)
2367 sqshlImmCode = '''
2368 FPSCR fpscr = (FPSCR) FpscrQc;
2369 if (imm >= sizeof(Element) * 8) {
2370 if (srcElem1 != 0) {
2371 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2371 destElem = std::numeric_limits<Element>::min();
2372 if (srcElem1 > 0)
2373 destElem = ~destElem;
2374 fpscr.qc = 1;
2375 } else {
2376 destElem = 0;
2377 }
2378 } else if (imm) {
2379 destElem = (srcElem1 << imm);
2380 uint64_t topBits = bits((uint64_t)srcElem1,
2381 sizeof(Element) * 8 - 1,
2382 sizeof(Element) * 8 - 1 - imm);
2383 if (topBits != 0 && topBits != mask(imm + 1)) {
2372 if (srcElem1 > 0)
2373 destElem = ~destElem;
2374 fpscr.qc = 1;
2375 } else {
2376 destElem = 0;
2377 }
2378 } else if (imm) {
2379 destElem = (srcElem1 << imm);
2380 uint64_t topBits = bits((uint64_t)srcElem1,
2381 sizeof(Element) * 8 - 1,
2382 sizeof(Element) * 8 - 1 - imm);
2383 if (topBits != 0 && topBits != mask(imm + 1)) {
2384 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2384 destElem = std::numeric_limits<Element>::min();
2385 if (srcElem1 > 0)
2386 destElem = ~destElem;
2387 fpscr.qc = 1;
2388 }
2389 } else {
2390 destElem = srcElem1;
2391 }
2392 FpscrQc = fpscr;

--- 159 unchanged lines hidden (view full) ---

2552 # SQSUB
2553 sqsubCode = '''
2554 destElem = srcElem1 - srcElem2;
2555 FPSCR fpscr = (FPSCR) FpscrQc;
2556 bool negDest = (destElem < 0);
2557 bool negSrc1 = (srcElem1 < 0);
2558 bool posSrc2 = (srcElem2 >= 0);
2559 if ((negDest != negSrc1) && (negSrc1 == posSrc2)) {
2385 if (srcElem1 > 0)
2386 destElem = ~destElem;
2387 fpscr.qc = 1;
2388 }
2389 } else {
2390 destElem = srcElem1;
2391 }
2392 FpscrQc = fpscr;

--- 159 unchanged lines hidden (view full) ---

2552 # SQSUB
2553 sqsubCode = '''
2554 destElem = srcElem1 - srcElem2;
2555 FPSCR fpscr = (FPSCR) FpscrQc;
2556 bool negDest = (destElem < 0);
2557 bool negSrc1 = (srcElem1 < 0);
2558 bool posSrc2 = (srcElem2 >= 0);
2559 if ((negDest != negSrc1) && (negSrc1 == posSrc2)) {
2560 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2560 destElem = std::numeric_limits<Element>::min();
2561 if (negDest)
2562 destElem -= 1;
2563 fpscr.qc = 1;
2564 }
2565 FpscrQc = fpscr;
2566 '''
2567 threeEqualRegInstX("sqsub", "SqsubDX", "SimdAddOp", smallSignedTypes, 2,
2568 sqsubCode)

--- 804 unchanged lines hidden ---
2561 if (negDest)
2562 destElem -= 1;
2563 fpscr.qc = 1;
2564 }
2565 FpscrQc = fpscr;
2566 '''
2567 threeEqualRegInstX("sqsub", "SqsubDX", "SimdAddOp", smallSignedTypes, 2,
2568 sqsubCode)

--- 804 unchanged lines hidden ---