neon.isa (7760:e93e7e0caae1) neon.isa (7783:9b880b40ac10)
1// -*- mode:c++ -*-
2
3// Copyright (c) 2010 ARM Limited
4// All rights reserved
5//
6// The license below extends only to copyright in the software and shall
7// not be construed as granting a license to any other intellectual
8// property including but not limited to intellectual property relating

--- 1618 unchanged lines hidden (view full) ---

1627 vsublwCode = '''
1628 destElem = (BigElement)srcElem1 - (BigElement)srcElem2;
1629 '''
1630 threeRegLongInst("vsubl", "Vsubl", "SimdAddOp", smallTypes, vsublwCode)
1631 threeRegWideInst("vsubw", "Vsubw", "SimdAddOp", smallTypes, vsublwCode)
1632
1633 vqaddUCode = '''
1634 destElem = srcElem1 + srcElem2;
1// -*- mode:c++ -*-
2
3// Copyright (c) 2010 ARM Limited
4// All rights reserved
5//
6// The license below extends only to copyright in the software and shall
7// not be construed as granting a license to any other intellectual
8// property including but not limited to intellectual property relating

--- 1618 unchanged lines hidden (view full) ---

1627 vsublwCode = '''
1628 destElem = (BigElement)srcElem1 - (BigElement)srcElem2;
1629 '''
1630 threeRegLongInst("vsubl", "Vsubl", "SimdAddOp", smallTypes, vsublwCode)
1631 threeRegWideInst("vsubw", "Vsubw", "SimdAddOp", smallTypes, vsublwCode)
1632
1633 vqaddUCode = '''
1634 destElem = srcElem1 + srcElem2;
1635 FPSCR fpscr = (FPSCR)Fpscr;
1635 FPSCR fpscr = (FPSCR) FpscrQc;
1636 if (destElem < srcElem1 || destElem < srcElem2) {
1637 destElem = (Element)(-1);
1638 fpscr.qc = 1;
1639 }
1636 if (destElem < srcElem1 || destElem < srcElem2) {
1637 destElem = (Element)(-1);
1638 fpscr.qc = 1;
1639 }
1640 Fpscr = fpscr;
1640 FpscrQc = fpscr;
1641 '''
1642 threeEqualRegInst("vqadd", "VqaddUD", "SimdAddOp", unsignedTypes, 2, vqaddUCode)
1643 threeEqualRegInst("vqadd", "VqaddUQ", "SimdAddOp", unsignedTypes, 4, vqaddUCode)
1644 vsubhnCode = '''
1645 destElem = ((BigElement)srcElem1 - (BigElement)srcElem2) >>
1646 (sizeof(Element) * 8);
1647 '''
1648 threeRegNarrowInst("vsubhn", "Vsubhn", "SimdAddOp", smallTypes, vsubhnCode)
1649 vrsubhnCode = '''
1650 destElem = ((BigElement)srcElem1 - (BigElement)srcElem2 +
1651 ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
1652 (sizeof(Element) * 8);
1653 '''
1654 threeRegNarrowInst("vrsubhn", "Vrsubhn", "SimdAddOp", smallTypes, vrsubhnCode)
1655
1656 vqaddSCode = '''
1657 destElem = srcElem1 + srcElem2;
1641 '''
1642 threeEqualRegInst("vqadd", "VqaddUD", "SimdAddOp", unsignedTypes, 2, vqaddUCode)
1643 threeEqualRegInst("vqadd", "VqaddUQ", "SimdAddOp", unsignedTypes, 4, vqaddUCode)
1644 vsubhnCode = '''
1645 destElem = ((BigElement)srcElem1 - (BigElement)srcElem2) >>
1646 (sizeof(Element) * 8);
1647 '''
1648 threeRegNarrowInst("vsubhn", "Vsubhn", "SimdAddOp", smallTypes, vsubhnCode)
1649 vrsubhnCode = '''
1650 destElem = ((BigElement)srcElem1 - (BigElement)srcElem2 +
1651 ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
1652 (sizeof(Element) * 8);
1653 '''
1654 threeRegNarrowInst("vrsubhn", "Vrsubhn", "SimdAddOp", smallTypes, vrsubhnCode)
1655
1656 vqaddSCode = '''
1657 destElem = srcElem1 + srcElem2;
1658 FPSCR fpscr = (FPSCR)Fpscr;
1658 FPSCR fpscr = (FPSCR) FpscrQc;
1659 bool negDest = (destElem < 0);
1660 bool negSrc1 = (srcElem1 < 0);
1661 bool negSrc2 = (srcElem2 < 0);
1662 if ((negDest != negSrc1) && (negSrc1 == negSrc2)) {
1663 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
1664 if (negDest)
1665 destElem -= 1;
1666 fpscr.qc = 1;
1667 }
1659 bool negDest = (destElem < 0);
1660 bool negSrc1 = (srcElem1 < 0);
1661 bool negSrc2 = (srcElem2 < 0);
1662 if ((negDest != negSrc1) && (negSrc1 == negSrc2)) {
1663 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
1664 if (negDest)
1665 destElem -= 1;
1666 fpscr.qc = 1;
1667 }
1668 Fpscr = fpscr;
1668 FpscrQc = fpscr;
1669 '''
1670 threeEqualRegInst("vqadd", "VqaddSD", "SimdAddOp", signedTypes, 2, vqaddSCode)
1671 threeEqualRegInst("vqadd", "VqaddSQ", "SimdAddOp", signedTypes, 4, vqaddSCode)
1672
1673 vqsubUCode = '''
1674 destElem = srcElem1 - srcElem2;
1669 '''
1670 threeEqualRegInst("vqadd", "VqaddSD", "SimdAddOp", signedTypes, 2, vqaddSCode)
1671 threeEqualRegInst("vqadd", "VqaddSQ", "SimdAddOp", signedTypes, 4, vqaddSCode)
1672
1673 vqsubUCode = '''
1674 destElem = srcElem1 - srcElem2;
1675 FPSCR fpscr = (FPSCR)Fpscr;
1675 FPSCR fpscr = (FPSCR) FpscrQc;
1676 if (destElem > srcElem1) {
1677 destElem = 0;
1678 fpscr.qc = 1;
1679 }
1676 if (destElem > srcElem1) {
1677 destElem = 0;
1678 fpscr.qc = 1;
1679 }
1680 Fpscr = fpscr;
1680 FpscrQc = fpscr;
1681 '''
1682 threeEqualRegInst("vqsub", "VqsubUD", "SimdAddOp", unsignedTypes, 2, vqsubUCode)
1683 threeEqualRegInst("vqsub", "VqsubUQ", "SimdAddOp", unsignedTypes, 4, vqsubUCode)
1684
1685 vqsubSCode = '''
1686 destElem = srcElem1 - srcElem2;
1681 '''
1682 threeEqualRegInst("vqsub", "VqsubUD", "SimdAddOp", unsignedTypes, 2, vqsubUCode)
1683 threeEqualRegInst("vqsub", "VqsubUQ", "SimdAddOp", unsignedTypes, 4, vqsubUCode)
1684
1685 vqsubSCode = '''
1686 destElem = srcElem1 - srcElem2;
1687 FPSCR fpscr = (FPSCR)Fpscr;
1687 FPSCR fpscr = (FPSCR) FpscrQc;
1688 bool negDest = (destElem < 0);
1689 bool negSrc1 = (srcElem1 < 0);
1690 bool posSrc2 = (srcElem2 >= 0);
1691 if ((negDest != negSrc1) && (negSrc1 == posSrc2)) {
1692 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
1693 if (negDest)
1694 destElem -= 1;
1695 fpscr.qc = 1;
1696 }
1688 bool negDest = (destElem < 0);
1689 bool negSrc1 = (srcElem1 < 0);
1690 bool posSrc2 = (srcElem2 >= 0);
1691 if ((negDest != negSrc1) && (negSrc1 == posSrc2)) {
1692 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
1693 if (negDest)
1694 destElem -= 1;
1695 fpscr.qc = 1;
1696 }
1697 Fpscr = fpscr;
1697 FpscrQc = fpscr;
1698 '''
1699 threeEqualRegInst("vqsub", "VqsubSD", "SimdAddOp", signedTypes, 2, vqsubSCode)
1700 threeEqualRegInst("vqsub", "VqsubSQ", "SimdAddOp", signedTypes, 4, vqsubSCode)
1701
1702 vcgtCode = '''
1703 destElem = (srcElem1 > srcElem2) ? (Element)(-1) : 0;
1704 '''
1705 threeEqualRegInst("vcgt", "VcgtD", "SimdCmpOp", allTypes, 2, vcgtCode)

--- 68 unchanged lines hidden (view full) ---

1774 destElem = srcElem1;
1775 }
1776 '''
1777 threeEqualRegInst("vrshl", "VrshlD", "SimdAluOp", allTypes, 2, vrshlCode)
1778 threeEqualRegInst("vrshl", "VrshlQ", "SimdAluOp", allTypes, 4, vrshlCode)
1779
1780 vqshlUCode = '''
1781 int16_t shiftAmt = (int8_t)srcElem2;
1698 '''
1699 threeEqualRegInst("vqsub", "VqsubSD", "SimdAddOp", signedTypes, 2, vqsubSCode)
1700 threeEqualRegInst("vqsub", "VqsubSQ", "SimdAddOp", signedTypes, 4, vqsubSCode)
1701
1702 vcgtCode = '''
1703 destElem = (srcElem1 > srcElem2) ? (Element)(-1) : 0;
1704 '''
1705 threeEqualRegInst("vcgt", "VcgtD", "SimdCmpOp", allTypes, 2, vcgtCode)

--- 68 unchanged lines hidden (view full) ---

1774 destElem = srcElem1;
1775 }
1776 '''
1777 threeEqualRegInst("vrshl", "VrshlD", "SimdAluOp", allTypes, 2, vrshlCode)
1778 threeEqualRegInst("vrshl", "VrshlQ", "SimdAluOp", allTypes, 4, vrshlCode)
1779
1780 vqshlUCode = '''
1781 int16_t shiftAmt = (int8_t)srcElem2;
1782 FPSCR fpscr = (FPSCR)Fpscr;
1782 FPSCR fpscr = (FPSCR) FpscrQc;
1783 if (shiftAmt < 0) {
1784 shiftAmt = -shiftAmt;
1785 if (shiftAmt >= sizeof(Element) * 8) {
1786 shiftAmt = sizeof(Element) * 8 - 1;
1787 destElem = 0;
1788 } else {
1789 destElem = (srcElem1 >> shiftAmt);
1790 }

--- 12 unchanged lines hidden (view full) ---

1803 fpscr.qc = 1;
1804 } else {
1805 destElem = srcElem1 << shiftAmt;
1806 }
1807 }
1808 } else {
1809 destElem = srcElem1;
1810 }
1783 if (shiftAmt < 0) {
1784 shiftAmt = -shiftAmt;
1785 if (shiftAmt >= sizeof(Element) * 8) {
1786 shiftAmt = sizeof(Element) * 8 - 1;
1787 destElem = 0;
1788 } else {
1789 destElem = (srcElem1 >> shiftAmt);
1790 }

--- 12 unchanged lines hidden (view full) ---

1803 fpscr.qc = 1;
1804 } else {
1805 destElem = srcElem1 << shiftAmt;
1806 }
1807 }
1808 } else {
1809 destElem = srcElem1;
1810 }
1811 Fpscr = fpscr;
1811 FpscrQc = fpscr;
1812 '''
1813 threeEqualRegInst("vqshl", "VqshlUD", "SimdAluOp", unsignedTypes, 2, vqshlUCode)
1814 threeEqualRegInst("vqshl", "VqshlUQ", "SimdAluOp", unsignedTypes, 4, vqshlUCode)
1815
1816 vqshlSCode = '''
1817 int16_t shiftAmt = (int8_t)srcElem2;
1812 '''
1813 threeEqualRegInst("vqshl", "VqshlUD", "SimdAluOp", unsignedTypes, 2, vqshlUCode)
1814 threeEqualRegInst("vqshl", "VqshlUQ", "SimdAluOp", unsignedTypes, 4, vqshlUCode)
1815
1816 vqshlSCode = '''
1817 int16_t shiftAmt = (int8_t)srcElem2;
1818 FPSCR fpscr = (FPSCR)Fpscr;
1818 FPSCR fpscr = (FPSCR) FpscrQc;
1819 if (shiftAmt < 0) {
1820 shiftAmt = -shiftAmt;
1821 if (shiftAmt >= sizeof(Element) * 8) {
1822 shiftAmt = sizeof(Element) * 8 - 1;
1823 destElem = 0;
1824 } else {
1825 destElem = (srcElem1 >> shiftAmt);
1826 }

--- 22 unchanged lines hidden (view full) ---

1849 fpscr.qc = 1;
1850 destElem = mask(sizeof(Element) * 8 - 1);
1851 if (srcElem1 < 0)
1852 destElem = ~destElem;
1853 }
1854 } else {
1855 destElem = srcElem1;
1856 }
1819 if (shiftAmt < 0) {
1820 shiftAmt = -shiftAmt;
1821 if (shiftAmt >= sizeof(Element) * 8) {
1822 shiftAmt = sizeof(Element) * 8 - 1;
1823 destElem = 0;
1824 } else {
1825 destElem = (srcElem1 >> shiftAmt);
1826 }

--- 22 unchanged lines hidden (view full) ---

1849 fpscr.qc = 1;
1850 destElem = mask(sizeof(Element) * 8 - 1);
1851 if (srcElem1 < 0)
1852 destElem = ~destElem;
1853 }
1854 } else {
1855 destElem = srcElem1;
1856 }
1857 Fpscr = fpscr;
1857 FpscrQc = fpscr;
1858 '''
1859 threeEqualRegInst("vqshl", "VqshlSD", "SimdCmpOp", signedTypes, 2, vqshlSCode)
1860 threeEqualRegInst("vqshl", "VqshlSQ", "SimdCmpOp", signedTypes, 4, vqshlSCode)
1861
1862 vqrshlUCode = '''
1863 int16_t shiftAmt = (int8_t)srcElem2;
1858 '''
1859 threeEqualRegInst("vqshl", "VqshlSD", "SimdCmpOp", signedTypes, 2, vqshlSCode)
1860 threeEqualRegInst("vqshl", "VqshlSQ", "SimdCmpOp", signedTypes, 4, vqshlSCode)
1861
1862 vqrshlUCode = '''
1863 int16_t shiftAmt = (int8_t)srcElem2;
1864 FPSCR fpscr = (FPSCR)Fpscr;
1864 FPSCR fpscr = (FPSCR) FpscrQc;
1865 if (shiftAmt < 0) {
1866 shiftAmt = -shiftAmt;
1867 Element rBit = 0;
1868 if (shiftAmt <= sizeof(Element) * 8)
1869 rBit = bits(srcElem1, shiftAmt - 1);
1870 if (shiftAmt >= sizeof(Element) * 8) {
1871 shiftAmt = sizeof(Element) * 8 - 1;
1872 destElem = 0;

--- 14 unchanged lines hidden (view full) ---

1887 sizeof(Element) * 8 - shiftAmt)) {
1888 destElem = mask(sizeof(Element) * 8);
1889 fpscr.qc = 1;
1890 } else {
1891 destElem = srcElem1 << shiftAmt;
1892 }
1893 }
1894 }
1865 if (shiftAmt < 0) {
1866 shiftAmt = -shiftAmt;
1867 Element rBit = 0;
1868 if (shiftAmt <= sizeof(Element) * 8)
1869 rBit = bits(srcElem1, shiftAmt - 1);
1870 if (shiftAmt >= sizeof(Element) * 8) {
1871 shiftAmt = sizeof(Element) * 8 - 1;
1872 destElem = 0;

--- 14 unchanged lines hidden (view full) ---

1887 sizeof(Element) * 8 - shiftAmt)) {
1888 destElem = mask(sizeof(Element) * 8);
1889 fpscr.qc = 1;
1890 } else {
1891 destElem = srcElem1 << shiftAmt;
1892 }
1893 }
1894 }
1895 Fpscr = fpscr;
1895 FpscrQc = fpscr;
1896 '''
1897 threeEqualRegInst("vqrshl", "VqrshlUD", "SimdCmpOp", unsignedTypes, 2, vqrshlUCode)
1898 threeEqualRegInst("vqrshl", "VqrshlUQ", "SimdCmpOp", unsignedTypes, 4, vqrshlUCode)
1899
1900 vqrshlSCode = '''
1901 int16_t shiftAmt = (int8_t)srcElem2;
1896 '''
1897 threeEqualRegInst("vqrshl", "VqrshlUD", "SimdCmpOp", unsignedTypes, 2, vqrshlUCode)
1898 threeEqualRegInst("vqrshl", "VqrshlUQ", "SimdCmpOp", unsignedTypes, 4, vqrshlUCode)
1899
1900 vqrshlSCode = '''
1901 int16_t shiftAmt = (int8_t)srcElem2;
1902 FPSCR fpscr = (FPSCR)Fpscr;
1902 FPSCR fpscr = (FPSCR) FpscrQc;
1903 if (shiftAmt < 0) {
1904 shiftAmt = -shiftAmt;
1905 Element rBit = 0;
1906 if (shiftAmt <= sizeof(Element) * 8)
1907 rBit = bits(srcElem1, shiftAmt - 1);
1908 if (shiftAmt > sizeof(Element) * 8 && srcElem1 < 0)
1909 rBit = 1;
1910 if (shiftAmt >= sizeof(Element) * 8) {

--- 28 unchanged lines hidden (view full) ---

1939 fpscr.qc = 1;
1940 destElem = mask(sizeof(Element) * 8 - 1);
1941 if (srcElem1 < 0)
1942 destElem = ~destElem;
1943 }
1944 } else {
1945 destElem = srcElem1;
1946 }
1903 if (shiftAmt < 0) {
1904 shiftAmt = -shiftAmt;
1905 Element rBit = 0;
1906 if (shiftAmt <= sizeof(Element) * 8)
1907 rBit = bits(srcElem1, shiftAmt - 1);
1908 if (shiftAmt > sizeof(Element) * 8 && srcElem1 < 0)
1909 rBit = 1;
1910 if (shiftAmt >= sizeof(Element) * 8) {

--- 28 unchanged lines hidden (view full) ---

1939 fpscr.qc = 1;
1940 destElem = mask(sizeof(Element) * 8 - 1);
1941 if (srcElem1 < 0)
1942 destElem = ~destElem;
1943 }
1944 } else {
1945 destElem = srcElem1;
1946 }
1947 Fpscr = fpscr;
1947 FpscrQc = fpscr;
1948 '''
1949 threeEqualRegInst("vqrshl", "VqrshlSD", "SimdCmpOp", signedTypes, 2, vqrshlSCode)
1950 threeEqualRegInst("vqrshl", "VqrshlSQ", "SimdCmpOp", signedTypes, 4, vqrshlSCode)
1951
1952 vabaCode = '''
1953 destElem += (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
1954 (srcElem2 - srcElem1);
1955 '''

--- 41 unchanged lines hidden (view full) ---

1997 threeEqualRegInst("vmla", "NVmlaD", "SimdMultAccOp", allTypes, 2, vmlaCode, True)
1998 threeEqualRegInst("vmla", "NVmlaQ", "SimdMultAccOp", allTypes, 4, vmlaCode, True)
1999 vmlalCode = '''
2000 destElem = destElem + (BigElement)srcElem1 * (BigElement)srcElem2;
2001 '''
2002 threeRegLongInst("vmlal", "Vmlal", "SimdMultAccOp", smallTypes, vmlalCode, True)
2003
2004 vqdmlalCode = '''
1948 '''
1949 threeEqualRegInst("vqrshl", "VqrshlSD", "SimdCmpOp", signedTypes, 2, vqrshlSCode)
1950 threeEqualRegInst("vqrshl", "VqrshlSQ", "SimdCmpOp", signedTypes, 4, vqrshlSCode)
1951
1952 vabaCode = '''
1953 destElem += (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
1954 (srcElem2 - srcElem1);
1955 '''

--- 41 unchanged lines hidden (view full) ---

1997 threeEqualRegInst("vmla", "NVmlaD", "SimdMultAccOp", allTypes, 2, vmlaCode, True)
1998 threeEqualRegInst("vmla", "NVmlaQ", "SimdMultAccOp", allTypes, 4, vmlaCode, True)
1999 vmlalCode = '''
2000 destElem = destElem + (BigElement)srcElem1 * (BigElement)srcElem2;
2001 '''
2002 threeRegLongInst("vmlal", "Vmlal", "SimdMultAccOp", smallTypes, vmlalCode, True)
2003
2004 vqdmlalCode = '''
2005 FPSCR fpscr = (FPSCR)Fpscr;
2005 FPSCR fpscr = (FPSCR) FpscrQc;
2006 BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2007 Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
2008 Element halfNeg = maxNeg / 2;
2009 if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2010 (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2011 (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2012 midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
2013 fpscr.qc = 1;
2014 }
2015 bool negPreDest = ltz(destElem);
2016 destElem += midElem;
2017 bool negDest = ltz(destElem);
2018 bool negMid = ltz(midElem);
2019 if (negPreDest == negMid && negMid != negDest) {
2020 destElem = mask(sizeof(BigElement) * 8 - 1);
2021 if (negPreDest)
2022 destElem = ~destElem;
2023 fpscr.qc = 1;
2024 }
2006 BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2007 Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
2008 Element halfNeg = maxNeg / 2;
2009 if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2010 (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2011 (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2012 midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
2013 fpscr.qc = 1;
2014 }
2015 bool negPreDest = ltz(destElem);
2016 destElem += midElem;
2017 bool negDest = ltz(destElem);
2018 bool negMid = ltz(midElem);
2019 if (negPreDest == negMid && negMid != negDest) {
2020 destElem = mask(sizeof(BigElement) * 8 - 1);
2021 if (negPreDest)
2022 destElem = ~destElem;
2023 fpscr.qc = 1;
2024 }
2025 Fpscr = fpscr;
2025 FpscrQc = fpscr;
2026 '''
2027 threeRegLongInst("vqdmlal", "Vqdmlal", "SimdMultAccOp", smallTypes, vqdmlalCode, True)
2028
2029 vqdmlslCode = '''
2026 '''
2027 threeRegLongInst("vqdmlal", "Vqdmlal", "SimdMultAccOp", smallTypes, vqdmlalCode, True)
2028
2029 vqdmlslCode = '''
2030 FPSCR fpscr = (FPSCR)Fpscr;
2030 FPSCR fpscr = (FPSCR) FpscrQc;
2031 BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2032 Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
2033 Element halfNeg = maxNeg / 2;
2034 if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2035 (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2036 (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2037 midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
2038 fpscr.qc = 1;
2039 }
2040 bool negPreDest = ltz(destElem);
2041 destElem -= midElem;
2042 bool negDest = ltz(destElem);
2043 bool posMid = ltz((BigElement)-midElem);
2044 if (negPreDest == posMid && posMid != negDest) {
2045 destElem = mask(sizeof(BigElement) * 8 - 1);
2046 if (negPreDest)
2047 destElem = ~destElem;
2048 fpscr.qc = 1;
2049 }
2031 BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2032 Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
2033 Element halfNeg = maxNeg / 2;
2034 if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2035 (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2036 (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2037 midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
2038 fpscr.qc = 1;
2039 }
2040 bool negPreDest = ltz(destElem);
2041 destElem -= midElem;
2042 bool negDest = ltz(destElem);
2043 bool posMid = ltz((BigElement)-midElem);
2044 if (negPreDest == posMid && posMid != negDest) {
2045 destElem = mask(sizeof(BigElement) * 8 - 1);
2046 if (negPreDest)
2047 destElem = ~destElem;
2048 fpscr.qc = 1;
2049 }
2050 Fpscr = fpscr;
2050 FpscrQc = fpscr;
2051 '''
2052 threeRegLongInst("vqdmlsl", "Vqdmlsl", "SimdMultAccOp", smallTypes, vqdmlslCode, True)
2053
2054 vqdmullCode = '''
2051 '''
2052 threeRegLongInst("vqdmlsl", "Vqdmlsl", "SimdMultAccOp", smallTypes, vqdmlslCode, True)
2053
2054 vqdmullCode = '''
2055 FPSCR fpscr = (FPSCR)Fpscr;
2055 FPSCR fpscr = (FPSCR) FpscrQc;
2056 destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2057 if (srcElem1 == srcElem2 &&
2058 srcElem1 == (Element)((Element)1 <<
2059 (Element)(sizeof(Element) * 8 - 1))) {
2060 destElem = ~((BigElement)srcElem1 << (sizeof(Element) * 8));
2061 fpscr.qc = 1;
2062 }
2056 destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2057 if (srcElem1 == srcElem2 &&
2058 srcElem1 == (Element)((Element)1 <<
2059 (Element)(sizeof(Element) * 8 - 1))) {
2060 destElem = ~((BigElement)srcElem1 << (sizeof(Element) * 8));
2061 fpscr.qc = 1;
2062 }
2063 Fpscr = fpscr;
2063 FpscrQc = fpscr;
2064 '''
2065 threeRegLongInst("vqdmull", "Vqdmull", "SimdMultAccOp", smallTypes, vqdmullCode)
2066
2067 vmlsCode = '''
2068 destElem = destElem - srcElem1 * srcElem2;
2069 '''
2070 threeEqualRegInst("vmls", "NVmlsD", "SimdMultAccOp", allTypes, 2, vmlsCode, True)
2071 threeEqualRegInst("vmls", "NVmlsQ", "SimdMultAccOp", allTypes, 4, vmlsCode, True)

--- 22 unchanged lines hidden (view full) ---

2094
2095 threeEqualRegInst("vpmax", "VpmaxD", "SimdCmpOp", allTypes, 2, vmaxCode, pairwise=True)
2096 threeEqualRegInst("vpmax", "VpmaxQ", "SimdCmpOp", allTypes, 4, vmaxCode, pairwise=True)
2097
2098 threeEqualRegInst("vpmin", "VpminD", "SimdCmpOp", allTypes, 2, vminCode, pairwise=True)
2099 threeEqualRegInst("vpmin", "VpminQ", "SimdCmpOp", allTypes, 4, vminCode, pairwise=True)
2100
2101 vqdmulhCode = '''
2064 '''
2065 threeRegLongInst("vqdmull", "Vqdmull", "SimdMultAccOp", smallTypes, vqdmullCode)
2066
2067 vmlsCode = '''
2068 destElem = destElem - srcElem1 * srcElem2;
2069 '''
2070 threeEqualRegInst("vmls", "NVmlsD", "SimdMultAccOp", allTypes, 2, vmlsCode, True)
2071 threeEqualRegInst("vmls", "NVmlsQ", "SimdMultAccOp", allTypes, 4, vmlsCode, True)

--- 22 unchanged lines hidden (view full) ---

2094
2095 threeEqualRegInst("vpmax", "VpmaxD", "SimdCmpOp", allTypes, 2, vmaxCode, pairwise=True)
2096 threeEqualRegInst("vpmax", "VpmaxQ", "SimdCmpOp", allTypes, 4, vmaxCode, pairwise=True)
2097
2098 threeEqualRegInst("vpmin", "VpminD", "SimdCmpOp", allTypes, 2, vminCode, pairwise=True)
2099 threeEqualRegInst("vpmin", "VpminQ", "SimdCmpOp", allTypes, 4, vminCode, pairwise=True)
2100
2101 vqdmulhCode = '''
2102 FPSCR fpscr = (FPSCR)Fpscr;
2102 FPSCR fpscr = (FPSCR) FpscrQc;
2103 destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2) >>
2104 (sizeof(Element) * 8);
2105 if (srcElem1 == srcElem2 &&
2106 srcElem1 == (Element)((Element)1 <<
2107 (sizeof(Element) * 8 - 1))) {
2108 destElem = ~srcElem1;
2109 fpscr.qc = 1;
2110 }
2103 destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2) >>
2104 (sizeof(Element) * 8);
2105 if (srcElem1 == srcElem2 &&
2106 srcElem1 == (Element)((Element)1 <<
2107 (sizeof(Element) * 8 - 1))) {
2108 destElem = ~srcElem1;
2109 fpscr.qc = 1;
2110 }
2111 Fpscr = fpscr;
2111 FpscrQc = fpscr;
2112 '''
2113 threeEqualRegInst("vqdmulh", "VqdmulhD", "SimdMultOp", smallSignedTypes, 2, vqdmulhCode)
2114 threeEqualRegInst("vqdmulh", "VqdmulhQ", "SimdMultOp", smallSignedTypes, 4, vqdmulhCode)
2115
2116 vqrdmulhCode = '''
2112 '''
2113 threeEqualRegInst("vqdmulh", "VqdmulhD", "SimdMultOp", smallSignedTypes, 2, vqdmulhCode)
2114 threeEqualRegInst("vqdmulh", "VqdmulhQ", "SimdMultOp", smallSignedTypes, 4, vqdmulhCode)
2115
2116 vqrdmulhCode = '''
2117 FPSCR fpscr = (FPSCR)Fpscr;
2117 FPSCR fpscr = (FPSCR) FpscrQc;
2118 destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2 +
2119 ((int64_t)1 << (sizeof(Element) * 8 - 1))) >>
2120 (sizeof(Element) * 8);
2121 Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
2122 Element halfNeg = maxNeg / 2;
2123 if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2124 (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2125 (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2126 if (destElem < 0) {
2127 destElem = mask(sizeof(Element) * 8 - 1);
2128 } else {
2129 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2130 }
2131 fpscr.qc = 1;
2132 }
2118 destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2 +
2119 ((int64_t)1 << (sizeof(Element) * 8 - 1))) >>
2120 (sizeof(Element) * 8);
2121 Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
2122 Element halfNeg = maxNeg / 2;
2123 if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2124 (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2125 (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2126 if (destElem < 0) {
2127 destElem = mask(sizeof(Element) * 8 - 1);
2128 } else {
2129 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2130 }
2131 fpscr.qc = 1;
2132 }
2133 Fpscr = fpscr;
2133 FpscrQc = fpscr;
2134 '''
2135 threeEqualRegInst("vqrdmulh", "VqrdmulhD",
2136 "SimdMultOp", smallSignedTypes, 2, vqrdmulhCode)
2137 threeEqualRegInst("vqrdmulh", "VqrdmulhQ",
2138 "SimdMultOp", smallSignedTypes, 4, vqrdmulhCode)
2139
2140 vmaxfpCode = '''
2134 '''
2135 threeEqualRegInst("vqrdmulh", "VqrdmulhD",
2136 "SimdMultOp", smallSignedTypes, 2, vqrdmulhCode)
2137 threeEqualRegInst("vqrdmulh", "VqrdmulhQ",
2138 "SimdMultOp", smallSignedTypes, 4, vqrdmulhCode)
2139
2140 vmaxfpCode = '''
2141 FPSCR fpscr = (FPSCR)Fpscr;
2141 FPSCR fpscr = (FPSCR) FpscrExc;
2142 bool done;
2143 destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
2144 if (!done) {
2145 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMaxS,
2146 true, true, VfpRoundNearest);
2147 } else if (flushToZero(srcReg1, srcReg2)) {
2148 fpscr.idc = 1;
2149 }
2142 bool done;
2143 destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
2144 if (!done) {
2145 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMaxS,
2146 true, true, VfpRoundNearest);
2147 } else if (flushToZero(srcReg1, srcReg2)) {
2148 fpscr.idc = 1;
2149 }
2150 Fpscr = fpscr;
2150 FpscrExc = fpscr;
2151 '''
2152 threeEqualRegInstFp("vmax", "VmaxDFp", "SimdFloatCmpOp", ("float",), 2, vmaxfpCode)
2153 threeEqualRegInstFp("vmax", "VmaxQFp", "SimdFloatCmpOp", ("float",), 4, vmaxfpCode)
2154
2155 vminfpCode = '''
2151 '''
2152 threeEqualRegInstFp("vmax", "VmaxDFp", "SimdFloatCmpOp", ("float",), 2, vmaxfpCode)
2153 threeEqualRegInstFp("vmax", "VmaxQFp", "SimdFloatCmpOp", ("float",), 4, vmaxfpCode)
2154
2155 vminfpCode = '''
2156 FPSCR fpscr = (FPSCR)Fpscr;
2156 FPSCR fpscr = (FPSCR) FpscrExc;
2157 bool done;
2158 destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
2159 if (!done) {
2160 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMinS,
2161 true, true, VfpRoundNearest);
2162 } else if (flushToZero(srcReg1, srcReg2)) {
2163 fpscr.idc = 1;
2164 }
2157 bool done;
2158 destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
2159 if (!done) {
2160 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMinS,
2161 true, true, VfpRoundNearest);
2162 } else if (flushToZero(srcReg1, srcReg2)) {
2163 fpscr.idc = 1;
2164 }
2165 Fpscr = fpscr;
2165 FpscrExc = fpscr;
2166 '''
2167 threeEqualRegInstFp("vmin", "VminDFp", "SimdFloatCmpOp", ("float",), 2, vminfpCode)
2168 threeEqualRegInstFp("vmin", "VminQFp", "SimdFloatCmpOp", ("float",), 4, vminfpCode)
2169
2170 threeEqualRegInstFp("vpmax", "VpmaxDFp", "SimdFloatCmpOp", ("float",),
2171 2, vmaxfpCode, pairwise=True)
2172 threeEqualRegInstFp("vpmax", "VpmaxQFp", "SimdFloatCmpOp", ("float",),
2173 4, vmaxfpCode, pairwise=True)
2174
2175 threeEqualRegInstFp("vpmin", "VpminDFp", "SimdFloatCmpOp", ("float",),
2176 2, vminfpCode, pairwise=True)
2177 threeEqualRegInstFp("vpmin", "VpminQFp", "SimdFloatCmpOp", ("float",),
2178 4, vminfpCode, pairwise=True)
2179
2180 vaddfpCode = '''
2166 '''
2167 threeEqualRegInstFp("vmin", "VminDFp", "SimdFloatCmpOp", ("float",), 2, vminfpCode)
2168 threeEqualRegInstFp("vmin", "VminQFp", "SimdFloatCmpOp", ("float",), 4, vminfpCode)
2169
2170 threeEqualRegInstFp("vpmax", "VpmaxDFp", "SimdFloatCmpOp", ("float",),
2171 2, vmaxfpCode, pairwise=True)
2172 threeEqualRegInstFp("vpmax", "VpmaxQFp", "SimdFloatCmpOp", ("float",),
2173 4, vmaxfpCode, pairwise=True)
2174
2175 threeEqualRegInstFp("vpmin", "VpminDFp", "SimdFloatCmpOp", ("float",),
2176 2, vminfpCode, pairwise=True)
2177 threeEqualRegInstFp("vpmin", "VpminQFp", "SimdFloatCmpOp", ("float",),
2178 4, vminfpCode, pairwise=True)
2179
2180 vaddfpCode = '''
2181 FPSCR fpscr = Fpscr;
2181 FPSCR fpscr = (FPSCR) FpscrExc;
2182 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpAddS,
2183 true, true, VfpRoundNearest);
2182 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpAddS,
2183 true, true, VfpRoundNearest);
2184 Fpscr = fpscr;
2184 FpscrExc = fpscr;
2185 '''
2186 threeEqualRegInstFp("vadd", "VaddDFp", "SimdFloatAddOp", ("float",), 2, vaddfpCode)
2187 threeEqualRegInstFp("vadd", "VaddQFp", "SimdFloatAddOp", ("float",), 4, vaddfpCode)
2188
2189 threeEqualRegInstFp("vpadd", "VpaddDFp", "SimdFloatAddOp", ("float",),
2190 2, vaddfpCode, pairwise=True)
2191 threeEqualRegInstFp("vpadd", "VpaddQFp", "SimdFloatAddOp", ("float",),
2192 4, vaddfpCode, pairwise=True)
2193
2194 vsubfpCode = '''
2185 '''
2186 threeEqualRegInstFp("vadd", "VaddDFp", "SimdFloatAddOp", ("float",), 2, vaddfpCode)
2187 threeEqualRegInstFp("vadd", "VaddQFp", "SimdFloatAddOp", ("float",), 4, vaddfpCode)
2188
2189 threeEqualRegInstFp("vpadd", "VpaddDFp", "SimdFloatAddOp", ("float",),
2190 2, vaddfpCode, pairwise=True)
2191 threeEqualRegInstFp("vpadd", "VpaddQFp", "SimdFloatAddOp", ("float",),
2192 4, vaddfpCode, pairwise=True)
2193
2194 vsubfpCode = '''
2195 FPSCR fpscr = Fpscr;
2195 FPSCR fpscr = (FPSCR) FpscrExc;
2196 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
2197 true, true, VfpRoundNearest);
2196 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
2197 true, true, VfpRoundNearest);
2198 Fpscr = fpscr;
2198 FpscrExc = fpscr;
2199 '''
2200 threeEqualRegInstFp("vsub", "VsubDFp", "SimdFloatAddOp", ("float",), 2, vsubfpCode)
2201 threeEqualRegInstFp("vsub", "VsubQFp", "SimdFloatAddOp", ("float",), 4, vsubfpCode)
2202
2203 vmulfpCode = '''
2199 '''
2200 threeEqualRegInstFp("vsub", "VsubDFp", "SimdFloatAddOp", ("float",), 2, vsubfpCode)
2201 threeEqualRegInstFp("vsub", "VsubQFp", "SimdFloatAddOp", ("float",), 4, vsubfpCode)
2202
2203 vmulfpCode = '''
2204 FPSCR fpscr = Fpscr;
2204 FPSCR fpscr = (FPSCR) FpscrExc;
2205 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2206 true, true, VfpRoundNearest);
2205 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2206 true, true, VfpRoundNearest);
2207 Fpscr = fpscr;
2207 FpscrExc = fpscr;
2208 '''
2209 threeEqualRegInstFp("vmul", "NVmulDFp", "SimdFloatMultOp", ("float",), 2, vmulfpCode)
2210 threeEqualRegInstFp("vmul", "NVmulQFp", "SimdFloatMultOp", ("float",), 4, vmulfpCode)
2211
2212 vmlafpCode = '''
2208 '''
2209 threeEqualRegInstFp("vmul", "NVmulDFp", "SimdFloatMultOp", ("float",), 2, vmulfpCode)
2210 threeEqualRegInstFp("vmul", "NVmulQFp", "SimdFloatMultOp", ("float",), 4, vmulfpCode)
2211
2212 vmlafpCode = '''
2213 FPSCR fpscr = Fpscr;
2213 FPSCR fpscr = (FPSCR) FpscrExc;
2214 float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2215 true, true, VfpRoundNearest);
2216 destReg = binaryOp(fpscr, mid, destReg, fpAddS,
2217 true, true, VfpRoundNearest);
2214 float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2215 true, true, VfpRoundNearest);
2216 destReg = binaryOp(fpscr, mid, destReg, fpAddS,
2217 true, true, VfpRoundNearest);
2218 Fpscr = fpscr;
2218 FpscrExc = fpscr;
2219 '''
2220 threeEqualRegInstFp("vmla", "NVmlaDFp", "SimdFloatMultAccOp", ("float",), 2, vmlafpCode, True)
2221 threeEqualRegInstFp("vmla", "NVmlaQFp", "SimdFloatMultAccOp", ("float",), 4, vmlafpCode, True)
2222
2223 vmlsfpCode = '''
2219 '''
2220 threeEqualRegInstFp("vmla", "NVmlaDFp", "SimdFloatMultAccOp", ("float",), 2, vmlafpCode, True)
2221 threeEqualRegInstFp("vmla", "NVmlaQFp", "SimdFloatMultAccOp", ("float",), 4, vmlafpCode, True)
2222
2223 vmlsfpCode = '''
2224 FPSCR fpscr = Fpscr;
2224 FPSCR fpscr = (FPSCR) FpscrExc;
2225 float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2226 true, true, VfpRoundNearest);
2227 destReg = binaryOp(fpscr, destReg, mid, fpSubS,
2228 true, true, VfpRoundNearest);
2225 float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2226 true, true, VfpRoundNearest);
2227 destReg = binaryOp(fpscr, destReg, mid, fpSubS,
2228 true, true, VfpRoundNearest);
2229 Fpscr = fpscr;
2229 FpscrExc = fpscr;
2230 '''
2231 threeEqualRegInstFp("vmls", "NVmlsDFp", "SimdFloatMultAccOp", ("float",), 2, vmlsfpCode, True)
2232 threeEqualRegInstFp("vmls", "NVmlsQFp", "SimdFloatMultAccOp", ("float",), 4, vmlsfpCode, True)
2233
2234 vcgtfpCode = '''
2230 '''
2231 threeEqualRegInstFp("vmls", "NVmlsDFp", "SimdFloatMultAccOp", ("float",), 2, vmlsfpCode, True)
2232 threeEqualRegInstFp("vmls", "NVmlsQFp", "SimdFloatMultAccOp", ("float",), 4, vmlsfpCode, True)
2233
2234 vcgtfpCode = '''
2235 FPSCR fpscr = (FPSCR)Fpscr;
2235 FPSCR fpscr = (FPSCR) FpscrExc;
2236 float res = binaryOp(fpscr, srcReg1, srcReg2, vcgtFunc,
2237 true, true, VfpRoundNearest);
2238 destReg = (res == 0) ? -1 : 0;
2239 if (res == 2.0)
2240 fpscr.ioc = 1;
2236 float res = binaryOp(fpscr, srcReg1, srcReg2, vcgtFunc,
2237 true, true, VfpRoundNearest);
2238 destReg = (res == 0) ? -1 : 0;
2239 if (res == 2.0)
2240 fpscr.ioc = 1;
2241 Fpscr = fpscr;
2241 FpscrExc = fpscr;
2242 '''
2243 threeEqualRegInstFp("vcgt", "VcgtDFp", "SimdFloatCmpOp", ("float",),
2244 2, vcgtfpCode, toInt = True)
2245 threeEqualRegInstFp("vcgt", "VcgtQFp", "SimdFloatCmpOp", ("float",),
2246 4, vcgtfpCode, toInt = True)
2247
2248 vcgefpCode = '''
2242 '''
2243 threeEqualRegInstFp("vcgt", "VcgtDFp", "SimdFloatCmpOp", ("float",),
2244 2, vcgtfpCode, toInt = True)
2245 threeEqualRegInstFp("vcgt", "VcgtQFp", "SimdFloatCmpOp", ("float",),
2246 4, vcgtfpCode, toInt = True)
2247
2248 vcgefpCode = '''
2249 FPSCR fpscr = (FPSCR)Fpscr;
2249 FPSCR fpscr = (FPSCR) FpscrExc;
2250 float res = binaryOp(fpscr, srcReg1, srcReg2, vcgeFunc,
2251 true, true, VfpRoundNearest);
2252 destReg = (res == 0) ? -1 : 0;
2253 if (res == 2.0)
2254 fpscr.ioc = 1;
2250 float res = binaryOp(fpscr, srcReg1, srcReg2, vcgeFunc,
2251 true, true, VfpRoundNearest);
2252 destReg = (res == 0) ? -1 : 0;
2253 if (res == 2.0)
2254 fpscr.ioc = 1;
2255 Fpscr = fpscr;
2255 FpscrExc = fpscr;
2256 '''
2257 threeEqualRegInstFp("vcge", "VcgeDFp", "SimdFloatCmpOp", ("float",),
2258 2, vcgefpCode, toInt = True)
2259 threeEqualRegInstFp("vcge", "VcgeQFp", "SimdFloatCmpOp", ("float",),
2260 4, vcgefpCode, toInt = True)
2261
2262 vacgtfpCode = '''
2256 '''
2257 threeEqualRegInstFp("vcge", "VcgeDFp", "SimdFloatCmpOp", ("float",),
2258 2, vcgefpCode, toInt = True)
2259 threeEqualRegInstFp("vcge", "VcgeQFp", "SimdFloatCmpOp", ("float",),
2260 4, vcgefpCode, toInt = True)
2261
2262 vacgtfpCode = '''
2263 FPSCR fpscr = (FPSCR)Fpscr;
2263 FPSCR fpscr = (FPSCR) FpscrExc;
2264 float res = binaryOp(fpscr, srcReg1, srcReg2, vacgtFunc,
2265 true, true, VfpRoundNearest);
2266 destReg = (res == 0) ? -1 : 0;
2267 if (res == 2.0)
2268 fpscr.ioc = 1;
2264 float res = binaryOp(fpscr, srcReg1, srcReg2, vacgtFunc,
2265 true, true, VfpRoundNearest);
2266 destReg = (res == 0) ? -1 : 0;
2267 if (res == 2.0)
2268 fpscr.ioc = 1;
2269 Fpscr = fpscr;
2269 FpscrExc = fpscr;
2270 '''
2271 threeEqualRegInstFp("vacgt", "VacgtDFp", "SimdFloatCmpOp", ("float",),
2272 2, vacgtfpCode, toInt = True)
2273 threeEqualRegInstFp("vacgt", "VacgtQFp", "SimdFloatCmpOp", ("float",),
2274 4, vacgtfpCode, toInt = True)
2275
2276 vacgefpCode = '''
2270 '''
2271 threeEqualRegInstFp("vacgt", "VacgtDFp", "SimdFloatCmpOp", ("float",),
2272 2, vacgtfpCode, toInt = True)
2273 threeEqualRegInstFp("vacgt", "VacgtQFp", "SimdFloatCmpOp", ("float",),
2274 4, vacgtfpCode, toInt = True)
2275
2276 vacgefpCode = '''
2277 FPSCR fpscr = (FPSCR)Fpscr;
2277 FPSCR fpscr = (FPSCR) FpscrExc;
2278 float res = binaryOp(fpscr, srcReg1, srcReg2, vacgeFunc,
2279 true, true, VfpRoundNearest);
2280 destReg = (res == 0) ? -1 : 0;
2281 if (res == 2.0)
2282 fpscr.ioc = 1;
2278 float res = binaryOp(fpscr, srcReg1, srcReg2, vacgeFunc,
2279 true, true, VfpRoundNearest);
2280 destReg = (res == 0) ? -1 : 0;
2281 if (res == 2.0)
2282 fpscr.ioc = 1;
2283 Fpscr = fpscr;
2283 FpscrExc = fpscr;
2284 '''
2285 threeEqualRegInstFp("vacge", "VacgeDFp", "SimdFloatCmpOp", ("float",),
2286 2, vacgefpCode, toInt = True)
2287 threeEqualRegInstFp("vacge", "VacgeQFp", "SimdFloatCmpOp", ("float",),
2288 4, vacgefpCode, toInt = True)
2289
2290 vceqfpCode = '''
2284 '''
2285 threeEqualRegInstFp("vacge", "VacgeDFp", "SimdFloatCmpOp", ("float",),
2286 2, vacgefpCode, toInt = True)
2287 threeEqualRegInstFp("vacge", "VacgeQFp", "SimdFloatCmpOp", ("float",),
2288 4, vacgefpCode, toInt = True)
2289
2290 vceqfpCode = '''
2291 FPSCR fpscr = (FPSCR)Fpscr;
2291 FPSCR fpscr = (FPSCR) FpscrExc;
2292 float res = binaryOp(fpscr, srcReg1, srcReg2, vceqFunc,
2293 true, true, VfpRoundNearest);
2294 destReg = (res == 0) ? -1 : 0;
2295 if (res == 2.0)
2296 fpscr.ioc = 1;
2292 float res = binaryOp(fpscr, srcReg1, srcReg2, vceqFunc,
2293 true, true, VfpRoundNearest);
2294 destReg = (res == 0) ? -1 : 0;
2295 if (res == 2.0)
2296 fpscr.ioc = 1;
2297 Fpscr = fpscr;
2297 FpscrExc = fpscr;
2298 '''
2299 threeEqualRegInstFp("vceq", "VceqDFp", "SimdFloatCmpOp", ("float",),
2300 2, vceqfpCode, toInt = True)
2301 threeEqualRegInstFp("vceq", "VceqQFp", "SimdFloatCmpOp", ("float",),
2302 4, vceqfpCode, toInt = True)
2303
2304 vrecpsCode = '''
2298 '''
2299 threeEqualRegInstFp("vceq", "VceqDFp", "SimdFloatCmpOp", ("float",),
2300 2, vceqfpCode, toInt = True)
2301 threeEqualRegInstFp("vceq", "VceqQFp", "SimdFloatCmpOp", ("float",),
2302 4, vceqfpCode, toInt = True)
2303
2304 vrecpsCode = '''
2305 FPSCR fpscr = Fpscr;
2305 FPSCR fpscr = (FPSCR) FpscrExc;
2306 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRecpsS,
2307 true, true, VfpRoundNearest);
2306 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRecpsS,
2307 true, true, VfpRoundNearest);
2308 Fpscr = fpscr;
2308 FpscrExc = fpscr;
2309 '''
2310 threeEqualRegInstFp("vrecps", "VrecpsDFp", "SimdFloatMultAccOp", ("float",), 2, vrecpsCode)
2311 threeEqualRegInstFp("vrecps", "VrecpsQFp", "SimdFloatMultAccOp", ("float",), 4, vrecpsCode)
2312
2313 vrsqrtsCode = '''
2309 '''
2310 threeEqualRegInstFp("vrecps", "VrecpsDFp", "SimdFloatMultAccOp", ("float",), 2, vrecpsCode)
2311 threeEqualRegInstFp("vrecps", "VrecpsQFp", "SimdFloatMultAccOp", ("float",), 4, vrecpsCode)
2312
2313 vrsqrtsCode = '''
2314 FPSCR fpscr = Fpscr;
2314 FPSCR fpscr = (FPSCR) FpscrExc;
2315 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRSqrtsS,
2316 true, true, VfpRoundNearest);
2315 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRSqrtsS,
2316 true, true, VfpRoundNearest);
2317 Fpscr = fpscr;
2317 FpscrExc = fpscr;
2318 '''
2319 threeEqualRegInstFp("vrsqrts", "VrsqrtsDFp", "SimdFloatMiscOp", ("float",), 2, vrsqrtsCode)
2320 threeEqualRegInstFp("vrsqrts", "VrsqrtsQFp", "SimdFloatMiscOp", ("float",), 4, vrsqrtsCode)
2321
2322 vabdfpCode = '''
2318 '''
2319 threeEqualRegInstFp("vrsqrts", "VrsqrtsDFp", "SimdFloatMiscOp", ("float",), 2, vrsqrtsCode)
2320 threeEqualRegInstFp("vrsqrts", "VrsqrtsQFp", "SimdFloatMiscOp", ("float",), 4, vrsqrtsCode)
2321
2322 vabdfpCode = '''
2323 FPSCR fpscr = Fpscr;
2323 FPSCR fpscr = (FPSCR) FpscrExc;
2324 float mid = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
2325 true, true, VfpRoundNearest);
2326 destReg = fabs(mid);
2324 float mid = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
2325 true, true, VfpRoundNearest);
2326 destReg = fabs(mid);
2327 Fpscr = fpscr;
2327 FpscrExc = fpscr;
2328 '''
2329 threeEqualRegInstFp("vabd", "VabdDFp", "SimdFloatAddOp", ("float",), 2, vabdfpCode)
2330 threeEqualRegInstFp("vabd", "VabdQFp", "SimdFloatAddOp", ("float",), 4, vabdfpCode)
2331
2332 twoEqualRegInst("vmla", "VmlasD", "SimdMultAccOp", unsignedTypes, 2, vmlaCode, True)
2333 twoEqualRegInst("vmla", "VmlasQ", "SimdMultAccOp", unsignedTypes, 4, vmlaCode, True)
2334 twoEqualRegInstFp("vmla", "VmlasDFp", "SimdFloatMultAccOp", ("float",), 2, vmlafpCode, True)
2335 twoEqualRegInstFp("vmla", "VmlasQFp", "SimdFloatMultAccOp", ("float",), 4, vmlafpCode, True)

--- 100 unchanged lines hidden (view full) ---

2436 destElem = destElem;
2437 else
2438 destElem = (srcElem1 << imm) | (destElem & mask(imm));
2439 '''
2440 twoRegShiftInst("vsli", "NVsliD", "SimdShiftOp", unsignedTypes, 2, vsliCode, True)
2441 twoRegShiftInst("vsli", "NVsliQ", "SimdShiftOp", unsignedTypes, 4, vsliCode, True)
2442
2443 vqshlCode = '''
2328 '''
2329 threeEqualRegInstFp("vabd", "VabdDFp", "SimdFloatAddOp", ("float",), 2, vabdfpCode)
2330 threeEqualRegInstFp("vabd", "VabdQFp", "SimdFloatAddOp", ("float",), 4, vabdfpCode)
2331
2332 twoEqualRegInst("vmla", "VmlasD", "SimdMultAccOp", unsignedTypes, 2, vmlaCode, True)
2333 twoEqualRegInst("vmla", "VmlasQ", "SimdMultAccOp", unsignedTypes, 4, vmlaCode, True)
2334 twoEqualRegInstFp("vmla", "VmlasDFp", "SimdFloatMultAccOp", ("float",), 2, vmlafpCode, True)
2335 twoEqualRegInstFp("vmla", "VmlasQFp", "SimdFloatMultAccOp", ("float",), 4, vmlafpCode, True)

--- 100 unchanged lines hidden (view full) ---

2436 destElem = destElem;
2437 else
2438 destElem = (srcElem1 << imm) | (destElem & mask(imm));
2439 '''
2440 twoRegShiftInst("vsli", "NVsliD", "SimdShiftOp", unsignedTypes, 2, vsliCode, True)
2441 twoRegShiftInst("vsli", "NVsliQ", "SimdShiftOp", unsignedTypes, 4, vsliCode, True)
2442
2443 vqshlCode = '''
2444 FPSCR fpscr = (FPSCR)Fpscr;
2444 FPSCR fpscr = (FPSCR) FpscrQc;
2445 if (imm >= sizeof(Element) * 8) {
2446 if (srcElem1 != 0) {
2447 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2448 if (srcElem1 > 0)
2449 destElem = ~destElem;
2450 fpscr.qc = 1;
2451 } else {
2452 destElem = 0;

--- 7 unchanged lines hidden (view full) ---

2460 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2461 if (srcElem1 > 0)
2462 destElem = ~destElem;
2463 fpscr.qc = 1;
2464 }
2465 } else {
2466 destElem = srcElem1;
2467 }
2445 if (imm >= sizeof(Element) * 8) {
2446 if (srcElem1 != 0) {
2447 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2448 if (srcElem1 > 0)
2449 destElem = ~destElem;
2450 fpscr.qc = 1;
2451 } else {
2452 destElem = 0;

--- 7 unchanged lines hidden (view full) ---

2460 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2461 if (srcElem1 > 0)
2462 destElem = ~destElem;
2463 fpscr.qc = 1;
2464 }
2465 } else {
2466 destElem = srcElem1;
2467 }
2468 Fpscr = fpscr;
2468 FpscrQc = fpscr;
2469 '''
2470 twoRegShiftInst("vqshl", "NVqshlD", "SimdShiftOp", signedTypes, 2, vqshlCode)
2471 twoRegShiftInst("vqshl", "NVqshlQ", "SimdShiftOp", signedTypes, 4, vqshlCode)
2472
2473 vqshluCode = '''
2469 '''
2470 twoRegShiftInst("vqshl", "NVqshlD", "SimdShiftOp", signedTypes, 2, vqshlCode)
2471 twoRegShiftInst("vqshl", "NVqshlQ", "SimdShiftOp", signedTypes, 4, vqshlCode)
2472
2473 vqshluCode = '''
2474 FPSCR fpscr = (FPSCR)Fpscr;
2474 FPSCR fpscr = (FPSCR) FpscrQc;
2475 if (imm >= sizeof(Element) * 8) {
2476 if (srcElem1 != 0) {
2477 destElem = mask(sizeof(Element) * 8);
2478 fpscr.qc = 1;
2479 } else {
2480 destElem = 0;
2481 }
2482 } else if (imm) {
2483 destElem = (srcElem1 << imm);
2484 uint64_t topBits = bits((uint64_t)srcElem1,
2485 sizeof(Element) * 8 - 1,
2486 sizeof(Element) * 8 - imm);
2487 if (topBits != 0) {
2488 destElem = mask(sizeof(Element) * 8);
2489 fpscr.qc = 1;
2490 }
2491 } else {
2492 destElem = srcElem1;
2493 }
2475 if (imm >= sizeof(Element) * 8) {
2476 if (srcElem1 != 0) {
2477 destElem = mask(sizeof(Element) * 8);
2478 fpscr.qc = 1;
2479 } else {
2480 destElem = 0;
2481 }
2482 } else if (imm) {
2483 destElem = (srcElem1 << imm);
2484 uint64_t topBits = bits((uint64_t)srcElem1,
2485 sizeof(Element) * 8 - 1,
2486 sizeof(Element) * 8 - imm);
2487 if (topBits != 0) {
2488 destElem = mask(sizeof(Element) * 8);
2489 fpscr.qc = 1;
2490 }
2491 } else {
2492 destElem = srcElem1;
2493 }
2494 Fpscr = fpscr;
2494 FpscrQc = fpscr;
2495 '''
2496 twoRegShiftInst("vqshlu", "NVqshluD", "SimdShiftOp", unsignedTypes, 2, vqshluCode)
2497 twoRegShiftInst("vqshlu", "NVqshluQ", "SimdShiftOp", unsignedTypes, 4, vqshluCode)
2498
2499 vqshlusCode = '''
2495 '''
2496 twoRegShiftInst("vqshlu", "NVqshluD", "SimdShiftOp", unsignedTypes, 2, vqshluCode)
2497 twoRegShiftInst("vqshlu", "NVqshluQ", "SimdShiftOp", unsignedTypes, 4, vqshluCode)
2498
2499 vqshlusCode = '''
2500 FPSCR fpscr = (FPSCR)Fpscr;
2500 FPSCR fpscr = (FPSCR) FpscrQc;
2501 if (imm >= sizeof(Element) * 8) {
2502 if (srcElem1 < 0) {
2503 destElem = 0;
2504 fpscr.qc = 1;
2505 } else if (srcElem1 > 0) {
2506 destElem = mask(sizeof(Element) * 8);
2507 fpscr.qc = 1;
2508 } else {

--- 14 unchanged lines hidden (view full) ---

2523 } else {
2524 if (srcElem1 < 0) {
2525 fpscr.qc = 1;
2526 destElem = 0;
2527 } else {
2528 destElem = srcElem1;
2529 }
2530 }
2501 if (imm >= sizeof(Element) * 8) {
2502 if (srcElem1 < 0) {
2503 destElem = 0;
2504 fpscr.qc = 1;
2505 } else if (srcElem1 > 0) {
2506 destElem = mask(sizeof(Element) * 8);
2507 fpscr.qc = 1;
2508 } else {

--- 14 unchanged lines hidden (view full) ---

2523 } else {
2524 if (srcElem1 < 0) {
2525 fpscr.qc = 1;
2526 destElem = 0;
2527 } else {
2528 destElem = srcElem1;
2529 }
2530 }
2531 Fpscr = fpscr;
2531 FpscrQc = fpscr;
2532 '''
2533 twoRegShiftInst("vqshlus", "NVqshlusD", "SimdShiftOp", signedTypes, 2, vqshlusCode)
2534 twoRegShiftInst("vqshlus", "NVqshlusQ", "SimdShiftOp", signedTypes, 4, vqshlusCode)
2535
2536 vshrnCode = '''
2537 if (imm >= sizeof(srcElem1) * 8) {
2538 destElem = 0;
2539 } else {

--- 10 unchanged lines hidden (view full) ---

2550 destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2551 } else {
2552 destElem = srcElem1;
2553 }
2554 '''
2555 twoRegNarrowShiftInst("vrshrn", "NVrshrn", "SimdShiftOp", smallUnsignedTypes, vrshrnCode)
2556
2557 vqshrnCode = '''
2532 '''
2533 twoRegShiftInst("vqshlus", "NVqshlusD", "SimdShiftOp", signedTypes, 2, vqshlusCode)
2534 twoRegShiftInst("vqshlus", "NVqshlusQ", "SimdShiftOp", signedTypes, 4, vqshlusCode)
2535
2536 vshrnCode = '''
2537 if (imm >= sizeof(srcElem1) * 8) {
2538 destElem = 0;
2539 } else {

--- 10 unchanged lines hidden (view full) ---

2550 destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2551 } else {
2552 destElem = srcElem1;
2553 }
2554 '''
2555 twoRegNarrowShiftInst("vrshrn", "NVrshrn", "SimdShiftOp", smallUnsignedTypes, vrshrnCode)
2556
2557 vqshrnCode = '''
2558 FPSCR fpscr = (FPSCR)Fpscr;
2558 FPSCR fpscr = (FPSCR) FpscrQc;
2559 if (imm > sizeof(srcElem1) * 8) {
2560 if (srcElem1 != 0 && srcElem1 != -1)
2561 fpscr.qc = 1;
2562 destElem = 0;
2563 } else if (imm) {
2564 BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
2565 mid |= -(mid & ((BigElement)1 <<
2566 (sizeof(BigElement) * 8 - 1 - imm)));
2567 if (mid != (Element)mid) {
2568 destElem = mask(sizeof(Element) * 8 - 1);
2569 if (srcElem1 < 0)
2570 destElem = ~destElem;
2571 fpscr.qc = 1;
2572 } else {
2573 destElem = mid;
2574 }
2575 } else {
2576 destElem = srcElem1;
2577 }
2559 if (imm > sizeof(srcElem1) * 8) {
2560 if (srcElem1 != 0 && srcElem1 != -1)
2561 fpscr.qc = 1;
2562 destElem = 0;
2563 } else if (imm) {
2564 BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
2565 mid |= -(mid & ((BigElement)1 <<
2566 (sizeof(BigElement) * 8 - 1 - imm)));
2567 if (mid != (Element)mid) {
2568 destElem = mask(sizeof(Element) * 8 - 1);
2569 if (srcElem1 < 0)
2570 destElem = ~destElem;
2571 fpscr.qc = 1;
2572 } else {
2573 destElem = mid;
2574 }
2575 } else {
2576 destElem = srcElem1;
2577 }
2578 Fpscr = fpscr;
2578 FpscrQc = fpscr;
2579 '''
2580 twoRegNarrowShiftInst("vqshrn", "NVqshrn", "SimdShiftOp", smallSignedTypes, vqshrnCode)
2581
2582 vqshrunCode = '''
2579 '''
2580 twoRegNarrowShiftInst("vqshrn", "NVqshrn", "SimdShiftOp", smallSignedTypes, vqshrnCode)
2581
2582 vqshrunCode = '''
2583 FPSCR fpscr = (FPSCR)Fpscr;
2583 FPSCR fpscr = (FPSCR) FpscrQc;
2584 if (imm > sizeof(srcElem1) * 8) {
2585 if (srcElem1 != 0)
2586 fpscr.qc = 1;
2587 destElem = 0;
2588 } else if (imm) {
2589 BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
2590 if (mid != (Element)mid) {
2591 destElem = mask(sizeof(Element) * 8);
2592 fpscr.qc = 1;
2593 } else {
2594 destElem = mid;
2595 }
2596 } else {
2597 destElem = srcElem1;
2598 }
2584 if (imm > sizeof(srcElem1) * 8) {
2585 if (srcElem1 != 0)
2586 fpscr.qc = 1;
2587 destElem = 0;
2588 } else if (imm) {
2589 BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
2590 if (mid != (Element)mid) {
2591 destElem = mask(sizeof(Element) * 8);
2592 fpscr.qc = 1;
2593 } else {
2594 destElem = mid;
2595 }
2596 } else {
2597 destElem = srcElem1;
2598 }
2599 Fpscr = fpscr;
2599 FpscrQc = fpscr;
2600 '''
2601 twoRegNarrowShiftInst("vqshrun", "NVqshrun",
2602 "SimdShiftOp", smallUnsignedTypes, vqshrunCode)
2603
2604 vqshrunsCode = '''
2600 '''
2601 twoRegNarrowShiftInst("vqshrun", "NVqshrun",
2602 "SimdShiftOp", smallUnsignedTypes, vqshrunCode)
2603
2604 vqshrunsCode = '''
2605 FPSCR fpscr = (FPSCR)Fpscr;
2605 FPSCR fpscr = (FPSCR) FpscrQc;
2606 if (imm > sizeof(srcElem1) * 8) {
2607 if (srcElem1 != 0)
2608 fpscr.qc = 1;
2609 destElem = 0;
2610 } else if (imm) {
2611 BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
2612 if (bits(mid, sizeof(BigElement) * 8 - 1,
2613 sizeof(Element) * 8) != 0) {

--- 4 unchanged lines hidden (view full) ---

2618 }
2619 fpscr.qc = 1;
2620 } else {
2621 destElem = mid;
2622 }
2623 } else {
2624 destElem = srcElem1;
2625 }
2606 if (imm > sizeof(srcElem1) * 8) {
2607 if (srcElem1 != 0)
2608 fpscr.qc = 1;
2609 destElem = 0;
2610 } else if (imm) {
2611 BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
2612 if (bits(mid, sizeof(BigElement) * 8 - 1,
2613 sizeof(Element) * 8) != 0) {

--- 4 unchanged lines hidden (view full) ---

2618 }
2619 fpscr.qc = 1;
2620 } else {
2621 destElem = mid;
2622 }
2623 } else {
2624 destElem = srcElem1;
2625 }
2626 Fpscr = fpscr;
2626 FpscrQc = fpscr;
2627 '''
2628 twoRegNarrowShiftInst("vqshrun", "NVqshruns",
2629 "SimdShiftOp", smallSignedTypes, vqshrunsCode)
2630
2631 vqrshrnCode = '''
2627 '''
2628 twoRegNarrowShiftInst("vqshrun", "NVqshruns",
2629 "SimdShiftOp", smallSignedTypes, vqshrunsCode)
2630
2631 vqrshrnCode = '''
2632 FPSCR fpscr = (FPSCR)Fpscr;
2632 FPSCR fpscr = (FPSCR) FpscrQc;
2633 if (imm > sizeof(srcElem1) * 8) {
2634 if (srcElem1 != 0 && srcElem1 != -1)
2635 fpscr.qc = 1;
2636 destElem = 0;
2637 } else if (imm) {
2638 BigElement mid = (srcElem1 >> (imm - 1));
2639 uint64_t rBit = mid & 0x1;
2640 mid >>= 1;

--- 13 unchanged lines hidden (view full) ---

2654 destElem = mask(sizeof(Element) * 8 - 1);
2655 if (srcElem1 < 0)
2656 destElem = ~destElem;
2657 fpscr.qc = 1;
2658 } else {
2659 destElem = srcElem1;
2660 }
2661 }
2633 if (imm > sizeof(srcElem1) * 8) {
2634 if (srcElem1 != 0 && srcElem1 != -1)
2635 fpscr.qc = 1;
2636 destElem = 0;
2637 } else if (imm) {
2638 BigElement mid = (srcElem1 >> (imm - 1));
2639 uint64_t rBit = mid & 0x1;
2640 mid >>= 1;

--- 13 unchanged lines hidden (view full) ---

2654 destElem = mask(sizeof(Element) * 8 - 1);
2655 if (srcElem1 < 0)
2656 destElem = ~destElem;
2657 fpscr.qc = 1;
2658 } else {
2659 destElem = srcElem1;
2660 }
2661 }
2662 Fpscr = fpscr;
2662 FpscrQc = fpscr;
2663 '''
2664 twoRegNarrowShiftInst("vqrshrn", "NVqrshrn",
2665 "SimdShiftOp", smallSignedTypes, vqrshrnCode)
2666
2667 vqrshrunCode = '''
2663 '''
2664 twoRegNarrowShiftInst("vqrshrn", "NVqrshrn",
2665 "SimdShiftOp", smallSignedTypes, vqrshrnCode)
2666
2667 vqrshrunCode = '''
2668 FPSCR fpscr = (FPSCR)Fpscr;
2668 FPSCR fpscr = (FPSCR) FpscrQc;
2669 if (imm > sizeof(srcElem1) * 8) {
2670 if (srcElem1 != 0)
2671 fpscr.qc = 1;
2672 destElem = 0;
2673 } else if (imm) {
2674 BigElement mid = (srcElem1 >> (imm - 1));
2675 uint64_t rBit = mid & 0x1;
2676 mid >>= 1;

--- 7 unchanged lines hidden (view full) ---

2684 } else {
2685 if (srcElem1 != (Element)srcElem1) {
2686 destElem = mask(sizeof(Element) * 8 - 1);
2687 fpscr.qc = 1;
2688 } else {
2689 destElem = srcElem1;
2690 }
2691 }
2669 if (imm > sizeof(srcElem1) * 8) {
2670 if (srcElem1 != 0)
2671 fpscr.qc = 1;
2672 destElem = 0;
2673 } else if (imm) {
2674 BigElement mid = (srcElem1 >> (imm - 1));
2675 uint64_t rBit = mid & 0x1;
2676 mid >>= 1;

--- 7 unchanged lines hidden (view full) ---

2684 } else {
2685 if (srcElem1 != (Element)srcElem1) {
2686 destElem = mask(sizeof(Element) * 8 - 1);
2687 fpscr.qc = 1;
2688 } else {
2689 destElem = srcElem1;
2690 }
2691 }
2692 Fpscr = fpscr;
2692 FpscrQc = fpscr;
2693 '''
2694 twoRegNarrowShiftInst("vqrshrun", "NVqrshrun",
2695 "SimdShiftOp", smallUnsignedTypes, vqrshrunCode)
2696
2697 vqrshrunsCode = '''
2693 '''
2694 twoRegNarrowShiftInst("vqrshrun", "NVqrshrun",
2695 "SimdShiftOp", smallUnsignedTypes, vqrshrunCode)
2696
2697 vqrshrunsCode = '''
2698 FPSCR fpscr = (FPSCR)Fpscr;
2698 FPSCR fpscr = (FPSCR) FpscrQc;
2699 if (imm > sizeof(srcElem1) * 8) {
2700 if (srcElem1 != 0)
2701 fpscr.qc = 1;
2702 destElem = 0;
2703 } else if (imm) {
2704 BigElement mid = (srcElem1 >> (imm - 1));
2705 uint64_t rBit = mid & 0x1;
2706 mid >>= 1;

--- 14 unchanged lines hidden (view full) ---

2721 } else {
2722 if (srcElem1 < 0) {
2723 fpscr.qc = 1;
2724 destElem = 0;
2725 } else {
2726 destElem = srcElem1;
2727 }
2728 }
2699 if (imm > sizeof(srcElem1) * 8) {
2700 if (srcElem1 != 0)
2701 fpscr.qc = 1;
2702 destElem = 0;
2703 } else if (imm) {
2704 BigElement mid = (srcElem1 >> (imm - 1));
2705 uint64_t rBit = mid & 0x1;
2706 mid >>= 1;

--- 14 unchanged lines hidden (view full) ---

2721 } else {
2722 if (srcElem1 < 0) {
2723 fpscr.qc = 1;
2724 destElem = 0;
2725 } else {
2726 destElem = srcElem1;
2727 }
2728 }
2729 Fpscr = fpscr;
2729 FpscrQc = fpscr;
2730 '''
2731 twoRegNarrowShiftInst("vqrshrun", "NVqrshruns",
2732 "SimdShiftOp", smallSignedTypes, vqrshrunsCode)
2733
2734 vshllCode = '''
2735 if (imm >= sizeof(destElem) * 8) {
2736 destElem = 0;
2737 } else {
2738 destElem = (BigElement)srcElem1 << imm;
2739 }
2740 '''
2741 twoRegLongShiftInst("vshll", "NVshll", "SimdShiftOp", smallTypes, vshllCode)
2742
2743 vmovlCode = '''
2744 destElem = srcElem1;
2745 '''
2746 twoRegLongShiftInst("vmovl", "NVmovl", "SimdMiscOp", smallTypes, vmovlCode)
2747
2748 vcvt2ufxCode = '''
2730 '''
2731 twoRegNarrowShiftInst("vqrshrun", "NVqrshruns",
2732 "SimdShiftOp", smallSignedTypes, vqrshrunsCode)
2733
2734 vshllCode = '''
2735 if (imm >= sizeof(destElem) * 8) {
2736 destElem = 0;
2737 } else {
2738 destElem = (BigElement)srcElem1 << imm;
2739 }
2740 '''
2741 twoRegLongShiftInst("vshll", "NVshll", "SimdShiftOp", smallTypes, vshllCode)
2742
2743 vmovlCode = '''
2744 destElem = srcElem1;
2745 '''
2746 twoRegLongShiftInst("vmovl", "NVmovl", "SimdMiscOp", smallTypes, vmovlCode)
2747
2748 vcvt2ufxCode = '''
2749 FPSCR fpscr = Fpscr;
2749 FPSCR fpscr = (FPSCR) FpscrExc;
2750 if (flushToZero(srcElem1))
2751 fpscr.idc = 1;
2752 VfpSavedState state = prepFpState(VfpRoundNearest);
2753 __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
2754 destReg = vfpFpSToFixed(srcElem1, false, false, imm);
2755 __asm__ __volatile__("" :: "m" (destReg));
2756 finishVfp(fpscr, state, true);
2750 if (flushToZero(srcElem1))
2751 fpscr.idc = 1;
2752 VfpSavedState state = prepFpState(VfpRoundNearest);
2753 __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
2754 destReg = vfpFpSToFixed(srcElem1, false, false, imm);
2755 __asm__ __volatile__("" :: "m" (destReg));
2756 finishVfp(fpscr, state, true);
2757 Fpscr = fpscr;
2757 FpscrExc = fpscr;
2758 '''
2759 twoRegShiftInst("vcvt", "NVcvt2ufxD", "SimdCvtOp", ("float",),
2760 2, vcvt2ufxCode, toInt = True)
2761 twoRegShiftInst("vcvt", "NVcvt2ufxQ", "SimdCvtOp", ("float",),
2762 4, vcvt2ufxCode, toInt = True)
2763
2764 vcvt2sfxCode = '''
2758 '''
2759 twoRegShiftInst("vcvt", "NVcvt2ufxD", "SimdCvtOp", ("float",),
2760 2, vcvt2ufxCode, toInt = True)
2761 twoRegShiftInst("vcvt", "NVcvt2ufxQ", "SimdCvtOp", ("float",),
2762 4, vcvt2ufxCode, toInt = True)
2763
2764 vcvt2sfxCode = '''
2765 FPSCR fpscr = Fpscr;
2765 FPSCR fpscr = (FPSCR) FpscrExc;
2766 if (flushToZero(srcElem1))
2767 fpscr.idc = 1;
2768 VfpSavedState state = prepFpState(VfpRoundNearest);
2769 __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
2770 destReg = vfpFpSToFixed(srcElem1, true, false, imm);
2771 __asm__ __volatile__("" :: "m" (destReg));
2772 finishVfp(fpscr, state, true);
2766 if (flushToZero(srcElem1))
2767 fpscr.idc = 1;
2768 VfpSavedState state = prepFpState(VfpRoundNearest);
2769 __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
2770 destReg = vfpFpSToFixed(srcElem1, true, false, imm);
2771 __asm__ __volatile__("" :: "m" (destReg));
2772 finishVfp(fpscr, state, true);
2773 Fpscr = fpscr;
2773 FpscrExc = fpscr;
2774 '''
2775 twoRegShiftInst("vcvt", "NVcvt2sfxD", "SimdCvtOp", ("float",),
2776 2, vcvt2sfxCode, toInt = True)
2777 twoRegShiftInst("vcvt", "NVcvt2sfxQ", "SimdCvtOp", ("float",),
2778 4, vcvt2sfxCode, toInt = True)
2779
2780 vcvtu2fpCode = '''
2774 '''
2775 twoRegShiftInst("vcvt", "NVcvt2sfxD", "SimdCvtOp", ("float",),
2776 2, vcvt2sfxCode, toInt = True)
2777 twoRegShiftInst("vcvt", "NVcvt2sfxQ", "SimdCvtOp", ("float",),
2778 4, vcvt2sfxCode, toInt = True)
2779
2780 vcvtu2fpCode = '''
2781 FPSCR fpscr = Fpscr;
2781 FPSCR fpscr = (FPSCR) FpscrExc;
2782 VfpSavedState state = prepFpState(VfpRoundNearest);
2783 __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
2784 destElem = vfpUFixedToFpS(true, true, srcReg1, false, imm);
2785 __asm__ __volatile__("" :: "m" (destElem));
2786 finishVfp(fpscr, state, true);
2782 VfpSavedState state = prepFpState(VfpRoundNearest);
2783 __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
2784 destElem = vfpUFixedToFpS(true, true, srcReg1, false, imm);
2785 __asm__ __volatile__("" :: "m" (destElem));
2786 finishVfp(fpscr, state, true);
2787 Fpscr = fpscr;
2787 FpscrExc = fpscr;
2788 '''
2789 twoRegShiftInst("vcvt", "NVcvtu2fpD", "SimdCvtOp", ("float",),
2790 2, vcvtu2fpCode, fromInt = True)
2791 twoRegShiftInst("vcvt", "NVcvtu2fpQ", "SimdCvtOp", ("float",),
2792 4, vcvtu2fpCode, fromInt = True)
2793
2794 vcvts2fpCode = '''
2788 '''
2789 twoRegShiftInst("vcvt", "NVcvtu2fpD", "SimdCvtOp", ("float",),
2790 2, vcvtu2fpCode, fromInt = True)
2791 twoRegShiftInst("vcvt", "NVcvtu2fpQ", "SimdCvtOp", ("float",),
2792 4, vcvtu2fpCode, fromInt = True)
2793
2794 vcvts2fpCode = '''
2795 FPSCR fpscr = Fpscr;
2795 FPSCR fpscr = (FPSCR) FpscrExc;
2796 VfpSavedState state = prepFpState(VfpRoundNearest);
2797 __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
2798 destElem = vfpSFixedToFpS(true, true, srcReg1, false, imm);
2799 __asm__ __volatile__("" :: "m" (destElem));
2800 finishVfp(fpscr, state, true);
2796 VfpSavedState state = prepFpState(VfpRoundNearest);
2797 __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
2798 destElem = vfpSFixedToFpS(true, true, srcReg1, false, imm);
2799 __asm__ __volatile__("" :: "m" (destElem));
2800 finishVfp(fpscr, state, true);
2801 Fpscr = fpscr;
2801 FpscrExc = fpscr;
2802 '''
2803 twoRegShiftInst("vcvt", "NVcvts2fpD", "SimdCvtOp", ("float",),
2804 2, vcvts2fpCode, fromInt = True)
2805 twoRegShiftInst("vcvt", "NVcvts2fpQ", "SimdCvtOp", ("float",),
2806 4, vcvts2fpCode, fromInt = True)
2807
2808 vcvts2hCode = '''
2802 '''
2803 twoRegShiftInst("vcvt", "NVcvts2fpD", "SimdCvtOp", ("float",),
2804 2, vcvts2fpCode, fromInt = True)
2805 twoRegShiftInst("vcvt", "NVcvts2fpQ", "SimdCvtOp", ("float",),
2806 4, vcvts2fpCode, fromInt = True)
2807
2808 vcvts2hCode = '''
2809 FPSCR fpscr = Fpscr;
2809 FPSCR fpscr = (FPSCR) FpscrExc;
2810 float srcFp1 = bitsToFp(srcElem1, (float)0.0);
2811 if (flushToZero(srcFp1))
2812 fpscr.idc = 1;
2813 VfpSavedState state = prepFpState(VfpRoundNearest);
2814 __asm__ __volatile__("" : "=m" (srcFp1), "=m" (destElem)
2815 : "m" (srcFp1), "m" (destElem));
2816 destElem = vcvtFpSFpH(fpscr, true, true, VfpRoundNearest,
2817 fpscr.ahp, srcFp1);
2818 __asm__ __volatile__("" :: "m" (destElem));
2819 finishVfp(fpscr, state, true);
2810 float srcFp1 = bitsToFp(srcElem1, (float)0.0);
2811 if (flushToZero(srcFp1))
2812 fpscr.idc = 1;
2813 VfpSavedState state = prepFpState(VfpRoundNearest);
2814 __asm__ __volatile__("" : "=m" (srcFp1), "=m" (destElem)
2815 : "m" (srcFp1), "m" (destElem));
2816 destElem = vcvtFpSFpH(fpscr, true, true, VfpRoundNearest,
2817 fpscr.ahp, srcFp1);
2818 __asm__ __volatile__("" :: "m" (destElem));
2819 finishVfp(fpscr, state, true);
2820 Fpscr = fpscr;
2820 FpscrExc = fpscr;
2821 '''
2822 twoRegNarrowMiscInst("vcvt", "NVcvts2h", "SimdCvtOp", ("uint16_t",), vcvts2hCode)
2823
2824 vcvth2sCode = '''
2821 '''
2822 twoRegNarrowMiscInst("vcvt", "NVcvts2h", "SimdCvtOp", ("uint16_t",), vcvts2hCode)
2823
2824 vcvth2sCode = '''
2825 FPSCR fpscr = Fpscr;
2825 FPSCR fpscr = (FPSCR) FpscrExc;
2826 VfpSavedState state = prepFpState(VfpRoundNearest);
2827 __asm__ __volatile__("" : "=m" (srcElem1), "=m" (destElem)
2828 : "m" (srcElem1), "m" (destElem));
2829 destElem = fpToBits(vcvtFpHFpS(fpscr, true, fpscr.ahp, srcElem1));
2830 __asm__ __volatile__("" :: "m" (destElem));
2831 finishVfp(fpscr, state, true);
2826 VfpSavedState state = prepFpState(VfpRoundNearest);
2827 __asm__ __volatile__("" : "=m" (srcElem1), "=m" (destElem)
2828 : "m" (srcElem1), "m" (destElem));
2829 destElem = fpToBits(vcvtFpHFpS(fpscr, true, fpscr.ahp, srcElem1));
2830 __asm__ __volatile__("" :: "m" (destElem));
2831 finishVfp(fpscr, state, true);
2832 Fpscr = fpscr;
2832 FpscrExc = fpscr;
2833 '''
2834 twoRegLongMiscInst("vcvt", "NVcvth2s", "SimdCvtOp", ("uint16_t",), vcvth2sCode)
2835
2836 vrsqrteCode = '''
2837 destElem = unsignedRSqrtEstimate(srcElem1);
2838 '''
2839 twoRegMiscInst("vrsqrte", "NVrsqrteD", "SimdSqrtOp", ("uint32_t",), 2, vrsqrteCode)
2840 twoRegMiscInst("vrsqrte", "NVrsqrteQ", "SimdSqrtOp", ("uint32_t",), 4, vrsqrteCode)
2841
2842 vrsqrtefpCode = '''
2833 '''
2834 twoRegLongMiscInst("vcvt", "NVcvth2s", "SimdCvtOp", ("uint16_t",), vcvth2sCode)
2835
2836 vrsqrteCode = '''
2837 destElem = unsignedRSqrtEstimate(srcElem1);
2838 '''
2839 twoRegMiscInst("vrsqrte", "NVrsqrteD", "SimdSqrtOp", ("uint32_t",), 2, vrsqrteCode)
2840 twoRegMiscInst("vrsqrte", "NVrsqrteQ", "SimdSqrtOp", ("uint32_t",), 4, vrsqrteCode)
2841
2842 vrsqrtefpCode = '''
2843 FPSCR fpscr = Fpscr;
2843 FPSCR fpscr = (FPSCR) FpscrExc;
2844 if (flushToZero(srcReg1))
2845 fpscr.idc = 1;
2846 destReg = fprSqrtEstimate(fpscr, srcReg1);
2844 if (flushToZero(srcReg1))
2845 fpscr.idc = 1;
2846 destReg = fprSqrtEstimate(fpscr, srcReg1);
2847 Fpscr = fpscr;
2847 FpscrExc = fpscr;
2848 '''
2849 twoRegMiscInstFp("vrsqrte", "NVrsqrteDFp", "SimdFloatSqrtOp", ("float",), 2, vrsqrtefpCode)
2850 twoRegMiscInstFp("vrsqrte", "NVrsqrteQFp", "SimdFloatSqrtOp", ("float",), 4, vrsqrtefpCode)
2851
2852 vrecpeCode = '''
2853 destElem = unsignedRecipEstimate(srcElem1);
2854 '''
2855 twoRegMiscInst("vrecpe", "NVrecpeD", "SimdMultAccOp", ("uint32_t",), 2, vrecpeCode)
2856 twoRegMiscInst("vrecpe", "NVrecpeQ", "SimdMultAccOp", ("uint32_t",), 4, vrecpeCode)
2857
2858 vrecpefpCode = '''
2848 '''
2849 twoRegMiscInstFp("vrsqrte", "NVrsqrteDFp", "SimdFloatSqrtOp", ("float",), 2, vrsqrtefpCode)
2850 twoRegMiscInstFp("vrsqrte", "NVrsqrteQFp", "SimdFloatSqrtOp", ("float",), 4, vrsqrtefpCode)
2851
2852 vrecpeCode = '''
2853 destElem = unsignedRecipEstimate(srcElem1);
2854 '''
2855 twoRegMiscInst("vrecpe", "NVrecpeD", "SimdMultAccOp", ("uint32_t",), 2, vrecpeCode)
2856 twoRegMiscInst("vrecpe", "NVrecpeQ", "SimdMultAccOp", ("uint32_t",), 4, vrecpeCode)
2857
2858 vrecpefpCode = '''
2859 FPSCR fpscr = Fpscr;
2859 FPSCR fpscr = (FPSCR) FpscrExc;
2860 if (flushToZero(srcReg1))
2861 fpscr.idc = 1;
2862 destReg = fpRecipEstimate(fpscr, srcReg1);
2860 if (flushToZero(srcReg1))
2861 fpscr.idc = 1;
2862 destReg = fpRecipEstimate(fpscr, srcReg1);
2863 Fpscr = fpscr;
2863 FpscrExc = fpscr;
2864 '''
2865 twoRegMiscInstFp("vrecpe", "NVrecpeDFp", "SimdFloatMultAccOp", ("float",), 2, vrecpefpCode)
2866 twoRegMiscInstFp("vrecpe", "NVrecpeQFp", "SimdFloatMultAccOp", ("float",), 4, vrecpefpCode)
2867
2868 vrev16Code = '''
2869 destElem = srcElem1;
2870 unsigned groupSize = ((1 << 1) / sizeof(Element));
2871 unsigned reverseMask = (groupSize - 1);

--- 77 unchanged lines hidden (view full) ---

2949
2950 vmvnCode = '''
2951 destElem = ~srcElem1;
2952 '''
2953 twoRegMiscInst("vmvn", "NVmvnD", "SimdAluOp", ("uint64_t",), 2, vmvnCode)
2954 twoRegMiscInst("vmvn", "NVmvnQ", "SimdAluOp", ("uint64_t",), 4, vmvnCode)
2955
2956 vqabsCode = '''
2864 '''
2865 twoRegMiscInstFp("vrecpe", "NVrecpeDFp", "SimdFloatMultAccOp", ("float",), 2, vrecpefpCode)
2866 twoRegMiscInstFp("vrecpe", "NVrecpeQFp", "SimdFloatMultAccOp", ("float",), 4, vrecpefpCode)
2867
2868 vrev16Code = '''
2869 destElem = srcElem1;
2870 unsigned groupSize = ((1 << 1) / sizeof(Element));
2871 unsigned reverseMask = (groupSize - 1);

--- 77 unchanged lines hidden (view full) ---

2949
2950 vmvnCode = '''
2951 destElem = ~srcElem1;
2952 '''
2953 twoRegMiscInst("vmvn", "NVmvnD", "SimdAluOp", ("uint64_t",), 2, vmvnCode)
2954 twoRegMiscInst("vmvn", "NVmvnQ", "SimdAluOp", ("uint64_t",), 4, vmvnCode)
2955
2956 vqabsCode = '''
2957 FPSCR fpscr = (FPSCR)Fpscr;
2957 FPSCR fpscr = (FPSCR) FpscrQc;
2958 if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) {
2959 fpscr.qc = 1;
2960 destElem = ~srcElem1;
2961 } else if (srcElem1 < 0) {
2962 destElem = -srcElem1;
2963 } else {
2964 destElem = srcElem1;
2965 }
2958 if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) {
2959 fpscr.qc = 1;
2960 destElem = ~srcElem1;
2961 } else if (srcElem1 < 0) {
2962 destElem = -srcElem1;
2963 } else {
2964 destElem = srcElem1;
2965 }
2966 Fpscr = fpscr;
2966 FpscrQc = fpscr;
2967 '''
2968 twoRegMiscInst("vqabs", "NVqabsD", "SimdAluOp", signedTypes, 2, vqabsCode)
2969 twoRegMiscInst("vqabs", "NVqabsQ", "SimdAluOp", signedTypes, 4, vqabsCode)
2970
2971 vqnegCode = '''
2967 '''
2968 twoRegMiscInst("vqabs", "NVqabsD", "SimdAluOp", signedTypes, 2, vqabsCode)
2969 twoRegMiscInst("vqabs", "NVqabsQ", "SimdAluOp", signedTypes, 4, vqabsCode)
2970
2971 vqnegCode = '''
2972 FPSCR fpscr = (FPSCR)Fpscr;
2972 FPSCR fpscr = (FPSCR) FpscrQc;
2973 if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) {
2974 fpscr.qc = 1;
2975 destElem = ~srcElem1;
2976 } else {
2977 destElem = -srcElem1;
2978 }
2973 if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) {
2974 fpscr.qc = 1;
2975 destElem = ~srcElem1;
2976 } else {
2977 destElem = -srcElem1;
2978 }
2979 Fpscr = fpscr;
2979 FpscrQc = fpscr;
2980 '''
2981 twoRegMiscInst("vqneg", "NVqnegD", "SimdAluOp", signedTypes, 2, vqnegCode)
2982 twoRegMiscInst("vqneg", "NVqnegQ", "SimdAluOp", signedTypes, 4, vqnegCode)
2983
2984 vabsCode = '''
2985 if (srcElem1 < 0) {
2986 destElem = -srcElem1;
2987 } else {

--- 26 unchanged lines hidden (view full) ---

3014 '''
3015 twoRegMiscInstFp("vneg", "NVnegDFp", "SimdFloatAluOp", ("float",), 2, vnegfpCode)
3016 twoRegMiscInstFp("vneg", "NVnegQFp", "SimdFloatAluOp", ("float",), 4, vnegfpCode)
3017
3018 vcgtCode = 'destElem = (srcElem1 > 0) ? mask(sizeof(Element) * 8) : 0;'
3019 twoRegMiscInst("vcgt", "NVcgtD", "SimdCmpOp", signedTypes, 2, vcgtCode)
3020 twoRegMiscInst("vcgt", "NVcgtQ", "SimdCmpOp", signedTypes, 4, vcgtCode)
3021 vcgtfpCode = '''
2980 '''
2981 twoRegMiscInst("vqneg", "NVqnegD", "SimdAluOp", signedTypes, 2, vqnegCode)
2982 twoRegMiscInst("vqneg", "NVqnegQ", "SimdAluOp", signedTypes, 4, vqnegCode)
2983
2984 vabsCode = '''
2985 if (srcElem1 < 0) {
2986 destElem = -srcElem1;
2987 } else {

--- 26 unchanged lines hidden (view full) ---

3014 '''
3015 twoRegMiscInstFp("vneg", "NVnegDFp", "SimdFloatAluOp", ("float",), 2, vnegfpCode)
3016 twoRegMiscInstFp("vneg", "NVnegQFp", "SimdFloatAluOp", ("float",), 4, vnegfpCode)
3017
3018 vcgtCode = 'destElem = (srcElem1 > 0) ? mask(sizeof(Element) * 8) : 0;'
3019 twoRegMiscInst("vcgt", "NVcgtD", "SimdCmpOp", signedTypes, 2, vcgtCode)
3020 twoRegMiscInst("vcgt", "NVcgtQ", "SimdCmpOp", signedTypes, 4, vcgtCode)
3021 vcgtfpCode = '''
3022 FPSCR fpscr = (FPSCR)Fpscr;
3022 FPSCR fpscr = (FPSCR) FpscrExc;
3023 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgtFunc,
3024 true, true, VfpRoundNearest);
3025 destReg = (res == 0) ? -1 : 0;
3026 if (res == 2.0)
3027 fpscr.ioc = 1;
3023 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgtFunc,
3024 true, true, VfpRoundNearest);
3025 destReg = (res == 0) ? -1 : 0;
3026 if (res == 2.0)
3027 fpscr.ioc = 1;
3028 Fpscr = fpscr;
3028 FpscrExc = fpscr;
3029 '''
3030 twoRegMiscInstFp("vcgt", "NVcgtDFp", "SimdFloatCmpOp", ("float",),
3031 2, vcgtfpCode, toInt = True)
3032 twoRegMiscInstFp("vcgt", "NVcgtQFp", "SimdFloatCmpOp", ("float",),
3033 4, vcgtfpCode, toInt = True)
3034
3035 vcgeCode = 'destElem = (srcElem1 >= 0) ? mask(sizeof(Element) * 8) : 0;'
3036 twoRegMiscInst("vcge", "NVcgeD", "SimdCmpOp", signedTypes, 2, vcgeCode)
3037 twoRegMiscInst("vcge", "NVcgeQ", "SimdCmpOp", signedTypes, 4, vcgeCode)
3038 vcgefpCode = '''
3029 '''
3030 twoRegMiscInstFp("vcgt", "NVcgtDFp", "SimdFloatCmpOp", ("float",),
3031 2, vcgtfpCode, toInt = True)
3032 twoRegMiscInstFp("vcgt", "NVcgtQFp", "SimdFloatCmpOp", ("float",),
3033 4, vcgtfpCode, toInt = True)
3034
3035 vcgeCode = 'destElem = (srcElem1 >= 0) ? mask(sizeof(Element) * 8) : 0;'
3036 twoRegMiscInst("vcge", "NVcgeD", "SimdCmpOp", signedTypes, 2, vcgeCode)
3037 twoRegMiscInst("vcge", "NVcgeQ", "SimdCmpOp", signedTypes, 4, vcgeCode)
3038 vcgefpCode = '''
3039 FPSCR fpscr = (FPSCR)Fpscr;
3039 FPSCR fpscr = (FPSCR) FpscrExc;
3040 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgeFunc,
3041 true, true, VfpRoundNearest);
3042 destReg = (res == 0) ? -1 : 0;
3043 if (res == 2.0)
3044 fpscr.ioc = 1;
3040 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgeFunc,
3041 true, true, VfpRoundNearest);
3042 destReg = (res == 0) ? -1 : 0;
3043 if (res == 2.0)
3044 fpscr.ioc = 1;
3045 Fpscr = fpscr;
3045 FpscrExc = fpscr;
3046 '''
3047 twoRegMiscInstFp("vcge", "NVcgeDFp", "SimdFloatCmpOp", ("float",),
3048 2, vcgefpCode, toInt = True)
3049 twoRegMiscInstFp("vcge", "NVcgeQFp", "SimdFloatCmpOp", ("float",),
3050 4, vcgefpCode, toInt = True)
3051
3052 vceqCode = 'destElem = (srcElem1 == 0) ? mask(sizeof(Element) * 8) : 0;'
3053 twoRegMiscInst("vceq", "NVceqD", "SimdCmpOp", signedTypes, 2, vceqCode)
3054 twoRegMiscInst("vceq", "NVceqQ", "SimdCmpOp", signedTypes, 4, vceqCode)
3055 vceqfpCode = '''
3046 '''
3047 twoRegMiscInstFp("vcge", "NVcgeDFp", "SimdFloatCmpOp", ("float",),
3048 2, vcgefpCode, toInt = True)
3049 twoRegMiscInstFp("vcge", "NVcgeQFp", "SimdFloatCmpOp", ("float",),
3050 4, vcgefpCode, toInt = True)
3051
3052 vceqCode = 'destElem = (srcElem1 == 0) ? mask(sizeof(Element) * 8) : 0;'
3053 twoRegMiscInst("vceq", "NVceqD", "SimdCmpOp", signedTypes, 2, vceqCode)
3054 twoRegMiscInst("vceq", "NVceqQ", "SimdCmpOp", signedTypes, 4, vceqCode)
3055 vceqfpCode = '''
3056 FPSCR fpscr = (FPSCR)Fpscr;
3056 FPSCR fpscr = (FPSCR) FpscrExc;
3057 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vceqFunc,
3058 true, true, VfpRoundNearest);
3059 destReg = (res == 0) ? -1 : 0;
3060 if (res == 2.0)
3061 fpscr.ioc = 1;
3057 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vceqFunc,
3058 true, true, VfpRoundNearest);
3059 destReg = (res == 0) ? -1 : 0;
3060 if (res == 2.0)
3061 fpscr.ioc = 1;
3062 Fpscr = fpscr;
3062 FpscrExc = fpscr;
3063 '''
3064 twoRegMiscInstFp("vceq", "NVceqDFp", "SimdFloatCmpOp", ("float",),
3065 2, vceqfpCode, toInt = True)
3066 twoRegMiscInstFp("vceq", "NVceqQFp", "SimdFloatCmpOp", ("float",),
3067 4, vceqfpCode, toInt = True)
3068
3069 vcleCode = 'destElem = (srcElem1 <= 0) ? mask(sizeof(Element) * 8) : 0;'
3070 twoRegMiscInst("vcle", "NVcleD", "SimdCmpOp", signedTypes, 2, vcleCode)
3071 twoRegMiscInst("vcle", "NVcleQ", "SimdCmpOp", signedTypes, 4, vcleCode)
3072 vclefpCode = '''
3063 '''
3064 twoRegMiscInstFp("vceq", "NVceqDFp", "SimdFloatCmpOp", ("float",),
3065 2, vceqfpCode, toInt = True)
3066 twoRegMiscInstFp("vceq", "NVceqQFp", "SimdFloatCmpOp", ("float",),
3067 4, vceqfpCode, toInt = True)
3068
3069 vcleCode = 'destElem = (srcElem1 <= 0) ? mask(sizeof(Element) * 8) : 0;'
3070 twoRegMiscInst("vcle", "NVcleD", "SimdCmpOp", signedTypes, 2, vcleCode)
3071 twoRegMiscInst("vcle", "NVcleQ", "SimdCmpOp", signedTypes, 4, vcleCode)
3072 vclefpCode = '''
3073 FPSCR fpscr = (FPSCR)Fpscr;
3073 FPSCR fpscr = (FPSCR) FpscrExc;
3074 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcleFunc,
3075 true, true, VfpRoundNearest);
3076 destReg = (res == 0) ? -1 : 0;
3077 if (res == 2.0)
3078 fpscr.ioc = 1;
3074 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcleFunc,
3075 true, true, VfpRoundNearest);
3076 destReg = (res == 0) ? -1 : 0;
3077 if (res == 2.0)
3078 fpscr.ioc = 1;
3079 Fpscr = fpscr;
3079 FpscrExc = fpscr;
3080 '''
3081 twoRegMiscInstFp("vcle", "NVcleDFp", "SimdFloatCmpOp", ("float",),
3082 2, vclefpCode, toInt = True)
3083 twoRegMiscInstFp("vcle", "NVcleQFp", "SimdFloatCmpOp", ("float",),
3084 4, vclefpCode, toInt = True)
3085
3086 vcltCode = 'destElem = (srcElem1 < 0) ? mask(sizeof(Element) * 8) : 0;'
3087 twoRegMiscInst("vclt", "NVcltD", "SimdCmpOp", signedTypes, 2, vcltCode)
3088 twoRegMiscInst("vclt", "NVcltQ", "SimdCmpOp", signedTypes, 4, vcltCode)
3089 vcltfpCode = '''
3080 '''
3081 twoRegMiscInstFp("vcle", "NVcleDFp", "SimdFloatCmpOp", ("float",),
3082 2, vclefpCode, toInt = True)
3083 twoRegMiscInstFp("vcle", "NVcleQFp", "SimdFloatCmpOp", ("float",),
3084 4, vclefpCode, toInt = True)
3085
3086 vcltCode = 'destElem = (srcElem1 < 0) ? mask(sizeof(Element) * 8) : 0;'
3087 twoRegMiscInst("vclt", "NVcltD", "SimdCmpOp", signedTypes, 2, vcltCode)
3088 twoRegMiscInst("vclt", "NVcltQ", "SimdCmpOp", signedTypes, 4, vcltCode)
3089 vcltfpCode = '''
3090 FPSCR fpscr = (FPSCR)Fpscr;
3090 FPSCR fpscr = (FPSCR) FpscrExc;
3091 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcltFunc,
3092 true, true, VfpRoundNearest);
3093 destReg = (res == 0) ? -1 : 0;
3094 if (res == 2.0)
3095 fpscr.ioc = 1;
3091 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcltFunc,
3092 true, true, VfpRoundNearest);
3093 destReg = (res == 0) ? -1 : 0;
3094 if (res == 2.0)
3095 fpscr.ioc = 1;
3096 Fpscr = fpscr;
3096 FpscrExc = fpscr;
3097 '''
3098 twoRegMiscInstFp("vclt", "NVcltDFp", "SimdFloatCmpOp", ("float",),
3099 2, vcltfpCode, toInt = True)
3100 twoRegMiscInstFp("vclt", "NVcltQFp", "SimdFloatCmpOp", ("float",),
3101 4, vcltfpCode, toInt = True)
3102
3103 vswpCode = '''
3104 FloatRegBits mid;

--- 93 unchanged lines hidden (view full) ---

3198 oneRegImmInst("vmvn", "NVmvniD", "SimdAluOp", ("uint64_t",), 2, vmvnCode)
3199 oneRegImmInst("vmvn", "NVmvniQ", "SimdAluOp", ("uint64_t",), 4, vmvnCode)
3200
3201 vbicCode = 'destElem &= ~imm;'
3202 oneRegImmInst("vbic", "NVbiciD", "SimdAluOp", ("uint64_t",), 2, vbicCode, True)
3203 oneRegImmInst("vbic", "NVbiciQ", "SimdAluOp", ("uint64_t",), 4, vbicCode, True)
3204
3205 vqmovnCode = '''
3097 '''
3098 twoRegMiscInstFp("vclt", "NVcltDFp", "SimdFloatCmpOp", ("float",),
3099 2, vcltfpCode, toInt = True)
3100 twoRegMiscInstFp("vclt", "NVcltQFp", "SimdFloatCmpOp", ("float",),
3101 4, vcltfpCode, toInt = True)
3102
3103 vswpCode = '''
3104 FloatRegBits mid;

--- 93 unchanged lines hidden (view full) ---

3198 oneRegImmInst("vmvn", "NVmvniD", "SimdAluOp", ("uint64_t",), 2, vmvnCode)
3199 oneRegImmInst("vmvn", "NVmvniQ", "SimdAluOp", ("uint64_t",), 4, vmvnCode)
3200
3201 vbicCode = 'destElem &= ~imm;'
3202 oneRegImmInst("vbic", "NVbiciD", "SimdAluOp", ("uint64_t",), 2, vbicCode, True)
3203 oneRegImmInst("vbic", "NVbiciQ", "SimdAluOp", ("uint64_t",), 4, vbicCode, True)
3204
3205 vqmovnCode = '''
3206 FPSCR fpscr = (FPSCR)Fpscr;
3206 FPSCR fpscr = (FPSCR) FpscrQc;
3207 destElem = srcElem1;
3208 if ((BigElement)destElem != srcElem1) {
3209 fpscr.qc = 1;
3210 destElem = mask(sizeof(Element) * 8 - 1);
3211 if (srcElem1 < 0)
3212 destElem = ~destElem;
3213 }
3207 destElem = srcElem1;
3208 if ((BigElement)destElem != srcElem1) {
3209 fpscr.qc = 1;
3210 destElem = mask(sizeof(Element) * 8 - 1);
3211 if (srcElem1 < 0)
3212 destElem = ~destElem;
3213 }
3214 Fpscr = fpscr;
3214 FpscrQc = fpscr;
3215 '''
3216 twoRegNarrowMiscInst("vqmovn", "NVqmovn", "SimdMiscOp", smallSignedTypes, vqmovnCode)
3217
3218 vqmovunCode = '''
3215 '''
3216 twoRegNarrowMiscInst("vqmovn", "NVqmovn", "SimdMiscOp", smallSignedTypes, vqmovnCode)
3217
3218 vqmovunCode = '''
3219 FPSCR fpscr = (FPSCR)Fpscr;
3219 FPSCR fpscr = (FPSCR) FpscrQc;
3220 destElem = srcElem1;
3221 if ((BigElement)destElem != srcElem1) {
3222 fpscr.qc = 1;
3223 destElem = mask(sizeof(Element) * 8);
3224 }
3220 destElem = srcElem1;
3221 if ((BigElement)destElem != srcElem1) {
3222 fpscr.qc = 1;
3223 destElem = mask(sizeof(Element) * 8);
3224 }
3225 Fpscr = fpscr;
3225 FpscrQc = fpscr;
3226 '''
3227 twoRegNarrowMiscInst("vqmovun", "NVqmovun",
3228 "SimdMiscOp", smallUnsignedTypes, vqmovunCode)
3229
3230 vqmovunsCode = '''
3226 '''
3227 twoRegNarrowMiscInst("vqmovun", "NVqmovun",
3228 "SimdMiscOp", smallUnsignedTypes, vqmovunCode)
3229
3230 vqmovunsCode = '''
3231 FPSCR fpscr = (FPSCR)Fpscr;
3231 FPSCR fpscr = (FPSCR) FpscrQc;
3232 destElem = srcElem1;
3233 if (srcElem1 < 0 ||
3234 ((BigElement)destElem & mask(sizeof(Element) * 8)) != srcElem1) {
3235 fpscr.qc = 1;
3236 destElem = mask(sizeof(Element) * 8);
3237 if (srcElem1 < 0)
3238 destElem = ~destElem;
3239 }
3232 destElem = srcElem1;
3233 if (srcElem1 < 0 ||
3234 ((BigElement)destElem & mask(sizeof(Element) * 8)) != srcElem1) {
3235 fpscr.qc = 1;
3236 destElem = mask(sizeof(Element) * 8);
3237 if (srcElem1 < 0)
3238 destElem = ~destElem;
3239 }
3240 Fpscr = fpscr;
3240 FpscrQc = fpscr;
3241 '''
3242 twoRegNarrowMiscInst("vqmovun", "NVqmovuns",
3243 "SimdMiscOp", smallSignedTypes, vqmovunsCode)
3244
3245 def buildVext(name, Name, opClass, types, rCount, op):
3246 global header_output, exec_output
3247 eWalkCode = '''
3248 RegVect srcReg1, srcReg2, destReg;

--- 103 unchanged lines hidden ---
3241 '''
3242 twoRegNarrowMiscInst("vqmovun", "NVqmovuns",
3243 "SimdMiscOp", smallSignedTypes, vqmovunsCode)
3244
3245 def buildVext(name, Name, opClass, types, rCount, op):
3246 global header_output, exec_output
3247 eWalkCode = '''
3248 RegVect srcReg1, srcReg2, destReg;

--- 103 unchanged lines hidden ---