neon.isa revision 8782
17259Sgblack@eecs.umich.edu// -*- mode:c++ -*- 28868SMatt.Horsnell@arm.com 37259Sgblack@eecs.umich.edu// Copyright (c) 2010 ARM Limited 47259Sgblack@eecs.umich.edu// All rights reserved 57259Sgblack@eecs.umich.edu// 67259Sgblack@eecs.umich.edu// The license below extends only to copyright in the software and shall 77259Sgblack@eecs.umich.edu// not be construed as granting a license to any other intellectual 87259Sgblack@eecs.umich.edu// property including but not limited to intellectual property relating 97259Sgblack@eecs.umich.edu// to a hardware implementation of the functionality of the software 107259Sgblack@eecs.umich.edu// licensed hereunder. You may use the software subject to the license 117259Sgblack@eecs.umich.edu// terms below provided that you ensure that this notice is replicated 127259Sgblack@eecs.umich.edu// unmodified and in its entirety in all distributions of the software, 137259Sgblack@eecs.umich.edu// modified or unmodified, in source code or in binary form. 147259Sgblack@eecs.umich.edu// 157259Sgblack@eecs.umich.edu// Redistribution and use in source and binary forms, with or without 167259Sgblack@eecs.umich.edu// modification, are permitted provided that the following conditions are 177259Sgblack@eecs.umich.edu// met: redistributions of source code must retain the above copyright 187259Sgblack@eecs.umich.edu// notice, this list of conditions and the following disclaimer; 197259Sgblack@eecs.umich.edu// redistributions in binary form must reproduce the above copyright 207259Sgblack@eecs.umich.edu// notice, this list of conditions and the following disclaimer in the 217259Sgblack@eecs.umich.edu// documentation and/or other materials provided with the distribution; 227259Sgblack@eecs.umich.edu// neither the name of the copyright holders nor the names of its 237259Sgblack@eecs.umich.edu// contributors may be used to endorse or promote products derived from 247259Sgblack@eecs.umich.edu// this software without specific prior written permission. 
257259Sgblack@eecs.umich.edu// 267259Sgblack@eecs.umich.edu// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 277259Sgblack@eecs.umich.edu// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 287259Sgblack@eecs.umich.edu// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 297259Sgblack@eecs.umich.edu// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 307259Sgblack@eecs.umich.edu// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 317259Sgblack@eecs.umich.edu// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 327259Sgblack@eecs.umich.edu// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 337259Sgblack@eecs.umich.edu// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 347259Sgblack@eecs.umich.edu// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 357259Sgblack@eecs.umich.edu// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 367259Sgblack@eecs.umich.edu// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
377259Sgblack@eecs.umich.edu// 387405SAli.Saidi@ARM.com// Authors: Gabe Black 397259Sgblack@eecs.umich.edu 407259Sgblack@eecs.umich.eduoutput header {{ 417405SAli.Saidi@ARM.com template <template <typename T> class Base> 427259Sgblack@eecs.umich.edu StaticInstPtr 437404SAli.Saidi@ARM.com decodeNeonUThreeUReg(unsigned size, 447259Sgblack@eecs.umich.edu ExtMachInst machInst, IntRegIndex dest, 457259Sgblack@eecs.umich.edu IntRegIndex op1, IntRegIndex op2) 467259Sgblack@eecs.umich.edu { 477259Sgblack@eecs.umich.edu switch (size) { 487259Sgblack@eecs.umich.edu case 0: 498868SMatt.Horsnell@arm.com return new Base<uint8_t>(machInst, dest, op1, op2); 508868SMatt.Horsnell@arm.com case 1: 518868SMatt.Horsnell@arm.com return new Base<uint16_t>(machInst, dest, op1, op2); 528868SMatt.Horsnell@arm.com case 2: 538868SMatt.Horsnell@arm.com return new Base<uint32_t>(machInst, dest, op1, op2); 548868SMatt.Horsnell@arm.com case 3: 558868SMatt.Horsnell@arm.com return new Base<uint64_t>(machInst, dest, op1, op2); 568868SMatt.Horsnell@arm.com default: 578868SMatt.Horsnell@arm.com return new Unknown(machInst); 588868SMatt.Horsnell@arm.com } 598868SMatt.Horsnell@arm.com } 608868SMatt.Horsnell@arm.com 618868SMatt.Horsnell@arm.com template <template <typename T> class Base> 628868SMatt.Horsnell@arm.com StaticInstPtr 638868SMatt.Horsnell@arm.com decodeNeonSThreeUReg(unsigned size, 648868SMatt.Horsnell@arm.com ExtMachInst machInst, IntRegIndex dest, 658868SMatt.Horsnell@arm.com IntRegIndex op1, IntRegIndex op2) 668868SMatt.Horsnell@arm.com { 678868SMatt.Horsnell@arm.com switch (size) { 688868SMatt.Horsnell@arm.com case 0: 698868SMatt.Horsnell@arm.com return new Base<int8_t>(machInst, dest, op1, op2); 708868SMatt.Horsnell@arm.com case 1: 718868SMatt.Horsnell@arm.com return new Base<int16_t>(machInst, dest, op1, op2); 728868SMatt.Horsnell@arm.com case 2: 738868SMatt.Horsnell@arm.com return new Base<int32_t>(machInst, dest, op1, op2); 748868SMatt.Horsnell@arm.com case 3: 
758868SMatt.Horsnell@arm.com return new Base<int64_t>(machInst, dest, op1, op2); 768868SMatt.Horsnell@arm.com default: 778868SMatt.Horsnell@arm.com return new Unknown(machInst); 788868SMatt.Horsnell@arm.com } 797259Sgblack@eecs.umich.edu } 807259Sgblack@eecs.umich.edu 817259Sgblack@eecs.umich.edu template <template <typename T> class Base> 827259Sgblack@eecs.umich.edu StaticInstPtr 837259Sgblack@eecs.umich.edu decodeNeonUSThreeUReg(bool notSigned, unsigned size, 847259Sgblack@eecs.umich.edu ExtMachInst machInst, IntRegIndex dest, 857259Sgblack@eecs.umich.edu IntRegIndex op1, IntRegIndex op2) 867259Sgblack@eecs.umich.edu { 877259Sgblack@eecs.umich.edu if (notSigned) { 887259Sgblack@eecs.umich.edu return decodeNeonUThreeUReg<Base>(size, machInst, dest, op1, op2); 897259Sgblack@eecs.umich.edu } else { 907259Sgblack@eecs.umich.edu return decodeNeonSThreeUReg<Base>(size, machInst, dest, op1, op2); 917259Sgblack@eecs.umich.edu } 927351Sgblack@eecs.umich.edu } 937351Sgblack@eecs.umich.edu 947259Sgblack@eecs.umich.edu template <template <typename T> class Base> 957259Sgblack@eecs.umich.edu StaticInstPtr 967259Sgblack@eecs.umich.edu decodeNeonUThreeUSReg(unsigned size, 977259Sgblack@eecs.umich.edu ExtMachInst machInst, IntRegIndex dest, 987259Sgblack@eecs.umich.edu IntRegIndex op1, IntRegIndex op2) 997259Sgblack@eecs.umich.edu { 1007259Sgblack@eecs.umich.edu switch (size) { 1017259Sgblack@eecs.umich.edu case 0: 1027259Sgblack@eecs.umich.edu return new Base<uint8_t>(machInst, dest, op1, op2); 1037259Sgblack@eecs.umich.edu case 1: 1047259Sgblack@eecs.umich.edu return new Base<uint16_t>(machInst, dest, op1, op2); 1057259Sgblack@eecs.umich.edu case 2: 1067259Sgblack@eecs.umich.edu return new Base<uint32_t>(machInst, dest, op1, op2); 1077259Sgblack@eecs.umich.edu default: 1087259Sgblack@eecs.umich.edu return new Unknown(machInst); 1097259Sgblack@eecs.umich.edu } 1107259Sgblack@eecs.umich.edu } 1117259Sgblack@eecs.umich.edu 1127259Sgblack@eecs.umich.edu template <template 
<typename T> class Base> 1137259Sgblack@eecs.umich.edu StaticInstPtr 1147259Sgblack@eecs.umich.edu decodeNeonSThreeUSReg(unsigned size, 1157259Sgblack@eecs.umich.edu ExtMachInst machInst, IntRegIndex dest, 1167259Sgblack@eecs.umich.edu IntRegIndex op1, IntRegIndex op2) 1177259Sgblack@eecs.umich.edu { 1187259Sgblack@eecs.umich.edu switch (size) { 1197259Sgblack@eecs.umich.edu case 0: 1207259Sgblack@eecs.umich.edu return new Base<int8_t>(machInst, dest, op1, op2); 1217259Sgblack@eecs.umich.edu case 1: 1227259Sgblack@eecs.umich.edu return new Base<int16_t>(machInst, dest, op1, op2); 1237259Sgblack@eecs.umich.edu case 2: 1247259Sgblack@eecs.umich.edu return new Base<int32_t>(machInst, dest, op1, op2); 1257259Sgblack@eecs.umich.edu default: 1267259Sgblack@eecs.umich.edu return new Unknown(machInst); 1277259Sgblack@eecs.umich.edu } 1287259Sgblack@eecs.umich.edu } 1297259Sgblack@eecs.umich.edu 1307259Sgblack@eecs.umich.edu template <template <typename T> class Base> 1317259Sgblack@eecs.umich.edu StaticInstPtr 1327259Sgblack@eecs.umich.edu decodeNeonUSThreeUSReg(bool notSigned, unsigned size, 1337259Sgblack@eecs.umich.edu ExtMachInst machInst, IntRegIndex dest, 1347259Sgblack@eecs.umich.edu IntRegIndex op1, IntRegIndex op2) 1357259Sgblack@eecs.umich.edu { 1367259Sgblack@eecs.umich.edu if (notSigned) { 1377259Sgblack@eecs.umich.edu return decodeNeonUThreeUSReg<Base>( 1387259Sgblack@eecs.umich.edu size, machInst, dest, op1, op2); 1397259Sgblack@eecs.umich.edu } else { 1407259Sgblack@eecs.umich.edu return decodeNeonSThreeUSReg<Base>( 1417259Sgblack@eecs.umich.edu size, machInst, dest, op1, op2); 1427259Sgblack@eecs.umich.edu } 1437259Sgblack@eecs.umich.edu } 1447259Sgblack@eecs.umich.edu 1457259Sgblack@eecs.umich.edu template <template <typename T> class BaseD, 1467259Sgblack@eecs.umich.edu template <typename T> class BaseQ> 1477259Sgblack@eecs.umich.edu StaticInstPtr 1487259Sgblack@eecs.umich.edu decodeNeonUThreeSReg(bool q, unsigned size, 1497259Sgblack@eecs.umich.edu 
ExtMachInst machInst, IntRegIndex dest, 1507259Sgblack@eecs.umich.edu IntRegIndex op1, IntRegIndex op2) 1517259Sgblack@eecs.umich.edu { 1527259Sgblack@eecs.umich.edu if (q) { 1537259Sgblack@eecs.umich.edu return decodeNeonUThreeUSReg<BaseQ>( 1547259Sgblack@eecs.umich.edu size, machInst, dest, op1, op2); 1557259Sgblack@eecs.umich.edu } else { 1567259Sgblack@eecs.umich.edu return decodeNeonUThreeUSReg<BaseD>( 1577259Sgblack@eecs.umich.edu size, machInst, dest, op1, op2); 1587259Sgblack@eecs.umich.edu } 1597259Sgblack@eecs.umich.edu } 1607259Sgblack@eecs.umich.edu 1617259Sgblack@eecs.umich.edu template <template <typename T> class BaseD, 1627259Sgblack@eecs.umich.edu template <typename T> class BaseQ> 1637351Sgblack@eecs.umich.edu StaticInstPtr 1647351Sgblack@eecs.umich.edu decodeNeonSThreeSReg(bool q, unsigned size, 1657351Sgblack@eecs.umich.edu ExtMachInst machInst, IntRegIndex dest, 1667351Sgblack@eecs.umich.edu IntRegIndex op1, IntRegIndex op2) 1677351Sgblack@eecs.umich.edu { 1687351Sgblack@eecs.umich.edu if (q) { 1697351Sgblack@eecs.umich.edu return decodeNeonSThreeUSReg<BaseQ>( 1707351Sgblack@eecs.umich.edu size, machInst, dest, op1, op2); 1717351Sgblack@eecs.umich.edu } else { 1727351Sgblack@eecs.umich.edu return decodeNeonSThreeUSReg<BaseD>( 1737351Sgblack@eecs.umich.edu size, machInst, dest, op1, op2); 1747351Sgblack@eecs.umich.edu } 1757351Sgblack@eecs.umich.edu } 1767351Sgblack@eecs.umich.edu 1777351Sgblack@eecs.umich.edu template <template <typename T> class BaseD, 1787351Sgblack@eecs.umich.edu template <typename T> class BaseQ> 1797351Sgblack@eecs.umich.edu StaticInstPtr 1807351Sgblack@eecs.umich.edu decodeNeonUSThreeSReg(bool q, bool notSigned, unsigned size, 1817351Sgblack@eecs.umich.edu ExtMachInst machInst, IntRegIndex dest, 1827351Sgblack@eecs.umich.edu IntRegIndex op1, IntRegIndex op2) 1837351Sgblack@eecs.umich.edu { 1847351Sgblack@eecs.umich.edu if (notSigned) { 1857351Sgblack@eecs.umich.edu return decodeNeonUThreeSReg<BaseD, BaseQ>( 
1867406SAli.Saidi@ARM.com q, size, machInst, dest, op1, op2); 1877259Sgblack@eecs.umich.edu } else { 1887259Sgblack@eecs.umich.edu return decodeNeonSThreeSReg<BaseD, BaseQ>( 1897351Sgblack@eecs.umich.edu q, size, machInst, dest, op1, op2); 1907259Sgblack@eecs.umich.edu } 1917351Sgblack@eecs.umich.edu } 1927351Sgblack@eecs.umich.edu 1937351Sgblack@eecs.umich.edu template <template <typename T> class BaseD, 1947259Sgblack@eecs.umich.edu template <typename T> class BaseQ> 1957259Sgblack@eecs.umich.edu StaticInstPtr 1967259Sgblack@eecs.umich.edu decodeNeonUThreeReg(bool q, unsigned size, 1977351Sgblack@eecs.umich.edu ExtMachInst machInst, IntRegIndex dest, 1987351Sgblack@eecs.umich.edu IntRegIndex op1, IntRegIndex op2) 1997351Sgblack@eecs.umich.edu { 2007351Sgblack@eecs.umich.edu if (q) { 2017351Sgblack@eecs.umich.edu return decodeNeonUThreeUReg<BaseQ>( 2027259Sgblack@eecs.umich.edu size, machInst, dest, op1, op2); 2037259Sgblack@eecs.umich.edu } else { 2047259Sgblack@eecs.umich.edu return decodeNeonUThreeUReg<BaseD>( 2057259Sgblack@eecs.umich.edu size, machInst, dest, op1, op2); 2067259Sgblack@eecs.umich.edu } 2077259Sgblack@eecs.umich.edu } 2087259Sgblack@eecs.umich.edu 2097259Sgblack@eecs.umich.edu template <template <typename T> class BaseD, 2107259Sgblack@eecs.umich.edu template <typename T> class BaseQ> 2117259Sgblack@eecs.umich.edu StaticInstPtr 2127259Sgblack@eecs.umich.edu decodeNeonSThreeReg(bool q, unsigned size, 2137259Sgblack@eecs.umich.edu ExtMachInst machInst, IntRegIndex dest, 2147259Sgblack@eecs.umich.edu IntRegIndex op1, IntRegIndex op2) 2157259Sgblack@eecs.umich.edu { 2167259Sgblack@eecs.umich.edu if (q) { 2177259Sgblack@eecs.umich.edu return decodeNeonSThreeUReg<BaseQ>( 2187259Sgblack@eecs.umich.edu size, machInst, dest, op1, op2); 2197259Sgblack@eecs.umich.edu } else { 2207351Sgblack@eecs.umich.edu return decodeNeonSThreeUReg<BaseD>( 2217351Sgblack@eecs.umich.edu size, machInst, dest, op1, op2); 2227259Sgblack@eecs.umich.edu } 
2237351Sgblack@eecs.umich.edu } 2247259Sgblack@eecs.umich.edu 2257351Sgblack@eecs.umich.edu template <template <typename T> class BaseD, 2267259Sgblack@eecs.umich.edu template <typename T> class BaseQ> 2277259Sgblack@eecs.umich.edu StaticInstPtr 2287259Sgblack@eecs.umich.edu decodeNeonUSThreeReg(bool q, bool notSigned, unsigned size, 2297259Sgblack@eecs.umich.edu ExtMachInst machInst, IntRegIndex dest, 2307259Sgblack@eecs.umich.edu IntRegIndex op1, IntRegIndex op2) 2317259Sgblack@eecs.umich.edu { 2327259Sgblack@eecs.umich.edu if (notSigned) { 2337259Sgblack@eecs.umich.edu return decodeNeonUThreeReg<BaseD, BaseQ>( 2347259Sgblack@eecs.umich.edu q, size, machInst, dest, op1, op2); 2357259Sgblack@eecs.umich.edu } else { 2367259Sgblack@eecs.umich.edu return decodeNeonSThreeReg<BaseD, BaseQ>( 2377259Sgblack@eecs.umich.edu q, size, machInst, dest, op1, op2); 2387259Sgblack@eecs.umich.edu } 2397259Sgblack@eecs.umich.edu } 2407259Sgblack@eecs.umich.edu 2417259Sgblack@eecs.umich.edu template <template <typename T> class BaseD, 2427259Sgblack@eecs.umich.edu template <typename T> class BaseQ> 2437259Sgblack@eecs.umich.edu StaticInstPtr 2447259Sgblack@eecs.umich.edu decodeNeonUTwoShiftReg(bool q, unsigned size, 2457351Sgblack@eecs.umich.edu ExtMachInst machInst, IntRegIndex dest, 2467351Sgblack@eecs.umich.edu IntRegIndex op1, uint64_t imm) 2477351Sgblack@eecs.umich.edu { 2487351Sgblack@eecs.umich.edu if (q) { 2497351Sgblack@eecs.umich.edu switch (size) { 2507259Sgblack@eecs.umich.edu case 0: 2517259Sgblack@eecs.umich.edu return new BaseQ<uint8_t>(machInst, dest, op1, imm); 2527259Sgblack@eecs.umich.edu case 1: 2537259Sgblack@eecs.umich.edu return new BaseQ<uint16_t>(machInst, dest, op1, imm); 2547259Sgblack@eecs.umich.edu case 2: 2557259Sgblack@eecs.umich.edu return new BaseQ<uint32_t>(machInst, dest, op1, imm); 2567259Sgblack@eecs.umich.edu case 3: 2577259Sgblack@eecs.umich.edu return new BaseQ<uint64_t>(machInst, dest, op1, imm); 2587259Sgblack@eecs.umich.edu default: 
2597259Sgblack@eecs.umich.edu return new Unknown(machInst); 2607259Sgblack@eecs.umich.edu } 2617259Sgblack@eecs.umich.edu } else { 2627259Sgblack@eecs.umich.edu switch (size) { 2637259Sgblack@eecs.umich.edu case 0: 2647259Sgblack@eecs.umich.edu return new BaseD<uint8_t>(machInst, dest, op1, imm); 2657259Sgblack@eecs.umich.edu case 1: 2667259Sgblack@eecs.umich.edu return new BaseD<uint16_t>(machInst, dest, op1, imm); 2677259Sgblack@eecs.umich.edu case 2: 2687259Sgblack@eecs.umich.edu return new BaseD<uint32_t>(machInst, dest, op1, imm); 2697259Sgblack@eecs.umich.edu case 3: 2707259Sgblack@eecs.umich.edu return new BaseD<uint64_t>(machInst, dest, op1, imm); 2717351Sgblack@eecs.umich.edu default: 2727351Sgblack@eecs.umich.edu return new Unknown(machInst); 2737351Sgblack@eecs.umich.edu } 2747351Sgblack@eecs.umich.edu } 2757351Sgblack@eecs.umich.edu } 2767351Sgblack@eecs.umich.edu 2777351Sgblack@eecs.umich.edu template <template <typename T> class BaseD, 2787351Sgblack@eecs.umich.edu template <typename T> class BaseQ> 2797351Sgblack@eecs.umich.edu StaticInstPtr 2807351Sgblack@eecs.umich.edu decodeNeonSTwoShiftReg(bool q, unsigned size, 2817351Sgblack@eecs.umich.edu ExtMachInst machInst, IntRegIndex dest, 2827351Sgblack@eecs.umich.edu IntRegIndex op1, uint64_t imm) 2837351Sgblack@eecs.umich.edu { 2847351Sgblack@eecs.umich.edu if (q) { 2857351Sgblack@eecs.umich.edu switch (size) { 2867351Sgblack@eecs.umich.edu case 0: 2877351Sgblack@eecs.umich.edu return new BaseQ<int8_t>(machInst, dest, op1, imm); 2887351Sgblack@eecs.umich.edu case 1: 2897351Sgblack@eecs.umich.edu return new BaseQ<int16_t>(machInst, dest, op1, imm); 2907351Sgblack@eecs.umich.edu case 2: 2917259Sgblack@eecs.umich.edu return new BaseQ<int32_t>(machInst, dest, op1, imm); 2927259Sgblack@eecs.umich.edu case 3: 2937259Sgblack@eecs.umich.edu return new BaseQ<int64_t>(machInst, dest, op1, imm); 2947259Sgblack@eecs.umich.edu default: 2957259Sgblack@eecs.umich.edu return new Unknown(machInst); 
2967259Sgblack@eecs.umich.edu } 2977259Sgblack@eecs.umich.edu } else { 2987259Sgblack@eecs.umich.edu switch (size) { 2997259Sgblack@eecs.umich.edu case 0: 3007259Sgblack@eecs.umich.edu return new BaseD<int8_t>(machInst, dest, op1, imm); 3017259Sgblack@eecs.umich.edu case 1: 3027259Sgblack@eecs.umich.edu return new BaseD<int16_t>(machInst, dest, op1, imm); 3037259Sgblack@eecs.umich.edu case 2: 3047259Sgblack@eecs.umich.edu return new BaseD<int32_t>(machInst, dest, op1, imm); 3057259Sgblack@eecs.umich.edu case 3: 3067259Sgblack@eecs.umich.edu return new BaseD<int64_t>(machInst, dest, op1, imm); 3077259Sgblack@eecs.umich.edu default: 3087259Sgblack@eecs.umich.edu return new Unknown(machInst); 3097259Sgblack@eecs.umich.edu } 3107259Sgblack@eecs.umich.edu } 3117259Sgblack@eecs.umich.edu } 3127259Sgblack@eecs.umich.edu 3137259Sgblack@eecs.umich.edu 3147259Sgblack@eecs.umich.edu template <template <typename T> class BaseD, 3157259Sgblack@eecs.umich.edu template <typename T> class BaseQ> 3167259Sgblack@eecs.umich.edu StaticInstPtr 3177259Sgblack@eecs.umich.edu decodeNeonUSTwoShiftReg(bool q, bool notSigned, unsigned size, 3187259Sgblack@eecs.umich.edu ExtMachInst machInst, IntRegIndex dest, 3197259Sgblack@eecs.umich.edu IntRegIndex op1, uint64_t imm) 3207259Sgblack@eecs.umich.edu { 3217259Sgblack@eecs.umich.edu if (notSigned) { 3227259Sgblack@eecs.umich.edu return decodeNeonUTwoShiftReg<BaseD, BaseQ>( 3237351Sgblack@eecs.umich.edu q, size, machInst, dest, op1, imm); 3247351Sgblack@eecs.umich.edu } else { 3257351Sgblack@eecs.umich.edu return decodeNeonSTwoShiftReg<BaseD, BaseQ>( 3267351Sgblack@eecs.umich.edu q, size, machInst, dest, op1, imm); 3277351Sgblack@eecs.umich.edu } 3287351Sgblack@eecs.umich.edu } 3297351Sgblack@eecs.umich.edu 3307351Sgblack@eecs.umich.edu template <template <typename T> class Base> 3317351Sgblack@eecs.umich.edu StaticInstPtr 3327351Sgblack@eecs.umich.edu decodeNeonUTwoShiftUSReg(unsigned size, 3337351Sgblack@eecs.umich.edu ExtMachInst machInst, 
IntRegIndex dest, 3347351Sgblack@eecs.umich.edu IntRegIndex op1, uint64_t imm) 3357351Sgblack@eecs.umich.edu { 3367351Sgblack@eecs.umich.edu switch (size) { 3377351Sgblack@eecs.umich.edu case 0: 3387351Sgblack@eecs.umich.edu return new Base<uint8_t>(machInst, dest, op1, imm); 3397351Sgblack@eecs.umich.edu case 1: 3407351Sgblack@eecs.umich.edu return new Base<uint16_t>(machInst, dest, op1, imm); 3417351Sgblack@eecs.umich.edu case 2: 3427351Sgblack@eecs.umich.edu return new Base<uint32_t>(machInst, dest, op1, imm); 3437351Sgblack@eecs.umich.edu default: 3447351Sgblack@eecs.umich.edu return new Unknown(machInst); 3457351Sgblack@eecs.umich.edu } 3467351Sgblack@eecs.umich.edu } 3477351Sgblack@eecs.umich.edu 3487351Sgblack@eecs.umich.edu template <template <typename T> class BaseD, 3497351Sgblack@eecs.umich.edu template <typename T> class BaseQ> 3507351Sgblack@eecs.umich.edu StaticInstPtr 3517351Sgblack@eecs.umich.edu decodeNeonUTwoShiftSReg(bool q, unsigned size, 3527351Sgblack@eecs.umich.edu ExtMachInst machInst, IntRegIndex dest, 3537351Sgblack@eecs.umich.edu IntRegIndex op1, uint64_t imm) 3547351Sgblack@eecs.umich.edu { 3557351Sgblack@eecs.umich.edu if (q) { 3567351Sgblack@eecs.umich.edu return decodeNeonUTwoShiftUSReg<BaseQ>( 3577351Sgblack@eecs.umich.edu size, machInst, dest, op1, imm); 3587351Sgblack@eecs.umich.edu } else { 3597351Sgblack@eecs.umich.edu return decodeNeonUTwoShiftUSReg<BaseD>( 3607351Sgblack@eecs.umich.edu size, machInst, dest, op1, imm); 3617351Sgblack@eecs.umich.edu } 3627351Sgblack@eecs.umich.edu } 3637351Sgblack@eecs.umich.edu 3647351Sgblack@eecs.umich.edu template <template <typename T> class Base> 3657351Sgblack@eecs.umich.edu StaticInstPtr 3667351Sgblack@eecs.umich.edu decodeNeonSTwoShiftUSReg(unsigned size, 3677351Sgblack@eecs.umich.edu ExtMachInst machInst, IntRegIndex dest, 3687351Sgblack@eecs.umich.edu IntRegIndex op1, uint64_t imm) 3697351Sgblack@eecs.umich.edu { 3707351Sgblack@eecs.umich.edu switch (size) { 
3717351Sgblack@eecs.umich.edu case 0: 3727351Sgblack@eecs.umich.edu return new Base<int8_t>(machInst, dest, op1, imm); 3737259Sgblack@eecs.umich.edu case 1: 3747583SAli.Saidi@arm.com return new Base<int16_t>(machInst, dest, op1, imm); 3757259Sgblack@eecs.umich.edu case 2: 3767259Sgblack@eecs.umich.edu return new Base<int32_t>(machInst, dest, op1, imm); 3777583SAli.Saidi@arm.com default: 3787583SAli.Saidi@arm.com return new Unknown(machInst); 3797583SAli.Saidi@arm.com } 3807583SAli.Saidi@arm.com } 3817583SAli.Saidi@arm.com 3827583SAli.Saidi@arm.com template <template <typename T> class BaseD, 3837583SAli.Saidi@arm.com template <typename T> class BaseQ> 3847583SAli.Saidi@arm.com StaticInstPtr 3857583SAli.Saidi@arm.com decodeNeonSTwoShiftSReg(bool q, unsigned size, 3867583SAli.Saidi@arm.com ExtMachInst machInst, IntRegIndex dest, 3877583SAli.Saidi@arm.com IntRegIndex op1, uint64_t imm) 3887583SAli.Saidi@arm.com { 3897583SAli.Saidi@arm.com if (q) { 3907583SAli.Saidi@arm.com return decodeNeonSTwoShiftUSReg<BaseQ>( 3917583SAli.Saidi@arm.com size, machInst, dest, op1, imm); 3927583SAli.Saidi@arm.com } else { 3937583SAli.Saidi@arm.com return decodeNeonSTwoShiftUSReg<BaseD>( 3947583SAli.Saidi@arm.com size, machInst, dest, op1, imm); 3958988SAli.Saidi@ARM.com } 3967259Sgblack@eecs.umich.edu } 3977583SAli.Saidi@arm.com 3987583SAli.Saidi@arm.com template <template <typename T> class BaseD, 3997583SAli.Saidi@arm.com template <typename T> class BaseQ> 4007583SAli.Saidi@arm.com StaticInstPtr 4017583SAli.Saidi@arm.com decodeNeonUSTwoShiftSReg(bool q, bool notSigned, unsigned size, 4027583SAli.Saidi@arm.com ExtMachInst machInst, IntRegIndex dest, 4037583SAli.Saidi@arm.com IntRegIndex op1, uint64_t imm) 4047583SAli.Saidi@arm.com { 4058988SAli.Saidi@ARM.com if (notSigned) { 4067259Sgblack@eecs.umich.edu return decodeNeonUTwoShiftSReg<BaseD, BaseQ>( 4077583SAli.Saidi@arm.com q, size, machInst, dest, op1, imm); 4087583SAli.Saidi@arm.com } else { 4097583SAli.Saidi@arm.com return 
decodeNeonSTwoShiftSReg<BaseD, BaseQ>( 4107583SAli.Saidi@arm.com q, size, machInst, dest, op1, imm); 4117583SAli.Saidi@arm.com } 4127583SAli.Saidi@arm.com } 4137583SAli.Saidi@arm.com 4147583SAli.Saidi@arm.com template <template <typename T> class Base> 4158988SAli.Saidi@ARM.com StaticInstPtr 4167259Sgblack@eecs.umich.edu decodeNeonUTwoMiscUSReg(unsigned size, 4178058SAli.Saidi@ARM.com ExtMachInst machInst, IntRegIndex dest, 4188549Sdaniel.johnson@arm.com IntRegIndex op1) 4198549Sdaniel.johnson@arm.com { 4208549Sdaniel.johnson@arm.com switch (size) { 4218549Sdaniel.johnson@arm.com case 0: 4228549Sdaniel.johnson@arm.com return new Base<uint8_t>(machInst, dest, op1); 4238549Sdaniel.johnson@arm.com case 1: 4248549Sdaniel.johnson@arm.com return new Base<uint16_t>(machInst, dest, op1); 4258549Sdaniel.johnson@arm.com case 2: 4268549Sdaniel.johnson@arm.com return new Base<uint32_t>(machInst, dest, op1); 4278549Sdaniel.johnson@arm.com default: 4288988SAli.Saidi@ARM.com return new Unknown(machInst); 4298549Sdaniel.johnson@arm.com } 4308549Sdaniel.johnson@arm.com } 4318549Sdaniel.johnson@arm.com 4327259Sgblack@eecs.umich.edu template <template <typename T> class Base> 4337583SAli.Saidi@arm.com StaticInstPtr 4347259Sgblack@eecs.umich.edu decodeNeonSTwoMiscUSReg(unsigned size, 4357351Sgblack@eecs.umich.edu ExtMachInst machInst, IntRegIndex dest, 4367351Sgblack@eecs.umich.edu IntRegIndex op1) 4377351Sgblack@eecs.umich.edu { 4387351Sgblack@eecs.umich.edu switch (size) { 4397351Sgblack@eecs.umich.edu case 0: 4407351Sgblack@eecs.umich.edu return new Base<int8_t>(machInst, dest, op1); 4417351Sgblack@eecs.umich.edu case 1: 4427351Sgblack@eecs.umich.edu return new Base<int16_t>(machInst, dest, op1); 4437351Sgblack@eecs.umich.edu case 2: 4447351Sgblack@eecs.umich.edu return new Base<int32_t>(machInst, dest, op1); 4457351Sgblack@eecs.umich.edu default: 4467351Sgblack@eecs.umich.edu return new Unknown(machInst); 4477259Sgblack@eecs.umich.edu } 4488737Skoansin.tan@gmail.com } 
4497259Sgblack@eecs.umich.edu 4507259Sgblack@eecs.umich.edu template <template <typename T> class BaseD, 4517259Sgblack@eecs.umich.edu template <typename T> class BaseQ> 4527259Sgblack@eecs.umich.edu StaticInstPtr 4537259Sgblack@eecs.umich.edu decodeNeonUTwoMiscSReg(bool q, unsigned size, 4547259Sgblack@eecs.umich.edu ExtMachInst machInst, IntRegIndex dest, 4557259Sgblack@eecs.umich.edu IntRegIndex op1) 4567259Sgblack@eecs.umich.edu { 4577259Sgblack@eecs.umich.edu if (q) { 4587259Sgblack@eecs.umich.edu return decodeNeonUTwoMiscUSReg<BaseQ>(size, machInst, dest, op1); 4597259Sgblack@eecs.umich.edu } else { 4607259Sgblack@eecs.umich.edu return decodeNeonUTwoMiscUSReg<BaseD>(size, machInst, dest, op1); 4617259Sgblack@eecs.umich.edu } 4627259Sgblack@eecs.umich.edu } 4637259Sgblack@eecs.umich.edu 4647259Sgblack@eecs.umich.edu template <template <typename T> class BaseD, 4657351Sgblack@eecs.umich.edu template <typename T> class BaseQ> 4667351Sgblack@eecs.umich.edu StaticInstPtr 4677351Sgblack@eecs.umich.edu decodeNeonSTwoMiscSReg(bool q, unsigned size, 4687351Sgblack@eecs.umich.edu ExtMachInst machInst, IntRegIndex dest, 4697351Sgblack@eecs.umich.edu IntRegIndex op1) 4707351Sgblack@eecs.umich.edu { 4717351Sgblack@eecs.umich.edu if (q) { 4727351Sgblack@eecs.umich.edu return decodeNeonSTwoMiscUSReg<BaseQ>(size, machInst, dest, op1); 4737351Sgblack@eecs.umich.edu } else { 4747351Sgblack@eecs.umich.edu return decodeNeonSTwoMiscUSReg<BaseD>(size, machInst, dest, op1); 4757351Sgblack@eecs.umich.edu } 4767351Sgblack@eecs.umich.edu } 4777351Sgblack@eecs.umich.edu 4787351Sgblack@eecs.umich.edu template <template <typename T> class Base> 4797351Sgblack@eecs.umich.edu StaticInstPtr 4807259Sgblack@eecs.umich.edu decodeNeonUTwoMiscUReg(unsigned size, 4817259Sgblack@eecs.umich.edu ExtMachInst machInst, IntRegIndex dest, 4827259Sgblack@eecs.umich.edu IntRegIndex op1) 4837406SAli.Saidi@ARM.com { 4847351Sgblack@eecs.umich.edu switch (size) { 4857351Sgblack@eecs.umich.edu case 0: 
4867259Sgblack@eecs.umich.edu return new Base<uint8_t>(machInst, dest, op1); 4877259Sgblack@eecs.umich.edu case 1: 4887259Sgblack@eecs.umich.edu return new Base<uint16_t>(machInst, dest, op1); 4897259Sgblack@eecs.umich.edu case 2: 4907259Sgblack@eecs.umich.edu return new Base<uint32_t>(machInst, dest, op1); 4917259Sgblack@eecs.umich.edu case 3: 4927259Sgblack@eecs.umich.edu return new Base<uint64_t>(machInst, dest, op1); 4937259Sgblack@eecs.umich.edu default: 4947259Sgblack@eecs.umich.edu return new Unknown(machInst); 4957259Sgblack@eecs.umich.edu } 4967259Sgblack@eecs.umich.edu } 4977259Sgblack@eecs.umich.edu 4987259Sgblack@eecs.umich.edu template <template <typename T> class Base> 4997259Sgblack@eecs.umich.edu StaticInstPtr 5008550SChander.Sudanthi@ARM.com decodeNeonSTwoMiscUReg(unsigned size, 5017259Sgblack@eecs.umich.edu ExtMachInst machInst, IntRegIndex dest, 5027259Sgblack@eecs.umich.edu IntRegIndex op1) 5037259Sgblack@eecs.umich.edu { 5047259Sgblack@eecs.umich.edu switch (size) { 5057259Sgblack@eecs.umich.edu case 0: 5068902Sandreas.hansson@arm.com return new Base<int8_t>(machInst, dest, op1); 507 case 1: 508 return new Base<int16_t>(machInst, dest, op1); 509 case 2: 510 return new Base<int32_t>(machInst, dest, op1); 511 case 3: 512 return new Base<int64_t>(machInst, dest, op1); 513 default: 514 return new Unknown(machInst); 515 } 516 } 517 518 template <template <typename T> class BaseD, 519 template <typename T> class BaseQ> 520 StaticInstPtr 521 decodeNeonSTwoMiscReg(bool q, unsigned size, 522 ExtMachInst machInst, IntRegIndex dest, 523 IntRegIndex op1) 524 { 525 if (q) { 526 return decodeNeonSTwoMiscUReg<BaseQ>(size, machInst, dest, op1); 527 } else { 528 return decodeNeonSTwoMiscUReg<BaseD>(size, machInst, dest, op1); 529 } 530 } 531 532 template <template <typename T> class BaseD, 533 template <typename T> class BaseQ> 534 StaticInstPtr 535 decodeNeonUTwoMiscReg(bool q, unsigned size, 536 ExtMachInst machInst, IntRegIndex dest, 537 IntRegIndex op1) 538 
{ 539 if (q) { 540 return decodeNeonUTwoMiscUReg<BaseQ>(size, machInst, dest, op1); 541 } else { 542 return decodeNeonUTwoMiscUReg<BaseD>(size, machInst, dest, op1); 543 } 544 } 545 546 template <template <typename T> class BaseD, 547 template <typename T> class BaseQ> 548 StaticInstPtr 549 decodeNeonUSTwoMiscSReg(bool q, bool notSigned, unsigned size, 550 ExtMachInst machInst, IntRegIndex dest, 551 IntRegIndex op1) 552 { 553 if (notSigned) { 554 return decodeNeonUTwoShiftSReg<BaseD, BaseQ>( 555 q, size, machInst, dest, op1); 556 } else { 557 return decodeNeonSTwoShiftSReg<BaseD, BaseQ>( 558 q, size, machInst, dest, op1); 559 } 560 } 561 562}}; 563 564output exec {{ 565 static float 566 vcgtFunc(float op1, float op2) 567 { 568 if (isSnan(op1) || isSnan(op2)) 569 return 2.0; 570 return (op1 > op2) ? 0.0 : 1.0; 571 } 572 573 static float 574 vcgeFunc(float op1, float op2) 575 { 576 if (isSnan(op1) || isSnan(op2)) 577 return 2.0; 578 return (op1 >= op2) ? 0.0 : 1.0; 579 } 580 581 static float 582 vceqFunc(float op1, float op2) 583 { 584 if (isSnan(op1) || isSnan(op2)) 585 return 2.0; 586 return (op1 == op2) ? 0.0 : 1.0; 587 } 588 589 static float 590 vcleFunc(float op1, float op2) 591 { 592 if (isSnan(op1) || isSnan(op2)) 593 return 2.0; 594 return (op1 <= op2) ? 0.0 : 1.0; 595 } 596 597 static float 598 vcltFunc(float op1, float op2) 599 { 600 if (isSnan(op1) || isSnan(op2)) 601 return 2.0; 602 return (op1 < op2) ? 0.0 : 1.0; 603 } 604 605 static float 606 vacgtFunc(float op1, float op2) 607 { 608 if (isSnan(op1) || isSnan(op2)) 609 return 2.0; 610 return (fabsf(op1) > fabsf(op2)) ? 0.0 : 1.0; 611 } 612 613 static float 614 vacgeFunc(float op1, float op2) 615 { 616 if (isSnan(op1) || isSnan(op2)) 617 return 2.0; 618 return (fabsf(op1) >= fabsf(op2)) ? 
0.0 : 1.0; 619 } 620}}; 621 622let {{ 623 624 header_output = "" 625 exec_output = "" 626 627 smallUnsignedTypes = ("uint8_t", "uint16_t", "uint32_t") 628 unsignedTypes = smallUnsignedTypes + ("uint64_t",) 629 smallSignedTypes = ("int8_t", "int16_t", "int32_t") 630 signedTypes = smallSignedTypes + ("int64_t",) 631 smallTypes = smallUnsignedTypes + smallSignedTypes 632 allTypes = unsignedTypes + signedTypes 633 634 def threeEqualRegInst(name, Name, opClass, types, rCount, op, 635 readDest=False, pairwise=False): 636 global header_output, exec_output 637 eWalkCode = simdEnabledCheckCode + ''' 638 RegVect srcReg1, srcReg2, destReg; 639 ''' 640 for reg in range(rCount): 641 eWalkCode += ''' 642 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw); 643 srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw); 644 ''' % { "reg" : reg } 645 if readDest: 646 eWalkCode += ''' 647 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw); 648 ''' % { "reg" : reg } 649 readDestCode = '' 650 if readDest: 651 readDestCode = 'destElem = gtoh(destReg.elements[i]);' 652 if pairwise: 653 eWalkCode += ''' 654 for (unsigned i = 0; i < eCount; i++) { 655 Element srcElem1 = gtoh(2 * i < eCount ? 656 srcReg1.elements[2 * i] : 657 srcReg2.elements[2 * i - eCount]); 658 Element srcElem2 = gtoh(2 * i < eCount ? 
659 srcReg1.elements[2 * i + 1] : 660 srcReg2.elements[2 * i + 1 - eCount]); 661 Element destElem; 662 %(readDest)s 663 %(op)s 664 destReg.elements[i] = htog(destElem); 665 } 666 ''' % { "op" : op, "readDest" : readDestCode } 667 else: 668 eWalkCode += ''' 669 for (unsigned i = 0; i < eCount; i++) { 670 Element srcElem1 = gtoh(srcReg1.elements[i]); 671 Element srcElem2 = gtoh(srcReg2.elements[i]); 672 Element destElem; 673 %(readDest)s 674 %(op)s 675 destReg.elements[i] = htog(destElem); 676 } 677 ''' % { "op" : op, "readDest" : readDestCode } 678 for reg in range(rCount): 679 eWalkCode += ''' 680 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 681 ''' % { "reg" : reg } 682 iop = InstObjParams(name, Name, 683 "RegRegRegOp", 684 { "code": eWalkCode, 685 "r_count": rCount, 686 "predicate_test": predicateTest, 687 "op_class": opClass }, []) 688 header_output += NeonRegRegRegOpDeclare.subst(iop) 689 exec_output += NeonEqualRegExecute.subst(iop) 690 for type in types: 691 substDict = { "targs" : type, 692 "class_name" : Name } 693 exec_output += NeonExecDeclare.subst(substDict) 694 695 def threeEqualRegInstFp(name, Name, opClass, types, rCount, op, 696 readDest=False, pairwise=False, toInt=False): 697 global header_output, exec_output 698 eWalkCode = simdEnabledCheckCode + ''' 699 typedef FloatReg FloatVect[rCount]; 700 FloatVect srcRegs1, srcRegs2; 701 ''' 702 if toInt: 703 eWalkCode += 'RegVect destRegs;\n' 704 else: 705 eWalkCode += 'FloatVect destRegs;\n' 706 for reg in range(rCount): 707 eWalkCode += ''' 708 srcRegs1[%(reg)d] = FpOp1P%(reg)d; 709 srcRegs2[%(reg)d] = FpOp2P%(reg)d; 710 ''' % { "reg" : reg } 711 if readDest: 712 if toInt: 713 eWalkCode += ''' 714 destRegs.regs[%(reg)d] = FpDestP%(reg)d.bits; 715 ''' % { "reg" : reg } 716 else: 717 eWalkCode += ''' 718 destRegs[%(reg)d] = FpDestP%(reg)d; 719 ''' % { "reg" : reg } 720 readDestCode = '' 721 if readDest: 722 readDestCode = 'destReg = destRegs[r];' 723 destType = 'FloatReg' 724 writeDest = 'destRegs[r] = 
destReg;' 725 if toInt: 726 destType = 'FloatRegBits' 727 writeDest = 'destRegs.regs[r] = destReg;' 728 if pairwise: 729 eWalkCode += ''' 730 for (unsigned r = 0; r < rCount; r++) { 731 FloatReg srcReg1 = (2 * r < rCount) ? 732 srcRegs1[2 * r] : srcRegs2[2 * r - rCount]; 733 FloatReg srcReg2 = (2 * r < rCount) ? 734 srcRegs1[2 * r + 1] : srcRegs2[2 * r + 1 - rCount]; 735 %(destType)s destReg; 736 %(readDest)s 737 %(op)s 738 %(writeDest)s 739 } 740 ''' % { "op" : op, 741 "readDest" : readDestCode, 742 "destType" : destType, 743 "writeDest" : writeDest } 744 else: 745 eWalkCode += ''' 746 for (unsigned r = 0; r < rCount; r++) { 747 FloatReg srcReg1 = srcRegs1[r]; 748 FloatReg srcReg2 = srcRegs2[r]; 749 %(destType)s destReg; 750 %(readDest)s 751 %(op)s 752 %(writeDest)s 753 } 754 ''' % { "op" : op, 755 "readDest" : readDestCode, 756 "destType" : destType, 757 "writeDest" : writeDest } 758 for reg in range(rCount): 759 if toInt: 760 eWalkCode += ''' 761 FpDestP%(reg)d_uw = destRegs.regs[%(reg)d]; 762 ''' % { "reg" : reg } 763 else: 764 eWalkCode += ''' 765 FpDestP%(reg)d = destRegs[%(reg)d]; 766 ''' % { "reg" : reg } 767 iop = InstObjParams(name, Name, 768 "FpRegRegRegOp", 769 { "code": eWalkCode, 770 "r_count": rCount, 771 "predicate_test": predicateTest, 772 "op_class": opClass }, []) 773 header_output += NeonRegRegRegOpDeclare.subst(iop) 774 exec_output += NeonEqualRegExecute.subst(iop) 775 for type in types: 776 substDict = { "targs" : type, 777 "class_name" : Name } 778 exec_output += NeonExecDeclare.subst(substDict) 779 780 def threeUnequalRegInst(name, Name, opClass, types, op, 781 bigSrc1, bigSrc2, bigDest, readDest): 782 global header_output, exec_output 783 src1Cnt = src2Cnt = destCnt = 2 784 src1Prefix = src2Prefix = destPrefix = '' 785 if bigSrc1: 786 src1Cnt = 4 787 src1Prefix = 'Big' 788 if bigSrc2: 789 src2Cnt = 4 790 src2Prefix = 'Big' 791 if bigDest: 792 destCnt = 4 793 destPrefix = 'Big' 794 eWalkCode = simdEnabledCheckCode + ''' 795 %sRegVect 
srcReg1; 796 %sRegVect srcReg2; 797 %sRegVect destReg; 798 ''' % (src1Prefix, src2Prefix, destPrefix) 799 for reg in range(src1Cnt): 800 eWalkCode += ''' 801 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw); 802 ''' % { "reg" : reg } 803 for reg in range(src2Cnt): 804 eWalkCode += ''' 805 srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw); 806 ''' % { "reg" : reg } 807 if readDest: 808 for reg in range(destCnt): 809 eWalkCode += ''' 810 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw); 811 ''' % { "reg" : reg } 812 readDestCode = '' 813 if readDest: 814 readDestCode = 'destElem = gtoh(destReg.elements[i]);' 815 eWalkCode += ''' 816 for (unsigned i = 0; i < eCount; i++) { 817 %(src1Prefix)sElement srcElem1 = gtoh(srcReg1.elements[i]); 818 %(src1Prefix)sElement srcElem2 = gtoh(srcReg2.elements[i]); 819 %(destPrefix)sElement destElem; 820 %(readDest)s 821 %(op)s 822 destReg.elements[i] = htog(destElem); 823 } 824 ''' % { "op" : op, "readDest" : readDestCode, 825 "src1Prefix" : src1Prefix, "src2Prefix" : src2Prefix, 826 "destPrefix" : destPrefix } 827 for reg in range(destCnt): 828 eWalkCode += ''' 829 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 830 ''' % { "reg" : reg } 831 iop = InstObjParams(name, Name, 832 "RegRegRegOp", 833 { "code": eWalkCode, 834 "r_count": 2, 835 "predicate_test": predicateTest, 836 "op_class": opClass }, []) 837 header_output += NeonRegRegRegOpDeclare.subst(iop) 838 exec_output += NeonUnequalRegExecute.subst(iop) 839 for type in types: 840 substDict = { "targs" : type, 841 "class_name" : Name } 842 exec_output += NeonExecDeclare.subst(substDict) 843 844 def threeRegNarrowInst(name, Name, opClass, types, op, readDest=False): 845 threeUnequalRegInst(name, Name, opClass, types, op, 846 True, True, False, readDest) 847 848 def threeRegLongInst(name, Name, opClass, types, op, readDest=False): 849 threeUnequalRegInst(name, Name, opClass, types, op, 850 False, False, True, readDest) 851 852 def threeRegWideInst(name, Name, opClass, types, op, 
readDest=False): 853 threeUnequalRegInst(name, Name, opClass, types, op, 854 True, False, True, readDest) 855 856 def twoEqualRegInst(name, Name, opClass, types, rCount, op, readDest=False): 857 global header_output, exec_output 858 eWalkCode = simdEnabledCheckCode + ''' 859 RegVect srcReg1, srcReg2, destReg; 860 ''' 861 for reg in range(rCount): 862 eWalkCode += ''' 863 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw); 864 srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw); 865 ''' % { "reg" : reg } 866 if readDest: 867 eWalkCode += ''' 868 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw); 869 ''' % { "reg" : reg } 870 readDestCode = '' 871 if readDest: 872 readDestCode = 'destElem = gtoh(destReg.elements[i]);' 873 eWalkCode += ''' 874 if (imm < 0 && imm >= eCount) { 875 if (FullSystem) 876 fault = new UndefinedInstruction; 877 else 878 fault = new UndefinedInstruction(false, mnemonic); 879 } else { 880 for (unsigned i = 0; i < eCount; i++) { 881 Element srcElem1 = gtoh(srcReg1.elements[i]); 882 Element srcElem2 = gtoh(srcReg2.elements[imm]); 883 Element destElem; 884 %(readDest)s 885 %(op)s 886 destReg.elements[i] = htog(destElem); 887 } 888 } 889 ''' % { "op" : op, "readDest" : readDestCode } 890 for reg in range(rCount): 891 eWalkCode += ''' 892 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 893 ''' % { "reg" : reg } 894 iop = InstObjParams(name, Name, 895 "RegRegRegImmOp", 896 { "code": eWalkCode, 897 "r_count": rCount, 898 "predicate_test": predicateTest, 899 "op_class": opClass }, []) 900 header_output += NeonRegRegRegImmOpDeclare.subst(iop) 901 exec_output += NeonEqualRegExecute.subst(iop) 902 for type in types: 903 substDict = { "targs" : type, 904 "class_name" : Name } 905 exec_output += NeonExecDeclare.subst(substDict) 906 907 def twoRegLongInst(name, Name, opClass, types, op, readDest=False): 908 global header_output, exec_output 909 rCount = 2 910 eWalkCode = simdEnabledCheckCode + ''' 911 RegVect srcReg1, srcReg2; 912 BigRegVect destReg; 913 ''' 914 for 
reg in range(rCount): 915 eWalkCode += ''' 916 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw); 917 srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);; 918 ''' % { "reg" : reg } 919 if readDest: 920 for reg in range(2 * rCount): 921 eWalkCode += ''' 922 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw); 923 ''' % { "reg" : reg } 924 readDestCode = '' 925 if readDest: 926 readDestCode = 'destElem = gtoh(destReg.elements[i]);' 927 eWalkCode += ''' 928 if (imm < 0 && imm >= eCount) { 929 if (FullSystem) 930 fault = new UndefinedInstruction; 931 else 932 fault = new UndefinedInstruction(false, mnemonic); 933 } else { 934 for (unsigned i = 0; i < eCount; i++) { 935 Element srcElem1 = gtoh(srcReg1.elements[i]); 936 Element srcElem2 = gtoh(srcReg2.elements[imm]); 937 BigElement destElem; 938 %(readDest)s 939 %(op)s 940 destReg.elements[i] = htog(destElem); 941 } 942 } 943 ''' % { "op" : op, "readDest" : readDestCode } 944 for reg in range(2 * rCount): 945 eWalkCode += ''' 946 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 947 ''' % { "reg" : reg } 948 iop = InstObjParams(name, Name, 949 "RegRegRegImmOp", 950 { "code": eWalkCode, 951 "r_count": rCount, 952 "predicate_test": predicateTest, 953 "op_class": opClass }, []) 954 header_output += NeonRegRegRegImmOpDeclare.subst(iop) 955 exec_output += NeonUnequalRegExecute.subst(iop) 956 for type in types: 957 substDict = { "targs" : type, 958 "class_name" : Name } 959 exec_output += NeonExecDeclare.subst(substDict) 960 961 def twoEqualRegInstFp(name, Name, opClass, types, rCount, op, readDest=False): 962 global header_output, exec_output 963 eWalkCode = simdEnabledCheckCode + ''' 964 typedef FloatReg FloatVect[rCount]; 965 FloatVect srcRegs1, srcRegs2, destRegs; 966 ''' 967 for reg in range(rCount): 968 eWalkCode += ''' 969 srcRegs1[%(reg)d] = FpOp1P%(reg)d; 970 srcRegs2[%(reg)d] = FpOp2P%(reg)d; 971 ''' % { "reg" : reg } 972 if readDest: 973 eWalkCode += ''' 974 destRegs[%(reg)d] = FpDestP%(reg)d; 975 ''' % { "reg" : reg } 976 
readDestCode = '' 977 if readDest: 978 readDestCode = 'destReg = destRegs[i];' 979 eWalkCode += ''' 980 if (imm < 0 && imm >= eCount) { 981 if (FullSystem) 982 fault = new UndefinedInstruction; 983 else 984 fault = new UndefinedInstruction(false, mnemonic); 985 } else { 986 for (unsigned i = 0; i < rCount; i++) { 987 FloatReg srcReg1 = srcRegs1[i]; 988 FloatReg srcReg2 = srcRegs2[imm]; 989 FloatReg destReg; 990 %(readDest)s 991 %(op)s 992 destRegs[i] = destReg; 993 } 994 } 995 ''' % { "op" : op, "readDest" : readDestCode } 996 for reg in range(rCount): 997 eWalkCode += ''' 998 FpDestP%(reg)d = destRegs[%(reg)d]; 999 ''' % { "reg" : reg } 1000 iop = InstObjParams(name, Name, 1001 "FpRegRegRegImmOp", 1002 { "code": eWalkCode, 1003 "r_count": rCount, 1004 "predicate_test": predicateTest, 1005 "op_class": opClass }, []) 1006 header_output += NeonRegRegRegImmOpDeclare.subst(iop) 1007 exec_output += NeonEqualRegExecute.subst(iop) 1008 for type in types: 1009 substDict = { "targs" : type, 1010 "class_name" : Name } 1011 exec_output += NeonExecDeclare.subst(substDict) 1012 1013 def twoRegShiftInst(name, Name, opClass, types, rCount, op, 1014 readDest=False, toInt=False, fromInt=False): 1015 global header_output, exec_output 1016 eWalkCode = simdEnabledCheckCode + ''' 1017 RegVect srcRegs1, destRegs; 1018 ''' 1019 for reg in range(rCount): 1020 eWalkCode += ''' 1021 srcRegs1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw); 1022 ''' % { "reg" : reg } 1023 if readDest: 1024 eWalkCode += ''' 1025 destRegs.regs[%(reg)d] = htog(FpDestP%(reg)d_uw); 1026 ''' % { "reg" : reg } 1027 readDestCode = '' 1028 if readDest: 1029 readDestCode = 'destElem = gtoh(destRegs.elements[i]);' 1030 if toInt: 1031 readDestCode = 'destReg = gtoh(destRegs.regs[i]);' 1032 readOpCode = 'Element srcElem1 = gtoh(srcRegs1.elements[i]);' 1033 if fromInt: 1034 readOpCode = 'FloatRegBits srcReg1 = gtoh(srcRegs1.regs[i]);' 1035 declDest = 'Element destElem;' 1036 writeDestCode = 'destRegs.elements[i] = htog(destElem);' 
1037 if toInt: 1038 declDest = 'FloatRegBits destReg;' 1039 writeDestCode = 'destRegs.regs[i] = htog(destReg);' 1040 eWalkCode += ''' 1041 for (unsigned i = 0; i < eCount; i++) { 1042 %(readOp)s 1043 %(declDest)s 1044 %(readDest)s 1045 %(op)s 1046 %(writeDest)s 1047 } 1048 ''' % { "readOp" : readOpCode, 1049 "declDest" : declDest, 1050 "readDest" : readDestCode, 1051 "op" : op, 1052 "writeDest" : writeDestCode } 1053 for reg in range(rCount): 1054 eWalkCode += ''' 1055 FpDestP%(reg)d_uw = gtoh(destRegs.regs[%(reg)d]); 1056 ''' % { "reg" : reg } 1057 iop = InstObjParams(name, Name, 1058 "RegRegImmOp", 1059 { "code": eWalkCode, 1060 "r_count": rCount, 1061 "predicate_test": predicateTest, 1062 "op_class": opClass }, []) 1063 header_output += NeonRegRegImmOpDeclare.subst(iop) 1064 exec_output += NeonEqualRegExecute.subst(iop) 1065 for type in types: 1066 substDict = { "targs" : type, 1067 "class_name" : Name } 1068 exec_output += NeonExecDeclare.subst(substDict) 1069 1070 def twoRegNarrowShiftInst(name, Name, opClass, types, op, readDest=False): 1071 global header_output, exec_output 1072 eWalkCode = simdEnabledCheckCode + ''' 1073 BigRegVect srcReg1; 1074 RegVect destReg; 1075 ''' 1076 for reg in range(4): 1077 eWalkCode += ''' 1078 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw); 1079 ''' % { "reg" : reg } 1080 if readDest: 1081 for reg in range(2): 1082 eWalkCode += ''' 1083 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw); 1084 ''' % { "reg" : reg } 1085 readDestCode = '' 1086 if readDest: 1087 readDestCode = 'destElem = gtoh(destReg.elements[i]);' 1088 eWalkCode += ''' 1089 for (unsigned i = 0; i < eCount; i++) { 1090 BigElement srcElem1 = gtoh(srcReg1.elements[i]); 1091 Element destElem; 1092 %(readDest)s 1093 %(op)s 1094 destReg.elements[i] = htog(destElem); 1095 } 1096 ''' % { "op" : op, "readDest" : readDestCode } 1097 for reg in range(2): 1098 eWalkCode += ''' 1099 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 1100 ''' % { "reg" : reg } 1101 iop = 
InstObjParams(name, Name, 1102 "RegRegImmOp", 1103 { "code": eWalkCode, 1104 "r_count": 2, 1105 "predicate_test": predicateTest, 1106 "op_class": opClass }, []) 1107 header_output += NeonRegRegImmOpDeclare.subst(iop) 1108 exec_output += NeonUnequalRegExecute.subst(iop) 1109 for type in types: 1110 substDict = { "targs" : type, 1111 "class_name" : Name } 1112 exec_output += NeonExecDeclare.subst(substDict) 1113 1114 def twoRegLongShiftInst(name, Name, opClass, types, op, readDest=False): 1115 global header_output, exec_output 1116 eWalkCode = simdEnabledCheckCode + ''' 1117 RegVect srcReg1; 1118 BigRegVect destReg; 1119 ''' 1120 for reg in range(2): 1121 eWalkCode += ''' 1122 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw); 1123 ''' % { "reg" : reg } 1124 if readDest: 1125 for reg in range(4): 1126 eWalkCode += ''' 1127 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw); 1128 ''' % { "reg" : reg } 1129 readDestCode = '' 1130 if readDest: 1131 readDestCode = 'destReg = gtoh(destReg.elements[i]);' 1132 eWalkCode += ''' 1133 for (unsigned i = 0; i < eCount; i++) { 1134 Element srcElem1 = gtoh(srcReg1.elements[i]); 1135 BigElement destElem; 1136 %(readDest)s 1137 %(op)s 1138 destReg.elements[i] = htog(destElem); 1139 } 1140 ''' % { "op" : op, "readDest" : readDestCode } 1141 for reg in range(4): 1142 eWalkCode += ''' 1143 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 1144 ''' % { "reg" : reg } 1145 iop = InstObjParams(name, Name, 1146 "RegRegImmOp", 1147 { "code": eWalkCode, 1148 "r_count": 2, 1149 "predicate_test": predicateTest, 1150 "op_class": opClass }, []) 1151 header_output += NeonRegRegImmOpDeclare.subst(iop) 1152 exec_output += NeonUnequalRegExecute.subst(iop) 1153 for type in types: 1154 substDict = { "targs" : type, 1155 "class_name" : Name } 1156 exec_output += NeonExecDeclare.subst(substDict) 1157 1158 def twoRegMiscInst(name, Name, opClass, types, rCount, op, readDest=False): 1159 global header_output, exec_output 1160 eWalkCode = simdEnabledCheckCode + ''' 
1161 RegVect srcReg1, destReg; 1162 ''' 1163 for reg in range(rCount): 1164 eWalkCode += ''' 1165 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw); 1166 ''' % { "reg" : reg } 1167 if readDest: 1168 eWalkCode += ''' 1169 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw); 1170 ''' % { "reg" : reg } 1171 readDestCode = '' 1172 if readDest: 1173 readDestCode = 'destElem = gtoh(destReg.elements[i]);' 1174 eWalkCode += ''' 1175 for (unsigned i = 0; i < eCount; i++) { 1176 unsigned j = i; 1177 Element srcElem1 = gtoh(srcReg1.elements[i]); 1178 Element destElem; 1179 %(readDest)s 1180 %(op)s 1181 destReg.elements[j] = htog(destElem); 1182 } 1183 ''' % { "op" : op, "readDest" : readDestCode } 1184 for reg in range(rCount): 1185 eWalkCode += ''' 1186 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 1187 ''' % { "reg" : reg } 1188 iop = InstObjParams(name, Name, 1189 "RegRegOp", 1190 { "code": eWalkCode, 1191 "r_count": rCount, 1192 "predicate_test": predicateTest, 1193 "op_class": opClass }, []) 1194 header_output += NeonRegRegOpDeclare.subst(iop) 1195 exec_output += NeonEqualRegExecute.subst(iop) 1196 for type in types: 1197 substDict = { "targs" : type, 1198 "class_name" : Name } 1199 exec_output += NeonExecDeclare.subst(substDict) 1200 1201 def twoRegMiscScInst(name, Name, opClass, types, rCount, op, readDest=False): 1202 global header_output, exec_output 1203 eWalkCode = simdEnabledCheckCode + ''' 1204 RegVect srcReg1, destReg; 1205 ''' 1206 for reg in range(rCount): 1207 eWalkCode += ''' 1208 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw); 1209 ''' % { "reg" : reg } 1210 if readDest: 1211 eWalkCode += ''' 1212 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw); 1213 ''' % { "reg" : reg } 1214 readDestCode = '' 1215 if readDest: 1216 readDestCode = 'destElem = gtoh(destReg.elements[i]);' 1217 eWalkCode += ''' 1218 for (unsigned i = 0; i < eCount; i++) { 1219 Element srcElem1 = gtoh(srcReg1.elements[imm]); 1220 Element destElem; 1221 %(readDest)s 1222 %(op)s 1223 
destReg.elements[i] = htog(destElem); 1224 } 1225 ''' % { "op" : op, "readDest" : readDestCode } 1226 for reg in range(rCount): 1227 eWalkCode += ''' 1228 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 1229 ''' % { "reg" : reg } 1230 iop = InstObjParams(name, Name, 1231 "RegRegImmOp", 1232 { "code": eWalkCode, 1233 "r_count": rCount, 1234 "predicate_test": predicateTest, 1235 "op_class": opClass }, []) 1236 header_output += NeonRegRegImmOpDeclare.subst(iop) 1237 exec_output += NeonEqualRegExecute.subst(iop) 1238 for type in types: 1239 substDict = { "targs" : type, 1240 "class_name" : Name } 1241 exec_output += NeonExecDeclare.subst(substDict) 1242 1243 def twoRegMiscScramble(name, Name, opClass, types, rCount, op, readDest=False): 1244 global header_output, exec_output 1245 eWalkCode = simdEnabledCheckCode + ''' 1246 RegVect srcReg1, destReg; 1247 ''' 1248 for reg in range(rCount): 1249 eWalkCode += ''' 1250 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw); 1251 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw); 1252 ''' % { "reg" : reg } 1253 if readDest: 1254 eWalkCode += ''' 1255 ''' % { "reg" : reg } 1256 readDestCode = '' 1257 if readDest: 1258 readDestCode = 'destElem = gtoh(destReg.elements[i]);' 1259 eWalkCode += op 1260 for reg in range(rCount): 1261 eWalkCode += ''' 1262 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 1263 FpOp1P%(reg)d_uw = gtoh(srcReg1.regs[%(reg)d]); 1264 ''' % { "reg" : reg } 1265 iop = InstObjParams(name, Name, 1266 "RegRegOp", 1267 { "code": eWalkCode, 1268 "r_count": rCount, 1269 "predicate_test": predicateTest, 1270 "op_class": opClass }, []) 1271 header_output += NeonRegRegOpDeclare.subst(iop) 1272 exec_output += NeonEqualRegExecute.subst(iop) 1273 for type in types: 1274 substDict = { "targs" : type, 1275 "class_name" : Name } 1276 exec_output += NeonExecDeclare.subst(substDict) 1277 1278 def twoRegMiscInstFp(name, Name, opClass, types, rCount, op, 1279 readDest=False, toInt=False): 1280 global header_output, exec_output 1281 
eWalkCode = simdEnabledCheckCode + ''' 1282 typedef FloatReg FloatVect[rCount]; 1283 FloatVect srcRegs1; 1284 ''' 1285 if toInt: 1286 eWalkCode += 'RegVect destRegs;\n' 1287 else: 1288 eWalkCode += 'FloatVect destRegs;\n' 1289 for reg in range(rCount): 1290 eWalkCode += ''' 1291 srcRegs1[%(reg)d] = FpOp1P%(reg)d; 1292 ''' % { "reg" : reg } 1293 if readDest: 1294 if toInt: 1295 eWalkCode += ''' 1296 destRegs.regs[%(reg)d] = FpDestP%(reg)d.bits; 1297 ''' % { "reg" : reg } 1298 else: 1299 eWalkCode += ''' 1300 destRegs[%(reg)d] = FpDestP%(reg)d; 1301 ''' % { "reg" : reg } 1302 readDestCode = '' 1303 if readDest: 1304 readDestCode = 'destReg = destRegs[i];' 1305 destType = 'FloatReg' 1306 writeDest = 'destRegs[r] = destReg;' 1307 if toInt: 1308 destType = 'FloatRegBits' 1309 writeDest = 'destRegs.regs[r] = destReg;' 1310 eWalkCode += ''' 1311 for (unsigned r = 0; r < rCount; r++) { 1312 FloatReg srcReg1 = srcRegs1[r]; 1313 %(destType)s destReg; 1314 %(readDest)s 1315 %(op)s 1316 %(writeDest)s 1317 } 1318 ''' % { "op" : op, 1319 "readDest" : readDestCode, 1320 "destType" : destType, 1321 "writeDest" : writeDest } 1322 for reg in range(rCount): 1323 if toInt: 1324 eWalkCode += ''' 1325 FpDestP%(reg)d_uw = destRegs.regs[%(reg)d]; 1326 ''' % { "reg" : reg } 1327 else: 1328 eWalkCode += ''' 1329 FpDestP%(reg)d = destRegs[%(reg)d]; 1330 ''' % { "reg" : reg } 1331 iop = InstObjParams(name, Name, 1332 "FpRegRegOp", 1333 { "code": eWalkCode, 1334 "r_count": rCount, 1335 "predicate_test": predicateTest, 1336 "op_class": opClass }, []) 1337 header_output += NeonRegRegOpDeclare.subst(iop) 1338 exec_output += NeonEqualRegExecute.subst(iop) 1339 for type in types: 1340 substDict = { "targs" : type, 1341 "class_name" : Name } 1342 exec_output += NeonExecDeclare.subst(substDict) 1343 1344 def twoRegCondenseInst(name, Name, opClass, types, rCount, op, readDest=False): 1345 global header_output, exec_output 1346 eWalkCode = simdEnabledCheckCode + ''' 1347 RegVect srcRegs; 1348 
BigRegVect destReg; 1349 ''' 1350 for reg in range(rCount): 1351 eWalkCode += ''' 1352 srcRegs.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw); 1353 ''' % { "reg" : reg } 1354 if readDest: 1355 eWalkCode += ''' 1356 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw); 1357 ''' % { "reg" : reg } 1358 readDestCode = '' 1359 if readDest: 1360 readDestCode = 'destElem = gtoh(destReg.elements[i]);' 1361 eWalkCode += ''' 1362 for (unsigned i = 0; i < eCount / 2; i++) { 1363 Element srcElem1 = gtoh(srcRegs.elements[2 * i]); 1364 Element srcElem2 = gtoh(srcRegs.elements[2 * i + 1]); 1365 BigElement destElem; 1366 %(readDest)s 1367 %(op)s 1368 destReg.elements[i] = htog(destElem); 1369 } 1370 ''' % { "op" : op, "readDest" : readDestCode } 1371 for reg in range(rCount): 1372 eWalkCode += ''' 1373 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 1374 ''' % { "reg" : reg } 1375 iop = InstObjParams(name, Name, 1376 "RegRegOp", 1377 { "code": eWalkCode, 1378 "r_count": rCount, 1379 "predicate_test": predicateTest, 1380 "op_class": opClass }, []) 1381 header_output += NeonRegRegOpDeclare.subst(iop) 1382 exec_output += NeonUnequalRegExecute.subst(iop) 1383 for type in types: 1384 substDict = { "targs" : type, 1385 "class_name" : Name } 1386 exec_output += NeonExecDeclare.subst(substDict) 1387 1388 def twoRegNarrowMiscInst(name, Name, opClass, types, op, readDest=False): 1389 global header_output, exec_output 1390 eWalkCode = simdEnabledCheckCode + ''' 1391 BigRegVect srcReg1; 1392 RegVect destReg; 1393 ''' 1394 for reg in range(4): 1395 eWalkCode += ''' 1396 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw); 1397 ''' % { "reg" : reg } 1398 if readDest: 1399 for reg in range(2): 1400 eWalkCode += ''' 1401 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw); 1402 ''' % { "reg" : reg } 1403 readDestCode = '' 1404 if readDest: 1405 readDestCode = 'destElem = gtoh(destReg.elements[i]);' 1406 eWalkCode += ''' 1407 for (unsigned i = 0; i < eCount; i++) { 1408 BigElement srcElem1 = gtoh(srcReg1.elements[i]); 
1409 Element destElem; 1410 %(readDest)s 1411 %(op)s 1412 destReg.elements[i] = htog(destElem); 1413 } 1414 ''' % { "op" : op, "readDest" : readDestCode } 1415 for reg in range(2): 1416 eWalkCode += ''' 1417 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 1418 ''' % { "reg" : reg } 1419 iop = InstObjParams(name, Name, 1420 "RegRegOp", 1421 { "code": eWalkCode, 1422 "r_count": 2, 1423 "predicate_test": predicateTest, 1424 "op_class": opClass }, []) 1425 header_output += NeonRegRegOpDeclare.subst(iop) 1426 exec_output += NeonUnequalRegExecute.subst(iop) 1427 for type in types: 1428 substDict = { "targs" : type, 1429 "class_name" : Name } 1430 exec_output += NeonExecDeclare.subst(substDict) 1431 1432 def oneRegImmInst(name, Name, opClass, types, rCount, op, readDest=False): 1433 global header_output, exec_output 1434 eWalkCode = simdEnabledCheckCode + ''' 1435 RegVect destReg; 1436 ''' 1437 if readDest: 1438 for reg in range(rCount): 1439 eWalkCode += ''' 1440 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw); 1441 ''' % { "reg" : reg } 1442 readDestCode = '' 1443 if readDest: 1444 readDestCode = 'destElem = gtoh(destReg.elements[i]);' 1445 eWalkCode += ''' 1446 for (unsigned i = 0; i < eCount; i++) { 1447 Element destElem; 1448 %(readDest)s 1449 %(op)s 1450 destReg.elements[i] = htog(destElem); 1451 } 1452 ''' % { "op" : op, "readDest" : readDestCode } 1453 for reg in range(rCount): 1454 eWalkCode += ''' 1455 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 1456 ''' % { "reg" : reg } 1457 iop = InstObjParams(name, Name, 1458 "RegImmOp", 1459 { "code": eWalkCode, 1460 "r_count": rCount, 1461 "predicate_test": predicateTest, 1462 "op_class": opClass }, []) 1463 header_output += NeonRegImmOpDeclare.subst(iop) 1464 exec_output += NeonEqualRegExecute.subst(iop) 1465 for type in types: 1466 substDict = { "targs" : type, 1467 "class_name" : Name } 1468 exec_output += NeonExecDeclare.subst(substDict) 1469 1470 def twoRegLongMiscInst(name, Name, opClass, types, op, 
readDest=False): 1471 global header_output, exec_output 1472 eWalkCode = simdEnabledCheckCode + ''' 1473 RegVect srcReg1; 1474 BigRegVect destReg; 1475 ''' 1476 for reg in range(2): 1477 eWalkCode += ''' 1478 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw); 1479 ''' % { "reg" : reg } 1480 if readDest: 1481 for reg in range(4): 1482 eWalkCode += ''' 1483 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw); 1484 ''' % { "reg" : reg } 1485 readDestCode = '' 1486 if readDest: 1487 readDestCode = 'destReg = gtoh(destReg.elements[i]);' 1488 eWalkCode += ''' 1489 for (unsigned i = 0; i < eCount; i++) { 1490 Element srcElem1 = gtoh(srcReg1.elements[i]); 1491 BigElement destElem; 1492 %(readDest)s 1493 %(op)s 1494 destReg.elements[i] = htog(destElem); 1495 } 1496 ''' % { "op" : op, "readDest" : readDestCode } 1497 for reg in range(4): 1498 eWalkCode += ''' 1499 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 1500 ''' % { "reg" : reg } 1501 iop = InstObjParams(name, Name, 1502 "RegRegOp", 1503 { "code": eWalkCode, 1504 "r_count": 2, 1505 "predicate_test": predicateTest, 1506 "op_class": opClass }, []) 1507 header_output += NeonRegRegOpDeclare.subst(iop) 1508 exec_output += NeonUnequalRegExecute.subst(iop) 1509 for type in types: 1510 substDict = { "targs" : type, 1511 "class_name" : Name } 1512 exec_output += NeonExecDeclare.subst(substDict) 1513 1514 vhaddCode = ''' 1515 Element carryBit = 1516 (((unsigned)srcElem1 & 0x1) + 1517 ((unsigned)srcElem2 & 0x1)) >> 1; 1518 // Use division instead of a shift to ensure the sign extension works 1519 // right. The compiler will figure out if it can be a shift. Mask the 1520 // inputs so they get truncated correctly. 
1521 destElem = (((srcElem1 & ~(Element)1) / 2) + 1522 ((srcElem2 & ~(Element)1) / 2)) + carryBit; 1523 ''' 1524 threeEqualRegInst("vhadd", "VhaddD", "SimdAddOp", allTypes, 2, vhaddCode) 1525 threeEqualRegInst("vhadd", "VhaddQ", "SimdAddOp", allTypes, 4, vhaddCode) 1526 1527 vrhaddCode = ''' 1528 Element carryBit = 1529 (((unsigned)srcElem1 & 0x1) + 1530 ((unsigned)srcElem2 & 0x1) + 1) >> 1; 1531 // Use division instead of a shift to ensure the sign extension works 1532 // right. The compiler will figure out if it can be a shift. Mask the 1533 // inputs so they get truncated correctly. 1534 destElem = (((srcElem1 & ~(Element)1) / 2) + 1535 ((srcElem2 & ~(Element)1) / 2)) + carryBit; 1536 ''' 1537 threeEqualRegInst("vrhadd", "VrhaddD", "SimdAddOp", allTypes, 2, vrhaddCode) 1538 threeEqualRegInst("vrhadd", "VrhaddQ", "SimdAddOp", allTypes, 4, vrhaddCode) 1539 1540 vhsubCode = ''' 1541 Element barrowBit = 1542 (((srcElem1 & 0x1) - (srcElem2 & 0x1)) >> 1) & 0x1; 1543 // Use division instead of a shift to ensure the sign extension works 1544 // right. The compiler will figure out if it can be a shift. Mask the 1545 // inputs so they get truncated correctly. 
1546 destElem = (((srcElem1 & ~(Element)1) / 2) - 1547 ((srcElem2 & ~(Element)1) / 2)) - barrowBit; 1548 ''' 1549 threeEqualRegInst("vhsub", "VhsubD", "SimdAddOp", allTypes, 2, vhsubCode) 1550 threeEqualRegInst("vhsub", "VhsubQ", "SimdAddOp", allTypes, 4, vhsubCode) 1551 1552 vandCode = ''' 1553 destElem = srcElem1 & srcElem2; 1554 ''' 1555 threeEqualRegInst("vand", "VandD", "SimdAluOp", unsignedTypes, 2, vandCode) 1556 threeEqualRegInst("vand", "VandQ", "SimdAluOp", unsignedTypes, 4, vandCode) 1557 1558 vbicCode = ''' 1559 destElem = srcElem1 & ~srcElem2; 1560 ''' 1561 threeEqualRegInst("vbic", "VbicD", "SimdAluOp", unsignedTypes, 2, vbicCode) 1562 threeEqualRegInst("vbic", "VbicQ", "SimdAluOp", unsignedTypes, 4, vbicCode) 1563 1564 vorrCode = ''' 1565 destElem = srcElem1 | srcElem2; 1566 ''' 1567 threeEqualRegInst("vorr", "VorrD", "SimdAluOp", unsignedTypes, 2, vorrCode) 1568 threeEqualRegInst("vorr", "VorrQ", "SimdAluOp", unsignedTypes, 4, vorrCode) 1569 1570 threeEqualRegInst("vmov", "VmovD", "SimdMiscOp", unsignedTypes, 2, vorrCode) 1571 threeEqualRegInst("vmov", "VmovQ", "SimdMiscOp", unsignedTypes, 4, vorrCode) 1572 1573 vornCode = ''' 1574 destElem = srcElem1 | ~srcElem2; 1575 ''' 1576 threeEqualRegInst("vorn", "VornD", "SimdAluOp", unsignedTypes, 2, vornCode) 1577 threeEqualRegInst("vorn", "VornQ", "SimdAluOp", unsignedTypes, 4, vornCode) 1578 1579 veorCode = ''' 1580 destElem = srcElem1 ^ srcElem2; 1581 ''' 1582 threeEqualRegInst("veor", "VeorD", "SimdAluOp", unsignedTypes, 2, veorCode) 1583 threeEqualRegInst("veor", "VeorQ", "SimdAluOp", unsignedTypes, 4, veorCode) 1584 1585 vbifCode = ''' 1586 destElem = (destElem & srcElem2) | (srcElem1 & ~srcElem2); 1587 ''' 1588 threeEqualRegInst("vbif", "VbifD", "SimdAluOp", unsignedTypes, 2, vbifCode, True) 1589 threeEqualRegInst("vbif", "VbifQ", "SimdAluOp", unsignedTypes, 4, vbifCode, True) 1590 vbitCode = ''' 1591 destElem = (srcElem1 & srcElem2) | (destElem & ~srcElem2); 1592 ''' 1593 threeEqualRegInst("vbit", 
"VbitD", "SimdAluOp", unsignedTypes, 2, vbitCode, True) 1594 threeEqualRegInst("vbit", "VbitQ", "SimdAluOp", unsignedTypes, 4, vbitCode, True) 1595 vbslCode = ''' 1596 destElem = (srcElem1 & destElem) | (srcElem2 & ~destElem); 1597 ''' 1598 threeEqualRegInst("vbsl", "VbslD", "SimdAluOp", unsignedTypes, 2, vbslCode, True) 1599 threeEqualRegInst("vbsl", "VbslQ", "SimdAluOp", unsignedTypes, 4, vbslCode, True) 1600 1601 vmaxCode = ''' 1602 destElem = (srcElem1 > srcElem2) ? srcElem1 : srcElem2; 1603 ''' 1604 threeEqualRegInst("vmax", "VmaxD", "SimdCmpOp", allTypes, 2, vmaxCode) 1605 threeEqualRegInst("vmax", "VmaxQ", "SimdCmpOp", allTypes, 4, vmaxCode) 1606 1607 vminCode = ''' 1608 destElem = (srcElem1 < srcElem2) ? srcElem1 : srcElem2; 1609 ''' 1610 threeEqualRegInst("vmin", "VminD", "SimdCmpOp", allTypes, 2, vminCode) 1611 threeEqualRegInst("vmin", "VminQ", "SimdCmpOp", allTypes, 4, vminCode) 1612 1613 vaddCode = ''' 1614 destElem = srcElem1 + srcElem2; 1615 ''' 1616 threeEqualRegInst("vadd", "NVaddD", "SimdAddOp", unsignedTypes, 2, vaddCode) 1617 threeEqualRegInst("vadd", "NVaddQ", "SimdAddOp", unsignedTypes, 4, vaddCode) 1618 1619 threeEqualRegInst("vpadd", "NVpaddD", "SimdAddOp", unsignedTypes, 1620 2, vaddCode, pairwise=True) 1621 threeEqualRegInst("vpadd", "NVpaddQ", "SimdAddOp", unsignedTypes, 1622 4, vaddCode, pairwise=True) 1623 vaddlwCode = ''' 1624 destElem = (BigElement)srcElem1 + (BigElement)srcElem2; 1625 ''' 1626 threeRegLongInst("vaddl", "Vaddl", "SimdAddOp", smallTypes, vaddlwCode) 1627 threeRegWideInst("vaddw", "Vaddw", "SimdAddOp", smallTypes, vaddlwCode) 1628 vaddhnCode = ''' 1629 destElem = ((BigElement)srcElem1 + (BigElement)srcElem2) >> 1630 (sizeof(Element) * 8); 1631 ''' 1632 threeRegNarrowInst("vaddhn", "Vaddhn", "SimdAddOp", smallTypes, vaddhnCode) 1633 vraddhnCode = ''' 1634 destElem = ((BigElement)srcElem1 + (BigElement)srcElem2 + 1635 ((BigElement)1 << (sizeof(Element) * 8 - 1))) >> 1636 (sizeof(Element) * 8); 1637 ''' 1638 
threeRegNarrowInst("vraddhn", "Vraddhn", "SimdAddOp", smallTypes,
                   vraddhnCode)

# ---------------------------------------------------------------------------
# Integer subtraction.
# ---------------------------------------------------------------------------

# VSUB: plain element-wise difference.
vsubCode = '''
    destElem = srcElem1 - srcElem2;
'''
threeEqualRegInst("vsub", "NVsubD", "SimdAddOp", unsignedTypes, 2, vsubCode)
threeEqualRegInst("vsub", "NVsubQ", "SimdAddOp", unsignedTypes, 4, vsubCode)

# VSUBL / VSUBW: operands are cast to BigElement before subtracting.
vsublwCode = '''
    destElem = (BigElement)srcElem1 - (BigElement)srcElem2;
'''
threeRegLongInst("vsubl", "Vsubl", "SimdAddOp", smallTypes, vsublwCode)
threeRegWideInst("vsubw", "Vsubw", "SimdAddOp", smallTypes, vsublwCode)

# ---------------------------------------------------------------------------
# Saturating add / subtract and narrowing subtract.  The saturating snippets
# read FpscrQc into a local FPSCR, set its qc field when they clamp, and
# write it back.
# ---------------------------------------------------------------------------

# VQADD (unsigned): a wrapped sum is smaller than an input; clamp to all ones.
vqaddUCode = '''
    destElem = srcElem1 + srcElem2;
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (destElem < srcElem1 || destElem < srcElem2) {
        destElem = (Element)(-1);
        fpscr.qc = 1;
    }
    FpscrQc = fpscr;
'''
threeEqualRegInst("vqadd", "VqaddUD", "SimdAddOp", unsignedTypes, 2,
                  vqaddUCode)
threeEqualRegInst("vqadd", "VqaddUQ", "SimdAddOp", unsignedTypes, 4,
                  vqaddUCode)

# VSUBHN: subtract in BigElement and keep the part above Element's bit width.
vsubhnCode = '''
    destElem = ((BigElement)srcElem1 - (BigElement)srcElem2) >>
               (sizeof(Element) * 8);
'''
threeRegNarrowInst("vsubhn", "Vsubhn", "SimdAddOp", smallTypes, vsubhnCode)

# VRSUBHN: as VSUBHN with a rounding constant added before the shift.
vrsubhnCode = '''
    destElem = ((BigElement)srcElem1 - (BigElement)srcElem2 +
                ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
               (sizeof(Element) * 8);
'''
threeRegNarrowInst("vrsubhn", "Vrsubhn", "SimdAddOp", smallTypes,
                   vrsubhnCode)

# VQADD (signed): overflow is detected when both inputs share a sign and the
# sum does not.  The clamp starts from the sign-bit pattern and subtracts one
# when the wrapped sum was negative.
vqaddSCode = '''
    destElem = srcElem1 + srcElem2;
    FPSCR fpscr = (FPSCR) FpscrQc;
    bool negDest = (destElem < 0);
    bool negSrc1 = (srcElem1 < 0);
    bool negSrc2 = (srcElem2 < 0);
    if ((negDest != negSrc1) && (negSrc1 == negSrc2)) {
        destElem = (Element)1 << (sizeof(Element) * 8 - 1);
        if (negDest)
            destElem -= 1;
        fpscr.qc = 1;
    }
    FpscrQc = fpscr;
'''
threeEqualRegInst("vqadd", "VqaddSD", "SimdAddOp", signedTypes, 2,
                  vqaddSCode)
threeEqualRegInst("vqadd", "VqaddSQ", "SimdAddOp", signedTypes, 4,
                  vqaddSCode)

# VQSUB (unsigned): a wrapped difference is larger than the minuend; clamp
# to zero.
vqsubUCode = '''
    destElem = srcElem1 - srcElem2;
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (destElem > srcElem1) {
        destElem = 0;
        fpscr.qc = 1;
    }
    FpscrQc = fpscr;
'''
threeEqualRegInst("vqsub", "VqsubUD", "SimdAddOp", unsignedTypes, 2,
                  vqsubUCode)
threeEqualRegInst("vqsub", "VqsubUQ", "SimdAddOp", unsignedTypes, 4,
                  vqsubUCode)

# VQSUB (signed): same overflow test as the signed add, with the sense of the
# second operand's sign inverted.
vqsubSCode = '''
    destElem = srcElem1 - srcElem2;
    FPSCR fpscr = (FPSCR) FpscrQc;
    bool negDest = (destElem < 0);
    bool negSrc1 = (srcElem1 < 0);
    bool posSrc2 = (srcElem2 >= 0);
    if ((negDest != negSrc1) && (negSrc1 == posSrc2)) {
        destElem = (Element)1 << (sizeof(Element) * 8 - 1);
        if (negDest)
            destElem -= 1;
        fpscr.qc = 1;
    }
    FpscrQc = fpscr;
'''
threeEqualRegInst("vqsub", "VqsubSD", "SimdAddOp", signedTypes, 2,
                  vqsubSCode)
threeEqualRegInst("vqsub", "VqsubSQ", "SimdAddOp", signedTypes, 4,
                  vqsubSCode)

# ---------------------------------------------------------------------------
# Integer comparisons: all ones when the relation holds, zero otherwise.
# ---------------------------------------------------------------------------

vcgtCode = '''
    destElem = (srcElem1 > srcElem2) ? (Element)(-1) : 0;
'''
threeEqualRegInst("vcgt", "VcgtD", "SimdCmpOp", allTypes, 2, vcgtCode)
threeEqualRegInst("vcgt", "VcgtQ", "SimdCmpOp", allTypes, 4, vcgtCode)

vcgeCode = '''
    destElem = (srcElem1 >= srcElem2) ? (Element)(-1) : 0;
'''
threeEqualRegInst("vcge", "VcgeD", "SimdCmpOp", allTypes, 2, vcgeCode)
threeEqualRegInst("vcge", "VcgeQ", "SimdCmpOp", allTypes, 4, vcgeCode)

vceqCode = '''
    destElem = (srcElem1 == srcElem2) ? (Element)(-1) : 0;
'''
threeEqualRegInst("vceq", "VceqD", "SimdCmpOp", unsignedTypes, 2, vceqCode)
threeEqualRegInst("vceq", "VceqQ", "SimdCmpOp", unsignedTypes, 4, vceqCode)

# ---------------------------------------------------------------------------
# Shifts by a register.  The shift amount is the low byte of srcElem2 read as
# a signed value: negative shifts right, positive shifts left.
# ---------------------------------------------------------------------------

# VSHL (register).
vshlCode = '''
    int16_t shiftAmt = (int8_t)srcElem2;
    if (shiftAmt < 0) {
        shiftAmt = -shiftAmt;
        if (shiftAmt >= sizeof(Element) * 8) {
            shiftAmt = sizeof(Element) * 8 - 1;
            destElem = 0;
        } else {
            destElem = (srcElem1 >> shiftAmt);
        }
        // Make sure the right shift sign extended when it should.
        if (ltz(srcElem1) && !ltz(destElem)) {
            destElem |= -((Element)1 << (sizeof(Element) * 8 -
                                         1 - shiftAmt));
        }
    } else {
        if (shiftAmt >= sizeof(Element) * 8) {
            destElem = 0;
        } else {
            destElem = srcElem1 << shiftAmt;
        }
    }
'''
threeEqualRegInst("vshl", "VshlD", "SimdShiftOp", allTypes, 2, vshlCode)
threeEqualRegInst("vshl", "VshlQ", "SimdShiftOp", allTypes, 4, vshlCode)

# VRSHL: as VSHL, but a right shift adds the last bit shifted out (rBit).
vrshlCode = '''
    int16_t shiftAmt = (int8_t)srcElem2;
    if (shiftAmt < 0) {
        shiftAmt = -shiftAmt;
        Element rBit = 0;
        if (shiftAmt <= sizeof(Element) * 8)
            rBit = bits(srcElem1, shiftAmt - 1);
        if (shiftAmt > sizeof(Element) * 8 && ltz(srcElem1))
            rBit = 1;
        if (shiftAmt >= sizeof(Element) * 8) {
            shiftAmt = sizeof(Element) * 8 - 1;
            destElem = 0;
        } else {
            destElem = (srcElem1 >> shiftAmt);
        }
        // Make sure the right shift sign extended when it should.
        if (ltz(srcElem1) && !ltz(destElem)) {
            destElem |= -((Element)1 << (sizeof(Element) * 8 -
                                         1 - shiftAmt));
        }
        destElem += rBit;
    } else if (shiftAmt > 0) {
        if (shiftAmt >= sizeof(Element) * 8) {
            destElem = 0;
        } else {
            destElem = srcElem1 << shiftAmt;
        }
    } else {
        destElem = srcElem1;
    }
'''
threeEqualRegInst("vrshl", "VrshlD", "SimdAluOp", allTypes, 2, vrshlCode)
threeEqualRegInst("vrshl", "VrshlQ", "SimdAluOp", allTypes, 4, vrshlCode)

# VQSHL (unsigned, register): a left shift that would drop set bits clamps to
# all ones and sets qc.
vqshlUCode = '''
    int16_t shiftAmt = (int8_t)srcElem2;
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (shiftAmt < 0) {
        shiftAmt = -shiftAmt;
        if (shiftAmt >= sizeof(Element) * 8) {
            shiftAmt = sizeof(Element) * 8 - 1;
            destElem = 0;
        } else {
            destElem = (srcElem1 >> shiftAmt);
        }
    } else if (shiftAmt > 0) {
        if (shiftAmt >= sizeof(Element) * 8) {
            if (srcElem1 != 0) {
                destElem = mask(sizeof(Element) * 8);
                fpscr.qc = 1;
            } else {
                destElem = 0;
            }
        } else {
            if (bits(srcElem1, sizeof(Element) * 8 - 1,
                        sizeof(Element) * 8 - shiftAmt)) {
                destElem = mask(sizeof(Element) * 8);
                fpscr.qc = 1;
            } else {
                destElem = srcElem1 << shiftAmt;
            }
        }
    } else {
        destElem = srcElem1;
    }
    FpscrQc = fpscr;
'''
threeEqualRegInst("vqshl", "VqshlUD", "SimdAluOp", unsignedTypes, 2,
                  vqshlUCode)
threeEqualRegInst("vqshl", "VqshlUQ", "SimdAluOp", unsignedTypes, 4,
                  vqshlUCode)

# VQSHL (signed, register): on overflow the result is the largest positive
# value, complemented when the source was negative.
vqshlSCode = '''
    int16_t shiftAmt = (int8_t)srcElem2;
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (shiftAmt < 0) {
        shiftAmt = -shiftAmt;
        if (shiftAmt >= sizeof(Element) * 8) {
            shiftAmt = sizeof(Element) * 8 - 1;
            destElem = 0;
        } else {
            destElem = (srcElem1 >> shiftAmt);
        }
        // Make sure the right shift sign extended when it should.
        if (srcElem1 < 0 && destElem >= 0) {
            destElem |= -((Element)1 << (sizeof(Element) * 8 -
                                         1 - shiftAmt));
        }
    } else if (shiftAmt > 0) {
        bool sat = false;
        if (shiftAmt >= sizeof(Element) * 8) {
            if (srcElem1 != 0)
                sat = true;
            else
                destElem = 0;
        } else {
            if (bits(srcElem1, sizeof(Element) * 8 - 1,
                        sizeof(Element) * 8 - 1 - shiftAmt) !=
                    ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
                sat = true;
            } else {
                destElem = srcElem1 << shiftAmt;
            }
        }
        if (sat) {
            fpscr.qc = 1;
            destElem = mask(sizeof(Element) * 8 - 1);
            if (srcElem1 < 0)
                destElem = ~destElem;
        }
    } else {
        destElem = srcElem1;
    }
    FpscrQc = fpscr;
'''
threeEqualRegInst("vqshl", "VqshlSD", "SimdCmpOp", signedTypes, 2,
                  vqshlSCode)
threeEqualRegInst("vqshl", "VqshlSQ", "SimdCmpOp", signedTypes, 4,
                  vqshlSCode)

# VQRSHL (unsigned, register): rounding right shift or saturating left shift.
# A zero shift has its own branch, matching the other saturating register
# shifts: previously it fell into the left-shift path and asked bits() for a
# field whose low index equals the element width.
vqrshlUCode = '''
    int16_t shiftAmt = (int8_t)srcElem2;
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (shiftAmt < 0) {
        shiftAmt = -shiftAmt;
        Element rBit = 0;
        if (shiftAmt <= sizeof(Element) * 8)
            rBit = bits(srcElem1, shiftAmt - 1);
        if (shiftAmt >= sizeof(Element) * 8) {
            shiftAmt = sizeof(Element) * 8 - 1;
            destElem = 0;
        } else {
            destElem = (srcElem1 >> shiftAmt);
        }
        destElem += rBit;
    } else if (shiftAmt > 0) {
        if (shiftAmt >= sizeof(Element) * 8) {
            if (srcElem1 != 0) {
                destElem = mask(sizeof(Element) * 8);
                fpscr.qc = 1;
            } else {
                destElem = 0;
            }
        } else {
            if (bits(srcElem1, sizeof(Element) * 8 - 1,
                        sizeof(Element) * 8 - shiftAmt)) {
                destElem = mask(sizeof(Element) * 8);
                fpscr.qc = 1;
            } else {
                destElem = srcElem1 << shiftAmt;
            }
        }
    } else {
        // A shift of zero can neither round nor saturate.
        destElem = srcElem1;
    }
    FpscrQc = fpscr;
'''
threeEqualRegInst("vqrshl", "VqrshlUD", "SimdCmpOp", unsignedTypes, 2,
                  vqrshlUCode)
threeEqualRegInst("vqrshl", "VqrshlUQ", "SimdCmpOp", unsignedTypes, 4,
                  vqrshlUCode)

# VQRSHL (signed, register): rounding right shift with sign repair, or
# saturating left shift.
vqrshlSCode = '''
    int16_t shiftAmt = (int8_t)srcElem2;
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (shiftAmt < 0) {
        shiftAmt = -shiftAmt;
        Element rBit = 0;
        if (shiftAmt <= sizeof(Element) * 8)
            rBit = bits(srcElem1, shiftAmt - 1);
        if (shiftAmt > sizeof(Element) * 8 && srcElem1 < 0)
            rBit = 1;
        if (shiftAmt >= sizeof(Element) * 8) {
            shiftAmt = sizeof(Element) * 8 - 1;
            destElem = 0;
        } else {
            destElem = (srcElem1 >> shiftAmt);
        }
        // Make sure the right shift sign extended when it should.
        if (srcElem1 < 0 && destElem >= 0) {
            destElem |= -((Element)1 << (sizeof(Element) * 8 -
                                         1 - shiftAmt));
        }
        destElem += rBit;
    } else if (shiftAmt > 0) {
        bool sat = false;
        if (shiftAmt >= sizeof(Element) * 8) {
            if (srcElem1 != 0)
                sat = true;
            else
                destElem = 0;
        } else {
            if (bits(srcElem1, sizeof(Element) * 8 - 1,
                        sizeof(Element) * 8 - 1 - shiftAmt) !=
                    ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
                sat = true;
            } else {
                destElem = srcElem1 << shiftAmt;
            }
        }
        if (sat) {
            fpscr.qc = 1;
            destElem = mask(sizeof(Element) * 8 - 1);
            if (srcElem1 < 0)
                destElem = ~destElem;
        }
    } else {
        destElem = srcElem1;
    }
    FpscrQc = fpscr;
'''
threeEqualRegInst("vqrshl", "VqrshlSD", "SimdCmpOp", signedTypes, 2,
                  vqrshlSCode)
threeEqualRegInst("vqrshl", "VqrshlSQ", "SimdCmpOp", signedTypes, 4,
                  vqrshlSCode)

# ---------------------------------------------------------------------------
# Absolute difference, with and without accumulation.  The larger operand is
# chosen as the minuend, so the difference is never negative.
# ---------------------------------------------------------------------------

# VABA: accumulate |src1 - src2| into the destination.
vabaCode = '''
    destElem += (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
                                        (srcElem2 - srcElem1);
'''
threeEqualRegInst("vaba", "VabaD", "SimdAddAccOp", allTypes, 2,
                  vabaCode, True)
threeEqualRegInst("vaba", "VabaQ", "SimdAddAccOp", allTypes, 4,
                  vabaCode, True)

# VABAL: as VABA with the subtraction done in BigElement.
vabalCode = '''
    destElem += (srcElem1 > srcElem2) ?
        ((BigElement)srcElem1 - (BigElement)srcElem2) :
        ((BigElement)srcElem2 - (BigElement)srcElem1);
'''
threeRegLongInst("vabal", "Vabal", "SimdAddAccOp", smallTypes,
                 vabalCode, True)

# VABD: |src1 - src2| without accumulation.
vabdCode = '''
    destElem = (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
                                       (srcElem2 - srcElem1);
'''
threeEqualRegInst("vabd", "VabdD", "SimdAddOp", allTypes, 2, vabdCode)
threeEqualRegInst("vabd", "VabdQ", "SimdAddOp", allTypes, 4, vabdCode)

# VABDL: as VABD with the subtraction done in BigElement.
vabdlCode = '''
    destElem = (srcElem1 > srcElem2) ?
        ((BigElement)srcElem1 - (BigElement)srcElem2) :
        ((BigElement)srcElem2 - (BigElement)srcElem1);
'''
threeRegLongInst("vabdl", "Vabdl", "SimdAddOp", smallTypes, vabdlCode)

# VTST: all ones when the operands share at least one set bit.
vtstCode = '''
    destElem = (srcElem1 & srcElem2) ? (Element)(-1) : 0;
'''
threeEqualRegInst("vtst", "VtstD", "SimdAluOp", unsignedTypes, 2, vtstCode)
threeEqualRegInst("vtst", "VtstQ", "SimdAluOp", unsignedTypes, 4, vtstCode)

# ---------------------------------------------------------------------------
# Integer multiply and multiply-accumulate.
# ---------------------------------------------------------------------------

vmulCode = '''
    destElem = srcElem1 * srcElem2;
'''
threeEqualRegInst("vmul", "NVmulD", "SimdMultOp", allTypes, 2, vmulCode)
threeEqualRegInst("vmul", "NVmulQ", "SimdMultOp", allTypes, 4, vmulCode)

vmullCode = '''
    destElem = (BigElement)srcElem1 * (BigElement)srcElem2;
'''
threeRegLongInst("vmull", "Vmull", "SimdMultOp", smallTypes, vmullCode)

vmlaCode = '''
    destElem = destElem + srcElem1 * srcElem2;
'''
threeEqualRegInst("vmla", "NVmlaD", "SimdMultAccOp", allTypes, 2,
                  vmlaCode, True)
threeEqualRegInst("vmla", "NVmlaQ", "SimdMultAccOp", allTypes, 4,
                  vmlaCode, True)

vmlalCode = '''
    destElem = destElem + (BigElement)srcElem1 * (BigElement)srcElem2;
'''
threeRegLongInst("vmlal", "Vmlal", "SimdMultAccOp", smallTypes,
                 vmlalCode, True)

# VQDMLAL: saturating doubling multiply, then saturating accumulate.
# The doubled product can only exceed the BigElement range when both inputs
# are the most negative Element value, so that is the only case where the
# product itself is clamped.  (An earlier form also clamped when one input
# was half of the most negative value; that product fits and must not
# saturate.)
vqdmlalCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
    Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
    if (srcElem1 == maxNeg && srcElem2 == maxNeg) {
        midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
        fpscr.qc = 1;
    }
    bool negPreDest = ltz(destElem);
    destElem += midElem;
    bool negDest = ltz(destElem);
    bool negMid = ltz(midElem);
    if (negPreDest == negMid && negMid != negDest) {
        destElem = mask(sizeof(BigElement) * 8 - 1);
        if (negPreDest)
            destElem = ~destElem;
        fpscr.qc = 1;
    }
    FpscrQc = fpscr;
'''
threeRegLongInst("vqdmlal", "Vqdmlal", "SimdMultAccOp", smallTypes,
                 vqdmlalCode, True)

# VQDMLSL: as VQDMLAL but the doubled product is subtracted.  The product is
# clamped under the same single condition.
vqdmlslCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
    Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
    if (srcElem1 == maxNeg && srcElem2 == maxNeg) {
        midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
        fpscr.qc = 1;
    }
    bool negPreDest = ltz(destElem);
    destElem -= midElem;
    bool negDest = ltz(destElem);
    bool posMid = ltz((BigElement)-midElem);
    if (negPreDest == posMid && posMid != negDest) {
        destElem = mask(sizeof(BigElement) * 8 - 1);
        if (negPreDest)
            destElem = ~destElem;
        fpscr.qc = 1;
    }
    FpscrQc = fpscr;
'''
threeRegLongInst("vqdmlsl", "Vqdmlsl", "SimdMultAccOp", smallTypes,
                 vqdmlslCode, True)

# VQDMULL: saturating doubling multiply long; clamps only when both inputs
# equal the most negative Element value.
vqdmullCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
    if (srcElem1 == srcElem2 &&
            srcElem1 == (Element)((Element)1 <<
                (Element)(sizeof(Element) * 8 - 1))) {
        destElem = ~((BigElement)srcElem1 << (sizeof(Element) * 8));
        fpscr.qc = 1;
    }
    FpscrQc = fpscr;
'''
threeRegLongInst("vqdmull", "Vqdmull", "SimdMultAccOp", smallTypes,
                 vqdmullCode)

vmlsCode = '''
    destElem = destElem - srcElem1 * srcElem2;
'''
threeEqualRegInst("vmls", "NVmlsD", "SimdMultAccOp", allTypes, 2,
                  vmlsCode, True)
threeEqualRegInst("vmls", "NVmlsQ", "SimdMultAccOp", allTypes, 4,
                  vmlsCode, True)

vmlslCode = '''
    destElem = destElem - (BigElement)srcElem1 * (BigElement)srcElem2;
'''
threeRegLongInst("vmlsl", "Vmlsl", "SimdMultAccOp", smallTypes,
                 vmlslCode, True)

# Polynomial multiply: XOR together copies of srcElem1 shifted left by the
# position of every set bit in srcElem2.
vmulpCode = '''
    destElem = 0;
    for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
        if (bits(srcElem2, j))
            destElem ^= srcElem1 << j;
    }
'''
threeEqualRegInst("vmul", "NVmulpD", "SimdMultOp", unsignedTypes, 2,
                  vmulpCode)
threeEqualRegInst("vmul", "NVmulpQ", "SimdMultOp", unsignedTypes, 4,
                  vmulpCode)

# Polynomial multiply long: the shifted copies are built in BigElement.
vmullpCode = '''
    destElem = 0;
    for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
        if (bits(srcElem2, j))
            destElem ^= (BigElement)srcElem1 << j;
    }
'''
threeRegLongInst("vmull", "Vmullp", "SimdMultOp", smallUnsignedTypes,
                 vmullpCode)

# Pairwise max / min reuse the element-wise snippets (vmaxCode and vminCode,
# defined earlier in this file) with pairwise=True.
threeEqualRegInst("vpmax", "VpmaxD", "SimdCmpOp", allTypes, 2,
                  vmaxCode, pairwise=True)
threeEqualRegInst("vpmax", "VpmaxQ", "SimdCmpOp", allTypes, 4,
                  vmaxCode, pairwise=True)

threeEqualRegInst("vpmin", "VpminD", "SimdCmpOp", allTypes, 2,
                  vminCode, pairwise=True)
threeEqualRegInst("vpmin", "VpminQ", "SimdCmpOp", allTypes, 4,
                  vminCode, pairwise=True)

# VQDMULH: upper half of the doubled product; clamps only when both inputs
# equal the most negative Element value.
vqdmulhCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2) >>
               (sizeof(Element) * 8);
    if (srcElem1 == srcElem2 &&
            srcElem1 == (Element)((Element)1 <<
                (sizeof(Element) * 8 - 1))) {
        destElem = ~srcElem1;
        fpscr.qc = 1;
    }
    FpscrQc = fpscr;
'''
threeEqualRegInst("vqdmulh", "VqdmulhD", "SimdMultOp", smallSignedTypes, 2,
                  vqdmulhCode)
threeEqualRegInst("vqdmulh", "VqdmulhQ", "SimdMultOp", smallSignedTypes, 4,
                  vqdmulhCode)

# VQRDMULH: as VQDMULH with a rounding constant added before the shift.
# The only overflowing input pair is most-negative times most-negative, and
# the clamped result is then the largest positive value.  (An earlier form
# also treated "half of most negative" pairs as overflow and replaced their
# valid positive result with the most negative value.)
vqrdmulhCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2 +
                ((int64_t)1 << (sizeof(Element) * 8 - 1))) >>
               (sizeof(Element) * 8);
    Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
    if (srcElem1 == maxNeg && srcElem2 == maxNeg) {
        destElem = mask(sizeof(Element) * 8 - 1);
        fpscr.qc = 1;
    }
    FpscrQc = fpscr;
'''
threeEqualRegInst("vqrdmulh", "VqrdmulhD",
                  "SimdMultOp", smallSignedTypes, 2, vqrdmulhCode)
threeEqualRegInst("vqrdmulh", "VqrdmulhQ",
                  "SimdMultOp", smallSignedTypes, 4, vqrdmulhCode)

# ---------------------------------------------------------------------------
# Floating-point maximum / minimum.  processNans() gets the first chance to
# produce the result; binaryOp() runs only when it reports done == false.
# When it did finish and flushToZero() returns true, idc is set.
# ---------------------------------------------------------------------------

vmaxfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    bool done;
    destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
    if (!done) {
        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMaxS,
                           true, true, VfpRoundNearest);
    } else if (flushToZero(srcReg1, srcReg2)) {
        fpscr.idc = 1;
    }
    FpscrExc = fpscr;
'''
threeEqualRegInstFp("vmax", "VmaxDFp", "SimdFloatCmpOp", ("float",), 2,
                    vmaxfpCode)
threeEqualRegInstFp("vmax", "VmaxQFp", "SimdFloatCmpOp", ("float",), 4,
                    vmaxfpCode)

vminfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    bool done;
    destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
    if (!done) {
        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMinS,
                           true, true, VfpRoundNearest);
    } else if (flushToZero(srcReg1, srcReg2)) {
        fpscr.idc = 1;
    }
    FpscrExc = fpscr;
'''
threeEqualRegInstFp("vmin", "VminDFp", "SimdFloatCmpOp", ("float",), 2,
                    vminfpCode)
threeEqualRegInstFp("vmin", "VminQFp", "SimdFloatCmpOp", ("float",), 4,
                    vminfpCode)

# Pairwise floating-point max / min reuse the element-wise snippets with
# pairwise=True.
threeEqualRegInstFp("vpmax", "VpmaxDFp", "SimdFloatCmpOp", ("float",),
                    2, vmaxfpCode, pairwise=True)
threeEqualRegInstFp("vpmax", "VpmaxQFp", "SimdFloatCmpOp", ("float",),
                    4, vmaxfpCode, pairwise=True)

threeEqualRegInstFp("vpmin", "VpminDFp", "SimdFloatCmpOp", ("float",),
                    2, vminfpCode, pairwise=True)
threeEqualRegInstFp("vpmin", "VpminQFp", "SimdFloatCmpOp", ("float",),
                    4, vminfpCode, pairwise=True)

# ---------------------------------------------------------------------------
# Floating-point arithmetic.  Each snippet copies FpscrExc into a local FPSCR,
# hands it to binaryOp() together with the operation functor and
# VfpRoundNearest, and writes the local copy back afterwards.
# ---------------------------------------------------------------------------

# VADD.F32
vaddfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    destReg = binaryOp(fpscr, srcReg1, srcReg2, fpAddS,
                       true, true, VfpRoundNearest);
    FpscrExc = fpscr;
'''
threeEqualRegInstFp("vadd", "VaddDFp", "SimdFloatAddOp", ("float",), 2,
                    vaddfpCode)
threeEqualRegInstFp("vadd", "VaddQFp", "SimdFloatAddOp", ("float",), 4,
                    vaddfpCode)

# VPADD.F32: same snippet, pairwise.
threeEqualRegInstFp("vpadd", "VpaddDFp", "SimdFloatAddOp", ("float",),
                    2, vaddfpCode, pairwise=True)
threeEqualRegInstFp("vpadd", "VpaddQFp", "SimdFloatAddOp", ("float",),
                    4, vaddfpCode, pairwise=True)

# VSUB.F32
vsubfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    destReg = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
                       true, true, VfpRoundNearest);
    FpscrExc = fpscr;
'''
threeEqualRegInstFp("vsub", "VsubDFp", "SimdFloatAddOp", ("float",), 2,
                    vsubfpCode)
threeEqualRegInstFp("vsub", "VsubQFp", "SimdFloatAddOp", ("float",), 4,
                    vsubfpCode)

# VMUL.F32
vmulfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
                       true, true, VfpRoundNearest);
    FpscrExc = fpscr;
'''
threeEqualRegInstFp("vmul", "NVmulDFp", "SimdFloatMultOp", ("float",), 2,
                    vmulfpCode)
threeEqualRegInstFp("vmul", "NVmulQFp", "SimdFloatMultOp", ("float",), 4,
                    vmulfpCode)

# VMLA.F32: two separate binaryOp() calls -- multiply first, then add the
# product to the old destination.
vmlafpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
                         true, true, VfpRoundNearest);
    destReg = binaryOp(fpscr, mid, destReg, fpAddS,
                       true, true, VfpRoundNearest);
    FpscrExc = fpscr;
'''
threeEqualRegInstFp("vmla", "NVmlaDFp", "SimdFloatMultAccOp", ("float",), 2,
                    vmlafpCode, True)
threeEqualRegInstFp("vmla", "NVmlaQFp", "SimdFloatMultAccOp", ("float",), 4,
                    vmlafpCode, True)

# VMLS.F32: multiply, then subtract the product from the old destination.
vmlsfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
                         true, true, VfpRoundNearest);
    destReg = binaryOp(fpscr, destReg, mid, fpSubS,
                       true, true, VfpRoundNearest);
    FpscrExc = fpscr;
'''
threeEqualRegInstFp("vmls", "NVmlsDFp", "SimdFloatMultAccOp", ("float",), 2,
                    vmlsfpCode, True)
threeEqualRegInstFp("vmls", "NVmlsQFp", "SimdFloatMultAccOp", ("float",), 4,
                    vmlsfpCode, True)

# ---------------------------------------------------------------------------
# Floating-point comparisons.  The comparison functor's return value is
# decoded here: 0 produces an all-ones result (-1), anything else produces 0,
# and the value 2.0 additionally sets ioc.  All are registered with
# toInt=True.
# ---------------------------------------------------------------------------

vcgtfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float res = binaryOp(fpscr, srcReg1, srcReg2, vcgtFunc,
                         true, true, VfpRoundNearest);
    destReg = (res == 0) ? -1 : 0;
    if (res == 2.0)
        fpscr.ioc = 1;
    FpscrExc = fpscr;
'''
threeEqualRegInstFp("vcgt", "VcgtDFp", "SimdFloatCmpOp", ("float",),
                    2, vcgtfpCode, toInt=True)
threeEqualRegInstFp("vcgt", "VcgtQFp", "SimdFloatCmpOp", ("float",),
                    4, vcgtfpCode, toInt=True)

vcgefpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float res = binaryOp(fpscr, srcReg1, srcReg2, vcgeFunc,
                         true, true, VfpRoundNearest);
    destReg = (res == 0) ? -1 : 0;
    if (res == 2.0)
        fpscr.ioc = 1;
    FpscrExc = fpscr;
'''
threeEqualRegInstFp("vcge", "VcgeDFp", "SimdFloatCmpOp", ("float",),
                    2, vcgefpCode, toInt=True)
threeEqualRegInstFp("vcge", "VcgeQFp", "SimdFloatCmpOp", ("float",),
                    4, vcgefpCode, toInt=True)

vacgtfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float res = binaryOp(fpscr, srcReg1, srcReg2, vacgtFunc,
                         true, true, VfpRoundNearest);
    destReg = (res == 0) ? -1 : 0;
    if (res == 2.0)
        fpscr.ioc = 1;
    FpscrExc = fpscr;
'''
threeEqualRegInstFp("vacgt", "VacgtDFp", "SimdFloatCmpOp", ("float",),
                    2, vacgtfpCode, toInt=True)
threeEqualRegInstFp("vacgt", "VacgtQFp", "SimdFloatCmpOp", ("float",),
                    4, vacgtfpCode, toInt=True)

vacgefpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float res = binaryOp(fpscr, srcReg1, srcReg2, vacgeFunc,
                         true, true, VfpRoundNearest);
    destReg = (res == 0) ? -1 : 0;
    if (res == 2.0)
        fpscr.ioc = 1;
    FpscrExc = fpscr;
'''
threeEqualRegInstFp("vacge", "VacgeDFp", "SimdFloatCmpOp", ("float",),
                    2, vacgefpCode, toInt=True)
threeEqualRegInstFp("vacge", "VacgeQFp", "SimdFloatCmpOp", ("float",),
                    4, vacgefpCode, toInt=True)

vceqfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float res = binaryOp(fpscr, srcReg1, srcReg2, vceqFunc,
                         true, true, VfpRoundNearest);
    destReg = (res == 0) ? -1 : 0;
    if (res == 2.0)
        fpscr.ioc = 1;
    FpscrExc = fpscr;
'''
threeEqualRegInstFp("vceq", "VceqDFp", "SimdFloatCmpOp", ("float",),
                    2, vceqfpCode, toInt=True)
threeEqualRegInstFp("vceq", "VceqQFp", "SimdFloatCmpOp", ("float",),
                    4, vceqfpCode, toInt=True)

# VRECPS / VRSQRTS: the step functions are delegated to fpRecpsS and
# fpRSqrtsS through binaryOp().
vrecpsCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRecpsS,
                       true, true, VfpRoundNearest);
    FpscrExc = fpscr;
'''
threeEqualRegInstFp("vrecps", "VrecpsDFp", "SimdFloatMultAccOp", ("float",),
                    2, vrecpsCode)
threeEqualRegInstFp("vrecps", "VrecpsQFp", "SimdFloatMultAccOp", ("float",),
                    4, vrecpsCode)

vrsqrtsCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRSqrtsS,
                       true, true, VfpRoundNearest);
    FpscrExc = fpscr;
'''
threeEqualRegInstFp("vrsqrts", "VrsqrtsDFp", "SimdFloatMiscOp", ("float",),
                    2, vrsqrtsCode)
threeEqualRegInstFp("vrsqrts", "VrsqrtsQFp", "SimdFloatMiscOp", ("float",),
                    4, vrsqrtsCode)

# VABD.F32: subtract through binaryOp(), then take fabs() of the difference.
vabdfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float mid = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
                         true, true, VfpRoundNearest);
    destReg = fabs(mid);
    FpscrExc = fpscr;
'''
threeEqualRegInstFp("vabd", "VabdDFp", "SimdFloatAddOp", ("float",), 2,
                    vabdfpCode)
threeEqualRegInstFp("vabd", "VabdQFp", "SimdFloatAddOp", ("float",), 4,
                    vabdfpCode)

# ---------------------------------------------------------------------------
# Registrations through the two-register generators.  They reuse snippets
# defined earlier in this file (vmlaCode, vmlsCode, vmulCode, the vqdm*
# family and their floating-point counterparts) under "...s" class names --
# presumably the by-scalar encodings; confirm against the decoder.
# ---------------------------------------------------------------------------

twoEqualRegInst("vmla", "VmlasD", "SimdMultAccOp", unsignedTypes, 2,
                vmlaCode, True)
twoEqualRegInst("vmla", "VmlasQ", "SimdMultAccOp", unsignedTypes, 4,
                vmlaCode, True)
twoEqualRegInstFp("vmla", "VmlasDFp", "SimdFloatMultAccOp", ("float",), 2,
                  vmlafpCode, True)
twoEqualRegInstFp("vmla", "VmlasQFp", "SimdFloatMultAccOp", ("float",), 4,
                  vmlafpCode, True)
twoRegLongInst("vmlal", "Vmlals", "SimdMultAccOp", smallTypes,
               vmlalCode, True)

twoEqualRegInst("vmls", "VmlssD", "SimdMultAccOp", allTypes, 2,
                vmlsCode, True)
twoEqualRegInst("vmls", "VmlssQ", "SimdMultAccOp", allTypes, 4,
                vmlsCode, True)
twoEqualRegInstFp("vmls", "VmlssDFp", "SimdFloatMultAccOp", ("float",), 2,
                  vmlsfpCode, True)
twoEqualRegInstFp("vmls", "VmlssQFp", "SimdFloatMultAccOp", ("float",), 4,
                  vmlsfpCode, True)
twoRegLongInst("vmlsl", "Vmlsls", "SimdMultAccOp", smallTypes,
               vmlslCode, True)

twoEqualRegInst("vmul", "VmulsD", "SimdMultOp", allTypes, 2, vmulCode)
twoEqualRegInst("vmul", "VmulsQ", "SimdMultOp", allTypes, 4, vmulCode)
twoEqualRegInstFp("vmul", "VmulsDFp", "SimdFloatMultOp", ("float",), 2,
                  vmulfpCode)
twoEqualRegInstFp("vmul", "VmulsQFp", "SimdFloatMultOp", ("float",), 4,
                  vmulfpCode)
twoRegLongInst("vmull", "Vmulls", "SimdMultOp", smallTypes, vmullCode)

twoRegLongInst("vqdmull", "Vqdmulls", "SimdMultOp", smallTypes, vqdmullCode)
twoRegLongInst("vqdmlal", "Vqdmlals", "SimdMultAccOp", smallTypes,
               vqdmlalCode, True)
twoRegLongInst("vqdmlsl", "Vqdmlsls", "SimdMultAccOp", smallTypes,
               vqdmlslCode, True)
twoEqualRegInst("vqdmulh", "VqdmulhsD", "SimdMultOp", smallSignedTypes, 2,
                vqdmulhCode)
twoEqualRegInst("vqdmulh", "VqdmulhsQ", "SimdMultOp", smallSignedTypes, 4,
                vqdmulhCode)
twoEqualRegInst("vqrdmulh", "VqrdmulhsD",
                "SimdMultOp", smallSignedTypes, 2, vqrdmulhCode)
twoEqualRegInst("vqrdmulh", "VqrdmulhsQ",
                "SimdMultOp", smallSignedTypes, 4, vqrdmulhCode)

# ---------------------------------------------------------------------------
# Right shifts by an immediate (imm).
# ---------------------------------------------------------------------------

# VSHR: a shift of the full width or more yields -1 for a negative source
# (as judged by ltz()) and 0 otherwise.
vshrCode = '''
    if (imm >= sizeof(srcElem1) * 8) {
        if (ltz(srcElem1))
            destElem = -1;
        else
            destElem = 0;
    } else {
        destElem = srcElem1 >> imm;
    }
'''
twoRegShiftInst("vshr", "NVshrD", "SimdShiftOp", allTypes, 2, vshrCode)
twoRegShiftInst("vshr", "NVshrQ", "SimdShiftOp", allTypes, 4, vshrCode)

# VSRA: shift right, repair the sign if the shift did not propagate it, then
# add the shifted value to the destination.
vsraCode = '''
    Element mid;;
    if (imm >= sizeof(srcElem1) * 8) {
        mid = ltz(srcElem1) ? -1 : 0;
    } else {
        mid = srcElem1 >> imm;
        if (ltz(srcElem1) && !ltz(mid)) {
            mid |= -(mid & ((Element)1 <<
                            (sizeof(Element) * 8 - 1 - imm)));
        }
    }
    destElem += mid;
'''
twoRegShiftInst("vsra", "NVsraD", "SimdShiftAccOp", allTypes, 2,
                vsraCode, True)
twoRegShiftInst("vsra", "NVsraQ", "SimdShiftAccOp", allTypes, 4,
                vsraCode, True)

# VRSHR: rounding right shift.  The shift is split into (imm - 1) followed
# by 1, and the last bit shifted out (rBit) is added back.
vrshrCode = '''
    if (imm > sizeof(srcElem1) * 8) {
        destElem = 0;
    } else if (imm) {
        Element rBit = bits(srcElem1, imm - 1);
        destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
    } else {
        destElem = srcElem1;
    }
'''
twoRegShiftInst("vrshr", "NVrshrD", "SimdShiftOp", allTypes, 2, vrshrCode)
twoRegShiftInst("vrshr", "NVrshrQ", "SimdShiftOp", allTypes, 4, vrshrCode)

# VRSRA: the VRSHR result is accumulated into the destination.
vrsraCode = '''
    if (imm > sizeof(srcElem1) * 8) {
        destElem += 0;
    } else if (imm) {
        Element rBit = bits(srcElem1, imm - 1);
        destElem += ((srcElem1 >> (imm - 1)) >> 1) + rBit;
    } else {
        destElem += srcElem1;
    }
'''
twoRegShiftInst("vrsra", "NVrsraD", "SimdShiftAccOp", allTypes, 2,
                vrsraCode, True)
twoRegShiftInst("vrsra", "NVrsraQ", "SimdShiftAccOp", allTypes, 4,
                vrsraCode, True)

# ---------------------------------------------------------------------------
# Insert shifts and left shifts by an immediate (imm).
# ---------------------------------------------------------------------------

# VSRI: shift right and insert.  The shifted source is merged with the
# destination bits selected by ~mask(width - imm); a full-width shift leaves
# the destination untouched.
vsriCode = '''
    if (imm >= sizeof(Element) * 8)
        destElem = destElem;
    else
        destElem = (srcElem1 >> imm) |
            (destElem & ~mask(sizeof(Element) * 8 - imm));
'''
twoRegShiftInst("vsri", "NVsriD", "SimdShiftOp", unsignedTypes, 2,
                vsriCode, True)
twoRegShiftInst("vsri", "NVsriQ", "SimdShiftOp", unsignedTypes, 4,
                vsriCode, True)

# VSHL (immediate).  This rebinds the name vshlCode, which was used earlier
# in the file for the register-shift snippet.  A full-width shift is written
# as (width - 1) followed by 1.
vshlCode = '''
    if (imm >= sizeof(Element) * 8)
        destElem = (srcElem1 << (sizeof(Element) * 8 - 1)) << 1;
    else
        destElem = srcElem1 << imm;
'''
twoRegShiftInst("vshl", "NVshlD", "SimdShiftOp", unsignedTypes, 2, vshlCode)
twoRegShiftInst("vshl", "NVshlQ", "SimdShiftOp", unsignedTypes, 4, vshlCode)

# VSLI: shift left and insert; the low imm bits of the destination survive.
vsliCode = '''
    if (imm >= sizeof(Element) * 8)
        destElem = destElem;
    else
        destElem = (srcElem1 << imm) | (destElem & mask(imm));
'''
twoRegShiftInst("vsli", "NVsliD", "SimdShiftOp", unsignedTypes, 2,
                vsliCode, True)
twoRegShiftInst("vsli", "NVsliQ", "SimdShiftOp", unsignedTypes, 4,
                vsliCode, True)

# ---------------------------------------------------------------------------
# Saturating left shifts by an immediate.  Each snippet sets qc in its local
# FPSCR copy whenever it clamps and writes the copy back to FpscrQc.
# ---------------------------------------------------------------------------

# VQSHL (signed): overflow occurs unless the top imm + 1 source bits are all
# zero or all one.  The clamp is the sign-bit pattern, complemented for a
# positive source.
vqshlCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (imm >= sizeof(Element) * 8) {
        if (srcElem1 != 0) {
            destElem = (Element)1 << (sizeof(Element) * 8 - 1);
            if (srcElem1 > 0)
                destElem = ~destElem;
            fpscr.qc = 1;
        } else {
            destElem = 0;
        }
    } else if (imm) {
        destElem = (srcElem1 << imm);
        uint64_t topBits = bits((uint64_t)srcElem1,
                                sizeof(Element) * 8 - 1,
                                sizeof(Element) * 8 - 1 - imm);
        if (topBits != 0 && topBits != mask(imm + 1)) {
            destElem = (Element)1 << (sizeof(Element) * 8 - 1);
            if (srcElem1 > 0)
                destElem = ~destElem;
            fpscr.qc = 1;
        }
    } else {
        destElem = srcElem1;
    }
    FpscrQc = fpscr;
'''
twoRegShiftInst("vqshl", "NVqshlD", "SimdShiftOp", signedTypes, 2, vqshlCode)
twoRegShiftInst("vqshl", "NVqshlQ", "SimdShiftOp", signedTypes, 4, vqshlCode)

# Unsigned-source form: any set bit among the top imm source bits clamps the
# result to all ones.
vqshluCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (imm >= sizeof(Element) * 8) {
        if (srcElem1 != 0) {
            destElem = mask(sizeof(Element) * 8);
            fpscr.qc = 1;
        } else {
            destElem = 0;
        }
    } else if (imm) {
        destElem = (srcElem1 << imm);
        uint64_t topBits = bits((uint64_t)srcElem1,
                                sizeof(Element) * 8 - 1,
                                sizeof(Element) * 8 - imm);
        if (topBits != 0) {
            destElem = mask(sizeof(Element) * 8);
            fpscr.qc = 1;
        }
    } else {
        destElem = srcElem1;
    }
    FpscrQc = fpscr;
'''
twoRegShiftInst("vqshlu", "NVqshluD", "SimdShiftOp", unsignedTypes, 2,
                vqshluCode)
twoRegShiftInst("vqshlu", "NVqshluQ", "SimdShiftOp", unsignedTypes, 4,
                vqshluCode)

# Signed-source form with an unsigned range: a negative source clamps to 0,
# a positive overflow clamps to all ones; both set qc.
vqshlusCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (imm >= sizeof(Element) * 8) {
        if (srcElem1 < 0) {
            destElem = 0;
            fpscr.qc = 1;
        } else if (srcElem1 > 0) {
            destElem = mask(sizeof(Element) * 8);
            fpscr.qc = 1;
        } else {
            destElem = 0;
        }
    } else if (imm) {
        destElem = (srcElem1 << imm);
        uint64_t topBits = bits((uint64_t)srcElem1,
                                sizeof(Element) * 8 - 1,
                                sizeof(Element) * 8 - imm);
        if (srcElem1 < 0) {
            destElem = 0;
            fpscr.qc = 1;
        } else if (topBits != 0) {
            destElem = mask(sizeof(Element) * 8);
            fpscr.qc = 1;
        }
    } else {
        if (srcElem1 < 0) {
            fpscr.qc = 1;
            destElem = 0;
        } else {
            destElem = srcElem1;
        }
    }
    FpscrQc = fpscr;
'''
twoRegShiftInst("vqshlus", "NVqshlusD", "SimdShiftOp", signedTypes, 2,
                vqshlusCode)
twoRegShiftInst("vqshlus", "NVqshlusQ", "SimdShiftOp", signedTypes, 4,
                vqshlusCode)

# VSHRN: plain right shift; a shift of the full source width or more gives 0.
vshrnCode = '''
    if (imm >= sizeof(srcElem1) * 8) {
        destElem = 0;
    } else {
        destElem = srcElem1 >> imm;
    }
'''
twoRegNarrowShiftInst("vshrn", "NVshrn", "SimdShiftOp",
        smallUnsignedTypes, vshrnCode)

# Narrowing right shift with rounding: bit (imm - 1) of the source is added
# to the shifted value. The shift is split into (imm - 1) and 1 so that the
# shift amount never equals the full source width.
vrshrnCode = '''
    if (imm > sizeof(srcElem1) * 8) {
        destElem = 0;
    } else if (imm) {
        Element rBit = bits(srcElem1, imm - 1);
        destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
    } else {
        destElem = srcElem1;
    }
'''
twoRegNarrowShiftInst("vrshrn", "NVrshrn", "SimdShiftOp",
        smallUnsignedTypes, vrshrnCode)

# Narrowing right shift with saturation, signed source and destination.
# The shifted value is sign extended by hand; if it does not survive a
# round trip through Element the result is clamped to the largest positive
# value (or its complement for a negative source) and fpscr.qc is set.
vqshrnCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (imm > sizeof(srcElem1) * 8) {
        if (srcElem1 != 0 && srcElem1 != -1)
            fpscr.qc = 1;
        destElem = 0;
    } else if (imm) {
        BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
        mid |= -(mid & ((BigElement)1 <<
                        (sizeof(BigElement) * 8 - 1 - imm)));
        if (mid != (Element)mid) {
            destElem = mask(sizeof(Element) * 8 - 1);
            if (srcElem1 < 0)
                destElem = ~destElem;
            fpscr.qc = 1;
        } else {
            destElem = mid;
        }
    } else {
        destElem = srcElem1;
    }
    FpscrQc = fpscr;
'''
twoRegNarrowShiftInst("vqshrn", "NVqshrn", "SimdShiftOp",
        smallSignedTypes, vqshrnCode)

# Narrowing right shift with saturation, unsigned source and destination.
# A value that does not fit in Element becomes all ones and sets fpscr.qc.
vqshrunCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (imm > sizeof(srcElem1) * 8) {
        if (srcElem1 != 0)
            fpscr.qc = 1;
        destElem = 0;
    } else if (imm) {
        BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
        if (mid != (Element)mid) {
            destElem = mask(sizeof(Element) * 8);
            fpscr.qc = 1;
        } else {
            destElem = mid;
        }
    } else {
        destElem = srcElem1;
    }
    FpscrQc = fpscr;
'''
twoRegNarrowShiftInst("vqshrun", "NVqshrun",
        "SimdShiftOp", smallUnsignedTypes, vqshrunCode)

# Same mnemonic registered for the signed types: any bit set above the
# narrow width saturates to 0 for a negative source and to all ones
# otherwise, setting fpscr.qc in both cases.
vqshrunsCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (imm > sizeof(srcElem1) * 8) {
        if (srcElem1 != 0)
            fpscr.qc = 1;
        destElem = 0;
    } else if (imm) {
        BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
        if (bits(mid, sizeof(BigElement) * 8 - 1,
                      sizeof(Element) * 8) != 0) {
            if (srcElem1 < 0) {
                destElem = 0;
            } else {
                destElem = mask(sizeof(Element) * 8);
            }
            fpscr.qc = 1;
        } else {
            destElem = mid;
        }
    } else {
        destElem = srcElem1;
    }
    FpscrQc = fpscr;
'''
twoRegNarrowShiftInst("vqshrun", "NVqshruns",
        "SimdShiftOp", smallSignedTypes, vqshrunsCode)

# Rounding variant of vqshrnCode: the last bit shifted out (rBit) is added
# back after the manual sign extension, then the signed saturation check is
# applied. The imm == 0 branch saturates the unshifted source the same way.
vqrshrnCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (imm > sizeof(srcElem1) * 8) {
        if (srcElem1 != 0 && srcElem1 != -1)
            fpscr.qc = 1;
        destElem = 0;
    } else if (imm) {
        BigElement mid = (srcElem1 >> (imm - 1));
        uint64_t rBit = mid & 0x1;
        mid >>= 1;
        mid |= -(mid & ((BigElement)1 <<
                        (sizeof(BigElement) * 8 - 1 - imm)));
        mid += rBit;
        if (mid != (Element)mid) {
            destElem = mask(sizeof(Element) * 8 - 1);
            if (srcElem1 < 0)
                destElem = ~destElem;
            fpscr.qc = 1;
        } else {
            destElem = mid;
        }
    } else {
        if (srcElem1 != (Element)srcElem1) {
            destElem = mask(sizeof(Element) * 8 - 1);
            if (srcElem1 < 0)
                destElem = ~destElem;
            fpscr.qc = 1;
        } else {
            destElem = srcElem1;
        }
    }
    FpscrQc = fpscr;
'''
twoRegNarrowShiftInst("vqrshrn", "NVqrshrn",
        "SimdShiftOp", smallSignedTypes, vqrshrnCode)

# Rounding variant of vqshrunCode (unsigned source and destination).
# Both overflow paths saturate to all ones of the narrow element; the
# imm == 0 path previously used mask(sizeof(Element) * 8 - 1), which is the
# signed maximum and disagreed with the imm != 0 path just above it.
vqrshrunCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (imm > sizeof(srcElem1) * 8) {
        if (srcElem1 != 0)
            fpscr.qc = 1;
        destElem = 0;
    } else if (imm) {
        BigElement mid = (srcElem1 >> (imm - 1));
        uint64_t rBit = mid & 0x1;
        mid >>= 1;
        mid += rBit;
        if (mid != (Element)mid) {
            destElem = mask(sizeof(Element) * 8);
            fpscr.qc = 1;
        } else {
            destElem = mid;
        }
    } else {
        if (srcElem1 != (Element)srcElem1) {
            destElem = mask(sizeof(Element) * 8);
            fpscr.qc = 1;
        } else {
            destElem = srcElem1;
        }
    }
    FpscrQc = fpscr;
'''
twoRegNarrowShiftInst("vqrshrun", "NVqrshrun", 2713 "SimdShiftOp", smallUnsignedTypes, vqrshrunCode) 2714 2715 vqrshrunsCode = ''' 2716 FPSCR fpscr = (FPSCR) FpscrQc; 2717 if (imm > sizeof(srcElem1) * 8) { 2718 if (srcElem1 != 0) 2719 fpscr.qc = 1; 2720 destElem = 0; 2721 } else if (imm) { 2722 BigElement mid = (srcElem1 >> (imm - 1)); 2723 uint64_t rBit = mid & 0x1; 2724 mid >>= 1; 2725 mid |= -(mid & ((BigElement)1 << 2726 (sizeof(BigElement) * 8 - 1 - imm))); 2727 mid += rBit; 2728 if (bits(mid, sizeof(BigElement) * 8 - 1, 2729 sizeof(Element) * 8) != 0) { 2730 if (srcElem1 < 0) { 2731 destElem = 0; 2732 } else { 2733 destElem = mask(sizeof(Element) * 8); 2734 } 2735 fpscr.qc = 1; 2736 } else { 2737 destElem = mid; 2738 } 2739 } else { 2740 if (srcElem1 < 0) { 2741 fpscr.qc = 1; 2742 destElem = 0; 2743 } else { 2744 destElem = srcElem1; 2745 } 2746 } 2747 FpscrQc = fpscr; 2748 ''' 2749 twoRegNarrowShiftInst("vqrshrun", "NVqrshruns", 2750 "SimdShiftOp", smallSignedTypes, vqrshrunsCode) 2751 2752 vshllCode = ''' 2753 if (imm >= sizeof(destElem) * 8) { 2754 destElem = 0; 2755 } else { 2756 destElem = (BigElement)srcElem1 << imm; 2757 } 2758 ''' 2759 twoRegLongShiftInst("vshll", "NVshll", "SimdShiftOp", smallTypes, vshllCode) 2760 2761 vmovlCode = ''' 2762 destElem = srcElem1; 2763 ''' 2764 twoRegLongShiftInst("vmovl", "NVmovl", "SimdMiscOp", smallTypes, vmovlCode) 2765 2766 vcvt2ufxCode = ''' 2767 FPSCR fpscr = (FPSCR) FpscrExc; 2768 if (flushToZero(srcElem1)) 2769 fpscr.idc = 1; 2770 VfpSavedState state = prepFpState(VfpRoundNearest); 2771 __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1)); 2772 destReg = vfpFpSToFixed(srcElem1, false, false, imm); 2773 __asm__ __volatile__("" :: "m" (destReg)); 2774 finishVfp(fpscr, state, true); 2775 FpscrExc = fpscr; 2776 ''' 2777 twoRegShiftInst("vcvt", "NVcvt2ufxD", "SimdCvtOp", ("float",), 2778 2, vcvt2ufxCode, toInt = True) 2779 twoRegShiftInst("vcvt", "NVcvt2ufxQ", "SimdCvtOp", ("float",), 2780 4, vcvt2ufxCode, 
toInt = True) 2781 2782 vcvt2sfxCode = ''' 2783 FPSCR fpscr = (FPSCR) FpscrExc; 2784 if (flushToZero(srcElem1)) 2785 fpscr.idc = 1; 2786 VfpSavedState state = prepFpState(VfpRoundNearest); 2787 __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1)); 2788 destReg = vfpFpSToFixed(srcElem1, true, false, imm); 2789 __asm__ __volatile__("" :: "m" (destReg)); 2790 finishVfp(fpscr, state, true); 2791 FpscrExc = fpscr; 2792 ''' 2793 twoRegShiftInst("vcvt", "NVcvt2sfxD", "SimdCvtOp", ("float",), 2794 2, vcvt2sfxCode, toInt = True) 2795 twoRegShiftInst("vcvt", "NVcvt2sfxQ", "SimdCvtOp", ("float",), 2796 4, vcvt2sfxCode, toInt = True) 2797 2798 vcvtu2fpCode = ''' 2799 FPSCR fpscr = (FPSCR) FpscrExc; 2800 VfpSavedState state = prepFpState(VfpRoundNearest); 2801 __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1)); 2802 destElem = vfpUFixedToFpS(true, true, srcReg1, false, imm); 2803 __asm__ __volatile__("" :: "m" (destElem)); 2804 finishVfp(fpscr, state, true); 2805 FpscrExc = fpscr; 2806 ''' 2807 twoRegShiftInst("vcvt", "NVcvtu2fpD", "SimdCvtOp", ("float",), 2808 2, vcvtu2fpCode, fromInt = True) 2809 twoRegShiftInst("vcvt", "NVcvtu2fpQ", "SimdCvtOp", ("float",), 2810 4, vcvtu2fpCode, fromInt = True) 2811 2812 vcvts2fpCode = ''' 2813 FPSCR fpscr = (FPSCR) FpscrExc; 2814 VfpSavedState state = prepFpState(VfpRoundNearest); 2815 __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1)); 2816 destElem = vfpSFixedToFpS(true, true, srcReg1, false, imm); 2817 __asm__ __volatile__("" :: "m" (destElem)); 2818 finishVfp(fpscr, state, true); 2819 FpscrExc = fpscr; 2820 ''' 2821 twoRegShiftInst("vcvt", "NVcvts2fpD", "SimdCvtOp", ("float",), 2822 2, vcvts2fpCode, fromInt = True) 2823 twoRegShiftInst("vcvt", "NVcvts2fpQ", "SimdCvtOp", ("float",), 2824 4, vcvts2fpCode, fromInt = True) 2825 2826 vcvts2hCode = ''' 2827 FPSCR fpscr = (FPSCR) FpscrExc; 2828 float srcFp1 = bitsToFp(srcElem1, (float)0.0); 2829 if (flushToZero(srcFp1)) 2830 fpscr.idc = 1; 2831 VfpSavedState state = 
prepFpState(VfpRoundNearest); 2832 __asm__ __volatile__("" : "=m" (srcFp1), "=m" (destElem) 2833 : "m" (srcFp1), "m" (destElem)); 2834 destElem = vcvtFpSFpH(fpscr, true, true, VfpRoundNearest, 2835 fpscr.ahp, srcFp1); 2836 __asm__ __volatile__("" :: "m" (destElem)); 2837 finishVfp(fpscr, state, true); 2838 FpscrExc = fpscr; 2839 ''' 2840 twoRegNarrowMiscInst("vcvt", "NVcvts2h", "SimdCvtOp", ("uint16_t",), vcvts2hCode) 2841 2842 vcvth2sCode = ''' 2843 FPSCR fpscr = (FPSCR) FpscrExc; 2844 VfpSavedState state = prepFpState(VfpRoundNearest); 2845 __asm__ __volatile__("" : "=m" (srcElem1), "=m" (destElem) 2846 : "m" (srcElem1), "m" (destElem)); 2847 destElem = fpToBits(vcvtFpHFpS(fpscr, true, fpscr.ahp, srcElem1)); 2848 __asm__ __volatile__("" :: "m" (destElem)); 2849 finishVfp(fpscr, state, true); 2850 FpscrExc = fpscr; 2851 ''' 2852 twoRegLongMiscInst("vcvt", "NVcvth2s", "SimdCvtOp", ("uint16_t",), vcvth2sCode) 2853 2854 vrsqrteCode = ''' 2855 destElem = unsignedRSqrtEstimate(srcElem1); 2856 ''' 2857 twoRegMiscInst("vrsqrte", "NVrsqrteD", "SimdSqrtOp", ("uint32_t",), 2, vrsqrteCode) 2858 twoRegMiscInst("vrsqrte", "NVrsqrteQ", "SimdSqrtOp", ("uint32_t",), 4, vrsqrteCode) 2859 2860 vrsqrtefpCode = ''' 2861 FPSCR fpscr = (FPSCR) FpscrExc; 2862 if (flushToZero(srcReg1)) 2863 fpscr.idc = 1; 2864 destReg = fprSqrtEstimate(fpscr, srcReg1); 2865 FpscrExc = fpscr; 2866 ''' 2867 twoRegMiscInstFp("vrsqrte", "NVrsqrteDFp", "SimdFloatSqrtOp", ("float",), 2, vrsqrtefpCode) 2868 twoRegMiscInstFp("vrsqrte", "NVrsqrteQFp", "SimdFloatSqrtOp", ("float",), 4, vrsqrtefpCode) 2869 2870 vrecpeCode = ''' 2871 destElem = unsignedRecipEstimate(srcElem1); 2872 ''' 2873 twoRegMiscInst("vrecpe", "NVrecpeD", "SimdMultAccOp", ("uint32_t",), 2, vrecpeCode) 2874 twoRegMiscInst("vrecpe", "NVrecpeQ", "SimdMultAccOp", ("uint32_t",), 4, vrecpeCode) 2875 2876 vrecpefpCode = ''' 2877 FPSCR fpscr = (FPSCR) FpscrExc; 2878 if (flushToZero(srcReg1)) 2879 fpscr.idc = 1; 2880 destReg = fpRecipEstimate(fpscr, 
srcReg1); 2881 FpscrExc = fpscr; 2882 ''' 2883 twoRegMiscInstFp("vrecpe", "NVrecpeDFp", "SimdFloatMultAccOp", ("float",), 2, vrecpefpCode) 2884 twoRegMiscInstFp("vrecpe", "NVrecpeQFp", "SimdFloatMultAccOp", ("float",), 4, vrecpefpCode) 2885 2886 vrev16Code = ''' 2887 destElem = srcElem1; 2888 unsigned groupSize = ((1 << 1) / sizeof(Element)); 2889 unsigned reverseMask = (groupSize - 1); 2890 j = i ^ reverseMask; 2891 ''' 2892 twoRegMiscInst("vrev16", "NVrev16D", "SimdAluOp", ("uint8_t",), 2, vrev16Code) 2893 twoRegMiscInst("vrev16", "NVrev16Q", "SimdAluOp", ("uint8_t",), 4, vrev16Code) 2894 vrev32Code = ''' 2895 destElem = srcElem1; 2896 unsigned groupSize = ((1 << 2) / sizeof(Element)); 2897 unsigned reverseMask = (groupSize - 1); 2898 j = i ^ reverseMask; 2899 ''' 2900 twoRegMiscInst("vrev32", "NVrev32D", 2901 "SimdAluOp", ("uint8_t", "uint16_t"), 2, vrev32Code) 2902 twoRegMiscInst("vrev32", "NVrev32Q", 2903 "SimdAluOp", ("uint8_t", "uint16_t"), 4, vrev32Code) 2904 vrev64Code = ''' 2905 destElem = srcElem1; 2906 unsigned groupSize = ((1 << 3) / sizeof(Element)); 2907 unsigned reverseMask = (groupSize - 1); 2908 j = i ^ reverseMask; 2909 ''' 2910 twoRegMiscInst("vrev64", "NVrev64D", "SimdAluOp", smallUnsignedTypes, 2, vrev64Code) 2911 twoRegMiscInst("vrev64", "NVrev64Q", "SimdAluOp", smallUnsignedTypes, 4, vrev64Code) 2912 2913 vpaddlCode = ''' 2914 destElem = (BigElement)srcElem1 + (BigElement)srcElem2; 2915 ''' 2916 twoRegCondenseInst("vpaddl", "NVpaddlD", "SimdAddOp", smallTypes, 2, vpaddlCode) 2917 twoRegCondenseInst("vpaddl", "NVpaddlQ", "SimdAddOp", smallTypes, 4, vpaddlCode) 2918 2919 vpadalCode = ''' 2920 destElem += (BigElement)srcElem1 + (BigElement)srcElem2; 2921 ''' 2922 twoRegCondenseInst("vpadal", "NVpadalD", "SimdAddAccOp", smallTypes, 2, vpadalCode, True) 2923 twoRegCondenseInst("vpadal", "NVpadalQ", "SimdAddAccOp", smallTypes, 4, vpadalCode, True) 2924 2925 vclsCode = ''' 2926 unsigned count = 0; 2927 if (srcElem1 < 0) { 2928 srcElem1 <<= 1; 2929 
while (srcElem1 < 0 && count < sizeof(Element) * 8 - 1) { 2930 count++; 2931 srcElem1 <<= 1; 2932 } 2933 } else { 2934 srcElem1 <<= 1; 2935 while (srcElem1 >= 0 && count < sizeof(Element) * 8 - 1) { 2936 count++; 2937 srcElem1 <<= 1; 2938 } 2939 } 2940 destElem = count; 2941 ''' 2942 twoRegMiscInst("vcls", "NVclsD", "SimdAluOp", signedTypes, 2, vclsCode) 2943 twoRegMiscInst("vcls", "NVclsQ", "SimdAluOp", signedTypes, 4, vclsCode) 2944 2945 vclzCode = ''' 2946 unsigned count = 0; 2947 while (srcElem1 >= 0 && count < sizeof(Element) * 8) { 2948 count++; 2949 srcElem1 <<= 1; 2950 } 2951 destElem = count; 2952 ''' 2953 twoRegMiscInst("vclz", "NVclzD", "SimdAluOp", signedTypes, 2, vclzCode) 2954 twoRegMiscInst("vclz", "NVclzQ", "SimdAluOp", signedTypes, 4, vclzCode) 2955 2956 vcntCode = ''' 2957 unsigned count = 0; 2958 while (srcElem1 && count < sizeof(Element) * 8) { 2959 count += srcElem1 & 0x1; 2960 srcElem1 >>= 1; 2961 } 2962 destElem = count; 2963 ''' 2964 2965 twoRegMiscInst("vcnt", "NVcntD", "SimdAluOp", unsignedTypes, 2, vcntCode) 2966 twoRegMiscInst("vcnt", "NVcntQ", "SimdAluOp", unsignedTypes, 4, vcntCode) 2967 2968 vmvnCode = ''' 2969 destElem = ~srcElem1; 2970 ''' 2971 twoRegMiscInst("vmvn", "NVmvnD", "SimdAluOp", ("uint64_t",), 2, vmvnCode) 2972 twoRegMiscInst("vmvn", "NVmvnQ", "SimdAluOp", ("uint64_t",), 4, vmvnCode) 2973 2974 vqabsCode = ''' 2975 FPSCR fpscr = (FPSCR) FpscrQc; 2976 if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) { 2977 fpscr.qc = 1; 2978 destElem = ~srcElem1; 2979 } else if (srcElem1 < 0) { 2980 destElem = -srcElem1; 2981 } else { 2982 destElem = srcElem1; 2983 } 2984 FpscrQc = fpscr; 2985 ''' 2986 twoRegMiscInst("vqabs", "NVqabsD", "SimdAluOp", signedTypes, 2, vqabsCode) 2987 twoRegMiscInst("vqabs", "NVqabsQ", "SimdAluOp", signedTypes, 4, vqabsCode) 2988 2989 vqnegCode = ''' 2990 FPSCR fpscr = (FPSCR) FpscrQc; 2991 if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) { 2992 fpscr.qc = 1; 2993 destElem 
= ~srcElem1; 2994 } else { 2995 destElem = -srcElem1; 2996 } 2997 FpscrQc = fpscr; 2998 ''' 2999 twoRegMiscInst("vqneg", "NVqnegD", "SimdAluOp", signedTypes, 2, vqnegCode) 3000 twoRegMiscInst("vqneg", "NVqnegQ", "SimdAluOp", signedTypes, 4, vqnegCode) 3001 3002 vabsCode = ''' 3003 if (srcElem1 < 0) { 3004 destElem = -srcElem1; 3005 } else { 3006 destElem = srcElem1; 3007 } 3008 ''' 3009 3010 twoRegMiscInst("vabs", "NVabsD", "SimdAluOp", signedTypes, 2, vabsCode) 3011 twoRegMiscInst("vabs", "NVabsQ", "SimdAluOp", signedTypes, 4, vabsCode) 3012 vabsfpCode = ''' 3013 union 3014 { 3015 uint32_t i; 3016 float f; 3017 } cStruct; 3018 cStruct.f = srcReg1; 3019 cStruct.i &= mask(sizeof(Element) * 8 - 1); 3020 destReg = cStruct.f; 3021 ''' 3022 twoRegMiscInstFp("vabs", "NVabsDFp", "SimdFloatAluOp", ("float",), 2, vabsfpCode) 3023 twoRegMiscInstFp("vabs", "NVabsQFp", "SimdFloatAluOp", ("float",), 4, vabsfpCode) 3024 3025 vnegCode = ''' 3026 destElem = -srcElem1; 3027 ''' 3028 twoRegMiscInst("vneg", "NVnegD", "SimdAluOp", signedTypes, 2, vnegCode) 3029 twoRegMiscInst("vneg", "NVnegQ", "SimdAluOp", signedTypes, 4, vnegCode) 3030 vnegfpCode = ''' 3031 destReg = -srcReg1; 3032 ''' 3033 twoRegMiscInstFp("vneg", "NVnegDFp", "SimdFloatAluOp", ("float",), 2, vnegfpCode) 3034 twoRegMiscInstFp("vneg", "NVnegQFp", "SimdFloatAluOp", ("float",), 4, vnegfpCode) 3035 3036 vcgtCode = 'destElem = (srcElem1 > 0) ? mask(sizeof(Element) * 8) : 0;' 3037 twoRegMiscInst("vcgt", "NVcgtD", "SimdCmpOp", signedTypes, 2, vcgtCode) 3038 twoRegMiscInst("vcgt", "NVcgtQ", "SimdCmpOp", signedTypes, 4, vcgtCode) 3039 vcgtfpCode = ''' 3040 FPSCR fpscr = (FPSCR) FpscrExc; 3041 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgtFunc, 3042 true, true, VfpRoundNearest); 3043 destReg = (res == 0) ? 
-1 : 0; 3044 if (res == 2.0) 3045 fpscr.ioc = 1; 3046 FpscrExc = fpscr; 3047 ''' 3048 twoRegMiscInstFp("vcgt", "NVcgtDFp", "SimdFloatCmpOp", ("float",), 3049 2, vcgtfpCode, toInt = True) 3050 twoRegMiscInstFp("vcgt", "NVcgtQFp", "SimdFloatCmpOp", ("float",), 3051 4, vcgtfpCode, toInt = True) 3052 3053 vcgeCode = 'destElem = (srcElem1 >= 0) ? mask(sizeof(Element) * 8) : 0;' 3054 twoRegMiscInst("vcge", "NVcgeD", "SimdCmpOp", signedTypes, 2, vcgeCode) 3055 twoRegMiscInst("vcge", "NVcgeQ", "SimdCmpOp", signedTypes, 4, vcgeCode) 3056 vcgefpCode = ''' 3057 FPSCR fpscr = (FPSCR) FpscrExc; 3058 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgeFunc, 3059 true, true, VfpRoundNearest); 3060 destReg = (res == 0) ? -1 : 0; 3061 if (res == 2.0) 3062 fpscr.ioc = 1; 3063 FpscrExc = fpscr; 3064 ''' 3065 twoRegMiscInstFp("vcge", "NVcgeDFp", "SimdFloatCmpOp", ("float",), 3066 2, vcgefpCode, toInt = True) 3067 twoRegMiscInstFp("vcge", "NVcgeQFp", "SimdFloatCmpOp", ("float",), 3068 4, vcgefpCode, toInt = True) 3069 3070 vceqCode = 'destElem = (srcElem1 == 0) ? mask(sizeof(Element) * 8) : 0;' 3071 twoRegMiscInst("vceq", "NVceqD", "SimdCmpOp", signedTypes, 2, vceqCode) 3072 twoRegMiscInst("vceq", "NVceqQ", "SimdCmpOp", signedTypes, 4, vceqCode) 3073 vceqfpCode = ''' 3074 FPSCR fpscr = (FPSCR) FpscrExc; 3075 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vceqFunc, 3076 true, true, VfpRoundNearest); 3077 destReg = (res == 0) ? -1 : 0; 3078 if (res == 2.0) 3079 fpscr.ioc = 1; 3080 FpscrExc = fpscr; 3081 ''' 3082 twoRegMiscInstFp("vceq", "NVceqDFp", "SimdFloatCmpOp", ("float",), 3083 2, vceqfpCode, toInt = True) 3084 twoRegMiscInstFp("vceq", "NVceqQFp", "SimdFloatCmpOp", ("float",), 3085 4, vceqfpCode, toInt = True) 3086 3087 vcleCode = 'destElem = (srcElem1 <= 0) ? 
mask(sizeof(Element) * 8) : 0;' 3088 twoRegMiscInst("vcle", "NVcleD", "SimdCmpOp", signedTypes, 2, vcleCode) 3089 twoRegMiscInst("vcle", "NVcleQ", "SimdCmpOp", signedTypes, 4, vcleCode) 3090 vclefpCode = ''' 3091 FPSCR fpscr = (FPSCR) FpscrExc; 3092 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcleFunc, 3093 true, true, VfpRoundNearest); 3094 destReg = (res == 0) ? -1 : 0; 3095 if (res == 2.0) 3096 fpscr.ioc = 1; 3097 FpscrExc = fpscr; 3098 ''' 3099 twoRegMiscInstFp("vcle", "NVcleDFp", "SimdFloatCmpOp", ("float",), 3100 2, vclefpCode, toInt = True) 3101 twoRegMiscInstFp("vcle", "NVcleQFp", "SimdFloatCmpOp", ("float",), 3102 4, vclefpCode, toInt = True) 3103 3104 vcltCode = 'destElem = (srcElem1 < 0) ? mask(sizeof(Element) * 8) : 0;' 3105 twoRegMiscInst("vclt", "NVcltD", "SimdCmpOp", signedTypes, 2, vcltCode) 3106 twoRegMiscInst("vclt", "NVcltQ", "SimdCmpOp", signedTypes, 4, vcltCode) 3107 vcltfpCode = ''' 3108 FPSCR fpscr = (FPSCR) FpscrExc; 3109 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcltFunc, 3110 true, true, VfpRoundNearest); 3111 destReg = (res == 0) ? 
-1 : 0; 3112 if (res == 2.0) 3113 fpscr.ioc = 1; 3114 FpscrExc = fpscr; 3115 ''' 3116 twoRegMiscInstFp("vclt", "NVcltDFp", "SimdFloatCmpOp", ("float",), 3117 2, vcltfpCode, toInt = True) 3118 twoRegMiscInstFp("vclt", "NVcltQFp", "SimdFloatCmpOp", ("float",), 3119 4, vcltfpCode, toInt = True) 3120 3121 vswpCode = ''' 3122 FloatRegBits mid; 3123 for (unsigned r = 0; r < rCount; r++) { 3124 mid = srcReg1.regs[r]; 3125 srcReg1.regs[r] = destReg.regs[r]; 3126 destReg.regs[r] = mid; 3127 } 3128 ''' 3129 twoRegMiscScramble("vswp", "NVswpD", "SimdAluOp", ("uint64_t",), 2, vswpCode) 3130 twoRegMiscScramble("vswp", "NVswpQ", "SimdAluOp", ("uint64_t",), 4, vswpCode) 3131 3132 vtrnCode = ''' 3133 Element mid; 3134 for (unsigned i = 0; i < eCount; i += 2) { 3135 mid = srcReg1.elements[i]; 3136 srcReg1.elements[i] = destReg.elements[i + 1]; 3137 destReg.elements[i + 1] = mid; 3138 } 3139 ''' 3140 twoRegMiscScramble("vtrn", "NVtrnD", "SimdAluOp", unsignedTypes, 2, vtrnCode) 3141 twoRegMiscScramble("vtrn", "NVtrnQ", "SimdAluOp", unsignedTypes, 4, vtrnCode) 3142 3143 vuzpCode = ''' 3144 Element mid[eCount]; 3145 memcpy(&mid, &srcReg1, sizeof(srcReg1)); 3146 for (unsigned i = 0; i < eCount / 2; i++) { 3147 srcReg1.elements[i] = destReg.elements[2 * i + 1]; 3148 srcReg1.elements[eCount / 2 + i] = mid[2 * i + 1]; 3149 destReg.elements[i] = destReg.elements[2 * i]; 3150 } 3151 for (unsigned i = 0; i < eCount / 2; i++) { 3152 destReg.elements[eCount / 2 + i] = mid[2 * i]; 3153 } 3154 ''' 3155 twoRegMiscScramble("vuzp", "NVuzpD", "SimdAluOp", unsignedTypes, 2, vuzpCode) 3156 twoRegMiscScramble("vuzp", "NVuzpQ", "SimdAluOp", unsignedTypes, 4, vuzpCode) 3157 3158 vzipCode = ''' 3159 Element mid[eCount]; 3160 memcpy(&mid, &destReg, sizeof(destReg)); 3161 for (unsigned i = 0; i < eCount / 2; i++) { 3162 destReg.elements[2 * i] = mid[i]; 3163 destReg.elements[2 * i + 1] = srcReg1.elements[i]; 3164 } 3165 for (int i = 0; i < eCount / 2; i++) { 3166 srcReg1.elements[2 * i] = mid[eCount / 2 + 
i]; 3167 srcReg1.elements[2 * i + 1] = srcReg1.elements[eCount / 2 + i]; 3168 } 3169 ''' 3170 twoRegMiscScramble("vzip", "NVzipD", "SimdAluOp", unsignedTypes, 2, vzipCode) 3171 twoRegMiscScramble("vzip", "NVzipQ", "SimdAluOp", unsignedTypes, 4, vzipCode) 3172 3173 vmovnCode = 'destElem = srcElem1;' 3174 twoRegNarrowMiscInst("vmovn", "NVmovn", "SimdMiscOp", smallUnsignedTypes, vmovnCode) 3175 3176 vdupCode = 'destElem = srcElem1;' 3177 twoRegMiscScInst("vdup", "NVdupD", "SimdAluOp", smallUnsignedTypes, 2, vdupCode) 3178 twoRegMiscScInst("vdup", "NVdupQ", "SimdAluOp", smallUnsignedTypes, 4, vdupCode) 3179 3180 def vdupGprInst(name, Name, opClass, types, rCount): 3181 global header_output, exec_output 3182 eWalkCode = ''' 3183 RegVect destReg; 3184 for (unsigned i = 0; i < eCount; i++) { 3185 destReg.elements[i] = htog((Element)Op1); 3186 } 3187 ''' 3188 for reg in range(rCount): 3189 eWalkCode += ''' 3190 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 3191 ''' % { "reg" : reg } 3192 iop = InstObjParams(name, Name, 3193 "RegRegOp", 3194 { "code": eWalkCode, 3195 "r_count": rCount, 3196 "predicate_test": predicateTest, 3197 "op_class": opClass }, []) 3198 header_output += NeonRegRegOpDeclare.subst(iop) 3199 exec_output += NeonEqualRegExecute.subst(iop) 3200 for type in types: 3201 substDict = { "targs" : type, 3202 "class_name" : Name } 3203 exec_output += NeonExecDeclare.subst(substDict) 3204 vdupGprInst("vdup", "NVdupDGpr", "SimdMiscOp", smallUnsignedTypes, 2) 3205 vdupGprInst("vdup", "NVdupQGpr", "SimdMiscOp", smallUnsignedTypes, 4) 3206 3207 vmovCode = 'destElem = imm;' 3208 oneRegImmInst("vmov", "NVmoviD", "SimdMiscOp", ("uint64_t",), 2, vmovCode) 3209 oneRegImmInst("vmov", "NVmoviQ", "SimdMiscOp", ("uint64_t",), 4, vmovCode) 3210 3211 vorrCode = 'destElem |= imm;' 3212 oneRegImmInst("vorr", "NVorriD", "SimdAluOp", ("uint64_t",), 2, vorrCode, True) 3213 oneRegImmInst("vorr", "NVorriQ", "SimdAluOp", ("uint64_t",), 4, vorrCode, True) 3214 3215 vmvnCode = 'destElem 
= ~imm;' 3216 oneRegImmInst("vmvn", "NVmvniD", "SimdAluOp", ("uint64_t",), 2, vmvnCode) 3217 oneRegImmInst("vmvn", "NVmvniQ", "SimdAluOp", ("uint64_t",), 4, vmvnCode) 3218 3219 vbicCode = 'destElem &= ~imm;' 3220 oneRegImmInst("vbic", "NVbiciD", "SimdAluOp", ("uint64_t",), 2, vbicCode, True) 3221 oneRegImmInst("vbic", "NVbiciQ", "SimdAluOp", ("uint64_t",), 4, vbicCode, True) 3222 3223 vqmovnCode = ''' 3224 FPSCR fpscr = (FPSCR) FpscrQc; 3225 destElem = srcElem1; 3226 if ((BigElement)destElem != srcElem1) { 3227 fpscr.qc = 1; 3228 destElem = mask(sizeof(Element) * 8 - 1); 3229 if (srcElem1 < 0) 3230 destElem = ~destElem; 3231 } 3232 FpscrQc = fpscr; 3233 ''' 3234 twoRegNarrowMiscInst("vqmovn", "NVqmovn", "SimdMiscOp", smallSignedTypes, vqmovnCode) 3235 3236 vqmovunCode = ''' 3237 FPSCR fpscr = (FPSCR) FpscrQc; 3238 destElem = srcElem1; 3239 if ((BigElement)destElem != srcElem1) { 3240 fpscr.qc = 1; 3241 destElem = mask(sizeof(Element) * 8); 3242 } 3243 FpscrQc = fpscr; 3244 ''' 3245 twoRegNarrowMiscInst("vqmovun", "NVqmovun", 3246 "SimdMiscOp", smallUnsignedTypes, vqmovunCode) 3247 3248 vqmovunsCode = ''' 3249 FPSCR fpscr = (FPSCR) FpscrQc; 3250 destElem = srcElem1; 3251 if (srcElem1 < 0 || 3252 ((BigElement)destElem & mask(sizeof(Element) * 8)) != srcElem1) { 3253 fpscr.qc = 1; 3254 destElem = mask(sizeof(Element) * 8); 3255 if (srcElem1 < 0) 3256 destElem = ~destElem; 3257 } 3258 FpscrQc = fpscr; 3259 ''' 3260 twoRegNarrowMiscInst("vqmovun", "NVqmovuns", 3261 "SimdMiscOp", smallSignedTypes, vqmovunsCode) 3262 3263 def buildVext(name, Name, opClass, types, rCount, op): 3264 global header_output, exec_output 3265 eWalkCode = ''' 3266 RegVect srcReg1, srcReg2, destReg; 3267 ''' 3268 for reg in range(rCount): 3269 eWalkCode += simdEnabledCheckCode + ''' 3270 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw); 3271 srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw); 3272 ''' % { "reg" : reg } 3273 eWalkCode += op 3274 for reg in range(rCount): 3275 eWalkCode += ''' 3276 
FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 3277 ''' % { "reg" : reg } 3278 iop = InstObjParams(name, Name, 3279 "RegRegRegImmOp", 3280 { "code": eWalkCode, 3281 "r_count": rCount, 3282 "predicate_test": predicateTest, 3283 "op_class": opClass }, []) 3284 header_output += NeonRegRegRegImmOpDeclare.subst(iop) 3285 exec_output += NeonEqualRegExecute.subst(iop) 3286 for type in types: 3287 substDict = { "targs" : type, 3288 "class_name" : Name } 3289 exec_output += NeonExecDeclare.subst(substDict) 3290 3291 vextCode = ''' 3292 for (unsigned i = 0; i < eCount; i++) { 3293 unsigned index = i + imm; 3294 if (index < eCount) { 3295 destReg.elements[i] = srcReg1.elements[index]; 3296 } else { 3297 index -= eCount; 3298 if (index >= eCount) { 3299 if (FullSystem) 3300 fault = new UndefinedInstruction; 3301 else 3302 fault = new UndefinedInstruction(false, mnemonic); 3303 } else { 3304 destReg.elements[i] = srcReg2.elements[index]; 3305 } 3306 } 3307 } 3308 ''' 3309 buildVext("vext", "NVextD", "SimdMiscOp", ("uint8_t",), 2, vextCode) 3310 buildVext("vext", "NVextQ", "SimdMiscOp", ("uint8_t",), 4, vextCode) 3311 3312 def buildVtbxl(name, Name, opClass, length, isVtbl): 3313 global header_output, decoder_output, exec_output 3314 code = ''' 3315 union 3316 { 3317 uint8_t bytes[32]; 3318 FloatRegBits regs[8]; 3319 } table; 3320 3321 union 3322 { 3323 uint8_t bytes[8]; 3324 FloatRegBits regs[2]; 3325 } destReg, srcReg2; 3326 3327 const unsigned length = %(length)d; 3328 const bool isVtbl = %(isVtbl)s; 3329 3330 srcReg2.regs[0] = htog(FpOp2P0_uw); 3331 srcReg2.regs[1] = htog(FpOp2P1_uw); 3332 3333 destReg.regs[0] = htog(FpDestP0_uw); 3334 destReg.regs[1] = htog(FpDestP1_uw); 3335 ''' % { "length" : length, "isVtbl" : isVtbl } 3336 for reg in range(8): 3337 if reg < length * 2: 3338 code += 'table.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);\n' % \ 3339 { "reg" : reg } 3340 else: 3341 code += 'table.regs[%(reg)d] = 0;\n' % { "reg" : reg } 3342 code += ''' 3343 for (unsigned i = 0; 
i < sizeof(destReg); i++) { 3344 uint8_t index = srcReg2.bytes[i]; 3345 if (index < 8 * length) { 3346 destReg.bytes[i] = table.bytes[index]; 3347 } else { 3348 if (isVtbl) 3349 destReg.bytes[i] = 0; 3350 // else destReg.bytes[i] unchanged 3351 } 3352 } 3353 3354 FpDestP0_uw = gtoh(destReg.regs[0]); 3355 FpDestP1_uw = gtoh(destReg.regs[1]); 3356 ''' 3357 iop = InstObjParams(name, Name, 3358 "RegRegRegOp", 3359 { "code": code, 3360 "predicate_test": predicateTest, 3361 "op_class": opClass }, []) 3362 header_output += RegRegRegOpDeclare.subst(iop) 3363 decoder_output += RegRegRegOpConstructor.subst(iop) 3364 exec_output += PredOpExecute.subst(iop) 3365 3366 buildVtbxl("vtbl", "NVtbl1", "SimdMiscOp", 1, "true") 3367 buildVtbxl("vtbl", "NVtbl2", "SimdMiscOp", 2, "true") 3368 buildVtbxl("vtbl", "NVtbl3", "SimdMiscOp", 3, "true") 3369 buildVtbxl("vtbl", "NVtbl4", "SimdMiscOp", 4, "true") 3370 3371 buildVtbxl("vtbx", "NVtbx1", "SimdMiscOp", 1, "false") 3372 buildVtbxl("vtbx", "NVtbx2", "SimdMiscOp", 2, "false") 3373 buildVtbxl("vtbx", "NVtbx3", "SimdMiscOp", 3, "false") 3374 buildVtbxl("vtbx", "NVtbx4", "SimdMiscOp", 4, "false") 3375}}; 3376