vfp.cc revision 9918
17396Sgblack@eecs.umich.edu/* 27396Sgblack@eecs.umich.edu * Copyright (c) 2010 ARM Limited 37396Sgblack@eecs.umich.edu * All rights reserved 47396Sgblack@eecs.umich.edu * 57396Sgblack@eecs.umich.edu * The license below extends only to copyright in the software and shall 67396Sgblack@eecs.umich.edu * not be construed as granting a license to any other intellectual 77396Sgblack@eecs.umich.edu * property including but not limited to intellectual property relating 87396Sgblack@eecs.umich.edu * to a hardware implementation of the functionality of the software 97396Sgblack@eecs.umich.edu * licensed hereunder. You may use the software subject to the license 107396Sgblack@eecs.umich.edu * terms below provided that you ensure that this notice is replicated 117396Sgblack@eecs.umich.edu * unmodified and in its entirety in all distributions of the software, 127396Sgblack@eecs.umich.edu * modified or unmodified, in source code or in binary form. 137396Sgblack@eecs.umich.edu * 147396Sgblack@eecs.umich.edu * Redistribution and use in source and binary forms, with or without 157396Sgblack@eecs.umich.edu * modification, are permitted provided that the following conditions are 167396Sgblack@eecs.umich.edu * met: redistributions of source code must retain the above copyright 177396Sgblack@eecs.umich.edu * notice, this list of conditions and the following disclaimer; 187396Sgblack@eecs.umich.edu * redistributions in binary form must reproduce the above copyright 197396Sgblack@eecs.umich.edu * notice, this list of conditions and the following disclaimer in the 207396Sgblack@eecs.umich.edu * documentation and/or other materials provided with the distribution; 217396Sgblack@eecs.umich.edu * neither the name of the copyright holders nor the names of its 227396Sgblack@eecs.umich.edu * contributors may be used to endorse or promote products derived from 237396Sgblack@eecs.umich.edu * this software without specific prior written permission. 247396Sgblack@eecs.umich.edu * 257396Sgblack@eecs.umich.edu * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 267396Sgblack@eecs.umich.edu * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 277396Sgblack@eecs.umich.edu * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 287396Sgblack@eecs.umich.edu * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 297396Sgblack@eecs.umich.edu * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 307396Sgblack@eecs.umich.edu * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 317396Sgblack@eecs.umich.edu * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 327396Sgblack@eecs.umich.edu * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 337396Sgblack@eecs.umich.edu * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 347396Sgblack@eecs.umich.edu * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 357396Sgblack@eecs.umich.edu * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 367396Sgblack@eecs.umich.edu * 377396Sgblack@eecs.umich.edu * Authors: Gabe Black 387396Sgblack@eecs.umich.edu */ 397396Sgblack@eecs.umich.edu 407396Sgblack@eecs.umich.edu#include "arch/arm/insts/vfp.hh" 417396Sgblack@eecs.umich.edu 427434Sgblack@eecs.umich.edu/* 437434Sgblack@eecs.umich.edu * The asm statements below are to keep gcc from reordering code. Otherwise 447434Sgblack@eecs.umich.edu * the rounding mode might be set after the operation it was intended for, the 457434Sgblack@eecs.umich.edu * exception bits read before it, etc. 467434Sgblack@eecs.umich.edu */ 477434Sgblack@eecs.umich.edu 487396Sgblack@eecs.umich.edustd::string 497396Sgblack@eecs.umich.eduFpRegRegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const 507396Sgblack@eecs.umich.edu{ 517396Sgblack@eecs.umich.edu std::stringstream ss; 527396Sgblack@eecs.umich.edu printMnemonic(ss); 539918Ssteve.reinhardt@amd.com printReg(ss, dest + FP_Reg_Base); 547396Sgblack@eecs.umich.edu ss << ", "; 559918Ssteve.reinhardt@amd.com printReg(ss, op1 + FP_Reg_Base); 567396Sgblack@eecs.umich.edu return ss.str(); 577396Sgblack@eecs.umich.edu} 587396Sgblack@eecs.umich.edu 597396Sgblack@eecs.umich.edustd::string 607396Sgblack@eecs.umich.eduFpRegImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const 617396Sgblack@eecs.umich.edu{ 627396Sgblack@eecs.umich.edu std::stringstream ss; 637396Sgblack@eecs.umich.edu printMnemonic(ss); 649918Ssteve.reinhardt@amd.com printReg(ss, dest + FP_Reg_Base); 657396Sgblack@eecs.umich.edu ccprintf(ss, ", #%d", imm); 667396Sgblack@eecs.umich.edu return ss.str(); 677396Sgblack@eecs.umich.edu} 687396Sgblack@eecs.umich.edu 697396Sgblack@eecs.umich.edustd::string 707396Sgblack@eecs.umich.eduFpRegRegImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const 717396Sgblack@eecs.umich.edu{ 727396Sgblack@eecs.umich.edu std::stringstream ss; 737396Sgblack@eecs.umich.edu printMnemonic(ss); 749918Ssteve.reinhardt@amd.com printReg(ss, dest + FP_Reg_Base); 757396Sgblack@eecs.umich.edu ss << ", "; 769918Ssteve.reinhardt@amd.com printReg(ss, op1 + FP_Reg_Base); 777396Sgblack@eecs.umich.edu ccprintf(ss, ", #%d", imm); 787396Sgblack@eecs.umich.edu return ss.str(); 797396Sgblack@eecs.umich.edu} 807396Sgblack@eecs.umich.edu 817396Sgblack@eecs.umich.edustd::string 827396Sgblack@eecs.umich.eduFpRegRegRegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const 837396Sgblack@eecs.umich.edu{ 847396Sgblack@eecs.umich.edu std::stringstream ss; 857396Sgblack@eecs.umich.edu printMnemonic(ss); 869918Ssteve.reinhardt@amd.com printReg(ss, dest + FP_Reg_Base); 877396Sgblack@eecs.umich.edu ss << ", "; 889918Ssteve.reinhardt@amd.com printReg(ss, op1 + FP_Reg_Base); 897396Sgblack@eecs.umich.edu ss << ", "; 909918Ssteve.reinhardt@amd.com printReg(ss, op2 + FP_Reg_Base); 917396Sgblack@eecs.umich.edu return ss.str(); 927396Sgblack@eecs.umich.edu} 937430Sgblack@eecs.umich.edu 947639Sgblack@eecs.umich.edustd::string 957639Sgblack@eecs.umich.eduFpRegRegRegImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const 967639Sgblack@eecs.umich.edu{ 977639Sgblack@eecs.umich.edu std::stringstream ss; 987639Sgblack@eecs.umich.edu printMnemonic(ss); 999918Ssteve.reinhardt@amd.com printReg(ss, dest + FP_Reg_Base); 1007639Sgblack@eecs.umich.edu ss << ", "; 1019918Ssteve.reinhardt@amd.com printReg(ss, op1 + FP_Reg_Base); 1027639Sgblack@eecs.umich.edu ss << ", "; 1039918Ssteve.reinhardt@amd.com printReg(ss, op2 + FP_Reg_Base); 1047639Sgblack@eecs.umich.edu ccprintf(ss, ", #%d", imm); 1057639Sgblack@eecs.umich.edu return ss.str(); 1067639Sgblack@eecs.umich.edu} 1077639Sgblack@eecs.umich.edu 1087430Sgblack@eecs.umich.edunamespace ArmISA 1097430Sgblack@eecs.umich.edu{ 1107430Sgblack@eecs.umich.edu 1117430Sgblack@eecs.umich.eduVfpSavedState 1127430Sgblack@eecs.umich.eduprepFpState(uint32_t rMode) 1137430Sgblack@eecs.umich.edu{ 1147430Sgblack@eecs.umich.edu int roundingMode = fegetround(); 1157430Sgblack@eecs.umich.edu feclearexcept(FeAllExceptions); 1167430Sgblack@eecs.umich.edu switch (rMode) { 1177430Sgblack@eecs.umich.edu case VfpRoundNearest: 1187430Sgblack@eecs.umich.edu fesetround(FeRoundNearest); 1197430Sgblack@eecs.umich.edu break; 1207430Sgblack@eecs.umich.edu case VfpRoundUpward: 1217430Sgblack@eecs.umich.edu fesetround(FeRoundUpward); 1227430Sgblack@eecs.umich.edu break; 1237430Sgblack@eecs.umich.edu case VfpRoundDown: 1247430Sgblack@eecs.umich.edu fesetround(FeRoundDown); 1257430Sgblack@eecs.umich.edu break; 1267430Sgblack@eecs.umich.edu case VfpRoundZero: 1277430Sgblack@eecs.umich.edu fesetround(FeRoundZero); 1287430Sgblack@eecs.umich.edu break; 1297430Sgblack@eecs.umich.edu } 1307430Sgblack@eecs.umich.edu return roundingMode; 1317430Sgblack@eecs.umich.edu} 1327430Sgblack@eecs.umich.edu 1337430Sgblack@eecs.umich.eduvoid 1347639Sgblack@eecs.umich.edufinishVfp(FPSCR &fpscr, VfpSavedState state, bool flush) 1357430Sgblack@eecs.umich.edu{ 1367430Sgblack@eecs.umich.edu int exceptions = fetestexcept(FeAllExceptions); 1377430Sgblack@eecs.umich.edu bool underflow = false; 1387430Sgblack@eecs.umich.edu if (exceptions & FeInvalid) { 1397430Sgblack@eecs.umich.edu fpscr.ioc = 1; 1407430Sgblack@eecs.umich.edu } 1417430Sgblack@eecs.umich.edu if (exceptions & FeDivByZero) { 1427430Sgblack@eecs.umich.edu fpscr.dzc = 1; 1437430Sgblack@eecs.umich.edu } 1447430Sgblack@eecs.umich.edu if (exceptions & FeOverflow) { 1457430Sgblack@eecs.umich.edu fpscr.ofc = 1; 1467430Sgblack@eecs.umich.edu } 1477430Sgblack@eecs.umich.edu if (exceptions & FeUnderflow) { 1487430Sgblack@eecs.umich.edu underflow = true; 1497430Sgblack@eecs.umich.edu fpscr.ufc = 1; 1507430Sgblack@eecs.umich.edu } 1517639Sgblack@eecs.umich.edu if ((exceptions & FeInexact) && !(underflow && flush)) { 1527430Sgblack@eecs.umich.edu fpscr.ixc = 1; 1537430Sgblack@eecs.umich.edu } 1547430Sgblack@eecs.umich.edu fesetround(state); 1557430Sgblack@eecs.umich.edu} 1567430Sgblack@eecs.umich.edu 1577430Sgblack@eecs.umich.edutemplate <class fpType> 1587430Sgblack@eecs.umich.edufpType 1597639Sgblack@eecs.umich.edufixDest(bool flush, bool defaultNan, fpType val, fpType op1) 1607430Sgblack@eecs.umich.edu{ 1617430Sgblack@eecs.umich.edu int fpClass = std::fpclassify(val); 1627430Sgblack@eecs.umich.edu fpType junk = 0.0; 1637430Sgblack@eecs.umich.edu if (fpClass == FP_NAN) { 1647430Sgblack@eecs.umich.edu const bool single = (sizeof(val) == sizeof(float)); 1657430Sgblack@eecs.umich.edu const uint64_t qnan = single ? 0x7fc00000 : ULL(0x7ff8000000000000); 1667430Sgblack@eecs.umich.edu const bool nan = std::isnan(op1); 1677639Sgblack@eecs.umich.edu if (!nan || defaultNan) { 1687430Sgblack@eecs.umich.edu val = bitsToFp(qnan, junk); 1697430Sgblack@eecs.umich.edu } else if (nan) { 1707430Sgblack@eecs.umich.edu val = bitsToFp(fpToBits(op1) | qnan, junk); 1717430Sgblack@eecs.umich.edu } 1727639Sgblack@eecs.umich.edu } else if (fpClass == FP_SUBNORMAL && flush == 1) { 1737430Sgblack@eecs.umich.edu // Turn val into a zero with the correct sign; 1747430Sgblack@eecs.umich.edu uint64_t bitMask = ULL(0x1) << (sizeof(fpType) * 8 - 1); 1757430Sgblack@eecs.umich.edu val = bitsToFp(fpToBits(val) & bitMask, junk); 1767430Sgblack@eecs.umich.edu feclearexcept(FeInexact); 1777430Sgblack@eecs.umich.edu feraiseexcept(FeUnderflow); 1787430Sgblack@eecs.umich.edu } 1797430Sgblack@eecs.umich.edu return val; 1807430Sgblack@eecs.umich.edu} 1817430Sgblack@eecs.umich.edu 1827430Sgblack@eecs.umich.edutemplate 1837639Sgblack@eecs.umich.edufloat fixDest<float>(bool flush, bool defaultNan, float val, float op1); 1847430Sgblack@eecs.umich.edutemplate 1857639Sgblack@eecs.umich.edudouble fixDest<double>(bool flush, bool defaultNan, double val, double op1); 1867430Sgblack@eecs.umich.edu 1877430Sgblack@eecs.umich.edutemplate <class fpType> 1887430Sgblack@eecs.umich.edufpType 1897639Sgblack@eecs.umich.edufixDest(bool flush, bool defaultNan, fpType val, fpType op1, fpType op2) 1907430Sgblack@eecs.umich.edu{ 1917430Sgblack@eecs.umich.edu int fpClass = std::fpclassify(val); 1927430Sgblack@eecs.umich.edu fpType junk = 0.0; 1937430Sgblack@eecs.umich.edu if (fpClass == FP_NAN) { 1947430Sgblack@eecs.umich.edu const bool single = (sizeof(val) == sizeof(float)); 1957430Sgblack@eecs.umich.edu const uint64_t qnan = single ? 0x7fc00000 : ULL(0x7ff8000000000000); 1967430Sgblack@eecs.umich.edu const bool nan1 = std::isnan(op1); 1977430Sgblack@eecs.umich.edu const bool nan2 = std::isnan(op2); 1987430Sgblack@eecs.umich.edu const bool signal1 = nan1 && ((fpToBits(op1) & qnan) != qnan); 1997430Sgblack@eecs.umich.edu const bool signal2 = nan2 && ((fpToBits(op2) & qnan) != qnan); 2007639Sgblack@eecs.umich.edu if ((!nan1 && !nan2) || defaultNan) { 2017430Sgblack@eecs.umich.edu val = bitsToFp(qnan, junk); 2027430Sgblack@eecs.umich.edu } else if (signal1) { 2037430Sgblack@eecs.umich.edu val = bitsToFp(fpToBits(op1) | qnan, junk); 2047430Sgblack@eecs.umich.edu } else if (signal2) { 2057430Sgblack@eecs.umich.edu val = bitsToFp(fpToBits(op2) | qnan, junk); 2067430Sgblack@eecs.umich.edu } else if (nan1) { 2077430Sgblack@eecs.umich.edu val = op1; 2087430Sgblack@eecs.umich.edu } else if (nan2) { 2097430Sgblack@eecs.umich.edu val = op2; 2107430Sgblack@eecs.umich.edu } 2117639Sgblack@eecs.umich.edu } else if (fpClass == FP_SUBNORMAL && flush) { 2127430Sgblack@eecs.umich.edu // Turn val into a zero with the correct sign; 2137430Sgblack@eecs.umich.edu uint64_t bitMask = ULL(0x1) << (sizeof(fpType) * 8 - 1); 2147430Sgblack@eecs.umich.edu val = bitsToFp(fpToBits(val) & bitMask, junk); 2157430Sgblack@eecs.umich.edu feclearexcept(FeInexact); 2167430Sgblack@eecs.umich.edu feraiseexcept(FeUnderflow); 2177430Sgblack@eecs.umich.edu } 2187430Sgblack@eecs.umich.edu return val; 2197430Sgblack@eecs.umich.edu} 2207430Sgblack@eecs.umich.edu 2217430Sgblack@eecs.umich.edutemplate 2227639Sgblack@eecs.umich.edufloat fixDest<float>(bool flush, bool defaultNan, 2237639Sgblack@eecs.umich.edu float val, float op1, float op2); 2247430Sgblack@eecs.umich.edutemplate 2257639Sgblack@eecs.umich.edudouble fixDest<double>(bool flush, bool defaultNan, 2267639Sgblack@eecs.umich.edu double val, double op1, double op2); 2277430Sgblack@eecs.umich.edu 2287430Sgblack@eecs.umich.edutemplate <class fpType> 2297430Sgblack@eecs.umich.edufpType 2307639Sgblack@eecs.umich.edufixDivDest(bool flush, bool defaultNan, fpType val, fpType op1, fpType op2) 2317430Sgblack@eecs.umich.edu{ 2327639Sgblack@eecs.umich.edu fpType mid = fixDest(flush, defaultNan, val, op1, op2); 2337430Sgblack@eecs.umich.edu const bool single = (sizeof(fpType) == sizeof(float)); 2347430Sgblack@eecs.umich.edu const fpType junk = 0.0; 2357430Sgblack@eecs.umich.edu if ((single && (val == bitsToFp(0x00800000, junk) || 2367430Sgblack@eecs.umich.edu val == bitsToFp(0x80800000, junk))) || 2377430Sgblack@eecs.umich.edu (!single && (val == bitsToFp(ULL(0x0010000000000000), junk) || 2387430Sgblack@eecs.umich.edu val == bitsToFp(ULL(0x8010000000000000), junk))) 2397430Sgblack@eecs.umich.edu ) { 2407430Sgblack@eecs.umich.edu __asm__ __volatile__("" : "=m" (op1) : "m" (op1)); 2417430Sgblack@eecs.umich.edu fesetround(FeRoundZero); 2427430Sgblack@eecs.umich.edu fpType temp = 0.0; 2437430Sgblack@eecs.umich.edu __asm__ __volatile__("" : "=m" (temp) : "m" (temp)); 2447430Sgblack@eecs.umich.edu temp = op1 / op2; 2457430Sgblack@eecs.umich.edu if (flushToZero(temp)) { 2467430Sgblack@eecs.umich.edu feraiseexcept(FeUnderflow); 2477639Sgblack@eecs.umich.edu if (flush) { 2487430Sgblack@eecs.umich.edu feclearexcept(FeInexact); 2497430Sgblack@eecs.umich.edu mid = temp; 2507430Sgblack@eecs.umich.edu } 2517430Sgblack@eecs.umich.edu } 2527430Sgblack@eecs.umich.edu __asm__ __volatile__("" :: "m" (temp)); 2537430Sgblack@eecs.umich.edu } 2547430Sgblack@eecs.umich.edu return mid; 2557430Sgblack@eecs.umich.edu} 2567430Sgblack@eecs.umich.edu 2577430Sgblack@eecs.umich.edutemplate 2587639Sgblack@eecs.umich.edufloat fixDivDest<float>(bool flush, bool defaultNan, 2597639Sgblack@eecs.umich.edu float val, float op1, float op2); 2607430Sgblack@eecs.umich.edutemplate 2617639Sgblack@eecs.umich.edudouble fixDivDest<double>(bool flush, bool defaultNan, 2627639Sgblack@eecs.umich.edu double val, double op1, double op2); 2637430Sgblack@eecs.umich.edu 2647430Sgblack@eecs.umich.edufloat 2657430Sgblack@eecs.umich.edufixFpDFpSDest(FPSCR fpscr, double val) 2667430Sgblack@eecs.umich.edu{ 2677430Sgblack@eecs.umich.edu const float junk = 0.0; 2687430Sgblack@eecs.umich.edu float op1 = 0.0; 2697430Sgblack@eecs.umich.edu if (std::isnan(val)) { 2707430Sgblack@eecs.umich.edu uint64_t valBits = fpToBits(val); 2717430Sgblack@eecs.umich.edu uint32_t op1Bits = bits(valBits, 50, 29) | 2727430Sgblack@eecs.umich.edu (mask(9) << 22) | 2737430Sgblack@eecs.umich.edu (bits(valBits, 63) << 31); 2747430Sgblack@eecs.umich.edu op1 = bitsToFp(op1Bits, junk); 2757430Sgblack@eecs.umich.edu } 2767639Sgblack@eecs.umich.edu float mid = fixDest(fpscr.fz, fpscr.dn, (float)val, op1); 2777430Sgblack@eecs.umich.edu if (fpscr.fz && fetestexcept(FeUnderflow | FeInexact) == 2787430Sgblack@eecs.umich.edu (FeUnderflow | FeInexact)) { 2797430Sgblack@eecs.umich.edu feclearexcept(FeInexact); 2807430Sgblack@eecs.umich.edu } 2817430Sgblack@eecs.umich.edu if (mid == bitsToFp(0x00800000, junk) || 2827430Sgblack@eecs.umich.edu mid == bitsToFp(0x80800000, junk)) { 2837430Sgblack@eecs.umich.edu __asm__ __volatile__("" : "=m" (val) : "m" (val)); 2847430Sgblack@eecs.umich.edu fesetround(FeRoundZero); 2857430Sgblack@eecs.umich.edu float temp = 0.0; 2867430Sgblack@eecs.umich.edu __asm__ __volatile__("" : "=m" (temp) : "m" (temp)); 2877430Sgblack@eecs.umich.edu temp = val; 2887430Sgblack@eecs.umich.edu if (flushToZero(temp)) { 2897430Sgblack@eecs.umich.edu feraiseexcept(FeUnderflow); 2907430Sgblack@eecs.umich.edu if (fpscr.fz) { 2917430Sgblack@eecs.umich.edu feclearexcept(FeInexact); 2927430Sgblack@eecs.umich.edu mid = temp; 2937430Sgblack@eecs.umich.edu } 2947430Sgblack@eecs.umich.edu } 2957430Sgblack@eecs.umich.edu __asm__ __volatile__("" :: "m" (temp)); 2967430Sgblack@eecs.umich.edu } 2977430Sgblack@eecs.umich.edu return mid; 2987430Sgblack@eecs.umich.edu} 2997430Sgblack@eecs.umich.edu 3007430Sgblack@eecs.umich.edudouble 3017430Sgblack@eecs.umich.edufixFpSFpDDest(FPSCR fpscr, float val) 3027430Sgblack@eecs.umich.edu{ 3037430Sgblack@eecs.umich.edu const double junk = 0.0; 3047430Sgblack@eecs.umich.edu double op1 = 0.0; 3057430Sgblack@eecs.umich.edu if (std::isnan(val)) { 3067430Sgblack@eecs.umich.edu uint32_t valBits = fpToBits(val); 3077430Sgblack@eecs.umich.edu uint64_t op1Bits = ((uint64_t)bits(valBits, 21, 0) << 29) | 3087430Sgblack@eecs.umich.edu (mask(12) << 51) | 3097430Sgblack@eecs.umich.edu ((uint64_t)bits(valBits, 31) << 63); 3107430Sgblack@eecs.umich.edu op1 = bitsToFp(op1Bits, junk); 3117430Sgblack@eecs.umich.edu } 3127639Sgblack@eecs.umich.edu double mid = fixDest(fpscr.fz, fpscr.dn, (double)val, op1); 3137430Sgblack@eecs.umich.edu if (mid == bitsToFp(ULL(0x0010000000000000), junk) || 3147430Sgblack@eecs.umich.edu mid == bitsToFp(ULL(0x8010000000000000), junk)) { 3157430Sgblack@eecs.umich.edu __asm__ __volatile__("" : "=m" (val) : "m" (val)); 3167430Sgblack@eecs.umich.edu fesetround(FeRoundZero); 3177430Sgblack@eecs.umich.edu double temp = 0.0; 3187430Sgblack@eecs.umich.edu __asm__ __volatile__("" : "=m" (temp) : "m" (temp)); 3197430Sgblack@eecs.umich.edu temp = val; 3207430Sgblack@eecs.umich.edu if (flushToZero(temp)) { 3217430Sgblack@eecs.umich.edu feraiseexcept(FeUnderflow); 3227430Sgblack@eecs.umich.edu if (fpscr.fz) { 3237430Sgblack@eecs.umich.edu feclearexcept(FeInexact); 3247430Sgblack@eecs.umich.edu mid = temp; 3257430Sgblack@eecs.umich.edu } 3267430Sgblack@eecs.umich.edu } 3277430Sgblack@eecs.umich.edu __asm__ __volatile__("" :: "m" (temp)); 3287430Sgblack@eecs.umich.edu } 3297430Sgblack@eecs.umich.edu return mid; 3307430Sgblack@eecs.umich.edu} 3317430Sgblack@eecs.umich.edu 3327639Sgblack@eecs.umich.eduuint16_t 3337639Sgblack@eecs.umich.eduvcvtFpSFpH(FPSCR &fpscr, bool flush, bool defaultNan, 3347639Sgblack@eecs.umich.edu uint32_t rMode, bool ahp, float op) 3357430Sgblack@eecs.umich.edu{ 3367430Sgblack@eecs.umich.edu uint32_t opBits = fpToBits(op); 3377430Sgblack@eecs.umich.edu // Extract the operand. 3387430Sgblack@eecs.umich.edu bool neg = bits(opBits, 31); 3397430Sgblack@eecs.umich.edu uint32_t exponent = bits(opBits, 30, 23); 3407430Sgblack@eecs.umich.edu uint32_t oldMantissa = bits(opBits, 22, 0); 3417430Sgblack@eecs.umich.edu uint32_t mantissa = oldMantissa >> (23 - 10); 3427430Sgblack@eecs.umich.edu // Do the conversion. 3437430Sgblack@eecs.umich.edu uint32_t extra = oldMantissa & mask(23 - 10); 3447430Sgblack@eecs.umich.edu if (exponent == 0xff) { 3457430Sgblack@eecs.umich.edu if (oldMantissa != 0) { 3467430Sgblack@eecs.umich.edu // Nans. 3477430Sgblack@eecs.umich.edu if (bits(mantissa, 9) == 0) { 3487430Sgblack@eecs.umich.edu // Signalling nan. 3497430Sgblack@eecs.umich.edu fpscr.ioc = 1; 3507430Sgblack@eecs.umich.edu } 3517639Sgblack@eecs.umich.edu if (ahp) { 3527430Sgblack@eecs.umich.edu mantissa = 0; 3537430Sgblack@eecs.umich.edu exponent = 0; 3547430Sgblack@eecs.umich.edu fpscr.ioc = 1; 3557639Sgblack@eecs.umich.edu } else if (defaultNan) { 3567430Sgblack@eecs.umich.edu mantissa = (1 << 9); 3577430Sgblack@eecs.umich.edu exponent = 0x1f; 3587430Sgblack@eecs.umich.edu neg = false; 3597430Sgblack@eecs.umich.edu } else { 3607430Sgblack@eecs.umich.edu exponent = 0x1f; 3617430Sgblack@eecs.umich.edu mantissa |= (1 << 9); 3627430Sgblack@eecs.umich.edu } 3637430Sgblack@eecs.umich.edu } else { 3647430Sgblack@eecs.umich.edu // Infinities. 3657430Sgblack@eecs.umich.edu exponent = 0x1F; 3667639Sgblack@eecs.umich.edu if (ahp) { 3677430Sgblack@eecs.umich.edu fpscr.ioc = 1; 3687430Sgblack@eecs.umich.edu mantissa = 0x3ff; 3697430Sgblack@eecs.umich.edu } else { 3707430Sgblack@eecs.umich.edu mantissa = 0; 3717430Sgblack@eecs.umich.edu } 3727430Sgblack@eecs.umich.edu } 3737430Sgblack@eecs.umich.edu } else if (exponent == 0 && oldMantissa == 0) { 3747430Sgblack@eecs.umich.edu // Zero, don't need to do anything. 3757430Sgblack@eecs.umich.edu } else { 3767430Sgblack@eecs.umich.edu // Normalized or denormalized numbers. 3777430Sgblack@eecs.umich.edu 3787430Sgblack@eecs.umich.edu bool inexact = (extra != 0); 3797430Sgblack@eecs.umich.edu 3807430Sgblack@eecs.umich.edu if (exponent == 0) { 3817430Sgblack@eecs.umich.edu // Denormalized. 3827430Sgblack@eecs.umich.edu 3837430Sgblack@eecs.umich.edu // If flush to zero is on, this shouldn't happen. 3847639Sgblack@eecs.umich.edu assert(!flush); 3857430Sgblack@eecs.umich.edu 3867430Sgblack@eecs.umich.edu // Check for underflow 3877430Sgblack@eecs.umich.edu if (inexact || fpscr.ufe) 3887430Sgblack@eecs.umich.edu fpscr.ufc = 1; 3897430Sgblack@eecs.umich.edu 3907430Sgblack@eecs.umich.edu // Handle rounding. 3917639Sgblack@eecs.umich.edu unsigned mode = rMode; 3927430Sgblack@eecs.umich.edu if ((mode == VfpRoundUpward && !neg && extra) || 3937430Sgblack@eecs.umich.edu (mode == VfpRoundDown && neg && extra) || 3947430Sgblack@eecs.umich.edu (mode == VfpRoundNearest && 3957430Sgblack@eecs.umich.edu (extra > (1 << 9) || 3967430Sgblack@eecs.umich.edu (extra == (1 << 9) && bits(mantissa, 0))))) { 3977430Sgblack@eecs.umich.edu mantissa++; 3987430Sgblack@eecs.umich.edu } 3997430Sgblack@eecs.umich.edu 4007430Sgblack@eecs.umich.edu // See if the number became normalized after rounding. 4017430Sgblack@eecs.umich.edu if (mantissa == (1 << 10)) { 4027430Sgblack@eecs.umich.edu mantissa = 0; 4037430Sgblack@eecs.umich.edu exponent = 1; 4047430Sgblack@eecs.umich.edu } 4057430Sgblack@eecs.umich.edu } else { 4067430Sgblack@eecs.umich.edu // Normalized. 4077430Sgblack@eecs.umich.edu 4087430Sgblack@eecs.umich.edu // We need to track the dropped bits differently since 4097430Sgblack@eecs.umich.edu // more can be dropped by denormalizing. 4107430Sgblack@eecs.umich.edu bool topOne = bits(extra, 12); 4117430Sgblack@eecs.umich.edu bool restZeros = bits(extra, 11, 0) == 0; 4127430Sgblack@eecs.umich.edu 4137430Sgblack@eecs.umich.edu if (exponent <= (127 - 15)) { 4147430Sgblack@eecs.umich.edu // The result is too small. Denormalize. 4157430Sgblack@eecs.umich.edu mantissa |= (1 << 10); 4167430Sgblack@eecs.umich.edu while (mantissa && exponent <= (127 - 15)) { 4177430Sgblack@eecs.umich.edu restZeros = restZeros && !topOne; 4187430Sgblack@eecs.umich.edu topOne = bits(mantissa, 0); 4197430Sgblack@eecs.umich.edu mantissa = mantissa >> 1; 4207430Sgblack@eecs.umich.edu exponent++; 4217430Sgblack@eecs.umich.edu } 4227430Sgblack@eecs.umich.edu if (topOne || !restZeros) 4237430Sgblack@eecs.umich.edu inexact = true; 4247430Sgblack@eecs.umich.edu exponent = 0; 4257430Sgblack@eecs.umich.edu } else { 4267430Sgblack@eecs.umich.edu // Change bias. 4277430Sgblack@eecs.umich.edu exponent -= (127 - 15); 4287430Sgblack@eecs.umich.edu } 4297430Sgblack@eecs.umich.edu 4307430Sgblack@eecs.umich.edu if (exponent == 0 && (inexact || fpscr.ufe)) { 4317430Sgblack@eecs.umich.edu // Underflow 4327430Sgblack@eecs.umich.edu fpscr.ufc = 1; 4337430Sgblack@eecs.umich.edu } 4347430Sgblack@eecs.umich.edu 4357430Sgblack@eecs.umich.edu // Handle rounding. 4367639Sgblack@eecs.umich.edu unsigned mode = rMode; 4377430Sgblack@eecs.umich.edu bool nonZero = topOne || !restZeros; 4387430Sgblack@eecs.umich.edu if ((mode == VfpRoundUpward && !neg && nonZero) || 4397430Sgblack@eecs.umich.edu (mode == VfpRoundDown && neg && nonZero) || 4407430Sgblack@eecs.umich.edu (mode == VfpRoundNearest && topOne && 4417430Sgblack@eecs.umich.edu (!restZeros || bits(mantissa, 0)))) { 4427430Sgblack@eecs.umich.edu mantissa++; 4437430Sgblack@eecs.umich.edu } 4447430Sgblack@eecs.umich.edu 4457430Sgblack@eecs.umich.edu // See if we rounded up and need to bump the exponent. 4467430Sgblack@eecs.umich.edu if (mantissa == (1 << 10)) { 4477430Sgblack@eecs.umich.edu mantissa = 0; 4487430Sgblack@eecs.umich.edu exponent++; 4497430Sgblack@eecs.umich.edu } 4507430Sgblack@eecs.umich.edu 4517430Sgblack@eecs.umich.edu // Deal with overflow 4527639Sgblack@eecs.umich.edu if (ahp) { 4537430Sgblack@eecs.umich.edu if (exponent >= 0x20) { 4547430Sgblack@eecs.umich.edu exponent = 0x1f; 4557430Sgblack@eecs.umich.edu mantissa = 0x3ff; 4567430Sgblack@eecs.umich.edu fpscr.ioc = 1; 4577430Sgblack@eecs.umich.edu // Supress inexact exception. 4587430Sgblack@eecs.umich.edu inexact = false; 4597430Sgblack@eecs.umich.edu } 4607430Sgblack@eecs.umich.edu } else { 4617430Sgblack@eecs.umich.edu if (exponent >= 0x1f) { 4627430Sgblack@eecs.umich.edu if ((mode == VfpRoundNearest) || 4637430Sgblack@eecs.umich.edu (mode == VfpRoundUpward && !neg) || 4647430Sgblack@eecs.umich.edu (mode == VfpRoundDown && neg)) { 4657430Sgblack@eecs.umich.edu // Overflow to infinity. 4667430Sgblack@eecs.umich.edu exponent = 0x1f; 4677430Sgblack@eecs.umich.edu mantissa = 0; 4687430Sgblack@eecs.umich.edu } else { 4697430Sgblack@eecs.umich.edu // Overflow to max normal. 4707430Sgblack@eecs.umich.edu exponent = 0x1e; 4717430Sgblack@eecs.umich.edu mantissa = 0x3ff; 4727430Sgblack@eecs.umich.edu } 4737430Sgblack@eecs.umich.edu fpscr.ofc = 1; 4747430Sgblack@eecs.umich.edu inexact = true; 4757430Sgblack@eecs.umich.edu } 4767430Sgblack@eecs.umich.edu } 4777430Sgblack@eecs.umich.edu } 4787430Sgblack@eecs.umich.edu 4797430Sgblack@eecs.umich.edu if (inexact) { 4807430Sgblack@eecs.umich.edu fpscr.ixc = 1; 4817430Sgblack@eecs.umich.edu } 4827430Sgblack@eecs.umich.edu } 4837430Sgblack@eecs.umich.edu // Reassemble and install the result. 4847430Sgblack@eecs.umich.edu uint32_t result = bits(mantissa, 9, 0); 4857430Sgblack@eecs.umich.edu replaceBits(result, 14, 10, exponent); 4867430Sgblack@eecs.umich.edu if (neg) 4877430Sgblack@eecs.umich.edu result |= (1 << 15); 4887639Sgblack@eecs.umich.edu return result; 4897430Sgblack@eecs.umich.edu} 4907430Sgblack@eecs.umich.edu 4917430Sgblack@eecs.umich.edufloat 4927639Sgblack@eecs.umich.eduvcvtFpHFpS(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op) 4937430Sgblack@eecs.umich.edu{ 4947430Sgblack@eecs.umich.edu float junk = 0.0; 4957430Sgblack@eecs.umich.edu // Extract the bitfields. 4967639Sgblack@eecs.umich.edu bool neg = bits(op, 15); 4977639Sgblack@eecs.umich.edu uint32_t exponent = bits(op, 14, 10); 4987639Sgblack@eecs.umich.edu uint32_t mantissa = bits(op, 9, 0); 4997430Sgblack@eecs.umich.edu // Do the conversion. 5007430Sgblack@eecs.umich.edu if (exponent == 0) { 5017430Sgblack@eecs.umich.edu if (mantissa != 0) { 5027430Sgblack@eecs.umich.edu // Normalize the value. 5037430Sgblack@eecs.umich.edu exponent = exponent + (127 - 15) + 1; 5047430Sgblack@eecs.umich.edu while (mantissa < (1 << 10)) { 5057430Sgblack@eecs.umich.edu mantissa = mantissa << 1; 5067430Sgblack@eecs.umich.edu exponent--; 5077430Sgblack@eecs.umich.edu } 5087430Sgblack@eecs.umich.edu } 5097430Sgblack@eecs.umich.edu mantissa = mantissa << (23 - 10); 5107639Sgblack@eecs.umich.edu } else if (exponent == 0x1f && !ahp) { 5117430Sgblack@eecs.umich.edu // Infinities and nans. 5127430Sgblack@eecs.umich.edu exponent = 0xff; 5137430Sgblack@eecs.umich.edu if (mantissa != 0) { 5147430Sgblack@eecs.umich.edu // Nans. 5157430Sgblack@eecs.umich.edu mantissa = mantissa << (23 - 10); 5167430Sgblack@eecs.umich.edu if (bits(mantissa, 22) == 0) { 5177430Sgblack@eecs.umich.edu // Signalling nan. 5187430Sgblack@eecs.umich.edu fpscr.ioc = 1; 5197430Sgblack@eecs.umich.edu mantissa |= (1 << 22); 5207430Sgblack@eecs.umich.edu } 5217639Sgblack@eecs.umich.edu if (defaultNan) { 5227430Sgblack@eecs.umich.edu mantissa &= ~mask(22); 5237430Sgblack@eecs.umich.edu neg = false; 5247430Sgblack@eecs.umich.edu } 5257430Sgblack@eecs.umich.edu } 5267430Sgblack@eecs.umich.edu } else { 5277430Sgblack@eecs.umich.edu exponent = exponent + (127 - 15); 5287430Sgblack@eecs.umich.edu mantissa = mantissa << (23 - 10); 5297430Sgblack@eecs.umich.edu } 5307430Sgblack@eecs.umich.edu // Reassemble the result. 5317430Sgblack@eecs.umich.edu uint32_t result = bits(mantissa, 22, 0); 5327430Sgblack@eecs.umich.edu replaceBits(result, 30, 23, exponent); 5337430Sgblack@eecs.umich.edu if (neg) 5347430Sgblack@eecs.umich.edu result |= (1 << 31); 5357430Sgblack@eecs.umich.edu return bitsToFp(result, junk); 5367430Sgblack@eecs.umich.edu} 5377430Sgblack@eecs.umich.edu 5387430Sgblack@eecs.umich.eduuint64_t 5397430Sgblack@eecs.umich.eduvfpFpSToFixed(float val, bool isSigned, bool half, 5407430Sgblack@eecs.umich.edu uint8_t imm, bool rzero) 5417430Sgblack@eecs.umich.edu{ 5427430Sgblack@eecs.umich.edu int rmode = rzero ? FeRoundZero : fegetround(); 5437430Sgblack@eecs.umich.edu __asm__ __volatile__("" : "=m" (rmode) : "m" (rmode)); 5447430Sgblack@eecs.umich.edu fesetround(FeRoundNearest); 5457430Sgblack@eecs.umich.edu val = val * powf(2.0, imm); 5467430Sgblack@eecs.umich.edu __asm__ __volatile__("" : "=m" (val) : "m" (val)); 5477430Sgblack@eecs.umich.edu fesetround(rmode); 5487430Sgblack@eecs.umich.edu feclearexcept(FeAllExceptions); 5497430Sgblack@eecs.umich.edu __asm__ __volatile__("" : "=m" (val) : "m" (val)); 5507430Sgblack@eecs.umich.edu float origVal = val; 5517430Sgblack@eecs.umich.edu val = rintf(val); 5527430Sgblack@eecs.umich.edu int fpType = std::fpclassify(val); 5537430Sgblack@eecs.umich.edu if (fpType == FP_SUBNORMAL || fpType == FP_NAN) { 5547430Sgblack@eecs.umich.edu if (fpType == FP_NAN) { 5557430Sgblack@eecs.umich.edu feraiseexcept(FeInvalid); 5567430Sgblack@eecs.umich.edu } 5577430Sgblack@eecs.umich.edu val = 0.0; 5587430Sgblack@eecs.umich.edu } else if (origVal != val) { 5597430Sgblack@eecs.umich.edu switch (rmode) { 5607430Sgblack@eecs.umich.edu case FeRoundNearest: 5617430Sgblack@eecs.umich.edu if (origVal - val > 0.5) 5627430Sgblack@eecs.umich.edu val += 1.0; 5637430Sgblack@eecs.umich.edu else if (val - origVal > 0.5) 5647430Sgblack@eecs.umich.edu val -= 1.0; 5657430Sgblack@eecs.umich.edu break; 5667430Sgblack@eecs.umich.edu case FeRoundDown: 5677430Sgblack@eecs.umich.edu if (origVal < val) 5687430Sgblack@eecs.umich.edu val -= 1.0; 5697430Sgblack@eecs.umich.edu break; 5707430Sgblack@eecs.umich.edu case FeRoundUpward: 5717430Sgblack@eecs.umich.edu if (origVal > val) 5727430Sgblack@eecs.umich.edu val += 1.0; 5737430Sgblack@eecs.umich.edu break; 5747430Sgblack@eecs.umich.edu } 5757430Sgblack@eecs.umich.edu feraiseexcept(FeInexact); 5767430Sgblack@eecs.umich.edu } 5777430Sgblack@eecs.umich.edu 5787430Sgblack@eecs.umich.edu if (isSigned) { 5797430Sgblack@eecs.umich.edu if (half) { 5807430Sgblack@eecs.umich.edu if ((double)val < (int16_t)(1 << 15)) { 5817430Sgblack@eecs.umich.edu feraiseexcept(FeInvalid); 5827430Sgblack@eecs.umich.edu feclearexcept(FeInexact); 5837430Sgblack@eecs.umich.edu return (int16_t)(1 << 15); 5847430Sgblack@eecs.umich.edu } 5857430Sgblack@eecs.umich.edu if ((double)val > (int16_t)mask(15)) { 5867430Sgblack@eecs.umich.edu feraiseexcept(FeInvalid); 5877430Sgblack@eecs.umich.edu feclearexcept(FeInexact); 5887430Sgblack@eecs.umich.edu return (int16_t)mask(15); 5897430Sgblack@eecs.umich.edu } 5907430Sgblack@eecs.umich.edu return (int16_t)val; 5917430Sgblack@eecs.umich.edu } else { 5927430Sgblack@eecs.umich.edu if ((double)val < (int32_t)(1 << 31)) { 5937430Sgblack@eecs.umich.edu feraiseexcept(FeInvalid); 5947430Sgblack@eecs.umich.edu feclearexcept(FeInexact); 5957430Sgblack@eecs.umich.edu return (int32_t)(1 << 31); 5967430Sgblack@eecs.umich.edu } 5977430Sgblack@eecs.umich.edu if ((double)val > (int32_t)mask(31)) { 5987430Sgblack@eecs.umich.edu feraiseexcept(FeInvalid); 5997430Sgblack@eecs.umich.edu feclearexcept(FeInexact); 6007430Sgblack@eecs.umich.edu return (int32_t)mask(31); 6017430Sgblack@eecs.umich.edu } 6027430Sgblack@eecs.umich.edu return (int32_t)val; 6037430Sgblack@eecs.umich.edu } 6047430Sgblack@eecs.umich.edu } else { 6057430Sgblack@eecs.umich.edu if (half) { 6067430Sgblack@eecs.umich.edu if ((double)val < 0) { 6077430Sgblack@eecs.umich.edu feraiseexcept(FeInvalid); 6087430Sgblack@eecs.umich.edu feclearexcept(FeInexact); 6097430Sgblack@eecs.umich.edu return 0; 6107430Sgblack@eecs.umich.edu } 6117430Sgblack@eecs.umich.edu if ((double)val > (mask(16))) { 6127430Sgblack@eecs.umich.edu feraiseexcept(FeInvalid); 6137430Sgblack@eecs.umich.edu feclearexcept(FeInexact); 6147430Sgblack@eecs.umich.edu return mask(16); 6157430Sgblack@eecs.umich.edu } 6167430Sgblack@eecs.umich.edu return (uint16_t)val; 6177430Sgblack@eecs.umich.edu } else { 6187430Sgblack@eecs.umich.edu if ((double)val < 0) { 6197430Sgblack@eecs.umich.edu feraiseexcept(FeInvalid); 6207430Sgblack@eecs.umich.edu feclearexcept(FeInexact); 6217430Sgblack@eecs.umich.edu return 0; 6227430Sgblack@eecs.umich.edu } 6237430Sgblack@eecs.umich.edu if ((double)val > (mask(32))) { 6247430Sgblack@eecs.umich.edu feraiseexcept(FeInvalid); 6257430Sgblack@eecs.umich.edu feclearexcept(FeInexact); 6267430Sgblack@eecs.umich.edu return mask(32); 6277430Sgblack@eecs.umich.edu } 6287430Sgblack@eecs.umich.edu return (uint32_t)val; 6297430Sgblack@eecs.umich.edu } 6307430Sgblack@eecs.umich.edu } 6317430Sgblack@eecs.umich.edu} 6327430Sgblack@eecs.umich.edu 6337430Sgblack@eecs.umich.edufloat 6347639Sgblack@eecs.umich.eduvfpUFixedToFpS(bool flush, bool defaultNan, 6357639Sgblack@eecs.umich.edu uint32_t val, bool half, uint8_t imm) 6367430Sgblack@eecs.umich.edu{ 6377430Sgblack@eecs.umich.edu fesetround(FeRoundNearest); 6387430Sgblack@eecs.umich.edu if (half) 6397430Sgblack@eecs.umich.edu val = (uint16_t)val; 6407430Sgblack@eecs.umich.edu float scale = powf(2.0, imm); 6417430Sgblack@eecs.umich.edu __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); 6427430Sgblack@eecs.umich.edu feclearexcept(FeAllExceptions); 6437430Sgblack@eecs.umich.edu __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); 6447639Sgblack@eecs.umich.edu return fixDivDest(flush, defaultNan, val / scale, (float)val, scale); 6457430Sgblack@eecs.umich.edu} 6467430Sgblack@eecs.umich.edu 6477430Sgblack@eecs.umich.edufloat 6487639Sgblack@eecs.umich.eduvfpSFixedToFpS(bool flush, bool defaultNan, 6497639Sgblack@eecs.umich.edu int32_t val, bool half, uint8_t imm) 6507430Sgblack@eecs.umich.edu{ 6517430Sgblack@eecs.umich.edu fesetround(FeRoundNearest); 6527430Sgblack@eecs.umich.edu if (half) 6537430Sgblack@eecs.umich.edu val = sext<16>(val & mask(16)); 6547430Sgblack@eecs.umich.edu float scale = powf(2.0, imm); 6557430Sgblack@eecs.umich.edu __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); 6567430Sgblack@eecs.umich.edu feclearexcept(FeAllExceptions); 6577430Sgblack@eecs.umich.edu __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); 6587639Sgblack@eecs.umich.edu return fixDivDest(flush, defaultNan, val / scale, (float)val, scale); 6597430Sgblack@eecs.umich.edu} 6607430Sgblack@eecs.umich.edu 6617430Sgblack@eecs.umich.eduuint64_t 6627430Sgblack@eecs.umich.eduvfpFpDToFixed(double val, bool isSigned, bool half, 6637430Sgblack@eecs.umich.edu uint8_t imm, bool rzero) 6647430Sgblack@eecs.umich.edu{ 6657430Sgblack@eecs.umich.edu int rmode = rzero ? FeRoundZero : fegetround(); 6667430Sgblack@eecs.umich.edu fesetround(FeRoundNearest); 6677430Sgblack@eecs.umich.edu val = val * pow(2.0, imm); 6687430Sgblack@eecs.umich.edu __asm__ __volatile__("" : "=m" (val) : "m" (val)); 6697430Sgblack@eecs.umich.edu fesetround(rmode); 6707430Sgblack@eecs.umich.edu feclearexcept(FeAllExceptions); 6717430Sgblack@eecs.umich.edu __asm__ __volatile__("" : "=m" (val) : "m" (val)); 6727430Sgblack@eecs.umich.edu double origVal = val; 6737430Sgblack@eecs.umich.edu val = rint(val); 6747430Sgblack@eecs.umich.edu int fpType = std::fpclassify(val); 6757430Sgblack@eecs.umich.edu if (fpType == FP_SUBNORMAL || fpType == FP_NAN) { 6767430Sgblack@eecs.umich.edu if (fpType == FP_NAN) { 6777430Sgblack@eecs.umich.edu feraiseexcept(FeInvalid); 6787430Sgblack@eecs.umich.edu } 6797430Sgblack@eecs.umich.edu val = 0.0; 6807430Sgblack@eecs.umich.edu } else if (origVal != val) { 6817430Sgblack@eecs.umich.edu switch (rmode) { 6827430Sgblack@eecs.umich.edu case FeRoundNearest: 6837430Sgblack@eecs.umich.edu if (origVal - val > 0.5) 6847430Sgblack@eecs.umich.edu val += 1.0; 6857430Sgblack@eecs.umich.edu else if (val - origVal > 0.5) 6867430Sgblack@eecs.umich.edu val -= 1.0; 6877430Sgblack@eecs.umich.edu break; 6887430Sgblack@eecs.umich.edu case FeRoundDown: 6897430Sgblack@eecs.umich.edu if (origVal < val) 6907430Sgblack@eecs.umich.edu val -= 1.0; 6917430Sgblack@eecs.umich.edu break; 6927430Sgblack@eecs.umich.edu case FeRoundUpward: 6937430Sgblack@eecs.umich.edu if (origVal > val) 6947430Sgblack@eecs.umich.edu val += 1.0; 6957430Sgblack@eecs.umich.edu break; 6967430Sgblack@eecs.umich.edu } 6977430Sgblack@eecs.umich.edu feraiseexcept(FeInexact); 6987430Sgblack@eecs.umich.edu } 6997430Sgblack@eecs.umich.edu if (isSigned) { 7007430Sgblack@eecs.umich.edu if (half) { 7017430Sgblack@eecs.umich.edu if (val < (int16_t)(1 << 15)) { 7027430Sgblack@eecs.umich.edu feraiseexcept(FeInvalid); 7037430Sgblack@eecs.umich.edu feclearexcept(FeInexact); 7047430Sgblack@eecs.umich.edu return (int16_t)(1 << 15); 7057430Sgblack@eecs.umich.edu } 7067430Sgblack@eecs.umich.edu if (val > (int16_t)mask(15)) { 7077430Sgblack@eecs.umich.edu feraiseexcept(FeInvalid); 7087430Sgblack@eecs.umich.edu feclearexcept(FeInexact); 7097430Sgblack@eecs.umich.edu return (int16_t)mask(15); 7107430Sgblack@eecs.umich.edu } 7117430Sgblack@eecs.umich.edu return (int16_t)val; 7127430Sgblack@eecs.umich.edu } else { 7137430Sgblack@eecs.umich.edu if (val < (int32_t)(1 << 31)) { 7147430Sgblack@eecs.umich.edu feraiseexcept(FeInvalid); 7157430Sgblack@eecs.umich.edu feclearexcept(FeInexact); 7167430Sgblack@eecs.umich.edu return (int32_t)(1 << 31); 7177430Sgblack@eecs.umich.edu } 7187430Sgblack@eecs.umich.edu if (val > (int32_t)mask(31)) { 7197430Sgblack@eecs.umich.edu feraiseexcept(FeInvalid); 7207430Sgblack@eecs.umich.edu feclearexcept(FeInexact); 7217430Sgblack@eecs.umich.edu return (int32_t)mask(31); 7227430Sgblack@eecs.umich.edu } 7237430Sgblack@eecs.umich.edu return (int32_t)val; 7247430Sgblack@eecs.umich.edu } 7257430Sgblack@eecs.umich.edu } else { 7267430Sgblack@eecs.umich.edu if (half) { 7277430Sgblack@eecs.umich.edu if (val < 0) { 7287430Sgblack@eecs.umich.edu feraiseexcept(FeInvalid); 7297430Sgblack@eecs.umich.edu feclearexcept(FeInexact); 7307430Sgblack@eecs.umich.edu return 0; 7317430Sgblack@eecs.umich.edu } 7327430Sgblack@eecs.umich.edu if (val > mask(16)) { 7337430Sgblack@eecs.umich.edu feraiseexcept(FeInvalid); 7347430Sgblack@eecs.umich.edu feclearexcept(FeInexact); 7357430Sgblack@eecs.umich.edu return mask(16); 7367430Sgblack@eecs.umich.edu } 7377430Sgblack@eecs.umich.edu return (uint16_t)val; 7387430Sgblack@eecs.umich.edu } else { 7397430Sgblack@eecs.umich.edu if (val < 0) { 7407430Sgblack@eecs.umich.edu feraiseexcept(FeInvalid); 7417430Sgblack@eecs.umich.edu feclearexcept(FeInexact); 7427430Sgblack@eecs.umich.edu return 0; 7437430Sgblack@eecs.umich.edu } 7447430Sgblack@eecs.umich.edu if (val > mask(32)) { 7457430Sgblack@eecs.umich.edu feraiseexcept(FeInvalid); 7467430Sgblack@eecs.umich.edu feclearexcept(FeInexact); 7477430Sgblack@eecs.umich.edu return mask(32); 7487430Sgblack@eecs.umich.edu } 7497430Sgblack@eecs.umich.edu return (uint32_t)val; 7507430Sgblack@eecs.umich.edu } 7517430Sgblack@eecs.umich.edu } 7527430Sgblack@eecs.umich.edu} 7537430Sgblack@eecs.umich.edu 7547430Sgblack@eecs.umich.edudouble 7557639Sgblack@eecs.umich.eduvfpUFixedToFpD(bool flush, bool defaultNan, 7567639Sgblack@eecs.umich.edu uint32_t val, bool half, uint8_t imm) 7577430Sgblack@eecs.umich.edu{ 7587430Sgblack@eecs.umich.edu fesetround(FeRoundNearest); 7597430Sgblack@eecs.umich.edu if (half) 7607430Sgblack@eecs.umich.edu val = (uint16_t)val; 7617430Sgblack@eecs.umich.edu double scale = pow(2.0, imm); 7627430Sgblack@eecs.umich.edu __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); 7637430Sgblack@eecs.umich.edu feclearexcept(FeAllExceptions); 7647430Sgblack@eecs.umich.edu __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); 7657639Sgblack@eecs.umich.edu return fixDivDest(flush, defaultNan, val / scale, (double)val, scale); 7667430Sgblack@eecs.umich.edu} 7677430Sgblack@eecs.umich.edu 7687430Sgblack@eecs.umich.edudouble 7697639Sgblack@eecs.umich.eduvfpSFixedToFpD(bool flush, bool defaultNan, 7707639Sgblack@eecs.umich.edu int32_t val, bool half, uint8_t imm) 7717430Sgblack@eecs.umich.edu{ 7727430Sgblack@eecs.umich.edu fesetround(FeRoundNearest); 7737430Sgblack@eecs.umich.edu if (half) 7747430Sgblack@eecs.umich.edu val = sext<16>(val & mask(16)); 7757430Sgblack@eecs.umich.edu double scale = pow(2.0, imm); 7767430Sgblack@eecs.umich.edu __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); 7777430Sgblack@eecs.umich.edu feclearexcept(FeAllExceptions); 7787430Sgblack@eecs.umich.edu __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); 7797639Sgblack@eecs.umich.edu return fixDivDest(flush, defaultNan, val / scale, (double)val, scale); 7807430Sgblack@eecs.umich.edu} 7817430Sgblack@eecs.umich.edu 7827639Sgblack@eecs.umich.edu// This function implements a magic formula taken from the architecture 7837639Sgblack@eecs.umich.edu// reference manual. It was originally called recip_sqrt_estimate. 7847639Sgblack@eecs.umich.edustatic double 7857639Sgblack@eecs.umich.edurecipSqrtEstimate(double a) 7867639Sgblack@eecs.umich.edu{ 7877639Sgblack@eecs.umich.edu int64_t q0, q1, s; 7887639Sgblack@eecs.umich.edu double r; 7897639Sgblack@eecs.umich.edu if (a < 0.5) { 7907639Sgblack@eecs.umich.edu q0 = (int64_t)(a * 512.0); 7917639Sgblack@eecs.umich.edu r = 1.0 / sqrt(((double)q0 + 0.5) / 512.0); 7927639Sgblack@eecs.umich.edu } else { 7937639Sgblack@eecs.umich.edu q1 = (int64_t)(a * 256.0); 7947639Sgblack@eecs.umich.edu r = 1.0 / sqrt(((double)q1 + 0.5) / 256.0); 7957639Sgblack@eecs.umich.edu } 7967639Sgblack@eecs.umich.edu s = (int64_t)(256.0 * r + 0.5); 7977639Sgblack@eecs.umich.edu return (double)s / 256.0; 7987639Sgblack@eecs.umich.edu} 7997639Sgblack@eecs.umich.edu 8007639Sgblack@eecs.umich.edu// This function is only intended for use in Neon instructions because 8017639Sgblack@eecs.umich.edu// it ignores certain bits in the FPSCR. 8027639Sgblack@eecs.umich.edufloat 8037639Sgblack@eecs.umich.edufprSqrtEstimate(FPSCR &fpscr, float op) 8047639Sgblack@eecs.umich.edu{ 8057639Sgblack@eecs.umich.edu const uint32_t qnan = 0x7fc00000; 8067639Sgblack@eecs.umich.edu float junk = 0.0; 8077639Sgblack@eecs.umich.edu int fpClass = std::fpclassify(op); 8087639Sgblack@eecs.umich.edu if (fpClass == FP_NAN) { 8097639Sgblack@eecs.umich.edu if ((fpToBits(op) & qnan) != qnan) 8107639Sgblack@eecs.umich.edu fpscr.ioc = 1; 8117639Sgblack@eecs.umich.edu return bitsToFp(qnan, junk); 8127639Sgblack@eecs.umich.edu } else if (fpClass == FP_ZERO) { 8137639Sgblack@eecs.umich.edu fpscr.dzc = 1; 8147639Sgblack@eecs.umich.edu // Return infinity with the same sign as the operand. 8157639Sgblack@eecs.umich.edu return bitsToFp((std::signbit(op) << 31) | 8167639Sgblack@eecs.umich.edu (0xFF << 23) | (0 << 0), junk); 8177639Sgblack@eecs.umich.edu } else if (std::signbit(op)) { 8187639Sgblack@eecs.umich.edu // Set invalid op bit. 8197639Sgblack@eecs.umich.edu fpscr.ioc = 1; 8207639Sgblack@eecs.umich.edu return bitsToFp(qnan, junk); 8217639Sgblack@eecs.umich.edu } else if (fpClass == FP_INFINITE) { 8227639Sgblack@eecs.umich.edu return 0.0; 8237639Sgblack@eecs.umich.edu } else { 8247639Sgblack@eecs.umich.edu uint64_t opBits = fpToBits(op); 8257639Sgblack@eecs.umich.edu double scaled; 8267639Sgblack@eecs.umich.edu if (bits(opBits, 23)) { 8277639Sgblack@eecs.umich.edu scaled = bitsToFp((0 << 0) | (bits(opBits, 22, 0) << 29) | 8287639Sgblack@eecs.umich.edu (ULL(0x3fd) << 52) | (bits(opBits, 31) << 63), 8297639Sgblack@eecs.umich.edu (double)0.0); 8307639Sgblack@eecs.umich.edu } else { 8317639Sgblack@eecs.umich.edu scaled = bitsToFp((0 << 0) | (bits(opBits, 22, 0) << 29) | 8327639Sgblack@eecs.umich.edu (ULL(0x3fe) << 52) | (bits(opBits, 31) << 63), 8337639Sgblack@eecs.umich.edu (double)0.0); 8347639Sgblack@eecs.umich.edu } 8357639Sgblack@eecs.umich.edu uint64_t resultExp = (380 - bits(opBits, 30, 23)) / 2; 8367639Sgblack@eecs.umich.edu 8377639Sgblack@eecs.umich.edu uint64_t estimate = fpToBits(recipSqrtEstimate(scaled)); 8387639Sgblack@eecs.umich.edu 8397639Sgblack@eecs.umich.edu return bitsToFp((bits(estimate, 63) << 31) | 8407639Sgblack@eecs.umich.edu (bits(resultExp, 7, 0) << 23) | 8417639Sgblack@eecs.umich.edu (bits(estimate, 51, 29) << 0), junk); 8427639Sgblack@eecs.umich.edu } 8437639Sgblack@eecs.umich.edu} 8447639Sgblack@eecs.umich.edu 8457639Sgblack@eecs.umich.eduuint32_t 8467639Sgblack@eecs.umich.eduunsignedRSqrtEstimate(uint32_t op) 8477639Sgblack@eecs.umich.edu{ 8487639Sgblack@eecs.umich.edu if (bits(op, 31, 30) == 0) { 8497639Sgblack@eecs.umich.edu return -1; 8507639Sgblack@eecs.umich.edu } else { 8517639Sgblack@eecs.umich.edu double dpOp; 8527639Sgblack@eecs.umich.edu if (bits(op, 31)) { 8537639Sgblack@eecs.umich.edu dpOp = bitsToFp((ULL(0) << 63) | 8547639Sgblack@eecs.umich.edu (ULL(0x3fe) << 52) | 8557639Sgblack@eecs.umich.edu (bits((uint64_t)op, 30, 0) << 21) | 8567639Sgblack@eecs.umich.edu (0 << 0), (double)0.0); 8577639Sgblack@eecs.umich.edu } else { 8587639Sgblack@eecs.umich.edu dpOp = bitsToFp((ULL(0) << 63) | 8597639Sgblack@eecs.umich.edu (ULL(0x3fd) << 52) | 8607639Sgblack@eecs.umich.edu (bits((uint64_t)op, 29, 0) << 22) | 8617639Sgblack@eecs.umich.edu (0 << 0), (double)0.0); 8627639Sgblack@eecs.umich.edu } 8637639Sgblack@eecs.umich.edu uint64_t estimate = fpToBits(recipSqrtEstimate(dpOp)); 8647639Sgblack@eecs.umich.edu return (1 << 31) | bits(estimate, 51, 21); 8657639Sgblack@eecs.umich.edu } 8667639Sgblack@eecs.umich.edu} 8677639Sgblack@eecs.umich.edu 8687639Sgblack@eecs.umich.edu// This function implements a magic formula taken from the architecture 8697639Sgblack@eecs.umich.edu// reference manual. It was originally called recip_estimate. 8707639Sgblack@eecs.umich.edu 8717639Sgblack@eecs.umich.edustatic double 8727639Sgblack@eecs.umich.edurecipEstimate(double a) 8737639Sgblack@eecs.umich.edu{ 8747639Sgblack@eecs.umich.edu int64_t q, s; 8757639Sgblack@eecs.umich.edu double r; 8767639Sgblack@eecs.umich.edu q = (int64_t)(a * 512.0); 8777639Sgblack@eecs.umich.edu r = 1.0 / (((double)q + 0.5) / 512.0); 8787639Sgblack@eecs.umich.edu s = (int64_t)(256.0 * r + 0.5); 8797639Sgblack@eecs.umich.edu return (double)s / 256.0; 8807639Sgblack@eecs.umich.edu} 8817639Sgblack@eecs.umich.edu 8827639Sgblack@eecs.umich.edu// This function is only intended for use in Neon instructions because 8837639Sgblack@eecs.umich.edu// it ignores certain bits in the FPSCR. 8847639Sgblack@eecs.umich.edufloat 8857639Sgblack@eecs.umich.edufpRecipEstimate(FPSCR &fpscr, float op) 8867639Sgblack@eecs.umich.edu{ 8877639Sgblack@eecs.umich.edu const uint32_t qnan = 0x7fc00000; 8887639Sgblack@eecs.umich.edu float junk = 0.0; 8897639Sgblack@eecs.umich.edu int fpClass = std::fpclassify(op); 8907639Sgblack@eecs.umich.edu if (fpClass == FP_NAN) { 8917639Sgblack@eecs.umich.edu if ((fpToBits(op) & qnan) != qnan) 8927639Sgblack@eecs.umich.edu fpscr.ioc = 1; 8937639Sgblack@eecs.umich.edu return bitsToFp(qnan, junk); 8947639Sgblack@eecs.umich.edu } else if (fpClass == FP_INFINITE) { 8957639Sgblack@eecs.umich.edu return bitsToFp(std::signbit(op) << 31, junk); 8967639Sgblack@eecs.umich.edu } else if (fpClass == FP_ZERO) { 8977639Sgblack@eecs.umich.edu fpscr.dzc = 1; 8987639Sgblack@eecs.umich.edu // Return infinity with the same sign as the operand. 8997639Sgblack@eecs.umich.edu return bitsToFp((std::signbit(op) << 31) | 9007639Sgblack@eecs.umich.edu (0xFF << 23) | (0 << 0), junk); 9017639Sgblack@eecs.umich.edu } else if (fabs(op) >= pow(2.0, 126)) { 9027639Sgblack@eecs.umich.edu fpscr.ufc = 1; 9037639Sgblack@eecs.umich.edu return bitsToFp(std::signbit(op) << 31, junk); 9047639Sgblack@eecs.umich.edu } else { 9057639Sgblack@eecs.umich.edu uint64_t opBits = fpToBits(op); 9067639Sgblack@eecs.umich.edu double scaled; 9077639Sgblack@eecs.umich.edu scaled = bitsToFp((0 << 0) | (bits(opBits, 22, 0) << 29) | 9087639Sgblack@eecs.umich.edu (ULL(0x3fe) << 52) | (ULL(0) << 63), 9097639Sgblack@eecs.umich.edu (double)0.0); 9107639Sgblack@eecs.umich.edu uint64_t resultExp = 253 - bits(opBits, 30, 23); 9117639Sgblack@eecs.umich.edu 9127639Sgblack@eecs.umich.edu uint64_t estimate = fpToBits(recipEstimate(scaled)); 9137639Sgblack@eecs.umich.edu 9147639Sgblack@eecs.umich.edu return bitsToFp((bits(opBits, 31) << 31) | 9157639Sgblack@eecs.umich.edu (bits(resultExp, 7, 0) << 23) | 9167639Sgblack@eecs.umich.edu (bits(estimate, 51, 29) << 0), junk); 9177639Sgblack@eecs.umich.edu } 9187639Sgblack@eecs.umich.edu} 9197639Sgblack@eecs.umich.edu 9207639Sgblack@eecs.umich.eduuint32_t 9217639Sgblack@eecs.umich.eduunsignedRecipEstimate(uint32_t op) 9227639Sgblack@eecs.umich.edu{ 9237639Sgblack@eecs.umich.edu if (bits(op, 31) == 0) { 9247639Sgblack@eecs.umich.edu return -1; 9257639Sgblack@eecs.umich.edu } else { 9267639Sgblack@eecs.umich.edu double dpOp; 9277639Sgblack@eecs.umich.edu dpOp = bitsToFp((ULL(0) << 63) | 9287639Sgblack@eecs.umich.edu (ULL(0x3fe) << 52) | 9297639Sgblack@eecs.umich.edu (bits((uint64_t)op, 30, 0) << 21) | 9307639Sgblack@eecs.umich.edu (0 << 0), (double)0.0); 9317639Sgblack@eecs.umich.edu uint64_t estimate = fpToBits(recipEstimate(dpOp)); 9327639Sgblack@eecs.umich.edu return (1 << 31) | bits(estimate, 51, 21); 9337639Sgblack@eecs.umich.edu } 9347639Sgblack@eecs.umich.edu} 9357639Sgblack@eecs.umich.edu 9367639Sgblack@eecs.umich.edutemplate <class fpType> 9377639Sgblack@eecs.umich.edufpType 9387639Sgblack@eecs.umich.eduFpOp::processNans(FPSCR &fpscr, bool &done, bool defaultNan, 9397639Sgblack@eecs.umich.edu fpType op1, fpType op2) const 9407639Sgblack@eecs.umich.edu{ 9417639Sgblack@eecs.umich.edu done = true; 9427639Sgblack@eecs.umich.edu fpType junk = 0.0; 9437639Sgblack@eecs.umich.edu fpType dest = 0.0; 9447639Sgblack@eecs.umich.edu const bool single = (sizeof(fpType) == sizeof(float)); 9457639Sgblack@eecs.umich.edu const uint64_t qnan = 9467639Sgblack@eecs.umich.edu single ? 0x7fc00000 : ULL(0x7ff8000000000000); 9477639Sgblack@eecs.umich.edu const bool nan1 = std::isnan(op1); 9487639Sgblack@eecs.umich.edu const bool nan2 = std::isnan(op2); 9497639Sgblack@eecs.umich.edu const bool signal1 = nan1 && ((fpToBits(op1) & qnan) != qnan); 9507639Sgblack@eecs.umich.edu const bool signal2 = nan2 && ((fpToBits(op2) & qnan) != qnan); 9517639Sgblack@eecs.umich.edu if (nan1 || nan2) { 9527639Sgblack@eecs.umich.edu if (defaultNan) { 9537639Sgblack@eecs.umich.edu dest = bitsToFp(qnan, junk); 9547639Sgblack@eecs.umich.edu } else if (signal1) { 9557639Sgblack@eecs.umich.edu dest = bitsToFp(fpToBits(op1) | qnan, junk); 9567639Sgblack@eecs.umich.edu } else if (signal2) { 9577639Sgblack@eecs.umich.edu dest = bitsToFp(fpToBits(op2) | qnan, junk); 9587639Sgblack@eecs.umich.edu } else if (nan1) { 9597639Sgblack@eecs.umich.edu dest = op1; 9607639Sgblack@eecs.umich.edu } else if (nan2) { 9617639Sgblack@eecs.umich.edu dest = op2; 9627639Sgblack@eecs.umich.edu } 9637639Sgblack@eecs.umich.edu if (signal1 || signal2) { 9647639Sgblack@eecs.umich.edu fpscr.ioc = 1; 9657639Sgblack@eecs.umich.edu } 9667639Sgblack@eecs.umich.edu } else { 9677639Sgblack@eecs.umich.edu done = false; 9687639Sgblack@eecs.umich.edu } 9697639Sgblack@eecs.umich.edu return dest; 9707639Sgblack@eecs.umich.edu} 9717639Sgblack@eecs.umich.edu 9727639Sgblack@eecs.umich.edutemplate 9737639Sgblack@eecs.umich.edufloat FpOp::processNans(FPSCR &fpscr, bool &done, bool defaultNan, 9747639Sgblack@eecs.umich.edu float op1, float op2) const; 9757639Sgblack@eecs.umich.edutemplate 9767639Sgblack@eecs.umich.edudouble FpOp::processNans(FPSCR &fpscr, bool &done, bool defaultNan, 9777639Sgblack@eecs.umich.edu double op1, double op2) const; 9787639Sgblack@eecs.umich.edu 9797430Sgblack@eecs.umich.edutemplate <class fpType> 9807430Sgblack@eecs.umich.edufpType 9817430Sgblack@eecs.umich.eduFpOp::binaryOp(FPSCR &fpscr, fpType op1, fpType op2, 9827430Sgblack@eecs.umich.edu fpType (*func)(fpType, fpType), 9837639Sgblack@eecs.umich.edu bool flush, bool defaultNan, uint32_t rMode) const 9847430Sgblack@eecs.umich.edu{ 9857430Sgblack@eecs.umich.edu const bool single = (sizeof(fpType) == sizeof(float)); 9867430Sgblack@eecs.umich.edu fpType junk = 0.0; 9877430Sgblack@eecs.umich.edu 9887430Sgblack@eecs.umich.edu if (flush && flushToZero(op1, op2)) 9897430Sgblack@eecs.umich.edu fpscr.idc = 1; 9907430Sgblack@eecs.umich.edu VfpSavedState state = prepFpState(rMode); 9917430Sgblack@eecs.umich.edu __asm__ __volatile__ ("" : "=m" (op1), "=m" (op2), "=m" (state) 9927430Sgblack@eecs.umich.edu : "m" (op1), "m" (op2), "m" (state)); 9937430Sgblack@eecs.umich.edu fpType dest = func(op1, op2); 9947430Sgblack@eecs.umich.edu __asm__ __volatile__ ("" : "=m" (dest) : "m" (dest)); 9957430Sgblack@eecs.umich.edu 9967430Sgblack@eecs.umich.edu // Get NAN behavior right. This varies between x86 and ARM. 9979515SAli.Saidi@ARM.com if (std::isnan(dest)) { 9987430Sgblack@eecs.umich.edu const uint64_t qnan = 9997430Sgblack@eecs.umich.edu single ? 0x7fc00000 : ULL(0x7ff8000000000000); 10007430Sgblack@eecs.umich.edu const bool nan1 = std::isnan(op1); 10017430Sgblack@eecs.umich.edu const bool nan2 = std::isnan(op2); 10027430Sgblack@eecs.umich.edu const bool signal1 = nan1 && ((fpToBits(op1) & qnan) != qnan); 10037430Sgblack@eecs.umich.edu const bool signal2 = nan2 && ((fpToBits(op2) & qnan) != qnan); 10047639Sgblack@eecs.umich.edu if ((!nan1 && !nan2) || (defaultNan == 1)) { 10057430Sgblack@eecs.umich.edu dest = bitsToFp(qnan, junk); 10067430Sgblack@eecs.umich.edu } else if (signal1) { 10077430Sgblack@eecs.umich.edu dest = bitsToFp(fpToBits(op1) | qnan, junk); 10087430Sgblack@eecs.umich.edu } else if (signal2) { 10097430Sgblack@eecs.umich.edu dest = bitsToFp(fpToBits(op2) | qnan, junk); 10107430Sgblack@eecs.umich.edu } else if (nan1) { 10117430Sgblack@eecs.umich.edu dest = op1; 10127430Sgblack@eecs.umich.edu } else if (nan2) { 10137430Sgblack@eecs.umich.edu dest = op2; 10147430Sgblack@eecs.umich.edu } 10157430Sgblack@eecs.umich.edu } else if (flush && flushToZero(dest)) { 10167430Sgblack@eecs.umich.edu feraiseexcept(FeUnderflow); 10177430Sgblack@eecs.umich.edu } else if (( 10187430Sgblack@eecs.umich.edu (single && (dest == bitsToFp(0x00800000, junk) || 10197430Sgblack@eecs.umich.edu dest == bitsToFp(0x80800000, junk))) || 10207430Sgblack@eecs.umich.edu (!single && 10217430Sgblack@eecs.umich.edu (dest == bitsToFp(ULL(0x0010000000000000), junk) || 10227430Sgblack@eecs.umich.edu dest == bitsToFp(ULL(0x8010000000000000), junk))) 10237430Sgblack@eecs.umich.edu ) && rMode != VfpRoundZero) { 10247430Sgblack@eecs.umich.edu /* 10257430Sgblack@eecs.umich.edu * Correct for the fact that underflow is detected -before- rounding 10267430Sgblack@eecs.umich.edu * in ARM and -after- rounding in x86. 10277430Sgblack@eecs.umich.edu */ 10287430Sgblack@eecs.umich.edu fesetround(FeRoundZero); 10297430Sgblack@eecs.umich.edu __asm__ __volatile__ ("" : "=m" (op1), "=m" (op2) 10307430Sgblack@eecs.umich.edu : "m" (op1), "m" (op2)); 10317430Sgblack@eecs.umich.edu fpType temp = func(op1, op2); 10327430Sgblack@eecs.umich.edu __asm__ __volatile__ ("" : "=m" (temp) : "m" (temp)); 10337430Sgblack@eecs.umich.edu if (flush && flushToZero(temp)) { 10347430Sgblack@eecs.umich.edu dest = temp; 10357430Sgblack@eecs.umich.edu } 10367430Sgblack@eecs.umich.edu } 10377639Sgblack@eecs.umich.edu finishVfp(fpscr, state, flush); 10387430Sgblack@eecs.umich.edu return dest; 10397430Sgblack@eecs.umich.edu} 10407430Sgblack@eecs.umich.edu 10417430Sgblack@eecs.umich.edutemplate 10427430Sgblack@eecs.umich.edufloat FpOp::binaryOp(FPSCR &fpscr, float op1, float op2, 10437430Sgblack@eecs.umich.edu float (*func)(float, float), 10447639Sgblack@eecs.umich.edu bool flush, bool defaultNan, uint32_t rMode) const; 10457430Sgblack@eecs.umich.edutemplate 10467430Sgblack@eecs.umich.edudouble FpOp::binaryOp(FPSCR &fpscr, double op1, double op2, 10477430Sgblack@eecs.umich.edu double (*func)(double, double), 10487639Sgblack@eecs.umich.edu bool flush, bool defaultNan, uint32_t rMode) const; 10497430Sgblack@eecs.umich.edu 10507430Sgblack@eecs.umich.edutemplate <class fpType> 10517430Sgblack@eecs.umich.edufpType 10527430Sgblack@eecs.umich.eduFpOp::unaryOp(FPSCR &fpscr, fpType op1, fpType (*func)(fpType), 10537430Sgblack@eecs.umich.edu bool flush, uint32_t rMode) const 10547430Sgblack@eecs.umich.edu{ 10557430Sgblack@eecs.umich.edu const bool single = (sizeof(fpType) == sizeof(float)); 10567430Sgblack@eecs.umich.edu fpType junk = 0.0; 10577430Sgblack@eecs.umich.edu 10587430Sgblack@eecs.umich.edu if (flush && flushToZero(op1)) 10597430Sgblack@eecs.umich.edu fpscr.idc = 1; 10607430Sgblack@eecs.umich.edu VfpSavedState state = prepFpState(rMode); 10617430Sgblack@eecs.umich.edu __asm__ __volatile__ ("" : "=m" (op1), "=m" (state) 10627430Sgblack@eecs.umich.edu : "m" (op1), "m" (state)); 10637430Sgblack@eecs.umich.edu fpType dest = func(op1); 10647430Sgblack@eecs.umich.edu __asm__ __volatile__ ("" : "=m" (dest) : "m" (dest)); 10657430Sgblack@eecs.umich.edu 10667430Sgblack@eecs.umich.edu // Get NAN behavior right. This varies between x86 and ARM. 10679515SAli.Saidi@ARM.com if (std::isnan(dest)) { 10687430Sgblack@eecs.umich.edu const uint64_t qnan = 10697430Sgblack@eecs.umich.edu single ? 0x7fc00000 : ULL(0x7ff8000000000000); 10707430Sgblack@eecs.umich.edu const bool nan = std::isnan(op1); 10717430Sgblack@eecs.umich.edu if (!nan || fpscr.dn == 1) { 10727430Sgblack@eecs.umich.edu dest = bitsToFp(qnan, junk); 10737430Sgblack@eecs.umich.edu } else if (nan) { 10747430Sgblack@eecs.umich.edu dest = bitsToFp(fpToBits(op1) | qnan, junk); 10757430Sgblack@eecs.umich.edu } 10767430Sgblack@eecs.umich.edu } else if (flush && flushToZero(dest)) { 10777430Sgblack@eecs.umich.edu feraiseexcept(FeUnderflow); 10787430Sgblack@eecs.umich.edu } else if (( 10797430Sgblack@eecs.umich.edu (single && (dest == bitsToFp(0x00800000, junk) || 10807430Sgblack@eecs.umich.edu dest == bitsToFp(0x80800000, junk))) || 10817430Sgblack@eecs.umich.edu (!single && 10827430Sgblack@eecs.umich.edu (dest == bitsToFp(ULL(0x0010000000000000), junk) || 10837430Sgblack@eecs.umich.edu dest == bitsToFp(ULL(0x8010000000000000), junk))) 10847430Sgblack@eecs.umich.edu ) && rMode != VfpRoundZero) { 10857430Sgblack@eecs.umich.edu /* 10867430Sgblack@eecs.umich.edu * Correct for the fact that underflow is detected -before- rounding 10877430Sgblack@eecs.umich.edu * in ARM and -after- rounding in x86. 10887430Sgblack@eecs.umich.edu */ 10897430Sgblack@eecs.umich.edu fesetround(FeRoundZero); 10907430Sgblack@eecs.umich.edu __asm__ __volatile__ ("" : "=m" (op1) : "m" (op1)); 10917430Sgblack@eecs.umich.edu fpType temp = func(op1); 10927430Sgblack@eecs.umich.edu __asm__ __volatile__ ("" : "=m" (temp) : "m" (temp)); 10937430Sgblack@eecs.umich.edu if (flush && flushToZero(temp)) { 10947430Sgblack@eecs.umich.edu dest = temp; 10957430Sgblack@eecs.umich.edu } 10967430Sgblack@eecs.umich.edu } 10977639Sgblack@eecs.umich.edu finishVfp(fpscr, state, flush); 10987430Sgblack@eecs.umich.edu return dest; 10997430Sgblack@eecs.umich.edu} 11007430Sgblack@eecs.umich.edu 11017430Sgblack@eecs.umich.edutemplate 11027430Sgblack@eecs.umich.edufloat FpOp::unaryOp(FPSCR &fpscr, float op1, float (*func)(float), 11037430Sgblack@eecs.umich.edu bool flush, uint32_t rMode) const; 11047430Sgblack@eecs.umich.edutemplate 11057430Sgblack@eecs.umich.edudouble FpOp::unaryOp(FPSCR &fpscr, double op1, double (*func)(double), 11067430Sgblack@eecs.umich.edu bool flush, uint32_t rMode) const; 11077430Sgblack@eecs.umich.edu 11087430Sgblack@eecs.umich.eduIntRegIndex 11097430Sgblack@eecs.umich.eduVfpMacroOp::addStride(IntRegIndex idx, unsigned stride) 11107430Sgblack@eecs.umich.edu{ 11117430Sgblack@eecs.umich.edu if (wide) { 11127430Sgblack@eecs.umich.edu stride *= 2; 11137430Sgblack@eecs.umich.edu } 11147430Sgblack@eecs.umich.edu unsigned offset = idx % 8; 11157430Sgblack@eecs.umich.edu idx = (IntRegIndex)(idx - offset); 11167430Sgblack@eecs.umich.edu offset += stride; 11177430Sgblack@eecs.umich.edu idx = (IntRegIndex)(idx + (offset % 8)); 11187430Sgblack@eecs.umich.edu return idx; 11197430Sgblack@eecs.umich.edu} 11207430Sgblack@eecs.umich.edu 11217430Sgblack@eecs.umich.eduvoid 11227430Sgblack@eecs.umich.eduVfpMacroOp::nextIdxs(IntRegIndex &dest, IntRegIndex &op1, IntRegIndex &op2) 11237430Sgblack@eecs.umich.edu{ 11247430Sgblack@eecs.umich.edu unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2; 11257430Sgblack@eecs.umich.edu assert(!inScalarBank(dest)); 11267430Sgblack@eecs.umich.edu dest = addStride(dest, stride); 11277430Sgblack@eecs.umich.edu op1 = addStride(op1, stride); 11287430Sgblack@eecs.umich.edu if (!inScalarBank(op2)) { 11297430Sgblack@eecs.umich.edu op2 = addStride(op2, stride); 11307430Sgblack@eecs.umich.edu } 11317430Sgblack@eecs.umich.edu} 11327430Sgblack@eecs.umich.edu 11337430Sgblack@eecs.umich.eduvoid 11347430Sgblack@eecs.umich.eduVfpMacroOp::nextIdxs(IntRegIndex &dest, IntRegIndex &op1) 11357430Sgblack@eecs.umich.edu{ 11367430Sgblack@eecs.umich.edu unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2; 11377430Sgblack@eecs.umich.edu assert(!inScalarBank(dest)); 11387430Sgblack@eecs.umich.edu dest = addStride(dest, stride); 11397430Sgblack@eecs.umich.edu if (!inScalarBank(op1)) { 11407430Sgblack@eecs.umich.edu op1 = addStride(op1, stride); 11417430Sgblack@eecs.umich.edu } 11427430Sgblack@eecs.umich.edu} 11437430Sgblack@eecs.umich.edu 11447430Sgblack@eecs.umich.eduvoid 11457430Sgblack@eecs.umich.eduVfpMacroOp::nextIdxs(IntRegIndex &dest) 11467430Sgblack@eecs.umich.edu{ 11477430Sgblack@eecs.umich.edu unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2; 11487430Sgblack@eecs.umich.edu assert(!inScalarBank(dest)); 11497430Sgblack@eecs.umich.edu dest = addStride(dest, stride); 11507430Sgblack@eecs.umich.edu} 11517430Sgblack@eecs.umich.edu 11527430Sgblack@eecs.umich.edu} 1153