vfp.cc revision 7434:dd5a09b86b14
12810SN/A/* 28856Sandreas.hansson@arm.com * Copyright (c) 2010 ARM Limited 38856Sandreas.hansson@arm.com * All rights reserved 48856Sandreas.hansson@arm.com * 58856Sandreas.hansson@arm.com * The license below extends only to copyright in the software and shall 68856Sandreas.hansson@arm.com * not be construed as granting a license to any other intellectual 78856Sandreas.hansson@arm.com * property including but not limited to intellectual property relating 88856Sandreas.hansson@arm.com * to a hardware implementation of the functionality of the software 98856Sandreas.hansson@arm.com * licensed hereunder. You may use the software subject to the license 108856Sandreas.hansson@arm.com * terms below provided that you ensure that this notice is replicated 118856Sandreas.hansson@arm.com * unmodified and in its entirety in all distributions of the software, 128856Sandreas.hansson@arm.com * modified or unmodified, in source code or in binary form. 138856Sandreas.hansson@arm.com * 142810SN/A * Redistribution and use in source and binary forms, with or without 152810SN/A * modification, are permitted provided that the following conditions are 162810SN/A * met: redistributions of source code must retain the above copyright 172810SN/A * notice, this list of conditions and the following disclaimer; 182810SN/A * redistributions in binary form must reproduce the above copyright 192810SN/A * notice, this list of conditions and the following disclaimer in the 202810SN/A * documentation and/or other materials provided with the distribution; 212810SN/A * neither the name of the copyright holders nor the names of its 222810SN/A * contributors may be used to endorse or promote products derived from 232810SN/A * this software without specific prior written permission. 242810SN/A * 252810SN/A * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 262810SN/A * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 272810SN/A * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 282810SN/A * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 292810SN/A * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 302810SN/A * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 312810SN/A * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 322810SN/A * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 332810SN/A * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 342810SN/A * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 352810SN/A * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 362810SN/A * 372810SN/A * Authors: Gabe Black 382810SN/A */ 392810SN/A 402810SN/A#include "arch/arm/insts/vfp.hh" 412810SN/A 422810SN/A/* 432810SN/A * The asm statements below are to keep gcc from reordering code. Otherwise 442810SN/A * the rounding mode might be set after the operation it was intended for, the 452810SN/A * exception bits read before it, etc. 462810SN/A */ 472810SN/A 483348SN/Astd::string 493348SN/AFpRegRegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const 508232Snate@binkert.org{ 515338Sstever@gmail.com std::stringstream ss; 525338Sstever@gmail.com printMnemonic(ss); 538786Sgblack@eecs.umich.edu printReg(ss, dest + FP_Base_DepTag); 542810SN/A ss << ", "; 552810SN/A printReg(ss, op1 + FP_Base_DepTag); 562810SN/A return ss.str(); 578856Sandreas.hansson@arm.com} 588856Sandreas.hansson@arm.com 598856Sandreas.hansson@arm.comstd::string 608914Sandreas.hansson@arm.comFpRegImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const 618914Sandreas.hansson@arm.com{ 628856Sandreas.hansson@arm.com std::stringstream ss; 638856Sandreas.hansson@arm.com printMnemonic(ss); 644475SN/A printReg(ss, dest + FP_Base_DepTag); 655034SN/A ccprintf(ss, ", #%d", imm); 665034SN/A return ss.str(); 675314SN/A} 685314SN/A 694628SN/Astd::string 705034SN/AFpRegRegImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const 715034SN/A{ 725034SN/A std::stringstream ss; 736122SSteve.Reinhardt@amd.com printMnemonic(ss); 748134SAli.Saidi@ARM.com printReg(ss, dest + FP_Base_DepTag); 754626SN/A ss << ", "; 764626SN/A printReg(ss, op1 + FP_Base_DepTag); 775034SN/A ccprintf(ss, ", #%d", imm); 786122SSteve.Reinhardt@amd.com return ss.str(); 798883SAli.Saidi@ARM.com} 808833Sdam.sunwoo@arm.com 814458SN/Astd::string 822810SN/AFpRegRegRegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const 832810SN/A{ 843013SN/A std::stringstream ss; 858856Sandreas.hansson@arm.com printMnemonic(ss); 862810SN/A printReg(ss, dest + FP_Base_DepTag); 873013SN/A ss << ", "; 888856Sandreas.hansson@arm.com printReg(ss, op1 + FP_Base_DepTag); 892810SN/A ss << ", "; 902810SN/A printReg(ss, op2 + FP_Base_DepTag); 912810SN/A return ss.str(); 922810SN/A} 938856Sandreas.hansson@arm.com 942810SN/Anamespace ArmISA 953013SN/A{ 968856Sandreas.hansson@arm.com 973013SN/AVfpSavedState 988856Sandreas.hansson@arm.comprepFpState(uint32_t rMode) 998856Sandreas.hansson@arm.com{ 1002897SN/A int roundingMode = fegetround(); 1014666SN/A feclearexcept(FeAllExceptions); 1028856Sandreas.hansson@arm.com switch (rMode) { 1032897SN/A case VfpRoundNearest: 1042810SN/A fesetround(FeRoundNearest); 1052810SN/A break; 1062844SN/A case VfpRoundUpward: 1072810SN/A fesetround(FeRoundUpward); 1082858SN/A break; 1092858SN/A case VfpRoundDown: 1108856Sandreas.hansson@arm.com fesetround(FeRoundDown); 1118856Sandreas.hansson@arm.com break; 1128711Sandreas.hansson@arm.com case VfpRoundZero: 1132858SN/A fesetround(FeRoundZero); 1142858SN/A break; 1154628SN/A } 1162858SN/A return roundingMode; 1172810SN/A} 1182810SN/A 1192810SN/Avoid 1202810SN/AfinishVfp(FPSCR &fpscr, VfpSavedState state) 1212810SN/A{ 1224022SN/A int exceptions = fetestexcept(FeAllExceptions); 1234022SN/A bool underflow = false; 1244022SN/A if (exceptions & FeInvalid) { 1252810SN/A fpscr.ioc = 1; 1262810SN/A } 1278833Sdam.sunwoo@arm.com if (exceptions & FeDivByZero) { 1282810SN/A fpscr.dzc = 1; 1292810SN/A } 1302810SN/A if (exceptions & FeOverflow) { 1312810SN/A fpscr.ofc = 1; 1328833Sdam.sunwoo@arm.com } 1338833Sdam.sunwoo@arm.com if (exceptions & FeUnderflow) { 1348833Sdam.sunwoo@arm.com underflow = true; 1352810SN/A fpscr.ufc = 1; 1362810SN/A } 1374871SN/A if ((exceptions & FeInexact) && !(underflow && fpscr.fz)) { 1384871SN/A fpscr.ixc = 1; 1394871SN/A } 1404871SN/A fesetround(state); 1414871SN/A} 1424871SN/A 1434871SN/Atemplate <class fpType> 1444871SN/AfpType 1454871SN/AfixDest(FPSCR fpscr, fpType val, fpType op1) 1464871SN/A{ 1472810SN/A int fpClass = std::fpclassify(val); 1482810SN/A fpType junk = 0.0; 1492810SN/A if (fpClass == FP_NAN) { 1508833Sdam.sunwoo@arm.com const bool single = (sizeof(val) == sizeof(float)); 1512810SN/A const uint64_t qnan = single ? 0x7fc00000 : ULL(0x7ff8000000000000); 1524871SN/A const bool nan = std::isnan(op1); 1538833Sdam.sunwoo@arm.com if (!nan || (fpscr.dn == 1)) { 1548833Sdam.sunwoo@arm.com val = bitsToFp(qnan, junk); 1558833Sdam.sunwoo@arm.com } else if (nan) { 1562810SN/A val = bitsToFp(fpToBits(op1) | qnan, junk); 1572810SN/A } 1582810SN/A } else if (fpClass == FP_SUBNORMAL && fpscr.fz == 1) { 1592810SN/A // Turn val into a zero with the correct sign; 1608833Sdam.sunwoo@arm.com uint64_t bitMask = ULL(0x1) << (sizeof(fpType) * 8 - 1); 1612810SN/A val = bitsToFp(fpToBits(val) & bitMask, junk); 1624871SN/A feclearexcept(FeInexact); 1638833Sdam.sunwoo@arm.com feraiseexcept(FeUnderflow); 1648833Sdam.sunwoo@arm.com } 1658833Sdam.sunwoo@arm.com return val; 1662810SN/A} 1672810SN/A 1684022SN/Atemplate 1694022SN/Afloat fixDest<float>(FPSCR fpscr, float val, float op1); 1704022SN/Atemplate 1712810SN/Adouble fixDest<double>(FPSCR fpscr, double val, double op1); 1722810SN/A 1738833Sdam.sunwoo@arm.comtemplate <class fpType> 1742810SN/AfpType 1752810SN/AfixDest(FPSCR fpscr, fpType val, fpType op1, fpType op2) 1762810SN/A{ 1772810SN/A int fpClass = std::fpclassify(val); 1788833Sdam.sunwoo@arm.com fpType junk = 0.0; 1798833Sdam.sunwoo@arm.com if (fpClass == FP_NAN) { 1808833Sdam.sunwoo@arm.com const bool single = (sizeof(val) == sizeof(float)); 1812810SN/A const uint64_t qnan = single ? 0x7fc00000 : ULL(0x7ff8000000000000); 1822810SN/A const bool nan1 = std::isnan(op1); 1832810SN/A const bool nan2 = std::isnan(op2); 1842810SN/A const bool signal1 = nan1 && ((fpToBits(op1) & qnan) != qnan); 1852810SN/A const bool signal2 = nan2 && ((fpToBits(op2) & qnan) != qnan); 1868833Sdam.sunwoo@arm.com if ((!nan1 && !nan2) || (fpscr.dn == 1)) { 1872810SN/A val = bitsToFp(qnan, junk); 1884871SN/A } else if (signal1) { 1898833Sdam.sunwoo@arm.com val = bitsToFp(fpToBits(op1) | qnan, junk); 1908833Sdam.sunwoo@arm.com } else if (signal2) { 1918833Sdam.sunwoo@arm.com val = bitsToFp(fpToBits(op2) | qnan, junk); 1922810SN/A } else if (nan1) { 1932810SN/A val = op1; 1942810SN/A } else if (nan2) { 1952810SN/A val = op2; 1968833Sdam.sunwoo@arm.com } 1972810SN/A } else if (fpClass == FP_SUBNORMAL && fpscr.fz == 1) { 1984871SN/A // Turn val into a zero with the correct sign; 1998833Sdam.sunwoo@arm.com uint64_t bitMask = ULL(0x1) << (sizeof(fpType) * 8 - 1); 2008833Sdam.sunwoo@arm.com val = bitsToFp(fpToBits(val) & bitMask, junk); 2018833Sdam.sunwoo@arm.com feclearexcept(FeInexact); 2022810SN/A feraiseexcept(FeUnderflow); 2032810SN/A } 2044022SN/A return val; 2054022SN/A} 2064022SN/A 2072810SN/Atemplate 2082810SN/Afloat fixDest<float>(FPSCR fpscr, float val, float op1, float op2); 2098833Sdam.sunwoo@arm.comtemplate 2102810SN/Adouble fixDest<double>(FPSCR fpscr, double val, double op1, double op2); 2112810SN/A 2122810SN/Atemplate <class fpType> 2132810SN/AfpType 2148833Sdam.sunwoo@arm.comfixDivDest(FPSCR fpscr, fpType val, fpType op1, fpType op2) 2158833Sdam.sunwoo@arm.com{ 2168833Sdam.sunwoo@arm.com fpType mid = fixDest(fpscr, val, op1, op2); 2172810SN/A const bool single = (sizeof(fpType) == sizeof(float)); 2182810SN/A const fpType junk = 0.0; 2192810SN/A if ((single && (val == bitsToFp(0x00800000, junk) || 2202810SN/A val == bitsToFp(0x80800000, junk))) || 2212810SN/A (!single && (val == bitsToFp(ULL(0x0010000000000000), junk) || 2228833Sdam.sunwoo@arm.com val == bitsToFp(ULL(0x8010000000000000), junk))) 2232810SN/A ) { 2244871SN/A __asm__ __volatile__("" : "=m" (op1) : "m" (op1)); 2258833Sdam.sunwoo@arm.com fesetround(FeRoundZero); 2268833Sdam.sunwoo@arm.com fpType temp = 0.0; 2278833Sdam.sunwoo@arm.com __asm__ __volatile__("" : "=m" (temp) : "m" (temp)); 2282810SN/A temp = op1 / op2; 2292810SN/A if (flushToZero(temp)) { 2302810SN/A feraiseexcept(FeUnderflow); 2312810SN/A if (fpscr.fz) { 2328833Sdam.sunwoo@arm.com feclearexcept(FeInexact); 2332810SN/A mid = temp; 2344871SN/A } 2358833Sdam.sunwoo@arm.com } 2368833Sdam.sunwoo@arm.com __asm__ __volatile__("" :: "m" (temp)); 2378833Sdam.sunwoo@arm.com } 2382810SN/A return mid; 2392810SN/A} 2404022SN/A 2414022SN/Atemplate 2424022SN/Afloat fixDivDest<float>(FPSCR fpscr, float val, float op1, float op2); 2432810SN/Atemplate 2442810SN/Adouble fixDivDest<double>(FPSCR fpscr, double val, double op1, double op2); 2452810SN/A 2462810SN/Afloat 2472810SN/AfixFpDFpSDest(FPSCR fpscr, double val) 2482810SN/A{ 2498833Sdam.sunwoo@arm.com const float junk = 0.0; 2502810SN/A float op1 = 0.0; 2518833Sdam.sunwoo@arm.com if (std::isnan(val)) { 2528833Sdam.sunwoo@arm.com uint64_t valBits = fpToBits(val); 2538833Sdam.sunwoo@arm.com uint32_t op1Bits = bits(valBits, 50, 29) | 2542810SN/A (mask(9) << 22) | 2552810SN/A (bits(valBits, 63) << 31); 2562810SN/A op1 = bitsToFp(op1Bits, junk); 2572810SN/A } 2582810SN/A float mid = fixDest(fpscr, (float)val, op1); 2598833Sdam.sunwoo@arm.com if (fpscr.fz && fetestexcept(FeUnderflow | FeInexact) == 2602810SN/A (FeUnderflow | FeInexact)) { 2612810SN/A feclearexcept(FeInexact); 2628833Sdam.sunwoo@arm.com } 2638833Sdam.sunwoo@arm.com if (mid == bitsToFp(0x00800000, junk) || 2648833Sdam.sunwoo@arm.com mid == bitsToFp(0x80800000, junk)) { 2652810SN/A __asm__ __volatile__("" : "=m" (val) : "m" (val)); 2662810SN/A fesetround(FeRoundZero); 2672810SN/A float temp = 0.0; 2682810SN/A __asm__ __volatile__("" : "=m" (temp) : "m" (temp)); 2698833Sdam.sunwoo@arm.com temp = val; 2702810SN/A if (flushToZero(temp)) { 2712810SN/A feraiseexcept(FeUnderflow); 2728833Sdam.sunwoo@arm.com if (fpscr.fz) { 2738833Sdam.sunwoo@arm.com feclearexcept(FeInexact); 2748833Sdam.sunwoo@arm.com mid = temp; 2752810SN/A } 2762810SN/A } 2774022SN/A __asm__ __volatile__("" :: "m" (temp)); 2784022SN/A } 2794022SN/A return mid; 2802810SN/A} 2812810SN/A 2822810SN/Adouble 2832810SN/AfixFpSFpDDest(FPSCR fpscr, float val) 2842810SN/A{ 2852810SN/A const double junk = 0.0; 2868833Sdam.sunwoo@arm.com double op1 = 0.0; 2872810SN/A if (std::isnan(val)) { 2888833Sdam.sunwoo@arm.com uint32_t valBits = fpToBits(val); 2898833Sdam.sunwoo@arm.com uint64_t op1Bits = ((uint64_t)bits(valBits, 21, 0) << 29) | 2908833Sdam.sunwoo@arm.com (mask(12) << 51) | 2912810SN/A ((uint64_t)bits(valBits, 31) << 63); 2922810SN/A op1 = bitsToFp(op1Bits, junk); 2932810SN/A } 2942810SN/A double mid = fixDest(fpscr, (double)val, op1); 2952810SN/A if (mid == bitsToFp(ULL(0x0010000000000000), junk) || 2968833Sdam.sunwoo@arm.com mid == bitsToFp(ULL(0x8010000000000000), junk)) { 2972810SN/A __asm__ __volatile__("" : "=m" (val) : "m" (val)); 2982810SN/A fesetround(FeRoundZero); 2998833Sdam.sunwoo@arm.com double temp = 0.0; 3008833Sdam.sunwoo@arm.com __asm__ __volatile__("" : "=m" (temp) : "m" (temp)); 3018833Sdam.sunwoo@arm.com temp = val; 3022810SN/A if (flushToZero(temp)) { 3032810SN/A feraiseexcept(FeUnderflow); 3042810SN/A if (fpscr.fz) { 3052810SN/A feclearexcept(FeInexact); 3068833Sdam.sunwoo@arm.com mid = temp; 3072810SN/A } 3082810SN/A } 3098833Sdam.sunwoo@arm.com __asm__ __volatile__("" :: "m" (temp)); 3108833Sdam.sunwoo@arm.com } 3118833Sdam.sunwoo@arm.com return mid; 3122810SN/A} 3132810SN/A 3144022SN/Afloat 3154022SN/AvcvtFpSFpH(FPSCR &fpscr, float op, float dest, bool top) 3164022SN/A{ 3172810SN/A float junk = 0.0; 3182810SN/A uint32_t destBits = fpToBits(dest); 3192810SN/A uint32_t opBits = fpToBits(op); 3202810SN/A // Extract the operand. 3212810SN/A bool neg = bits(opBits, 31); 3222810SN/A uint32_t exponent = bits(opBits, 30, 23); 3232810SN/A uint32_t oldMantissa = bits(opBits, 22, 0); 3242810SN/A uint32_t mantissa = oldMantissa >> (23 - 10); 3258833Sdam.sunwoo@arm.com // Do the conversion. 3268833Sdam.sunwoo@arm.com uint32_t extra = oldMantissa & mask(23 - 10); 3278833Sdam.sunwoo@arm.com if (exponent == 0xff) { 3288833Sdam.sunwoo@arm.com if (oldMantissa != 0) { 3292810SN/A // Nans. 3302810SN/A if (bits(mantissa, 9) == 0) { 3312810SN/A // Signalling nan. 3322810SN/A fpscr.ioc = 1; 3332810SN/A } 3348833Sdam.sunwoo@arm.com if (fpscr.ahp) { 3352810SN/A mantissa = 0; 3362810SN/A exponent = 0; 3378833Sdam.sunwoo@arm.com fpscr.ioc = 1; 3388833Sdam.sunwoo@arm.com } else if (fpscr.dn) { 3398833Sdam.sunwoo@arm.com mantissa = (1 << 9); 3402810SN/A exponent = 0x1f; 3412810SN/A neg = false; 3422810SN/A } else { 3432810SN/A exponent = 0x1f; 3448833Sdam.sunwoo@arm.com mantissa |= (1 << 9); 3452810SN/A } 3462810SN/A } else { 3478833Sdam.sunwoo@arm.com // Infinities. 3488833Sdam.sunwoo@arm.com exponent = 0x1F; 3498833Sdam.sunwoo@arm.com if (fpscr.ahp) { 3502810SN/A fpscr.ioc = 1; 3512810SN/A mantissa = 0x3ff; 3522810SN/A } else { 3532810SN/A mantissa = 0; 3542810SN/A } 3552810SN/A } 3562810SN/A } else if (exponent == 0 && oldMantissa == 0) { 3572810SN/A // Zero, don't need to do anything. 3582810SN/A } else { 3592810SN/A // Normalized or denormalized numbers. 3602810SN/A 3612810SN/A bool inexact = (extra != 0); 3622810SN/A 3632810SN/A if (exponent == 0) { 3642810SN/A // Denormalized. 3652810SN/A 3662810SN/A // If flush to zero is on, this shouldn't happen. 3672810SN/A assert(fpscr.fz == 0); 3682810SN/A 3692810SN/A // Check for underflow 3702810SN/A if (inexact || fpscr.ufe) 3712810SN/A fpscr.ufc = 1; 3722810SN/A 3732810SN/A // Handle rounding. 3742810SN/A unsigned mode = fpscr.rMode; 3752810SN/A if ((mode == VfpRoundUpward && !neg && extra) || 3762810SN/A (mode == VfpRoundDown && neg && extra) || 3772810SN/A (mode == VfpRoundNearest && 3782810SN/A (extra > (1 << 9) || 3792810SN/A (extra == (1 << 9) && bits(mantissa, 0))))) { 3802810SN/A mantissa++; 3812810SN/A } 3822810SN/A 3832810SN/A // See if the number became normalized after rounding. 3842810SN/A if (mantissa == (1 << 10)) { 3852810SN/A mantissa = 0; 3862826SN/A exponent = 1; 3874626SN/A } 3888833Sdam.sunwoo@arm.com } else { 3894626SN/A // Normalized. 3904626SN/A 3918833Sdam.sunwoo@arm.com // We need to track the dropped bits differently since 3924626SN/A // more can be dropped by denormalizing. 3938833Sdam.sunwoo@arm.com bool topOne = bits(extra, 12); 3948833Sdam.sunwoo@arm.com bool restZeros = bits(extra, 11, 0) == 0; 3958833Sdam.sunwoo@arm.com 3964626SN/A if (exponent <= (127 - 15)) { 3974626SN/A // The result is too small. Denormalize. 3984626SN/A mantissa |= (1 << 10); 3994626SN/A while (mantissa && exponent <= (127 - 15)) { 4004626SN/A restZeros = restZeros && !topOne; 4014626SN/A topOne = bits(mantissa, 0); 4024626SN/A mantissa = mantissa >> 1; 4034626SN/A exponent++; 4048833Sdam.sunwoo@arm.com } 4054626SN/A if (topOne || !restZeros) 4064626SN/A inexact = true; 4074626SN/A exponent = 0; 4084626SN/A } else { 4098833Sdam.sunwoo@arm.com // Change bias. 4108833Sdam.sunwoo@arm.com exponent -= (127 - 15); 4118833Sdam.sunwoo@arm.com } 4124626SN/A 4134626SN/A if (exponent == 0 && (inexact || fpscr.ufe)) { 4144626SN/A // Underflow 4154626SN/A fpscr.ufc = 1; 4164626SN/A } 4178833Sdam.sunwoo@arm.com 4184626SN/A // Handle rounding. 4194871SN/A unsigned mode = fpscr.rMode; 4208833Sdam.sunwoo@arm.com bool nonZero = topOne || !restZeros; 4218833Sdam.sunwoo@arm.com if ((mode == VfpRoundUpward && !neg && nonZero) || 4228833Sdam.sunwoo@arm.com (mode == VfpRoundDown && neg && nonZero) || 4234626SN/A (mode == VfpRoundNearest && topOne && 4244626SN/A (!restZeros || bits(mantissa, 0)))) { 4254626SN/A mantissa++; 4264626SN/A } 4278833Sdam.sunwoo@arm.com 4284626SN/A // See if we rounded up and need to bump the exponent. 4294871SN/A if (mantissa == (1 << 10)) { 4308833Sdam.sunwoo@arm.com mantissa = 0; 4318833Sdam.sunwoo@arm.com exponent++; 4328833Sdam.sunwoo@arm.com } 4334626SN/A 4344626SN/A // Deal with overflow 4354626SN/A if (fpscr.ahp) { 4364626SN/A if (exponent >= 0x20) { 4374626SN/A exponent = 0x1f; 4384626SN/A mantissa = 0x3ff; 4394626SN/A fpscr.ioc = 1; 4408833Sdam.sunwoo@arm.com // Supress inexact exception. 4414626SN/A inexact = false; 4424626SN/A } 4434626SN/A } else { 4444626SN/A if (exponent >= 0x1f) { 4458833Sdam.sunwoo@arm.com if ((mode == VfpRoundNearest) || 4468833Sdam.sunwoo@arm.com (mode == VfpRoundUpward && !neg) || 4478833Sdam.sunwoo@arm.com (mode == VfpRoundDown && neg)) { 4484626SN/A // Overflow to infinity. 4494626SN/A exponent = 0x1f; 4504626SN/A mantissa = 0; 4514626SN/A } else { 4524626SN/A // Overflow to max normal. 4538833Sdam.sunwoo@arm.com exponent = 0x1e; 4544626SN/A mantissa = 0x3ff; 4554871SN/A } 4568833Sdam.sunwoo@arm.com fpscr.ofc = 1; 4578833Sdam.sunwoo@arm.com inexact = true; 4588833Sdam.sunwoo@arm.com } 4594626SN/A } 4604626SN/A } 4614626SN/A 4624626SN/A if (inexact) { 4638833Sdam.sunwoo@arm.com fpscr.ixc = 1; 4644626SN/A } 4654871SN/A } 4668833Sdam.sunwoo@arm.com // Reassemble and install the result. 4678833Sdam.sunwoo@arm.com uint32_t result = bits(mantissa, 9, 0); 4688833Sdam.sunwoo@arm.com replaceBits(result, 14, 10, exponent); 4694626SN/A if (neg) 4704626SN/A result |= (1 << 15); 4714626SN/A if (top) 4724626SN/A replaceBits(destBits, 31, 16, result); 4734626SN/A else 4744626SN/A replaceBits(destBits, 15, 0, result); 4754626SN/A return bitsToFp(destBits, junk); 4768833Sdam.sunwoo@arm.com} 4774626SN/A 4784626SN/Afloat 4794626SN/AvcvtFpHFpS(FPSCR &fpscr, float op, bool top) 4804626SN/A{ 4818833Sdam.sunwoo@arm.com float junk = 0.0; 4828833Sdam.sunwoo@arm.com uint32_t opBits = fpToBits(op); 4838833Sdam.sunwoo@arm.com // Extract the operand. 4844626SN/A if (top) 4854626SN/A opBits = bits(opBits, 31, 16); 4864626SN/A else 4874626SN/A opBits = bits(opBits, 15, 0); 4884626SN/A // Extract the bitfields. 4898833Sdam.sunwoo@arm.com bool neg = bits(opBits, 15); 4904626SN/A uint32_t exponent = bits(opBits, 14, 10); 4914871SN/A uint32_t mantissa = bits(opBits, 9, 0); 4928833Sdam.sunwoo@arm.com // Do the conversion. 4938833Sdam.sunwoo@arm.com if (exponent == 0) { 4948833Sdam.sunwoo@arm.com if (mantissa != 0) { 4954626SN/A // Normalize the value. 4964626SN/A exponent = exponent + (127 - 15) + 1; 4974626SN/A while (mantissa < (1 << 10)) { 4984626SN/A mantissa = mantissa << 1; 4998833Sdam.sunwoo@arm.com exponent--; 5004626SN/A } 5014871SN/A } 5024871SN/A mantissa = mantissa << (23 - 10); 5038833Sdam.sunwoo@arm.com } else if (exponent == 0x1f && !fpscr.ahp) { 5048833Sdam.sunwoo@arm.com // Infinities and nans. 5058833Sdam.sunwoo@arm.com exponent = 0xff; 5064626SN/A if (mantissa != 0) { 5074626SN/A // Nans. 5084626SN/A mantissa = mantissa << (23 - 10); 5094626SN/A if (bits(mantissa, 22) == 0) { 5104626SN/A // Signalling nan. 5114626SN/A fpscr.ioc = 1; 5124626SN/A mantissa |= (1 << 22); 5138833Sdam.sunwoo@arm.com } 5144626SN/A if (fpscr.dn) { 5154626SN/A mantissa &= ~mask(22); 5164626SN/A neg = false; 5174626SN/A } 5188833Sdam.sunwoo@arm.com } 5198833Sdam.sunwoo@arm.com } else { 5208833Sdam.sunwoo@arm.com exponent = exponent + (127 - 15); 5214626SN/A mantissa = mantissa << (23 - 10); 5224626SN/A } 5234626SN/A // Reassemble the result. 5244626SN/A uint32_t result = bits(mantissa, 22, 0); 5254626SN/A replaceBits(result, 30, 23, exponent); 5268833Sdam.sunwoo@arm.com if (neg) 5274626SN/A result |= (1 << 31); 5284871SN/A return bitsToFp(result, junk); 5294871SN/A} 5308833Sdam.sunwoo@arm.com 5318833Sdam.sunwoo@arm.comuint64_t 5328833Sdam.sunwoo@arm.comvfpFpSToFixed(float val, bool isSigned, bool half, 5334626SN/A uint8_t imm, bool rzero) 5344626SN/A{ 5354626SN/A int rmode = rzero ? FeRoundZero : fegetround(); 5364626SN/A __asm__ __volatile__("" : "=m" (rmode) : "m" (rmode)); 5374626SN/A fesetround(FeRoundNearest); 5384626SN/A val = val * powf(2.0, imm); 5394626SN/A __asm__ __volatile__("" : "=m" (val) : "m" (val)); 5408833Sdam.sunwoo@arm.com fesetround(rmode); 5414626SN/A feclearexcept(FeAllExceptions); 5424626SN/A __asm__ __volatile__("" : "=m" (val) : "m" (val)); 5434626SN/A float origVal = val; 5444626SN/A val = rintf(val); 5458833Sdam.sunwoo@arm.com int fpType = std::fpclassify(val); 5468833Sdam.sunwoo@arm.com if (fpType == FP_SUBNORMAL || fpType == FP_NAN) { 5478833Sdam.sunwoo@arm.com if (fpType == FP_NAN) { 5484626SN/A feraiseexcept(FeInvalid); 5494626SN/A } 5504626SN/A val = 0.0; 5514626SN/A } else if (origVal != val) { 5524626SN/A switch (rmode) { 5538833Sdam.sunwoo@arm.com case FeRoundNearest: 5544626SN/A if (origVal - val > 0.5) 5554871SN/A val += 1.0; 5564871SN/A else if (val - origVal > 0.5) 5574871SN/A val -= 1.0; 5588833Sdam.sunwoo@arm.com break; 5598833Sdam.sunwoo@arm.com case FeRoundDown: 5608833Sdam.sunwoo@arm.com if (origVal < val) 5614626SN/A val -= 1.0; 5624626SN/A break; 5634626SN/A case FeRoundUpward: 5644626SN/A if (origVal > val) 5654626SN/A val += 1.0; 5664626SN/A break; 5674626SN/A } 5684626SN/A feraiseexcept(FeInexact); 5694626SN/A } 5704626SN/A 5714626SN/A if (isSigned) { 5724626SN/A if (half) { 5734626SN/A if ((double)val < (int16_t)(1 << 15)) { 5744626SN/A feraiseexcept(FeInvalid); 5754626SN/A feclearexcept(FeInexact); 5764626SN/A return (int16_t)(1 << 15); 5774626SN/A } 5784626SN/A if ((double)val > (int16_t)mask(15)) { 5794626SN/A feraiseexcept(FeInvalid); 5804626SN/A feclearexcept(FeInexact); 5814626SN/A return (int16_t)mask(15); 5824626SN/A } 5834626SN/A return (int16_t)val; 5844626SN/A } else { 5854626SN/A if ((double)val < (int32_t)(1 << 31)) { 5864626SN/A feraiseexcept(FeInvalid); 5874626SN/A feclearexcept(FeInexact); 5884626SN/A return (int32_t)(1 << 31); 5894626SN/A } 5904626SN/A if ((double)val > (int32_t)mask(31)) { 5914626SN/A feraiseexcept(FeInvalid); 5924626SN/A feclearexcept(FeInexact); 5934626SN/A return (int32_t)mask(31); 5944626SN/A } 5954626SN/A return (int32_t)val; 5964626SN/A } 5974626SN/A } else { 5984626SN/A if (half) { 5994626SN/A if ((double)val < 0) { 6004626SN/A feraiseexcept(FeInvalid); 6014626SN/A feclearexcept(FeInexact); 6024626SN/A return 0; 6034626SN/A } 6044626SN/A if ((double)val > (mask(16))) { 6054626SN/A feraiseexcept(FeInvalid); 6068833Sdam.sunwoo@arm.com feclearexcept(FeInexact); 6078833Sdam.sunwoo@arm.com return mask(16); 6088833Sdam.sunwoo@arm.com } 6098833Sdam.sunwoo@arm.com return (uint16_t)val; 6104626SN/A } else { 6114626SN/A if ((double)val < 0) { 6124626SN/A feraiseexcept(FeInvalid); 6134626SN/A feclearexcept(FeInexact); 6144626SN/A return 0; 6158833Sdam.sunwoo@arm.com } 6164626SN/A if ((double)val > (mask(32))) { 6174626SN/A feraiseexcept(FeInvalid); 6188833Sdam.sunwoo@arm.com feclearexcept(FeInexact); 6198833Sdam.sunwoo@arm.com return mask(32); 6208833Sdam.sunwoo@arm.com } 6214626SN/A return (uint32_t)val; 6224626SN/A } 6234626SN/A } 6244626SN/A} 6258833Sdam.sunwoo@arm.com 6264626SN/Afloat 6274626SN/AvfpUFixedToFpS(FPSCR fpscr, uint32_t val, bool half, uint8_t imm) 6288833Sdam.sunwoo@arm.com{ 6298833Sdam.sunwoo@arm.com fesetround(FeRoundNearest); 6308833Sdam.sunwoo@arm.com if (half) 6314626SN/A val = (uint16_t)val; 6324626SN/A float scale = powf(2.0, imm); 6334626SN/A __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); 6344626SN/A feclearexcept(FeAllExceptions); 6354626SN/A __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); 6364626SN/A return fixDivDest(fpscr, val / scale, (float)val, scale); 6374626SN/A} 6384626SN/A 6394626SN/Afloat 6404626SN/AvfpSFixedToFpS(FPSCR fpscr, int32_t val, bool half, uint8_t imm) 6414626SN/A{ 6424626SN/A fesetround(FeRoundNearest); 6434626SN/A if (half) 6448833Sdam.sunwoo@arm.com val = sext<16>(val & mask(16)); 6458833Sdam.sunwoo@arm.com float scale = powf(2.0, imm); 6468833Sdam.sunwoo@arm.com __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); 6478833Sdam.sunwoo@arm.com feclearexcept(FeAllExceptions); 6484626SN/A __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); 6494626SN/A return fixDivDest(fpscr, val / scale, (float)val, scale); 6504626SN/A} 6514626SN/A 6524626SN/Auint64_t 6538833Sdam.sunwoo@arm.comvfpFpDToFixed(double val, bool isSigned, bool half, 6544626SN/A uint8_t imm, bool rzero) 6554626SN/A{ 6568833Sdam.sunwoo@arm.com int rmode = rzero ? FeRoundZero : fegetround(); 6578833Sdam.sunwoo@arm.com fesetround(FeRoundNearest); 6588833Sdam.sunwoo@arm.com val = val * pow(2.0, imm); 6594626SN/A __asm__ __volatile__("" : "=m" (val) : "m" (val)); 6604626SN/A fesetround(rmode); 6614626SN/A feclearexcept(FeAllExceptions); 6624626SN/A __asm__ __volatile__("" : "=m" (val) : "m" (val)); 6638833Sdam.sunwoo@arm.com double origVal = val; 6644626SN/A val = rint(val); 6654626SN/A int fpType = std::fpclassify(val); 6668833Sdam.sunwoo@arm.com if (fpType == FP_SUBNORMAL || fpType == FP_NAN) { 6678833Sdam.sunwoo@arm.com if (fpType == FP_NAN) { 6688833Sdam.sunwoo@arm.com feraiseexcept(FeInvalid); 6694626SN/A } 6704626SN/A val = 0.0; 6714626SN/A } else if (origVal != val) { 6724626SN/A switch (rmode) { 6734626SN/A case FeRoundNearest: 6744626SN/A if (origVal - val > 0.5) 6754626SN/A val += 1.0; 6764626SN/A else if (val - origVal > 0.5) 6774626SN/A val -= 1.0; 6784626SN/A break; 6794626SN/A case FeRoundDown: 6804626SN/A if (origVal < val) 6814626SN/A val -= 1.0; 6828833Sdam.sunwoo@arm.com break; 6838833Sdam.sunwoo@arm.com case FeRoundUpward: 6848833Sdam.sunwoo@arm.com if (origVal > val) 6858833Sdam.sunwoo@arm.com val += 1.0; 6864626SN/A break; 6874626SN/A } 6884626SN/A feraiseexcept(FeInexact); 6894626SN/A } 6904626SN/A if (isSigned) { 6918833Sdam.sunwoo@arm.com if (half) { 6924626SN/A if (val < (int16_t)(1 << 15)) { 6934626SN/A feraiseexcept(FeInvalid); 6948833Sdam.sunwoo@arm.com feclearexcept(FeInexact); 6958833Sdam.sunwoo@arm.com return (int16_t)(1 << 15); 6968833Sdam.sunwoo@arm.com } 6974626SN/A if (val > (int16_t)mask(15)) { 6984626SN/A feraiseexcept(FeInvalid); 6998833Sdam.sunwoo@arm.com feclearexcept(FeInexact); 7004626SN/A return (int16_t)mask(15); 7014626SN/A } 7028833Sdam.sunwoo@arm.com return (int16_t)val; 7034626SN/A } else { 7048833Sdam.sunwoo@arm.com if (val < (int32_t)(1 << 31)) { 7058833Sdam.sunwoo@arm.com feraiseexcept(FeInvalid); 7068833Sdam.sunwoo@arm.com feclearexcept(FeInexact); 7074626SN/A return (int32_t)(1 << 31); 7084626SN/A } 7094626SN/A if (val > (int32_t)mask(31)) { 7108833Sdam.sunwoo@arm.com feraiseexcept(FeInvalid); 7114626SN/A feclearexcept(FeInexact); 7124626SN/A return (int32_t)mask(31); 7138833Sdam.sunwoo@arm.com } 7144626SN/A return (int32_t)val; 7158833Sdam.sunwoo@arm.com } 7168833Sdam.sunwoo@arm.com } else { 7178833Sdam.sunwoo@arm.com if (half) { 7184626SN/A if (val < 0) { 7194626SN/A feraiseexcept(FeInvalid); 7204626SN/A feclearexcept(FeInexact); 7214626SN/A return 0; 7224626SN/A } 7234626SN/A if (val > mask(16)) { 7242810SN/A feraiseexcept(FeInvalid); 7253503SN/A feclearexcept(FeInexact); 7263503SN/A return mask(16); 7273503SN/A } 7283503SN/A return (uint16_t)val; 7294626SN/A } else { 7304626SN/A if (val < 0) { 7313503SN/A feraiseexcept(FeInvalid); 7324626SN/A feclearexcept(FeInexact); 7333503SN/A return 0; 7343503SN/A } 7353503SN/A if (val > mask(32)) { 7364626SN/A feraiseexcept(FeInvalid); 7373503SN/A feclearexcept(FeInexact); 7383503SN/A return mask(32); 7393503SN/A } 7403503SN/A return (uint32_t)val; 7413503SN/A } 742 } 743} 744 745double 746vfpUFixedToFpD(FPSCR fpscr, uint32_t val, bool half, uint8_t imm) 747{ 748 fesetround(FeRoundNearest); 749 if (half) 750 val = (uint16_t)val; 751 double scale = pow(2.0, imm); 752 __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); 753 feclearexcept(FeAllExceptions); 754 __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); 755 return fixDivDest(fpscr, val / scale, (double)val, scale); 756} 757 758double 759vfpSFixedToFpD(FPSCR fpscr, int32_t val, bool half, uint8_t imm) 760{ 761 fesetround(FeRoundNearest); 762 if (half) 763 val = sext<16>(val & mask(16)); 764 double scale = pow(2.0, imm); 765 __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); 766 feclearexcept(FeAllExceptions); 767 __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); 768 return fixDivDest(fpscr, val / scale, (double)val, scale); 769} 770 771template <class fpType> 772fpType 773FpOp::binaryOp(FPSCR &fpscr, fpType op1, fpType op2, 774 fpType (*func)(fpType, fpType), 775 bool flush, uint32_t rMode) const 776{ 777 const bool single = (sizeof(fpType) == sizeof(float)); 778 fpType junk = 0.0; 779 780 if (flush && flushToZero(op1, op2)) 781 fpscr.idc = 1; 782 VfpSavedState state = prepFpState(rMode); 783 __asm__ __volatile__ ("" : "=m" (op1), "=m" (op2), "=m" (state) 784 : "m" (op1), "m" (op2), "m" (state)); 785 fpType dest = func(op1, op2); 786 __asm__ __volatile__ ("" : "=m" (dest) : "m" (dest)); 787 788 int fpClass = std::fpclassify(dest); 789 // Get NAN behavior right. This varies between x86 and ARM. 790 if (fpClass == FP_NAN) { 791 const bool single = (sizeof(fpType) == sizeof(float)); 792 const uint64_t qnan = 793 single ? 0x7fc00000 : ULL(0x7ff8000000000000); 794 const bool nan1 = std::isnan(op1); 795 const bool nan2 = std::isnan(op2); 796 const bool signal1 = nan1 && ((fpToBits(op1) & qnan) != qnan); 797 const bool signal2 = nan2 && ((fpToBits(op2) & qnan) != qnan); 798 if ((!nan1 && !nan2) || (fpscr.dn == 1)) { 799 dest = bitsToFp(qnan, junk); 800 } else if (signal1) { 801 dest = bitsToFp(fpToBits(op1) | qnan, junk); 802 } else if (signal2) { 803 dest = bitsToFp(fpToBits(op2) | qnan, junk); 804 } else if (nan1) { 805 dest = op1; 806 } else if (nan2) { 807 dest = op2; 808 } 809 } else if (flush && flushToZero(dest)) { 810 feraiseexcept(FeUnderflow); 811 } else if (( 812 (single && (dest == bitsToFp(0x00800000, junk) || 813 dest == bitsToFp(0x80800000, junk))) || 814 (!single && 815 (dest == bitsToFp(ULL(0x0010000000000000), junk) || 816 dest == bitsToFp(ULL(0x8010000000000000), junk))) 817 ) && rMode != VfpRoundZero) { 818 /* 819 * Correct for the fact that underflow is detected -before- rounding 820 * in ARM and -after- rounding in x86. 821 */ 822 fesetround(FeRoundZero); 823 __asm__ __volatile__ ("" : "=m" (op1), "=m" (op2) 824 : "m" (op1), "m" (op2)); 825 fpType temp = func(op1, op2); 826 __asm__ __volatile__ ("" : "=m" (temp) : "m" (temp)); 827 if (flush && flushToZero(temp)) { 828 dest = temp; 829 } 830 } 831 finishVfp(fpscr, state); 832 return dest; 833} 834 835template 836float FpOp::binaryOp(FPSCR &fpscr, float op1, float op2, 837 float (*func)(float, float), 838 bool flush, uint32_t rMode) const; 839template 840double FpOp::binaryOp(FPSCR &fpscr, double op1, double op2, 841 double (*func)(double, double), 842 bool flush, uint32_t rMode) const; 843 844template <class fpType> 845fpType 846FpOp::unaryOp(FPSCR &fpscr, fpType op1, fpType (*func)(fpType), 847 bool flush, uint32_t rMode) const 848{ 849 const bool single = (sizeof(fpType) == sizeof(float)); 850 fpType junk = 0.0; 851 852 if (flush && flushToZero(op1)) 853 fpscr.idc = 1; 854 VfpSavedState state = prepFpState(rMode); 855 __asm__ __volatile__ ("" : "=m" (op1), "=m" (state) 856 : "m" (op1), "m" (state)); 857 fpType dest = func(op1); 858 __asm__ __volatile__ ("" : "=m" (dest) : "m" (dest)); 859 860 int fpClass = std::fpclassify(dest); 861 // Get NAN behavior right. This varies between x86 and ARM. 862 if (fpClass == FP_NAN) { 863 const bool single = (sizeof(fpType) == sizeof(float)); 864 const uint64_t qnan = 865 single ? 0x7fc00000 : ULL(0x7ff8000000000000); 866 const bool nan = std::isnan(op1); 867 if (!nan || fpscr.dn == 1) { 868 dest = bitsToFp(qnan, junk); 869 } else if (nan) { 870 dest = bitsToFp(fpToBits(op1) | qnan, junk); 871 } 872 } else if (flush && flushToZero(dest)) { 873 feraiseexcept(FeUnderflow); 874 } else if (( 875 (single && (dest == bitsToFp(0x00800000, junk) || 876 dest == bitsToFp(0x80800000, junk))) || 877 (!single && 878 (dest == bitsToFp(ULL(0x0010000000000000), junk) || 879 dest == bitsToFp(ULL(0x8010000000000000), junk))) 880 ) && rMode != VfpRoundZero) { 881 /* 882 * Correct for the fact that underflow is detected -before- rounding 883 * in ARM and -after- rounding in x86. 884 */ 885 fesetround(FeRoundZero); 886 __asm__ __volatile__ ("" : "=m" (op1) : "m" (op1)); 887 fpType temp = func(op1); 888 __asm__ __volatile__ ("" : "=m" (temp) : "m" (temp)); 889 if (flush && flushToZero(temp)) { 890 dest = temp; 891 } 892 } 893 finishVfp(fpscr, state); 894 return dest; 895} 896 897template 898float FpOp::unaryOp(FPSCR &fpscr, float op1, float (*func)(float), 899 bool flush, uint32_t rMode) const; 900template 901double FpOp::unaryOp(FPSCR &fpscr, double op1, double (*func)(double), 902 bool flush, uint32_t rMode) const; 903 904IntRegIndex 905VfpMacroOp::addStride(IntRegIndex idx, unsigned stride) 906{ 907 if (wide) { 908 stride *= 2; 909 } 910 unsigned offset = idx % 8; 911 idx = (IntRegIndex)(idx - offset); 912 offset += stride; 913 idx = (IntRegIndex)(idx + (offset % 8)); 914 return idx; 915} 916 917void 918VfpMacroOp::nextIdxs(IntRegIndex &dest, IntRegIndex &op1, IntRegIndex &op2) 919{ 920 unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2; 921 assert(!inScalarBank(dest)); 922 dest = addStride(dest, stride); 923 op1 = addStride(op1, stride); 924 if (!inScalarBank(op2)) { 925 op2 = addStride(op2, stride); 926 } 927} 928 929void 930VfpMacroOp::nextIdxs(IntRegIndex &dest, IntRegIndex &op1) 931{ 932 unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2; 933 assert(!inScalarBank(dest)); 934 dest = addStride(dest, stride); 935 if (!inScalarBank(op1)) { 936 op1 = addStride(op1, stride); 937 } 938} 939 940void 941VfpMacroOp::nextIdxs(IntRegIndex &dest) 942{ 943 unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2; 944 assert(!inScalarBank(dest)); 945 dest = addStride(dest, stride); 946} 947 948} 949