/*
 * Copyright (c) 2010 ARM Limited
 * All rights reserved
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder.  You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Gabe Black
 */

#include "arch/arm/insts/vfp.hh"

/*
 * The asm statements below are to keep gcc from reordering code. Otherwise
 * the rounding mode might be set after the operation it was intended for, the
 * exception bits read before it, etc.
4610319SAndreas.Sandberg@ARM.com */ 4710319SAndreas.Sandberg@ARM.com 4810319SAndreas.Sandberg@ARM.comstd::string 4910319SAndreas.Sandberg@ARM.comFpRegRegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const 5010319SAndreas.Sandberg@ARM.com{ 5110529Smorr@cs.wisc.edu std::stringstream ss; 5212104Snathanael.premillieu@arm.com printMnemonic(ss); 5310319SAndreas.Sandberg@ARM.com printReg(ss, dest + FP_Base_DepTag); 5410319SAndreas.Sandberg@ARM.com ss << ", "; 5511608Snikos.nikoleris@arm.com printReg(ss, op1 + FP_Base_DepTag); 562735Sktlim@umich.edu return ss.str(); 572735Sktlim@umich.edu} 5810319SAndreas.Sandberg@ARM.com 5910319SAndreas.Sandberg@ARM.comstd::string 6010319SAndreas.Sandberg@ARM.comFpRegImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const 6110319SAndreas.Sandberg@ARM.com{ 6210319SAndreas.Sandberg@ARM.com std::stringstream ss; 6310319SAndreas.Sandberg@ARM.com printMnemonic(ss); 6410319SAndreas.Sandberg@ARM.com printReg(ss, dest + FP_Base_DepTag); 6510319SAndreas.Sandberg@ARM.com ccprintf(ss, ", #%d", imm); 6610319SAndreas.Sandberg@ARM.com return ss.str(); 6710319SAndreas.Sandberg@ARM.com} 6810319SAndreas.Sandberg@ARM.com 6910319SAndreas.Sandberg@ARM.comstd::string 7010319SAndreas.Sandberg@ARM.comFpRegRegImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const 7110319SAndreas.Sandberg@ARM.com{ 722735Sktlim@umich.edu std::stringstream ss; 732735Sktlim@umich.edu printMnemonic(ss); 7410319SAndreas.Sandberg@ARM.com printReg(ss, dest + FP_Base_DepTag); 7510319SAndreas.Sandberg@ARM.com ss << ", "; 7610319SAndreas.Sandberg@ARM.com printReg(ss, op1 + FP_Base_DepTag); 7710319SAndreas.Sandberg@ARM.com ccprintf(ss, ", #%d", imm); 7810319SAndreas.Sandberg@ARM.com return ss.str(); 7910319SAndreas.Sandberg@ARM.com} 8010319SAndreas.Sandberg@ARM.com 8110319SAndreas.Sandberg@ARM.comstd::string 8210319SAndreas.Sandberg@ARM.comFpRegRegRegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const 8310319SAndreas.Sandberg@ARM.com{ 
8410319SAndreas.Sandberg@ARM.com std::stringstream ss; 8510319SAndreas.Sandberg@ARM.com printMnemonic(ss); 8610319SAndreas.Sandberg@ARM.com printReg(ss, dest + FP_Base_DepTag); 8710319SAndreas.Sandberg@ARM.com ss << ", "; 8810319SAndreas.Sandberg@ARM.com printReg(ss, op1 + FP_Base_DepTag); 892735Sktlim@umich.edu ss << ", "; 902735Sktlim@umich.edu printReg(ss, op2 + FP_Base_DepTag); 9110319SAndreas.Sandberg@ARM.com return ss.str(); 9210319SAndreas.Sandberg@ARM.com} 9310319SAndreas.Sandberg@ARM.com 9410319SAndreas.Sandberg@ARM.comnamespace ArmISA 9510319SAndreas.Sandberg@ARM.com{ 9610319SAndreas.Sandberg@ARM.com 9710319SAndreas.Sandberg@ARM.comVfpSavedState 9810319SAndreas.Sandberg@ARM.comprepFpState(uint32_t rMode) 9910319SAndreas.Sandberg@ARM.com{ 10010319SAndreas.Sandberg@ARM.com int roundingMode = fegetround(); 10110319SAndreas.Sandberg@ARM.com feclearexcept(FeAllExceptions); 10210319SAndreas.Sandberg@ARM.com switch (rMode) { 10310319SAndreas.Sandberg@ARM.com case VfpRoundNearest: 1042735Sktlim@umich.edu fesetround(FeRoundNearest); 1052735Sktlim@umich.edu break; 10610319SAndreas.Sandberg@ARM.com case VfpRoundUpward: 1072735Sktlim@umich.edu fesetround(FeRoundUpward); 1082735Sktlim@umich.edu break; 1092735Sktlim@umich.edu case VfpRoundDown: 11010319SAndreas.Sandberg@ARM.com fesetround(FeRoundDown); 11110319SAndreas.Sandberg@ARM.com break; 1122735Sktlim@umich.edu case VfpRoundZero: 1132735Sktlim@umich.edu fesetround(FeRoundZero); 11410319SAndreas.Sandberg@ARM.com break; 11510319SAndreas.Sandberg@ARM.com } 1162735Sktlim@umich.edu return roundingMode; 1172735Sktlim@umich.edu} 1182735Sktlim@umich.edu 11910319SAndreas.Sandberg@ARM.comvoid 12010319SAndreas.Sandberg@ARM.comfinishVfp(FPSCR &fpscr, VfpSavedState state) 1212735Sktlim@umich.edu{ 12210319SAndreas.Sandberg@ARM.com int exceptions = fetestexcept(FeAllExceptions); 1232735Sktlim@umich.edu bool underflow = false; 12410319SAndreas.Sandberg@ARM.com if (exceptions & FeInvalid) { 12510319SAndreas.Sandberg@ARM.com 
fpscr.ioc = 1; 12610319SAndreas.Sandberg@ARM.com } 12710319SAndreas.Sandberg@ARM.com if (exceptions & FeDivByZero) { 12810319SAndreas.Sandberg@ARM.com fpscr.dzc = 1; 12910319SAndreas.Sandberg@ARM.com } 13010319SAndreas.Sandberg@ARM.com if (exceptions & FeOverflow) { 1312735Sktlim@umich.edu fpscr.ofc = 1; 13210319SAndreas.Sandberg@ARM.com } 13310319SAndreas.Sandberg@ARM.com if (exceptions & FeUnderflow) { 13410319SAndreas.Sandberg@ARM.com underflow = true; 13510319SAndreas.Sandberg@ARM.com fpscr.ufc = 1; 13610319SAndreas.Sandberg@ARM.com } 13710319SAndreas.Sandberg@ARM.com if ((exceptions & FeInexact) && !(underflow && fpscr.fz)) { 13810319SAndreas.Sandberg@ARM.com fpscr.ixc = 1; 1392735Sktlim@umich.edu } 14010319SAndreas.Sandberg@ARM.com fesetround(state); 14110319SAndreas.Sandberg@ARM.com} 14210319SAndreas.Sandberg@ARM.com 14310319SAndreas.Sandberg@ARM.comtemplate <class fpType> 14410319SAndreas.Sandberg@ARM.comfpType 1452735Sktlim@umich.edufixDest(FPSCR fpscr, fpType val, fpType op1) 14610319SAndreas.Sandberg@ARM.com{ 14710319SAndreas.Sandberg@ARM.com int fpClass = std::fpclassify(val); 14810319SAndreas.Sandberg@ARM.com fpType junk = 0.0; 14910319SAndreas.Sandberg@ARM.com if (fpClass == FP_NAN) { 15010319SAndreas.Sandberg@ARM.com const bool single = (sizeof(val) == sizeof(float)); 1512735Sktlim@umich.edu const uint64_t qnan = single ? 
0x7fc00000 : ULL(0x7ff8000000000000); 15210319SAndreas.Sandberg@ARM.com const bool nan = std::isnan(op1); 1532735Sktlim@umich.edu if (!nan || (fpscr.dn == 1)) { 15410319SAndreas.Sandberg@ARM.com val = bitsToFp(qnan, junk); 15510319SAndreas.Sandberg@ARM.com } else if (nan) { 15610319SAndreas.Sandberg@ARM.com val = bitsToFp(fpToBits(op1) | qnan, junk); 15710319SAndreas.Sandberg@ARM.com } 15810319SAndreas.Sandberg@ARM.com } else if (fpClass == FP_SUBNORMAL && fpscr.fz == 1) { 15910319SAndreas.Sandberg@ARM.com // Turn val into a zero with the correct sign; 16010319SAndreas.Sandberg@ARM.com uint64_t bitMask = ULL(0x1) << (sizeof(fpType) * 8 - 1); 16110319SAndreas.Sandberg@ARM.com val = bitsToFp(fpToBits(val) & bitMask, junk); 16210319SAndreas.Sandberg@ARM.com feclearexcept(FeInexact); 16310319SAndreas.Sandberg@ARM.com feraiseexcept(FeUnderflow); 16410319SAndreas.Sandberg@ARM.com } 16510319SAndreas.Sandberg@ARM.com return val; 16610319SAndreas.Sandberg@ARM.com} 16710319SAndreas.Sandberg@ARM.com 16810319SAndreas.Sandberg@ARM.comtemplate 16910319SAndreas.Sandberg@ARM.comfloat fixDest<float>(FPSCR fpscr, float val, float op1); 17010319SAndreas.Sandberg@ARM.comtemplate 17110319SAndreas.Sandberg@ARM.comdouble fixDest<double>(FPSCR fpscr, double val, double op1); 17210319SAndreas.Sandberg@ARM.com 17310319SAndreas.Sandberg@ARM.comtemplate <class fpType> 17410319SAndreas.Sandberg@ARM.comfpType 17510319SAndreas.Sandberg@ARM.comfixDest(FPSCR fpscr, fpType val, fpType op1, fpType op2) 17610319SAndreas.Sandberg@ARM.com{ 17710319SAndreas.Sandberg@ARM.com int fpClass = std::fpclassify(val); 17810319SAndreas.Sandberg@ARM.com fpType junk = 0.0; 17911303Ssteve.reinhardt@amd.com if (fpClass == FP_NAN) { 18011303Ssteve.reinhardt@amd.com const bool single = (sizeof(val) == sizeof(float)); 18111303Ssteve.reinhardt@amd.com const uint64_t qnan = single ? 
0x7fc00000 : ULL(0x7ff8000000000000); 18211303Ssteve.reinhardt@amd.com const bool nan1 = std::isnan(op1); 18311303Ssteve.reinhardt@amd.com const bool nan2 = std::isnan(op2); 18411303Ssteve.reinhardt@amd.com const bool signal1 = nan1 && ((fpToBits(op1) & qnan) != qnan); 18511303Ssteve.reinhardt@amd.com const bool signal2 = nan2 && ((fpToBits(op2) & qnan) != qnan); 18610319SAndreas.Sandberg@ARM.com if ((!nan1 && !nan2) || (fpscr.dn == 1)) { 18711608Snikos.nikoleris@arm.com val = bitsToFp(qnan, junk); 18811303Ssteve.reinhardt@amd.com } else if (signal1) { 18911303Ssteve.reinhardt@amd.com val = bitsToFp(fpToBits(op1) | qnan, junk); 19011303Ssteve.reinhardt@amd.com } else if (signal2) { 19110319SAndreas.Sandberg@ARM.com val = bitsToFp(fpToBits(op2) | qnan, junk); 19211303Ssteve.reinhardt@amd.com } else if (nan1) { 19311303Ssteve.reinhardt@amd.com val = op1; 19411303Ssteve.reinhardt@amd.com } else if (nan2) { 19511303Ssteve.reinhardt@amd.com val = op2; 19611303Ssteve.reinhardt@amd.com } 19711303Ssteve.reinhardt@amd.com } else if (fpClass == FP_SUBNORMAL && fpscr.fz == 1) { 19811303Ssteve.reinhardt@amd.com // Turn val into a zero with the correct sign; 19911303Ssteve.reinhardt@amd.com uint64_t bitMask = ULL(0x1) << (sizeof(fpType) * 8 - 1); 20011608Snikos.nikoleris@arm.com val = bitsToFp(fpToBits(val) & bitMask, junk); 20111303Ssteve.reinhardt@amd.com feclearexcept(FeInexact); 20211303Ssteve.reinhardt@amd.com feraiseexcept(FeUnderflow); 20311303Ssteve.reinhardt@amd.com } 20411303Ssteve.reinhardt@amd.com return val; 20511303Ssteve.reinhardt@amd.com} 20611303Ssteve.reinhardt@amd.com 20711303Ssteve.reinhardt@amd.comtemplate 20811303Ssteve.reinhardt@amd.comfloat fixDest<float>(FPSCR fpscr, float val, float op1, float op2); 20910319SAndreas.Sandberg@ARM.comtemplate 21011608Snikos.nikoleris@arm.comdouble fixDest<double>(FPSCR fpscr, double val, double op1, double op2); 21110319SAndreas.Sandberg@ARM.com 21210319SAndreas.Sandberg@ARM.comtemplate <class fpType> 
21310319SAndreas.Sandberg@ARM.comfpType 21410319SAndreas.Sandberg@ARM.comfixDivDest(FPSCR fpscr, fpType val, fpType op1, fpType op2) 21510319SAndreas.Sandberg@ARM.com{ 21610319SAndreas.Sandberg@ARM.com fpType mid = fixDest(fpscr, val, op1, op2); 21710319SAndreas.Sandberg@ARM.com const bool single = (sizeof(fpType) == sizeof(float)); 21810319SAndreas.Sandberg@ARM.com const fpType junk = 0.0; 21910319SAndreas.Sandberg@ARM.com if ((single && (val == bitsToFp(0x00800000, junk) || 22010319SAndreas.Sandberg@ARM.com val == bitsToFp(0x80800000, junk))) || 22110319SAndreas.Sandberg@ARM.com (!single && (val == bitsToFp(ULL(0x0010000000000000), junk) || 22210319SAndreas.Sandberg@ARM.com val == bitsToFp(ULL(0x8010000000000000), junk))) 22310319SAndreas.Sandberg@ARM.com ) { 22410319SAndreas.Sandberg@ARM.com __asm__ __volatile__("" : "=m" (op1) : "m" (op1)); 22510319SAndreas.Sandberg@ARM.com fesetround(FeRoundZero); 22610319SAndreas.Sandberg@ARM.com fpType temp = 0.0; 22710319SAndreas.Sandberg@ARM.com __asm__ __volatile__("" : "=m" (temp) : "m" (temp)); 22810319SAndreas.Sandberg@ARM.com temp = op1 / op2; 22910319SAndreas.Sandberg@ARM.com if (flushToZero(temp)) { 23010319SAndreas.Sandberg@ARM.com feraiseexcept(FeUnderflow); 23110319SAndreas.Sandberg@ARM.com if (fpscr.fz) { 23211877Sbrandon.potter@amd.com feclearexcept(FeInexact); 23310319SAndreas.Sandberg@ARM.com mid = temp; 23410319SAndreas.Sandberg@ARM.com } 2352735Sktlim@umich.edu } 2362735Sktlim@umich.edu __asm__ __volatile__("" :: "m" (temp)); 23710319SAndreas.Sandberg@ARM.com } 2382735Sktlim@umich.edu return mid; 23910319SAndreas.Sandberg@ARM.com} 24010319SAndreas.Sandberg@ARM.com 24110319SAndreas.Sandberg@ARM.comtemplate 24210319SAndreas.Sandberg@ARM.comfloat fixDivDest<float>(FPSCR fpscr, float val, float op1, float op2); 2437520Sgblack@eecs.umich.edutemplate 24410319SAndreas.Sandberg@ARM.comdouble fixDivDest<double>(FPSCR fpscr, double val, double op1, double op2); 24510319SAndreas.Sandberg@ARM.com 
24610319SAndreas.Sandberg@ARM.comfloat 24710319SAndreas.Sandberg@ARM.comfixFpDFpSDest(FPSCR fpscr, double val) 24810319SAndreas.Sandberg@ARM.com{ 2495702Ssaidi@eecs.umich.edu const float junk = 0.0; 2505702Ssaidi@eecs.umich.edu float op1 = 0.0; 2515702Ssaidi@eecs.umich.edu if (std::isnan(val)) { 2525702Ssaidi@eecs.umich.edu uint64_t valBits = fpToBits(val); 2535702Ssaidi@eecs.umich.edu uint32_t op1Bits = bits(valBits, 50, 29) | 25410319SAndreas.Sandberg@ARM.com (mask(9) << 22) | 2558779Sgblack@eecs.umich.edu (bits(valBits, 63) << 31); 25610319SAndreas.Sandberg@ARM.com op1 = bitsToFp(op1Bits, junk); 2576973Stjones1@inf.ed.ac.uk } 25810319SAndreas.Sandberg@ARM.com float mid = fixDest(fpscr, (float)val, op1); 25910319SAndreas.Sandberg@ARM.com if (fpscr.fz && fetestexcept(FeUnderflow | FeInexact) == 26010319SAndreas.Sandberg@ARM.com (FeUnderflow | FeInexact)) { 26110319SAndreas.Sandberg@ARM.com feclearexcept(FeInexact); 26210319SAndreas.Sandberg@ARM.com } 26310319SAndreas.Sandberg@ARM.com if (mid == bitsToFp(0x00800000, junk) || 26410319SAndreas.Sandberg@ARM.com mid == bitsToFp(0x80800000, junk)) { 26510319SAndreas.Sandberg@ARM.com __asm__ __volatile__("" : "=m" (val) : "m" (val)); 26610319SAndreas.Sandberg@ARM.com fesetround(FeRoundZero); 26710319SAndreas.Sandberg@ARM.com float temp = 0.0; 26810319SAndreas.Sandberg@ARM.com __asm__ __volatile__("" : "=m" (temp) : "m" (temp)); 26910319SAndreas.Sandberg@ARM.com temp = val; 27010319SAndreas.Sandberg@ARM.com if (flushToZero(temp)) { 27110319SAndreas.Sandberg@ARM.com feraiseexcept(FeUnderflow); 27210319SAndreas.Sandberg@ARM.com if (fpscr.fz) { 27310319SAndreas.Sandberg@ARM.com feclearexcept(FeInexact); 27410319SAndreas.Sandberg@ARM.com mid = temp; 27510319SAndreas.Sandberg@ARM.com } 27610319SAndreas.Sandberg@ARM.com } 27710529Smorr@cs.wisc.edu __asm__ __volatile__("" :: "m" (temp)); 27810529Smorr@cs.wisc.edu } 27910529Smorr@cs.wisc.edu return mid; 28010529Smorr@cs.wisc.edu} 28110319SAndreas.Sandberg@ARM.com 
28210319SAndreas.Sandberg@ARM.comdouble 28310319SAndreas.Sandberg@ARM.comfixFpSFpDDest(FPSCR fpscr, float val) 28410319SAndreas.Sandberg@ARM.com{ 28510319SAndreas.Sandberg@ARM.com const double junk = 0.0; 28610319SAndreas.Sandberg@ARM.com double op1 = 0.0; 28710319SAndreas.Sandberg@ARM.com if (std::isnan(val)) { 28810319SAndreas.Sandberg@ARM.com uint32_t valBits = fpToBits(val); 28910319SAndreas.Sandberg@ARM.com uint64_t op1Bits = ((uint64_t)bits(valBits, 21, 0) << 29) | 29012106SRekai.GonzalezAlberquilla@arm.com (mask(12) << 51) | 29110319SAndreas.Sandberg@ARM.com ((uint64_t)bits(valBits, 31) << 63); 29212106SRekai.GonzalezAlberquilla@arm.com op1 = bitsToFp(op1Bits, junk); 29310319SAndreas.Sandberg@ARM.com } 29410319SAndreas.Sandberg@ARM.com double mid = fixDest(fpscr, (double)val, op1); 29510319SAndreas.Sandberg@ARM.com if (mid == bitsToFp(ULL(0x0010000000000000), junk) || 29610319SAndreas.Sandberg@ARM.com mid == bitsToFp(ULL(0x8010000000000000), junk)) { 2972735Sktlim@umich.edu __asm__ __volatile__("" : "=m" (val) : "m" (val)); 29810319SAndreas.Sandberg@ARM.com fesetround(FeRoundZero); 29910319SAndreas.Sandberg@ARM.com double temp = 0.0; 300 __asm__ __volatile__("" : "=m" (temp) : "m" (temp)); 301 temp = val; 302 if (flushToZero(temp)) { 303 feraiseexcept(FeUnderflow); 304 if (fpscr.fz) { 305 feclearexcept(FeInexact); 306 mid = temp; 307 } 308 } 309 __asm__ __volatile__("" :: "m" (temp)); 310 } 311 return mid; 312} 313 314float 315vcvtFpSFpH(FPSCR &fpscr, float op, float dest, bool top) 316{ 317 float junk = 0.0; 318 uint32_t destBits = fpToBits(dest); 319 uint32_t opBits = fpToBits(op); 320 // Extract the operand. 321 bool neg = bits(opBits, 31); 322 uint32_t exponent = bits(opBits, 30, 23); 323 uint32_t oldMantissa = bits(opBits, 22, 0); 324 uint32_t mantissa = oldMantissa >> (23 - 10); 325 // Do the conversion. 326 uint32_t extra = oldMantissa & mask(23 - 10); 327 if (exponent == 0xff) { 328 if (oldMantissa != 0) { 329 // Nans. 
330 if (bits(mantissa, 9) == 0) { 331 // Signalling nan. 332 fpscr.ioc = 1; 333 } 334 if (fpscr.ahp) { 335 mantissa = 0; 336 exponent = 0; 337 fpscr.ioc = 1; 338 } else if (fpscr.dn) { 339 mantissa = (1 << 9); 340 exponent = 0x1f; 341 neg = false; 342 } else { 343 exponent = 0x1f; 344 mantissa |= (1 << 9); 345 } 346 } else { 347 // Infinities. 348 exponent = 0x1F; 349 if (fpscr.ahp) { 350 fpscr.ioc = 1; 351 mantissa = 0x3ff; 352 } else { 353 mantissa = 0; 354 } 355 } 356 } else if (exponent == 0 && oldMantissa == 0) { 357 // Zero, don't need to do anything. 358 } else { 359 // Normalized or denormalized numbers. 360 361 bool inexact = (extra != 0); 362 363 if (exponent == 0) { 364 // Denormalized. 365 366 // If flush to zero is on, this shouldn't happen. 367 assert(fpscr.fz == 0); 368 369 // Check for underflow 370 if (inexact || fpscr.ufe) 371 fpscr.ufc = 1; 372 373 // Handle rounding. 374 unsigned mode = fpscr.rMode; 375 if ((mode == VfpRoundUpward && !neg && extra) || 376 (mode == VfpRoundDown && neg && extra) || 377 (mode == VfpRoundNearest && 378 (extra > (1 << 9) || 379 (extra == (1 << 9) && bits(mantissa, 0))))) { 380 mantissa++; 381 } 382 383 // See if the number became normalized after rounding. 384 if (mantissa == (1 << 10)) { 385 mantissa = 0; 386 exponent = 1; 387 } 388 } else { 389 // Normalized. 390 391 // We need to track the dropped bits differently since 392 // more can be dropped by denormalizing. 393 bool topOne = bits(extra, 12); 394 bool restZeros = bits(extra, 11, 0) == 0; 395 396 if (exponent <= (127 - 15)) { 397 // The result is too small. Denormalize. 398 mantissa |= (1 << 10); 399 while (mantissa && exponent <= (127 - 15)) { 400 restZeros = restZeros && !topOne; 401 topOne = bits(mantissa, 0); 402 mantissa = mantissa >> 1; 403 exponent++; 404 } 405 if (topOne || !restZeros) 406 inexact = true; 407 exponent = 0; 408 } else { 409 // Change bias. 
410 exponent -= (127 - 15); 411 } 412 413 if (exponent == 0 && (inexact || fpscr.ufe)) { 414 // Underflow 415 fpscr.ufc = 1; 416 } 417 418 // Handle rounding. 419 unsigned mode = fpscr.rMode; 420 bool nonZero = topOne || !restZeros; 421 if ((mode == VfpRoundUpward && !neg && nonZero) || 422 (mode == VfpRoundDown && neg && nonZero) || 423 (mode == VfpRoundNearest && topOne && 424 (!restZeros || bits(mantissa, 0)))) { 425 mantissa++; 426 } 427 428 // See if we rounded up and need to bump the exponent. 429 if (mantissa == (1 << 10)) { 430 mantissa = 0; 431 exponent++; 432 } 433 434 // Deal with overflow 435 if (fpscr.ahp) { 436 if (exponent >= 0x20) { 437 exponent = 0x1f; 438 mantissa = 0x3ff; 439 fpscr.ioc = 1; 440 // Supress inexact exception. 441 inexact = false; 442 } 443 } else { 444 if (exponent >= 0x1f) { 445 if ((mode == VfpRoundNearest) || 446 (mode == VfpRoundUpward && !neg) || 447 (mode == VfpRoundDown && neg)) { 448 // Overflow to infinity. 449 exponent = 0x1f; 450 mantissa = 0; 451 } else { 452 // Overflow to max normal. 453 exponent = 0x1e; 454 mantissa = 0x3ff; 455 } 456 fpscr.ofc = 1; 457 inexact = true; 458 } 459 } 460 } 461 462 if (inexact) { 463 fpscr.ixc = 1; 464 } 465 } 466 // Reassemble and install the result. 467 uint32_t result = bits(mantissa, 9, 0); 468 replaceBits(result, 14, 10, exponent); 469 if (neg) 470 result |= (1 << 15); 471 if (top) 472 replaceBits(destBits, 31, 16, result); 473 else 474 replaceBits(destBits, 15, 0, result); 475 return bitsToFp(destBits, junk); 476} 477 478float 479vcvtFpHFpS(FPSCR &fpscr, float op, bool top) 480{ 481 float junk = 0.0; 482 uint32_t opBits = fpToBits(op); 483 // Extract the operand. 484 if (top) 485 opBits = bits(opBits, 31, 16); 486 else 487 opBits = bits(opBits, 15, 0); 488 // Extract the bitfields. 489 bool neg = bits(opBits, 15); 490 uint32_t exponent = bits(opBits, 14, 10); 491 uint32_t mantissa = bits(opBits, 9, 0); 492 // Do the conversion. 
493 if (exponent == 0) { 494 if (mantissa != 0) { 495 // Normalize the value. 496 exponent = exponent + (127 - 15) + 1; 497 while (mantissa < (1 << 10)) { 498 mantissa = mantissa << 1; 499 exponent--; 500 } 501 } 502 mantissa = mantissa << (23 - 10); 503 } else if (exponent == 0x1f && !fpscr.ahp) { 504 // Infinities and nans. 505 exponent = 0xff; 506 if (mantissa != 0) { 507 // Nans. 508 mantissa = mantissa << (23 - 10); 509 if (bits(mantissa, 22) == 0) { 510 // Signalling nan. 511 fpscr.ioc = 1; 512 mantissa |= (1 << 22); 513 } 514 if (fpscr.dn) { 515 mantissa &= ~mask(22); 516 neg = false; 517 } 518 } 519 } else { 520 exponent = exponent + (127 - 15); 521 mantissa = mantissa << (23 - 10); 522 } 523 // Reassemble the result. 524 uint32_t result = bits(mantissa, 22, 0); 525 replaceBits(result, 30, 23, exponent); 526 if (neg) 527 result |= (1 << 31); 528 return bitsToFp(result, junk); 529} 530 531uint64_t 532vfpFpSToFixed(float val, bool isSigned, bool half, 533 uint8_t imm, bool rzero) 534{ 535 int rmode = rzero ? 
FeRoundZero : fegetround(); 536 __asm__ __volatile__("" : "=m" (rmode) : "m" (rmode)); 537 fesetround(FeRoundNearest); 538 val = val * powf(2.0, imm); 539 __asm__ __volatile__("" : "=m" (val) : "m" (val)); 540 fesetround(rmode); 541 feclearexcept(FeAllExceptions); 542 __asm__ __volatile__("" : "=m" (val) : "m" (val)); 543 float origVal = val; 544 val = rintf(val); 545 int fpType = std::fpclassify(val); 546 if (fpType == FP_SUBNORMAL || fpType == FP_NAN) { 547 if (fpType == FP_NAN) { 548 feraiseexcept(FeInvalid); 549 } 550 val = 0.0; 551 } else if (origVal != val) { 552 switch (rmode) { 553 case FeRoundNearest: 554 if (origVal - val > 0.5) 555 val += 1.0; 556 else if (val - origVal > 0.5) 557 val -= 1.0; 558 break; 559 case FeRoundDown: 560 if (origVal < val) 561 val -= 1.0; 562 break; 563 case FeRoundUpward: 564 if (origVal > val) 565 val += 1.0; 566 break; 567 } 568 feraiseexcept(FeInexact); 569 } 570 571 if (isSigned) { 572 if (half) { 573 if ((double)val < (int16_t)(1 << 15)) { 574 feraiseexcept(FeInvalid); 575 feclearexcept(FeInexact); 576 return (int16_t)(1 << 15); 577 } 578 if ((double)val > (int16_t)mask(15)) { 579 feraiseexcept(FeInvalid); 580 feclearexcept(FeInexact); 581 return (int16_t)mask(15); 582 } 583 return (int16_t)val; 584 } else { 585 if ((double)val < (int32_t)(1 << 31)) { 586 feraiseexcept(FeInvalid); 587 feclearexcept(FeInexact); 588 return (int32_t)(1 << 31); 589 } 590 if ((double)val > (int32_t)mask(31)) { 591 feraiseexcept(FeInvalid); 592 feclearexcept(FeInexact); 593 return (int32_t)mask(31); 594 } 595 return (int32_t)val; 596 } 597 } else { 598 if (half) { 599 if ((double)val < 0) { 600 feraiseexcept(FeInvalid); 601 feclearexcept(FeInexact); 602 return 0; 603 } 604 if ((double)val > (mask(16))) { 605 feraiseexcept(FeInvalid); 606 feclearexcept(FeInexact); 607 return mask(16); 608 } 609 return (uint16_t)val; 610 } else { 611 if ((double)val < 0) { 612 feraiseexcept(FeInvalid); 613 feclearexcept(FeInexact); 614 return 0; 615 } 616 if 
((double)val > (mask(32))) { 617 feraiseexcept(FeInvalid); 618 feclearexcept(FeInexact); 619 return mask(32); 620 } 621 return (uint32_t)val; 622 } 623 } 624} 625 626float 627vfpUFixedToFpS(FPSCR fpscr, uint32_t val, bool half, uint8_t imm) 628{ 629 fesetround(FeRoundNearest); 630 if (half) 631 val = (uint16_t)val; 632 float scale = powf(2.0, imm); 633 __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); 634 feclearexcept(FeAllExceptions); 635 __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); 636 return fixDivDest(fpscr, val / scale, (float)val, scale); 637} 638 639float 640vfpSFixedToFpS(FPSCR fpscr, int32_t val, bool half, uint8_t imm) 641{ 642 fesetround(FeRoundNearest); 643 if (half) 644 val = sext<16>(val & mask(16)); 645 float scale = powf(2.0, imm); 646 __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); 647 feclearexcept(FeAllExceptions); 648 __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); 649 return fixDivDest(fpscr, val / scale, (float)val, scale); 650} 651 652uint64_t 653vfpFpDToFixed(double val, bool isSigned, bool half, 654 uint8_t imm, bool rzero) 655{ 656 int rmode = rzero ? 
FeRoundZero : fegetround(); 657 fesetround(FeRoundNearest); 658 val = val * pow(2.0, imm); 659 __asm__ __volatile__("" : "=m" (val) : "m" (val)); 660 fesetround(rmode); 661 feclearexcept(FeAllExceptions); 662 __asm__ __volatile__("" : "=m" (val) : "m" (val)); 663 double origVal = val; 664 val = rint(val); 665 int fpType = std::fpclassify(val); 666 if (fpType == FP_SUBNORMAL || fpType == FP_NAN) { 667 if (fpType == FP_NAN) { 668 feraiseexcept(FeInvalid); 669 } 670 val = 0.0; 671 } else if (origVal != val) { 672 switch (rmode) { 673 case FeRoundNearest: 674 if (origVal - val > 0.5) 675 val += 1.0; 676 else if (val - origVal > 0.5) 677 val -= 1.0; 678 break; 679 case FeRoundDown: 680 if (origVal < val) 681 val -= 1.0; 682 break; 683 case FeRoundUpward: 684 if (origVal > val) 685 val += 1.0; 686 break; 687 } 688 feraiseexcept(FeInexact); 689 } 690 if (isSigned) { 691 if (half) { 692 if (val < (int16_t)(1 << 15)) { 693 feraiseexcept(FeInvalid); 694 feclearexcept(FeInexact); 695 return (int16_t)(1 << 15); 696 } 697 if (val > (int16_t)mask(15)) { 698 feraiseexcept(FeInvalid); 699 feclearexcept(FeInexact); 700 return (int16_t)mask(15); 701 } 702 return (int16_t)val; 703 } else { 704 if (val < (int32_t)(1 << 31)) { 705 feraiseexcept(FeInvalid); 706 feclearexcept(FeInexact); 707 return (int32_t)(1 << 31); 708 } 709 if (val > (int32_t)mask(31)) { 710 feraiseexcept(FeInvalid); 711 feclearexcept(FeInexact); 712 return (int32_t)mask(31); 713 } 714 return (int32_t)val; 715 } 716 } else { 717 if (half) { 718 if (val < 0) { 719 feraiseexcept(FeInvalid); 720 feclearexcept(FeInexact); 721 return 0; 722 } 723 if (val > mask(16)) { 724 feraiseexcept(FeInvalid); 725 feclearexcept(FeInexact); 726 return mask(16); 727 } 728 return (uint16_t)val; 729 } else { 730 if (val < 0) { 731 feraiseexcept(FeInvalid); 732 feclearexcept(FeInexact); 733 return 0; 734 } 735 if (val > mask(32)) { 736 feraiseexcept(FeInvalid); 737 feclearexcept(FeInexact); 738 return mask(32); 739 } 740 return 
(uint32_t)val; 741 } 742 } 743} 744 745double 746vfpUFixedToFpD(FPSCR fpscr, uint32_t val, bool half, uint8_t imm) 747{ 748 fesetround(FeRoundNearest); 749 if (half) 750 val = (uint16_t)val; 751 double scale = pow(2.0, imm); 752 __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); 753 feclearexcept(FeAllExceptions); 754 __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); 755 return fixDivDest(fpscr, val / scale, (double)val, scale); 756} 757 758double 759vfpSFixedToFpD(FPSCR fpscr, int32_t val, bool half, uint8_t imm) 760{ 761 fesetround(FeRoundNearest); 762 if (half) 763 val = sext<16>(val & mask(16)); 764 double scale = pow(2.0, imm); 765 __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); 766 feclearexcept(FeAllExceptions); 767 __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); 768 return fixDivDest(fpscr, val / scale, (double)val, scale); 769} 770 771template <class fpType> 772fpType 773FpOp::binaryOp(FPSCR &fpscr, fpType op1, fpType op2, 774 fpType (*func)(fpType, fpType), 775 bool flush, uint32_t rMode) const 776{ 777 const bool single = (sizeof(fpType) == sizeof(float)); 778 fpType junk = 0.0; 779 780 if (flush && flushToZero(op1, op2)) 781 fpscr.idc = 1; 782 VfpSavedState state = prepFpState(rMode); 783 __asm__ __volatile__ ("" : "=m" (op1), "=m" (op2), "=m" (state) 784 : "m" (op1), "m" (op2), "m" (state)); 785 fpType dest = func(op1, op2); 786 __asm__ __volatile__ ("" : "=m" (dest) : "m" (dest)); 787 788 int fpClass = std::fpclassify(dest); 789 // Get NAN behavior right. This varies between x86 and ARM. 790 if (fpClass == FP_NAN) { 791 const bool single = (sizeof(fpType) == sizeof(float)); 792 const uint64_t qnan = 793 single ? 
0x7fc00000 : ULL(0x7ff8000000000000); 794 const bool nan1 = std::isnan(op1); 795 const bool nan2 = std::isnan(op2); 796 const bool signal1 = nan1 && ((fpToBits(op1) & qnan) != qnan); 797 const bool signal2 = nan2 && ((fpToBits(op2) & qnan) != qnan); 798 if ((!nan1 && !nan2) || (fpscr.dn == 1)) { 799 dest = bitsToFp(qnan, junk); 800 } else if (signal1) { 801 dest = bitsToFp(fpToBits(op1) | qnan, junk); 802 } else if (signal2) { 803 dest = bitsToFp(fpToBits(op2) | qnan, junk); 804 } else if (nan1) { 805 dest = op1; 806 } else if (nan2) { 807 dest = op2; 808 } 809 } else if (flush && flushToZero(dest)) { 810 feraiseexcept(FeUnderflow); 811 } else if (( 812 (single && (dest == bitsToFp(0x00800000, junk) || 813 dest == bitsToFp(0x80800000, junk))) || 814 (!single && 815 (dest == bitsToFp(ULL(0x0010000000000000), junk) || 816 dest == bitsToFp(ULL(0x8010000000000000), junk))) 817 ) && rMode != VfpRoundZero) { 818 /* 819 * Correct for the fact that underflow is detected -before- rounding 820 * in ARM and -after- rounding in x86. 
821 */ 822 fesetround(FeRoundZero); 823 __asm__ __volatile__ ("" : "=m" (op1), "=m" (op2) 824 : "m" (op1), "m" (op2)); 825 fpType temp = func(op1, op2); 826 __asm__ __volatile__ ("" : "=m" (temp) : "m" (temp)); 827 if (flush && flushToZero(temp)) { 828 dest = temp; 829 } 830 } 831 finishVfp(fpscr, state); 832 return dest; 833} 834 835template 836float FpOp::binaryOp(FPSCR &fpscr, float op1, float op2, 837 float (*func)(float, float), 838 bool flush, uint32_t rMode) const; 839template 840double FpOp::binaryOp(FPSCR &fpscr, double op1, double op2, 841 double (*func)(double, double), 842 bool flush, uint32_t rMode) const; 843 844template <class fpType> 845fpType 846FpOp::unaryOp(FPSCR &fpscr, fpType op1, fpType (*func)(fpType), 847 bool flush, uint32_t rMode) const 848{ 849 const bool single = (sizeof(fpType) == sizeof(float)); 850 fpType junk = 0.0; 851 852 if (flush && flushToZero(op1)) 853 fpscr.idc = 1; 854 VfpSavedState state = prepFpState(rMode); 855 __asm__ __volatile__ ("" : "=m" (op1), "=m" (state) 856 : "m" (op1), "m" (state)); 857 fpType dest = func(op1); 858 __asm__ __volatile__ ("" : "=m" (dest) : "m" (dest)); 859 860 int fpClass = std::fpclassify(dest); 861 // Get NAN behavior right. This varies between x86 and ARM. 862 if (fpClass == FP_NAN) { 863 const bool single = (sizeof(fpType) == sizeof(float)); 864 const uint64_t qnan = 865 single ? 
0x7fc00000 : ULL(0x7ff8000000000000); 866 const bool nan = std::isnan(op1); 867 if (!nan || fpscr.dn == 1) { 868 dest = bitsToFp(qnan, junk); 869 } else if (nan) { 870 dest = bitsToFp(fpToBits(op1) | qnan, junk); 871 } 872 } else if (flush && flushToZero(dest)) { 873 feraiseexcept(FeUnderflow); 874 } else if (( 875 (single && (dest == bitsToFp(0x00800000, junk) || 876 dest == bitsToFp(0x80800000, junk))) || 877 (!single && 878 (dest == bitsToFp(ULL(0x0010000000000000), junk) || 879 dest == bitsToFp(ULL(0x8010000000000000), junk))) 880 ) && rMode != VfpRoundZero) { 881 /* 882 * Correct for the fact that underflow is detected -before- rounding 883 * in ARM and -after- rounding in x86. 884 */ 885 fesetround(FeRoundZero); 886 __asm__ __volatile__ ("" : "=m" (op1) : "m" (op1)); 887 fpType temp = func(op1); 888 __asm__ __volatile__ ("" : "=m" (temp) : "m" (temp)); 889 if (flush && flushToZero(temp)) { 890 dest = temp; 891 } 892 } 893 finishVfp(fpscr, state); 894 return dest; 895} 896 897template 898float FpOp::unaryOp(FPSCR &fpscr, float op1, float (*func)(float), 899 bool flush, uint32_t rMode) const; 900template 901double FpOp::unaryOp(FPSCR &fpscr, double op1, double (*func)(double), 902 bool flush, uint32_t rMode) const; 903 904IntRegIndex 905VfpMacroOp::addStride(IntRegIndex idx, unsigned stride) 906{ 907 if (wide) { 908 stride *= 2; 909 } 910 unsigned offset = idx % 8; 911 idx = (IntRegIndex)(idx - offset); 912 offset += stride; 913 idx = (IntRegIndex)(idx + (offset % 8)); 914 return idx; 915} 916 917void 918VfpMacroOp::nextIdxs(IntRegIndex &dest, IntRegIndex &op1, IntRegIndex &op2) 919{ 920 unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2; 921 assert(!inScalarBank(dest)); 922 dest = addStride(dest, stride); 923 op1 = addStride(op1, stride); 924 if (!inScalarBank(op2)) { 925 op2 = addStride(op2, stride); 926 } 927} 928 929void 930VfpMacroOp::nextIdxs(IntRegIndex &dest, IntRegIndex &op1) 931{ 932 unsigned stride = (machInst.fpscrStride == 0) ? 
1 : 2; 933 assert(!inScalarBank(dest)); 934 dest = addStride(dest, stride); 935 if (!inScalarBank(op1)) { 936 op1 = addStride(op1, stride); 937 } 938} 939 940void 941VfpMacroOp::nextIdxs(IntRegIndex &dest) 942{ 943 unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2; 944 assert(!inScalarBank(dest)); 945 dest = addStride(dest, stride); 946} 947 948} 949