/* * Copyright (c) 2010 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall * not be construed as granting a license to any other intellectual * property including but not limited to intellectual property relating * to a hardware implementation of the functionality of the software * licensed hereunder. You may use the software subject to the license * terms below provided that you ensure that this notice is replicated * unmodified and in its entirety in all distributions of the software, * modified or unmodified, in source code or in binary form. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer; * redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution; * neither the name of the copyright holders nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Gabe Black */ #ifndef __ARCH_ARM_INSTS_VFP_HH__ #define __ARCH_ARM_INSTS_VFP_HH__ #include "arch/arm/insts/misc.hh" #include "arch/arm/miscregs.hh" #include #include namespace ArmISA { enum VfpMicroMode { VfpNotAMicroop, VfpMicroop, VfpFirstMicroop, VfpLastMicroop }; template static inline void setVfpMicroFlags(VfpMicroMode mode, T &flags) { switch (mode) { case VfpMicroop: flags[StaticInst::IsMicroop] = true; break; case VfpFirstMicroop: flags[StaticInst::IsMicroop] = flags[StaticInst::IsFirstMicroop] = true; break; case VfpLastMicroop: flags[StaticInst::IsMicroop] = flags[StaticInst::IsLastMicroop] = true; break; case VfpNotAMicroop: break; } if (mode == VfpMicroop || mode == VfpFirstMicroop) { flags[StaticInst::IsDelayedCommit] = true; } } enum FeExceptionBit { FeDivByZero = FE_DIVBYZERO, FeInexact = FE_INEXACT, FeInvalid = FE_INVALID, FeOverflow = FE_OVERFLOW, FeUnderflow = FE_UNDERFLOW, FeAllExceptions = FE_ALL_EXCEPT }; enum FeRoundingMode { FeRoundDown = FE_DOWNWARD, FeRoundNearest = FE_TONEAREST, FeRoundZero = FE_TOWARDZERO, FeRoundUpward = FE_UPWARD }; enum VfpRoundingMode { VfpRoundNearest = 0, VfpRoundUpward = 1, VfpRoundDown = 2, VfpRoundZero = 3 }; template static inline void vfpFlushToZero(uint32_t &_fpscr, fpType &op) { FPSCR fpscr = _fpscr; fpType junk = 0.0; if (fpscr.fz == 1 && (std::fpclassify(op) == FP_SUBNORMAL)) { fpscr.idc = 1; uint64_t bitMask = ULL(0x1) << (sizeof(fpType) * 8 - 1); op = bitsToFp(fpToBits(op) & bitMask, junk); } _fpscr = fpscr; } template static inline void vfpFlushToZero(uint32_t &fpscr, fpType &op1, fpType &op2) { vfpFlushToZero(fpscr, op1); vfpFlushToZero(fpscr, op2); } template static inline bool flushToZero(fpType &op) { fpType junk = 0.0; if (std::fpclassify(op) == FP_SUBNORMAL) { uint64_t bitMask = ULL(0x1) << (sizeof(fpType) * 8 - 1); op = bitsToFp(fpToBits(op) & bitMask, junk); return true; } return false; } template static inline bool flushToZero(fpType &op1, fpType &op2) { bool flush1 = flushToZero(op1); bool flush2 = flushToZero(op2); return flush1 || flush2; } static inline uint32_t fpToBits(float fp) { union { float fp; uint32_t bits; } val; val.fp = fp; return val.bits; } static inline uint64_t fpToBits(double fp) { union { double fp; uint64_t bits; } val; val.fp = fp; return val.bits; } static inline float bitsToFp(uint64_t bits, float junk) { union { float fp; uint32_t bits; } val; val.bits = bits; return val.fp; } static inline double bitsToFp(uint64_t bits, double junk) { union { double fp; uint64_t bits; } val; val.bits = bits; return val.fp; } typedef int VfpSavedState; static inline VfpSavedState prepVfpFpscr(FPSCR fpscr) { int roundingMode = fegetround(); feclearexcept(FeAllExceptions); switch (fpscr.rMode) { case VfpRoundNearest: fesetround(FeRoundNearest); break; case VfpRoundUpward: fesetround(FeRoundUpward); break; case VfpRoundDown: fesetround(FeRoundDown); break; case VfpRoundZero: fesetround(FeRoundZero); break; } return roundingMode; } static inline VfpSavedState prepFpState(uint32_t rMode) { int roundingMode = fegetround(); feclearexcept(FeAllExceptions); switch (rMode) { case VfpRoundNearest: fesetround(FeRoundNearest); break; case VfpRoundUpward: fesetround(FeRoundUpward); break; case VfpRoundDown: fesetround(FeRoundDown); break; case VfpRoundZero: fesetround(FeRoundZero); break; } return roundingMode; } static inline FPSCR setVfpFpscr(FPSCR fpscr, VfpSavedState state) { int exceptions = fetestexcept(FeAllExceptions); if (exceptions & FeInvalid) { fpscr.ioc = 1; } if (exceptions & FeDivByZero) { fpscr.dzc = 1; } if (exceptions & FeOverflow) { fpscr.ofc = 1; } if (exceptions & FeUnderflow) { fpscr.ufc = 1; } if (exceptions & FeInexact) { fpscr.ixc = 1; } fesetround(state); return fpscr; } static inline void finishVfp(FPSCR &fpscr, VfpSavedState state) { int exceptions = fetestexcept(FeAllExceptions); bool underflow = false; if (exceptions & FeInvalid) { fpscr.ioc = 1; } if (exceptions & FeDivByZero) { fpscr.dzc = 1; } if (exceptions & FeOverflow) { fpscr.ofc = 1; } if (exceptions & FeUnderflow) { underflow = true; fpscr.ufc = 1; } if ((exceptions & FeInexact) && !(underflow && fpscr.fz)) { fpscr.ixc = 1; } fesetround(state); } template static inline fpType fixDest(FPSCR fpscr, fpType val, fpType op1) { int fpClass = std::fpclassify(val); fpType junk = 0.0; if (fpClass == FP_NAN) { const bool single = (sizeof(val) == sizeof(float)); const uint64_t qnan = single ? 0x7fc00000 : ULL(0x7ff8000000000000); const bool nan = std::isnan(op1); if (!nan || (fpscr.dn == 1)) { val = bitsToFp(qnan, junk); } else if (nan) { val = bitsToFp(fpToBits(op1) | qnan, junk); } } else if (fpClass == FP_SUBNORMAL && fpscr.fz == 1) { // Turn val into a zero with the correct sign; uint64_t bitMask = ULL(0x1) << (sizeof(fpType) * 8 - 1); val = bitsToFp(fpToBits(val) & bitMask, junk); feclearexcept(FeInexact); feraiseexcept(FeUnderflow); } return val; } template static inline fpType fixDest(FPSCR fpscr, fpType val, fpType op1, fpType op2) { int fpClass = std::fpclassify(val); fpType junk = 0.0; if (fpClass == FP_NAN) { const bool single = (sizeof(val) == sizeof(float)); const uint64_t qnan = single ? 0x7fc00000 : ULL(0x7ff8000000000000); const bool nan1 = std::isnan(op1); const bool nan2 = std::isnan(op2); const bool signal1 = nan1 && ((fpToBits(op1) & qnan) != qnan); const bool signal2 = nan2 && ((fpToBits(op2) & qnan) != qnan); if ((!nan1 && !nan2) || (fpscr.dn == 1)) { val = bitsToFp(qnan, junk); } else if (signal1) { val = bitsToFp(fpToBits(op1) | qnan, junk); } else if (signal2) { val = bitsToFp(fpToBits(op2) | qnan, junk); } else if (nan1) { val = op1; } else if (nan2) { val = op2; } } else if (fpClass == FP_SUBNORMAL && fpscr.fz == 1) { // Turn val into a zero with the correct sign; uint64_t bitMask = ULL(0x1) << (sizeof(fpType) * 8 - 1); val = bitsToFp(fpToBits(val) & bitMask, junk); feclearexcept(FeInexact); feraiseexcept(FeUnderflow); } return val; } template static inline fpType fixDivDest(FPSCR fpscr, fpType val, fpType op1, fpType op2) { fpType mid = fixDest(fpscr, val, op1, op2); const bool single = (sizeof(fpType) == sizeof(float)); const fpType junk = 0.0; if ((single && (val == bitsToFp(0x00800000, junk) || val == bitsToFp(0x80800000, junk))) || (!single && (val == bitsToFp(ULL(0x0010000000000000), junk) || val == bitsToFp(ULL(0x8010000000000000), junk))) ) { __asm__ __volatile__("" : "=m" (op1) : "m" (op1)); fesetround(FeRoundZero); fpType temp = 0.0; __asm__ __volatile__("" : "=m" (temp) : "m" (temp)); temp = op1 / op2; if (flushToZero(temp)) { feraiseexcept(FeUnderflow); if (fpscr.fz) { feclearexcept(FeInexact); mid = temp; } } __asm__ __volatile__("" :: "m" (temp)); } return mid; } static inline float fixFpDFpSDest(FPSCR fpscr, double val) { const float junk = 0.0; float op1 = 0.0; if (std::isnan(val)) { uint64_t valBits = fpToBits(val); uint32_t op1Bits = bits(valBits, 50, 29) | (mask(9) << 22) | (bits(valBits, 63) << 31); op1 = bitsToFp(op1Bits, junk); } float mid = fixDest(fpscr, (float)val, op1); if (fpscr.fz && fetestexcept(FeUnderflow | FeInexact) == (FeUnderflow | FeInexact)) { feclearexcept(FeInexact); } if (mid == bitsToFp(0x00800000, junk) || mid == bitsToFp(0x80800000, junk)) { __asm__ __volatile__("" : "=m" (val) : "m" (val)); fesetround(FeRoundZero); float temp = 0.0; __asm__ __volatile__("" : "=m" (temp) : "m" (temp)); temp = val; if (flushToZero(temp)) { feraiseexcept(FeUnderflow); if (fpscr.fz) { feclearexcept(FeInexact); mid = temp; } } __asm__ __volatile__("" :: "m" (temp)); } return mid; } static inline double fixFpSFpDDest(FPSCR fpscr, float val) { const double junk = 0.0; double op1 = 0.0; if (std::isnan(val)) { uint32_t valBits = fpToBits(val); uint64_t op1Bits = ((uint64_t)bits(valBits, 21, 0) << 29) | (mask(12) << 51) | ((uint64_t)bits(valBits, 31) << 63); op1 = bitsToFp(op1Bits, junk); } double mid = fixDest(fpscr, (double)val, op1); if (mid == bitsToFp(ULL(0x0010000000000000), junk) || mid == bitsToFp(ULL(0x8010000000000000), junk)) { __asm__ __volatile__("" : "=m" (val) : "m" (val)); fesetround(FeRoundZero); double temp = 0.0; __asm__ __volatile__("" : "=m" (temp) : "m" (temp)); temp = val; if (flushToZero(temp)) { feraiseexcept(FeUnderflow); if (fpscr.fz) { feclearexcept(FeInexact); mid = temp; } } __asm__ __volatile__("" :: "m" (temp)); } return mid; } static inline double makeDouble(uint32_t low, uint32_t high) { double junk = 0.0; return bitsToFp((uint64_t)low | ((uint64_t)high << 32), junk); } static inline uint32_t lowFromDouble(double val) { return fpToBits(val); } static inline uint32_t highFromDouble(double val) { return fpToBits(val) >> 32; } static inline uint64_t vfpFpSToFixed(float val, bool isSigned, bool half, uint8_t imm, bool rzero = true) { int rmode = rzero ? FeRoundZero : fegetround(); __asm__ __volatile__("" : "=m" (rmode) : "m" (rmode)); fesetround(FeRoundNearest); val = val * powf(2.0, imm); __asm__ __volatile__("" : "=m" (val) : "m" (val)); fesetround(rmode); feclearexcept(FeAllExceptions); __asm__ __volatile__("" : "=m" (val) : "m" (val)); float origVal = val; val = rintf(val); int fpType = std::fpclassify(val); if (fpType == FP_SUBNORMAL || fpType == FP_NAN) { if (fpType == FP_NAN) { feraiseexcept(FeInvalid); } val = 0.0; } else if (origVal != val) { switch (rmode) { case FeRoundNearest: if (origVal - val > 0.5) val += 1.0; else if (val - origVal > 0.5) val -= 1.0; break; case FeRoundDown: if (origVal < val) val -= 1.0; break; case FeRoundUpward: if (origVal > val) val += 1.0; break; } feraiseexcept(FeInexact); } if (isSigned) { if (half) { if ((double)val < (int16_t)(1 << 15)) { feraiseexcept(FeInvalid); feclearexcept(FeInexact); return (int16_t)(1 << 15); } if ((double)val > (int16_t)mask(15)) { feraiseexcept(FeInvalid); feclearexcept(FeInexact); return (int16_t)mask(15); } return (int16_t)val; } else { if ((double)val < (int32_t)(1 << 31)) { feraiseexcept(FeInvalid); feclearexcept(FeInexact); return (int32_t)(1 << 31); } if ((double)val > (int32_t)mask(31)) { feraiseexcept(FeInvalid); feclearexcept(FeInexact); return (int32_t)mask(31); } return (int32_t)val; } } else { if (half) { if ((double)val < 0) { feraiseexcept(FeInvalid); feclearexcept(FeInexact); return 0; } if ((double)val > (mask(16))) { feraiseexcept(FeInvalid); feclearexcept(FeInexact); return mask(16); } return (uint16_t)val; } else { if ((double)val < 0) { feraiseexcept(FeInvalid); feclearexcept(FeInexact); return 0; } if ((double)val > (mask(32))) { feraiseexcept(FeInvalid); feclearexcept(FeInexact); return mask(32); } return (uint32_t)val; } } } static inline float vfpUFixedToFpS(FPSCR fpscr, uint32_t val, bool half, uint8_t imm) { fesetround(FeRoundNearest); if (half) val = (uint16_t)val; float scale = powf(2.0, imm); __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); feclearexcept(FeAllExceptions); __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); return fixDivDest(fpscr, val / scale, (float)val, scale); } static inline float vfpSFixedToFpS(FPSCR fpscr, int32_t val, bool half, uint8_t imm) { fesetround(FeRoundNearest); if (half) val = sext<16>(val & mask(16)); float scale = powf(2.0, imm); __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); feclearexcept(FeAllExceptions); __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); return fixDivDest(fpscr, val / scale, (float)val, scale); } static inline uint64_t vfpFpDToFixed(double val, bool isSigned, bool half, uint8_t imm, bool rzero = true) { int rmode = rzero ? FeRoundZero : fegetround(); fesetround(FeRoundNearest); val = val * pow(2.0, imm); __asm__ __volatile__("" : "=m" (val) : "m" (val)); fesetround(rmode); feclearexcept(FeAllExceptions); __asm__ __volatile__("" : "=m" (val) : "m" (val)); double origVal = val; val = rint(val); int fpType = std::fpclassify(val); if (fpType == FP_SUBNORMAL || fpType == FP_NAN) { if (fpType == FP_NAN) { feraiseexcept(FeInvalid); } val = 0.0; } else if (origVal != val) { switch (rmode) { case FeRoundNearest: if (origVal - val > 0.5) val += 1.0; else if (val - origVal > 0.5) val -= 1.0; break; case FeRoundDown: if (origVal < val) val -= 1.0; break; case FeRoundUpward: if (origVal > val) val += 1.0; break; } feraiseexcept(FeInexact); } if (isSigned) { if (half) { if (val < (int16_t)(1 << 15)) { feraiseexcept(FeInvalid); feclearexcept(FeInexact); return (int16_t)(1 << 15); } if (val > (int16_t)mask(15)) { feraiseexcept(FeInvalid); feclearexcept(FeInexact); return (int16_t)mask(15); } return (int16_t)val; } else { if (val < (int32_t)(1 << 31)) { feraiseexcept(FeInvalid); feclearexcept(FeInexact); return (int32_t)(1 << 31); } if (val > (int32_t)mask(31)) { feraiseexcept(FeInvalid); feclearexcept(FeInexact); return (int32_t)mask(31); } return (int32_t)val; } } else { if (half) { if (val < 0) { feraiseexcept(FeInvalid); feclearexcept(FeInexact); return 0; } if (val > mask(16)) { feraiseexcept(FeInvalid); feclearexcept(FeInexact); return mask(16); } return (uint16_t)val; } else { if (val < 0) { feraiseexcept(FeInvalid); feclearexcept(FeInexact); return 0; } if (val > mask(32)) { feraiseexcept(FeInvalid); feclearexcept(FeInexact); return mask(32); } return (uint32_t)val; } } } static inline double vfpUFixedToFpD(FPSCR fpscr, uint32_t val, bool half, uint8_t imm) { fesetround(FeRoundNearest); if (half) val = (uint16_t)val; double scale = pow(2.0, imm); __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); feclearexcept(FeAllExceptions); __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); return fixDivDest(fpscr, val / scale, (double)val, scale); } static inline double vfpSFixedToFpD(FPSCR fpscr, int32_t val, bool half, uint8_t imm) { fesetround(FeRoundNearest); if (half) val = sext<16>(val & mask(16)); double scale = pow(2.0, imm); __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); feclearexcept(FeAllExceptions); __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); return fixDivDest(fpscr, val / scale, (double)val, scale); } class VfpMacroOp : public PredMacroOp { public: static bool inScalarBank(IntRegIndex idx) { return (idx % 32) < 8; } protected: bool wide; VfpMacroOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, bool _wide) : PredMacroOp(mnem, _machInst, __opClass), wide(_wide) {} IntRegIndex addStride(IntRegIndex idx, unsigned stride) { if (wide) { stride *= 2; } unsigned offset = idx % 8; idx = (IntRegIndex)(idx - offset); offset += stride; idx = (IntRegIndex)(idx + (offset % 8)); return idx; } void nextIdxs(IntRegIndex &dest, IntRegIndex &op1, IntRegIndex &op2) { unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2; assert(!inScalarBank(dest)); dest = addStride(dest, stride); op1 = addStride(op1, stride); if (!inScalarBank(op2)) { op2 = addStride(op2, stride); } } void nextIdxs(IntRegIndex &dest, IntRegIndex &op1) { unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2; assert(!inScalarBank(dest)); dest = addStride(dest, stride); if (!inScalarBank(op1)) { op1 = addStride(op1, stride); } } void nextIdxs(IntRegIndex &dest) { unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2; assert(!inScalarBank(dest)); dest = addStride(dest, stride); } }; static inline float fpAddS(float a, float b) { return a + b; } static inline double fpAddD(double a, double b) { return a + b; } static inline float fpSubS(float a, float b) { return a - b; } static inline double fpSubD(double a, double b) { return a - b; } static inline float fpDivS(float a, float b) { return a / b; } static inline double fpDivD(double a, double b) { return a / b; } static inline float fpMulS(float a, float b) { return a * b; } static inline double fpMulD(double a, double b) { return a * b; } class FpOp : public PredOp { protected: FpOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass) : PredOp(mnem, _machInst, __opClass) {} virtual float doOp(float op1, float op2) const { panic("Unimplemented version of doOp called.\n"); } virtual float doOp(float op1) const { panic("Unimplemented version of doOp called.\n"); } virtual double doOp(double op1, double op2) const { panic("Unimplemented version of doOp called.\n"); } virtual double doOp(double op1) const { panic("Unimplemented version of doOp called.\n"); } double dbl(uint32_t low, uint32_t high) const { double junk = 0.0; return bitsToFp((uint64_t)low | ((uint64_t)high << 32), junk); } uint32_t dblLow(double val) const { return fpToBits(val); } uint32_t dblHi(double val) const { return fpToBits(val) >> 32; } template fpType binaryOp(FPSCR &fpscr, fpType op1, fpType op2, fpType (*func)(fpType, fpType), bool flush, uint32_t rMode) const { const bool single = (sizeof(fpType) == sizeof(float)); fpType junk = 0.0; if (flush && flushToZero(op1, op2)) fpscr.idc = 1; VfpSavedState state = prepFpState(rMode); __asm__ __volatile__ ("" : "=m" (op1), "=m" (op2), "=m" (state) : "m" (op1), "m" (op2), "m" (state)); fpType dest = func(op1, op2); __asm__ __volatile__ ("" : "=m" (dest) : "m" (dest)); int fpClass = std::fpclassify(dest); // Get NAN behavior right. This varies between x86 and ARM. if (fpClass == FP_NAN) { const bool single = (sizeof(fpType) == sizeof(float)); const uint64_t qnan = single ? 0x7fc00000 : ULL(0x7ff8000000000000); const bool nan1 = std::isnan(op1); const bool nan2 = std::isnan(op2); const bool signal1 = nan1 && ((fpToBits(op1) & qnan) != qnan); const bool signal2 = nan2 && ((fpToBits(op2) & qnan) != qnan); if ((!nan1 && !nan2) || (fpscr.dn == 1)) { dest = bitsToFp(qnan, junk); } else if (signal1) { dest = bitsToFp(fpToBits(op1) | qnan, junk); } else if (signal2) { dest = bitsToFp(fpToBits(op2) | qnan, junk); } else if (nan1) { dest = op1; } else if (nan2) { dest = op2; } } else if (flush && flushToZero(dest)) { feraiseexcept(FeUnderflow); } else if (( (single && (dest == bitsToFp(0x00800000, junk) || dest == bitsToFp(0x80800000, junk))) || (!single && (dest == bitsToFp(ULL(0x0010000000000000), junk) || dest == bitsToFp(ULL(0x8010000000000000), junk))) ) && rMode != VfpRoundZero) { /* * Correct for the fact that underflow is detected -before- rounding * in ARM and -after- rounding in x86. */ fesetround(FeRoundZero); __asm__ __volatile__ ("" : "=m" (op1), "=m" (op2) : "m" (op1), "m" (op2)); fpType temp = func(op1, op2); __asm__ __volatile__ ("" : "=m" (temp) : "m" (temp)); if (flush && flushToZero(temp)) { dest = temp; } } finishVfp(fpscr, state); return dest; } template fpType unaryOp(FPSCR &fpscr, fpType op1, fpType (*func)(fpType), bool flush, uint32_t rMode) const { const bool single = (sizeof(fpType) == sizeof(float)); fpType junk = 0.0; if (flush && flushToZero(op1)) fpscr.idc = 1; VfpSavedState state = prepFpState(rMode); __asm__ __volatile__ ("" : "=m" (op1), "=m" (state) : "m" (op1), "m" (state)); fpType dest = func(op1); __asm__ __volatile__ ("" : "=m" (dest) : "m" (dest)); int fpClass = std::fpclassify(dest); // Get NAN behavior right. This varies between x86 and ARM. if (fpClass == FP_NAN) { const bool single = (sizeof(fpType) == sizeof(float)); const uint64_t qnan = single ? 0x7fc00000 : ULL(0x7ff8000000000000); const bool nan = std::isnan(op1); if (!nan || fpscr.dn == 1) { dest = bitsToFp(qnan, junk); } else if (nan) { dest = bitsToFp(fpToBits(op1) | qnan, junk); } } else if (flush && flushToZero(dest)) { feraiseexcept(FeUnderflow); } else if (( (single && (dest == bitsToFp(0x00800000, junk) || dest == bitsToFp(0x80800000, junk))) || (!single && (dest == bitsToFp(ULL(0x0010000000000000), junk) || dest == bitsToFp(ULL(0x8010000000000000), junk))) ) && rMode != VfpRoundZero) { /* * Correct for the fact that underflow is detected -before- rounding * in ARM and -after- rounding in x86. */ fesetround(FeRoundZero); __asm__ __volatile__ ("" : "=m" (op1) : "m" (op1)); fpType temp = func(op1); __asm__ __volatile__ ("" : "=m" (temp) : "m" (temp)); if (flush && flushToZero(temp)) { dest = temp; } } finishVfp(fpscr, state); return dest; } }; class FpRegRegOp : public FpOp { protected: IntRegIndex dest; IntRegIndex op1; FpRegRegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, IntRegIndex _dest, IntRegIndex _op1, VfpMicroMode mode = VfpNotAMicroop) : FpOp(mnem, _machInst, __opClass), dest(_dest), op1(_op1) { setVfpMicroFlags(mode, flags); } std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; }; class FpRegImmOp : public FpOp { protected: IntRegIndex dest; uint64_t imm; FpRegImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, IntRegIndex _dest, uint64_t _imm, VfpMicroMode mode = VfpNotAMicroop) : FpOp(mnem, _machInst, __opClass), dest(_dest), imm(_imm) { setVfpMicroFlags(mode, flags); } std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; }; class FpRegRegImmOp : public FpOp { protected: IntRegIndex dest; IntRegIndex op1; uint64_t imm; FpRegRegImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, IntRegIndex _dest, IntRegIndex _op1, uint64_t _imm, VfpMicroMode mode = VfpNotAMicroop) : FpOp(mnem, _machInst, __opClass), dest(_dest), op1(_op1), imm(_imm) { setVfpMicroFlags(mode, flags); } std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; }; class FpRegRegRegOp : public FpOp { protected: IntRegIndex dest; IntRegIndex op1; IntRegIndex op2; FpRegRegRegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2, VfpMicroMode mode = VfpNotAMicroop) : FpOp(mnem, _machInst, __opClass), dest(_dest), op1(_op1), op2(_op2) { setVfpMicroFlags(mode, flags); } std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; }; } #endif //__ARCH_ARM_INSTS_VFP_HH__