/*
 * Copyright (c) 2010 ARM Limited
 * All rights reserved
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder.  You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Gabe Black
 */
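/*
 * Helpers shared by the ARM VFP instruction implementations: microop flag
 * setup, flush-to-zero and NaN handling, conversions between floating
 * point and fixed point formats, mapping between the guest FPSCR and the
 * host fenv rounding/exception state, and base classes for VFP operations
 * and macroops.
 */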
#ifndef __ARCH_ARM_INSTS_VFP_HH__
#define __ARCH_ARM_INSTS_VFP_HH__

#include "arch/arm/insts/misc.hh"
#include "arch/arm/miscregs.hh"

#include <fenv.h>

#include <cmath>

namespace ArmISA
{

enum VfpMicroMode {
    VfpNotAMicroop,
    VfpMicroop,
    VfpFirstMicroop,
    VfpLastMicroop
};

// Set the microop-related flags on an instruction according to its
// position in a VFP microop sequence.
template <class T>
static inline void
setVfpMicroFlags(VfpMicroMode mode, T &flags)
{
    switch (mode) {
      case VfpMicroop:
        flags[StaticInst::IsMicroop] = true;
        break;
      case VfpFirstMicroop:
        flags[StaticInst::IsMicroop] =
            flags[StaticInst::IsFirstMicroop] = true;
        break;
      case VfpLastMicroop:
        flags[StaticInst::IsMicroop] =
            flags[StaticInst::IsLastMicroop] = true;
        break;
      case VfpNotAMicroop:
        break;
    }
    if (mode == VfpMicroop || mode == VfpFirstMicroop) {
        flags[StaticInst::IsDelayedCommit] = true;
    }
}

enum FeExceptionBit {
    FeDivByZero = FE_DIVBYZERO,
    FeInexact = FE_INEXACT,
    FeInvalid = FE_INVALID,
    FeOverflow = FE_OVERFLOW,
    FeUnderflow = FE_UNDERFLOW,
    FeAllExceptions = FE_ALL_EXCEPT
};

enum FeRoundingMode {
    FeRoundDown = FE_DOWNWARD,
    FeRoundNearest = FE_TONEAREST,
    FeRoundZero = FE_TOWARDZERO,
    FeRoundUpward = FE_UPWARD
};

enum VfpRoundingMode {
    VfpRoundNearest = 0,
    VfpRoundUpward = 1,
    VfpRoundDown = 2,
    VfpRoundZero = 3
};

// Flush a subnormal operand to zero and set the input-denormal (IDC) flag
// when flush-to-zero mode is enabled in the FPSCR.
template <class fpType>
static inline void
vfpFlushToZero(uint32_t &_fpscr, fpType &op)
{
    FPSCR fpscr = _fpscr;
    if (fpscr.fz == 1 && (std::fpclassify(op) == FP_SUBNORMAL)) {
        fpscr.idc = 1;
        op = 0;
    }
    _fpscr = fpscr;
}

template <class fpType>
static inline void
vfpFlushToZero(uint32_t &fpscr, fpType &op1, fpType &op2)
{
    vfpFlushToZero(fpscr, op1);
    vfpFlushToZero(fpscr, op2);
}

// Reinterpret a floating point value's bit pattern as an integer
// (fpToBits) or an integer bit pattern as floating point (bitsToFp);
// bitsToFp's unused "junk" argument selects the destination type.
static inline uint32_t
fpToBits(float fp)
{
    union
    {
        float fp;
        uint32_t bits;
    } val;
    val.fp = fp;
    return val.bits;
}

static inline uint64_t
fpToBits(double fp)
{
    union
    {
        double fp;
        uint64_t bits;
    } val;
    val.fp = fp;
    return val.bits;
}

static inline float
bitsToFp(uint64_t bits, float junk)
{
    union
    {
        float fp;
        uint32_t bits;
    } val;
    val.bits = bits;
    return val.fp;
}

static inline double
bitsToFp(uint64_t bits, double junk)
{
    union
    {
        double fp;
        uint64_t bits;
    } val;
    val.bits = bits;
    return val.fp;
}

// Replace a NaN result produced by the host with the NaN the architecture
// expects, honoring default-NaN mode and signaling-NaN precedence.
template <class fpType>
static inline fpType
fixNan(FPSCR fpscr, fpType val, fpType op1, fpType op2)
{
    if (std::isnan(val)) {
        const bool single = (sizeof(val) == sizeof(float));
        const uint64_t qnan =
            single ? 0x7fc00000 : ULL(0x7ff8000000000000);
        const bool nan1 = std::isnan(op1);
        const bool nan2 = std::isnan(op2);
        const bool signal1 = nan1 && ((fpToBits(op1) & qnan) != qnan);
        const bool signal2 = nan2 && ((fpToBits(op2) & qnan) != qnan);
        fpType junk = 0.0;
        if ((!nan1 && !nan2) || (fpscr.dn == 1)) {
            val = bitsToFp(qnan, junk);
        } else if (signal1) {
            val = bitsToFp(fpToBits(op1) | qnan, junk);
        } else if (signal2) {
            val = bitsToFp(fpToBits(op2) | qnan, junk);
        } else if (nan1) {
            val = op1;
        } else if (nan2) {
            val = op2;
        }
    }
    return val;
}
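/*
 * Illustrative example of the NaN handling above: with fpscr.dn == 0,
 * fixNan(fpscr, result, snanOp, qnanOp) returns the signaling operand
 * with its quiet bit set (signaling NaNs take precedence over quiet
 * ones), while with fpscr.dn == 1 the default quiet NaN (0x7fc00000 for
 * single precision) is returned regardless of the operands.
 */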
// Convert a single precision value to a (possibly half-width) fixed point
// value with "imm" fractional bits, saturating and raising the appropriate
// host exceptions.
static inline uint64_t
vfpFpSToFixed(float val, bool isSigned, bool half, uint8_t imm)
{
    fesetround(FeRoundZero);
    val = val * powf(2.0, imm);
    // Empty asm with a memory operand: force val through memory so the
    // scaling isn't reordered or folded across the fenv calls.
    __asm__ __volatile__("" : "=m" (val) : "m" (val));
    feclearexcept(FeAllExceptions);
    __asm__ __volatile__("" : "=m" (val) : "m" (val));
    float origVal = val;
    val = rintf(val);
    int fpType = std::fpclassify(val);
    if (fpType == FP_SUBNORMAL || fpType == FP_NAN) {
        if (fpType == FP_NAN) {
            feraiseexcept(FeInvalid);
        }
        val = 0.0;
    } else if (origVal != val) {
        feraiseexcept(FeInexact);
    }

    if (isSigned) {
        if (half) {
            if ((double)val < (int16_t)(1 << 15)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return (int16_t)(1 << 15);
            }
            if ((double)val > (int16_t)mask(15)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return (int16_t)mask(15);
            }
            return (int16_t)val;
        } else {
            if ((double)val < (int32_t)(1 << 31)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return (int32_t)(1 << 31);
            }
            if ((double)val > (int32_t)mask(31)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return (int32_t)mask(31);
            }
            return (int32_t)val;
        }
    } else {
        if (half) {
            if ((double)val < 0) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return 0;
            }
            if ((double)val > (mask(16))) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return mask(16);
            }
            return (uint16_t)val;
        } else {
            if ((double)val < 0) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return 0;
            }
            if ((double)val > (mask(32))) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return mask(32);
            }
            return (uint32_t)val;
        }
    }
}

static inline float
vfpUFixedToFpS(uint32_t val, bool half, uint8_t imm)
{
    fesetround(FeRoundNearest);
    if (half)
        val = (uint16_t)val;
    float scale = powf(2.0, imm);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    feclearexcept(FeAllExceptions);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    return val / scale;
}

static inline float
vfpSFixedToFpS(int32_t val, bool half, uint8_t imm)
{
    fesetround(FeRoundNearest);
    if (half)
        val = sext<16>(val & mask(16));
    float scale = powf(2.0, imm);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    feclearexcept(FeAllExceptions);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    return val / scale;
}

static inline uint64_t
vfpFpDToFixed(double val, bool isSigned, bool half, uint8_t imm)
{
    fesetround(FeRoundNearest);
    val = val * pow(2.0, imm);
    __asm__ __volatile__("" : "=m" (val) : "m" (val));
    fesetround(FeRoundZero);
    feclearexcept(FeAllExceptions);
    __asm__ __volatile__("" : "=m" (val) : "m" (val));
    double origVal = val;
    val = rint(val);
    int fpType = std::fpclassify(val);
    if (fpType == FP_SUBNORMAL || fpType == FP_NAN) {
        if (fpType == FP_NAN) {
            feraiseexcept(FeInvalid);
        }
        val = 0.0;
    } else if (origVal != val) {
        feraiseexcept(FeInexact);
    }

    if (isSigned) {
        if (half) {
            if (val < (int16_t)(1 << 15)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return (int16_t)(1 << 15);
            }
            if (val > (int16_t)mask(15)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return (int16_t)mask(15);
            }
            return (int16_t)val;
        } else {
            if (val < (int32_t)(1 << 31)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return (int32_t)(1 << 31);
            }
            if (val > (int32_t)mask(31)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return (int32_t)mask(31);
            }
            return (int32_t)val;
        }
    } else {
        if (half) {
            if (val < 0) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return 0;
            }
            if (val > mask(16)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return mask(16);
            }
            return (uint16_t)val;
        } else {
            if (val < 0) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return 0;
            }
            if (val > mask(32)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return mask(32);
            }
            return (uint32_t)val;
        }
    }
}
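/*
 * Illustrative example of the fixed point conversions: the call
 * vfpFpSToFixed(300.75f, true, false, 4) scales the input by 2^4, rounds
 * toward zero to 4812, and returns it as a signed 32-bit result; values
 * outside the destination range instead saturate to the type's
 * minimum/maximum and raise FeInvalid. The reverse helpers divide by the
 * same power of two, so vfpSFixedToFpS(4812, false, 4) recovers 300.75f.
 */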
static inline double
vfpUFixedToFpD(uint32_t val, bool half, uint8_t imm)
{
    fesetround(FeRoundNearest);
    if (half)
        val = (uint16_t)val;
    double scale = pow(2.0, imm);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    feclearexcept(FeAllExceptions);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    return val / scale;
}

static inline double
vfpSFixedToFpD(int32_t val, bool half, uint8_t imm)
{
    fesetround(FeRoundNearest);
    if (half)
        val = sext<16>(val & mask(16));
    double scale = pow(2.0, imm);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    feclearexcept(FeAllExceptions);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    return val / scale;
}

typedef int VfpSavedState;

// Switch the host rounding mode to the one selected by the guest FPSCR and
// clear any pending host exception flags, returning the previous host
// rounding mode so it can be restored later.
static inline VfpSavedState
prepVfpFpscr(FPSCR fpscr)
{
    int roundingMode = fegetround();
    feclearexcept(FeAllExceptions);
    switch (fpscr.rMode) {
      case VfpRoundNearest:
        fesetround(FeRoundNearest);
        break;
      case VfpRoundUpward:
        fesetround(FeRoundUpward);
        break;
      case VfpRoundDown:
        fesetround(FeRoundDown);
        break;
      case VfpRoundZero:
        fesetround(FeRoundZero);
        break;
    }
    return roundingMode;
}

// Fold the host exception flags accumulated since prepVfpFpscr() into the
// cumulative FPSCR exception bits and restore the saved host rounding mode.
static inline FPSCR
setVfpFpscr(FPSCR fpscr, VfpSavedState state)
{
    int exceptions = fetestexcept(FeAllExceptions);
    if (exceptions & FeInvalid) {
        fpscr.ioc = 1;
    }
    if (exceptions & FeDivByZero) {
        fpscr.dzc = 1;
    }
    if (exceptions & FeOverflow) {
        fpscr.ofc = 1;
    }
    if (exceptions & FeUnderflow) {
        fpscr.ufc = 1;
    }
    if (exceptions & FeInexact) {
        fpscr.ixc = 1;
    }
    fesetround(state);
    return fpscr;
}
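/*
 * Illustrative usage sketch (assumes an implementation that computes on
 * the host FPU, as the helpers above do): a VFP operation would bracket
 * the host arithmetic with prepVfpFpscr()/setVfpFpscr() so the guest
 * rounding mode is honored and host exception flags are folded back into
 * the FPSCR, e.g.:
 *
 *     FPSCR fpscr = ...;                      // current guest FPSCR
 *     VfpSavedState state = prepVfpFpscr(fpscr);
 *     float dest = fixNan(fpscr, op1 + op2, op1, op2);
 *     fpscr = setVfpFpscr(fpscr, state);      // also restores host rounding
 */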
class VfpMacroOp : public PredMacroOp
{
  public:
    static bool
    inScalarBank(IntRegIndex idx)
    {
        return (idx % 32) < 8;
    }

  protected:
    bool wide;

    VfpMacroOp(const char *mnem, ExtMachInst _machInst,
               OpClass __opClass, bool _wide) :
        PredMacroOp(mnem, _machInst, __opClass), wide(_wide)
    {}

    IntRegIndex
    addStride(IntRegIndex idx, unsigned stride)
    {
        if (wide) {
            stride *= 2;
        }
        unsigned offset = idx % 8;
        idx = (IntRegIndex)(idx - offset);
        offset += stride;
        idx = (IntRegIndex)(idx + (offset % 8));
        return idx;
    }

    void
    nextIdxs(IntRegIndex &dest, IntRegIndex &op1, IntRegIndex &op2)
    {
        unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2;
        assert(!inScalarBank(dest));
        dest = addStride(dest, stride);
        op1 = addStride(op1, stride);
        if (!inScalarBank(op2)) {
            op2 = addStride(op2, stride);
        }
    }

    void
    nextIdxs(IntRegIndex &dest, IntRegIndex &op1)
    {
        unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2;
        assert(!inScalarBank(dest));
        dest = addStride(dest, stride);
        if (!inScalarBank(op1)) {
            op1 = addStride(op1, stride);
        }
    }

    void
    nextIdxs(IntRegIndex &dest)
    {
        unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2;
        assert(!inScalarBank(dest));
        dest = addStride(dest, stride);
    }
};

class VfpRegRegOp : public RegRegOp
{
  protected:
    VfpRegRegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
                IntRegIndex _dest, IntRegIndex _op1,
                VfpMicroMode mode = VfpNotAMicroop) :
        RegRegOp(mnem, _machInst, __opClass, _dest, _op1)
    {
        setVfpMicroFlags(mode, flags);
    }
};

class VfpRegImmOp : public RegImmOp
{
  protected:
    VfpRegImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
                IntRegIndex _dest, uint64_t _imm,
                VfpMicroMode mode = VfpNotAMicroop) :
        RegImmOp(mnem, _machInst, __opClass, _dest, _imm)
    {
        setVfpMicroFlags(mode, flags);
    }
};

class VfpRegRegImmOp : public RegRegImmOp
{
  protected:
    VfpRegRegImmOp(const char *mnem, ExtMachInst _machInst,
                   OpClass __opClass, IntRegIndex _dest, IntRegIndex _op1,
                   uint64_t _imm, VfpMicroMode mode = VfpNotAMicroop) :
        RegRegImmOp(mnem, _machInst, __opClass, _dest, _op1, _imm)
    {
        setVfpMicroFlags(mode, flags);
    }
};

class VfpRegRegRegOp : public RegRegRegOp
{
  protected:
    VfpRegRegRegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
                   IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2,
                   VfpMicroMode mode = VfpNotAMicroop) :
        RegRegRegOp(mnem, _machInst, __opClass, _dest, _op1, _op2)
    {
        setVfpMicroFlags(mode, flags);
    }
};

}

#endif //__ARCH_ARM_INSTS_VFP_HH__