arm/insts/vfp.cc

/*
 * Copyright (c) 2010 ARM Limited
 * All rights reserved
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder.  You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Gabe Black
 */
2810SN/A
#include "arch/arm/insts/vfp.hh"

#include <cassert>
#include <cmath>
#include <sstream>
#include <string>
2810SN/A
/*
 * The asm statements below are to keep gcc from reordering code. Otherwise
 * the rounding mode might be set after the operation it was intended for, the
 * exception bits read before it, etc.
 */
2810SN/A
3348SN/Astd::string
3348SN/AFpRegRegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
8232Snate@binkert.org{
5338Sstever@gmail.com    std::stringstream ss;
5338Sstever@gmail.com    printMnemonic(ss);
8786Sgblack@eecs.umich.edu    printReg(ss, dest + FP_Base_DepTag);
2810SN/A    ss << ", ";
2810SN/A    printReg(ss, op1 + FP_Base_DepTag);
2810SN/A    return ss.str();
8856Sandreas.hansson@arm.com}
8856Sandreas.hansson@arm.com
8856Sandreas.hansson@arm.comstd::string
8914Sandreas.hansson@arm.comFpRegImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
8914Sandreas.hansson@arm.com{
8856Sandreas.hansson@arm.com    std::stringstream ss;
8856Sandreas.hansson@arm.com    printMnemonic(ss);
4475SN/A    printReg(ss, dest + FP_Base_DepTag);
5034SN/A    ccprintf(ss, ", #%d", imm);
5034SN/A    return ss.str();
5314SN/A}
5314SN/A
4628SN/Astd::string
5034SN/AFpRegRegImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
5034SN/A{
5034SN/A    std::stringstream ss;
6122SSteve.Reinhardt@amd.com    printMnemonic(ss);
8134SAli.Saidi@ARM.com    printReg(ss, dest + FP_Base_DepTag);
4626SN/A    ss << ", ";
4626SN/A    printReg(ss, op1 + FP_Base_DepTag);
5034SN/A    ccprintf(ss, ", #%d", imm);
6122SSteve.Reinhardt@amd.com    return ss.str();
8883SAli.Saidi@ARM.com}
8833Sdam.sunwoo@arm.com
4458SN/Astd::string
2810SN/AFpRegRegRegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
2810SN/A{
3013SN/A    std::stringstream ss;
8856Sandreas.hansson@arm.com    printMnemonic(ss);
2810SN/A    printReg(ss, dest + FP_Base_DepTag);
3013SN/A    ss << ", ";
8856Sandreas.hansson@arm.com    printReg(ss, op1 + FP_Base_DepTag);
2810SN/A    ss << ", ";
2810SN/A    printReg(ss, op2 + FP_Base_DepTag);
2810SN/A    return ss.str();
2810SN/A}
8856Sandreas.hansson@arm.com
2810SN/Anamespace ArmISA
3013SN/A{
8856Sandreas.hansson@arm.com
3013SN/AVfpSavedState
8856Sandreas.hansson@arm.comprepFpState(uint32_t rMode)
8856Sandreas.hansson@arm.com{
2897SN/A    int roundingMode = fegetround();
4666SN/A    feclearexcept(FeAllExceptions);
8856Sandreas.hansson@arm.com    switch (rMode) {
2897SN/A      case VfpRoundNearest:
2810SN/A        fesetround(FeRoundNearest);
2810SN/A        break;
2844SN/A      case VfpRoundUpward:
2810SN/A        fesetround(FeRoundUpward);
2858SN/A        break;
2858SN/A      case VfpRoundDown:
8856Sandreas.hansson@arm.com        fesetround(FeRoundDown);
8856Sandreas.hansson@arm.com        break;
8711Sandreas.hansson@arm.com      case VfpRoundZero:
2858SN/A        fesetround(FeRoundZero);
2858SN/A        break;
4628SN/A    }
2858SN/A    return roundingMode;
2810SN/A}
2810SN/A
2810SN/Avoid
2810SN/AfinishVfp(FPSCR &fpscr, VfpSavedState state)
2810SN/A{
4022SN/A    int exceptions = fetestexcept(FeAllExceptions);
4022SN/A    bool underflow = false;
4022SN/A    if (exceptions & FeInvalid) {
2810SN/A        fpscr.ioc = 1;
2810SN/A    }
8833Sdam.sunwoo@arm.com    if (exceptions & FeDivByZero) {
2810SN/A        fpscr.dzc = 1;
2810SN/A    }
2810SN/A    if (exceptions & FeOverflow) {
2810SN/A        fpscr.ofc = 1;
8833Sdam.sunwoo@arm.com    }
8833Sdam.sunwoo@arm.com    if (exceptions & FeUnderflow) {
8833Sdam.sunwoo@arm.com        underflow = true;
2810SN/A        fpscr.ufc = 1;
2810SN/A    }
4871SN/A    if ((exceptions & FeInexact) && !(underflow && fpscr.fz)) {
4871SN/A        fpscr.ixc = 1;
4871SN/A    }
4871SN/A    fesetround(state);
4871SN/A}
4871SN/A
4871SN/Atemplate <class fpType>
4871SN/AfpType
4871SN/AfixDest(FPSCR fpscr, fpType val, fpType op1)
4871SN/A{
2810SN/A    int fpClass = std::fpclassify(val);
2810SN/A    fpType junk = 0.0;
2810SN/A    if (fpClass == FP_NAN) {
8833Sdam.sunwoo@arm.com        const bool single = (sizeof(val) == sizeof(float));
2810SN/A        const uint64_t qnan = single ? 0x7fc00000 : ULL(0x7ff8000000000000);
4871SN/A        const bool nan = std::isnan(op1);
8833Sdam.sunwoo@arm.com        if (!nan || (fpscr.dn == 1)) {
8833Sdam.sunwoo@arm.com            val = bitsToFp(qnan, junk);
8833Sdam.sunwoo@arm.com        } else if (nan) {
2810SN/A            val = bitsToFp(fpToBits(op1) | qnan, junk);
2810SN/A        }
2810SN/A    } else if (fpClass == FP_SUBNORMAL && fpscr.fz == 1) {
2810SN/A        // Turn val into a zero with the correct sign;
8833Sdam.sunwoo@arm.com        uint64_t bitMask = ULL(0x1) << (sizeof(fpType) * 8 - 1);
2810SN/A        val = bitsToFp(fpToBits(val) & bitMask, junk);
4871SN/A        feclearexcept(FeInexact);
8833Sdam.sunwoo@arm.com        feraiseexcept(FeUnderflow);
8833Sdam.sunwoo@arm.com    }
8833Sdam.sunwoo@arm.com    return val;
2810SN/A}
2810SN/A
4022SN/Atemplate
4022SN/Afloat fixDest<float>(FPSCR fpscr, float val, float op1);
4022SN/Atemplate
2810SN/Adouble fixDest<double>(FPSCR fpscr, double val, double op1);
2810SN/A
8833Sdam.sunwoo@arm.comtemplate <class fpType>
2810SN/AfpType
2810SN/AfixDest(FPSCR fpscr, fpType val, fpType op1, fpType op2)
2810SN/A{
2810SN/A    int fpClass = std::fpclassify(val);
8833Sdam.sunwoo@arm.com    fpType junk = 0.0;
8833Sdam.sunwoo@arm.com    if (fpClass == FP_NAN) {
8833Sdam.sunwoo@arm.com        const bool single = (sizeof(val) == sizeof(float));
2810SN/A        const uint64_t qnan = single ? 0x7fc00000 : ULL(0x7ff8000000000000);
2810SN/A        const bool nan1 = std::isnan(op1);
2810SN/A        const bool nan2 = std::isnan(op2);
2810SN/A        const bool signal1 = nan1 && ((fpToBits(op1) & qnan) != qnan);
2810SN/A        const bool signal2 = nan2 && ((fpToBits(op2) & qnan) != qnan);
8833Sdam.sunwoo@arm.com        if ((!nan1 && !nan2) || (fpscr.dn == 1)) {
2810SN/A            val = bitsToFp(qnan, junk);
4871SN/A        } else if (signal1) {
8833Sdam.sunwoo@arm.com            val = bitsToFp(fpToBits(op1) | qnan, junk);
8833Sdam.sunwoo@arm.com        } else if (signal2) {
8833Sdam.sunwoo@arm.com            val = bitsToFp(fpToBits(op2) | qnan, junk);
2810SN/A        } else if (nan1) {
2810SN/A            val = op1;
2810SN/A        } else if (nan2) {
2810SN/A            val = op2;
8833Sdam.sunwoo@arm.com        }
2810SN/A    } else if (fpClass == FP_SUBNORMAL && fpscr.fz == 1) {
4871SN/A        // Turn val into a zero with the correct sign;
8833Sdam.sunwoo@arm.com        uint64_t bitMask = ULL(0x1) << (sizeof(fpType) * 8 - 1);
8833Sdam.sunwoo@arm.com        val = bitsToFp(fpToBits(val) & bitMask, junk);
8833Sdam.sunwoo@arm.com        feclearexcept(FeInexact);
2810SN/A        feraiseexcept(FeUnderflow);
2810SN/A    }
4022SN/A    return val;
4022SN/A}
4022SN/A
2810SN/Atemplate
2810SN/Afloat fixDest<float>(FPSCR fpscr, float val, float op1, float op2);
8833Sdam.sunwoo@arm.comtemplate
2810SN/Adouble fixDest<double>(FPSCR fpscr, double val, double op1, double op2);
2810SN/A
2810SN/Atemplate <class fpType>
2810SN/AfpType
8833Sdam.sunwoo@arm.comfixDivDest(FPSCR fpscr, fpType val, fpType op1, fpType op2)
8833Sdam.sunwoo@arm.com{
8833Sdam.sunwoo@arm.com    fpType mid = fixDest(fpscr, val, op1, op2);
2810SN/A    const bool single = (sizeof(fpType) == sizeof(float));
2810SN/A    const fpType junk = 0.0;
2810SN/A    if ((single && (val == bitsToFp(0x00800000, junk) ||
2810SN/A                    val == bitsToFp(0x80800000, junk))) ||
2810SN/A        (!single && (val == bitsToFp(ULL(0x0010000000000000), junk) ||
8833Sdam.sunwoo@arm.com                     val == bitsToFp(ULL(0x8010000000000000), junk)))
2810SN/A        ) {
4871SN/A        __asm__ __volatile__("" : "=m" (op1) : "m" (op1));
8833Sdam.sunwoo@arm.com        fesetround(FeRoundZero);
8833Sdam.sunwoo@arm.com        fpType temp = 0.0;
8833Sdam.sunwoo@arm.com        __asm__ __volatile__("" : "=m" (temp) : "m" (temp));
2810SN/A        temp = op1 / op2;
2810SN/A        if (flushToZero(temp)) {
2810SN/A            feraiseexcept(FeUnderflow);
2810SN/A            if (fpscr.fz) {
8833Sdam.sunwoo@arm.com                feclearexcept(FeInexact);
2810SN/A                mid = temp;
4871SN/A            }
8833Sdam.sunwoo@arm.com        }
8833Sdam.sunwoo@arm.com        __asm__ __volatile__("" :: "m" (temp));
8833Sdam.sunwoo@arm.com    }
2810SN/A    return mid;
2810SN/A}
4022SN/A
4022SN/Atemplate
4022SN/Afloat fixDivDest<float>(FPSCR fpscr, float val, float op1, float op2);
2810SN/Atemplate
2810SN/Adouble fixDivDest<double>(FPSCR fpscr, double val, double op1, double op2);
2810SN/A
2810SN/Afloat
2810SN/AfixFpDFpSDest(FPSCR fpscr, double val)
2810SN/A{
8833Sdam.sunwoo@arm.com    const float junk = 0.0;
2810SN/A    float op1 = 0.0;
8833Sdam.sunwoo@arm.com    if (std::isnan(val)) {
8833Sdam.sunwoo@arm.com        uint64_t valBits = fpToBits(val);
8833Sdam.sunwoo@arm.com        uint32_t op1Bits = bits(valBits, 50, 29) |
2810SN/A                           (mask(9) << 22) |
2810SN/A                           (bits(valBits, 63) << 31);
2810SN/A        op1 = bitsToFp(op1Bits, junk);
2810SN/A    }
2810SN/A    float mid = fixDest(fpscr, (float)val, op1);
8833Sdam.sunwoo@arm.com    if (fpscr.fz && fetestexcept(FeUnderflow | FeInexact) ==
2810SN/A                    (FeUnderflow | FeInexact)) {
2810SN/A        feclearexcept(FeInexact);
8833Sdam.sunwoo@arm.com    }
8833Sdam.sunwoo@arm.com    if (mid == bitsToFp(0x00800000, junk) ||
8833Sdam.sunwoo@arm.com        mid == bitsToFp(0x80800000, junk)) {
2810SN/A        __asm__ __volatile__("" : "=m" (val) : "m" (val));
2810SN/A        fesetround(FeRoundZero);
2810SN/A        float temp = 0.0;
2810SN/A        __asm__ __volatile__("" : "=m" (temp) : "m" (temp));
8833Sdam.sunwoo@arm.com        temp = val;
2810SN/A        if (flushToZero(temp)) {
2810SN/A            feraiseexcept(FeUnderflow);
8833Sdam.sunwoo@arm.com            if (fpscr.fz) {
8833Sdam.sunwoo@arm.com                feclearexcept(FeInexact);
8833Sdam.sunwoo@arm.com                mid = temp;
2810SN/A            }
2810SN/A        }
4022SN/A        __asm__ __volatile__("" :: "m" (temp));
4022SN/A    }
4022SN/A    return mid;
2810SN/A}
2810SN/A
2810SN/Adouble
2810SN/AfixFpSFpDDest(FPSCR fpscr, float val)
2810SN/A{
2810SN/A    const double junk = 0.0;
8833Sdam.sunwoo@arm.com    double op1 = 0.0;
2810SN/A    if (std::isnan(val)) {
8833Sdam.sunwoo@arm.com        uint32_t valBits = fpToBits(val);
8833Sdam.sunwoo@arm.com        uint64_t op1Bits = ((uint64_t)bits(valBits, 21, 0) << 29) |
8833Sdam.sunwoo@arm.com                           (mask(12) << 51) |
2810SN/A                           ((uint64_t)bits(valBits, 31) << 63);
2810SN/A        op1 = bitsToFp(op1Bits, junk);
2810SN/A    }
2810SN/A    double mid = fixDest(fpscr, (double)val, op1);
2810SN/A    if (mid == bitsToFp(ULL(0x0010000000000000), junk) ||
8833Sdam.sunwoo@arm.com        mid == bitsToFp(ULL(0x8010000000000000), junk)) {
2810SN/A        __asm__ __volatile__("" : "=m" (val) : "m" (val));
2810SN/A        fesetround(FeRoundZero);
8833Sdam.sunwoo@arm.com        double temp = 0.0;
8833Sdam.sunwoo@arm.com        __asm__ __volatile__("" : "=m" (temp) : "m" (temp));
8833Sdam.sunwoo@arm.com        temp = val;
2810SN/A        if (flushToZero(temp)) {
2810SN/A            feraiseexcept(FeUnderflow);
2810SN/A            if (fpscr.fz) {
2810SN/A                feclearexcept(FeInexact);
8833Sdam.sunwoo@arm.com                mid = temp;
2810SN/A            }
2810SN/A        }
8833Sdam.sunwoo@arm.com        __asm__ __volatile__("" :: "m" (temp));
8833Sdam.sunwoo@arm.com    }
8833Sdam.sunwoo@arm.com    return mid;
2810SN/A}
2810SN/A
4022SN/Afloat
4022SN/AvcvtFpSFpH(FPSCR &fpscr, float op, float dest, bool top)
4022SN/A{
2810SN/A    float junk = 0.0;
2810SN/A    uint32_t destBits = fpToBits(dest);
2810SN/A    uint32_t opBits = fpToBits(op);
2810SN/A    // Extract the operand.
2810SN/A    bool neg = bits(opBits, 31);
2810SN/A    uint32_t exponent = bits(opBits, 30, 23);
2810SN/A    uint32_t oldMantissa = bits(opBits, 22, 0);
2810SN/A    uint32_t mantissa = oldMantissa >> (23 - 10);
8833Sdam.sunwoo@arm.com    // Do the conversion.
8833Sdam.sunwoo@arm.com    uint32_t extra = oldMantissa & mask(23 - 10);
8833Sdam.sunwoo@arm.com    if (exponent == 0xff) {
8833Sdam.sunwoo@arm.com        if (oldMantissa != 0) {
2810SN/A            // Nans.
2810SN/A            if (bits(mantissa, 9) == 0) {
2810SN/A                // Signalling nan.
2810SN/A                fpscr.ioc = 1;
2810SN/A            }
8833Sdam.sunwoo@arm.com            if (fpscr.ahp) {
2810SN/A                mantissa = 0;
2810SN/A                exponent = 0;
8833Sdam.sunwoo@arm.com                fpscr.ioc = 1;
8833Sdam.sunwoo@arm.com            } else if (fpscr.dn) {
8833Sdam.sunwoo@arm.com                mantissa = (1 << 9);
2810SN/A                exponent = 0x1f;
2810SN/A                neg = false;
2810SN/A            } else {
2810SN/A                exponent = 0x1f;
8833Sdam.sunwoo@arm.com                mantissa |= (1 << 9);
2810SN/A            }
2810SN/A        } else {
8833Sdam.sunwoo@arm.com            // Infinities.
8833Sdam.sunwoo@arm.com            exponent = 0x1F;
8833Sdam.sunwoo@arm.com            if (fpscr.ahp) {
2810SN/A                fpscr.ioc = 1;
2810SN/A                mantissa = 0x3ff;
2810SN/A            } else {
2810SN/A                mantissa = 0;
2810SN/A            }
2810SN/A        }
2810SN/A    } else if (exponent == 0 && oldMantissa == 0) {
2810SN/A        // Zero, don't need to do anything.
2810SN/A    } else {
2810SN/A        // Normalized or denormalized numbers.
2810SN/A
2810SN/A        bool inexact = (extra != 0);
2810SN/A
2810SN/A        if (exponent == 0) {
2810SN/A            // Denormalized.
2810SN/A
2810SN/A            // If flush to zero is on, this shouldn't happen.
2810SN/A            assert(fpscr.fz == 0);
2810SN/A
2810SN/A            // Check for underflow
2810SN/A            if (inexact || fpscr.ufe)
2810SN/A                fpscr.ufc = 1;
2810SN/A
2810SN/A            // Handle rounding.
2810SN/A            unsigned mode = fpscr.rMode;
2810SN/A            if ((mode == VfpRoundUpward && !neg && extra) ||
2810SN/A                (mode == VfpRoundDown && neg && extra) ||
2810SN/A                (mode == VfpRoundNearest &&
2810SN/A                 (extra > (1 << 9) ||
2810SN/A                  (extra == (1 << 9) && bits(mantissa, 0))))) {
2810SN/A                mantissa++;
2810SN/A            }
2810SN/A
2810SN/A            // See if the number became normalized after rounding.
2810SN/A            if (mantissa == (1 << 10)) {
2810SN/A                mantissa = 0;
2826SN/A                exponent = 1;
4626SN/A            }
8833Sdam.sunwoo@arm.com        } else {
4626SN/A            // Normalized.
4626SN/A
8833Sdam.sunwoo@arm.com            // We need to track the dropped bits differently since
4626SN/A            // more can be dropped by denormalizing.
8833Sdam.sunwoo@arm.com            bool topOne = bits(extra, 12);
8833Sdam.sunwoo@arm.com            bool restZeros = bits(extra, 11, 0) == 0;
8833Sdam.sunwoo@arm.com
4626SN/A            if (exponent <= (127 - 15)) {
4626SN/A                // The result is too small. Denormalize.
4626SN/A                mantissa |= (1 << 10);
4626SN/A                while (mantissa && exponent <= (127 - 15)) {
4626SN/A                    restZeros = restZeros && !topOne;
4626SN/A                    topOne = bits(mantissa, 0);
4626SN/A                    mantissa = mantissa >> 1;
4626SN/A                    exponent++;
8833Sdam.sunwoo@arm.com                }
4626SN/A                if (topOne || !restZeros)
4626SN/A                    inexact = true;
4626SN/A                exponent = 0;
4626SN/A            } else {
8833Sdam.sunwoo@arm.com                // Change bias.
8833Sdam.sunwoo@arm.com                exponent -= (127 - 15);
8833Sdam.sunwoo@arm.com            }
4626SN/A
4626SN/A            if (exponent == 0 && (inexact || fpscr.ufe)) {
4626SN/A                // Underflow
4626SN/A                fpscr.ufc = 1;
4626SN/A            }
8833Sdam.sunwoo@arm.com
4626SN/A            // Handle rounding.
4871SN/A            unsigned mode = fpscr.rMode;
8833Sdam.sunwoo@arm.com            bool nonZero = topOne || !restZeros;
8833Sdam.sunwoo@arm.com            if ((mode == VfpRoundUpward && !neg && nonZero) ||
8833Sdam.sunwoo@arm.com                (mode == VfpRoundDown && neg && nonZero) ||
4626SN/A                (mode == VfpRoundNearest && topOne &&
4626SN/A                 (!restZeros || bits(mantissa, 0)))) {
4626SN/A                mantissa++;
4626SN/A            }
8833Sdam.sunwoo@arm.com
4626SN/A            // See if we rounded up and need to bump the exponent.
4871SN/A            if (mantissa == (1 << 10)) {
8833Sdam.sunwoo@arm.com                mantissa = 0;
8833Sdam.sunwoo@arm.com                exponent++;
8833Sdam.sunwoo@arm.com            }
4626SN/A
4626SN/A            // Deal with overflow
4626SN/A            if (fpscr.ahp) {
4626SN/A                if (exponent >= 0x20) {
4626SN/A                    exponent = 0x1f;
4626SN/A                    mantissa = 0x3ff;
4626SN/A                    fpscr.ioc = 1;
8833Sdam.sunwoo@arm.com                    // Supress inexact exception.
4626SN/A                    inexact = false;
4626SN/A                }
4626SN/A            } else {
4626SN/A                if (exponent >= 0x1f) {
8833Sdam.sunwoo@arm.com                    if ((mode == VfpRoundNearest) ||
8833Sdam.sunwoo@arm.com                        (mode == VfpRoundUpward && !neg) ||
8833Sdam.sunwoo@arm.com                        (mode == VfpRoundDown && neg)) {
4626SN/A                        // Overflow to infinity.
4626SN/A                        exponent = 0x1f;
4626SN/A                        mantissa = 0;
4626SN/A                    } else {
4626SN/A                        // Overflow to max normal.
8833Sdam.sunwoo@arm.com                        exponent = 0x1e;
4626SN/A                        mantissa = 0x3ff;
4871SN/A                    }
8833Sdam.sunwoo@arm.com                    fpscr.ofc = 1;
8833Sdam.sunwoo@arm.com                    inexact = true;
8833Sdam.sunwoo@arm.com                }
4626SN/A            }
4626SN/A        }
4626SN/A
4626SN/A        if (inexact) {
8833Sdam.sunwoo@arm.com            fpscr.ixc = 1;
4626SN/A        }
4871SN/A    }
8833Sdam.sunwoo@arm.com    // Reassemble and install the result.
8833Sdam.sunwoo@arm.com    uint32_t result = bits(mantissa, 9, 0);
8833Sdam.sunwoo@arm.com    replaceBits(result, 14, 10, exponent);
4626SN/A    if (neg)
4626SN/A        result |= (1 << 15);
4626SN/A    if (top)
4626SN/A        replaceBits(destBits, 31, 16, result);
4626SN/A    else
4626SN/A        replaceBits(destBits, 15, 0, result);
4626SN/A    return bitsToFp(destBits, junk);
8833Sdam.sunwoo@arm.com}
4626SN/A
4626SN/Afloat
4626SN/AvcvtFpHFpS(FPSCR &fpscr, float op, bool top)
4626SN/A{
8833Sdam.sunwoo@arm.com    float junk = 0.0;
8833Sdam.sunwoo@arm.com    uint32_t opBits = fpToBits(op);
8833Sdam.sunwoo@arm.com    // Extract the operand.
4626SN/A    if (top)
4626SN/A        opBits = bits(opBits, 31, 16);
4626SN/A    else
4626SN/A        opBits = bits(opBits, 15, 0);
4626SN/A    // Extract the bitfields.
8833Sdam.sunwoo@arm.com    bool neg = bits(opBits, 15);
4626SN/A    uint32_t exponent = bits(opBits, 14, 10);
4871SN/A    uint32_t mantissa = bits(opBits, 9, 0);
8833Sdam.sunwoo@arm.com    // Do the conversion.
8833Sdam.sunwoo@arm.com    if (exponent == 0) {
8833Sdam.sunwoo@arm.com        if (mantissa != 0) {
4626SN/A            // Normalize the value.
4626SN/A            exponent = exponent + (127 - 15) + 1;
4626SN/A            while (mantissa < (1 << 10)) {
4626SN/A                mantissa = mantissa << 1;
8833Sdam.sunwoo@arm.com                exponent--;
4626SN/A            }
4871SN/A        }
4871SN/A        mantissa = mantissa << (23 - 10);
8833Sdam.sunwoo@arm.com    } else if (exponent == 0x1f && !fpscr.ahp) {
8833Sdam.sunwoo@arm.com        // Infinities and nans.
8833Sdam.sunwoo@arm.com        exponent = 0xff;
4626SN/A        if (mantissa != 0) {
4626SN/A            // Nans.
4626SN/A            mantissa = mantissa << (23 - 10);
4626SN/A            if (bits(mantissa, 22) == 0) {
4626SN/A                // Signalling nan.
4626SN/A                fpscr.ioc = 1;
4626SN/A                mantissa |= (1 << 22);
8833Sdam.sunwoo@arm.com            }
4626SN/A            if (fpscr.dn) {
4626SN/A                mantissa &= ~mask(22);
4626SN/A                neg = false;
4626SN/A            }
8833Sdam.sunwoo@arm.com        }
8833Sdam.sunwoo@arm.com    } else {
8833Sdam.sunwoo@arm.com        exponent = exponent + (127 - 15);
4626SN/A        mantissa = mantissa << (23 - 10);
4626SN/A    }
4626SN/A    // Reassemble the result.
4626SN/A    uint32_t result = bits(mantissa, 22, 0);
4626SN/A    replaceBits(result, 30, 23, exponent);
8833Sdam.sunwoo@arm.com    if (neg)
4626SN/A        result |= (1 << 31);
4871SN/A    return bitsToFp(result, junk);
4871SN/A}
8833Sdam.sunwoo@arm.com
8833Sdam.sunwoo@arm.comuint64_t
8833Sdam.sunwoo@arm.comvfpFpSToFixed(float val, bool isSigned, bool half,
4626SN/A              uint8_t imm, bool rzero)
4626SN/A{
4626SN/A    int rmode = rzero ? FeRoundZero : fegetround();
4626SN/A    __asm__ __volatile__("" : "=m" (rmode) : "m" (rmode));
4626SN/A    fesetround(FeRoundNearest);
4626SN/A    val = val * powf(2.0, imm);
4626SN/A    __asm__ __volatile__("" : "=m" (val) : "m" (val));
8833Sdam.sunwoo@arm.com    fesetround(rmode);
4626SN/A    feclearexcept(FeAllExceptions);
4626SN/A    __asm__ __volatile__("" : "=m" (val) : "m" (val));
4626SN/A    float origVal = val;
4626SN/A    val = rintf(val);
8833Sdam.sunwoo@arm.com    int fpType = std::fpclassify(val);
8833Sdam.sunwoo@arm.com    if (fpType == FP_SUBNORMAL || fpType == FP_NAN) {
8833Sdam.sunwoo@arm.com        if (fpType == FP_NAN) {
4626SN/A            feraiseexcept(FeInvalid);
4626SN/A        }
4626SN/A        val = 0.0;
4626SN/A    } else if (origVal != val) {
4626SN/A        switch (rmode) {
8833Sdam.sunwoo@arm.com          case FeRoundNearest:
4626SN/A            if (origVal - val > 0.5)
4871SN/A                val += 1.0;
4871SN/A            else if (val - origVal > 0.5)
4871SN/A                val -= 1.0;
8833Sdam.sunwoo@arm.com            break;
8833Sdam.sunwoo@arm.com          case FeRoundDown:
8833Sdam.sunwoo@arm.com            if (origVal < val)
4626SN/A                val -= 1.0;
4626SN/A            break;
4626SN/A          case FeRoundUpward:
4626SN/A            if (origVal > val)
4626SN/A                val += 1.0;
4626SN/A            break;
4626SN/A        }
4626SN/A        feraiseexcept(FeInexact);
4626SN/A    }
4626SN/A
4626SN/A    if (isSigned) {
4626SN/A        if (half) {
4626SN/A            if ((double)val < (int16_t)(1 << 15)) {
4626SN/A                feraiseexcept(FeInvalid);
4626SN/A                feclearexcept(FeInexact);
4626SN/A                return (int16_t)(1 << 15);
4626SN/A            }
4626SN/A            if ((double)val > (int16_t)mask(15)) {
4626SN/A                feraiseexcept(FeInvalid);
4626SN/A                feclearexcept(FeInexact);
4626SN/A                return (int16_t)mask(15);
4626SN/A            }
4626SN/A            return (int16_t)val;
4626SN/A        } else {
4626SN/A            if ((double)val < (int32_t)(1 << 31)) {
4626SN/A                feraiseexcept(FeInvalid);
4626SN/A                feclearexcept(FeInexact);
4626SN/A                return (int32_t)(1 << 31);
4626SN/A            }
4626SN/A            if ((double)val > (int32_t)mask(31)) {
4626SN/A                feraiseexcept(FeInvalid);
4626SN/A                feclearexcept(FeInexact);
4626SN/A                return (int32_t)mask(31);
4626SN/A            }
4626SN/A            return (int32_t)val;
4626SN/A        }
4626SN/A    } else {
4626SN/A        if (half) {
4626SN/A            if ((double)val < 0) {
4626SN/A                feraiseexcept(FeInvalid);
4626SN/A                feclearexcept(FeInexact);
4626SN/A                return 0;
4626SN/A            }
4626SN/A            if ((double)val > (mask(16))) {
4626SN/A                feraiseexcept(FeInvalid);
8833Sdam.sunwoo@arm.com                feclearexcept(FeInexact);
8833Sdam.sunwoo@arm.com                return mask(16);
8833Sdam.sunwoo@arm.com            }
8833Sdam.sunwoo@arm.com            return (uint16_t)val;
4626SN/A        } else {
4626SN/A            if ((double)val < 0) {
4626SN/A                feraiseexcept(FeInvalid);
4626SN/A                feclearexcept(FeInexact);
4626SN/A                return 0;
8833Sdam.sunwoo@arm.com            }
4626SN/A            if ((double)val > (mask(32))) {
4626SN/A                feraiseexcept(FeInvalid);
8833Sdam.sunwoo@arm.com                feclearexcept(FeInexact);
8833Sdam.sunwoo@arm.com                return mask(32);
8833Sdam.sunwoo@arm.com            }
4626SN/A            return (uint32_t)val;
4626SN/A        }
4626SN/A    }
4626SN/A}
8833Sdam.sunwoo@arm.com
4626SN/Afloat
4626SN/AvfpUFixedToFpS(FPSCR fpscr, uint32_t val, bool half, uint8_t imm)
8833Sdam.sunwoo@arm.com{
8833Sdam.sunwoo@arm.com    fesetround(FeRoundNearest);
8833Sdam.sunwoo@arm.com    if (half)
4626SN/A        val = (uint16_t)val;
4626SN/A    float scale = powf(2.0, imm);
4626SN/A    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
4626SN/A    feclearexcept(FeAllExceptions);
4626SN/A    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
4626SN/A    return fixDivDest(fpscr, val / scale, (float)val, scale);
4626SN/A}
4626SN/A
4626SN/Afloat
4626SN/AvfpSFixedToFpS(FPSCR fpscr, int32_t val, bool half, uint8_t imm)
4626SN/A{
4626SN/A    fesetround(FeRoundNearest);
4626SN/A    if (half)
8833Sdam.sunwoo@arm.com        val = sext<16>(val & mask(16));
8833Sdam.sunwoo@arm.com    float scale = powf(2.0, imm);
8833Sdam.sunwoo@arm.com    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
8833Sdam.sunwoo@arm.com    feclearexcept(FeAllExceptions);
4626SN/A    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
4626SN/A    return fixDivDest(fpscr, val / scale, (float)val, scale);
4626SN/A}
4626SN/A
4626SN/Auint64_t
8833Sdam.sunwoo@arm.comvfpFpDToFixed(double val, bool isSigned, bool half,
4626SN/A              uint8_t imm, bool rzero)
4626SN/A{
8833Sdam.sunwoo@arm.com    int rmode = rzero ? FeRoundZero : fegetround();
8833Sdam.sunwoo@arm.com    fesetround(FeRoundNearest);
8833Sdam.sunwoo@arm.com    val = val * pow(2.0, imm);
4626SN/A    __asm__ __volatile__("" : "=m" (val) : "m" (val));
4626SN/A    fesetround(rmode);
4626SN/A    feclearexcept(FeAllExceptions);
4626SN/A    __asm__ __volatile__("" : "=m" (val) : "m" (val));
8833Sdam.sunwoo@arm.com    double origVal = val;
4626SN/A    val = rint(val);
4626SN/A    int fpType = std::fpclassify(val);
8833Sdam.sunwoo@arm.com    if (fpType == FP_SUBNORMAL || fpType == FP_NAN) {
8833Sdam.sunwoo@arm.com        if (fpType == FP_NAN) {
8833Sdam.sunwoo@arm.com            feraiseexcept(FeInvalid);
4626SN/A        }
4626SN/A        val = 0.0;
4626SN/A    } else if (origVal != val) {
4626SN/A        switch (rmode) {
4626SN/A          case FeRoundNearest:
4626SN/A            if (origVal - val > 0.5)
4626SN/A                val += 1.0;
4626SN/A            else if (val - origVal > 0.5)
4626SN/A                val -= 1.0;
4626SN/A            break;
4626SN/A          case FeRoundDown:
4626SN/A            if (origVal < val)
4626SN/A                val -= 1.0;
8833Sdam.sunwoo@arm.com            break;
8833Sdam.sunwoo@arm.com          case FeRoundUpward:
8833Sdam.sunwoo@arm.com            if (origVal > val)
8833Sdam.sunwoo@arm.com                val += 1.0;
4626SN/A            break;
4626SN/A        }
4626SN/A        feraiseexcept(FeInexact);
4626SN/A    }
4626SN/A    if (isSigned) {
8833Sdam.sunwoo@arm.com        if (half) {
4626SN/A            if (val < (int16_t)(1 << 15)) {
4626SN/A                feraiseexcept(FeInvalid);
8833Sdam.sunwoo@arm.com                feclearexcept(FeInexact);
8833Sdam.sunwoo@arm.com                return (int16_t)(1 << 15);
8833Sdam.sunwoo@arm.com            }
4626SN/A            if (val > (int16_t)mask(15)) {
4626SN/A                feraiseexcept(FeInvalid);
8833Sdam.sunwoo@arm.com                feclearexcept(FeInexact);
4626SN/A                return (int16_t)mask(15);
4626SN/A            }
8833Sdam.sunwoo@arm.com            return (int16_t)val;
4626SN/A        } else {
8833Sdam.sunwoo@arm.com            if (val < (int32_t)(1 << 31)) {
8833Sdam.sunwoo@arm.com                feraiseexcept(FeInvalid);
8833Sdam.sunwoo@arm.com                feclearexcept(FeInexact);
4626SN/A                return (int32_t)(1 << 31);
4626SN/A            }
4626SN/A            if (val > (int32_t)mask(31)) {
8833Sdam.sunwoo@arm.com                feraiseexcept(FeInvalid);
4626SN/A                feclearexcept(FeInexact);
4626SN/A                return (int32_t)mask(31);
8833Sdam.sunwoo@arm.com            }
4626SN/A            return (int32_t)val;
8833Sdam.sunwoo@arm.com        }
8833Sdam.sunwoo@arm.com    } else {
8833Sdam.sunwoo@arm.com        if (half) {
4626SN/A            if (val < 0) {
4626SN/A                feraiseexcept(FeInvalid);
4626SN/A                feclearexcept(FeInexact);
4626SN/A                return 0;
4626SN/A            }
4626SN/A            if (val > mask(16)) {
2810SN/A                feraiseexcept(FeInvalid);
3503SN/A                feclearexcept(FeInexact);
3503SN/A                return mask(16);
3503SN/A            }
3503SN/A            return (uint16_t)val;
4626SN/A        } else {
4626SN/A            if (val < 0) {
3503SN/A                feraiseexcept(FeInvalid);
4626SN/A                feclearexcept(FeInexact);
3503SN/A                return 0;
3503SN/A            }
3503SN/A            if (val > mask(32)) {
4626SN/A                feraiseexcept(FeInvalid);
3503SN/A                feclearexcept(FeInexact);
3503SN/A                return mask(32);
3503SN/A            }
3503SN/A            return (uint32_t)val;
3503SN/A        }
    }
}

/**
 * Convert an unsigned fixed point number to double precision.
 *
 * The host is switched to round-to-nearest and its exception flags are
 * cleared before the division by 2^imm; the quotient is then passed
 * through fixDivDest(). The host rounding mode is not restored here.
 *
 * @param fpscr Current FPSCR (read only; passed by value).
 * @param val Fixed point value; only the low 16 bits are used if half.
 * @param half Whether the source is 16 bits wide rather than 32.
 * @param imm Number of fraction bits (the scale is 2^imm).
 * @return The double precision result.
 */
double
vfpUFixedToFpD(FPSCR fpscr, uint32_t val, bool half, uint8_t imm)
{
    fesetround(FeRoundNearest);
    if (half) {
        // Drop everything above the low halfword.
        val = (uint16_t)val;
    }
    double divisor = pow(2.0, imm);
    // Barriers around the flag clear so that only the division below
    // contributes to the exception flags.
    __asm__ __volatile__("" : "=m" (divisor) : "m" (divisor));
    feclearexcept(FeAllExceptions);
    __asm__ __volatile__("" : "=m" (divisor) : "m" (divisor));
    return fixDivDest(fpscr, val / divisor, (double)val, divisor);
}

/**
 * Convert a signed fixed point number to double precision.
 *
 * The host is switched to round-to-nearest and its exception flags are
 * cleared before the division by 2^imm; the quotient is then passed
 * through fixDivDest(). The host rounding mode is not restored here.
 *
 * @param fpscr Current FPSCR (read only; passed by value).
 * @param val Fixed point value; if half, the low 16 bits are sign
 *            extended and the rest ignored.
 * @param half Whether the source is 16 bits wide rather than 32.
 * @param imm Number of fraction bits (the scale is 2^imm).
 * @return The double precision result.
 */
double
vfpSFixedToFpD(FPSCR fpscr, int32_t val, bool half, uint8_t imm)
{
    fesetround(FeRoundNearest);
    if (half) {
        // Keep the low halfword and sign extend it.
        val = sext<16>(val & mask(16));
    }
    double divisor = pow(2.0, imm);
    // Barriers around the flag clear so that only the division below
    // contributes to the exception flags.
    __asm__ __volatile__("" : "=m" (divisor) : "m" (divisor));
    feclearexcept(FeAllExceptions);
    __asm__ __volatile__("" : "=m" (divisor) : "m" (divisor));
    return fixDivDest(fpscr, val / divisor, (double)val, divisor);
}

/*
 * Evaluate a two operand floating point function on the host and adjust
 * the result.
 *
 * fpscr  - status/control register; idc may be set here, dn selects the
 *          default NaN, and the register is passed on to finishVfp.
 * op1    - first operand.
 * op2    - second operand.
 * func   - host function that performs the actual operation.
 * flush  - when true, the flushToZero checks on operands and result are
 *          applied.
 * rMode  - rounding mode passed to prepFpState; the underflow correction
 *          below is skipped when it equals VfpRoundZero.
 *
 * Returns the adjusted result of func(op1, op2).
 */
template <class fpType>
fpType
FpOp::binaryOp(FPSCR &fpscr, fpType op1, fpType op2,
               fpType (*func)(fpType, fpType),
               bool flush, uint32_t rMode) const
{
    const bool single = (sizeof(fpType) == sizeof(float));
    // Only used to select the bitsToFp overload for fpType.
    fpType junk = 0.0;

    if (flush && flushToZero(op1, op2))
        fpscr.idc = 1;
    VfpSavedState state = prepFpState(rMode);
    // The empty asm statements name the values as memory operands on both
    // sides of the call; presumably this keeps the compiler from moving
    // the floating point operation across the host FP state changes.
    __asm__ __volatile__ ("" : "=m" (op1), "=m" (op2), "=m" (state)
                             : "m" (op1), "m" (op2), "m" (state));
    fpType dest = func(op1, op2);
    __asm__ __volatile__ ("" : "=m" (dest) : "m" (dest));

    int fpClass = std::fpclassify(dest);
    // Get NAN behavior right. This varies between x86 and ARM.
    if (fpClass == FP_NAN) {
        // Quiet NaN bit pattern for the operand width: exponent all ones
        // plus the top fraction bit.
        const uint64_t qnan =
            single ? 0x7fc00000 : ULL(0x7ff8000000000000);
        const bool nan1 = std::isnan(op1);
        const bool nan2 = std::isnan(op2);
        // A NaN operand that lacks any of the qnan bits is signaling.
        const bool signal1 = nan1 && ((fpToBits(op1) & qnan) != qnan);
        const bool signal2 = nan2 && ((fpToBits(op2) & qnan) != qnan);
        if ((!nan1 && !nan2) || (fpscr.dn == 1)) {
            // NaN produced by the operation itself, or default NaN mode.
            dest = bitsToFp(qnan, junk);
        } else if (signal1) {
            // Signaling operands take priority and are returned quieted.
            dest = bitsToFp(fpToBits(op1) | qnan, junk);
        } else if (signal2) {
            dest = bitsToFp(fpToBits(op2) | qnan, junk);
        } else if (nan1) {
            dest = op1;
        } else if (nan2) {
            dest = op2;
        }
    } else if (flush && flushToZero(dest)) {
        feraiseexcept(FeUnderflow);
    } else if ((
                (single && (dest == bitsToFp(0x00800000, junk) ||
                     dest == bitsToFp(0x80800000, junk))) ||
                (!single &&
                    (dest == bitsToFp(ULL(0x0010000000000000), junk) ||
                     dest == bitsToFp(ULL(0x8010000000000000), junk)))
               ) && rMode != VfpRoundZero) {
        /*
         * Correct for the fact that underflow is detected -before- rounding
         * in ARM and -after- rounding in x86.
         *
         * The result is the smallest magnitude normal number of either
         * sign, so redo the operation rounding toward zero and see whether
         * that result would have been flushed.
         */
        fesetround(FeRoundZero);
        __asm__ __volatile__ ("" : "=m" (op1), "=m" (op2)
                                 : "m" (op1), "m" (op2));
        fpType temp = func(op1, op2);
        __asm__ __volatile__ ("" : "=m" (temp) : "m" (temp));
        if (flush && flushToZero(temp)) {
            dest = temp;
        }
    }
    finishVfp(fpscr, state);
    return dest;
}

// Explicit instantiations for single and double precision operands.
template
float FpOp::binaryOp(FPSCR &fpscr, float op1, float op2,
                     float (*func)(float, float),
                     bool flush, uint32_t rMode) const;
template
double FpOp::binaryOp(FPSCR &fpscr, double op1, double op2,
                      double (*func)(double, double),
                      bool flush, uint32_t rMode) const;

/*
 * Evaluate a one operand floating point function on the host and adjust
 * the result.
 *
 * fpscr  - status/control register; idc may be set here, dn selects the
 *          default NaN, and the register is passed on to finishVfp.
 * op1    - the operand.
 * func   - host function that performs the actual operation.
 * flush  - when true, the flushToZero checks on operand and result are
 *          applied.
 * rMode  - rounding mode passed to prepFpState; the underflow correction
 *          below is skipped when it equals VfpRoundZero.
 *
 * Returns the adjusted result of func(op1).
 */
template <class fpType>
fpType
FpOp::unaryOp(FPSCR &fpscr, fpType op1, fpType (*func)(fpType),
              bool flush, uint32_t rMode) const
{
    const bool single = (sizeof(fpType) == sizeof(float));
    // Only used to select the bitsToFp overload for fpType.
    fpType junk = 0.0;

    if (flush && flushToZero(op1))
        fpscr.idc = 1;
    VfpSavedState state = prepFpState(rMode);
    // The empty asm statements name the values as memory operands on both
    // sides of the call; presumably this keeps the compiler from moving
    // the floating point operation across the host FP state changes.
    __asm__ __volatile__ ("" : "=m" (op1), "=m" (state)
                             : "m" (op1), "m" (state));
    fpType dest = func(op1);
    __asm__ __volatile__ ("" : "=m" (dest) : "m" (dest));

    int fpClass = std::fpclassify(dest);
    // Get NAN behavior right. This varies between x86 and ARM.
    if (fpClass == FP_NAN) {
        // Quiet NaN bit pattern for the operand width: exponent all ones
        // plus the top fraction bit.
        const uint64_t qnan =
            single ? 0x7fc00000 : ULL(0x7ff8000000000000);
        const bool nan = std::isnan(op1);
        if (!nan || fpscr.dn == 1) {
            // NaN produced by the operation itself, or default NaN mode.
            dest = bitsToFp(qnan, junk);
        } else {
            // The operand was a NaN: return it with the quiet bits set.
            dest = bitsToFp(fpToBits(op1) | qnan, junk);
        }
    } else if (flush && flushToZero(dest)) {
        feraiseexcept(FeUnderflow);
    } else if ((
                (single && (dest == bitsToFp(0x00800000, junk) ||
                     dest == bitsToFp(0x80800000, junk))) ||
                (!single &&
                    (dest == bitsToFp(ULL(0x0010000000000000), junk) ||
                     dest == bitsToFp(ULL(0x8010000000000000), junk)))
               ) && rMode != VfpRoundZero) {
        /*
         * Correct for the fact that underflow is detected -before- rounding
         * in ARM and -after- rounding in x86.
         *
         * The result is the smallest magnitude normal number of either
         * sign, so redo the operation rounding toward zero and see whether
         * that result would have been flushed.
         */
        fesetround(FeRoundZero);
        __asm__ __volatile__ ("" : "=m" (op1) : "m" (op1));
        fpType temp = func(op1);
        __asm__ __volatile__ ("" : "=m" (temp) : "m" (temp));
        if (flush && flushToZero(temp)) {
            dest = temp;
        }
    }
    finishVfp(fpscr, state);
    return dest;
}

// Explicit instantiations for single and double precision operands.
template
float FpOp::unaryOp(FPSCR &fpscr, float op1, float (*func)(float),
                    bool flush, uint32_t rMode) const;
template
double FpOp::unaryOp(FPSCR &fpscr, double op1, double (*func)(double),
                     bool flush, uint32_t rMode) const;

/*
 * Advance a register index by stride positions, wrapping around inside the
 * aligned group of eight registers that idx belongs to. The stride is
 * doubled when the wide member is set.
 */
IntRegIndex
VfpMacroOp::addStride(IntRegIndex idx, unsigned stride)
{
    const unsigned step = wide ? stride * 2 : stride;
    // Split the index into the start of its group of eight and the
    // position within that group.
    const unsigned slot = idx % 8;
    const IntRegIndex groupBase = (IntRegIndex)(idx - slot);
    // Only the position moves, modulo eight; the group stays the same.
    return (IntRegIndex)(groupBase + ((slot + step) % 8));
}

/*
 * Step the destination and both source register indices to the next
 * element. The stride is 1 when machInst.fpscrStride is zero and 2
 * otherwise. op2 is left untouched when it lies in the scalar bank.
 */
void
VfpMacroOp::nextIdxs(IntRegIndex &dest, IntRegIndex &op1, IntRegIndex &op2)
{
    const unsigned stride = (machInst.fpscrStride != 0) ? 2 : 1;
    assert(!inScalarBank(dest));
    dest = addStride(dest, stride);
    op1 = addStride(op1, stride);
    if (inScalarBank(op2))
        return;
    op2 = addStride(op2, stride);
}

/*
 * Step the destination and source register indices to the next element.
 * The stride is 1 when machInst.fpscrStride is zero and 2 otherwise. op1
 * is left untouched when it lies in the scalar bank.
 */
void
VfpMacroOp::nextIdxs(IntRegIndex &dest, IntRegIndex &op1)
{
    const unsigned stride = (machInst.fpscrStride != 0) ? 2 : 1;
    assert(!inScalarBank(dest));
    dest = addStride(dest, stride);
    if (inScalarBank(op1))
        return;
    op1 = addStride(op1, stride);
}

/*
 * Step the destination register index to the next element. The stride is
 * 1 when machInst.fpscrStride is zero and 2 otherwise. The destination
 * must not lie in the scalar bank.
 */
void
VfpMacroOp::nextIdxs(IntRegIndex &dest)
{
    const unsigned stride = (machInst.fpscrStride != 0) ? 2 : 1;
    assert(!inScalarBank(dest));
    dest = addStride(dest, stride);
}

}