vfp.cc revision 7434
12735Sktlim@umich.edu/*
210319SAndreas.Sandberg@ARM.com * Copyright (c) 2010 ARM Limited
310319SAndreas.Sandberg@ARM.com * All rights reserved
410319SAndreas.Sandberg@ARM.com *
510319SAndreas.Sandberg@ARM.com * The license below extends only to copyright in the software and shall
610319SAndreas.Sandberg@ARM.com * not be construed as granting a license to any other intellectual
710319SAndreas.Sandberg@ARM.com * property including but not limited to intellectual property relating
810319SAndreas.Sandberg@ARM.com * to a hardware implementation of the functionality of the software
910319SAndreas.Sandberg@ARM.com * licensed hereunder.  You may use the software subject to the license
1010319SAndreas.Sandberg@ARM.com * terms below provided that you ensure that this notice is replicated
1110319SAndreas.Sandberg@ARM.com * unmodified and in its entirety in all distributions of the software,
1210319SAndreas.Sandberg@ARM.com * modified or unmodified, in source code or in binary form.
1310319SAndreas.Sandberg@ARM.com *
142735Sktlim@umich.edu * Redistribution and use in source and binary forms, with or without
1511303Ssteve.reinhardt@amd.com * modification, are permitted provided that the following conditions are
162735Sktlim@umich.edu * met: redistributions of source code must retain the above copyright
172735Sktlim@umich.edu * notice, this list of conditions and the following disclaimer;
182735Sktlim@umich.edu * redistributions in binary form must reproduce the above copyright
192735Sktlim@umich.edu * notice, this list of conditions and the following disclaimer in the
202735Sktlim@umich.edu * documentation and/or other materials provided with the distribution;
212735Sktlim@umich.edu * neither the name of the copyright holders nor the names of its
222735Sktlim@umich.edu * contributors may be used to endorse or promote products derived from
232735Sktlim@umich.edu * this software without specific prior written permission.
242735Sktlim@umich.edu *
252735Sktlim@umich.edu * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
262735Sktlim@umich.edu * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
272735Sktlim@umich.edu * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
282735Sktlim@umich.edu * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
292735Sktlim@umich.edu * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
302735Sktlim@umich.edu * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
312735Sktlim@umich.edu * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
322735Sktlim@umich.edu * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
332735Sktlim@umich.edu * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
342735Sktlim@umich.edu * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
352735Sktlim@umich.edu * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
362735Sktlim@umich.edu *
372735Sktlim@umich.edu * Authors: Gabe Black
382735Sktlim@umich.edu */
392735Sktlim@umich.edu
402735Sktlim@umich.edu#include "arch/arm/insts/vfp.hh"
412735Sktlim@umich.edu
4210319SAndreas.Sandberg@ARM.com/*
432735Sktlim@umich.edu * The asm statements below are to keep gcc from reordering code. Otherwise
442735Sktlim@umich.edu * the rounding mode might be set after the operation it was intended for, the
4510319SAndreas.Sandberg@ARM.com * exception bits read before it, etc.
4610319SAndreas.Sandberg@ARM.com */
4710319SAndreas.Sandberg@ARM.com
4810319SAndreas.Sandberg@ARM.comstd::string
4910319SAndreas.Sandberg@ARM.comFpRegRegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
5010319SAndreas.Sandberg@ARM.com{
5110529Smorr@cs.wisc.edu    std::stringstream ss;
5212104Snathanael.premillieu@arm.com    printMnemonic(ss);
5310319SAndreas.Sandberg@ARM.com    printReg(ss, dest + FP_Base_DepTag);
5410319SAndreas.Sandberg@ARM.com    ss << ", ";
5511608Snikos.nikoleris@arm.com    printReg(ss, op1 + FP_Base_DepTag);
562735Sktlim@umich.edu    return ss.str();
572735Sktlim@umich.edu}
5810319SAndreas.Sandberg@ARM.com
5910319SAndreas.Sandberg@ARM.comstd::string
6010319SAndreas.Sandberg@ARM.comFpRegImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
6110319SAndreas.Sandberg@ARM.com{
6210319SAndreas.Sandberg@ARM.com    std::stringstream ss;
6310319SAndreas.Sandberg@ARM.com    printMnemonic(ss);
6410319SAndreas.Sandberg@ARM.com    printReg(ss, dest + FP_Base_DepTag);
6510319SAndreas.Sandberg@ARM.com    ccprintf(ss, ", #%d", imm);
6610319SAndreas.Sandberg@ARM.com    return ss.str();
6710319SAndreas.Sandberg@ARM.com}
6810319SAndreas.Sandberg@ARM.com
6910319SAndreas.Sandberg@ARM.comstd::string
7010319SAndreas.Sandberg@ARM.comFpRegRegImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
7110319SAndreas.Sandberg@ARM.com{
722735Sktlim@umich.edu    std::stringstream ss;
732735Sktlim@umich.edu    printMnemonic(ss);
7410319SAndreas.Sandberg@ARM.com    printReg(ss, dest + FP_Base_DepTag);
7510319SAndreas.Sandberg@ARM.com    ss << ", ";
7610319SAndreas.Sandberg@ARM.com    printReg(ss, op1 + FP_Base_DepTag);
7710319SAndreas.Sandberg@ARM.com    ccprintf(ss, ", #%d", imm);
7810319SAndreas.Sandberg@ARM.com    return ss.str();
7910319SAndreas.Sandberg@ARM.com}
8010319SAndreas.Sandberg@ARM.com
8110319SAndreas.Sandberg@ARM.comstd::string
8210319SAndreas.Sandberg@ARM.comFpRegRegRegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
8310319SAndreas.Sandberg@ARM.com{
8410319SAndreas.Sandberg@ARM.com    std::stringstream ss;
8510319SAndreas.Sandberg@ARM.com    printMnemonic(ss);
8610319SAndreas.Sandberg@ARM.com    printReg(ss, dest + FP_Base_DepTag);
8710319SAndreas.Sandberg@ARM.com    ss << ", ";
8810319SAndreas.Sandberg@ARM.com    printReg(ss, op1 + FP_Base_DepTag);
892735Sktlim@umich.edu    ss << ", ";
902735Sktlim@umich.edu    printReg(ss, op2 + FP_Base_DepTag);
9110319SAndreas.Sandberg@ARM.com    return ss.str();
9210319SAndreas.Sandberg@ARM.com}
9310319SAndreas.Sandberg@ARM.com
9410319SAndreas.Sandberg@ARM.comnamespace ArmISA
9510319SAndreas.Sandberg@ARM.com{
9610319SAndreas.Sandberg@ARM.com
9710319SAndreas.Sandberg@ARM.comVfpSavedState
9810319SAndreas.Sandberg@ARM.comprepFpState(uint32_t rMode)
9910319SAndreas.Sandberg@ARM.com{
10010319SAndreas.Sandberg@ARM.com    int roundingMode = fegetround();
10110319SAndreas.Sandberg@ARM.com    feclearexcept(FeAllExceptions);
10210319SAndreas.Sandberg@ARM.com    switch (rMode) {
10310319SAndreas.Sandberg@ARM.com      case VfpRoundNearest:
1042735Sktlim@umich.edu        fesetround(FeRoundNearest);
1052735Sktlim@umich.edu        break;
10610319SAndreas.Sandberg@ARM.com      case VfpRoundUpward:
1072735Sktlim@umich.edu        fesetround(FeRoundUpward);
1082735Sktlim@umich.edu        break;
1092735Sktlim@umich.edu      case VfpRoundDown:
11010319SAndreas.Sandberg@ARM.com        fesetround(FeRoundDown);
11110319SAndreas.Sandberg@ARM.com        break;
1122735Sktlim@umich.edu      case VfpRoundZero:
1132735Sktlim@umich.edu        fesetround(FeRoundZero);
11410319SAndreas.Sandberg@ARM.com        break;
11510319SAndreas.Sandberg@ARM.com    }
1162735Sktlim@umich.edu    return roundingMode;
1172735Sktlim@umich.edu}
1182735Sktlim@umich.edu
11910319SAndreas.Sandberg@ARM.comvoid
12010319SAndreas.Sandberg@ARM.comfinishVfp(FPSCR &fpscr, VfpSavedState state)
1212735Sktlim@umich.edu{
12210319SAndreas.Sandberg@ARM.com    int exceptions = fetestexcept(FeAllExceptions);
1232735Sktlim@umich.edu    bool underflow = false;
12410319SAndreas.Sandberg@ARM.com    if (exceptions & FeInvalid) {
12510319SAndreas.Sandberg@ARM.com        fpscr.ioc = 1;
12610319SAndreas.Sandberg@ARM.com    }
12710319SAndreas.Sandberg@ARM.com    if (exceptions & FeDivByZero) {
12810319SAndreas.Sandberg@ARM.com        fpscr.dzc = 1;
12910319SAndreas.Sandberg@ARM.com    }
13010319SAndreas.Sandberg@ARM.com    if (exceptions & FeOverflow) {
1312735Sktlim@umich.edu        fpscr.ofc = 1;
13210319SAndreas.Sandberg@ARM.com    }
13310319SAndreas.Sandberg@ARM.com    if (exceptions & FeUnderflow) {
13410319SAndreas.Sandberg@ARM.com        underflow = true;
13510319SAndreas.Sandberg@ARM.com        fpscr.ufc = 1;
13610319SAndreas.Sandberg@ARM.com    }
13710319SAndreas.Sandberg@ARM.com    if ((exceptions & FeInexact) && !(underflow && fpscr.fz)) {
13810319SAndreas.Sandberg@ARM.com        fpscr.ixc = 1;
1392735Sktlim@umich.edu    }
14010319SAndreas.Sandberg@ARM.com    fesetround(state);
14110319SAndreas.Sandberg@ARM.com}
14210319SAndreas.Sandberg@ARM.com
14310319SAndreas.Sandberg@ARM.comtemplate <class fpType>
14410319SAndreas.Sandberg@ARM.comfpType
1452735Sktlim@umich.edufixDest(FPSCR fpscr, fpType val, fpType op1)
14610319SAndreas.Sandberg@ARM.com{
14710319SAndreas.Sandberg@ARM.com    int fpClass = std::fpclassify(val);
14810319SAndreas.Sandberg@ARM.com    fpType junk = 0.0;
14910319SAndreas.Sandberg@ARM.com    if (fpClass == FP_NAN) {
15010319SAndreas.Sandberg@ARM.com        const bool single = (sizeof(val) == sizeof(float));
1512735Sktlim@umich.edu        const uint64_t qnan = single ? 0x7fc00000 : ULL(0x7ff8000000000000);
15210319SAndreas.Sandberg@ARM.com        const bool nan = std::isnan(op1);
1532735Sktlim@umich.edu        if (!nan || (fpscr.dn == 1)) {
15410319SAndreas.Sandberg@ARM.com            val = bitsToFp(qnan, junk);
15510319SAndreas.Sandberg@ARM.com        } else if (nan) {
15610319SAndreas.Sandberg@ARM.com            val = bitsToFp(fpToBits(op1) | qnan, junk);
15710319SAndreas.Sandberg@ARM.com        }
15810319SAndreas.Sandberg@ARM.com    } else if (fpClass == FP_SUBNORMAL && fpscr.fz == 1) {
15910319SAndreas.Sandberg@ARM.com        // Turn val into a zero with the correct sign;
16010319SAndreas.Sandberg@ARM.com        uint64_t bitMask = ULL(0x1) << (sizeof(fpType) * 8 - 1);
16110319SAndreas.Sandberg@ARM.com        val = bitsToFp(fpToBits(val) & bitMask, junk);
16210319SAndreas.Sandberg@ARM.com        feclearexcept(FeInexact);
16310319SAndreas.Sandberg@ARM.com        feraiseexcept(FeUnderflow);
16410319SAndreas.Sandberg@ARM.com    }
16510319SAndreas.Sandberg@ARM.com    return val;
16610319SAndreas.Sandberg@ARM.com}
16710319SAndreas.Sandberg@ARM.com
16810319SAndreas.Sandberg@ARM.comtemplate
16910319SAndreas.Sandberg@ARM.comfloat fixDest<float>(FPSCR fpscr, float val, float op1);
17010319SAndreas.Sandberg@ARM.comtemplate
17110319SAndreas.Sandberg@ARM.comdouble fixDest<double>(FPSCR fpscr, double val, double op1);
17210319SAndreas.Sandberg@ARM.com
/*
 * Fix up the result of a two-operand VFP operation so NaN handling and
 * flush-to-zero follow the modes selected in fpscr rather than the
 * host's defaults.
 */
template <class fpType>
fpType
fixDest(FPSCR fpscr, fpType val, fpType op1, fpType op2)
{
    int fpClass = std::fpclassify(val);
    fpType junk = 0.0;  // Dummy argument so bitsToFp can deduce fpType.
    if (fpClass == FP_NAN) {
        const bool single = (sizeof(val) == sizeof(float));
        // Quiet-NaN bit pattern for the operand's width.
        const uint64_t qnan = single ? 0x7fc00000 : ULL(0x7ff8000000000000);
        const bool nan1 = std::isnan(op1);
        const bool nan2 = std::isnan(op2);
        // A signalling NaN has the top fraction bit clear, so ORing the
        // qnan pattern in would change it; that's the test used here.
        const bool signal1 = nan1 && ((fpToBits(op1) & qnan) != qnan);
        const bool signal2 = nan2 && ((fpToBits(op2) & qnan) != qnan);
        if ((!nan1 && !nan2) || (fpscr.dn == 1)) {
            // No NaN operand, or default-NaN mode: use the default NaN.
            val = bitsToFp(qnan, junk);
        } else if (signal1) {
            // Signalling NaNs take priority; propagate op1, quieted.
            val = bitsToFp(fpToBits(op1) | qnan, junk);
        } else if (signal2) {
            val = bitsToFp(fpToBits(op2) | qnan, junk);
        } else if (nan1) {
            // Quiet NaNs propagate unchanged; op1 wins over op2.
            val = op1;
        } else if (nan2) {
            val = op2;
        }
    } else if (fpClass == FP_SUBNORMAL && fpscr.fz == 1) {
        // Turn val into a zero with the correct sign;
        uint64_t bitMask = ULL(0x1) << (sizeof(fpType) * 8 - 1);
        val = bitsToFp(fpToBits(val) & bitMask, junk);
        feclearexcept(FeInexact);
        feraiseexcept(FeUnderflow);
    }
    return val;
}

template
float fixDest<float>(FPSCR fpscr, float val, float op1, float op2);
template
double fixDest<double>(FPSCR fpscr, double val, double op1, double op2);
21110319SAndreas.Sandberg@ARM.com
/*
 * Fix up the result of a VFP divide. Beyond the usual fixDest handling,
 * a result that landed exactly on the smallest normalized magnitude may
 * actually have underflowed before rounding; redo the divide with
 * round-to-zero to detect that case. The empty asm statements are
 * compiler barriers keeping the fesetround/divide ordering intact.
 */
template <class fpType>
fpType
fixDivDest(FPSCR fpscr, fpType val, fpType op1, fpType op2)
{
    fpType mid = fixDest(fpscr, val, op1, op2);
    const bool single = (sizeof(fpType) == sizeof(float));
    const fpType junk = 0.0;  // Dummy argument so bitsToFp can deduce fpType.
    // +/- smallest normalized value for the operand's width.
    if ((single && (val == bitsToFp(0x00800000, junk) ||
                    val == bitsToFp(0x80800000, junk))) ||
        (!single && (val == bitsToFp(ULL(0x0010000000000000), junk) ||
                     val == bitsToFp(ULL(0x8010000000000000), junk)))
        ) {
        __asm__ __volatile__("" : "=m" (op1) : "m" (op1));
        fesetround(FeRoundZero);
        fpType temp = 0.0;
        __asm__ __volatile__("" : "=m" (temp) : "m" (temp));
        // Truncating divide: if this flushes, the true result was
        // below the normalized range.
        temp = op1 / op2;
        if (flushToZero(temp)) {
            feraiseexcept(FeUnderflow);
            if (fpscr.fz) {
                feclearexcept(FeInexact);
                mid = temp;
            }
        }
        __asm__ __volatile__("" :: "m" (temp));
    }
    return mid;
}

template
float fixDivDest<float>(FPSCR fpscr, float val, float op1, float op2);
template
double fixDivDest<double>(FPSCR fpscr, double val, double op1, double op2);
24510319SAndreas.Sandberg@ARM.com
/*
 * Fix up a double -> single conversion result. If the input was a NaN,
 * a single precision NaN with the corresponding sign/payload bits is
 * built so fixDest can apply the NaN-propagation rules. A result equal
 * to +/- the smallest normalized single is re-checked for underflow
 * with round-to-zero; the empty asm statements are compiler barriers
 * that keep that sequence ordered.
 */
float
fixFpDFpSDest(FPSCR fpscr, double val)
{
    const float junk = 0.0;  // Dummy argument so bitsToFp can deduce float.
    float op1 = 0.0;
    if (std::isnan(val)) {
        uint64_t valBits = fpToBits(val);
        // Repack sign, top fraction bits, and an all-ones exponent into
        // a single precision NaN.
        uint32_t op1Bits = bits(valBits, 50, 29) |
                           (mask(9) << 22) |
                           (bits(valBits, 63) << 31);
        op1 = bitsToFp(op1Bits, junk);
    }
    float mid = fixDest(fpscr, (float)val, op1);
    // In flush-to-zero mode, underflow suppresses the inexact flag.
    if (fpscr.fz && fetestexcept(FeUnderflow | FeInexact) ==
                    (FeUnderflow | FeInexact)) {
        feclearexcept(FeInexact);
    }
    if (mid == bitsToFp(0x00800000, junk) ||
        mid == bitsToFp(0x80800000, junk)) {
        __asm__ __volatile__("" : "=m" (val) : "m" (val));
        fesetround(FeRoundZero);
        float temp = 0.0;
        __asm__ __volatile__("" : "=m" (temp) : "m" (temp));
        // Truncating conversion: flushing here means the true result
        // was below the normalized single precision range.
        temp = val;
        if (flushToZero(temp)) {
            feraiseexcept(FeUnderflow);
            if (fpscr.fz) {
                feclearexcept(FeInexact);
                mid = temp;
            }
        }
        __asm__ __volatile__("" :: "m" (temp));
    }
    return mid;
}
28110319SAndreas.Sandberg@ARM.com
/*
 * Fix up a single -> double conversion result. Mirror image of
 * fixFpDFpSDest: NaN inputs are widened into a double precision NaN
 * with matching sign/payload before fixDest applies the propagation
 * rules, and a result equal to +/- the smallest normalized double is
 * re-checked for underflow with round-to-zero. The empty asm
 * statements are compiler barriers keeping that sequence ordered.
 */
double
fixFpSFpDDest(FPSCR fpscr, float val)
{
    const double junk = 0.0;  // Dummy argument so bitsToFp can deduce double.
    double op1 = 0.0;
    if (std::isnan(val)) {
        uint32_t valBits = fpToBits(val);
        // Repack sign, fraction bits, and an all-ones exponent into a
        // double precision NaN.
        uint64_t op1Bits = ((uint64_t)bits(valBits, 21, 0) << 29) |
                           (mask(12) << 51) |
                           ((uint64_t)bits(valBits, 31) << 63);
        op1 = bitsToFp(op1Bits, junk);
    }
    double mid = fixDest(fpscr, (double)val, op1);
    if (mid == bitsToFp(ULL(0x0010000000000000), junk) ||
        mid == bitsToFp(ULL(0x8010000000000000), junk)) {
        __asm__ __volatile__("" : "=m" (val) : "m" (val));
        fesetround(FeRoundZero);
        double temp = 0.0;
        __asm__ __volatile__("" : "=m" (temp) : "m" (temp));
        // Truncating conversion: flushing here means the true result
        // was below the normalized double precision range.
        temp = val;
        if (flushToZero(temp)) {
            feraiseexcept(FeUnderflow);
            if (fpscr.fz) {
                feclearexcept(FeInexact);
                mid = temp;
            }
        }
        __asm__ __volatile__("" :: "m" (temp));
    }
    return mid;
}
313
/*
 * Convert a single precision value (op) to half precision and write the
 * 16-bit result into the top or bottom half of dest's bit pattern,
 * returning the merged value. Rounding, NaN, flush-to-zero, and the
 * alternative half-precision format are all controlled by fpscr, whose
 * status flags (ioc/ufc/ofc/ixc) are updated as a side effect.
 */
float
vcvtFpSFpH(FPSCR &fpscr, float op, float dest, bool top)
{
    float junk = 0.0;  // Dummy argument so bitsToFp can deduce float.
    uint32_t destBits = fpToBits(dest);
    uint32_t opBits = fpToBits(op);
    // Extract the operand.
    bool neg = bits(opBits, 31);
    uint32_t exponent = bits(opBits, 30, 23);
    uint32_t oldMantissa = bits(opBits, 22, 0);
    // Keep the top 10 fraction bits for the half precision mantissa.
    uint32_t mantissa = oldMantissa >> (23 - 10);
    // Do the conversion.
    // "extra" holds the 13 dropped fraction bits, used for rounding.
    uint32_t extra = oldMantissa & mask(23 - 10);
    if (exponent == 0xff) {
        if (oldMantissa != 0) {
            // Nans.
            if (bits(mantissa, 9) == 0) {
                // Signalling nan.
                fpscr.ioc = 1;
            }
            if (fpscr.ahp) {
                // Alternative format has no NaNs; this is invalid.
                mantissa = 0;
                exponent = 0;
                fpscr.ioc = 1;
            } else if (fpscr.dn) {
                // Default NaN mode: emit the default quiet NaN.
                mantissa = (1 << 9);
                exponent = 0x1f;
                neg = false;
            } else {
                // Propagate the payload, quieted.
                exponent = 0x1f;
                mantissa |= (1 << 9);
            }
        } else {
            // Infinities.
            exponent = 0x1F;
            if (fpscr.ahp) {
                // Alternative format has no infinities either.
                fpscr.ioc = 1;
                mantissa = 0x3ff;
            } else {
                mantissa = 0;
            }
        }
    } else if (exponent == 0 && oldMantissa == 0) {
        // Zero, don't need to do anything.
    } else {
        // Normalized or denormalized numbers.

        bool inexact = (extra != 0);

        if (exponent == 0) {
            // Denormalized.

            // If flush to zero is on, this shouldn't happen.
            assert(fpscr.fz == 0);

            // Check for underflow
            if (inexact || fpscr.ufe)
                fpscr.ufc = 1;

            // Handle rounding.
            unsigned mode = fpscr.rMode;
            // Round up/down depending on sign and mode; nearest rounds
            // on > half, or == half with an odd mantissa (ties to even).
            if ((mode == VfpRoundUpward && !neg && extra) ||
                (mode == VfpRoundDown && neg && extra) ||
                (mode == VfpRoundNearest &&
                 (extra > (1 << 9) ||
                  (extra == (1 << 9) && bits(mantissa, 0))))) {
                mantissa++;
            }

            // See if the number became normalized after rounding.
            if (mantissa == (1 << 10)) {
                mantissa = 0;
                exponent = 1;
            }
        } else {
            // Normalized.

            // We need to track the dropped bits differently since
            // more can be dropped by denormalizing.
            bool topOne = bits(extra, 12);
            bool restZeros = bits(extra, 11, 0) == 0;

            if (exponent <= (127 - 15)) {
                // The result is too small. Denormalize.
                mantissa |= (1 << 10);
                while (mantissa && exponent <= (127 - 15)) {
                    // Shift right one bit at a time, accumulating the
                    // dropped bits into topOne/restZeros.
                    restZeros = restZeros && !topOne;
                    topOne = bits(mantissa, 0);
                    mantissa = mantissa >> 1;
                    exponent++;
                }
                if (topOne || !restZeros)
                    inexact = true;
                exponent = 0;
            } else {
                // Change bias.
                exponent -= (127 - 15);
            }

            if (exponent == 0 && (inexact || fpscr.ufe)) {
                // Underflow
                fpscr.ufc = 1;
            }

            // Handle rounding.
            unsigned mode = fpscr.rMode;
            bool nonZero = topOne || !restZeros;
            if ((mode == VfpRoundUpward && !neg && nonZero) ||
                (mode == VfpRoundDown && neg && nonZero) ||
                (mode == VfpRoundNearest && topOne &&
                 (!restZeros || bits(mantissa, 0)))) {
                mantissa++;
            }

            // See if we rounded up and need to bump the exponent.
            if (mantissa == (1 << 10)) {
                mantissa = 0;
                exponent++;
            }

            // Deal with overflow
            if (fpscr.ahp) {
                // Alternative format: saturate to max magnitude.
                if (exponent >= 0x20) {
                    exponent = 0x1f;
                    mantissa = 0x3ff;
                    fpscr.ioc = 1;
                    // Suppress inexact exception.
                    inexact = false;
                }
            } else {
                if (exponent >= 0x1f) {
                    if ((mode == VfpRoundNearest) ||
                        (mode == VfpRoundUpward && !neg) ||
                        (mode == VfpRoundDown && neg)) {
                        // Overflow to infinity.
                        exponent = 0x1f;
                        mantissa = 0;
                    } else {
                        // Overflow to max normal.
                        exponent = 0x1e;
                        mantissa = 0x3ff;
                    }
                    fpscr.ofc = 1;
                    inexact = true;
                }
            }
        }

        if (inexact) {
            fpscr.ixc = 1;
        }
    }
    // Reassemble and install the result.
    uint32_t result = bits(mantissa, 9, 0);
    replaceBits(result, 14, 10, exponent);
    if (neg)
        result |= (1 << 15);
    if (top)
        replaceBits(destBits, 31, 16, result);
    else
        replaceBits(destBits, 15, 0, result);
    return bitsToFp(destBits, junk);
}
477
478float
479vcvtFpHFpS(FPSCR &fpscr, float op, bool top)
480{
481    float junk = 0.0;
482    uint32_t opBits = fpToBits(op);
483    // Extract the operand.
484    if (top)
485        opBits = bits(opBits, 31, 16);
486    else
487        opBits = bits(opBits, 15, 0);
488    // Extract the bitfields.
489    bool neg = bits(opBits, 15);
490    uint32_t exponent = bits(opBits, 14, 10);
491    uint32_t mantissa = bits(opBits, 9, 0);
492    // Do the conversion.
493    if (exponent == 0) {
494        if (mantissa != 0) {
495            // Normalize the value.
496            exponent = exponent + (127 - 15) + 1;
497            while (mantissa < (1 << 10)) {
498                mantissa = mantissa << 1;
499                exponent--;
500            }
501        }
502        mantissa = mantissa << (23 - 10);
503    } else if (exponent == 0x1f && !fpscr.ahp) {
504        // Infinities and nans.
505        exponent = 0xff;
506        if (mantissa != 0) {
507            // Nans.
508            mantissa = mantissa << (23 - 10);
509            if (bits(mantissa, 22) == 0) {
510                // Signalling nan.
511                fpscr.ioc = 1;
512                mantissa |= (1 << 22);
513            }
514            if (fpscr.dn) {
515                mantissa &= ~mask(22);
516                neg = false;
517            }
518        }
519    } else {
520        exponent = exponent + (127 - 15);
521        mantissa = mantissa << (23 - 10);
522    }
523    // Reassemble the result.
524    uint32_t result = bits(mantissa, 22, 0);
525    replaceBits(result, 30, 23, exponent);
526    if (neg)
527        result |= (1 << 31);
528    return bitsToFp(result, junk);
529}
530
/*
 * Convert a single precision value to a fixed point integer: scale by
 * 2^imm, round (round-to-zero when rzero, otherwise the current host
 * mode), and saturate to a signed/unsigned 16- or 32-bit range.
 * Saturation raises Invalid and suppresses Inexact. The empty asm
 * statements are compiler barriers keeping the fenv calls ordered
 * relative to the arithmetic.
 */
uint64_t
vfpFpSToFixed(float val, bool isSigned, bool half,
              uint8_t imm, bool rzero)
{
    int rmode = rzero ? FeRoundZero : fegetround();
    __asm__ __volatile__("" : "=m" (rmode) : "m" (rmode));
    // Scale under round-to-nearest so the multiply itself doesn't round
    // in the target mode.
    fesetround(FeRoundNearest);
    val = val * powf(2.0, imm);
    __asm__ __volatile__("" : "=m" (val) : "m" (val));
    fesetround(rmode);
    feclearexcept(FeAllExceptions);
    __asm__ __volatile__("" : "=m" (val) : "m" (val));
    float origVal = val;
    val = rintf(val);
    int fpType = std::fpclassify(val);
    if (fpType == FP_SUBNORMAL || fpType == FP_NAN) {
        if (fpType == FP_NAN) {
            feraiseexcept(FeInvalid);
        }
        val = 0.0;
    } else if (origVal != val) {
        // Nudge the rounded result if rintf's answer drifted further
        // from the original than the requested mode allows.
        switch (rmode) {
          case FeRoundNearest:
            if (origVal - val > 0.5)
                val += 1.0;
            else if (val - origVal > 0.5)
                val -= 1.0;
            break;
          case FeRoundDown:
            if (origVal < val)
                val -= 1.0;
            break;
          case FeRoundUpward:
            if (origVal > val)
                val += 1.0;
            break;
        }
        feraiseexcept(FeInexact);
    }

    // Saturate to the destination range. Note (int16_t)(1 << 15) is the
    // most negative int16 and (int32_t)(1 << 31) the most negative int32;
    // comparisons are done in double to avoid losing precision.
    if (isSigned) {
        if (half) {
            if ((double)val < (int16_t)(1 << 15)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return (int16_t)(1 << 15);
            }
            if ((double)val > (int16_t)mask(15)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return (int16_t)mask(15);
            }
            return (int16_t)val;
        } else {
            if ((double)val < (int32_t)(1 << 31)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return (int32_t)(1 << 31);
            }
            if ((double)val > (int32_t)mask(31)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return (int32_t)mask(31);
            }
            return (int32_t)val;
        }
    } else {
        if (half) {
            if ((double)val < 0) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return 0;
            }
            if ((double)val > (mask(16))) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return mask(16);
            }
            return (uint16_t)val;
        } else {
            if ((double)val < 0) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return 0;
            }
            if ((double)val > (mask(32))) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return mask(32);
            }
            return (uint32_t)val;
        }
    }
}
625
/*
 * Convert an unsigned fixed point value (16-bit when half) to single
 * precision by dividing by 2^imm. fixDivDest applies VFP NaN and
 * flush-to-zero semantics to the result; the empty asm statements are
 * compiler barriers keeping the fenv calls ordered.
 */
float
vfpUFixedToFpS(FPSCR fpscr, uint32_t val, bool half, uint8_t imm)
{
    fesetround(FeRoundNearest);
    if (half)
        val = (uint16_t)val;
    float scale = powf(2.0, imm);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    feclearexcept(FeAllExceptions);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    return fixDivDest(fpscr, val / scale, (float)val, scale);
}
638
/*
 * Convert a signed fixed point value (sign-extended from 16 bits when
 * half) to single precision by dividing by 2^imm. fixDivDest applies
 * VFP NaN and flush-to-zero semantics to the result; the empty asm
 * statements are compiler barriers keeping the fenv calls ordered.
 */
float
vfpSFixedToFpS(FPSCR fpscr, int32_t val, bool half, uint8_t imm)
{
    fesetround(FeRoundNearest);
    if (half)
        val = sext<16>(val & mask(16));
    float scale = powf(2.0, imm);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    feclearexcept(FeAllExceptions);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    return fixDivDest(fpscr, val / scale, (float)val, scale);
}
651
/*
 * Convert a double precision value to a fixed point integer: scale by
 * 2^imm, round (round-to-zero when rzero, otherwise the current host
 * mode), and saturate to a signed/unsigned 16- or 32-bit range.
 * Saturation raises Invalid and suppresses Inexact. Double precision
 * counterpart of vfpFpSToFixed; the empty asm statements are compiler
 * barriers keeping the fenv calls ordered relative to the arithmetic.
 */
uint64_t
vfpFpDToFixed(double val, bool isSigned, bool half,
              uint8_t imm, bool rzero)
{
    int rmode = rzero ? FeRoundZero : fegetround();
    // Scale under round-to-nearest so the multiply itself doesn't round
    // in the target mode.
    fesetround(FeRoundNearest);
    val = val * pow(2.0, imm);
    __asm__ __volatile__("" : "=m" (val) : "m" (val));
    fesetround(rmode);
    feclearexcept(FeAllExceptions);
    __asm__ __volatile__("" : "=m" (val) : "m" (val));
    double origVal = val;
    val = rint(val);
    int fpType = std::fpclassify(val);
    if (fpType == FP_SUBNORMAL || fpType == FP_NAN) {
        if (fpType == FP_NAN) {
            feraiseexcept(FeInvalid);
        }
        val = 0.0;
    } else if (origVal != val) {
        // Nudge the rounded result if rint's answer drifted further
        // from the original than the requested mode allows.
        switch (rmode) {
          case FeRoundNearest:
            if (origVal - val > 0.5)
                val += 1.0;
            else if (val - origVal > 0.5)
                val -= 1.0;
            break;
          case FeRoundDown:
            if (origVal < val)
                val -= 1.0;
            break;
          case FeRoundUpward:
            if (origVal > val)
                val += 1.0;
            break;
        }
        feraiseexcept(FeInexact);
    }
    // Saturate to the destination range. Note (int16_t)(1 << 15) is the
    // most negative int16 and (int32_t)(1 << 31) the most negative int32.
    if (isSigned) {
        if (half) {
            if (val < (int16_t)(1 << 15)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return (int16_t)(1 << 15);
            }
            if (val > (int16_t)mask(15)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return (int16_t)mask(15);
            }
            return (int16_t)val;
        } else {
            if (val < (int32_t)(1 << 31)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return (int32_t)(1 << 31);
            }
            if (val > (int32_t)mask(31)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return (int32_t)mask(31);
            }
            return (int32_t)val;
        }
    } else {
        if (half) {
            if (val < 0) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return 0;
            }
            if (val > mask(16)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return mask(16);
            }
            return (uint16_t)val;
        } else {
            if (val < 0) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return 0;
            }
            if (val > mask(32)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return mask(32);
            }
            return (uint32_t)val;
        }
    }
}
744
double
vfpUFixedToFpD(FPSCR fpscr, uint32_t val, bool half, uint8_t imm)
{
    // Convert an unsigned fixed-point value with "imm" fractional bits to
    // a double, rounding to nearest.
    fesetround(FeRoundNearest);
    if (half)
        // Halfword source: only the low 16 bits are significant.
        val = (uint16_t)val;
    double scale = pow(2.0, imm);
    // Compiler barriers (empty asm with memory operands): keep the pow()
    // above and the division below on either side of feclearexcept(), so
    // fixDivDest() presumably sees only the division's exception flags.
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    feclearexcept(FeAllExceptions);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    return fixDivDest(fpscr, val / scale, (double)val, scale);
}
757
double
vfpSFixedToFpD(FPSCR fpscr, int32_t val, bool half, uint8_t imm)
{
    // Convert a signed fixed-point value with "imm" fractional bits to a
    // double, rounding to nearest.
    fesetround(FeRoundNearest);
    if (half)
        // Halfword source: sign-extend the low 16 bits.
        val = sext<16>(val & mask(16));
    double scale = pow(2.0, imm);
    // Compiler barriers (empty asm with memory operands): keep the pow()
    // above and the division below on either side of feclearexcept(), so
    // fixDivDest() presumably sees only the division's exception flags.
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    feclearexcept(FeAllExceptions);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    return fixDivDest(fpscr, val / scale, (double)val, scale);
}
770
771template <class fpType>
772fpType
773FpOp::binaryOp(FPSCR &fpscr, fpType op1, fpType op2,
774               fpType (*func)(fpType, fpType),
775               bool flush, uint32_t rMode) const
776{
777    const bool single = (sizeof(fpType) == sizeof(float));
778    fpType junk = 0.0;
779
780    if (flush && flushToZero(op1, op2))
781        fpscr.idc = 1;
782    VfpSavedState state = prepFpState(rMode);
783    __asm__ __volatile__ ("" : "=m" (op1), "=m" (op2), "=m" (state)
784                             : "m" (op1), "m" (op2), "m" (state));
785    fpType dest = func(op1, op2);
786    __asm__ __volatile__ ("" : "=m" (dest) : "m" (dest));
787
788    int fpClass = std::fpclassify(dest);
789    // Get NAN behavior right. This varies between x86 and ARM.
790    if (fpClass == FP_NAN) {
791        const bool single = (sizeof(fpType) == sizeof(float));
792        const uint64_t qnan =
793            single ? 0x7fc00000 : ULL(0x7ff8000000000000);
794        const bool nan1 = std::isnan(op1);
795        const bool nan2 = std::isnan(op2);
796        const bool signal1 = nan1 && ((fpToBits(op1) & qnan) != qnan);
797        const bool signal2 = nan2 && ((fpToBits(op2) & qnan) != qnan);
798        if ((!nan1 && !nan2) || (fpscr.dn == 1)) {
799            dest = bitsToFp(qnan, junk);
800        } else if (signal1) {
801            dest = bitsToFp(fpToBits(op1) | qnan, junk);
802        } else if (signal2) {
803            dest = bitsToFp(fpToBits(op2) | qnan, junk);
804        } else if (nan1) {
805            dest = op1;
806        } else if (nan2) {
807            dest = op2;
808        }
809    } else if (flush && flushToZero(dest)) {
810        feraiseexcept(FeUnderflow);
811    } else if ((
812                (single && (dest == bitsToFp(0x00800000, junk) ||
813                     dest == bitsToFp(0x80800000, junk))) ||
814                (!single &&
815                    (dest == bitsToFp(ULL(0x0010000000000000), junk) ||
816                     dest == bitsToFp(ULL(0x8010000000000000), junk)))
817               ) && rMode != VfpRoundZero) {
818        /*
819         * Correct for the fact that underflow is detected -before- rounding
820         * in ARM and -after- rounding in x86.
821         */
822        fesetround(FeRoundZero);
823        __asm__ __volatile__ ("" : "=m" (op1), "=m" (op2)
824                                 : "m" (op1), "m" (op2));
825        fpType temp = func(op1, op2);
826        __asm__ __volatile__ ("" : "=m" (temp) : "m" (temp));
827        if (flush && flushToZero(temp)) {
828            dest = temp;
829        }
830    }
831    finishVfp(fpscr, state);
832    return dest;
833}
834
// Explicit instantiations of binaryOp for the two VFP operand widths.
template
float FpOp::binaryOp(FPSCR &fpscr, float op1, float op2,
                     float (*func)(float, float),
                     bool flush, uint32_t rMode) const;
template
double FpOp::binaryOp(FPSCR &fpscr, double op1, double op2,
                      double (*func)(double, double),
                      bool flush, uint32_t rMode) const;
843
844template <class fpType>
845fpType
846FpOp::unaryOp(FPSCR &fpscr, fpType op1, fpType (*func)(fpType),
847              bool flush, uint32_t rMode) const
848{
849    const bool single = (sizeof(fpType) == sizeof(float));
850    fpType junk = 0.0;
851
852    if (flush && flushToZero(op1))
853        fpscr.idc = 1;
854    VfpSavedState state = prepFpState(rMode);
855    __asm__ __volatile__ ("" : "=m" (op1), "=m" (state)
856                             : "m" (op1), "m" (state));
857    fpType dest = func(op1);
858    __asm__ __volatile__ ("" : "=m" (dest) : "m" (dest));
859
860    int fpClass = std::fpclassify(dest);
861    // Get NAN behavior right. This varies between x86 and ARM.
862    if (fpClass == FP_NAN) {
863        const bool single = (sizeof(fpType) == sizeof(float));
864        const uint64_t qnan =
865            single ? 0x7fc00000 : ULL(0x7ff8000000000000);
866        const bool nan = std::isnan(op1);
867        if (!nan || fpscr.dn == 1) {
868            dest = bitsToFp(qnan, junk);
869        } else if (nan) {
870            dest = bitsToFp(fpToBits(op1) | qnan, junk);
871        }
872    } else if (flush && flushToZero(dest)) {
873        feraiseexcept(FeUnderflow);
874    } else if ((
875                (single && (dest == bitsToFp(0x00800000, junk) ||
876                     dest == bitsToFp(0x80800000, junk))) ||
877                (!single &&
878                    (dest == bitsToFp(ULL(0x0010000000000000), junk) ||
879                     dest == bitsToFp(ULL(0x8010000000000000), junk)))
880               ) && rMode != VfpRoundZero) {
881        /*
882         * Correct for the fact that underflow is detected -before- rounding
883         * in ARM and -after- rounding in x86.
884         */
885        fesetround(FeRoundZero);
886        __asm__ __volatile__ ("" : "=m" (op1) : "m" (op1));
887        fpType temp = func(op1);
888        __asm__ __volatile__ ("" : "=m" (temp) : "m" (temp));
889        if (flush && flushToZero(temp)) {
890            dest = temp;
891        }
892    }
893    finishVfp(fpscr, state);
894    return dest;
895}
896
// Explicit instantiations of unaryOp for the two VFP operand widths.
template
float FpOp::unaryOp(FPSCR &fpscr, float op1, float (*func)(float),
                    bool flush, uint32_t rMode) const;
template
double FpOp::unaryOp(FPSCR &fpscr, double op1, double (*func)(double),
                     bool flush, uint32_t rMode) const;
903
904IntRegIndex
905VfpMacroOp::addStride(IntRegIndex idx, unsigned stride)
906{
907    if (wide) {
908        stride *= 2;
909    }
910    unsigned offset = idx % 8;
911    idx = (IntRegIndex)(idx - offset);
912    offset += stride;
913    idx = (IntRegIndex)(idx + (offset % 8));
914    return idx;
915}
916
917void
918VfpMacroOp::nextIdxs(IntRegIndex &dest, IntRegIndex &op1, IntRegIndex &op2)
919{
920    unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2;
921    assert(!inScalarBank(dest));
922    dest = addStride(dest, stride);
923    op1 = addStride(op1, stride);
924    if (!inScalarBank(op2)) {
925        op2 = addStride(op2, stride);
926    }
927}
928
929void
930VfpMacroOp::nextIdxs(IntRegIndex &dest, IntRegIndex &op1)
931{
932    unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2;
933    assert(!inScalarBank(dest));
934    dest = addStride(dest, stride);
935    if (!inScalarBank(op1)) {
936        op1 = addStride(op1, stride);
937    }
938}
939
940void
941VfpMacroOp::nextIdxs(IntRegIndex &dest)
942{
943    unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2;
944    assert(!inScalarBank(dest));
945    dest = addStride(dest, stride);
946}
947
948}
949