// vfp.cc revision 7434:dd5a09b86b14
1/*
2 * Copyright (c) 2010 ARM Limited
3 * All rights reserved
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder.  You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions are
16 * met: redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer;
18 * redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution;
21 * neither the name of the copyright holders nor the names of its
22 * contributors may be used to endorse or promote products derived from
23 * this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 *
37 * Authors: Gabe Black
38 */
39
40#include "arch/arm/insts/vfp.hh"
41
42/*
43 * The asm statements below are to keep gcc from reordering code. Otherwise
44 * the rounding mode might be set after the operation it was intended for, the
45 * exception bits read before it, etc.
46 */
47
48std::string
49FpRegRegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
50{
51    std::stringstream ss;
52    printMnemonic(ss);
53    printReg(ss, dest + FP_Base_DepTag);
54    ss << ", ";
55    printReg(ss, op1 + FP_Base_DepTag);
56    return ss.str();
57}
58
59std::string
60FpRegImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
61{
62    std::stringstream ss;
63    printMnemonic(ss);
64    printReg(ss, dest + FP_Base_DepTag);
65    ccprintf(ss, ", #%d", imm);
66    return ss.str();
67}
68
69std::string
70FpRegRegImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
71{
72    std::stringstream ss;
73    printMnemonic(ss);
74    printReg(ss, dest + FP_Base_DepTag);
75    ss << ", ";
76    printReg(ss, op1 + FP_Base_DepTag);
77    ccprintf(ss, ", #%d", imm);
78    return ss.str();
79}
80
81std::string
82FpRegRegRegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
83{
84    std::stringstream ss;
85    printMnemonic(ss);
86    printReg(ss, dest + FP_Base_DepTag);
87    ss << ", ";
88    printReg(ss, op1 + FP_Base_DepTag);
89    ss << ", ";
90    printReg(ss, op2 + FP_Base_DepTag);
91    return ss.str();
92}
93
94namespace ArmISA
95{
96
97VfpSavedState
98prepFpState(uint32_t rMode)
99{
100    int roundingMode = fegetround();
101    feclearexcept(FeAllExceptions);
102    switch (rMode) {
103      case VfpRoundNearest:
104        fesetround(FeRoundNearest);
105        break;
106      case VfpRoundUpward:
107        fesetround(FeRoundUpward);
108        break;
109      case VfpRoundDown:
110        fesetround(FeRoundDown);
111        break;
112      case VfpRoundZero:
113        fesetround(FeRoundZero);
114        break;
115    }
116    return roundingMode;
117}
118
119void
120finishVfp(FPSCR &fpscr, VfpSavedState state)
121{
122    int exceptions = fetestexcept(FeAllExceptions);
123    bool underflow = false;
124    if (exceptions & FeInvalid) {
125        fpscr.ioc = 1;
126    }
127    if (exceptions & FeDivByZero) {
128        fpscr.dzc = 1;
129    }
130    if (exceptions & FeOverflow) {
131        fpscr.ofc = 1;
132    }
133    if (exceptions & FeUnderflow) {
134        underflow = true;
135        fpscr.ufc = 1;
136    }
137    if ((exceptions & FeInexact) && !(underflow && fpscr.fz)) {
138        fpscr.ixc = 1;
139    }
140    fesetround(state);
141}
142
/*
 * Fix up the result (val) of a one-operand VFP operation so it follows ARM
 * rather than host-FPU semantics.
 *
 * NaN results: if the operand wasn't a NaN, or FPSCR.DN (default NaN mode)
 * is set, substitute the default quiet NaN; otherwise propagate the
 * operand's NaN payload with the quiet bit forced on.
 *
 * Subnormal results: when FPSCR.FZ (flush to zero) is set, replace the
 * value with a zero of the same sign, raise underflow, and suppress the
 * host's inexact flag.
 */
template <class fpType>
fpType
fixDest(FPSCR fpscr, fpType val, fpType op1)
{
    int fpClass = std::fpclassify(val);
    fpType junk = 0.0;
    if (fpClass == FP_NAN) {
        const bool single = (sizeof(val) == sizeof(float));
        // Bit pattern of the default quiet NaN; also serves as the
        // quiet-bit mask when propagating a payload.
        const uint64_t qnan = single ? 0x7fc00000 : ULL(0x7ff8000000000000);
        const bool nan = std::isnan(op1);
        if (!nan || (fpscr.dn == 1)) {
            val = bitsToFp(qnan, junk);
        } else if (nan) {
            // Keep the operand's payload but make the NaN quiet.
            val = bitsToFp(fpToBits(op1) | qnan, junk);
        }
    } else if (fpClass == FP_SUBNORMAL && fpscr.fz == 1) {
        // Turn val into a zero with the correct sign;
        uint64_t bitMask = ULL(0x1) << (sizeof(fpType) * 8 - 1);
        val = bitsToFp(fpToBits(val) & bitMask, junk);
        // ARM flush-to-zero reports underflow but not inexact.
        feclearexcept(FeInexact);
        feraiseexcept(FeUnderflow);
    }
    return val;
}

template
float fixDest<float>(FPSCR fpscr, float val, float op1);
template
double fixDest<double>(FPSCR fpscr, double val, double op1);
172
/*
 * Fix up the result (val) of a two-operand VFP operation so it follows ARM
 * rather than host-FPU semantics.  NaN selection order matches ARM: a
 * signalling NaN in op1 wins over one in op2, which wins over quiet NaNs,
 * unless FPSCR.DN forces the default NaN.  Subnormal results are flushed
 * to a signed zero when FPSCR.FZ is set.
 */
template <class fpType>
fpType
fixDest(FPSCR fpscr, fpType val, fpType op1, fpType op2)
{
    int fpClass = std::fpclassify(val);
    fpType junk = 0.0;
    if (fpClass == FP_NAN) {
        const bool single = (sizeof(val) == sizeof(float));
        // Default quiet NaN pattern / quiet-bit mask.
        const uint64_t qnan = single ? 0x7fc00000 : ULL(0x7ff8000000000000);
        const bool nan1 = std::isnan(op1);
        const bool nan2 = std::isnan(op2);
        // A NaN is signalling when its quiet bits aren't all set.
        const bool signal1 = nan1 && ((fpToBits(op1) & qnan) != qnan);
        const bool signal2 = nan2 && ((fpToBits(op2) & qnan) != qnan);
        if ((!nan1 && !nan2) || (fpscr.dn == 1)) {
            val = bitsToFp(qnan, junk);
        } else if (signal1) {
            val = bitsToFp(fpToBits(op1) | qnan, junk);
        } else if (signal2) {
            val = bitsToFp(fpToBits(op2) | qnan, junk);
        } else if (nan1) {
            val = op1;
        } else if (nan2) {
            val = op2;
        }
    } else if (fpClass == FP_SUBNORMAL && fpscr.fz == 1) {
        // Turn val into a zero with the correct sign;
        uint64_t bitMask = ULL(0x1) << (sizeof(fpType) * 8 - 1);
        val = bitsToFp(fpToBits(val) & bitMask, junk);
        // ARM flush-to-zero reports underflow but not inexact.
        feclearexcept(FeInexact);
        feraiseexcept(FeUnderflow);
    }
    return val;
}

template
float fixDest<float>(FPSCR fpscr, float val, float op1, float op2);
template
double fixDest<double>(FPSCR fpscr, double val, double op1, double op2);
211
/*
 * Fix the result of a VFP divide.  Beyond the fixDest() NaN/denormal
 * handling, this corrects for underflow being detected before rounding on
 * ARM but after rounding on x86: if the rounded result landed exactly on
 * +/- the smallest normalized value, redo the divide in round-to-zero mode
 * to see whether the unrounded result was actually subnormal.
 */
template <class fpType>
fpType
fixDivDest(FPSCR fpscr, fpType val, fpType op1, fpType op2)
{
    fpType mid = fixDest(fpscr, val, op1, op2);
    const bool single = (sizeof(fpType) == sizeof(float));
    const fpType junk = 0.0;
    // The constants below are +/- the smallest normalized single/double.
    if ((single && (val == bitsToFp(0x00800000, junk) ||
                    val == bitsToFp(0x80800000, junk))) ||
        (!single && (val == bitsToFp(ULL(0x0010000000000000), junk) ||
                     val == bitsToFp(ULL(0x8010000000000000), junk)))
        ) {
        // The asm barriers keep gcc from moving the divide across the
        // rounding mode change (see the comment at the top of this file).
        __asm__ __volatile__("" : "=m" (op1) : "m" (op1));
        fesetround(FeRoundZero);
        fpType temp = 0.0;
        __asm__ __volatile__("" : "=m" (temp) : "m" (temp));
        temp = op1 / op2;
        if (flushToZero(temp)) {
            feraiseexcept(FeUnderflow);
            if (fpscr.fz) {
                feclearexcept(FeInexact);
                mid = temp;
            }
        }
        __asm__ __volatile__("" :: "m" (temp));
    }
    return mid;
}

template
float fixDivDest<float>(FPSCR fpscr, float val, float op1, float op2);
template
double fixDivDest<double>(FPSCR fpscr, double val, double op1, double op2);
245
/*
 * Fix the result of a double -> single VFP conversion.  If the input was a
 * NaN, build a single precision operand carrying the top of the double's
 * payload so fixDest() can propagate it.  Also applies the
 * underflow-before-rounding correction (see fixDivDest) when the result
 * rounded up to exactly +/- the smallest normalized single.
 */
float
fixFpDFpSDest(FPSCR fpscr, double val)
{
    const float junk = 0.0;
    float op1 = 0.0;
    if (std::isnan(val)) {
        uint64_t valBits = fpToBits(val);
        // Top 22 payload bits, exponent + quiet bits (mask(9) << 22), sign.
        uint32_t op1Bits = bits(valBits, 50, 29) |
                           (mask(9) << 22) |
                           (bits(valBits, 63) << 31);
        op1 = bitsToFp(op1Bits, junk);
    }
    float mid = fixDest(fpscr, (float)val, op1);
    // In flush-to-zero mode an underflowing conversion isn't also inexact.
    if (fpscr.fz && fetestexcept(FeUnderflow | FeInexact) ==
                    (FeUnderflow | FeInexact)) {
        feclearexcept(FeInexact);
    }
    // +/- smallest normalized single: recheck for underflow in
    // round-to-zero, i.e. before rounding, as ARM requires.
    if (mid == bitsToFp(0x00800000, junk) ||
        mid == bitsToFp(0x80800000, junk)) {
        __asm__ __volatile__("" : "=m" (val) : "m" (val));
        fesetround(FeRoundZero);
        float temp = 0.0;
        __asm__ __volatile__("" : "=m" (temp) : "m" (temp));
        temp = val;
        if (flushToZero(temp)) {
            feraiseexcept(FeUnderflow);
            if (fpscr.fz) {
                feclearexcept(FeInexact);
                mid = temp;
            }
        }
        __asm__ __volatile__("" :: "m" (temp));
    }
    return mid;
}
281
/*
 * Fix the result of a single -> double VFP conversion.  If the input was a
 * NaN, build a double precision operand carrying the single's payload so
 * fixDest() can propagate it.  Also applies the underflow-before-rounding
 * correction when the result is exactly +/- the smallest normalized double.
 */
double
fixFpSFpDDest(FPSCR fpscr, float val)
{
    const double junk = 0.0;
    double op1 = 0.0;
    if (std::isnan(val)) {
        uint32_t valBits = fpToBits(val);
        // Payload bits shifted up, exponent + quiet bits (mask(12) << 51),
        // and the sign bit.
        uint64_t op1Bits = ((uint64_t)bits(valBits, 21, 0) << 29) |
                           (mask(12) << 51) |
                           ((uint64_t)bits(valBits, 31) << 63);
        op1 = bitsToFp(op1Bits, junk);
    }
    double mid = fixDest(fpscr, (double)val, op1);
    // +/- smallest normalized double: recheck for underflow in
    // round-to-zero, i.e. before rounding, as ARM requires.
    if (mid == bitsToFp(ULL(0x0010000000000000), junk) ||
        mid == bitsToFp(ULL(0x8010000000000000), junk)) {
        __asm__ __volatile__("" : "=m" (val) : "m" (val));
        fesetround(FeRoundZero);
        double temp = 0.0;
        __asm__ __volatile__("" : "=m" (temp) : "m" (temp));
        temp = val;
        if (flushToZero(temp)) {
            feraiseexcept(FeUnderflow);
            if (fpscr.fz) {
                feclearexcept(FeInexact);
                mid = temp;
            }
        }
        __asm__ __volatile__("" :: "m" (temp));
    }
    return mid;
}
313
/*
 * Convert a single precision value (op) to half precision and deposit the
 * 16-bit result in either the top or bottom half of dest's bit pattern,
 * honoring the FPSCR rounding mode, default-NaN (DN), alternative half
 * precision (AHP) and exception flag semantics.  Returns dest with the
 * selected half replaced.
 */
float
vcvtFpSFpH(FPSCR &fpscr, float op, float dest, bool top)
{
    float junk = 0.0;
    uint32_t destBits = fpToBits(dest);
    uint32_t opBits = fpToBits(op);
    // Extract the operand.
    bool neg = bits(opBits, 31);
    uint32_t exponent = bits(opBits, 30, 23);
    uint32_t oldMantissa = bits(opBits, 22, 0);
    // Keep the top 10 mantissa bits; "extra" holds the 13 dropped bits.
    uint32_t mantissa = oldMantissa >> (23 - 10);
    // Do the conversion.
    uint32_t extra = oldMantissa & mask(23 - 10);
    if (exponent == 0xff) {
        if (oldMantissa != 0) {
            // Nans.
            if (bits(mantissa, 9) == 0) {
                // Signalling nan.
                fpscr.ioc = 1;
            }
            if (fpscr.ahp) {
                // AHP format has no NaN encoding; produce zero and raise
                // invalid operation.
                mantissa = 0;
                exponent = 0;
                fpscr.ioc = 1;
            } else if (fpscr.dn) {
                // Default quiet NaN.
                mantissa = (1 << 9);
                exponent = 0x1f;
                neg = false;
            } else {
                // Propagate the payload, quieting the NaN.
                exponent = 0x1f;
                mantissa |= (1 << 9);
            }
        } else {
            // Infinities.
            exponent = 0x1F;
            if (fpscr.ahp) {
                // AHP format can't represent infinity; saturate and raise
                // invalid operation.
                fpscr.ioc = 1;
                mantissa = 0x3ff;
            } else {
                mantissa = 0;
            }
        }
    } else if (exponent == 0 && oldMantissa == 0) {
        // Zero, don't need to do anything.
    } else {
        // Normalized or denormalized numbers.

        bool inexact = (extra != 0);

        if (exponent == 0) {
            // Denormalized.

            // If flush to zero is on, this shouldn't happen.
            assert(fpscr.fz == 0);

            // Check for underflow
            if (inexact || fpscr.ufe)
                fpscr.ufc = 1;

            // Handle rounding.
            unsigned mode = fpscr.rMode;
            // NOTE(review): "extra" holds 13 dropped bits here, so the
            // round-to-nearest halfway point would appear to be (1 << 12),
            // not (1 << 9) -- confirm against the ARM ARM before changing.
            if ((mode == VfpRoundUpward && !neg && extra) ||
                (mode == VfpRoundDown && neg && extra) ||
                (mode == VfpRoundNearest &&
                 (extra > (1 << 9) ||
                  (extra == (1 << 9) && bits(mantissa, 0))))) {
                mantissa++;
            }

            // See if the number became normalized after rounding.
            if (mantissa == (1 << 10)) {
                mantissa = 0;
                exponent = 1;
            }
        } else {
            // Normalized.

            // We need to track the dropped bits differently since
            // more can be dropped by denormalizing.
            bool topOne = bits(extra, 12);
            bool restZeros = bits(extra, 11, 0) == 0;

            if (exponent <= (127 - 15)) {
                // The result is too small. Denormalize.
                mantissa |= (1 << 10);
                // Shift right until the exponent is in range, folding the
                // dropped bits into topOne/restZeros as we go.
                while (mantissa && exponent <= (127 - 15)) {
                    restZeros = restZeros && !topOne;
                    topOne = bits(mantissa, 0);
                    mantissa = mantissa >> 1;
                    exponent++;
                }
                if (topOne || !restZeros)
                    inexact = true;
                exponent = 0;
            } else {
                // Change bias.
                exponent -= (127 - 15);
            }

            if (exponent == 0 && (inexact || fpscr.ufe)) {
                // Underflow
                fpscr.ufc = 1;
            }

            // Handle rounding.
            unsigned mode = fpscr.rMode;
            bool nonZero = topOne || !restZeros;
            // Round to nearest even: round up when the dropped bits exceed
            // half a ULP, or equal half a ULP and the result would be odd.
            if ((mode == VfpRoundUpward && !neg && nonZero) ||
                (mode == VfpRoundDown && neg && nonZero) ||
                (mode == VfpRoundNearest && topOne &&
                 (!restZeros || bits(mantissa, 0)))) {
                mantissa++;
            }

            // See if we rounded up and need to bump the exponent.
            if (mantissa == (1 << 10)) {
                mantissa = 0;
                exponent++;
            }

            // Deal with overflow
            if (fpscr.ahp) {
                // AHP format: exponent 0x1f is a normal value, so overflow
                // only happens past it; saturate and raise invalid.
                if (exponent >= 0x20) {
                    exponent = 0x1f;
                    mantissa = 0x3ff;
                    fpscr.ioc = 1;
                    // Suppress inexact exception.
                    inexact = false;
                }
            } else {
                if (exponent >= 0x1f) {
                    // Whether we overflow to infinity or saturate at the
                    // largest normal value depends on rounding direction.
                    if ((mode == VfpRoundNearest) ||
                        (mode == VfpRoundUpward && !neg) ||
                        (mode == VfpRoundDown && neg)) {
                        // Overflow to infinity.
                        exponent = 0x1f;
                        mantissa = 0;
                    } else {
                        // Overflow to max normal.
                        exponent = 0x1e;
                        mantissa = 0x3ff;
                    }
                    fpscr.ofc = 1;
                    inexact = true;
                }
            }
        }

        if (inexact) {
            fpscr.ixc = 1;
        }
    }
    // Reassemble and install the result.
    uint32_t result = bits(mantissa, 9, 0);
    replaceBits(result, 14, 10, exponent);
    if (neg)
        result |= (1 << 15);
    if (top)
        replaceBits(destBits, 31, 16, result);
    else
        replaceBits(destBits, 15, 0, result);
    return bitsToFp(destBits, junk);
}
477
/*
 * Convert a half precision value stored in the top or bottom half of op's
 * bit pattern to single precision.  Every half precision value (including
 * denormals) is exactly representable in single precision; the only flag
 * this can set is invalid-op, for a signalling NaN input.
 */
float
vcvtFpHFpS(FPSCR &fpscr, float op, bool top)
{
    float junk = 0.0;
    uint32_t opBits = fpToBits(op);
    // Extract the operand.
    if (top)
        opBits = bits(opBits, 31, 16);
    else
        opBits = bits(opBits, 15, 0);
    // Extract the bitfields.
    bool neg = bits(opBits, 15);
    uint32_t exponent = bits(opBits, 14, 10);
    uint32_t mantissa = bits(opBits, 9, 0);
    // Do the conversion.
    if (exponent == 0) {
        if (mantissa != 0) {
            // Normalize the value.
            // Start one above the rebased exponent; the loop subtracts at
            // least once while shifting the implicit bit into place.
            exponent = exponent + (127 - 15) + 1;
            while (mantissa < (1 << 10)) {
                mantissa = mantissa << 1;
                exponent--;
            }
        }
        mantissa = mantissa << (23 - 10);
    } else if (exponent == 0x1f && !fpscr.ahp) {
        // Infinities and nans.
        exponent = 0xff;
        if (mantissa != 0) {
            // Nans.
            mantissa = mantissa << (23 - 10);
            if (bits(mantissa, 22) == 0) {
                // Signalling nan.
                fpscr.ioc = 1;
                mantissa |= (1 << 22);
            }
            if (fpscr.dn) {
                // Default NaN mode: discard the payload.
                mantissa &= ~mask(22);
                neg = false;
            }
        }
    } else {
        // Normal numbers: rebias the exponent, widen the mantissa.
        // (With AHP set, exponent 0x1f falls through here as a normal
        // value, since AHP has no infinities or NaNs.)
        exponent = exponent + (127 - 15);
        mantissa = mantissa << (23 - 10);
    }
    // Reassemble the result.
    uint32_t result = bits(mantissa, 22, 0);
    replaceBits(result, 30, 23, exponent);
    if (neg)
        result |= (1 << 31);
    return bitsToFp(result, junk);
}
530
/*
 * Convert a single precision value to a 16/32-bit signed or unsigned fixed
 * point value with "imm" fraction bits.  rzero forces round-towards-zero;
 * otherwise the current host rounding mode is used.  Out-of-range values
 * saturate and raise invalid-op; NaNs convert to zero with invalid-op.
 * The asm barriers order the fenv calls relative to the FP operations.
 */
uint64_t
vfpFpSToFixed(float val, bool isSigned, bool half,
              uint8_t imm, bool rzero)
{
    int rmode = rzero ? FeRoundZero : fegetround();
    __asm__ __volatile__("" : "=m" (rmode) : "m" (rmode));
    // Scale by 2^imm in round-to-nearest, then restore the target mode
    // for the actual integer rounding below.
    fesetround(FeRoundNearest);
    val = val * powf(2.0, imm);
    __asm__ __volatile__("" : "=m" (val) : "m" (val));
    fesetround(rmode);
    feclearexcept(FeAllExceptions);
    __asm__ __volatile__("" : "=m" (val) : "m" (val));
    float origVal = val;
    val = rintf(val);
    int fpType = std::fpclassify(val);
    if (fpType == FP_SUBNORMAL || fpType == FP_NAN) {
        if (fpType == FP_NAN) {
            feraiseexcept(FeInvalid);
        }
        val = 0.0;
    } else if (origVal != val) {
        // rintf changed the value: correct it if it landed more than half
        // a unit away for the selected mode (presumably defensive against
        // host rint behavior -- the cases below shouldn't normally fire
        // for a conforming rintf), and flag the loss of precision.
        switch (rmode) {
          case FeRoundNearest:
            if (origVal - val > 0.5)
                val += 1.0;
            else if (val - origVal > 0.5)
                val -= 1.0;
            break;
          case FeRoundDown:
            if (origVal < val)
                val -= 1.0;
            break;
          case FeRoundUpward:
            if (origVal > val)
                val += 1.0;
            break;
        }
        feraiseexcept(FeInexact);
    }

    // Saturate to the destination range.  Comparisons are done in double
    // so large single values are represented exactly.
    if (isSigned) {
        if (half) {
            // (int16_t)(1 << 15) is -0x8000, the most negative int16.
            if ((double)val < (int16_t)(1 << 15)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return (int16_t)(1 << 15);
            }
            if ((double)val > (int16_t)mask(15)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return (int16_t)mask(15);
            }
            return (int16_t)val;
        } else {
            // (int32_t)(1 << 31) is -0x80000000, the most negative int32.
            if ((double)val < (int32_t)(1 << 31)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return (int32_t)(1 << 31);
            }
            if ((double)val > (int32_t)mask(31)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return (int32_t)mask(31);
            }
            return (int32_t)val;
        }
    } else {
        if (half) {
            if ((double)val < 0) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return 0;
            }
            if ((double)val > (mask(16))) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return mask(16);
            }
            return (uint16_t)val;
        } else {
            if ((double)val < 0) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return 0;
            }
            if ((double)val > (mask(32))) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return mask(32);
            }
            return (uint32_t)val;
        }
    }
}
625
/*
 * Convert a 16/32-bit unsigned fixed point value with "imm" fraction bits
 * to single precision.  The divide's exception/flush handling is delegated
 * to fixDivDest(); the asm barriers keep the feclearexcept ordered before
 * the divide.
 */
float
vfpUFixedToFpS(FPSCR fpscr, uint32_t val, bool half, uint8_t imm)
{
    fesetround(FeRoundNearest);
    if (half)
        val = (uint16_t)val;
    float scale = powf(2.0, imm);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    feclearexcept(FeAllExceptions);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    return fixDivDest(fpscr, val / scale, (float)val, scale);
}
638
/*
 * Convert a 16/32-bit signed fixed point value with "imm" fraction bits to
 * single precision.  Half-width inputs are sign-extended first.  The
 * divide's exception/flush handling is delegated to fixDivDest().
 */
float
vfpSFixedToFpS(FPSCR fpscr, int32_t val, bool half, uint8_t imm)
{
    fesetround(FeRoundNearest);
    if (half)
        val = sext<16>(val & mask(16));
    float scale = powf(2.0, imm);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    feclearexcept(FeAllExceptions);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    return fixDivDest(fpscr, val / scale, (float)val, scale);
}
651
/*
 * Convert a double precision value to a 16/32-bit signed or unsigned fixed
 * point value with "imm" fraction bits.  Mirrors vfpFpSToFixed: rzero
 * forces round-towards-zero, out-of-range values saturate with invalid-op,
 * NaNs convert to zero with invalid-op.
 */
uint64_t
vfpFpDToFixed(double val, bool isSigned, bool half,
              uint8_t imm, bool rzero)
{
    int rmode = rzero ? FeRoundZero : fegetround();
    // Scale by 2^imm in round-to-nearest, then restore the target mode
    // for the actual integer rounding below.
    fesetround(FeRoundNearest);
    val = val * pow(2.0, imm);
    __asm__ __volatile__("" : "=m" (val) : "m" (val));
    fesetround(rmode);
    feclearexcept(FeAllExceptions);
    __asm__ __volatile__("" : "=m" (val) : "m" (val));
    double origVal = val;
    val = rint(val);
    int fpType = std::fpclassify(val);
    if (fpType == FP_SUBNORMAL || fpType == FP_NAN) {
        if (fpType == FP_NAN) {
            feraiseexcept(FeInvalid);
        }
        val = 0.0;
    } else if (origVal != val) {
        // rint changed the value: correct it if it landed more than half
        // a unit away for the selected mode (presumably defensive against
        // host rint behavior), and flag the loss of precision.
        switch (rmode) {
          case FeRoundNearest:
            if (origVal - val > 0.5)
                val += 1.0;
            else if (val - origVal > 0.5)
                val -= 1.0;
            break;
          case FeRoundDown:
            if (origVal < val)
                val -= 1.0;
            break;
          case FeRoundUpward:
            if (origVal > val)
                val += 1.0;
            break;
        }
        feraiseexcept(FeInexact);
    }
    // Saturate to the destination range.
    if (isSigned) {
        if (half) {
            // (int16_t)(1 << 15) is -0x8000, the most negative int16.
            if (val < (int16_t)(1 << 15)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return (int16_t)(1 << 15);
            }
            if (val > (int16_t)mask(15)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return (int16_t)mask(15);
            }
            return (int16_t)val;
        } else {
            // (int32_t)(1 << 31) is -0x80000000, the most negative int32.
            if (val < (int32_t)(1 << 31)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return (int32_t)(1 << 31);
            }
            if (val > (int32_t)mask(31)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return (int32_t)mask(31);
            }
            return (int32_t)val;
        }
    } else {
        if (half) {
            if (val < 0) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return 0;
            }
            if (val > mask(16)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return mask(16);
            }
            return (uint16_t)val;
        } else {
            if (val < 0) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return 0;
            }
            if (val > mask(32)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return mask(32);
            }
            return (uint32_t)val;
        }
    }
}
744
/*
 * Convert a 16/32-bit unsigned fixed point value with "imm" fraction bits
 * to double precision.  The divide's exception/flush handling is delegated
 * to fixDivDest().
 */
double
vfpUFixedToFpD(FPSCR fpscr, uint32_t val, bool half, uint8_t imm)
{
    fesetround(FeRoundNearest);
    if (half)
        val = (uint16_t)val;
    double scale = pow(2.0, imm);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    feclearexcept(FeAllExceptions);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    return fixDivDest(fpscr, val / scale, (double)val, scale);
}
757
/*
 * Convert a 16/32-bit signed fixed point value with "imm" fraction bits to
 * double precision.  Half-width inputs are sign-extended first.  The
 * divide's exception/flush handling is delegated to fixDivDest().
 */
double
vfpSFixedToFpD(FPSCR fpscr, int32_t val, bool half, uint8_t imm)
{
    fesetround(FeRoundNearest);
    if (half)
        val = sext<16>(val & mask(16));
    double scale = pow(2.0, imm);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    feclearexcept(FeAllExceptions);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    return fixDivDest(fpscr, val / scale, (double)val, scale);
}
770
/*
 * Run a two-operand FP operation (func) with ARM semantics: flush denormal
 * inputs when requested (setting FPSCR.IDC), execute under the requested
 * rounding mode, fix up NaN results, flush denormal results, and correct
 * for underflow being detected after rounding on the host (x86) but before
 * rounding on ARM.  The asm barriers keep the operation ordered relative
 * to the fenv state changes.
 */
template <class fpType>
fpType
FpOp::binaryOp(FPSCR &fpscr, fpType op1, fpType op2,
               fpType (*func)(fpType, fpType),
               bool flush, uint32_t rMode) const
{
    const bool single = (sizeof(fpType) == sizeof(float));
    fpType junk = 0.0;

    if (flush && flushToZero(op1, op2))
        fpscr.idc = 1;
    VfpSavedState state = prepFpState(rMode);
    __asm__ __volatile__ ("" : "=m" (op1), "=m" (op2), "=m" (state)
                             : "m" (op1), "m" (op2), "m" (state));
    fpType dest = func(op1, op2);
    __asm__ __volatile__ ("" : "=m" (dest) : "m" (dest));

    int fpClass = std::fpclassify(dest);
    // Get NAN behavior right. This varies between x86 and ARM.
    if (fpClass == FP_NAN) {
        const bool single = (sizeof(fpType) == sizeof(float));
        // Default quiet NaN pattern / quiet-bit mask.
        const uint64_t qnan =
            single ? 0x7fc00000 : ULL(0x7ff8000000000000);
        const bool nan1 = std::isnan(op1);
        const bool nan2 = std::isnan(op2);
        // A NaN is signalling when its quiet bits aren't all set.
        const bool signal1 = nan1 && ((fpToBits(op1) & qnan) != qnan);
        const bool signal2 = nan2 && ((fpToBits(op2) & qnan) != qnan);
        if ((!nan1 && !nan2) || (fpscr.dn == 1)) {
            dest = bitsToFp(qnan, junk);
        } else if (signal1) {
            dest = bitsToFp(fpToBits(op1) | qnan, junk);
        } else if (signal2) {
            dest = bitsToFp(fpToBits(op2) | qnan, junk);
        } else if (nan1) {
            dest = op1;
        } else if (nan2) {
            dest = op2;
        }
    } else if (flush && flushToZero(dest)) {
        feraiseexcept(FeUnderflow);
    } else if ((
                // +/- smallest normalized single/double.
                (single && (dest == bitsToFp(0x00800000, junk) ||
                     dest == bitsToFp(0x80800000, junk))) ||
                (!single &&
                    (dest == bitsToFp(ULL(0x0010000000000000), junk) ||
                     dest == bitsToFp(ULL(0x8010000000000000), junk)))
               ) && rMode != VfpRoundZero) {
        /*
         * Correct for the fact that underflow is detected -before- rounding
         * in ARM and -after- rounding in x86.
         */
        fesetround(FeRoundZero);
        __asm__ __volatile__ ("" : "=m" (op1), "=m" (op2)
                                 : "m" (op1), "m" (op2));
        fpType temp = func(op1, op2);
        __asm__ __volatile__ ("" : "=m" (temp) : "m" (temp));
        if (flush && flushToZero(temp)) {
            dest = temp;
        }
    }
    // Transfer host exception flags to FPSCR and restore rounding mode.
    finishVfp(fpscr, state);
    return dest;
}

template
float FpOp::binaryOp(FPSCR &fpscr, float op1, float op2,
                     float (*func)(float, float),
                     bool flush, uint32_t rMode) const;
template
double FpOp::binaryOp(FPSCR &fpscr, double op1, double op2,
                      double (*func)(double, double),
                      bool flush, uint32_t rMode) const;
843
/*
 * Run a one-operand FP operation (func) with ARM semantics; the
 * single-operand counterpart of binaryOp above.  Flushes denormal input
 * when requested (setting FPSCR.IDC), fixes up NaN results, flushes
 * denormal results, and applies the same underflow-before-rounding
 * correction.
 */
template <class fpType>
fpType
FpOp::unaryOp(FPSCR &fpscr, fpType op1, fpType (*func)(fpType),
              bool flush, uint32_t rMode) const
{
    const bool single = (sizeof(fpType) == sizeof(float));
    fpType junk = 0.0;

    if (flush && flushToZero(op1))
        fpscr.idc = 1;
    VfpSavedState state = prepFpState(rMode);
    __asm__ __volatile__ ("" : "=m" (op1), "=m" (state)
                             : "m" (op1), "m" (state));
    fpType dest = func(op1);
    __asm__ __volatile__ ("" : "=m" (dest) : "m" (dest));

    int fpClass = std::fpclassify(dest);
    // Get NAN behavior right. This varies between x86 and ARM.
    if (fpClass == FP_NAN) {
        const bool single = (sizeof(fpType) == sizeof(float));
        // Default quiet NaN pattern / quiet-bit mask.
        const uint64_t qnan =
            single ? 0x7fc00000 : ULL(0x7ff8000000000000);
        const bool nan = std::isnan(op1);
        if (!nan || fpscr.dn == 1) {
            dest = bitsToFp(qnan, junk);
        } else if (nan) {
            dest = bitsToFp(fpToBits(op1) | qnan, junk);
        }
    } else if (flush && flushToZero(dest)) {
        feraiseexcept(FeUnderflow);
    } else if ((
                // +/- smallest normalized single/double.
                (single && (dest == bitsToFp(0x00800000, junk) ||
                     dest == bitsToFp(0x80800000, junk))) ||
                (!single &&
                    (dest == bitsToFp(ULL(0x0010000000000000), junk) ||
                     dest == bitsToFp(ULL(0x8010000000000000), junk)))
               ) && rMode != VfpRoundZero) {
        /*
         * Correct for the fact that underflow is detected -before- rounding
         * in ARM and -after- rounding in x86.
         */
        fesetround(FeRoundZero);
        __asm__ __volatile__ ("" : "=m" (op1) : "m" (op1));
        fpType temp = func(op1);
        __asm__ __volatile__ ("" : "=m" (temp) : "m" (temp));
        if (flush && flushToZero(temp)) {
            dest = temp;
        }
    }
    // Transfer host exception flags to FPSCR and restore rounding mode.
    finishVfp(fpscr, state);
    return dest;
}

template
float FpOp::unaryOp(FPSCR &fpscr, float op1, float (*func)(float),
                    bool flush, uint32_t rMode) const;
template
double FpOp::unaryOp(FPSCR &fpscr, double op1, double (*func)(double),
                     bool flush, uint32_t rMode) const;
903
904IntRegIndex
905VfpMacroOp::addStride(IntRegIndex idx, unsigned stride)
906{
907    if (wide) {
908        stride *= 2;
909    }
910    unsigned offset = idx % 8;
911    idx = (IntRegIndex)(idx - offset);
912    offset += stride;
913    idx = (IntRegIndex)(idx + (offset % 8));
914    return idx;
915}
916
917void
918VfpMacroOp::nextIdxs(IntRegIndex &dest, IntRegIndex &op1, IntRegIndex &op2)
919{
920    unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2;
921    assert(!inScalarBank(dest));
922    dest = addStride(dest, stride);
923    op1 = addStride(op1, stride);
924    if (!inScalarBank(op2)) {
925        op2 = addStride(op2, stride);
926    }
927}
928
929void
930VfpMacroOp::nextIdxs(IntRegIndex &dest, IntRegIndex &op1)
931{
932    unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2;
933    assert(!inScalarBank(dest));
934    dest = addStride(dest, stride);
935    if (!inScalarBank(op1)) {
936        op1 = addStride(op1, stride);
937    }
938}
939
940void
941VfpMacroOp::nextIdxs(IntRegIndex &dest)
942{
943    unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2;
944    assert(!inScalarBank(dest));
945    dest = addStride(dest, stride);
946}
947
948}
949