vfp.hh revision 7398:063002e7106b
1/*
2 * Copyright (c) 2010 ARM Limited
3 * All rights reserved
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder.  You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions are
16 * met: redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer;
18 * redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution;
21 * neither the name of the copyright holders nor the names of its
22 * contributors may be used to endorse or promote products derived from
23 * this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 *
37 * Authors: Gabe Black
38 */
39
40#ifndef __ARCH_ARM_INSTS_VFP_HH__
41#define __ARCH_ARM_INSTS_VFP_HH__
42
#include <fenv.h>

#include <cmath>
#include <cstring>

#include "arch/arm/insts/misc.hh"
#include "arch/arm/miscregs.hh"
47
48namespace ArmISA
49{
50
// Describes where (if anywhere) an instruction sits within a VFP
// macroop's sequence of microops.
enum VfpMicroMode {
    VfpNotAMicroop,   // A standalone instruction, not part of a macroop.
    VfpMicroop,       // An interior microop of a macroop.
    VfpFirstMicroop,  // The first microop of a macroop.
    VfpLastMicroop    // The last microop of a macroop.
};
57
58template<class T>
59static inline void
60setVfpMicroFlags(VfpMicroMode mode, T &flags)
61{
62    switch (mode) {
63      case VfpMicroop:
64        flags[StaticInst::IsMicroop] = true;
65        break;
66      case VfpFirstMicroop:
67        flags[StaticInst::IsMicroop] =
68            flags[StaticInst::IsFirstMicroop] = true;
69        break;
70      case VfpLastMicroop:
71        flags[StaticInst::IsMicroop] =
72            flags[StaticInst::IsLastMicroop] = true;
73        break;
74      case VfpNotAMicroop:
75        break;
76    }
77    if (mode == VfpMicroop || mode == VfpFirstMicroop) {
78        flags[StaticInst::IsDelayedCommit] = true;
79    }
80}
81
// Host floating point exception status bits, aliased to the <fenv.h>
// constants so they can be or-ed together and passed directly to
// feclearexcept()/fetestexcept()/feraiseexcept().
enum FeExceptionBit
{
    FeDivByZero = FE_DIVBYZERO,
    FeInexact = FE_INEXACT,
    FeInvalid = FE_INVALID,
    FeOverflow = FE_OVERFLOW,
    FeUnderflow = FE_UNDERFLOW,
    FeAllExceptions = FE_ALL_EXCEPT
};
91
// Host rounding modes, aliased to the <fenv.h> constants so they can be
// passed directly to fesetround()/fegetround().
enum FeRoundingMode
{
    FeRoundDown = FE_DOWNWARD,
    FeRoundNearest = FE_TONEAREST,
    FeRoundZero = FE_TOWARDZERO,
    FeRoundUpward = FE_UPWARD
};
99
// Architectural rounding modes as encoded in the FPSCR.RMode field.
// These values are fixed by the ISA encoding; prepFpState() maps them
// onto the host's FeRoundingMode values.
enum VfpRoundingMode
{
    VfpRoundNearest = 0,
    VfpRoundUpward = 1,
    VfpRoundDown = 2,
    VfpRoundZero = 3
};
107
// If op is a denormal, replace it with a zero of the same sign and
// report that a flush happened. Non-denormal values pass through
// untouched.
template <class fpType>
static inline bool
flushToZero(fpType &op)
{
    if (std::fpclassify(op) != FP_SUBNORMAL)
        return false;
    fpType junk = 0.0;
    // Keep only the sign bit of the original value.
    const uint64_t signBit = (uint64_t)1 << (sizeof(fpType) * 8 - 1);
    op = bitsToFp(fpToBits(op) & signBit, junk);
    return true;
}
120
// Flush both operands, unconditionally evaluating each so a flush of
// the first operand can't short-circuit away the second.
template <class fpType>
static inline bool
flushToZero(fpType &op1, fpType &op2)
{
    const bool flushedFirst = flushToZero(op1);
    const bool flushedSecond = flushToZero(op2);
    return flushedFirst || flushedSecond;
}
129
130template <class fpType>
131static inline void
132vfpFlushToZero(FPSCR &fpscr, fpType &op)
133{
134    if (fpscr.fz == 1 && flushToZero(op)) {
135        fpscr.idc = 1;
136    }
137}
138
// Two-operand convenience overload: applies the FPSCR-gated flush to
// each operand in turn, accumulating IDC as appropriate.
template <class fpType>
static inline void
vfpFlushToZero(FPSCR &fpscr, fpType &op1, fpType &op2)
{
    vfpFlushToZero(fpscr, op1);
    vfpFlushToZero(fpscr, op2);
}
146
// Return the raw IEEE-754 bit pattern of a single-precision float.
// Uses memcpy rather than a union: writing one union member and reading
// another is undefined behavior in C++, while memcpy is the
// well-defined way to type-pun and compiles to the same code.
static inline uint32_t
fpToBits(float fp)
{
    uint32_t bits;
    std::memcpy(&bits, &fp, sizeof(bits));
    return bits;
}
158
// Return the raw IEEE-754 bit pattern of a double-precision float.
// Uses memcpy rather than a union: writing one union member and reading
// another is undefined behavior in C++, while memcpy is the
// well-defined way to type-pun and compiles to the same code.
static inline uint64_t
fpToBits(double fp)
{
    uint64_t bits;
    std::memcpy(&bits, &fp, sizeof(bits));
    return bits;
}
170
// Build a single-precision float from a raw bit pattern. Only the low
// 32 bits of "bits" are used (matching the old union's implicit
// truncation); "junk" exists purely so overload resolution picks the
// float flavor, its value is ignored. memcpy replaces the old union
// type punning, which is undefined behavior in C++.
static inline float
bitsToFp(uint64_t bits, float junk)
{
    float fp;
    const uint32_t lower = (uint32_t)bits;
    std::memcpy(&fp, &lower, sizeof(fp));
    return fp;
}
182
// Build a double-precision float from a raw bit pattern. "junk" exists
// purely so overload resolution picks the double flavor, its value is
// ignored. memcpy replaces the old union type punning, which is
// undefined behavior in C++.
static inline double
bitsToFp(uint64_t bits, double junk)
{
    double fp;
    std::memcpy(&fp, &bits, sizeof(fp));
    return fp;
}
194
195typedef int VfpSavedState;
196
197static inline VfpSavedState
198prepFpState(uint32_t rMode)
199{
200    int roundingMode = fegetround();
201    feclearexcept(FeAllExceptions);
202    switch (rMode) {
203      case VfpRoundNearest:
204        fesetround(FeRoundNearest);
205        break;
206      case VfpRoundUpward:
207        fesetround(FeRoundUpward);
208        break;
209      case VfpRoundDown:
210        fesetround(FeRoundDown);
211        break;
212      case VfpRoundZero:
213        fesetround(FeRoundZero);
214        break;
215    }
216    return roundingMode;
217}
218
219static inline void
220finishVfp(FPSCR &fpscr, VfpSavedState state)
221{
222    int exceptions = fetestexcept(FeAllExceptions);
223    bool underflow = false;
224    if (exceptions & FeInvalid) {
225        fpscr.ioc = 1;
226    }
227    if (exceptions & FeDivByZero) {
228        fpscr.dzc = 1;
229    }
230    if (exceptions & FeOverflow) {
231        fpscr.ofc = 1;
232    }
233    if (exceptions & FeUnderflow) {
234        underflow = true;
235        fpscr.ufc = 1;
236    }
237    if ((exceptions & FeInexact) && !(underflow && fpscr.fz)) {
238        fpscr.ixc = 1;
239    }
240    fesetround(state);
241}
242
243template <class fpType>
244static inline fpType
245fixDest(FPSCR fpscr, fpType val, fpType op1)
246{
247    int fpClass = std::fpclassify(val);
248    fpType junk = 0.0;
249    if (fpClass == FP_NAN) {
250        const bool single = (sizeof(val) == sizeof(float));
251        const uint64_t qnan = single ? 0x7fc00000 : ULL(0x7ff8000000000000);
252        const bool nan = std::isnan(op1);
253        if (!nan || (fpscr.dn == 1)) {
254            val = bitsToFp(qnan, junk);
255        } else if (nan) {
256            val = bitsToFp(fpToBits(op1) | qnan, junk);
257        }
258    } else if (fpClass == FP_SUBNORMAL && fpscr.fz == 1) {
259        // Turn val into a zero with the correct sign;
260        uint64_t bitMask = ULL(0x1) << (sizeof(fpType) * 8 - 1);
261        val = bitsToFp(fpToBits(val) & bitMask, junk);
262        feclearexcept(FeInexact);
263        feraiseexcept(FeUnderflow);
264    }
265    return val;
266}
267
268template <class fpType>
269static inline fpType
270fixDest(FPSCR fpscr, fpType val, fpType op1, fpType op2)
271{
272    int fpClass = std::fpclassify(val);
273    fpType junk = 0.0;
274    if (fpClass == FP_NAN) {
275        const bool single = (sizeof(val) == sizeof(float));
276        const uint64_t qnan = single ? 0x7fc00000 : ULL(0x7ff8000000000000);
277        const bool nan1 = std::isnan(op1);
278        const bool nan2 = std::isnan(op2);
279        const bool signal1 = nan1 && ((fpToBits(op1) & qnan) != qnan);
280        const bool signal2 = nan2 && ((fpToBits(op2) & qnan) != qnan);
281        if ((!nan1 && !nan2) || (fpscr.dn == 1)) {
282            val = bitsToFp(qnan, junk);
283        } else if (signal1) {
284            val = bitsToFp(fpToBits(op1) | qnan, junk);
285        } else if (signal2) {
286            val = bitsToFp(fpToBits(op2) | qnan, junk);
287        } else if (nan1) {
288            val = op1;
289        } else if (nan2) {
290            val = op2;
291        }
292    } else if (fpClass == FP_SUBNORMAL && fpscr.fz == 1) {
293        // Turn val into a zero with the correct sign;
294        uint64_t bitMask = ULL(0x1) << (sizeof(fpType) * 8 - 1);
295        val = bitsToFp(fpToBits(val) & bitMask, junk);
296        feclearexcept(FeInexact);
297        feraiseexcept(FeUnderflow);
298    }
299    return val;
300}
301
// Fix up the result of a VFP divide. Beyond the NaN/denormal handling
// done by fixDest(), a result equal to +/- the smallest normalized
// number may really be a denormal that was rounded up; the divide is
// redone in round-to-zero mode to decide whether it should be flushed
// and underflow raised.
// NOTE(review): the rounding mode is left at round-to-zero on that
// path; presumably the caller restores it via finishVfp() -- confirm.
template <class fpType>
static inline fpType
fixDivDest(FPSCR fpscr, fpType val, fpType op1, fpType op2)
{
    fpType mid = fixDest(fpscr, val, op1, op2);
    const bool single = (sizeof(fpType) == sizeof(float));
    const fpType junk = 0.0;
    // +/- smallest normal: 0x00800000 (single) / 0x0010...0 (double).
    if ((single && (val == bitsToFp(0x00800000, junk) ||
                    val == bitsToFp(0x80800000, junk))) ||
        (!single && (val == bitsToFp(ULL(0x0010000000000000), junk) ||
                     val == bitsToFp(ULL(0x8010000000000000), junk)))
        ) {
        // The empty asm statements are compiler barriers: they keep the
        // compiler from folding or moving the division across the
        // fesetround() call.
        __asm__ __volatile__("" : "=m" (op1) : "m" (op1));
        fesetround(FeRoundZero);
        fpType temp = 0.0;
        __asm__ __volatile__("" : "=m" (temp) : "m" (temp));
        temp = op1 / op2;
        if (flushToZero(temp)) {
            feraiseexcept(FeUnderflow);
            if (fpscr.fz) {
                feclearexcept(FeInexact);
                mid = temp;
            }
        }
        __asm__ __volatile__("" :: "m" (temp));
    }
    return mid;
}
330
// Convert a double-precision result to single precision with ARM
// semantics. If the double is a NaN, a single-precision NaN is built
// from its sign, the top 22 mantissa bits, and a forced-quiet exponent
// so fixDest() can propagate it. Results equal to +/- the smallest
// single-precision normal are re-rounded toward zero to detect a value
// that should really be a flushed denormal.
static inline float
fixFpDFpSDest(FPSCR fpscr, double val)
{
    const float junk = 0.0;
    float op1 = 0.0;
    if (std::isnan(val)) {
        uint64_t valBits = fpToBits(val);
        // Sign bit, all-ones exponent + quiet bit, top of the mantissa.
        uint32_t op1Bits = bits(valBits, 50, 29) |
                           (mask(9) << 22) |
                           (bits(valBits, 63) << 31);
        op1 = bitsToFp(op1Bits, junk);
    }
    float mid = fixDest(fpscr, (float)val, op1);
    // In flush-to-zero mode an underflow suppresses inexact.
    if (fpscr.fz && fetestexcept(FeUnderflow | FeInexact) ==
                    (FeUnderflow | FeInexact)) {
        feclearexcept(FeInexact);
    }
    // +/- smallest single-precision normal: may hide a denormal that was
    // rounded up; redo the narrowing in round-to-zero to check.
    if (mid == bitsToFp(0x00800000, junk) ||
        mid == bitsToFp(0x80800000, junk)) {
        // Empty asm statements are compiler barriers around fesetround().
        __asm__ __volatile__("" : "=m" (val) : "m" (val));
        fesetround(FeRoundZero);
        float temp = 0.0;
        __asm__ __volatile__("" : "=m" (temp) : "m" (temp));
        temp = val;
        if (flushToZero(temp)) {
            feraiseexcept(FeUnderflow);
            if (fpscr.fz) {
                feclearexcept(FeInexact);
                mid = temp;
            }
        }
        __asm__ __volatile__("" :: "m" (temp));
    }
    return mid;
}
366
// Convert a single-precision value to double precision with ARM
// semantics. If the single is a NaN, a double-precision NaN is built
// from its sign, mantissa, and a forced-quiet exponent so fixDest() can
// propagate it. Results equal to +/- the smallest double-precision
// normal are re-rounded toward zero to detect a value that should
// really be a flushed denormal.
static inline double
fixFpSFpDDest(FPSCR fpscr, float val)
{
    const double junk = 0.0;
    double op1 = 0.0;
    if (std::isnan(val)) {
        uint32_t valBits = fpToBits(val);
        // Sign bit, all-ones exponent + quiet bit, widened mantissa.
        uint64_t op1Bits = ((uint64_t)bits(valBits, 21, 0) << 29) |
                           (mask(12) << 51) |
                           ((uint64_t)bits(valBits, 31) << 63);
        op1 = bitsToFp(op1Bits, junk);
    }
    double mid = fixDest(fpscr, (double)val, op1);
    // +/- smallest double-precision normal: may hide a denormal that was
    // rounded up; redo the widening in round-to-zero to check.
    if (mid == bitsToFp(ULL(0x0010000000000000), junk) ||
        mid == bitsToFp(ULL(0x8010000000000000), junk)) {
        // Empty asm statements are compiler barriers around fesetround().
        __asm__ __volatile__("" : "=m" (val) : "m" (val));
        fesetround(FeRoundZero);
        double temp = 0.0;
        __asm__ __volatile__("" : "=m" (temp) : "m" (temp));
        temp = val;
        if (flushToZero(temp)) {
            feraiseexcept(FeUnderflow);
            if (fpscr.fz) {
                feclearexcept(FeInexact);
                mid = temp;
            }
        }
        __asm__ __volatile__("" :: "m" (temp));
    }
    return mid;
}
398
// Convert a single-precision value to half precision and insert it into
// the top or bottom half of "dest" (which packs two halves into one
// 32-bit register image). Honors FPSCR.AHP (alternative half-precision,
// no inf/NaN encodings), FPSCR.DN (default NaN), FPSCR.RMode, and sets
// the cumulative IOC/UFC/OFC/IXC flags as it goes.
static inline float
vcvtFpSFpH(FPSCR &fpscr, float op, float dest, bool top)
{
    float junk = 0.0;
    uint32_t destBits = fpToBits(dest);
    uint32_t opBits = fpToBits(op);
    // Extract the single-precision fields.
    bool neg = bits(opBits, 31);
    uint32_t exponent = bits(opBits, 30, 23);
    uint32_t oldMantissa = bits(opBits, 22, 0);
    // Top 10 mantissa bits survive; the low 13 are "extra" bits that
    // only influence rounding/inexact.
    uint32_t mantissa = oldMantissa >> (23 - 10);
    // Do the conversion.
    uint32_t extra = oldMantissa & mask(23 - 10);
    if (exponent == 0xff) {
        if (oldMantissa != 0) {
            // NaNs.
            if (bits(mantissa, 9) == 0) {
                // Signalling NaN: converting it is an invalid operation.
                fpscr.ioc = 1;
            }
            if (fpscr.ahp) {
                // Alternative format has no NaN encoding.
                mantissa = 0;
                exponent = 0;
                fpscr.ioc = 1;
            } else if (fpscr.dn) {
                // Default NaN.
                mantissa = (1 << 9);
                exponent = 0x1f;
                neg = false;
            } else {
                // Propagate, quieted.
                exponent = 0x1f;
                mantissa |= (1 << 9);
            }
        } else {
            // Infinities.
            exponent = 0x1F;
            if (fpscr.ahp) {
                // Alternative format has no infinity; saturate instead.
                fpscr.ioc = 1;
                mantissa = 0x3ff;
            } else {
                mantissa = 0;
            }
        }
    } else if (exponent == 0 && oldMantissa == 0) {
        // Zero, don't need to do anything.
    } else {
        // Normalized or denormalized numbers.

        bool inexact = (extra != 0);

        if (exponent == 0) {
            // Denormalized.

            // If flush to zero is on, this shouldn't happen.
            assert(fpscr.fz == 0);

            // Check for underflow
            if (inexact || fpscr.ufe)
                fpscr.ufc = 1;

            // Handle rounding.
            // NOTE(review): "extra" holds 13 dropped bits here, so the
            // round-to-nearest midpoint would be expected at (1 << 12);
            // confirm the (1 << 9) threshold against the ARM ARM.
            unsigned mode = fpscr.rMode;
            if ((mode == VfpRoundUpward && !neg && extra) ||
                (mode == VfpRoundDown && neg && extra) ||
                (mode == VfpRoundNearest &&
                 (extra > (1 << 9) ||
                  (extra == (1 << 9) && bits(mantissa, 0))))) {
                mantissa++;
            }

            // See if the number became normalized after rounding.
            if (mantissa == (1 << 10)) {
                mantissa = 0;
                exponent = 1;
            }
        } else {
            // Normalized.

            // We need to track the dropped bits differently since
            // more can be dropped by denormalizing.
            bool topOne = bits(extra, 12);
            bool restZeros = bits(extra, 11, 0) == 0;

            if (exponent <= (127 - 15)) {
                // The result is too small. Denormalize.
                mantissa |= (1 << 10);
                while (mantissa && exponent <= (127 - 15)) {
                    restZeros = restZeros && !topOne;
                    topOne = bits(mantissa, 0);
                    mantissa = mantissa >> 1;
                    exponent++;
                }
                if (topOne || !restZeros)
                    inexact = true;
                exponent = 0;
            } else {
                // Change bias from single (127) to half (15).
                exponent -= (127 - 15);
            }

            if (exponent == 0 && (inexact || fpscr.ufe)) {
                // Underflow
                fpscr.ufc = 1;
            }

            // Handle rounding: topOne is the first dropped bit,
            // restZeros says whether everything below it is zero.
            unsigned mode = fpscr.rMode;
            bool nonZero = topOne || !restZeros;
            if ((mode == VfpRoundUpward && !neg && nonZero) ||
                (mode == VfpRoundDown && neg && nonZero) ||
                (mode == VfpRoundNearest && topOne &&
                 (!restZeros || bits(mantissa, 0)))) {
                mantissa++;
            }

            // See if we rounded up and need to bump the exponent.
            if (mantissa == (1 << 10)) {
                mantissa = 0;
                exponent++;
            }

            // Deal with overflow
            if (fpscr.ahp) {
                // Alternative format: saturate to max, flag invalid.
                if (exponent >= 0x20) {
                    exponent = 0x1f;
                    mantissa = 0x3ff;
                    fpscr.ioc = 1;
                    // Suppress inexact exception.
                    inexact = false;
                }
            } else {
                if (exponent >= 0x1f) {
                    if ((mode == VfpRoundNearest) ||
                        (mode == VfpRoundUpward && !neg) ||
                        (mode == VfpRoundDown && neg)) {
                        // Overflow to infinity.
                        exponent = 0x1f;
                        mantissa = 0;
                    } else {
                        // Overflow to max normal.
                        exponent = 0x1e;
                        mantissa = 0x3ff;
                    }
                    fpscr.ofc = 1;
                    inexact = true;
                }
            }
        }

        if (inexact) {
            fpscr.ixc = 1;
        }
    }
    // Reassemble and install the result.
    uint32_t result = bits(mantissa, 9, 0);
    replaceBits(result, 14, 10, exponent);
    if (neg)
        result |= (1 << 15);
    if (top)
        replaceBits(destBits, 31, 16, result);
    else
        replaceBits(destBits, 15, 0, result);
    return bitsToFp(destBits, junk);
}
562
// Convert the half-precision value packed in the top or bottom half of
// "op" (a 32-bit register image) to single precision. The conversion is
// exact, so only the invalid-operation flag (for signalling NaNs) can
// be raised. With FPSCR.AHP set, exponent 0x1f is an ordinary value
// (the alternative format has no inf/NaN), so it falls through to the
// normal-number path.
static inline float
vcvtFpHFpS(FPSCR &fpscr, float op, bool top)
{
    float junk = 0.0;
    uint32_t opBits = fpToBits(op);
    // Extract the operand.
    if (top)
        opBits = bits(opBits, 31, 16);
    else
        opBits = bits(opBits, 15, 0);
    // Extract the bitfields.
    bool neg = bits(opBits, 15);
    uint32_t exponent = bits(opBits, 14, 10);
    uint32_t mantissa = bits(opBits, 9, 0);
    // Do the conversion.
    if (exponent == 0) {
        if (mantissa != 0) {
            // Half denormals are representable as single normals:
            // normalize the value.
            exponent = exponent + (127 - 15) + 1;
            while (mantissa < (1 << 10)) {
                mantissa = mantissa << 1;
                exponent--;
            }
        }
        mantissa = mantissa << (23 - 10);
    } else if (exponent == 0x1f && !fpscr.ahp) {
        // Infinities and nans.
        exponent = 0xff;
        if (mantissa != 0) {
            // Nans.
            mantissa = mantissa << (23 - 10);
            if (bits(mantissa, 22) == 0) {
                // Signalling nan: flag invalid and quiet it.
                fpscr.ioc = 1;
                mantissa |= (1 << 22);
            }
            if (fpscr.dn) {
                // Default NaN mode: drop the payload.
                mantissa &= ~mask(22);
                neg = false;
            }
        }
    } else {
        // Normal numbers: rebias the exponent, widen the mantissa.
        exponent = exponent + (127 - 15);
        mantissa = mantissa << (23 - 10);
    }
    // Reassemble the result.
    uint32_t result = bits(mantissa, 22, 0);
    replaceBits(result, 30, 23, exponent);
    if (neg)
        result |= (1 << 31);
    return bitsToFp(result, junk);
}
615
616static inline double
617makeDouble(uint32_t low, uint32_t high)
618{
619    double junk = 0.0;
620    return bitsToFp((uint64_t)low | ((uint64_t)high << 32), junk);
621}
622
623static inline uint32_t
624lowFromDouble(double val)
625{
626    return fpToBits(val);
627}
628
629static inline uint32_t
630highFromDouble(double val)
631{
632    return fpToBits(val) >> 32;
633}
634
// Convert a single-precision float to a fixed-point integer.
//   isSigned - produce a signed result
//   half     - produce a 16-bit result (else 32-bit)
//   imm      - number of fraction bits (value is scaled by 2^imm first)
//   rzero    - round toward zero (the VFP default) instead of the
//              current host rounding mode
// Out-of-range values saturate and raise invalid; the empty asm
// statements are compiler barriers keeping the arithmetic ordered
// around the fesetround()/feclearexcept() calls.
static inline uint64_t
vfpFpSToFixed(float val, bool isSigned, bool half,
              uint8_t imm, bool rzero = true)
{
    int rmode = rzero ? FeRoundZero : fegetround();
    __asm__ __volatile__("" : "=m" (rmode) : "m" (rmode));
    // Scale by 2^imm in round-to-nearest, then restore the requested
    // mode for the actual integer rounding below.
    fesetround(FeRoundNearest);
    val = val * powf(2.0, imm);
    __asm__ __volatile__("" : "=m" (val) : "m" (val));
    fesetround(rmode);
    feclearexcept(FeAllExceptions);
    __asm__ __volatile__("" : "=m" (val) : "m" (val));
    float origVal = val;
    val = rintf(val);
    int fpType = std::fpclassify(val);
    if (fpType == FP_SUBNORMAL || fpType == FP_NAN) {
        // NaN converts to zero and raises invalid; denormals to zero.
        if (fpType == FP_NAN) {
            feraiseexcept(FeInvalid);
        }
        val = 0.0;
    } else if (origVal != val) {
        // rintf() rounded; nudge the result so it matches the requested
        // rounding mode, then flag the loss of precision.
        switch (rmode) {
          case FeRoundNearest:
            if (origVal - val > 0.5)
                val += 1.0;
            else if (val - origVal > 0.5)
                val -= 1.0;
            break;
          case FeRoundDown:
            if (origVal < val)
                val -= 1.0;
            break;
          case FeRoundUpward:
            if (origVal > val)
                val += 1.0;
            break;
        }
        feraiseexcept(FeInexact);
    }

    // Saturate to the destination range, raising invalid (and clearing
    // inexact) when the value doesn't fit.
    // NOTE(review): (int16_t)(1 << 15) and (int32_t)(1 << 31) rely on
    // implementation-defined narrowing / signed shift behavior to
    // produce INT16_MIN / INT32_MIN -- confirm on the target compilers.
    if (isSigned) {
        if (half) {
            if ((double)val < (int16_t)(1 << 15)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return (int16_t)(1 << 15);
            }
            if ((double)val > (int16_t)mask(15)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return (int16_t)mask(15);
            }
            return (int16_t)val;
        } else {
            if ((double)val < (int32_t)(1 << 31)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return (int32_t)(1 << 31);
            }
            if ((double)val > (int32_t)mask(31)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return (int32_t)mask(31);
            }
            return (int32_t)val;
        }
    } else {
        if (half) {
            if ((double)val < 0) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return 0;
            }
            if ((double)val > (mask(16))) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return mask(16);
            }
            return (uint16_t)val;
        } else {
            if ((double)val < 0) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return 0;
            }
            if ((double)val > (mask(32))) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return mask(32);
            }
            return (uint32_t)val;
        }
    }
}
729
// Convert an unsigned fixed-point value (imm fraction bits, optionally
// only 16 bits wide) to single precision by dividing by 2^imm. The
// asm statements are compiler barriers ordering the division against
// feclearexcept(); fixDivDest() handles denormal results.
static inline float
vfpUFixedToFpS(FPSCR fpscr, uint32_t val, bool half, uint8_t imm)
{
    fesetround(FeRoundNearest);
    if (half)
        val = (uint16_t)val;
    float scale = powf(2.0, imm);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    feclearexcept(FeAllExceptions);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    return fixDivDest(fpscr, val / scale, (float)val, scale);
}
742
// Convert a signed fixed-point value (imm fraction bits, optionally
// 16-bit sign-extended) to single precision by dividing by 2^imm. The
// asm statements are compiler barriers ordering the division against
// feclearexcept(); fixDivDest() handles denormal results.
static inline float
vfpSFixedToFpS(FPSCR fpscr, int32_t val, bool half, uint8_t imm)
{
    fesetround(FeRoundNearest);
    if (half)
        val = sext<16>(val & mask(16));
    float scale = powf(2.0, imm);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    feclearexcept(FeAllExceptions);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    return fixDivDest(fpscr, val / scale, (float)val, scale);
}
755
// Convert a double-precision float to a fixed-point integer; the
// double-precision counterpart of vfpFpSToFixed(), with the same
// parameters and the same saturate-and-flag behavior.
static inline uint64_t
vfpFpDToFixed(double val, bool isSigned, bool half,
              uint8_t imm, bool rzero = true)
{
    int rmode = rzero ? FeRoundZero : fegetround();
    // Scale by 2^imm in round-to-nearest, then restore the requested
    // mode for the actual integer rounding below. The asm statements
    // are compiler barriers around the fenv calls.
    fesetround(FeRoundNearest);
    val = val * pow(2.0, imm);
    __asm__ __volatile__("" : "=m" (val) : "m" (val));
    fesetround(rmode);
    feclearexcept(FeAllExceptions);
    __asm__ __volatile__("" : "=m" (val) : "m" (val));
    double origVal = val;
    val = rint(val);
    int fpType = std::fpclassify(val);
    if (fpType == FP_SUBNORMAL || fpType == FP_NAN) {
        // NaN converts to zero and raises invalid; denormals to zero.
        if (fpType == FP_NAN) {
            feraiseexcept(FeInvalid);
        }
        val = 0.0;
    } else if (origVal != val) {
        // rint() rounded; nudge the result so it matches the requested
        // rounding mode, then flag the loss of precision.
        switch (rmode) {
          case FeRoundNearest:
            if (origVal - val > 0.5)
                val += 1.0;
            else if (val - origVal > 0.5)
                val -= 1.0;
            break;
          case FeRoundDown:
            if (origVal < val)
                val -= 1.0;
            break;
          case FeRoundUpward:
            if (origVal > val)
                val += 1.0;
            break;
        }
        feraiseexcept(FeInexact);
    }
    // Saturate to the destination range, raising invalid (and clearing
    // inexact) when the value doesn't fit.
    if (isSigned) {
        if (half) {
            if (val < (int16_t)(1 << 15)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return (int16_t)(1 << 15);
            }
            if (val > (int16_t)mask(15)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return (int16_t)mask(15);
            }
            return (int16_t)val;
        } else {
            if (val < (int32_t)(1 << 31)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return (int32_t)(1 << 31);
            }
            if (val > (int32_t)mask(31)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return (int32_t)mask(31);
            }
            return (int32_t)val;
        }
    } else {
        if (half) {
            if (val < 0) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return 0;
            }
            if (val > mask(16)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return mask(16);
            }
            return (uint16_t)val;
        } else {
            if (val < 0) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return 0;
            }
            if (val > mask(32)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return mask(32);
            }
            return (uint32_t)val;
        }
    }
}
848
// Convert an unsigned fixed-point value (imm fraction bits, optionally
// only 16 bits wide) to double precision by dividing by 2^imm. The
// asm statements are compiler barriers ordering the division against
// feclearexcept(); fixDivDest() handles denormal results.
static inline double
vfpUFixedToFpD(FPSCR fpscr, uint32_t val, bool half, uint8_t imm)
{
    fesetround(FeRoundNearest);
    if (half)
        val = (uint16_t)val;
    double scale = pow(2.0, imm);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    feclearexcept(FeAllExceptions);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    return fixDivDest(fpscr, val / scale, (double)val, scale);
}
861
// Convert a signed fixed-point value (imm fraction bits, optionally
// 16-bit sign-extended) to double precision by dividing by 2^imm. The
// asm statements are compiler barriers ordering the division against
// feclearexcept(); fixDivDest() handles denormal results.
static inline double
vfpSFixedToFpD(FPSCR fpscr, int32_t val, bool half, uint8_t imm)
{
    fesetround(FeRoundNearest);
    if (half)
        val = sext<16>(val & mask(16));
    double scale = pow(2.0, imm);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    feclearexcept(FeAllExceptions);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    return fixDivDest(fpscr, val / scale, (double)val, scale);
}
874
875class VfpMacroOp : public PredMacroOp
876{
877  public:
878    static bool
879    inScalarBank(IntRegIndex idx)
880    {
881        return (idx % 32) < 8;
882    }
883
884  protected:
885    bool wide;
886
887    VfpMacroOp(const char *mnem, ExtMachInst _machInst,
888            OpClass __opClass, bool _wide) :
889        PredMacroOp(mnem, _machInst, __opClass), wide(_wide)
890    {}
891
892    IntRegIndex
893    addStride(IntRegIndex idx, unsigned stride)
894    {
895        if (wide) {
896            stride *= 2;
897        }
898        unsigned offset = idx % 8;
899        idx = (IntRegIndex)(idx - offset);
900        offset += stride;
901        idx = (IntRegIndex)(idx + (offset % 8));
902        return idx;
903    }
904
905    void
906    nextIdxs(IntRegIndex &dest, IntRegIndex &op1, IntRegIndex &op2)
907    {
908        unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2;
909        assert(!inScalarBank(dest));
910        dest = addStride(dest, stride);
911        op1 = addStride(op1, stride);
912        if (!inScalarBank(op2)) {
913            op2 = addStride(op2, stride);
914        }
915    }
916
917    void
918    nextIdxs(IntRegIndex &dest, IntRegIndex &op1)
919    {
920        unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2;
921        assert(!inScalarBank(dest));
922        dest = addStride(dest, stride);
923        if (!inScalarBank(op1)) {
924            op1 = addStride(op1, stride);
925        }
926    }
927
928    void
929    nextIdxs(IntRegIndex &dest)
930    {
931        unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2;
932        assert(!inScalarBank(dest));
933        dest = addStride(dest, stride);
934    }
935};
936
// Single-precision addition.
static inline float
fpAddS(float a, float b)
{
    const float sum = a + b;
    return sum;
}
942
// Double-precision addition.
static inline double
fpAddD(double a, double b)
{
    const double sum = a + b;
    return sum;
}
948
// Single-precision subtraction.
static inline float
fpSubS(float a, float b)
{
    const float diff = a - b;
    return diff;
}
954
// Double-precision subtraction.
static inline double
fpSubD(double a, double b)
{
    const double diff = a - b;
    return diff;
}
960
// Single-precision division.
static inline float
fpDivS(float a, float b)
{
    const float quotient = a / b;
    return quotient;
}
966
// Double-precision division.
static inline double
fpDivD(double a, double b)
{
    const double quotient = a / b;
    return quotient;
}
972
// Single-precision multiplication.
static inline float
fpMulS(float a, float b)
{
    const float product = a * b;
    return product;
}
978
// Double-precision multiplication.
static inline double
fpMulD(double a, double b)
{
    const double product = a * b;
    return product;
}
984
985class FpOp : public PredOp
986{
987  protected:
988    FpOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass) :
989        PredOp(mnem, _machInst, __opClass)
990    {}
991
992    virtual float
993    doOp(float op1, float op2) const
994    {
995        panic("Unimplemented version of doOp called.\n");
996    }
997
998    virtual float
999    doOp(float op1) const
1000    {
1001        panic("Unimplemented version of doOp called.\n");
1002    }
1003
1004    virtual double
1005    doOp(double op1, double op2) const
1006    {
1007        panic("Unimplemented version of doOp called.\n");
1008    }
1009
1010    virtual double
1011    doOp(double op1) const
1012    {
1013        panic("Unimplemented version of doOp called.\n");
1014    }
1015
1016    double
1017    dbl(uint32_t low, uint32_t high) const
1018    {
1019        double junk = 0.0;
1020        return bitsToFp((uint64_t)low | ((uint64_t)high << 32), junk);
1021    }
1022
1023    uint32_t
1024    dblLow(double val) const
1025    {
1026        return fpToBits(val);
1027    }
1028
1029    uint32_t
1030    dblHi(double val) const
1031    {
1032        return fpToBits(val) >> 32;
1033    }
1034
1035    template <class fpType>
1036    fpType
1037    binaryOp(FPSCR &fpscr, fpType op1, fpType op2,
1038            fpType (*func)(fpType, fpType),
1039            bool flush, uint32_t rMode) const
1040    {
1041        const bool single = (sizeof(fpType) == sizeof(float));
1042        fpType junk = 0.0;
1043
1044        if (flush && flushToZero(op1, op2))
1045            fpscr.idc = 1;
1046        VfpSavedState state = prepFpState(rMode);
1047        __asm__ __volatile__ ("" : "=m" (op1), "=m" (op2), "=m" (state)
1048                                 : "m" (op1), "m" (op2), "m" (state));
1049        fpType dest = func(op1, op2);
1050        __asm__ __volatile__ ("" : "=m" (dest) : "m" (dest));
1051
1052        int fpClass = std::fpclassify(dest);
1053        // Get NAN behavior right. This varies between x86 and ARM.
1054        if (fpClass == FP_NAN) {
1055            const bool single = (sizeof(fpType) == sizeof(float));
1056            const uint64_t qnan =
1057                single ? 0x7fc00000 : ULL(0x7ff8000000000000);
1058            const bool nan1 = std::isnan(op1);
1059            const bool nan2 = std::isnan(op2);
1060            const bool signal1 = nan1 && ((fpToBits(op1) & qnan) != qnan);
1061            const bool signal2 = nan2 && ((fpToBits(op2) & qnan) != qnan);
1062            if ((!nan1 && !nan2) || (fpscr.dn == 1)) {
1063                dest = bitsToFp(qnan, junk);
1064            } else if (signal1) {
1065                dest = bitsToFp(fpToBits(op1) | qnan, junk);
1066            } else if (signal2) {
1067                dest = bitsToFp(fpToBits(op2) | qnan, junk);
1068            } else if (nan1) {
1069                dest = op1;
1070            } else if (nan2) {
1071                dest = op2;
1072            }
1073        } else if (flush && flushToZero(dest)) {
1074            feraiseexcept(FeUnderflow);
1075        } else if ((
1076                    (single && (dest == bitsToFp(0x00800000, junk) ||
1077                         dest == bitsToFp(0x80800000, junk))) ||
1078                    (!single &&
1079                        (dest == bitsToFp(ULL(0x0010000000000000), junk) ||
1080                         dest == bitsToFp(ULL(0x8010000000000000), junk)))
1081                   ) && rMode != VfpRoundZero) {
1082            /*
1083             * Correct for the fact that underflow is detected -before- rounding
1084             * in ARM and -after- rounding in x86.
1085             */
1086            fesetround(FeRoundZero);
1087            __asm__ __volatile__ ("" : "=m" (op1), "=m" (op2)
1088                                     : "m" (op1), "m" (op2));
1089            fpType temp = func(op1, op2);
1090            __asm__ __volatile__ ("" : "=m" (temp) : "m" (temp));
1091            if (flush && flushToZero(temp)) {
1092                dest = temp;
1093            }
1094        }
1095        finishVfp(fpscr, state);
1096        return dest;
1097    }
1098
1099    template <class fpType>
1100    fpType
1101    unaryOp(FPSCR &fpscr, fpType op1,
1102            fpType (*func)(fpType),
1103            bool flush, uint32_t rMode) const
1104    {
1105        const bool single = (sizeof(fpType) == sizeof(float));
1106        fpType junk = 0.0;
1107
1108        if (flush && flushToZero(op1))
1109            fpscr.idc = 1;
1110        VfpSavedState state = prepFpState(rMode);
1111        __asm__ __volatile__ ("" : "=m" (op1), "=m" (state)
1112                                 : "m" (op1), "m" (state));
1113        fpType dest = func(op1);
1114        __asm__ __volatile__ ("" : "=m" (dest) : "m" (dest));
1115
1116        int fpClass = std::fpclassify(dest);
1117        // Get NAN behavior right. This varies between x86 and ARM.
1118        if (fpClass == FP_NAN) {
1119            const bool single = (sizeof(fpType) == sizeof(float));
1120            const uint64_t qnan =
1121                single ? 0x7fc00000 : ULL(0x7ff8000000000000);
1122            const bool nan = std::isnan(op1);
1123            if (!nan || fpscr.dn == 1) {
1124                dest = bitsToFp(qnan, junk);
1125            } else if (nan) {
1126                dest = bitsToFp(fpToBits(op1) | qnan, junk);
1127            }
1128        } else if (flush && flushToZero(dest)) {
1129            feraiseexcept(FeUnderflow);
1130        } else if ((
1131                    (single && (dest == bitsToFp(0x00800000, junk) ||
1132                         dest == bitsToFp(0x80800000, junk))) ||
1133                    (!single &&
1134                        (dest == bitsToFp(ULL(0x0010000000000000), junk) ||
1135                         dest == bitsToFp(ULL(0x8010000000000000), junk)))
1136                   ) && rMode != VfpRoundZero) {
1137            /*
1138             * Correct for the fact that underflow is detected -before- rounding
1139             * in ARM and -after- rounding in x86.
1140             */
1141            fesetround(FeRoundZero);
1142            __asm__ __volatile__ ("" : "=m" (op1) : "m" (op1));
1143            fpType temp = func(op1);
1144            __asm__ __volatile__ ("" : "=m" (temp) : "m" (temp));
1145            if (flush && flushToZero(temp)) {
1146                dest = temp;
1147            }
1148        }
1149        finishVfp(fpscr, state);
1150        return dest;
1151    }
1152};
1153
// Base class for VFP instructions with one destination register and one
// source register operand.
class FpRegRegOp : public FpOp
{
  protected:
    IntRegIndex dest;  // Destination register index.
    IntRegIndex op1;   // Source register index.

    // mode describes this instruction's role as a VFP microop (defaults to
    // not a microop); setVfpMicroFlags folds it into the instruction flags.
    FpRegRegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
               IntRegIndex _dest, IntRegIndex _op1,
               VfpMicroMode mode = VfpNotAMicroop) :
        FpOp(mnem, _machInst, __opClass), dest(_dest), op1(_op1)
    {
        setVfpMicroFlags(mode, flags);
    }

    std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
};
1170
// Base class for VFP instructions with one destination register and an
// immediate operand.
class FpRegImmOp : public FpOp
{
  protected:
    IntRegIndex dest;  // Destination register index.
    uint64_t imm;      // Immediate operand value.

    // mode describes this instruction's role as a VFP microop (defaults to
    // not a microop); setVfpMicroFlags folds it into the instruction flags.
    FpRegImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
               IntRegIndex _dest, uint64_t _imm,
               VfpMicroMode mode = VfpNotAMicroop) :
        FpOp(mnem, _machInst, __opClass), dest(_dest), imm(_imm)
    {
        setVfpMicroFlags(mode, flags);
    }

    std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
};
1187
// Base class for VFP instructions with one destination register, one source
// register, and an immediate operand.
class FpRegRegImmOp : public FpOp
{
  protected:
    IntRegIndex dest;  // Destination register index.
    IntRegIndex op1;   // Source register index.
    uint64_t imm;      // Immediate operand value.

    // mode describes this instruction's role as a VFP microop (defaults to
    // not a microop); setVfpMicroFlags folds it into the instruction flags.
    FpRegRegImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
                  IntRegIndex _dest, IntRegIndex _op1,
                  uint64_t _imm, VfpMicroMode mode = VfpNotAMicroop) :
        FpOp(mnem, _machInst, __opClass), dest(_dest), op1(_op1), imm(_imm)
    {
        setVfpMicroFlags(mode, flags);
    }

    std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
};
1205
// Base class for VFP instructions with one destination register and two
// source register operands.
class FpRegRegRegOp : public FpOp
{
  protected:
    IntRegIndex dest;  // Destination register index.
    IntRegIndex op1;   // First source register index.
    IntRegIndex op2;   // Second source register index.

    // mode describes this instruction's role as a VFP microop (defaults to
    // not a microop); setVfpMicroFlags folds it into the instruction flags.
    FpRegRegRegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
                  IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2,
                  VfpMicroMode mode = VfpNotAMicroop) :
        FpOp(mnem, _machInst, __opClass), dest(_dest), op1(_op1), op2(_op2)
    {
        setVfpMicroFlags(mode, flags);
    }

    std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
};
1223
1224}
1225
1226#endif //__ARCH_ARM_INSTS_VFP_HH__
1227