// vfp.hh revision 7386:23065556d48e
/*
 * Copyright (c) 2010 ARM Limited
 * All rights reserved
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder.  You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Gabe Black
 */
39
#ifndef __ARCH_ARM_INSTS_VFP_HH__
#define __ARCH_ARM_INSTS_VFP_HH__

#include <fenv.h>

#include <cmath>
#include <cstring>

#include "arch/arm/insts/misc.hh"
#include "arch/arm/miscregs.hh"
47
48namespace ArmISA
49{
50
// Tags describing where a VFP instruction sits within a microop
// sequence; translated into StaticInst flags by setVfpMicroFlags().
enum VfpMicroMode {
    VfpNotAMicroop,
    VfpMicroop,
    VfpFirstMicroop,
    VfpLastMicroop
};
57
58template<class T>
59static inline void
60setVfpMicroFlags(VfpMicroMode mode, T &flags)
61{
62    switch (mode) {
63      case VfpMicroop:
64        flags[StaticInst::IsMicroop] = true;
65        break;
66      case VfpFirstMicroop:
67        flags[StaticInst::IsMicroop] =
68            flags[StaticInst::IsFirstMicroop] = true;
69        break;
70      case VfpLastMicroop:
71        flags[StaticInst::IsMicroop] =
72            flags[StaticInst::IsLastMicroop] = true;
73        break;
74      case VfpNotAMicroop:
75        break;
76    }
77    if (mode == VfpMicroop || mode == VfpFirstMicroop) {
78        flags[StaticInst::IsDelayedCommit] = true;
79    }
80}
81
// Host floating-point exception flags, aliased to the <fenv.h> FE_*
// macros so they can be passed directly to feraiseexcept(),
// feclearexcept(), and fetestexcept().
enum FeExceptionBit
{
    FeDivByZero = FE_DIVBYZERO,
    FeInexact = FE_INEXACT,
    FeInvalid = FE_INVALID,
    FeOverflow = FE_OVERFLOW,
    FeUnderflow = FE_UNDERFLOW,
    FeAllExceptions = FE_ALL_EXCEPT
};
91
// Host rounding modes, aliased to the <fenv.h> FE_* macros so they can
// be passed directly to fesetround().
enum FeRoundingMode
{
    FeRoundDown = FE_DOWNWARD,
    FeRoundNearest = FE_TONEAREST,
    FeRoundZero = FE_TOWARDZERO,
    FeRoundUpward = FE_UPWARD
};
99
// VFP rounding-mode encoding as read from FPSCR.RMode (see
// prepVfpFpscr(), which maps these onto the host's FeRoundingMode).
enum VfpRoundingMode
{
    VfpRoundNearest = 0,
    VfpRoundUpward = 1,
    VfpRoundDown = 2,
    VfpRoundZero = 3
};
107
108template <class fpType>
109static inline void
110vfpFlushToZero(uint32_t &_fpscr, fpType &op)
111{
112    FPSCR fpscr = _fpscr;
113    if (fpscr.fz == 1 && (std::fpclassify(op) == FP_SUBNORMAL)) {
114        fpscr.idc = 1;
115        op = 0;
116    }
117    _fpscr = fpscr;
118}
119
// Two-operand convenience overload: apply flush-to-zero to both
// operands, accumulating IDC updates into the same FPSCR value.
template <class fpType>
static inline void
vfpFlushToZero(uint32_t &fpscr, fpType &op1, fpType &op2)
{
    vfpFlushToZero(fpscr, op1);
    vfpFlushToZero(fpscr, op2);
}
127
// Return the raw IEEE-754 bit pattern of a single-precision value.
// Uses memcpy rather than a union: reading an inactive union member is
// undefined behavior in C++, while memcpy is well-defined and compiles
// to the same register move.
static inline uint32_t
fpToBits(float fp)
{
    uint32_t bits;
    std::memcpy(&bits, &fp, sizeof(bits));
    return bits;
}
139
// Return the raw IEEE-754 bit pattern of a double-precision value.
// memcpy replaces the original union read, which is undefined behavior
// in C++ (type punning through an inactive union member).
static inline uint64_t
fpToBits(double fp)
{
    uint64_t bits;
    std::memcpy(&bits, &fp, sizeof(bits));
    return bits;
}
151
// Reinterpret the low 32 bits of "bits" as a single-precision value.
// The "junk" parameter exists only to select this overload; its value
// is ignored. memcpy replaces the original union read (undefined
// behavior in C++); the truncation to 32 bits matches the original's
// implicit narrowing into the union's uint32_t member.
static inline float
bitsToFp(uint64_t bits, float junk)
{
    const uint32_t lower = bits;
    float fp;
    std::memcpy(&fp, &lower, sizeof(fp));
    return fp;
}
163
// Reinterpret a 64-bit pattern as a double-precision value. The "junk"
// parameter exists only to select this overload; its value is ignored.
// memcpy replaces the original union read, which is undefined behavior
// in C++.
static inline double
bitsToFp(uint64_t bits, double junk)
{
    double fp;
    std::memcpy(&fp, &bits, sizeof(fp));
    return fp;
}
175
// Fix up a single-operand VFP result to match ARM semantics. A NaN
// result becomes either the default quiet NaN (when the operand was not
// a NaN, or when FPSCR.DN forces default-NaN mode) or the operand's NaN
// with the quiet bits forced on. A denormal result is flushed to a
// signed zero with underflow raised when FPSCR.FZ is set.
template <class fpType>
static inline fpType
fixDest(FPSCR fpscr, fpType val, fpType op1)
{
    int fpClass = std::fpclassify(val);
    fpType junk = 0.0;  // Only selects the bitsToFp() overload.
    if (fpClass == FP_NAN) {
        const bool single = (sizeof(val) == sizeof(float));
        // Default quiet-NaN pattern; doubles as the quiet-bit mask.
        const uint64_t qnan = single ? 0x7fc00000 : ULL(0x7ff8000000000000);
        const bool nan = std::isnan(op1);
        if (!nan || (fpscr.dn == 1)) {
            val = bitsToFp(qnan, junk);
        } else if (nan) {
            // Propagate the operand's NaN payload, quieted.
            val = bitsToFp(fpToBits(op1) | qnan, junk);
        }
    } else if (fpClass == FP_SUBNORMAL && fpscr.fz == 1) {
        // Turn val into a zero with the correct sign;
        uint64_t bitMask = ULL(0x1) << (sizeof(fpType) * 8 - 1);
        val = bitsToFp(fpToBits(val) & bitMask, junk);
        feraiseexcept(FeUnderflow);
    }
    return val;
}
199
// Two-operand variant of fixDest(). NaN selection priority: if neither
// operand is a NaN (or FPSCR.DN is set) use the default quiet NaN;
// otherwise prefer a signaling NaN from op1, then op2 (quieted), then
// any quiet NaN from op1, then op2. Denormal results are flushed to a
// signed zero with underflow raised when FPSCR.FZ is set.
template <class fpType>
static inline fpType
fixDest(FPSCR fpscr, fpType val, fpType op1, fpType op2)
{
    int fpClass = std::fpclassify(val);
    fpType junk = 0.0;  // Only selects the bitsToFp() overload.
    if (fpClass == FP_NAN) {
        const bool single = (sizeof(val) == sizeof(float));
        // Default quiet-NaN pattern; doubles as the quiet-bit mask.
        const uint64_t qnan = single ? 0x7fc00000 : ULL(0x7ff8000000000000);
        const bool nan1 = std::isnan(op1);
        const bool nan2 = std::isnan(op2);
        // A NaN is signaling when its quiet bits are not all set.
        const bool signal1 = nan1 && ((fpToBits(op1) & qnan) != qnan);
        const bool signal2 = nan2 && ((fpToBits(op2) & qnan) != qnan);
        if ((!nan1 && !nan2) || (fpscr.dn == 1)) {
            val = bitsToFp(qnan, junk);
        } else if (signal1) {
            val = bitsToFp(fpToBits(op1) | qnan, junk);
        } else if (signal2) {
            val = bitsToFp(fpToBits(op2) | qnan, junk);
        } else if (nan1) {
            val = op1;
        } else if (nan2) {
            val = op2;
        }
    } else if (fpClass == FP_SUBNORMAL && fpscr.fz == 1) {
        // Turn val into a zero with the correct sign;
        uint64_t bitMask = ULL(0x1) << (sizeof(fpType) * 8 - 1);
        val = bitsToFp(fpToBits(val) & bitMask, junk);
        feraiseexcept(FeUnderflow);
    }
    return val;
}
232
// Fix up a VFP multiply result. Beyond fixDest()'s NaN/flush handling,
// this covers the case where the rounded result equals the smallest
// normal magnitude (+/-FLT_MIN / +/-DBL_MIN bit patterns): the exact
// product may have been a denormal that rounded up, so the multiply is
// redone in round-toward-zero mode and underflow raised if that result
// is not normal. The empty asm statements are compiler barriers that
// keep the recomputation from being folded or reordered across the
// fesetround()/feraiseexcept() calls.
template <class fpType>
static inline fpType
fixMultDest(FPSCR fpscr, fpType val, fpType op1, fpType op2)
{
    fpType mid = fixDest(fpscr, val, op1, op2);
    const bool single = (sizeof(fpType) == sizeof(float));
    const fpType junk = 0.0;
    if ((single && (val == bitsToFp(0x00800000, junk) ||
                    val == bitsToFp(0x80800000, junk))) ||
        (!single && (val == bitsToFp(ULL(0x0010000000000000), junk) ||
                     val == bitsToFp(ULL(0x8010000000000000), junk)))
        ) {
        __asm__ __volatile__("" : "=m" (op1) : "m" (op1));
        fesetround(FeRoundZero);
        fpType temp = 0.0;
        __asm__ __volatile__("" : "=m" (temp) : "m" (temp));
        temp = op1 * op2;
        if (!std::isnormal(temp)) {
            feraiseexcept(FeUnderflow);
        }
        __asm__ __volatile__("" :: "m" (temp));
    }
    return mid;
}
257
// Fix up a VFP divide result. Same idea as fixMultDest(): when the
// rounded quotient equals the smallest normal magnitude, redo the
// divide in round-toward-zero mode to decide whether underflow should
// be raised. The empty asm statements are compiler barriers preventing
// folding/reordering across the FP-environment calls.
template <class fpType>
static inline fpType
fixDivDest(FPSCR fpscr, fpType val, fpType op1, fpType op2)
{
    fpType mid = fixDest(fpscr, val, op1, op2);
    const bool single = (sizeof(fpType) == sizeof(float));
    const fpType junk = 0.0;
    if ((single && (val == bitsToFp(0x00800000, junk) ||
                    val == bitsToFp(0x80800000, junk))) ||
        (!single && (val == bitsToFp(ULL(0x0010000000000000), junk) ||
                     val == bitsToFp(ULL(0x8010000000000000), junk)))
        ) {
        __asm__ __volatile__("" : "=m" (op1) : "m" (op1));
        fesetround(FeRoundZero);
        fpType temp = 0.0;
        __asm__ __volatile__("" : "=m" (temp) : "m" (temp));
        temp = op1 / op2;
        if (!std::isnormal(temp)) {
            feraiseexcept(FeUnderflow);
        }
        __asm__ __volatile__("" :: "m" (temp));
    }
    return mid;
}
282
// Narrow a double-precision result to single precision with ARM
// semantics. If the double is a NaN, construct a single-precision NaN
// keeping the top mantissa bits (bits 50:29) and the sign, with the
// exponent/quiet field forced on (mask(9) << 22), and let fixDest()
// choose between it and the default NaN. As in fixDivDest(), a result
// whose magnitude equals the smallest single-precision normal triggers
// a round-toward-zero recheck to decide whether underflow applies.
static inline float
fixFpDFpSDest(FPSCR fpscr, double val)
{
    const float junk = 0.0;
    float op1 = 0.0;
    if (std::isnan(val)) {
        uint64_t valBits = fpToBits(val);
        uint32_t op1Bits = bits(valBits, 50, 29) |
                           (mask(9) << 22) |
                           (bits(valBits, 63) << 31);
        op1 = bitsToFp(op1Bits, junk);
    }
    float mid = fixDest(fpscr, (float)val, op1);
    if (mid == bitsToFp(0x00800000, junk) ||
        mid == bitsToFp(0x80800000, junk)) {
        __asm__ __volatile__("" : "=m" (val) : "m" (val));
        fesetround(FeRoundZero);
        float temp = 0.0;
        __asm__ __volatile__("" : "=m" (temp) : "m" (temp));
        temp = val;
        if (!std::isnormal(temp)) {
            feraiseexcept(FeUnderflow);
        }
        __asm__ __volatile__("" :: "m" (temp));
    }
    return mid;
}
310
// Convert a single-precision value to a fixed-point integer with "imm"
// fractional bits, saturating to a 16- or 32-bit signed/unsigned range.
// Saturation raises Invalid and suppresses any pending Inexact; an
// exact in-range conversion that required rounding raises Inexact.
// NOTE(review): the pre-scaling multiply here runs in round-toward-zero
// mode, whereas vfpFpDToFixed() scales in round-to-nearest before
// switching to round-toward-zero -- confirm the asymmetry is intended.
static inline uint64_t
vfpFpSToFixed(float val, bool isSigned, bool half, uint8_t imm)
{
    fesetround(FeRoundZero);
    // Scale by 2^imm to move the fraction bits into the integer part.
    val = val * powf(2.0, imm);
    __asm__ __volatile__("" : "=m" (val) : "m" (val));
    feclearexcept(FeAllExceptions);
    __asm__ __volatile__("" : "=m" (val) : "m" (val));
    float origVal = val;
    // Round to an integer in the current (toward-zero) mode.
    val = rintf(val);
    int fpType = std::fpclassify(val);
    if (fpType == FP_SUBNORMAL || fpType == FP_NAN) {
        if (fpType == FP_NAN) {
            // NaN converts to zero and raises Invalid.
            feraiseexcept(FeInvalid);
        }
        val = 0.0;
    } else if (origVal != val) {
        // Rounding changed the value, so the result is inexact.
        feraiseexcept(FeInexact);
    }

    if (isSigned) {
        if (half) {
            // Saturate to [INT16_MIN, INT16_MAX]; the int16_t return
            // value sign-extends through the uint64_t return type.
            if ((double)val < (int16_t)(1 << 15)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return (int16_t)(1 << 15);
            }
            if ((double)val > (int16_t)mask(15)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return (int16_t)mask(15);
            }
            return (int16_t)val;
        } else {
            // Saturate to [INT32_MIN, INT32_MAX].
            if ((double)val < (int32_t)(1 << 31)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return (int32_t)(1 << 31);
            }
            if ((double)val > (int32_t)mask(31)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return (int32_t)mask(31);
            }
            return (int32_t)val;
        }
    } else {
        if (half) {
            // Saturate to [0, UINT16_MAX].
            if ((double)val < 0) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return 0;
            }
            if ((double)val > (mask(16))) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return mask(16);
            }
            return (uint16_t)val;
        } else {
            // Saturate to [0, UINT32_MAX].
            if ((double)val < 0) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return 0;
            }
            if ((double)val > (mask(32))) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return mask(32);
            }
            return (uint32_t)val;
        }
    }
}
385
// Convert an unsigned fixed-point value with "imm" fractional bits
// (halfword-wide when "half" is set) to single precision, rounding to
// nearest. The empty asm statements are compiler barriers so the
// divide is not folded across the feclearexcept() call; fixDivDest()
// supplies the ARM NaN/flush/underflow fixups.
static inline float
vfpUFixedToFpS(FPSCR fpscr, uint32_t val, bool half, uint8_t imm)
{
    fesetround(FeRoundNearest);
    if (half)
        val = (uint16_t)val;
    float scale = powf(2.0, imm);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    feclearexcept(FeAllExceptions);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    return fixDivDest(fpscr, val / scale, (float)val, scale);
}
398
// Convert a signed fixed-point value with "imm" fractional bits
// (sign-extended from a halfword when "half" is set) to single
// precision, rounding to nearest. Barriers and fixups as in
// vfpUFixedToFpS().
static inline float
vfpSFixedToFpS(FPSCR fpscr, int32_t val, bool half, uint8_t imm)
{
    fesetround(FeRoundNearest);
    if (half)
        val = sext<16>(val & mask(16));
    float scale = powf(2.0, imm);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    feclearexcept(FeAllExceptions);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    return fixDivDest(fpscr, val / scale, (float)val, scale);
}
411
// Convert a double-precision value to a fixed-point integer with "imm"
// fractional bits, saturating to a 16- or 32-bit signed/unsigned range.
// The scaling multiply runs in round-to-nearest mode; the mode then
// switches to round-toward-zero so rint() truncates. Saturation raises
// Invalid and suppresses any pending Inexact.
static inline uint64_t
vfpFpDToFixed(double val, bool isSigned, bool half, uint8_t imm)
{
    fesetround(FeRoundNearest);
    // Scale by 2^imm to move the fraction bits into the integer part.
    val = val * pow(2.0, imm);
    __asm__ __volatile__("" : "=m" (val) : "m" (val));
    fesetround(FeRoundZero);
    feclearexcept(FeAllExceptions);
    __asm__ __volatile__("" : "=m" (val) : "m" (val));
    double origVal = val;
    // Round to an integer in the current (toward-zero) mode.
    val = rint(val);
    int fpType = std::fpclassify(val);
    if (fpType == FP_SUBNORMAL || fpType == FP_NAN) {
        if (fpType == FP_NAN) {
            // NaN converts to zero and raises Invalid.
            feraiseexcept(FeInvalid);
        }
        val = 0.0;
    } else if (origVal != val) {
        // Rounding changed the value, so the result is inexact.
        feraiseexcept(FeInexact);
    }
    if (isSigned) {
        if (half) {
            // Saturate to [INT16_MIN, INT16_MAX]; the int16_t return
            // value sign-extends through the uint64_t return type.
            if (val < (int16_t)(1 << 15)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return (int16_t)(1 << 15);
            }
            if (val > (int16_t)mask(15)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return (int16_t)mask(15);
            }
            return (int16_t)val;
        } else {
            // Saturate to [INT32_MIN, INT32_MAX].
            if (val < (int32_t)(1 << 31)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return (int32_t)(1 << 31);
            }
            if (val > (int32_t)mask(31)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return (int32_t)mask(31);
            }
            return (int32_t)val;
        }
    } else {
        if (half) {
            // Saturate to [0, UINT16_MAX].
            if (val < 0) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return 0;
            }
            if (val > mask(16)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return mask(16);
            }
            return (uint16_t)val;
        } else {
            // Saturate to [0, UINT32_MAX].
            if (val < 0) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return 0;
            }
            if (val > mask(32)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return mask(32);
            }
            return (uint32_t)val;
        }
    }
}
486
// Convert an unsigned fixed-point value with "imm" fractional bits
// (halfword-wide when "half" is set) to double precision, rounding to
// nearest. Barriers and fixups as in vfpUFixedToFpS().
static inline double
vfpUFixedToFpD(FPSCR fpscr, uint32_t val, bool half, uint8_t imm)
{
    fesetround(FeRoundNearest);
    if (half)
        val = (uint16_t)val;
    double scale = pow(2.0, imm);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    feclearexcept(FeAllExceptions);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    return fixDivDest(fpscr, val / scale, (double)val, scale);
}
499
// Convert a signed fixed-point value with "imm" fractional bits
// (sign-extended from a halfword when "half" is set) to double
// precision, rounding to nearest. Barriers and fixups as in
// vfpUFixedToFpS().
static inline double
vfpSFixedToFpD(FPSCR fpscr, int32_t val, bool half, uint8_t imm)
{
    fesetround(FeRoundNearest);
    if (half)
        val = sext<16>(val & mask(16));
    double scale = pow(2.0, imm);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    feclearexcept(FeAllExceptions);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    return fixDivDest(fpscr, val / scale, (double)val, scale);
}
512
// Saved host rounding mode, as returned by fegetround() in
// prepVfpFpscr() and restored by setVfpFpscr().
typedef int VfpSavedState;
514
515static inline VfpSavedState
516prepVfpFpscr(FPSCR fpscr)
517{
518    int roundingMode = fegetround();
519    feclearexcept(FeAllExceptions);
520    switch (fpscr.rMode) {
521      case VfpRoundNearest:
522        fesetround(FeRoundNearest);
523        break;
524      case VfpRoundUpward:
525        fesetround(FeRoundUpward);
526        break;
527      case VfpRoundDown:
528        fesetround(FeRoundDown);
529        break;
530      case VfpRoundZero:
531        fesetround(FeRoundZero);
532        break;
533    }
534    return roundingMode;
535}
536
537static inline FPSCR
538setVfpFpscr(FPSCR fpscr, VfpSavedState state)
539{
540    int exceptions = fetestexcept(FeAllExceptions);
541    if (exceptions & FeInvalid) {
542        fpscr.ioc = 1;
543    }
544    if (exceptions & FeDivByZero) {
545        fpscr.dzc = 1;
546    }
547    if (exceptions & FeOverflow) {
548        fpscr.ofc = 1;
549    }
550    if (exceptions & FeUnderflow) {
551        fpscr.ufc = 1;
552    }
553    if (exceptions & FeInexact) {
554        fpscr.ixc = 1;
555    }
556    fesetround(state);
557    return fpscr;
558}
559
// Base class for VFP macroops. Provides helpers for stepping register
// indices between the microops of a vector operation.
class VfpMacroOp : public PredMacroOp
{
  public:
    // A register is a "scalar" operand if it falls in the first 8
    // registers of its 32-register bank; such operands are not stepped
    // by nextIdxs().
    static bool
    inScalarBank(IntRegIndex idx)
    {
        return (idx % 32) < 8;
    }

  protected:
    // When set, strides are doubled in addStride() -- presumably for
    // double-precision (wide) operations; confirm at call sites.
    bool wide;

    VfpMacroOp(const char *mnem, ExtMachInst _machInst,
            OpClass __opClass, bool _wide) :
        PredMacroOp(mnem, _machInst, __opClass), wide(_wide)
    {}

    // Advance a register index by "stride" registers, wrapping within
    // its 8-register bank (the bank base is preserved; only the offset
    // moves, modulo 8).
    IntRegIndex
    addStride(IntRegIndex idx, unsigned stride)
    {
        if (wide) {
            stride *= 2;
        }
        unsigned offset = idx % 8;
        idx = (IntRegIndex)(idx - offset);
        offset += stride;
        idx = (IntRegIndex)(idx + (offset % 8));
        return idx;
    }

    // Step dest/op1/op2 to the registers used by the next microop.
    // The stride comes from fpscrStride (0 encodes 1, otherwise 2).
    // op2 stays fixed when it is a scalar-bank operand; the destination
    // must never be scalar.
    void
    nextIdxs(IntRegIndex &dest, IntRegIndex &op1, IntRegIndex &op2)
    {
        unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2;
        assert(!inScalarBank(dest));
        dest = addStride(dest, stride);
        op1 = addStride(op1, stride);
        if (!inScalarBank(op2)) {
            op2 = addStride(op2, stride);
        }
    }

    // Two-operand variant: op1 stays fixed when scalar.
    void
    nextIdxs(IntRegIndex &dest, IntRegIndex &op1)
    {
        unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2;
        assert(!inScalarBank(dest));
        dest = addStride(dest, stride);
        if (!inScalarBank(op1)) {
            op1 = addStride(op1, stride);
        }
    }

    // Destination-only variant.
    void
    nextIdxs(IntRegIndex &dest)
    {
        unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2;
        assert(!inScalarBank(dest));
        dest = addStride(dest, stride);
    }
};
621
// RegRegOp that additionally sets VFP microop flags from "mode" via
// setVfpMicroFlags().
class VfpRegRegOp : public RegRegOp
{
  protected:
    VfpRegRegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
                IntRegIndex _dest, IntRegIndex _op1,
                VfpMicroMode mode = VfpNotAMicroop) :
        RegRegOp(mnem, _machInst, __opClass, _dest, _op1)
    {
        setVfpMicroFlags(mode, flags);
    }
};
633
// RegImmOp that additionally sets VFP microop flags from "mode" via
// setVfpMicroFlags().
class VfpRegImmOp : public RegImmOp
{
  protected:
    VfpRegImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
                IntRegIndex _dest, uint64_t _imm,
                VfpMicroMode mode = VfpNotAMicroop) :
        RegImmOp(mnem, _machInst, __opClass, _dest, _imm)
    {
        setVfpMicroFlags(mode, flags);
    }
};
645
// RegRegImmOp that additionally sets VFP microop flags from "mode" via
// setVfpMicroFlags().
class VfpRegRegImmOp : public RegRegImmOp
{
  protected:
    VfpRegRegImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
                   IntRegIndex _dest, IntRegIndex _op1,
                   uint64_t _imm, VfpMicroMode mode = VfpNotAMicroop) :
        RegRegImmOp(mnem, _machInst, __opClass, _dest, _op1, _imm)
    {
        setVfpMicroFlags(mode, flags);
    }
};
657
// RegRegRegOp that additionally sets VFP microop flags from "mode" via
// setVfpMicroFlags().
class VfpRegRegRegOp : public RegRegRegOp
{
  protected:
    VfpRegRegRegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
                   IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2,
                   VfpMicroMode mode = VfpNotAMicroop) :
        RegRegRegOp(mnem, _machInst, __opClass, _dest, _op1, _op2)
    {
        setVfpMicroFlags(mode, flags);
    }
};
669
670}
671
672#endif //__ARCH_ARM_INSTS_VFP_HH__
673