// vfp.hh, revision 7388:293878a9d220
/*
 * Copyright (c) 2010 ARM Limited
 * All rights reserved
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder.  You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Gabe Black
 */

#ifndef __ARCH_ARM_INSTS_VFP_HH__
#define __ARCH_ARM_INSTS_VFP_HH__

#include <fenv.h>

#include <cmath>

#include "arch/arm/insts/misc.hh"
#include "arch/arm/miscregs.hh"

namespace ArmISA
{

enum VfpMicroMode {
    VfpNotAMicroop,
    VfpMicroop,
    VfpFirstMicroop,
    VfpLastMicroop
};

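// Set the microop flags in an instruction's flag bitset according to its
// position in a VFP microop sequence. Microops other than the last of their
// sequence (VfpMicroop and VfpFirstMicroop) are also marked delayed-commit.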
template<class T>
static inline void
setVfpMicroFlags(VfpMicroMode mode, T &flags)
{
    switch (mode) {
      case VfpMicroop:
        flags[StaticInst::IsMicroop] = true;
        break;
      case VfpFirstMicroop:
        flags[StaticInst::IsMicroop] =
            flags[StaticInst::IsFirstMicroop] = true;
        break;
      case VfpLastMicroop:
        flags[StaticInst::IsMicroop] =
            flags[StaticInst::IsLastMicroop] = true;
        break;
      case VfpNotAMicroop:
        break;
    }
    if (mode == VfpMicroop || mode == VfpFirstMicroop) {
        flags[StaticInst::IsDelayedCommit] = true;
    }
}

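// Names for the host <fenv.h> exception and rounding-mode constants used
// below. VfpRoundingMode mirrors the encoding of the FPSCR.RMode field
// (nearest, toward +infinity, toward -infinity, toward zero).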
enum FeExceptionBit
{
    FeDivByZero = FE_DIVBYZERO,
    FeInexact = FE_INEXACT,
    FeInvalid = FE_INVALID,
    FeOverflow = FE_OVERFLOW,
    FeUnderflow = FE_UNDERFLOW,
    FeAllExceptions = FE_ALL_EXCEPT
};

enum FeRoundingMode
{
    FeRoundDown = FE_DOWNWARD,
    FeRoundNearest = FE_TONEAREST,
    FeRoundZero = FE_TOWARDZERO,
    FeRoundUpward = FE_UPWARD
};

enum VfpRoundingMode
{
    VfpRoundNearest = 0,
    VfpRoundUpward = 1,
    VfpRoundDown = 2,
    VfpRoundZero = 3
};

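// Implement the FPSCR.FZ (flush-to-zero) input behavior: when flush-to-zero
// is enabled and an operand is denormal, replace it with zero and set the
// input-denormal (IDC) cumulative flag in the caller's FPSCR image.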
template <class fpType>
static inline void
vfpFlushToZero(uint32_t &_fpscr, fpType &op)
{
    FPSCR fpscr = _fpscr;
    if (fpscr.fz == 1 && (std::fpclassify(op) == FP_SUBNORMAL)) {
        fpscr.idc = 1;
        op = 0;
    }
    _fpscr = fpscr;
}

template <class fpType>
static inline void
vfpFlushToZero(uint32_t &fpscr, fpType &op1, fpType &op2)
{
    vfpFlushToZero(fpscr, op1);
    vfpFlushToZero(fpscr, op2);
}

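// Reinterpret the raw bit patterns of host floats and doubles. The unused
// "junk" argument to bitsToFp() exists only to select the overload, letting
// callers choose the destination type without a template argument.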
static inline uint32_t
fpToBits(float fp)
{
    union
    {
        float fp;
        uint32_t bits;
    } val;
    val.fp = fp;
    return val.bits;
}

static inline uint64_t
fpToBits(double fp)
{
    union
    {
        double fp;
        uint64_t bits;
    } val;
    val.fp = fp;
    return val.bits;
}

static inline float
bitsToFp(uint64_t bits, float junk)
{
    union
    {
        float fp;
        uint32_t bits;
    } val;
    val.bits = bits;
    return val.fp;
}

static inline double
bitsToFp(uint64_t bits, double junk)
{
    union
    {
        double fp;
        uint64_t bits;
    } val;
    val.bits = bits;
    return val.fp;
}

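// Adjust a result computed with host arithmetic to match ARM VFP semantics:
// NaN results become the default quiet NaN (or a quieted copy of a NaN
// operand when default-NaN mode is off), and denormal results are flushed to
// a correctly signed zero, raising underflow, when FPSCR.FZ is set.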
template <class fpType>
static inline fpType
fixDest(FPSCR fpscr, fpType val, fpType op1)
{
    int fpClass = std::fpclassify(val);
    fpType junk = 0.0;
    if (fpClass == FP_NAN) {
        const bool single = (sizeof(val) == sizeof(float));
        const uint64_t qnan = single ? 0x7fc00000 : ULL(0x7ff8000000000000);
        const bool nan = std::isnan(op1);
        if (!nan || (fpscr.dn == 1)) {
            val = bitsToFp(qnan, junk);
        } else if (nan) {
            val = bitsToFp(fpToBits(op1) | qnan, junk);
        }
    } else if (fpClass == FP_SUBNORMAL && fpscr.fz == 1) {
        // Turn val into a zero with the correct sign.
        uint64_t bitMask = ULL(0x1) << (sizeof(fpType) * 8 - 1);
        val = bitsToFp(fpToBits(val) & bitMask, junk);
        feraiseexcept(FeUnderflow);
    }
    return val;
}

template <class fpType>
static inline fpType
fixDest(FPSCR fpscr, fpType val, fpType op1, fpType op2)
{
    int fpClass = std::fpclassify(val);
    fpType junk = 0.0;
    if (fpClass == FP_NAN) {
        const bool single = (sizeof(val) == sizeof(float));
        const uint64_t qnan = single ? 0x7fc00000 : ULL(0x7ff8000000000000);
        const bool nan1 = std::isnan(op1);
        const bool nan2 = std::isnan(op2);
        const bool signal1 = nan1 && ((fpToBits(op1) & qnan) != qnan);
        const bool signal2 = nan2 && ((fpToBits(op2) & qnan) != qnan);
        if ((!nan1 && !nan2) || (fpscr.dn == 1)) {
            val = bitsToFp(qnan, junk);
        } else if (signal1) {
            val = bitsToFp(fpToBits(op1) | qnan, junk);
        } else if (signal2) {
            val = bitsToFp(fpToBits(op2) | qnan, junk);
        } else if (nan1) {
            val = op1;
        } else if (nan2) {
            val = op2;
        }
    } else if (fpClass == FP_SUBNORMAL && fpscr.fz == 1) {
        // Turn val into a zero with the correct sign.
        uint64_t bitMask = ULL(0x1) << (sizeof(fpType) * 8 - 1);
        val = bitsToFp(fpToBits(val) & bitMask, junk);
        feraiseexcept(FeUnderflow);
    }
    return val;
}

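// When the host rounds a product or quotient up to exactly the smallest
// normal number, an underflow that ARM semantics would report can be hidden.
// If the result lands on that boundary, the operation is redone in
// round-to-zero mode so a masked underflow is still flagged; the empty asm
// statements keep the compiler from folding the recomputation away.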
template <class fpType>
static inline fpType
fixMultDest(FPSCR fpscr, fpType val, fpType op1, fpType op2)
{
    fpType mid = fixDest(fpscr, val, op1, op2);
    const bool single = (sizeof(fpType) == sizeof(float));
    const fpType junk = 0.0;
    if ((single && (val == bitsToFp(0x00800000, junk) ||
                    val == bitsToFp(0x80800000, junk))) ||
        (!single && (val == bitsToFp(ULL(0x0010000000000000), junk) ||
                     val == bitsToFp(ULL(0x8010000000000000), junk)))
        ) {
        __asm__ __volatile__("" : "=m" (op1) : "m" (op1));
        fesetround(FeRoundZero);
        fpType temp = 0.0;
        __asm__ __volatile__("" : "=m" (temp) : "m" (temp));
        temp = op1 * op2;
        if (!std::isnormal(temp)) {
            feraiseexcept(FeUnderflow);
        }
        __asm__ __volatile__("" :: "m" (temp));
    }
    return mid;
}

template <class fpType>
static inline fpType
fixDivDest(FPSCR fpscr, fpType val, fpType op1, fpType op2)
{
    fpType mid = fixDest(fpscr, val, op1, op2);
    const bool single = (sizeof(fpType) == sizeof(float));
    const fpType junk = 0.0;
    if ((single && (val == bitsToFp(0x00800000, junk) ||
                    val == bitsToFp(0x80800000, junk))) ||
        (!single && (val == bitsToFp(ULL(0x0010000000000000), junk) ||
                     val == bitsToFp(ULL(0x8010000000000000), junk)))
        ) {
        __asm__ __volatile__("" : "=m" (op1) : "m" (op1));
        fesetround(FeRoundZero);
        fpType temp = 0.0;
        __asm__ __volatile__("" : "=m" (temp) : "m" (temp));
        temp = op1 / op2;
        if (!std::isnormal(temp)) {
            feraiseexcept(FeUnderflow);
        }
        __asm__ __volatile__("" :: "m" (temp));
    }
    return mid;
}

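// Narrow a double precision result to single precision. A NaN input first has
// its sign and payload repacked into single precision form before the usual
// fixDest() NaN handling; a result equal to the single precision minimum
// normal is reconverted in round-to-zero mode so a masked underflow is still
// flagged.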
static inline float
fixFpDFpSDest(FPSCR fpscr, double val)
{
    const float junk = 0.0;
    float op1 = 0.0;
    if (std::isnan(val)) {
        uint64_t valBits = fpToBits(val);
        uint32_t op1Bits = bits(valBits, 50, 29) |
                           (mask(9) << 22) |
                           (bits(valBits, 63) << 31);
        op1 = bitsToFp(op1Bits, junk);
    }
    float mid = fixDest(fpscr, (float)val, op1);
    if (mid == bitsToFp(0x00800000, junk) ||
        mid == bitsToFp(0x80800000, junk)) {
        __asm__ __volatile__("" : "=m" (val) : "m" (val));
        fesetround(FeRoundZero);
        float temp = 0.0;
        __asm__ __volatile__("" : "=m" (temp) : "m" (temp));
        temp = val;
        if (!std::isnormal(temp)) {
            feraiseexcept(FeUnderflow);
        }
        __asm__ __volatile__("" :: "m" (temp));
    }
    return mid;
}

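// Convert a single precision value to a signed or unsigned, 16 or 32 bit
// fixed point result. The value is scaled by 2^imm, rounded (to zero by
// default), and saturated to the destination range; the invalid exception is
// raised on saturation or NaN input, and inexact when rounding changed the
// value.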
static inline uint64_t
vfpFpSToFixed(float val, bool isSigned, bool half,
              uint8_t imm, bool rzero = true)
{
    int rmode = fegetround();
    fesetround(FeRoundNearest);
    val = val * powf(2.0, imm);
    __asm__ __volatile__("" : "=m" (val) : "m" (val));
    if (rzero)
        fesetround(FeRoundZero);
    else
        fesetround(rmode);
    feclearexcept(FeAllExceptions);
    __asm__ __volatile__("" : "=m" (val) : "m" (val));
    float origVal = val;
    val = rintf(val);
    int fpType = std::fpclassify(val);
    if (fpType == FP_SUBNORMAL || fpType == FP_NAN) {
        if (fpType == FP_NAN) {
            feraiseexcept(FeInvalid);
        }
        val = 0.0;
    } else if (origVal != val) {
        feraiseexcept(FeInexact);
    }

    if (isSigned) {
        if (half) {
            if ((double)val < (int16_t)(1 << 15)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return (int16_t)(1 << 15);
            }
            if ((double)val > (int16_t)mask(15)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return (int16_t)mask(15);
            }
            return (int16_t)val;
        } else {
            if ((double)val < (int32_t)(1 << 31)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return (int32_t)(1 << 31);
            }
            if ((double)val > (int32_t)mask(31)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return (int32_t)mask(31);
            }
            return (int32_t)val;
        }
    } else {
        if (half) {
            if ((double)val < 0) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return 0;
            }
            if ((double)val > (mask(16))) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return mask(16);
            }
            return (uint16_t)val;
        } else {
            if ((double)val < 0) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return 0;
            }
            if ((double)val > (mask(32))) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return mask(32);
            }
            return (uint32_t)val;
        }
    }
}

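// Convert unsigned (and, below, signed) 16/32 bit fixed point values to
// single precision by dividing by 2^imm; fixDivDest() applies the ARM
// flush-to-zero and NaN rules to the result.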
static inline float
vfpUFixedToFpS(FPSCR fpscr, uint32_t val, bool half, uint8_t imm)
{
    fesetround(FeRoundNearest);
    if (half)
        val = (uint16_t)val;
    float scale = powf(2.0, imm);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    feclearexcept(FeAllExceptions);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    return fixDivDest(fpscr, val / scale, (float)val, scale);
}

static inline float
vfpSFixedToFpS(FPSCR fpscr, int32_t val, bool half, uint8_t imm)
{
    fesetround(FeRoundNearest);
    if (half)
        val = sext<16>(val & mask(16));
    float scale = powf(2.0, imm);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    feclearexcept(FeAllExceptions);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    return fixDivDest(fpscr, val / scale, (float)val, scale);
}

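// Double precision counterparts of the conversions above: double to 16/32 bit
// fixed point with saturation, and unsigned/signed fixed point to double.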
static inline uint64_t
vfpFpDToFixed(double val, bool isSigned, bool half,
              uint8_t imm, bool rzero = true)
{
    int rmode = fegetround();
    fesetround(FeRoundNearest);
    val = val * pow(2.0, imm);
    __asm__ __volatile__("" : "=m" (val) : "m" (val));
    if (rzero)
        fesetround(FeRoundZero);
    else
        fesetround(rmode);
    feclearexcept(FeAllExceptions);
    __asm__ __volatile__("" : "=m" (val) : "m" (val));
    double origVal = val;
    val = rint(val);
    int fpType = std::fpclassify(val);
    if (fpType == FP_SUBNORMAL || fpType == FP_NAN) {
        if (fpType == FP_NAN) {
            feraiseexcept(FeInvalid);
        }
        val = 0.0;
    } else if (origVal != val) {
        feraiseexcept(FeInexact);
    }
    if (isSigned) {
        if (half) {
            if (val < (int16_t)(1 << 15)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return (int16_t)(1 << 15);
            }
            if (val > (int16_t)mask(15)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return (int16_t)mask(15);
            }
            return (int16_t)val;
        } else {
            if (val < (int32_t)(1 << 31)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return (int32_t)(1 << 31);
            }
            if (val > (int32_t)mask(31)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return (int32_t)mask(31);
            }
            return (int32_t)val;
        }
    } else {
        if (half) {
            if (val < 0) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return 0;
            }
            if (val > mask(16)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return mask(16);
            }
            return (uint16_t)val;
        } else {
            if (val < 0) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return 0;
            }
            if (val > mask(32)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return mask(32);
            }
            return (uint32_t)val;
        }
    }
}

static inline double
vfpUFixedToFpD(FPSCR fpscr, uint32_t val, bool half, uint8_t imm)
{
    fesetround(FeRoundNearest);
    if (half)
        val = (uint16_t)val;
    double scale = pow(2.0, imm);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    feclearexcept(FeAllExceptions);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    return fixDivDest(fpscr, val / scale, (double)val, scale);
}

static inline double
vfpSFixedToFpD(FPSCR fpscr, int32_t val, bool half, uint8_t imm)
{
    fesetround(FeRoundNearest);
    if (half)
        val = sext<16>(val & mask(16));
    double scale = pow(2.0, imm);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    feclearexcept(FeAllExceptions);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    return fixDivDest(fpscr, val / scale, (double)val, scale);
}

typedef int VfpSavedState;

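// Bracket emulated VFP operations with the host floating point environment:
// prepVfpFpscr() saves the host rounding mode, clears the accrued host
// exception flags, and installs the rounding mode selected by FPSCR.RMode;
// setVfpFpscr() afterwards folds the host exception flags back into the FPSCR
// cumulative exception bits and restores the saved rounding mode.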
static inline VfpSavedState
prepVfpFpscr(FPSCR fpscr)
{
    int roundingMode = fegetround();
    feclearexcept(FeAllExceptions);
    switch (fpscr.rMode) {
      case VfpRoundNearest:
        fesetround(FeRoundNearest);
        break;
      case VfpRoundUpward:
        fesetround(FeRoundUpward);
        break;
      case VfpRoundDown:
        fesetround(FeRoundDown);
        break;
      case VfpRoundZero:
        fesetround(FeRoundZero);
        break;
    }
    return roundingMode;
}

static inline FPSCR
setVfpFpscr(FPSCR fpscr, VfpSavedState state)
{
    int exceptions = fetestexcept(FeAllExceptions);
    if (exceptions & FeInvalid) {
        fpscr.ioc = 1;
    }
    if (exceptions & FeDivByZero) {
        fpscr.dzc = 1;
    }
    if (exceptions & FeOverflow) {
        fpscr.ofc = 1;
    }
    if (exceptions & FeUnderflow) {
        fpscr.ufc = 1;
    }
    if (exceptions & FeInexact) {
        fpscr.ixc = 1;
    }
    fesetround(state);
    return fpscr;
}

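// Base class for VFP macroops that expand short-vector operations into
// per-element microops. Register indices whose value modulo 32 is less than 8
// are treated as the scalar bank; addStride() and nextIdxs() advance the
// destination and source indices by the FPSCR vector stride (doubled when the
// "wide" flag is set), wrapping within the current 8-register sub-bank, and
// leave source operands that sit in the scalar bank unchanged.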
class VfpMacroOp : public PredMacroOp
{
  public:
    static bool
    inScalarBank(IntRegIndex idx)
    {
        return (idx % 32) < 8;
    }

  protected:
    bool wide;

    VfpMacroOp(const char *mnem, ExtMachInst _machInst,
            OpClass __opClass, bool _wide) :
        PredMacroOp(mnem, _machInst, __opClass), wide(_wide)
    {}

    IntRegIndex
    addStride(IntRegIndex idx, unsigned stride)
    {
        if (wide) {
            stride *= 2;
        }
        unsigned offset = idx % 8;
        idx = (IntRegIndex)(idx - offset);
        offset += stride;
        idx = (IntRegIndex)(idx + (offset % 8));
        return idx;
    }

    void
    nextIdxs(IntRegIndex &dest, IntRegIndex &op1, IntRegIndex &op2)
    {
        unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2;
        assert(!inScalarBank(dest));
        dest = addStride(dest, stride);
        op1 = addStride(op1, stride);
        if (!inScalarBank(op2)) {
            op2 = addStride(op2, stride);
        }
    }

    void
    nextIdxs(IntRegIndex &dest, IntRegIndex &op1)
    {
        unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2;
        assert(!inScalarBank(dest));
        dest = addStride(dest, stride);
        if (!inScalarBank(op1)) {
            op1 = addStride(op1, stride);
        }
    }

    void
    nextIdxs(IntRegIndex &dest)
    {
        unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2;
        assert(!inScalarBank(dest));
        dest = addStride(dest, stride);
    }
};

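// Thin wrappers around the generic register/immediate operand formats that
// additionally tag the instruction with its VFP microop flags via
// setVfpMicroFlags().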
class VfpRegRegOp : public RegRegOp
{
  protected:
    VfpRegRegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
                IntRegIndex _dest, IntRegIndex _op1,
                VfpMicroMode mode = VfpNotAMicroop) :
        RegRegOp(mnem, _machInst, __opClass, _dest, _op1)
    {
        setVfpMicroFlags(mode, flags);
    }
};

class VfpRegImmOp : public RegImmOp
{
  protected:
    VfpRegImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
                IntRegIndex _dest, uint64_t _imm,
                VfpMicroMode mode = VfpNotAMicroop) :
        RegImmOp(mnem, _machInst, __opClass, _dest, _imm)
    {
        setVfpMicroFlags(mode, flags);
    }
};

class VfpRegRegImmOp : public RegRegImmOp
{
  protected:
    VfpRegRegImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
                   IntRegIndex _dest, IntRegIndex _op1,
                   uint64_t _imm, VfpMicroMode mode = VfpNotAMicroop) :
        RegRegImmOp(mnem, _machInst, __opClass, _dest, _op1, _imm)
    {
        setVfpMicroFlags(mode, flags);
    }
};

class VfpRegRegRegOp : public RegRegRegOp
{
  protected:
    VfpRegRegRegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
                   IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2,
                   VfpMicroMode mode = VfpNotAMicroop) :
        RegRegRegOp(mnem, _machInst, __opClass, _dest, _op1, _op2)
    {
        setVfpMicroFlags(mode, flags);
    }
};

}

#endif //__ARCH_ARM_INSTS_VFP_HH__