// vfp.hh, revision 7385
1/*
2 * Copyright (c) 2010 ARM Limited
3 * All rights reserved
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder.  You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions are
16 * met: redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer;
18 * redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution;
21 * neither the name of the copyright holders nor the names of its
22 * contributors may be used to endorse or promote products derived from
23 * this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 *
37 * Authors: Gabe Black
38 */
39
40#ifndef __ARCH_ARM_INSTS_VFP_HH__
41#define __ARCH_ARM_INSTS_VFP_HH__
42
#include <fenv.h>

#include <cmath>
#include <cstring>
#include <limits>

#include "arch/arm/insts/misc.hh"
#include "arch/arm/miscregs.hh"
47
48namespace ArmISA
49{
50
// Position of a VFP instruction within a macro-op, used by
// setVfpMicroFlags() to select the StaticInst micro-op flags to set.
enum VfpMicroMode {
    VfpNotAMicroop,   // stand-alone instruction, not part of a macro-op
    VfpMicroop,       // interior micro-op of a macro-op
    VfpFirstMicroop,  // first micro-op of a macro-op
    VfpLastMicroop    // last micro-op of a macro-op
};
57
58template<class T>
59static inline void
60setVfpMicroFlags(VfpMicroMode mode, T &flags)
61{
62    switch (mode) {
63      case VfpMicroop:
64        flags[StaticInst::IsMicroop] = true;
65        break;
66      case VfpFirstMicroop:
67        flags[StaticInst::IsMicroop] =
68            flags[StaticInst::IsFirstMicroop] = true;
69        break;
70      case VfpLastMicroop:
71        flags[StaticInst::IsMicroop] =
72            flags[StaticInst::IsLastMicroop] = true;
73        break;
74      case VfpNotAMicroop:
75        break;
76    }
77    if (mode == VfpMicroop || mode == VfpFirstMicroop) {
78        flags[StaticInst::IsDelayedCommit] = true;
79    }
80}
81
// Host <fenv.h> floating point exception flags, renamed so the VFP
// emulation code reads consistently with the other Fe*/Vfp* enums.
enum FeExceptionBit
{
    FeDivByZero = FE_DIVBYZERO,
    FeInexact = FE_INEXACT,
    FeInvalid = FE_INVALID,
    FeOverflow = FE_OVERFLOW,
    FeUnderflow = FE_UNDERFLOW,
    FeAllExceptions = FE_ALL_EXCEPT
};
91
// Host <fenv.h> rounding modes, renamed for local consistency.  These
// are the values passed to fesetround()/returned by fegetround().
enum FeRoundingMode
{
    FeRoundDown = FE_DOWNWARD,
    FeRoundNearest = FE_TONEAREST,
    FeRoundZero = FE_TOWARDZERO,
    FeRoundUpward = FE_UPWARD
};
99
// Rounding mode encodings as they appear in the FPSCR.RMode field;
// prepVfpFpscr() translates these into the host's FeRoundingMode.
enum VfpRoundingMode
{
    VfpRoundNearest = 0,
    VfpRoundUpward = 1,
    VfpRoundDown = 2,
    VfpRoundZero = 3
};
107
108template <class fpType>
109static inline void
110vfpFlushToZero(uint32_t &_fpscr, fpType &op)
111{
112    FPSCR fpscr = _fpscr;
113    if (fpscr.fz == 1 && (std::fpclassify(op) == FP_SUBNORMAL)) {
114        fpscr.idc = 1;
115        op = 0;
116    }
117    _fpscr = fpscr;
118}
119
/// Convenience overload: apply flush-to-zero to both operands of a
/// two-input operation, accumulating IDC into the same FPSCR image.
template <class fpType>
static inline void
vfpFlushToZero(uint32_t &fpscr, fpType &op1, fpType &op2)
{
    vfpFlushToZero(fpscr, op1);
    vfpFlushToZero(fpscr, op2);
}
127
/// Return the raw bit pattern of a single precision float as a
/// uint32_t.  Uses memcpy instead of a union read: type-punning
/// through a union is undefined behavior in strict C++.
static inline uint32_t
fpToBits(float fp)
{
    uint32_t bits;
    std::memcpy(&bits, &fp, sizeof(bits));
    return bits;
}
139
/// Return the raw bit pattern of a double precision float as a
/// uint64_t.  Uses memcpy instead of a union read: type-punning
/// through a union is undefined behavior in strict C++.
static inline uint64_t
fpToBits(double fp)
{
    uint64_t bits;
    std::memcpy(&bits, &fp, sizeof(bits));
    return bits;
}
151
/// Reinterpret the low 32 bits of 'bits' as a single precision float.
/// The unused 'junk' parameter only selects this overload.  Uses
/// memcpy instead of a union read, which is undefined behavior in
/// strict C++; the truncation of the upper 32 bits matches the
/// original union-member assignment.
static inline float
bitsToFp(uint64_t bits, float junk)
{
    const uint32_t lower = (uint32_t)bits;
    float fp;
    std::memcpy(&fp, &lower, sizeof(fp));
    return fp;
}
163
/// Reinterpret 'bits' as a double precision float.  The unused 'junk'
/// parameter only selects this overload.  Uses memcpy instead of a
/// union read, which is undefined behavior in strict C++.
static inline double
bitsToFp(uint64_t bits, double junk)
{
    double fp;
    std::memcpy(&fp, &bits, sizeof(fp));
    return fp;
}
175
/// Fix up the result 'val' of a host FP operation on 'op1' and 'op2'
/// so it follows VFP rules for NaN propagation and flush-to-zero,
/// which the host FPU does not apply for us.
template <class fpType>
static inline fpType
fixDest(FPSCR fpscr, fpType val, fpType op1, fpType op2)
{
    int fpClass = std::fpclassify(val);
    fpType junk = 0.0;
    if (fpClass == FP_NAN) {
        const bool single = (sizeof(val) == sizeof(float));
        // Bit pattern of the default quiet NaN for this width.
        const uint64_t qnan = single ? 0x7fc00000 : ULL(0x7ff8000000000000);
        const bool nan1 = std::isnan(op1);
        const bool nan2 = std::isnan(op2);
        // A signalling NaN has its quiet bit clear, so masking with the
        // qnan pattern doesn't give the pattern back.
        const bool signal1 = nan1 && ((fpToBits(op1) & qnan) != qnan);
        const bool signal2 = nan2 && ((fpToBits(op2) & qnan) != qnan);
        if ((!nan1 && !nan2) || (fpscr.dn == 1)) {
            // Either the host generated the NaN itself (neither input
            // was one), or default-NaN mode is on: use the default qNaN.
            val = bitsToFp(qnan, junk);
        } else if (signal1) {
            // Propagate the first signalling NaN operand, quieted.
            val = bitsToFp(fpToBits(op1) | qnan, junk);
        } else if (signal2) {
            val = bitsToFp(fpToBits(op2) | qnan, junk);
        } else if (nan1) {
            // Otherwise propagate the first quiet NaN operand.
            val = op1;
        } else if (nan2) {
            val = op2;
        }
    } else if (fpClass == FP_SUBNORMAL && fpscr.fz == 1) {
        // Turn val into a zero with the correct sign;
        uint64_t bitMask = ULL(0x1) << (sizeof(fpType) * 8 - 1);
        val = bitsToFp(fpToBits(val) & bitMask, junk);
        feraiseexcept(FeUnderflow);
    }
    return val;
}
208
209static inline uint64_t
210vfpFpSToFixed(float val, bool isSigned, bool half, uint8_t imm)
211{
212    fesetround(FeRoundZero);
213    val = val * powf(2.0, imm);
214    __asm__ __volatile__("" : "=m" (val) : "m" (val));
215    feclearexcept(FeAllExceptions);
216    __asm__ __volatile__("" : "=m" (val) : "m" (val));
217    float origVal = val;
218    val = rintf(val);
219    int fpType = std::fpclassify(val);
220    if (fpType == FP_SUBNORMAL || fpType == FP_NAN) {
221        if (fpType == FP_NAN) {
222            feraiseexcept(FeInvalid);
223        }
224        val = 0.0;
225    } else if (origVal != val) {
226        feraiseexcept(FeInexact);
227    }
228
229    if (isSigned) {
230        if (half) {
231            if ((double)val < (int16_t)(1 << 15)) {
232                feraiseexcept(FeInvalid);
233                feclearexcept(FeInexact);
234                return (int16_t)(1 << 15);
235            }
236            if ((double)val > (int16_t)mask(15)) {
237                feraiseexcept(FeInvalid);
238                feclearexcept(FeInexact);
239                return (int16_t)mask(15);
240            }
241            return (int16_t)val;
242        } else {
243            if ((double)val < (int32_t)(1 << 31)) {
244                feraiseexcept(FeInvalid);
245                feclearexcept(FeInexact);
246                return (int32_t)(1 << 31);
247            }
248            if ((double)val > (int32_t)mask(31)) {
249                feraiseexcept(FeInvalid);
250                feclearexcept(FeInexact);
251                return (int32_t)mask(31);
252            }
253            return (int32_t)val;
254        }
255    } else {
256        if (half) {
257            if ((double)val < 0) {
258                feraiseexcept(FeInvalid);
259                feclearexcept(FeInexact);
260                return 0;
261            }
262            if ((double)val > (mask(16))) {
263                feraiseexcept(FeInvalid);
264                feclearexcept(FeInexact);
265                return mask(16);
266            }
267            return (uint16_t)val;
268        } else {
269            if ((double)val < 0) {
270                feraiseexcept(FeInvalid);
271                feclearexcept(FeInexact);
272                return 0;
273            }
274            if ((double)val > (mask(32))) {
275                feraiseexcept(FeInvalid);
276                feclearexcept(FeInexact);
277                return mask(32);
278            }
279            return (uint32_t)val;
280        }
281    }
282}
283
284static inline float
285vfpUFixedToFpS(uint32_t val, bool half, uint8_t imm)
286{
287    fesetround(FeRoundNearest);
288    if (half)
289        val = (uint16_t)val;
290    float scale = powf(2.0, imm);
291    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
292    feclearexcept(FeAllExceptions);
293    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
294    return val / scale;
295}
296
297static inline float
298vfpSFixedToFpS(int32_t val, bool half, uint8_t imm)
299{
300    fesetround(FeRoundNearest);
301    if (half)
302        val = sext<16>(val & mask(16));
303    float scale = powf(2.0, imm);
304    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
305    feclearexcept(FeAllExceptions);
306    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
307    return val / scale;
308}
309
310static inline uint64_t
311vfpFpDToFixed(double val, bool isSigned, bool half, uint8_t imm)
312{
313    fesetround(FeRoundNearest);
314    val = val * pow(2.0, imm);
315    __asm__ __volatile__("" : "=m" (val) : "m" (val));
316    fesetround(FeRoundZero);
317    feclearexcept(FeAllExceptions);
318    __asm__ __volatile__("" : "=m" (val) : "m" (val));
319    double origVal = val;
320    val = rint(val);
321    int fpType = std::fpclassify(val);
322    if (fpType == FP_SUBNORMAL || fpType == FP_NAN) {
323        if (fpType == FP_NAN) {
324            feraiseexcept(FeInvalid);
325        }
326        val = 0.0;
327    } else if (origVal != val) {
328        feraiseexcept(FeInexact);
329    }
330    if (isSigned) {
331        if (half) {
332            if (val < (int16_t)(1 << 15)) {
333                feraiseexcept(FeInvalid);
334                feclearexcept(FeInexact);
335                return (int16_t)(1 << 15);
336            }
337            if (val > (int16_t)mask(15)) {
338                feraiseexcept(FeInvalid);
339                feclearexcept(FeInexact);
340                return (int16_t)mask(15);
341            }
342            return (int16_t)val;
343        } else {
344            if (val < (int32_t)(1 << 31)) {
345                feraiseexcept(FeInvalid);
346                feclearexcept(FeInexact);
347                return (int32_t)(1 << 31);
348            }
349            if (val > (int32_t)mask(31)) {
350                feraiseexcept(FeInvalid);
351                feclearexcept(FeInexact);
352                return (int32_t)mask(31);
353            }
354            return (int32_t)val;
355        }
356    } else {
357        if (half) {
358            if (val < 0) {
359                feraiseexcept(FeInvalid);
360                feclearexcept(FeInexact);
361                return 0;
362            }
363            if (val > mask(16)) {
364                feraiseexcept(FeInvalid);
365                feclearexcept(FeInexact);
366                return mask(16);
367            }
368            return (uint16_t)val;
369        } else {
370            if (val < 0) {
371                feraiseexcept(FeInvalid);
372                feclearexcept(FeInexact);
373                return 0;
374            }
375            if (val > mask(32)) {
376                feraiseexcept(FeInvalid);
377                feclearexcept(FeInexact);
378                return mask(32);
379            }
380            return (uint32_t)val;
381        }
382    }
383}
384
385static inline double
386vfpUFixedToFpD(uint32_t val, bool half, uint8_t imm)
387{
388    fesetround(FeRoundNearest);
389    if (half)
390        val = (uint16_t)val;
391    double scale = pow(2.0, imm);
392    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
393    feclearexcept(FeAllExceptions);
394    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
395    return val / scale;
396}
397
398static inline double
399vfpSFixedToFpD(int32_t val, bool half, uint8_t imm)
400{
401    fesetround(FeRoundNearest);
402    if (half)
403        val = sext<16>(val & mask(16));
404    double scale = pow(2.0, imm);
405    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
406    feclearexcept(FeAllExceptions);
407    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
408    return val / scale;
409}
410
/// Saved host FP rounding mode, as returned by fegetround(); produced
/// by prepVfpFpscr() and consumed by setVfpFpscr().
typedef int VfpSavedState;
412
413static inline VfpSavedState
414prepVfpFpscr(FPSCR fpscr)
415{
416    int roundingMode = fegetround();
417    feclearexcept(FeAllExceptions);
418    switch (fpscr.rMode) {
419      case VfpRoundNearest:
420        fesetround(FeRoundNearest);
421        break;
422      case VfpRoundUpward:
423        fesetround(FeRoundUpward);
424        break;
425      case VfpRoundDown:
426        fesetround(FeRoundDown);
427        break;
428      case VfpRoundZero:
429        fesetround(FeRoundZero);
430        break;
431    }
432    return roundingMode;
433}
434
435static inline FPSCR
436setVfpFpscr(FPSCR fpscr, VfpSavedState state)
437{
438    int exceptions = fetestexcept(FeAllExceptions);
439    if (exceptions & FeInvalid) {
440        fpscr.ioc = 1;
441    }
442    if (exceptions & FeDivByZero) {
443        fpscr.dzc = 1;
444    }
445    if (exceptions & FeOverflow) {
446        fpscr.ofc = 1;
447    }
448    if (exceptions & FeUnderflow) {
449        fpscr.ufc = 1;
450    }
451    if (exceptions & FeInexact) {
452        fpscr.ixc = 1;
453    }
454    fesetround(state);
455    return fpscr;
456}
457
458class VfpMacroOp : public PredMacroOp
459{
460  public:
461    static bool
462    inScalarBank(IntRegIndex idx)
463    {
464        return (idx % 32) < 8;
465    }
466
467  protected:
468    bool wide;
469
470    VfpMacroOp(const char *mnem, ExtMachInst _machInst,
471            OpClass __opClass, bool _wide) :
472        PredMacroOp(mnem, _machInst, __opClass), wide(_wide)
473    {}
474
475    IntRegIndex
476    addStride(IntRegIndex idx, unsigned stride)
477    {
478        if (wide) {
479            stride *= 2;
480        }
481        unsigned offset = idx % 8;
482        idx = (IntRegIndex)(idx - offset);
483        offset += stride;
484        idx = (IntRegIndex)(idx + (offset % 8));
485        return idx;
486    }
487
488    void
489    nextIdxs(IntRegIndex &dest, IntRegIndex &op1, IntRegIndex &op2)
490    {
491        unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2;
492        assert(!inScalarBank(dest));
493        dest = addStride(dest, stride);
494        op1 = addStride(op1, stride);
495        if (!inScalarBank(op2)) {
496            op2 = addStride(op2, stride);
497        }
498    }
499
500    void
501    nextIdxs(IntRegIndex &dest, IntRegIndex &op1)
502    {
503        unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2;
504        assert(!inScalarBank(dest));
505        dest = addStride(dest, stride);
506        if (!inScalarBank(op1)) {
507            op1 = addStride(op1, stride);
508        }
509    }
510
511    void
512    nextIdxs(IntRegIndex &dest)
513    {
514        unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2;
515        assert(!inScalarBank(dest));
516        dest = addStride(dest, stride);
517    }
518};
519
/// Predicated two-register VFP operation; also tags the instruction
/// with the micro-op flags implied by its position in a macro-op.
class VfpRegRegOp : public RegRegOp
{
  protected:
    VfpRegRegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
                IntRegIndex _dest, IntRegIndex _op1,
                VfpMicroMode mode = VfpNotAMicroop) :
        RegRegOp(mnem, _machInst, __opClass, _dest, _op1)
    {
        setVfpMicroFlags(mode, flags);
    }
};
531
/// Predicated register/immediate VFP operation; also tags the
/// instruction with the micro-op flags implied by its position in a
/// macro-op.
class VfpRegImmOp : public RegImmOp
{
  protected:
    VfpRegImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
                IntRegIndex _dest, uint64_t _imm,
                VfpMicroMode mode = VfpNotAMicroop) :
        RegImmOp(mnem, _machInst, __opClass, _dest, _imm)
    {
        setVfpMicroFlags(mode, flags);
    }
};
543
/// Predicated two-register-plus-immediate VFP operation; also tags the
/// instruction with the micro-op flags implied by its position in a
/// macro-op.
class VfpRegRegImmOp : public RegRegImmOp
{
  protected:
    VfpRegRegImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
                   IntRegIndex _dest, IntRegIndex _op1,
                   uint64_t _imm, VfpMicroMode mode = VfpNotAMicroop) :
        RegRegImmOp(mnem, _machInst, __opClass, _dest, _op1, _imm)
    {
        setVfpMicroFlags(mode, flags);
    }
};
555
/// Predicated three-register VFP operation; also tags the instruction
/// with the micro-op flags implied by its position in a macro-op.
class VfpRegRegRegOp : public RegRegRegOp
{
  protected:
    VfpRegRegRegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
                   IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2,
                   VfpMicroMode mode = VfpNotAMicroop) :
        RegRegRegOp(mnem, _machInst, __opClass, _dest, _op1, _op2)
    {
        setVfpMicroFlags(mode, flags);
    }
};
567
568}
569
570#endif //__ARCH_ARM_INSTS_VFP_HH__
571