vfp.hh revision 7384:f12b4f28e5eb
1/*
2 * Copyright (c) 2010 ARM Limited
3 * All rights reserved
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder.  You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions are
16 * met: redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer;
18 * redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution;
21 * neither the name of the copyright holders nor the names of its
22 * contributors may be used to endorse or promote products derived from
23 * this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 *
37 * Authors: Gabe Black
38 */
39
40#ifndef __ARCH_ARM_INSTS_VFP_HH__
41#define __ARCH_ARM_INSTS_VFP_HH__
42
#include <fenv.h>

#include <cmath>
#include <cstring>

#include "arch/arm/insts/misc.hh"
#include "arch/arm/miscregs.hh"
47
48namespace ArmISA
49{
50
// Position of a VFP instruction within a microcoded sequence. Used by
// setVfpMicroFlags() below to set the corresponding StaticInst flags.
enum VfpMicroMode {
    VfpNotAMicroop,   // A regular, stand-alone instruction.
    VfpMicroop,       // An interior microop of a sequence.
    VfpFirstMicroop,  // The first microop of a sequence.
    VfpLastMicroop    // The last microop of a sequence.
};
57
58template<class T>
59static inline void
60setVfpMicroFlags(VfpMicroMode mode, T &flags)
61{
62    switch (mode) {
63      case VfpMicroop:
64        flags[StaticInst::IsMicroop] = true;
65        break;
66      case VfpFirstMicroop:
67        flags[StaticInst::IsMicroop] =
68            flags[StaticInst::IsFirstMicroop] = true;
69        break;
70      case VfpLastMicroop:
71        flags[StaticInst::IsMicroop] =
72            flags[StaticInst::IsLastMicroop] = true;
73        break;
74      case VfpNotAMicroop:
75        break;
76    }
77    if (mode == VfpMicroop || mode == VfpFirstMicroop) {
78        flags[StaticInst::IsDelayedCommit] = true;
79    }
80}
81
// Host floating-point exception flags from <fenv.h>, renamed so the
// rest of this file can use them without the FE_ prefix.
enum FeExceptionBit
{
    FeDivByZero = FE_DIVBYZERO,
    FeInexact = FE_INEXACT,
    FeInvalid = FE_INVALID,
    FeOverflow = FE_OVERFLOW,
    FeUnderflow = FE_UNDERFLOW,
    FeAllExceptions = FE_ALL_EXCEPT
};
91
// Host rounding modes from <fenv.h>, renamed for local use with
// fesetround()/fegetround().
enum FeRoundingMode
{
    FeRoundDown = FE_DOWNWARD,
    FeRoundNearest = FE_TONEAREST,
    FeRoundZero = FE_TOWARDZERO,
    FeRoundUpward = FE_UPWARD
};
99
// Rounding mode encoding used by the FPSCR rMode field; translated to
// the host's rounding modes in prepVfpFpscr() below.
enum VfpRoundingMode
{
    VfpRoundNearest = 0,
    VfpRoundUpward = 1,
    VfpRoundDown = 2,
    VfpRoundZero = 3
};
107
108template <class fpType>
109static inline void
110vfpFlushToZero(uint32_t &_fpscr, fpType &op)
111{
112    FPSCR fpscr = _fpscr;
113    if (fpscr.fz == 1 && (std::fpclassify(op) == FP_SUBNORMAL)) {
114        fpscr.idc = 1;
115        op = 0;
116    }
117    _fpscr = fpscr;
118}
119
// Two-operand convenience overload: flush both operands independently,
// accumulating any idc exception into the same FPSCR value.
template <class fpType>
static inline void
vfpFlushToZero(uint32_t &fpscr, fpType &op1, fpType &op2)
{
    vfpFlushToZero(fpscr, op1);
    vfpFlushToZero(fpscr, op2);
}
127
// Reinterpret the bits of a single-precision float as a uint32_t.
// Uses memcpy instead of a union: reading a union member other than
// the one last written is undefined behavior in C++, while memcpy is
// well-defined and compiles to the same code.
static inline uint32_t
fpToBits(float fp)
{
    uint32_t bits;
    std::memcpy(&bits, &fp, sizeof(bits));
    return bits;
}
139
// Reinterpret the bits of a double-precision float as a uint64_t.
// Uses memcpy instead of a union to avoid undefined behavior (type
// punning through a union is not valid C++).
static inline uint64_t
fpToBits(double fp)
{
    uint64_t bits;
    std::memcpy(&bits, &fp, sizeof(bits));
    return bits;
}
151
// Build a single-precision float from a raw bit pattern. Only the low
// 32 bits of the argument are used (the original code truncated the
// same way via a uint32_t union member). The junk parameter exists
// solely to select this overload. memcpy is used instead of a union
// to avoid undefined type-punning.
static inline float
bitsToFp(uint64_t bits, float junk)
{
    const uint32_t bits32 = bits;
    float fp;
    std::memcpy(&fp, &bits32, sizeof(fp));
    return fp;
}
163
// Build a double-precision float from a raw 64-bit pattern. The junk
// parameter exists solely to select this overload. memcpy is used
// instead of a union to avoid undefined type-punning.
static inline double
bitsToFp(uint64_t bits, double junk)
{
    double fp;
    std::memcpy(&fp, &bits, sizeof(fp));
    return fp;
}
175
// Replace a NaN result with the value VFP hardware would produce,
// based on the operands op1 and op2. Propagation priority (when not in
// default-NaN mode): a signaling NaN in op1 is quieted and returned
// first, then one in op2, then a quiet NaN from op1, then from op2.
// If neither operand was a NaN (the NaN came from the operation
// itself) or the FPSCR dn (default NaN) bit is set, the default quiet
// NaN is returned. Non-NaN results pass through unchanged.
template <class fpType>
static inline fpType
fixNan(FPSCR fpscr, fpType val, fpType op1, fpType op2)
{
    if (std::isnan(val)) {
        const bool single = (sizeof(val) == sizeof(float));
        // Default quiet-NaN bit pattern for this precision; also used
        // below as the mask that distinguishes quiet from signaling.
        const uint64_t qnan = single ? 0x7fc00000 : ULL(0x7ff8000000000000);
        const bool nan1 = std::isnan(op1);
        const bool nan2 = std::isnan(op2);
        // A signaling NaN is a NaN whose bits don't fully cover the
        // quiet-NaN mask (its quiet bit is clear).
        const bool signal1 = nan1 && ((fpToBits(op1) & qnan) != qnan);
        const bool signal2 = nan2 && ((fpToBits(op2) & qnan) != qnan);
        fpType junk = 0.0;
        if ((!nan1 && !nan2) || (fpscr.dn == 1)) {
            // Operation-generated NaN, or default-NaN mode: return the
            // default quiet NaN.
            val = bitsToFp(qnan, junk);
        } else if (signal1) {
            // Quiet op1's signaling NaN by setting the quiet bit(s).
            val = bitsToFp(fpToBits(op1) | qnan, junk);
        } else if (signal2) {
            val = bitsToFp(fpToBits(op2) | qnan, junk);
        } else if (nan1) {
            val = op1;
        } else if (nan2) {
            val = op2;
        }
    }
    return val;
}
202
203static inline uint64_t
204vfpFpSToFixed(float val, bool isSigned, bool half, uint8_t imm)
205{
206    fesetround(FeRoundZero);
207    val = val * powf(2.0, imm);
208    __asm__ __volatile__("" : "=m" (val) : "m" (val));
209    feclearexcept(FeAllExceptions);
210    __asm__ __volatile__("" : "=m" (val) : "m" (val));
211    float origVal = val;
212    val = rintf(val);
213    int fpType = std::fpclassify(val);
214    if (fpType == FP_SUBNORMAL || fpType == FP_NAN) {
215        if (fpType == FP_NAN) {
216            feraiseexcept(FeInvalid);
217        }
218        val = 0.0;
219    } else if (origVal != val) {
220        feraiseexcept(FeInexact);
221    }
222
223    if (isSigned) {
224        if (half) {
225            if ((double)val < (int16_t)(1 << 15)) {
226                feraiseexcept(FeInvalid);
227                feclearexcept(FeInexact);
228                return (int16_t)(1 << 15);
229            }
230            if ((double)val > (int16_t)mask(15)) {
231                feraiseexcept(FeInvalid);
232                feclearexcept(FeInexact);
233                return (int16_t)mask(15);
234            }
235            return (int16_t)val;
236        } else {
237            if ((double)val < (int32_t)(1 << 31)) {
238                feraiseexcept(FeInvalid);
239                feclearexcept(FeInexact);
240                return (int32_t)(1 << 31);
241            }
242            if ((double)val > (int32_t)mask(31)) {
243                feraiseexcept(FeInvalid);
244                feclearexcept(FeInexact);
245                return (int32_t)mask(31);
246            }
247            return (int32_t)val;
248        }
249    } else {
250        if (half) {
251            if ((double)val < 0) {
252                feraiseexcept(FeInvalid);
253                feclearexcept(FeInexact);
254                return 0;
255            }
256            if ((double)val > (mask(16))) {
257                feraiseexcept(FeInvalid);
258                feclearexcept(FeInexact);
259                return mask(16);
260            }
261            return (uint16_t)val;
262        } else {
263            if ((double)val < 0) {
264                feraiseexcept(FeInvalid);
265                feclearexcept(FeInexact);
266                return 0;
267            }
268            if ((double)val > (mask(32))) {
269                feraiseexcept(FeInvalid);
270                feclearexcept(FeInexact);
271                return mask(32);
272            }
273            return (uint32_t)val;
274        }
275    }
276}
277
// Convert an unsigned fixed-point value (16 or 32 bit, with imm
// fractional bits) to single precision, rounding to nearest. Host
// exception flags are cleared just before the final division so the
// caller can harvest any exceptions that division raises.
static inline float
vfpUFixedToFpS(uint32_t val, bool half, uint8_t imm)
{
    fesetround(FeRoundNearest);
    if (half)
        val = (uint16_t)val;
    float scale = powf(2.0, imm);
    // The empty asm statements are compiler barriers: they force the
    // scale computation to finish before the flags are cleared, and
    // keep the division below from being hoisted above the clear.
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    feclearexcept(FeAllExceptions);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    return val / scale;
}
290
// Convert a signed fixed-point value (16 or 32 bit, with imm
// fractional bits) to single precision, rounding to nearest. The
// 16-bit variant sign-extends the low half of val first.
static inline float
vfpSFixedToFpS(int32_t val, bool half, uint8_t imm)
{
    fesetround(FeRoundNearest);
    if (half)
        val = sext<16>(val & mask(16));
    float scale = powf(2.0, imm);
    // Compiler barriers: see vfpUFixedToFpS for why the clearing of
    // the exception flags must be pinned between these statements.
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    feclearexcept(FeAllExceptions);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    return val / scale;
}
303
// Convert a double-precision value to a (possibly signed) 16- or
// 32-bit fixed-point value with imm fractional bits, saturating and
// raising the invalid-operation flag on overflow or NaN. The result is
// returned in the low bits of a uint64_t (sign-extended for the signed
// variants).
static inline uint64_t
vfpFpDToFixed(double val, bool isSigned, bool half, uint8_t imm)
{
    // Scale in round-to-nearest mode; only the conversion to an
    // integer value below rounds toward zero.
    fesetround(FeRoundNearest);
    val = val * pow(2.0, imm);
    __asm__ __volatile__("" : "=m" (val) : "m" (val));
    fesetround(FeRoundZero);
    feclearexcept(FeAllExceptions);
    // The empty asm statements keep the compiler from moving the FP
    // operations across the rounding-mode changes above.
    __asm__ __volatile__("" : "=m" (val) : "m" (val));
    double origVal = val;
    val = rint(val);
    int fpType = std::fpclassify(val);
    if (fpType == FP_SUBNORMAL || fpType == FP_NAN) {
        if (fpType == FP_NAN) {
            // NaN converts to zero and raises invalid.
            feraiseexcept(FeInvalid);
        }
        val = 0.0;
    } else if (origVal != val) {
        // rint changed the value, so the result is inexact.
        feraiseexcept(FeInexact);
    }
    if (isSigned) {
        if (half) {
            // Saturate to the signed 16-bit range.
            if (val < (int16_t)(1 << 15)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return (int16_t)(1 << 15);
            }
            if (val > (int16_t)mask(15)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return (int16_t)mask(15);
            }
            return (int16_t)val;
        } else {
            // Saturate to the signed 32-bit range.
            if (val < (int32_t)(1 << 31)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return (int32_t)(1 << 31);
            }
            if (val > (int32_t)mask(31)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return (int32_t)mask(31);
            }
            return (int32_t)val;
        }
    } else {
        if (half) {
            // Saturate to the unsigned 16-bit range.
            if (val < 0) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return 0;
            }
            if (val > mask(16)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return mask(16);
            }
            return (uint16_t)val;
        } else {
            // Saturate to the unsigned 32-bit range.
            if (val < 0) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return 0;
            }
            if (val > mask(32)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return mask(32);
            }
            return (uint32_t)val;
        }
    }
}
378
// Convert an unsigned fixed-point value (16 or 32 bit, with imm
// fractional bits) to double precision, rounding to nearest.
static inline double
vfpUFixedToFpD(uint32_t val, bool half, uint8_t imm)
{
    fesetround(FeRoundNearest);
    if (half)
        val = (uint16_t)val;
    double scale = pow(2.0, imm);
    // Compiler barriers pinning the exception-flag clear between the
    // scale computation and the division (see vfpUFixedToFpS).
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    feclearexcept(FeAllExceptions);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    return val / scale;
}
391
// Convert a signed fixed-point value (16 or 32 bit, with imm
// fractional bits) to double precision, rounding to nearest. The
// 16-bit variant sign-extends the low half of val first.
static inline double
vfpSFixedToFpD(int32_t val, bool half, uint8_t imm)
{
    fesetround(FeRoundNearest);
    if (half)
        val = sext<16>(val & mask(16));
    double scale = pow(2.0, imm);
    // Compiler barriers pinning the exception-flag clear between the
    // scale computation and the division (see vfpUFixedToFpS).
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    feclearexcept(FeAllExceptions);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    return val / scale;
}
404
405typedef int VfpSavedState;
406
407static inline VfpSavedState
408prepVfpFpscr(FPSCR fpscr)
409{
410    int roundingMode = fegetround();
411    feclearexcept(FeAllExceptions);
412    switch (fpscr.rMode) {
413      case VfpRoundNearest:
414        fesetround(FeRoundNearest);
415        break;
416      case VfpRoundUpward:
417        fesetround(FeRoundUpward);
418        break;
419      case VfpRoundDown:
420        fesetround(FeRoundDown);
421        break;
422      case VfpRoundZero:
423        fesetround(FeRoundZero);
424        break;
425    }
426    return roundingMode;
427}
428
429static inline FPSCR
430setVfpFpscr(FPSCR fpscr, VfpSavedState state)
431{
432    int exceptions = fetestexcept(FeAllExceptions);
433    if (exceptions & FeInvalid) {
434        fpscr.ioc = 1;
435    }
436    if (exceptions & FeDivByZero) {
437        fpscr.dzc = 1;
438    }
439    if (exceptions & FeOverflow) {
440        fpscr.ofc = 1;
441    }
442    if (exceptions & FeUnderflow) {
443        fpscr.ufc = 1;
444    }
445    if (exceptions & FeInexact) {
446        fpscr.ixc = 1;
447    }
448    fesetround(state);
449    return fpscr;
450}
451
452class VfpMacroOp : public PredMacroOp
453{
454  public:
455    static bool
456    inScalarBank(IntRegIndex idx)
457    {
458        return (idx % 32) < 8;
459    }
460
461  protected:
462    bool wide;
463
464    VfpMacroOp(const char *mnem, ExtMachInst _machInst,
465            OpClass __opClass, bool _wide) :
466        PredMacroOp(mnem, _machInst, __opClass), wide(_wide)
467    {}
468
469    IntRegIndex
470    addStride(IntRegIndex idx, unsigned stride)
471    {
472        if (wide) {
473            stride *= 2;
474        }
475        unsigned offset = idx % 8;
476        idx = (IntRegIndex)(idx - offset);
477        offset += stride;
478        idx = (IntRegIndex)(idx + (offset % 8));
479        return idx;
480    }
481
482    void
483    nextIdxs(IntRegIndex &dest, IntRegIndex &op1, IntRegIndex &op2)
484    {
485        unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2;
486        assert(!inScalarBank(dest));
487        dest = addStride(dest, stride);
488        op1 = addStride(op1, stride);
489        if (!inScalarBank(op2)) {
490            op2 = addStride(op2, stride);
491        }
492    }
493
494    void
495    nextIdxs(IntRegIndex &dest, IntRegIndex &op1)
496    {
497        unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2;
498        assert(!inScalarBank(dest));
499        dest = addStride(dest, stride);
500        if (!inScalarBank(op1)) {
501            op1 = addStride(op1, stride);
502        }
503    }
504
505    void
506    nextIdxs(IntRegIndex &dest)
507    {
508        unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2;
509        assert(!inScalarBank(dest));
510        dest = addStride(dest, stride);
511    }
512};
513
// VFP operation with one destination and one source register. The mode
// argument marks the instruction's position within a microop sequence
// (see setVfpMicroFlags).
class VfpRegRegOp : public RegRegOp
{
  protected:
    VfpRegRegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
                IntRegIndex _dest, IntRegIndex _op1,
                VfpMicroMode mode = VfpNotAMicroop) :
        RegRegOp(mnem, _machInst, __opClass, _dest, _op1)
    {
        setVfpMicroFlags(mode, flags);
    }
};
525
// VFP operation with a destination register and an immediate operand.
// The mode argument marks the instruction's position within a microop
// sequence (see setVfpMicroFlags).
class VfpRegImmOp : public RegImmOp
{
  protected:
    VfpRegImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
                IntRegIndex _dest, uint64_t _imm,
                VfpMicroMode mode = VfpNotAMicroop) :
        RegImmOp(mnem, _machInst, __opClass, _dest, _imm)
    {
        setVfpMicroFlags(mode, flags);
    }
};
537
// VFP operation with a destination, one source register, and an
// immediate. The mode argument marks the instruction's position within
// a microop sequence (see setVfpMicroFlags).
class VfpRegRegImmOp : public RegRegImmOp
{
  protected:
    VfpRegRegImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
                   IntRegIndex _dest, IntRegIndex _op1,
                   uint64_t _imm, VfpMicroMode mode = VfpNotAMicroop) :
        RegRegImmOp(mnem, _machInst, __opClass, _dest, _op1, _imm)
    {
        setVfpMicroFlags(mode, flags);
    }
};
549
// VFP operation with a destination and two source registers. The mode
// argument marks the instruction's position within a microop sequence
// (see setVfpMicroFlags).
class VfpRegRegRegOp : public RegRegRegOp
{
  protected:
    VfpRegRegRegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
                   IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2,
                   VfpMicroMode mode = VfpNotAMicroop) :
        RegRegRegOp(mnem, _machInst, __opClass, _dest, _op1, _op2)
    {
        setVfpMicroFlags(mode, flags);
    }
};
561
562}
563
564#endif //__ARCH_ARM_INSTS_VFP_HH__
565