// vfp.hh revision 7382:b3c768629a54
1/*
2 * Copyright (c) 2010 ARM Limited
3 * All rights reserved
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder.  You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions are
16 * met: redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer;
18 * redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution;
21 * neither the name of the copyright holders nor the names of its
22 * contributors may be used to endorse or promote products derived from
23 * this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 *
37 * Authors: Gabe Black
38 */
39
40#ifndef __ARCH_ARM_INSTS_VFP_HH__
41#define __ARCH_ARM_INSTS_VFP_HH__
42
#include <fenv.h>

#include <cmath>
#include <cstdint>

#include "arch/arm/insts/misc.hh"
#include "arch/arm/miscregs.hh"
47
// Describes where (if anywhere) an instruction sits in a sequence of
// VFP microops, so setVfpMicroFlags() can set the right StaticInst flags.
enum VfpMicroMode {
    VfpNotAMicroop = 0,   // a normal, stand-alone instruction
    VfpMicroop = 1,       // a microop in the middle of a sequence
    VfpFirstMicroop = 2,  // the first microop of a sequence
    VfpLastMicroop = 3    // the last microop of a sequence
};
54
55template<class T>
56static inline void
57setVfpMicroFlags(VfpMicroMode mode, T &flags)
58{
59    switch (mode) {
60      case VfpMicroop:
61        flags[StaticInst::IsMicroop] = true;
62        break;
63      case VfpFirstMicroop:
64        flags[StaticInst::IsMicroop] =
65            flags[StaticInst::IsFirstMicroop] = true;
66        break;
67      case VfpLastMicroop:
68        flags[StaticInst::IsMicroop] =
69            flags[StaticInst::IsLastMicroop] = true;
70        break;
71      case VfpNotAMicroop:
72        break;
73    }
74    if (mode == VfpMicroop || mode == VfpFirstMicroop) {
75        flags[StaticInst::IsDelayedCommit] = true;
76    }
77}
78
// Host floating point exception bits, aliased from the <fenv.h> macros so
// the rest of the VFP code doesn't use the raw FE_* names directly.
enum FeExceptionBit
{
    FeDivByZero = FE_DIVBYZERO,
    FeInexact = FE_INEXACT,
    FeInvalid = FE_INVALID,
    FeOverflow = FE_OVERFLOW,
    FeUnderflow = FE_UNDERFLOW,
    FeAllExceptions = FE_ALL_EXCEPT  // union of all of the above
};
88
// Host rounding modes, aliased from the <fenv.h> macros for use with
// fesetround()/fegetround().
enum FeRoundingMode
{
    FeRoundDown = FE_DOWNWARD,      // towards -infinity
    FeRoundNearest = FE_TONEAREST,  // to nearest (even)
    FeRoundZero = FE_TOWARDZERO,    // towards zero (truncate)
    FeRoundUpward = FE_UPWARD       // towards +infinity
};
96
// Rounding mode encoding used by the guest FPSCR.rMode field.  These are
// the architectural values, distinct from the host's FE_* values; see
// prepVfpFpscr() for the mapping.
enum VfpRoundingMode
{
    VfpRoundNearest = 0,  // round to nearest (even)
    VfpRoundUpward = 1,   // round towards +infinity
    VfpRoundDown = 2,     // round towards -infinity
    VfpRoundZero = 3      // round towards zero
};
104
105template <class fpType>
106static inline void
107vfpFlushToZero(uint32_t &_fpscr, fpType &op)
108{
109    FPSCR fpscr = _fpscr;
110    if (fpscr.fz == 1 && (std::fpclassify(op) == FP_SUBNORMAL)) {
111        fpscr.idc = 1;
112        op = 0;
113    }
114    _fpscr = fpscr;
115}
116
117template <class fpType>
118static inline void
119vfpFlushToZero(uint32_t &fpscr, fpType &op1, fpType &op2)
120{
121    vfpFlushToZero(fpscr, op1);
122    vfpFlushToZero(fpscr, op2);
123}
124
125static inline uint64_t
126vfpFpSToFixed(float val, bool isSigned, bool half, uint8_t imm)
127{
128    fesetround(FeRoundZero);
129    val = val * powf(2.0, imm);
130    __asm__ __volatile__("" : "=m" (val) : "m" (val));
131    feclearexcept(FeAllExceptions);
132    __asm__ __volatile__("" : "=m" (val) : "m" (val));
133    float origVal = val;
134    val = rintf(val);
135    int fpType = std::fpclassify(val);
136    if (fpType == FP_SUBNORMAL || fpType == FP_NAN) {
137        if (fpType == FP_NAN) {
138            feraiseexcept(FeInvalid);
139        }
140        val = 0.0;
141    } else if (origVal != val) {
142        feraiseexcept(FeInexact);
143    }
144
145    if (isSigned) {
146        if (half) {
147            if ((double)val < (int16_t)(1 << 15)) {
148                feraiseexcept(FeInvalid);
149                feclearexcept(FeInexact);
150                return (int16_t)(1 << 15);
151            }
152            if ((double)val > (int16_t)mask(15)) {
153                feraiseexcept(FeInvalid);
154                feclearexcept(FeInexact);
155                return (int16_t)mask(15);
156            }
157            return (int16_t)val;
158        } else {
159            if ((double)val < (int32_t)(1 << 31)) {
160                feraiseexcept(FeInvalid);
161                feclearexcept(FeInexact);
162                return (int32_t)(1 << 31);
163            }
164            if ((double)val > (int32_t)mask(31)) {
165                feraiseexcept(FeInvalid);
166                feclearexcept(FeInexact);
167                return (int32_t)mask(31);
168            }
169            return (int32_t)val;
170        }
171    } else {
172        if (half) {
173            if ((double)val < 0) {
174                feraiseexcept(FeInvalid);
175                feclearexcept(FeInexact);
176                return 0;
177            }
178            if ((double)val > (mask(16))) {
179                feraiseexcept(FeInvalid);
180                feclearexcept(FeInexact);
181                return mask(16);
182            }
183            return (uint16_t)val;
184        } else {
185            if ((double)val < 0) {
186                feraiseexcept(FeInvalid);
187                feclearexcept(FeInexact);
188                return 0;
189            }
190            if ((double)val > (mask(32))) {
191                feraiseexcept(FeInvalid);
192                feclearexcept(FeInexact);
193                return mask(32);
194            }
195            return (uint32_t)val;
196        }
197    }
198}
199
200static inline float
201vfpUFixedToFpS(uint32_t val, bool half, uint8_t imm)
202{
203    fesetround(FeRoundNearest);
204    if (half)
205        val = (uint16_t)val;
206    float scale = powf(2.0, imm);
207    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
208    feclearexcept(FeAllExceptions);
209    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
210    return val / scale;
211}
212
213static inline float
214vfpSFixedToFpS(int32_t val, bool half, uint8_t imm)
215{
216    fesetround(FeRoundNearest);
217    if (half)
218        val = sext<16>(val & mask(16));
219    float scale = powf(2.0, imm);
220    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
221    feclearexcept(FeAllExceptions);
222    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
223    return val / scale;
224}
225
226static inline uint64_t
227vfpFpDToFixed(double val, bool isSigned, bool half, uint8_t imm)
228{
229    fesetround(FeRoundNearest);
230    val = val * pow(2.0, imm);
231    __asm__ __volatile__("" : "=m" (val) : "m" (val));
232    fesetround(FeRoundZero);
233    feclearexcept(FeAllExceptions);
234    __asm__ __volatile__("" : "=m" (val) : "m" (val));
235    double origVal = val;
236    val = rint(val);
237    int fpType = std::fpclassify(val);
238    if (fpType == FP_SUBNORMAL || fpType == FP_NAN) {
239        if (fpType == FP_NAN) {
240            feraiseexcept(FeInvalid);
241        }
242        val = 0.0;
243    } else if (origVal != val) {
244        feraiseexcept(FeInexact);
245    }
246    if (isSigned) {
247        if (half) {
248            if (val < (int16_t)(1 << 15)) {
249                feraiseexcept(FeInvalid);
250                feclearexcept(FeInexact);
251                return (int16_t)(1 << 15);
252            }
253            if (val > (int16_t)mask(15)) {
254                feraiseexcept(FeInvalid);
255                feclearexcept(FeInexact);
256                return (int16_t)mask(15);
257            }
258            return (int16_t)val;
259        } else {
260            if (val < (int32_t)(1 << 31)) {
261                feraiseexcept(FeInvalid);
262                feclearexcept(FeInexact);
263                return (int32_t)(1 << 31);
264            }
265            if (val > (int32_t)mask(31)) {
266                feraiseexcept(FeInvalid);
267                feclearexcept(FeInexact);
268                return (int32_t)mask(31);
269            }
270            return (int32_t)val;
271        }
272    } else {
273        if (half) {
274            if (val < 0) {
275                feraiseexcept(FeInvalid);
276                feclearexcept(FeInexact);
277                return 0;
278            }
279            if (val > mask(16)) {
280                feraiseexcept(FeInvalid);
281                feclearexcept(FeInexact);
282                return mask(16);
283            }
284            return (uint16_t)val;
285        } else {
286            if (val < 0) {
287                feraiseexcept(FeInvalid);
288                feclearexcept(FeInexact);
289                return 0;
290            }
291            if (val > mask(32)) {
292                feraiseexcept(FeInvalid);
293                feclearexcept(FeInexact);
294                return mask(32);
295            }
296            return (uint32_t)val;
297        }
298    }
299}
300
301static inline double
302vfpUFixedToFpD(uint32_t val, bool half, uint8_t imm)
303{
304    fesetround(FeRoundNearest);
305    if (half)
306        val = (uint16_t)val;
307    double scale = pow(2.0, imm);
308    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
309    feclearexcept(FeAllExceptions);
310    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
311    return val / scale;
312}
313
314static inline double
315vfpSFixedToFpD(int32_t val, bool half, uint8_t imm)
316{
317    fesetround(FeRoundNearest);
318    if (half)
319        val = sext<16>(val & mask(16));
320    double scale = pow(2.0, imm);
321    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
322    feclearexcept(FeAllExceptions);
323    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
324    return val / scale;
325}
326
327typedef int VfpSavedState;
328
329static inline VfpSavedState
330prepVfpFpscr(FPSCR fpscr)
331{
332    int roundingMode = fegetround();
333    feclearexcept(FeAllExceptions);
334    switch (fpscr.rMode) {
335      case VfpRoundNearest:
336        fesetround(FeRoundNearest);
337        break;
338      case VfpRoundUpward:
339        fesetround(FeRoundUpward);
340        break;
341      case VfpRoundDown:
342        fesetround(FeRoundDown);
343        break;
344      case VfpRoundZero:
345        fesetround(FeRoundZero);
346        break;
347    }
348    return roundingMode;
349}
350
351static inline FPSCR
352setVfpFpscr(FPSCR fpscr, VfpSavedState state)
353{
354    int exceptions = fetestexcept(FeAllExceptions);
355    if (exceptions & FeInvalid) {
356        fpscr.ioc = 1;
357    }
358    if (exceptions & FeDivByZero) {
359        fpscr.dzc = 1;
360    }
361    if (exceptions & FeOverflow) {
362        fpscr.ofc = 1;
363    }
364    if (exceptions & FeUnderflow) {
365        fpscr.ufc = 1;
366    }
367    if (exceptions & FeInexact) {
368        fpscr.ixc = 1;
369    }
370    fesetround(state);
371    return fpscr;
372}
373
374class VfpMacroOp : public PredMacroOp
375{
376  public:
377    static bool
378    inScalarBank(IntRegIndex idx)
379    {
380        return (idx % 32) < 8;
381    }
382
383  protected:
384    bool wide;
385
386    VfpMacroOp(const char *mnem, ExtMachInst _machInst,
387            OpClass __opClass, bool _wide) :
388        PredMacroOp(mnem, _machInst, __opClass), wide(_wide)
389    {}
390
391    IntRegIndex
392    addStride(IntRegIndex idx, unsigned stride)
393    {
394        if (wide) {
395            stride *= 2;
396        }
397        unsigned offset = idx % 8;
398        idx = (IntRegIndex)(idx - offset);
399        offset += stride;
400        idx = (IntRegIndex)(idx + (offset % 8));
401        return idx;
402    }
403
404    void
405    nextIdxs(IntRegIndex &dest, IntRegIndex &op1, IntRegIndex &op2)
406    {
407        unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2;
408        assert(!inScalarBank(dest));
409        dest = addStride(dest, stride);
410        op1 = addStride(op1, stride);
411        if (!inScalarBank(op2)) {
412            op2 = addStride(op2, stride);
413        }
414    }
415
416    void
417    nextIdxs(IntRegIndex &dest, IntRegIndex &op1)
418    {
419        unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2;
420        assert(!inScalarBank(dest));
421        dest = addStride(dest, stride);
422        if (!inScalarBank(op1)) {
423            op1 = addStride(op1, stride);
424        }
425    }
426
427    void
428    nextIdxs(IntRegIndex &dest)
429    {
430        unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2;
431        assert(!inScalarBank(dest));
432        dest = addStride(dest, stride);
433    }
434};
435
// A VFP operation with one destination and one source register.  The
// optional mode argument marks the instruction with VFP microop flags.
class VfpRegRegOp : public RegRegOp
{
  protected:
    VfpRegRegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
                IntRegIndex _dest, IntRegIndex _op1,
                VfpMicroMode mode = VfpNotAMicroop) :
        RegRegOp(mnem, _machInst, __opClass, _dest, _op1)
    {
        setVfpMicroFlags(mode, flags);
    }
};
447
// A VFP operation with a destination register and an immediate.  The
// optional mode argument marks the instruction with VFP microop flags.
class VfpRegImmOp : public RegImmOp
{
  protected:
    VfpRegImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
                IntRegIndex _dest, uint64_t _imm,
                VfpMicroMode mode = VfpNotAMicroop) :
        RegImmOp(mnem, _machInst, __opClass, _dest, _imm)
    {
        setVfpMicroFlags(mode, flags);
    }
};
459
// A VFP operation with a destination, one source register, and an
// immediate.  The optional mode argument marks the instruction with VFP
// microop flags.
class VfpRegRegImmOp : public RegRegImmOp
{
  protected:
    VfpRegRegImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
                   IntRegIndex _dest, IntRegIndex _op1,
                   uint64_t _imm, VfpMicroMode mode = VfpNotAMicroop) :
        RegRegImmOp(mnem, _machInst, __opClass, _dest, _op1, _imm)
    {
        setVfpMicroFlags(mode, flags);
    }
};
471
// A VFP operation with a destination and two source registers.  The
// optional mode argument marks the instruction with VFP microop flags.
class VfpRegRegRegOp : public RegRegRegOp
{
  protected:
    VfpRegRegRegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
                   IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2,
                   VfpMicroMode mode = VfpNotAMicroop) :
        RegRegRegOp(mnem, _machInst, __opClass, _dest, _op1, _op2)
    {
        setVfpMicroFlags(mode, flags);
    }
};
483
484#endif //__ARCH_ARM_INSTS_VFP_HH__
485