vfp.hh revision 7397:cbd950459a29
/*
 * Copyright (c) 2010 ARM Limited
 * All rights reserved
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder.  You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Gabe Black
 */

#ifndef __ARCH_ARM_INSTS_VFP_HH__
#define __ARCH_ARM_INSTS_VFP_HH__

#include "arch/arm/insts/misc.hh"
#include "arch/arm/miscregs.hh"
#include <fenv.h>
#include <cmath>

namespace ArmISA
{

enum VfpMicroMode {
    VfpNotAMicroop,
    VfpMicroop,
    VfpFirstMicroop,
    VfpLastMicroop
};

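// Set the microop-related flags on a static instruction for the given
// VFP microop mode, marking first/last microops and delayed commit.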
template<class T>
static inline void
setVfpMicroFlags(VfpMicroMode mode, T &flags)
{
    switch (mode) {
      case VfpMicroop:
        flags[StaticInst::IsMicroop] = true;
        break;
      case VfpFirstMicroop:
        flags[StaticInst::IsMicroop] =
            flags[StaticInst::IsFirstMicroop] = true;
        break;
      case VfpLastMicroop:
        flags[StaticInst::IsMicroop] =
            flags[StaticInst::IsLastMicroop] = true;
        break;
      case VfpNotAMicroop:
        break;
    }
    if (mode == VfpMicroop || mode == VfpFirstMicroop) {
        flags[StaticInst::IsDelayedCommit] = true;
    }
}

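// Host floating point exception flags and rounding modes from <fenv.h>,
// wrapped in enums so the rest of this file can use consistent names.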
enum FeExceptionBit
{
    FeDivByZero = FE_DIVBYZERO,
    FeInexact = FE_INEXACT,
    FeInvalid = FE_INVALID,
    FeOverflow = FE_OVERFLOW,
    FeUnderflow = FE_UNDERFLOW,
    FeAllExceptions = FE_ALL_EXCEPT
};

enum FeRoundingMode
{
    FeRoundDown = FE_DOWNWARD,
    FeRoundNearest = FE_TONEAREST,
    FeRoundZero = FE_TOWARDZERO,
    FeRoundUpward = FE_UPWARD
};

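// Rounding modes as encoded in the FPSCR RMode field.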
enum VfpRoundingMode
{
    VfpRoundNearest = 0,
    VfpRoundUpward = 1,
    VfpRoundDown = 2,
    VfpRoundZero = 3
};

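// If op is a denormal, replace it with a zero of the same sign and
// return true; otherwise leave it alone and return false.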
template <class fpType>
static inline bool
flushToZero(fpType &op)
{
    fpType junk = 0.0;
    if (std::fpclassify(op) == FP_SUBNORMAL) {
        uint64_t bitMask = ULL(0x1) << (sizeof(fpType) * 8 - 1);
        op = bitsToFp(fpToBits(op) & bitMask, junk);
        return true;
    }
    return false;
}

template <class fpType>
static inline bool
flushToZero(fpType &op1, fpType &op2)
{
    bool flush1 = flushToZero(op1);
    bool flush2 = flushToZero(op2);
    return flush1 || flush2;
}

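// Flush-to-zero honoring FPSCR.FZ; when an input operand is flushed,
// the cumulative input-denormal flag (IDC) is set.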
template <class fpType>
static inline void
vfpFlushToZero(FPSCR &fpscr, fpType &op)
{
    if (fpscr.fz == 1 && flushToZero(op)) {
        fpscr.idc = 1;
    }
}

template <class fpType>
static inline void
vfpFlushToZero(FPSCR &fpscr, fpType &op1, fpType &op2)
{
    vfpFlushToZero(fpscr, op1);
    vfpFlushToZero(fpscr, op2);
}

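// Reinterpret the raw bit pattern of a float/double as an integer and
// back again. The unused "junk" argument only selects the overload.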
static inline uint32_t
fpToBits(float fp)
{
    union
    {
        float fp;
        uint32_t bits;
    } val;
    val.fp = fp;
    return val.bits;
}

static inline uint64_t
fpToBits(double fp)
{
    union
    {
        double fp;
        uint64_t bits;
    } val;
    val.fp = fp;
    return val.bits;
}

static inline float
bitsToFp(uint64_t bits, float junk)
{
    union
    {
        float fp;
        uint32_t bits;
    } val;
    val.bits = bits;
    return val.fp;
}

static inline double
bitsToFp(uint64_t bits, double junk)
{
    union
    {
        double fp;
        uint64_t bits;
    } val;
    val.bits = bits;
    return val.fp;
}

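// The VFP helpers run on top of the host FPU. prepFpState saves the
// host rounding mode, clears the host exception flags, and installs the
// rounding mode requested by the instruction; finishVfp undoes this.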
typedef int VfpSavedState;

static inline VfpSavedState
prepFpState(uint32_t rMode)
{
    int roundingMode = fegetround();
    feclearexcept(FeAllExceptions);
    switch (rMode) {
      case VfpRoundNearest:
        fesetround(FeRoundNearest);
        break;
      case VfpRoundUpward:
        fesetround(FeRoundUpward);
        break;
      case VfpRoundDown:
        fesetround(FeRoundDown);
        break;
      case VfpRoundZero:
        fesetround(FeRoundZero);
        break;
    }
    return roundingMode;
}

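// Transfer the host's accumulated exception flags into the cumulative
// FPSCR bits (suppressing inexact when an underflow is flushed by FZ)
// and restore the saved host rounding mode.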
static inline void
finishVfp(FPSCR &fpscr, VfpSavedState state)
{
    int exceptions = fetestexcept(FeAllExceptions);
    bool underflow = false;
    if (exceptions & FeInvalid) {
        fpscr.ioc = 1;
    }
    if (exceptions & FeDivByZero) {
        fpscr.dzc = 1;
    }
    if (exceptions & FeOverflow) {
        fpscr.ofc = 1;
    }
    if (exceptions & FeUnderflow) {
        underflow = true;
        fpscr.ufc = 1;
    }
    if ((exceptions & FeInexact) && !(underflow && fpscr.fz)) {
        fpscr.ixc = 1;
    }
    fesetround(state);
}

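// Fix up a result computed on the host so that NaN propagation and
// flush-to-zero of denormal outputs follow VFP semantics: a non-NaN
// operand or FPSCR.DN yields the default quiet NaN, otherwise the
// operand's NaN is quietened and propagated.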
template <class fpType>
static inline fpType
fixDest(FPSCR fpscr, fpType val, fpType op1)
{
    int fpClass = std::fpclassify(val);
    fpType junk = 0.0;
    if (fpClass == FP_NAN) {
        const bool single = (sizeof(val) == sizeof(float));
        const uint64_t qnan = single ? 0x7fc00000 : ULL(0x7ff8000000000000);
        const bool nan = std::isnan(op1);
        if (!nan || (fpscr.dn == 1)) {
            val = bitsToFp(qnan, junk);
        } else if (nan) {
            val = bitsToFp(fpToBits(op1) | qnan, junk);
        }
    } else if (fpClass == FP_SUBNORMAL && fpscr.fz == 1) {
        // Turn val into a zero with the correct sign.
        uint64_t bitMask = ULL(0x1) << (sizeof(fpType) * 8 - 1);
        val = bitsToFp(fpToBits(val) & bitMask, junk);
        feclearexcept(FeInexact);
        feraiseexcept(FeUnderflow);
    }
    return val;
}

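// Two-operand variant: a signaling NaN input takes precedence over a
// quiet NaN when choosing which payload to propagate.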
template <class fpType>
static inline fpType
fixDest(FPSCR fpscr, fpType val, fpType op1, fpType op2)
{
    int fpClass = std::fpclassify(val);
    fpType junk = 0.0;
    if (fpClass == FP_NAN) {
        const bool single = (sizeof(val) == sizeof(float));
        const uint64_t qnan = single ? 0x7fc00000 : ULL(0x7ff8000000000000);
        const bool nan1 = std::isnan(op1);
        const bool nan2 = std::isnan(op2);
        const bool signal1 = nan1 && ((fpToBits(op1) & qnan) != qnan);
        const bool signal2 = nan2 && ((fpToBits(op2) & qnan) != qnan);
        if ((!nan1 && !nan2) || (fpscr.dn == 1)) {
            val = bitsToFp(qnan, junk);
        } else if (signal1) {
            val = bitsToFp(fpToBits(op1) | qnan, junk);
        } else if (signal2) {
            val = bitsToFp(fpToBits(op2) | qnan, junk);
        } else if (nan1) {
            val = op1;
        } else if (nan2) {
            val = op2;
        }
    } else if (fpClass == FP_SUBNORMAL && fpscr.fz == 1) {
        // Turn val into a zero with the correct sign.
        uint64_t bitMask = ULL(0x1) << (sizeof(fpType) * 8 - 1);
        val = bitsToFp(fpToBits(val) & bitMask, junk);
        feclearexcept(FeInexact);
        feraiseexcept(FeUnderflow);
    }
    return val;
}

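// Like fixDest, but when the result lands on the smallest normal value
// the division is redone in round-to-zero to see whether ARM, which
// detects underflow before rounding, would have underflowed where the
// host (which detects it after rounding) did not.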
template <class fpType>
static inline fpType
fixDivDest(FPSCR fpscr, fpType val, fpType op1, fpType op2)
{
    fpType mid = fixDest(fpscr, val, op1, op2);
    const bool single = (sizeof(fpType) == sizeof(float));
    const fpType junk = 0.0;
    if ((single && (val == bitsToFp(0x00800000, junk) ||
                    val == bitsToFp(0x80800000, junk))) ||
        (!single && (val == bitsToFp(ULL(0x0010000000000000), junk) ||
                     val == bitsToFp(ULL(0x8010000000000000), junk)))
        ) {
        __asm__ __volatile__("" : "=m" (op1) : "m" (op1));
        fesetround(FeRoundZero);
        fpType temp = 0.0;
        __asm__ __volatile__("" : "=m" (temp) : "m" (temp));
        temp = op1 / op2;
        if (flushToZero(temp)) {
            feraiseexcept(FeUnderflow);
            if (fpscr.fz) {
                feclearexcept(FeInexact);
                mid = temp;
            }
        }
        __asm__ __volatile__("" :: "m" (temp));
    }
    return mid;
}

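// Conversions between single and double precision. NaN payloads are
// carried across, and results on the smallest-normal boundary get the
// same pre-rounding underflow correction as above.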
static inline float
fixFpDFpSDest(FPSCR fpscr, double val)
{
    const float junk = 0.0;
    float op1 = 0.0;
    if (std::isnan(val)) {
        uint64_t valBits = fpToBits(val);
        uint32_t op1Bits = bits(valBits, 50, 29) |
                           (mask(9) << 22) |
                           (bits(valBits, 63) << 31);
        op1 = bitsToFp(op1Bits, junk);
    }
    float mid = fixDest(fpscr, (float)val, op1);
    if (fpscr.fz && fetestexcept(FeUnderflow | FeInexact) ==
                    (FeUnderflow | FeInexact)) {
        feclearexcept(FeInexact);
    }
    if (mid == bitsToFp(0x00800000, junk) ||
        mid == bitsToFp(0x80800000, junk)) {
        __asm__ __volatile__("" : "=m" (val) : "m" (val));
        fesetround(FeRoundZero);
        float temp = 0.0;
        __asm__ __volatile__("" : "=m" (temp) : "m" (temp));
        temp = val;
        if (flushToZero(temp)) {
            feraiseexcept(FeUnderflow);
            if (fpscr.fz) {
                feclearexcept(FeInexact);
                mid = temp;
            }
        }
        __asm__ __volatile__("" :: "m" (temp));
    }
    return mid;
}

static inline double
fixFpSFpDDest(FPSCR fpscr, float val)
{
    const double junk = 0.0;
    double op1 = 0.0;
    if (std::isnan(val)) {
        uint32_t valBits = fpToBits(val);
        uint64_t op1Bits = ((uint64_t)bits(valBits, 21, 0) << 29) |
                           (mask(12) << 51) |
                           ((uint64_t)bits(valBits, 31) << 63);
        op1 = bitsToFp(op1Bits, junk);
    }
    double mid = fixDest(fpscr, (double)val, op1);
    if (mid == bitsToFp(ULL(0x0010000000000000), junk) ||
        mid == bitsToFp(ULL(0x8010000000000000), junk)) {
        __asm__ __volatile__("" : "=m" (val) : "m" (val));
        fesetround(FeRoundZero);
        double temp = 0.0;
        __asm__ __volatile__("" : "=m" (temp) : "m" (temp));
        temp = val;
        if (flushToZero(temp)) {
            feraiseexcept(FeUnderflow);
            if (fpscr.fz) {
                feclearexcept(FeInexact);
                mid = temp;
            }
        }
        __asm__ __volatile__("" :: "m" (temp));
    }
    return mid;
}

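// Assemble a double from, or split one into, the two 32-bit halves
// used to store it in the VFP register file.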
static inline double
makeDouble(uint32_t low, uint32_t high)
{
    double junk = 0.0;
    return bitsToFp((uint64_t)low | ((uint64_t)high << 32), junk);
}

static inline uint32_t
lowFromDouble(double val)
{
    return fpToBits(val);
}

static inline uint32_t
highFromDouble(double val)
{
    return fpToBits(val) >> 32;
}

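// Convert a single-precision value to a 16- or 32-bit fixed-point
// integer: scale by 2^imm, round in the requested mode (round-to-zero
// by default), saturate to the destination range, and raise Invalid
// and/or Inexact as required.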
static inline uint64_t
vfpFpSToFixed(float val, bool isSigned, bool half,
              uint8_t imm, bool rzero = true)
{
    int rmode = rzero ? FeRoundZero : fegetround();
    __asm__ __volatile__("" : "=m" (rmode) : "m" (rmode));
    fesetround(FeRoundNearest);
    val = val * powf(2.0, imm);
    __asm__ __volatile__("" : "=m" (val) : "m" (val));
    fesetround(rmode);
    feclearexcept(FeAllExceptions);
    __asm__ __volatile__("" : "=m" (val) : "m" (val));
    float origVal = val;
    val = rintf(val);
    int fpType = std::fpclassify(val);
    if (fpType == FP_SUBNORMAL || fpType == FP_NAN) {
        if (fpType == FP_NAN) {
            feraiseexcept(FeInvalid);
        }
        val = 0.0;
    } else if (origVal != val) {
        switch (rmode) {
          case FeRoundNearest:
            if (origVal - val > 0.5)
                val += 1.0;
            else if (val - origVal > 0.5)
                val -= 1.0;
            break;
          case FeRoundDown:
            if (origVal < val)
                val -= 1.0;
            break;
          case FeRoundUpward:
            if (origVal > val)
                val += 1.0;
            break;
        }
        feraiseexcept(FeInexact);
    }

    if (isSigned) {
        if (half) {
            if ((double)val < (int16_t)(1 << 15)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return (int16_t)(1 << 15);
            }
            if ((double)val > (int16_t)mask(15)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return (int16_t)mask(15);
            }
            return (int16_t)val;
        } else {
            if ((double)val < (int32_t)(1 << 31)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return (int32_t)(1 << 31);
            }
            if ((double)val > (int32_t)mask(31)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return (int32_t)mask(31);
            }
            return (int32_t)val;
        }
    } else {
        if (half) {
            if ((double)val < 0) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return 0;
            }
            if ((double)val > (mask(16))) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return mask(16);
            }
            return (uint16_t)val;
        } else {
            if ((double)val < 0) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return 0;
            }
            if ((double)val > (mask(32))) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return mask(32);
            }
            return (uint32_t)val;
        }
    }
}

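// Convert an unsigned or signed fixed-point value to single precision
// by dividing by 2^imm; "half" selects a 16-bit source value.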
static inline float
vfpUFixedToFpS(FPSCR fpscr, uint32_t val, bool half, uint8_t imm)
{
    fesetround(FeRoundNearest);
    if (half)
        val = (uint16_t)val;
    float scale = powf(2.0, imm);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    feclearexcept(FeAllExceptions);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    return fixDivDest(fpscr, val / scale, (float)val, scale);
}

static inline float
vfpSFixedToFpS(FPSCR fpscr, int32_t val, bool half, uint8_t imm)
{
    fesetround(FeRoundNearest);
    if (half)
        val = sext<16>(val & mask(16));
    float scale = powf(2.0, imm);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    feclearexcept(FeAllExceptions);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    return fixDivDest(fpscr, val / scale, (float)val, scale);
}

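// Double-precision counterpart of vfpFpSToFixed.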
static inline uint64_t
vfpFpDToFixed(double val, bool isSigned, bool half,
              uint8_t imm, bool rzero = true)
{
    int rmode = rzero ? FeRoundZero : fegetround();
    fesetround(FeRoundNearest);
    val = val * pow(2.0, imm);
    __asm__ __volatile__("" : "=m" (val) : "m" (val));
    fesetround(rmode);
    feclearexcept(FeAllExceptions);
    __asm__ __volatile__("" : "=m" (val) : "m" (val));
    double origVal = val;
    val = rint(val);
    int fpType = std::fpclassify(val);
    if (fpType == FP_SUBNORMAL || fpType == FP_NAN) {
        if (fpType == FP_NAN) {
            feraiseexcept(FeInvalid);
        }
        val = 0.0;
    } else if (origVal != val) {
        switch (rmode) {
          case FeRoundNearest:
            if (origVal - val > 0.5)
                val += 1.0;
            else if (val - origVal > 0.5)
                val -= 1.0;
            break;
          case FeRoundDown:
            if (origVal < val)
                val -= 1.0;
            break;
          case FeRoundUpward:
            if (origVal > val)
                val += 1.0;
            break;
        }
        feraiseexcept(FeInexact);
    }
    if (isSigned) {
        if (half) {
            if (val < (int16_t)(1 << 15)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return (int16_t)(1 << 15);
            }
            if (val > (int16_t)mask(15)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return (int16_t)mask(15);
            }
            return (int16_t)val;
        } else {
            if (val < (int32_t)(1 << 31)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return (int32_t)(1 << 31);
            }
            if (val > (int32_t)mask(31)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return (int32_t)mask(31);
            }
            return (int32_t)val;
        }
    } else {
        if (half) {
            if (val < 0) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return 0;
            }
            if (val > mask(16)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return mask(16);
            }
            return (uint16_t)val;
        } else {
            if (val < 0) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return 0;
            }
            if (val > mask(32)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return mask(32);
            }
            return (uint32_t)val;
        }
    }
}

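// Fixed-point to double-precision conversions, analogous to the
// single-precision versions above.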
static inline double
vfpUFixedToFpD(FPSCR fpscr, uint32_t val, bool half, uint8_t imm)
{
    fesetround(FeRoundNearest);
    if (half)
        val = (uint16_t)val;
    double scale = pow(2.0, imm);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    feclearexcept(FeAllExceptions);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    return fixDivDest(fpscr, val / scale, (double)val, scale);
}

static inline double
vfpSFixedToFpD(FPSCR fpscr, int32_t val, bool half, uint8_t imm)
{
    fesetround(FeRoundNearest);
    if (half)
        val = sext<16>(val & mask(16));
    double scale = pow(2.0, imm);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    feclearexcept(FeAllExceptions);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    return fixDivDest(fpscr, val / scale, (double)val, scale);
}

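// Base class for VFP macroops. Registers in the first eight slots of a
// bank form the scalar bank; addStride and the nextIdxs helpers step
// the operand indices by the FPSCR vector stride, wrapping within an
// eight-register bank, so a macroop can iterate over a short vector.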
class VfpMacroOp : public PredMacroOp
{
  public:
    static bool
    inScalarBank(IntRegIndex idx)
    {
        return (idx % 32) < 8;
    }

  protected:
    bool wide;

    VfpMacroOp(const char *mnem, ExtMachInst _machInst,
            OpClass __opClass, bool _wide) :
        PredMacroOp(mnem, _machInst, __opClass), wide(_wide)
    {}

    IntRegIndex
    addStride(IntRegIndex idx, unsigned stride)
    {
        if (wide) {
            stride *= 2;
        }
        unsigned offset = idx % 8;
        idx = (IntRegIndex)(idx - offset);
        offset += stride;
        idx = (IntRegIndex)(idx + (offset % 8));
        return idx;
    }

    void
    nextIdxs(IntRegIndex &dest, IntRegIndex &op1, IntRegIndex &op2)
    {
        unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2;
        assert(!inScalarBank(dest));
        dest = addStride(dest, stride);
        op1 = addStride(op1, stride);
        if (!inScalarBank(op2)) {
            op2 = addStride(op2, stride);
        }
    }

    void
    nextIdxs(IntRegIndex &dest, IntRegIndex &op1)
    {
        unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2;
        assert(!inScalarBank(dest));
        dest = addStride(dest, stride);
        if (!inScalarBank(op1)) {
            op1 = addStride(op1, stride);
        }
    }

    void
    nextIdxs(IntRegIndex &dest)
    {
        unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2;
        assert(!inScalarBank(dest));
        dest = addStride(dest, stride);
    }
};

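// Plain arithmetic wrappers with the right signature to be passed as
// the function-pointer argument of FpOp::binaryOp below.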
static inline float
fpAddS(float a, float b)
{
    return a + b;
}

static inline double
fpAddD(double a, double b)
{
    return a + b;
}

static inline float
fpSubS(float a, float b)
{
    return a - b;
}

static inline double
fpSubD(double a, double b)
{
    return a - b;
}

static inline float
fpDivS(float a, float b)
{
    return a / b;
}

static inline double
fpDivD(double a, double b)
{
    return a / b;
}

static inline float
fpMulS(float a, float b)
{
    return a * b;
}

static inline double
fpMulD(double a, double b)
{
    return a * b;
}

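// Base class for VFP data-processing instructions. Subclasses override
// the doOp virtuals; binaryOp and unaryOp wrap a host FP operation with
// the ARM-specific NaN, flush-to-zero, and flag fixups.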
class FpOp : public PredOp
{
  protected:
    FpOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass) :
        PredOp(mnem, _machInst, __opClass)
    {}

    virtual float
    doOp(float op1, float op2) const
    {
        panic("Unimplemented version of doOp called.\n");
    }

    virtual float
    doOp(float op1) const
    {
        panic("Unimplemented version of doOp called.\n");
    }

    virtual double
    doOp(double op1, double op2) const
    {
        panic("Unimplemented version of doOp called.\n");
    }

    virtual double
    doOp(double op1) const
    {
        panic("Unimplemented version of doOp called.\n");
    }

    double
    dbl(uint32_t low, uint32_t high) const
    {
        double junk = 0.0;
        return bitsToFp((uint64_t)low | ((uint64_t)high << 32), junk);
    }

    uint32_t
    dblLow(double val) const
    {
        return fpToBits(val);
    }

    uint32_t
    dblHi(double val) const
    {
        return fpToBits(val) >> 32;
    }

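    // Apply a two-operand host FP function, then correct the result and
    // the host exception flags for ARM semantics: NaN selection, flush
    // to zero, and underflow detected before rounding rather than after.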
    template <class fpType>
    fpType
    binaryOp(FPSCR &fpscr, fpType op1, fpType op2,
            fpType (*func)(fpType, fpType),
            bool flush, uint32_t rMode) const
    {
        const bool single = (sizeof(fpType) == sizeof(float));
        fpType junk = 0.0;

        if (flush && flushToZero(op1, op2))
            fpscr.idc = 1;
        VfpSavedState state = prepFpState(rMode);
        __asm__ __volatile__ ("" : "=m" (op1), "=m" (op2), "=m" (state)
                                 : "m" (op1), "m" (op2), "m" (state));
        fpType dest = func(op1, op2);
        __asm__ __volatile__ ("" : "=m" (dest) : "m" (dest));

        int fpClass = std::fpclassify(dest);
        // Get NAN behavior right. This varies between x86 and ARM.
        if (fpClass == FP_NAN) {
            const bool single = (sizeof(fpType) == sizeof(float));
            const uint64_t qnan =
                single ? 0x7fc00000 : ULL(0x7ff8000000000000);
            const bool nan1 = std::isnan(op1);
            const bool nan2 = std::isnan(op2);
            const bool signal1 = nan1 && ((fpToBits(op1) & qnan) != qnan);
            const bool signal2 = nan2 && ((fpToBits(op2) & qnan) != qnan);
            if ((!nan1 && !nan2) || (fpscr.dn == 1)) {
                dest = bitsToFp(qnan, junk);
            } else if (signal1) {
                dest = bitsToFp(fpToBits(op1) | qnan, junk);
            } else if (signal2) {
                dest = bitsToFp(fpToBits(op2) | qnan, junk);
            } else if (nan1) {
                dest = op1;
            } else if (nan2) {
                dest = op2;
            }
        } else if (flush && flushToZero(dest)) {
            feraiseexcept(FeUnderflow);
        } else if ((
                    (single && (dest == bitsToFp(0x00800000, junk) ||
                         dest == bitsToFp(0x80800000, junk))) ||
                    (!single &&
                        (dest == bitsToFp(ULL(0x0010000000000000), junk) ||
                         dest == bitsToFp(ULL(0x8010000000000000), junk)))
                   ) && rMode != VfpRoundZero) {
            /*
             * Correct for the fact that underflow is detected -before- rounding
             * in ARM and -after- rounding in x86.
             */
            fesetround(FeRoundZero);
            __asm__ __volatile__ ("" : "=m" (op1), "=m" (op2)
                                     : "m" (op1), "m" (op2));
            fpType temp = func(op1, op2);
            __asm__ __volatile__ ("" : "=m" (temp) : "m" (temp));
            if (flush && flushToZero(temp)) {
                dest = temp;
            }
        }
        finishVfp(fpscr, state);
        return dest;
    }

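    // One-operand counterpart of binaryOp, with the same fixups.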
    template <class fpType>
    fpType
    unaryOp(FPSCR &fpscr, fpType op1,
            fpType (*func)(fpType),
            bool flush, uint32_t rMode) const
    {
        const bool single = (sizeof(fpType) == sizeof(float));
        fpType junk = 0.0;

        if (flush && flushToZero(op1))
            fpscr.idc = 1;
        VfpSavedState state = prepFpState(rMode);
        __asm__ __volatile__ ("" : "=m" (op1), "=m" (state)
                                 : "m" (op1), "m" (state));
        fpType dest = func(op1);
        __asm__ __volatile__ ("" : "=m" (dest) : "m" (dest));

        int fpClass = std::fpclassify(dest);
        // Get NAN behavior right. This varies between x86 and ARM.
        if (fpClass == FP_NAN) {
            const bool single = (sizeof(fpType) == sizeof(float));
            const uint64_t qnan =
                single ? 0x7fc00000 : ULL(0x7ff8000000000000);
            const bool nan = std::isnan(op1);
            if (!nan || fpscr.dn == 1) {
                dest = bitsToFp(qnan, junk);
            } else if (nan) {
                dest = bitsToFp(fpToBits(op1) | qnan, junk);
            }
        } else if (flush && flushToZero(dest)) {
            feraiseexcept(FeUnderflow);
        } else if ((
                    (single && (dest == bitsToFp(0x00800000, junk) ||
                         dest == bitsToFp(0x80800000, junk))) ||
                    (!single &&
                        (dest == bitsToFp(ULL(0x0010000000000000), junk) ||
                         dest == bitsToFp(ULL(0x8010000000000000), junk)))
                   ) && rMode != VfpRoundZero) {
            /*
             * Correct for the fact that underflow is detected -before- rounding
             * in ARM and -after- rounding in x86.
             */
            fesetround(FeRoundZero);
            __asm__ __volatile__ ("" : "=m" (op1) : "m" (op1));
            fpType temp = func(op1);
            __asm__ __volatile__ ("" : "=m" (temp) : "m" (temp));
            if (flush && flushToZero(temp)) {
                dest = temp;
            }
        }
        finishVfp(fpscr, state);
        return dest;
    }
};

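// Operand-format base classes for VFP instructions: each stores its
// register/immediate operands, applies the requested microop flags, and
// declares the matching disassembly routine.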
class FpRegRegOp : public FpOp
{
  protected:
    IntRegIndex dest;
    IntRegIndex op1;

    FpRegRegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
               IntRegIndex _dest, IntRegIndex _op1,
               VfpMicroMode mode = VfpNotAMicroop) :
        FpOp(mnem, _machInst, __opClass), dest(_dest), op1(_op1)
    {
        setVfpMicroFlags(mode, flags);
    }

    std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
};

class FpRegImmOp : public FpOp
{
  protected:
    IntRegIndex dest;
    uint64_t imm;

    FpRegImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
               IntRegIndex _dest, uint64_t _imm,
               VfpMicroMode mode = VfpNotAMicroop) :
        FpOp(mnem, _machInst, __opClass), dest(_dest), imm(_imm)
    {
        setVfpMicroFlags(mode, flags);
    }

    std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
};

class FpRegRegImmOp : public FpOp
{
  protected:
    IntRegIndex dest;
    IntRegIndex op1;
    uint64_t imm;

    FpRegRegImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
                  IntRegIndex _dest, IntRegIndex _op1,
                  uint64_t _imm, VfpMicroMode mode = VfpNotAMicroop) :
        FpOp(mnem, _machInst, __opClass), dest(_dest), op1(_op1), imm(_imm)
    {
        setVfpMicroFlags(mode, flags);
    }

    std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
};

class FpRegRegRegOp : public FpOp
{
  protected:
    IntRegIndex dest;
    IntRegIndex op1;
    IntRegIndex op2;

    FpRegRegRegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
                  IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2,
                  VfpMicroMode mode = VfpNotAMicroop) :
        FpOp(mnem, _machInst, __opClass), dest(_dest), op1(_op1), op2(_op2)
    {
        setVfpMicroFlags(mode, flags);
    }

    std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
};

}

#endif //__ARCH_ARM_INSTS_VFP_HH__