vfp.hh revision 7396:53454ef35b46
/*
 * Copyright (c) 2010 ARM Limited
 * All rights reserved
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder.  You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Gabe Black
 */

#ifndef __ARCH_ARM_INSTS_VFP_HH__
#define __ARCH_ARM_INSTS_VFP_HH__

#include <fenv.h>

#include <cmath>

#include "arch/arm/insts/misc.hh"
#include "arch/arm/miscregs.hh"

namespace ArmISA
{

enum VfpMicroMode {
    VfpNotAMicroop,
    VfpMicroop,
    VfpFirstMicroop,
    VfpLastMicroop
};

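// Set the micro-op flags in a static instruction's flag bitset based on its
// position within a VFP macro-op. Micro-ops that are not the last in the
// sequence are also marked as delayed-commit.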
template<class T>
static inline void
setVfpMicroFlags(VfpMicroMode mode, T &flags)
{
    switch (mode) {
      case VfpMicroop:
        flags[StaticInst::IsMicroop] = true;
        break;
      case VfpFirstMicroop:
        flags[StaticInst::IsMicroop] =
            flags[StaticInst::IsFirstMicroop] = true;
        break;
      case VfpLastMicroop:
        flags[StaticInst::IsMicroop] =
            flags[StaticInst::IsLastMicroop] = true;
        break;
      case VfpNotAMicroop:
        break;
    }
    if (mode == VfpMicroop || mode == VfpFirstMicroop) {
        flags[StaticInst::IsDelayedCommit] = true;
    }
}

enum FeExceptionBit
{
    FeDivByZero = FE_DIVBYZERO,
    FeInexact = FE_INEXACT,
    FeInvalid = FE_INVALID,
    FeOverflow = FE_OVERFLOW,
    FeUnderflow = FE_UNDERFLOW,
    FeAllExceptions = FE_ALL_EXCEPT
};

enum FeRoundingMode
{
    FeRoundDown = FE_DOWNWARD,
    FeRoundNearest = FE_TONEAREST,
    FeRoundZero = FE_TOWARDZERO,
    FeRoundUpward = FE_UPWARD
};

enum VfpRoundingMode
{
    VfpRoundNearest = 0,
    VfpRoundUpward = 1,
    VfpRoundDown = 2,
    VfpRoundZero = 3
};

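// The VfpRoundingMode values above follow the FPSCR.RMode encoding (0 = to
// nearest, 1 = towards +infinity, 2 = towards -infinity, 3 = towards zero);
// prepFpState() below translates them into the host's fenv rounding modes.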
template <class fpType>
static inline void
vfpFlushToZero(uint32_t &_fpscr, fpType &op)
{
    FPSCR fpscr = _fpscr;
    fpType junk = 0.0;
    if (fpscr.fz == 1 && (std::fpclassify(op) == FP_SUBNORMAL)) {
        fpscr.idc = 1;
        uint64_t bitMask = ULL(0x1) << (sizeof(fpType) * 8 - 1);
        op = bitsToFp(fpToBits(op) & bitMask, junk);
    }
    _fpscr = fpscr;
}

template <class fpType>
static inline void
vfpFlushToZero(uint32_t &fpscr, fpType &op1, fpType &op2)
{
    vfpFlushToZero(fpscr, op1);
    vfpFlushToZero(fpscr, op2);
}

template <class fpType>
static inline bool
flushToZero(fpType &op)
{
    fpType junk = 0.0;
    if (std::fpclassify(op) == FP_SUBNORMAL) {
        uint64_t bitMask = ULL(0x1) << (sizeof(fpType) * 8 - 1);
        op = bitsToFp(fpToBits(op) & bitMask, junk);
        return true;
    }
    return false;
}

template <class fpType>
static inline bool
flushToZero(fpType &op1, fpType &op2)
{
    bool flush1 = flushToZero(op1);
    bool flush2 = flushToZero(op2);
    return flush1 || flush2;
}

static inline uint32_t
fpToBits(float fp)
{
    union
    {
        float fp;
        uint32_t bits;
    } val;
    val.fp = fp;
    return val.bits;
}

static inline uint64_t
fpToBits(double fp)
{
    union
    {
        double fp;
        uint64_t bits;
    } val;
    val.fp = fp;
    return val.bits;
}

static inline float
bitsToFp(uint64_t bits, float junk)
{
    union
    {
        float fp;
        uint32_t bits;
    } val;
    val.bits = bits;
    return val.fp;
}

static inline double
bitsToFp(uint64_t bits, double junk)
{
    union
    {
        double fp;
        uint64_t bits;
    } val;
    val.bits = bits;
    return val.fp;
}

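// fpToBits()/bitsToFp() reinterpret a float/double as its raw IEEE bit
// pattern (and back) through a union. The unused "junk" argument exists only
// to pick the desired return type of bitsToFp() by overload resolution, e.g.
// bitsToFp(0x3f800000, (float)0.0) yields 1.0f.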
typedef int VfpSavedState;

static inline VfpSavedState
prepVfpFpscr(FPSCR fpscr)
{
    int roundingMode = fegetround();
    feclearexcept(FeAllExceptions);
    switch (fpscr.rMode) {
      case VfpRoundNearest:
        fesetround(FeRoundNearest);
        break;
      case VfpRoundUpward:
        fesetround(FeRoundUpward);
        break;
      case VfpRoundDown:
        fesetround(FeRoundDown);
        break;
      case VfpRoundZero:
        fesetround(FeRoundZero);
        break;
    }
    return roundingMode;
}

static inline VfpSavedState
prepFpState(uint32_t rMode)
{
    int roundingMode = fegetround();
    feclearexcept(FeAllExceptions);
    switch (rMode) {
      case VfpRoundNearest:
        fesetround(FeRoundNearest);
        break;
      case VfpRoundUpward:
        fesetround(FeRoundUpward);
        break;
      case VfpRoundDown:
        fesetround(FeRoundDown);
        break;
      case VfpRoundZero:
        fesetround(FeRoundZero);
        break;
    }
    return roundingMode;
}

static inline FPSCR
setVfpFpscr(FPSCR fpscr, VfpSavedState state)
{
    int exceptions = fetestexcept(FeAllExceptions);
    if (exceptions & FeInvalid) {
        fpscr.ioc = 1;
    }
    if (exceptions & FeDivByZero) {
        fpscr.dzc = 1;
    }
    if (exceptions & FeOverflow) {
        fpscr.ofc = 1;
    }
    if (exceptions & FeUnderflow) {
        fpscr.ufc = 1;
    }
    if (exceptions & FeInexact) {
        fpscr.ixc = 1;
    }
    fesetround(state);
    return fpscr;
}

static inline void
finishVfp(FPSCR &fpscr, VfpSavedState state)
{
    int exceptions = fetestexcept(FeAllExceptions);
    bool underflow = false;
    if (exceptions & FeInvalid) {
        fpscr.ioc = 1;
    }
    if (exceptions & FeDivByZero) {
        fpscr.dzc = 1;
    }
    if (exceptions & FeOverflow) {
        fpscr.ofc = 1;
    }
    if (exceptions & FeUnderflow) {
        underflow = true;
        fpscr.ufc = 1;
    }
    if ((exceptions & FeInexact) && !(underflow && fpscr.fz)) {
        fpscr.ixc = 1;
    }
    fesetround(state);
}

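// Typical usage pattern (a sketch only; the real callers are not defined in
// this header): bracket the host floating-point operation with
// prepFpState()/finishVfp() so that host fenv exceptions are accumulated into
// the FPSCR cumulative flags and the host rounding mode is restored.
//
//     FPSCR fpscr = ...;
//     VfpSavedState state = prepFpState(fpscr.rMode);
//     float dest = op1 + op2;     // host operation
//     finishVfp(fpscr, state);    // harvest exceptions, restore rounding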
template <class fpType>
static inline fpType
fixDest(FPSCR fpscr, fpType val, fpType op1)
{
    int fpClass = std::fpclassify(val);
    fpType junk = 0.0;
    if (fpClass == FP_NAN) {
        const bool single = (sizeof(val) == sizeof(float));
        const uint64_t qnan = single ? 0x7fc00000 : ULL(0x7ff8000000000000);
        const bool nan = std::isnan(op1);
        if (!nan || (fpscr.dn == 1)) {
            val = bitsToFp(qnan, junk);
        } else if (nan) {
            val = bitsToFp(fpToBits(op1) | qnan, junk);
        }
    } else if (fpClass == FP_SUBNORMAL && fpscr.fz == 1) {
        // Turn val into a zero with the correct sign.
        uint64_t bitMask = ULL(0x1) << (sizeof(fpType) * 8 - 1);
        val = bitsToFp(fpToBits(val) & bitMask, junk);
        feclearexcept(FeInexact);
        feraiseexcept(FeUnderflow);
    }
    return val;
}

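// Two-operand variant of fixDest(): if the host produced a NaN, substitute the
// ARM result. The default NaN is used when neither operand was a NaN or when
// default-NaN mode (FPSCR.DN) is enabled; otherwise a signalling NaN is
// propagated (quieted) in preference to a quiet one, with op1 taking
// precedence over op2.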
template <class fpType>
static inline fpType
fixDest(FPSCR fpscr, fpType val, fpType op1, fpType op2)
{
    int fpClass = std::fpclassify(val);
    fpType junk = 0.0;
    if (fpClass == FP_NAN) {
        const bool single = (sizeof(val) == sizeof(float));
        const uint64_t qnan = single ? 0x7fc00000 : ULL(0x7ff8000000000000);
        const bool nan1 = std::isnan(op1);
        const bool nan2 = std::isnan(op2);
        const bool signal1 = nan1 && ((fpToBits(op1) & qnan) != qnan);
        const bool signal2 = nan2 && ((fpToBits(op2) & qnan) != qnan);
        if ((!nan1 && !nan2) || (fpscr.dn == 1)) {
            val = bitsToFp(qnan, junk);
        } else if (signal1) {
            val = bitsToFp(fpToBits(op1) | qnan, junk);
        } else if (signal2) {
            val = bitsToFp(fpToBits(op2) | qnan, junk);
        } else if (nan1) {
            val = op1;
        } else if (nan2) {
            val = op2;
        }
    } else if (fpClass == FP_SUBNORMAL && fpscr.fz == 1) {
        // Turn val into a zero with the correct sign.
        uint64_t bitMask = ULL(0x1) << (sizeof(fpType) * 8 - 1);
        val = bitsToFp(fpToBits(val) & bitMask, junk);
        feclearexcept(FeInexact);
        feraiseexcept(FeUnderflow);
    }
    return val;
}

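// fixDivDest() additionally corrects for the fact that ARM detects underflow
// before rounding while x86 detects it after rounding (see the same
// correction in FpOp::binaryOp() below): if the quotient landed exactly on
// the smallest normal magnitude, the division is redone in round-to-zero
// mode to see whether it really underflowed.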
template <class fpType>
static inline fpType
fixDivDest(FPSCR fpscr, fpType val, fpType op1, fpType op2)
{
    fpType mid = fixDest(fpscr, val, op1, op2);
    const bool single = (sizeof(fpType) == sizeof(float));
    const fpType junk = 0.0;
    if ((single && (val == bitsToFp(0x00800000, junk) ||
                    val == bitsToFp(0x80800000, junk))) ||
        (!single && (val == bitsToFp(ULL(0x0010000000000000), junk) ||
                     val == bitsToFp(ULL(0x8010000000000000), junk)))
        ) {
        __asm__ __volatile__("" : "=m" (op1) : "m" (op1));
        fesetround(FeRoundZero);
        fpType temp = 0.0;
        __asm__ __volatile__("" : "=m" (temp) : "m" (temp));
        temp = op1 / op2;
        if (flushToZero(temp)) {
            feraiseexcept(FeUnderflow);
            if (fpscr.fz) {
                feclearexcept(FeInexact);
                mid = temp;
            }
        }
        __asm__ __volatile__("" :: "m" (temp));
    }
    return mid;
}

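// Double-to-single (and, below, single-to-double) conversion fixups: when the
// input is a NaN, an equivalent NaN of the destination width is built by
// carrying over the sign and the most significant fraction bits, so that
// fixDest() can apply the usual NaN propagation rules.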
static inline float
fixFpDFpSDest(FPSCR fpscr, double val)
{
    const float junk = 0.0;
    float op1 = 0.0;
    if (std::isnan(val)) {
        uint64_t valBits = fpToBits(val);
        uint32_t op1Bits = bits(valBits, 50, 29) |
                           (mask(9) << 22) |
                           (bits(valBits, 63) << 31);
        op1 = bitsToFp(op1Bits, junk);
    }
    float mid = fixDest(fpscr, (float)val, op1);
    if (fpscr.fz && fetestexcept(FeUnderflow | FeInexact) ==
                    (FeUnderflow | FeInexact)) {
        feclearexcept(FeInexact);
    }
    if (mid == bitsToFp(0x00800000, junk) ||
        mid == bitsToFp(0x80800000, junk)) {
        __asm__ __volatile__("" : "=m" (val) : "m" (val));
        fesetround(FeRoundZero);
        float temp = 0.0;
        __asm__ __volatile__("" : "=m" (temp) : "m" (temp));
        temp = val;
        if (flushToZero(temp)) {
            feraiseexcept(FeUnderflow);
            if (fpscr.fz) {
                feclearexcept(FeInexact);
                mid = temp;
            }
        }
        __asm__ __volatile__("" :: "m" (temp));
    }
    return mid;
}

static inline double
fixFpSFpDDest(FPSCR fpscr, float val)
{
    const double junk = 0.0;
    double op1 = 0.0;
    if (std::isnan(val)) {
        uint32_t valBits = fpToBits(val);
        uint64_t op1Bits = ((uint64_t)bits(valBits, 21, 0) << 29) |
                           (mask(12) << 51) |
                           ((uint64_t)bits(valBits, 31) << 63);
        op1 = bitsToFp(op1Bits, junk);
    }
    double mid = fixDest(fpscr, (double)val, op1);
    if (mid == bitsToFp(ULL(0x0010000000000000), junk) ||
        mid == bitsToFp(ULL(0x8010000000000000), junk)) {
        __asm__ __volatile__("" : "=m" (val) : "m" (val));
        fesetround(FeRoundZero);
        double temp = 0.0;
        __asm__ __volatile__("" : "=m" (temp) : "m" (temp));
        temp = val;
        if (flushToZero(temp)) {
            feraiseexcept(FeUnderflow);
            if (fpscr.fz) {
                feclearexcept(FeInexact);
                mid = temp;
            }
        }
        __asm__ __volatile__("" :: "m" (temp));
    }
    return mid;
}

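// A double-precision VFP value occupies two consecutive 32-bit registers;
// these helpers pack the low/high words into a double and extract them again.
// For example, makeDouble(0x00000000, 0x3ff00000) reassembles 1.0.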
static inline double
makeDouble(uint32_t low, uint32_t high)
{
    double junk = 0.0;
    return bitsToFp((uint64_t)low | ((uint64_t)high << 32), junk);
}

static inline uint32_t
lowFromDouble(double val)
{
    return fpToBits(val);
}

static inline uint32_t
highFromDouble(double val)
{
    return fpToBits(val) >> 32;
}

static inline uint64_t
vfpFpSToFixed(float val, bool isSigned, bool half,
              uint8_t imm, bool rzero = true)
{
    int rmode = rzero ? FeRoundZero : fegetround();
    __asm__ __volatile__("" : "=m" (rmode) : "m" (rmode));
    fesetround(FeRoundNearest);
    val = val * powf(2.0, imm);
    __asm__ __volatile__("" : "=m" (val) : "m" (val));
    fesetround(rmode);
    feclearexcept(FeAllExceptions);
    __asm__ __volatile__("" : "=m" (val) : "m" (val));
    float origVal = val;
    val = rintf(val);
    int fpType = std::fpclassify(val);
    if (fpType == FP_SUBNORMAL || fpType == FP_NAN) {
        if (fpType == FP_NAN) {
            feraiseexcept(FeInvalid);
        }
        val = 0.0;
    } else if (origVal != val) {
        switch (rmode) {
          case FeRoundNearest:
            if (origVal - val > 0.5)
                val += 1.0;
            else if (val - origVal > 0.5)
                val -= 1.0;
            break;
          case FeRoundDown:
            if (origVal < val)
                val -= 1.0;
            break;
          case FeRoundUpward:
            if (origVal > val)
                val += 1.0;
            break;
        }
        feraiseexcept(FeInexact);
    }

    if (isSigned) {
        if (half) {
            if ((double)val < (int16_t)(1 << 15)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return (int16_t)(1 << 15);
            }
            if ((double)val > (int16_t)mask(15)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return (int16_t)mask(15);
            }
            return (int16_t)val;
        } else {
            if ((double)val < (int32_t)(1 << 31)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return (int32_t)(1 << 31);
            }
            if ((double)val > (int32_t)mask(31)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return (int32_t)mask(31);
            }
            return (int32_t)val;
        }
    } else {
        if (half) {
            if ((double)val < 0) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return 0;
            }
            if ((double)val > (mask(16))) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return mask(16);
            }
            return (uint16_t)val;
        } else {
            if ((double)val < 0) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return 0;
            }
            if ((double)val > (mask(32))) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return mask(32);
            }
            return (uint32_t)val;
        }
    }
}

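// Illustrative behaviour (not exercised in this header): converting to a
// signed 32-bit fixed-point value with 4 fraction bits,
// vfpFpSToFixed(1.5f, true, false, 4) scales by 2^4 and returns 24, while an
// out-of-range input such as vfpFpSToFixed(1e10f, true, false, 0) saturates
// to 0x7fffffff and raises the invalid-operation exception.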
static inline float
vfpUFixedToFpS(FPSCR fpscr, uint32_t val, bool half, uint8_t imm)
{
    fesetround(FeRoundNearest);
    if (half)
        val = (uint16_t)val;
    float scale = powf(2.0, imm);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    feclearexcept(FeAllExceptions);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    return fixDivDest(fpscr, val / scale, (float)val, scale);
}

static inline float
vfpSFixedToFpS(FPSCR fpscr, int32_t val, bool half, uint8_t imm)
{
    fesetround(FeRoundNearest);
    if (half)
        val = sext<16>(val & mask(16));
    float scale = powf(2.0, imm);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    feclearexcept(FeAllExceptions);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    return fixDivDest(fpscr, val / scale, (float)val, scale);
}

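// The double-precision conversions below mirror the single-precision ones
// above, using pow()/rint() instead of powf()/rintf(). For example,
// vfpUFixedToFpD(fpscr, 0x10, false, 4) divides 16 by 2^4 and yields 1.0.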
static inline uint64_t
vfpFpDToFixed(double val, bool isSigned, bool half,
              uint8_t imm, bool rzero = true)
{
    int rmode = rzero ? FeRoundZero : fegetround();
    fesetround(FeRoundNearest);
    val = val * pow(2.0, imm);
    __asm__ __volatile__("" : "=m" (val) : "m" (val));
    fesetround(rmode);
    feclearexcept(FeAllExceptions);
    __asm__ __volatile__("" : "=m" (val) : "m" (val));
    double origVal = val;
    val = rint(val);
    int fpType = std::fpclassify(val);
    if (fpType == FP_SUBNORMAL || fpType == FP_NAN) {
        if (fpType == FP_NAN) {
            feraiseexcept(FeInvalid);
        }
        val = 0.0;
    } else if (origVal != val) {
        switch (rmode) {
          case FeRoundNearest:
            if (origVal - val > 0.5)
                val += 1.0;
            else if (val - origVal > 0.5)
                val -= 1.0;
            break;
          case FeRoundDown:
            if (origVal < val)
                val -= 1.0;
            break;
          case FeRoundUpward:
            if (origVal > val)
                val += 1.0;
            break;
        }
        feraiseexcept(FeInexact);
    }
    if (isSigned) {
        if (half) {
            if (val < (int16_t)(1 << 15)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return (int16_t)(1 << 15);
            }
            if (val > (int16_t)mask(15)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return (int16_t)mask(15);
            }
            return (int16_t)val;
        } else {
            if (val < (int32_t)(1 << 31)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return (int32_t)(1 << 31);
            }
            if (val > (int32_t)mask(31)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return (int32_t)mask(31);
            }
            return (int32_t)val;
        }
    } else {
        if (half) {
            if (val < 0) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return 0;
            }
            if (val > mask(16)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return mask(16);
            }
            return (uint16_t)val;
        } else {
            if (val < 0) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return 0;
            }
            if (val > mask(32)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return mask(32);
            }
            return (uint32_t)val;
        }
    }
}

static inline double
vfpUFixedToFpD(FPSCR fpscr, uint32_t val, bool half, uint8_t imm)
{
    fesetround(FeRoundNearest);
    if (half)
        val = (uint16_t)val;
    double scale = pow(2.0, imm);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    feclearexcept(FeAllExceptions);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    return fixDivDest(fpscr, val / scale, (double)val, scale);
}

static inline double
vfpSFixedToFpD(FPSCR fpscr, int32_t val, bool half, uint8_t imm)
{
    fesetround(FeRoundNearest);
    if (half)
        val = sext<16>(val & mask(16));
    double scale = pow(2.0, imm);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    feclearexcept(FeAllExceptions);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    return fixDivDest(fpscr, val / scale, (double)val, scale);
}

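// Base class for VFP macro-ops. Registers whose index modulo 32 is below 8
// form the scalar bank and are never stepped; for the others, nextIdxs()
// advances the indices by the vector stride encoded in FPSCR (doubled for
// wide, i.e. double-precision, operations), wrapping within an 8-register
// bank.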
class VfpMacroOp : public PredMacroOp
{
  public:
    static bool
    inScalarBank(IntRegIndex idx)
    {
        return (idx % 32) < 8;
    }

  protected:
    bool wide;

    VfpMacroOp(const char *mnem, ExtMachInst _machInst,
            OpClass __opClass, bool _wide) :
        PredMacroOp(mnem, _machInst, __opClass), wide(_wide)
    {}

    IntRegIndex
    addStride(IntRegIndex idx, unsigned stride)
    {
        if (wide) {
            stride *= 2;
        }
        unsigned offset = idx % 8;
        idx = (IntRegIndex)(idx - offset);
        offset += stride;
        idx = (IntRegIndex)(idx + (offset % 8));
        return idx;
    }

    void
    nextIdxs(IntRegIndex &dest, IntRegIndex &op1, IntRegIndex &op2)
    {
        unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2;
        assert(!inScalarBank(dest));
        dest = addStride(dest, stride);
        op1 = addStride(op1, stride);
        if (!inScalarBank(op2)) {
            op2 = addStride(op2, stride);
        }
    }

    void
    nextIdxs(IntRegIndex &dest, IntRegIndex &op1)
    {
        unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2;
        assert(!inScalarBank(dest));
        dest = addStride(dest, stride);
        if (!inScalarBank(op1)) {
            op1 = addStride(op1, stride);
        }
    }

    void
    nextIdxs(IntRegIndex &dest)
    {
        unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2;
        assert(!inScalarBank(dest));
        dest = addStride(dest, stride);
    }
};

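// Trivial wrappers around the host's arithmetic operators. They match the
// callback signature expected by FpOp::binaryOp() below, so they can be
// passed in as plain function pointers.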
static inline float
fpAddS(float a, float b)
{
    return a + b;
}

static inline double
fpAddD(double a, double b)
{
    return a + b;
}

static inline float
fpSubS(float a, float b)
{
    return a - b;
}

static inline double
fpSubD(double a, double b)
{
    return a - b;
}

static inline float
fpDivS(float a, float b)
{
    return a / b;
}

static inline double
fpDivD(double a, double b)
{
    return a / b;
}

static inline float
fpMulS(float a, float b)
{
    return a * b;
}

static inline double
fpMulD(double a, double b)
{
    return a * b;
}

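// Base class for VFP data-processing instructions. binaryOp()/unaryOp() wrap
// a host floating-point callback with the ARM-visible behaviour: input and
// output flush-to-zero, NaN selection, rounding-mode control, and exception
// accumulation into FPSCR. A sketch of how a derived instruction might call
// it (illustrative only; the real callers are generated elsewhere):
//
//     FPSCR fpscr = ...;
//     float dest = binaryOp(fpscr, op1, op2, fpAddS,
//                           fpscr.fz, fpscr.rMode);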
class FpOp : public PredOp
{
  protected:
    FpOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass) :
        PredOp(mnem, _machInst, __opClass)
    {}

    virtual float
    doOp(float op1, float op2) const
    {
        panic("Unimplemented version of doOp called.\n");
    }

    virtual float
    doOp(float op1) const
    {
        panic("Unimplemented version of doOp called.\n");
    }

    virtual double
    doOp(double op1, double op2) const
    {
        panic("Unimplemented version of doOp called.\n");
    }

    virtual double
    doOp(double op1) const
    {
        panic("Unimplemented version of doOp called.\n");
    }

    double
    dbl(uint32_t low, uint32_t high) const
    {
        double junk = 0.0;
        return bitsToFp((uint64_t)low | ((uint64_t)high << 32), junk);
    }

    uint32_t
    dblLow(double val) const
    {
        return fpToBits(val);
    }

    uint32_t
    dblHi(double val) const
    {
        return fpToBits(val) >> 32;
    }

    template <class fpType>
    fpType
    binaryOp(FPSCR &fpscr, fpType op1, fpType op2,
            fpType (*func)(fpType, fpType),
            bool flush, uint32_t rMode) const
    {
        const bool single = (sizeof(fpType) == sizeof(float));
        fpType junk = 0.0;

        if (flush && flushToZero(op1, op2))
            fpscr.idc = 1;
        VfpSavedState state = prepFpState(rMode);
        __asm__ __volatile__ ("" : "=m" (op1), "=m" (op2), "=m" (state)
                                 : "m" (op1), "m" (op2), "m" (state));
        fpType dest = func(op1, op2);
        __asm__ __volatile__ ("" : "=m" (dest) : "m" (dest));

        int fpClass = std::fpclassify(dest);
        // Get NaN behavior right. This varies between x86 and ARM.
        if (fpClass == FP_NAN) {
            const bool single = (sizeof(fpType) == sizeof(float));
            const uint64_t qnan =
                single ? 0x7fc00000 : ULL(0x7ff8000000000000);
            const bool nan1 = std::isnan(op1);
            const bool nan2 = std::isnan(op2);
            const bool signal1 = nan1 && ((fpToBits(op1) & qnan) != qnan);
            const bool signal2 = nan2 && ((fpToBits(op2) & qnan) != qnan);
            if ((!nan1 && !nan2) || (fpscr.dn == 1)) {
                dest = bitsToFp(qnan, junk);
            } else if (signal1) {
                dest = bitsToFp(fpToBits(op1) | qnan, junk);
            } else if (signal2) {
                dest = bitsToFp(fpToBits(op2) | qnan, junk);
            } else if (nan1) {
                dest = op1;
            } else if (nan2) {
                dest = op2;
            }
        } else if (flush && flushToZero(dest)) {
            feraiseexcept(FeUnderflow);
        } else if ((
                    (single && (dest == bitsToFp(0x00800000, junk) ||
                         dest == bitsToFp(0x80800000, junk))) ||
                    (!single &&
                        (dest == bitsToFp(ULL(0x0010000000000000), junk) ||
                         dest == bitsToFp(ULL(0x8010000000000000), junk)))
                   ) && rMode != VfpRoundZero) {
            /*
             * Correct for the fact that underflow is detected -before-
             * rounding in ARM and -after- rounding in x86.
             */
            fesetround(FeRoundZero);
            __asm__ __volatile__ ("" : "=m" (op1), "=m" (op2)
                                     : "m" (op1), "m" (op2));
            fpType temp = func(op1, op2);
            __asm__ __volatile__ ("" : "=m" (temp) : "m" (temp));
            if (flush && flushToZero(temp)) {
                dest = temp;
            }
        }
        finishVfp(fpscr, state);
        return dest;
    }

    template <class fpType>
    fpType
    unaryOp(FPSCR &fpscr, fpType op1,
            fpType (*func)(fpType),
            bool flush, uint32_t rMode) const
    {
        const bool single = (sizeof(fpType) == sizeof(float));
        fpType junk = 0.0;

        if (flush && flushToZero(op1))
            fpscr.idc = 1;
        VfpSavedState state = prepFpState(rMode);
        __asm__ __volatile__ ("" : "=m" (op1), "=m" (state)
                                 : "m" (op1), "m" (state));
        fpType dest = func(op1);
        __asm__ __volatile__ ("" : "=m" (dest) : "m" (dest));

        int fpClass = std::fpclassify(dest);
        // Get NaN behavior right. This varies between x86 and ARM.
        if (fpClass == FP_NAN) {
            const bool single = (sizeof(fpType) == sizeof(float));
            const uint64_t qnan =
                single ? 0x7fc00000 : ULL(0x7ff8000000000000);
            const bool nan = std::isnan(op1);
            if (!nan || fpscr.dn == 1) {
                dest = bitsToFp(qnan, junk);
            } else if (nan) {
                dest = bitsToFp(fpToBits(op1) | qnan, junk);
            }
        } else if (flush && flushToZero(dest)) {
            feraiseexcept(FeUnderflow);
        } else if ((
                    (single && (dest == bitsToFp(0x00800000, junk) ||
                         dest == bitsToFp(0x80800000, junk))) ||
                    (!single &&
                        (dest == bitsToFp(ULL(0x0010000000000000), junk) ||
                         dest == bitsToFp(ULL(0x8010000000000000), junk)))
                   ) && rMode != VfpRoundZero) {
            /*
             * Correct for the fact that underflow is detected -before-
             * rounding in ARM and -after- rounding in x86.
             */
            fesetround(FeRoundZero);
            __asm__ __volatile__ ("" : "=m" (op1) : "m" (op1));
            fpType temp = func(op1);
            __asm__ __volatile__ ("" : "=m" (temp) : "m" (temp));
            if (flush && flushToZero(temp)) {
                dest = temp;
            }
        }
        finishVfp(fpscr, state);
        return dest;
    }
};

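// The remaining classes are operand-format bases shared by VFP instruction
// definitions: each stores its destination/source register indices (and an
// immediate, where applicable) and tags itself with the appropriate
// micro-op flags.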
class FpRegRegOp : public FpOp
{
  protected:
    IntRegIndex dest;
    IntRegIndex op1;

    FpRegRegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
               IntRegIndex _dest, IntRegIndex _op1,
               VfpMicroMode mode = VfpNotAMicroop) :
        FpOp(mnem, _machInst, __opClass), dest(_dest), op1(_op1)
    {
        setVfpMicroFlags(mode, flags);
    }

    std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
};

class FpRegImmOp : public FpOp
{
  protected:
    IntRegIndex dest;
    uint64_t imm;

    FpRegImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
               IntRegIndex _dest, uint64_t _imm,
               VfpMicroMode mode = VfpNotAMicroop) :
        FpOp(mnem, _machInst, __opClass), dest(_dest), imm(_imm)
    {
        setVfpMicroFlags(mode, flags);
    }

    std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
};

class FpRegRegImmOp : public FpOp
{
  protected:
    IntRegIndex dest;
    IntRegIndex op1;
    uint64_t imm;

    FpRegRegImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
                  IntRegIndex _dest, IntRegIndex _op1,
                  uint64_t _imm, VfpMicroMode mode = VfpNotAMicroop) :
        FpOp(mnem, _machInst, __opClass), dest(_dest), op1(_op1), imm(_imm)
    {
        setVfpMicroFlags(mode, flags);
    }

    std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
};

class FpRegRegRegOp : public FpOp
{
  protected:
    IntRegIndex dest;
    IntRegIndex op1;
    IntRegIndex op2;

    FpRegRegRegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
                  IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2,
                  VfpMicroMode mode = VfpNotAMicroop) :
        FpOp(mnem, _machInst, __opClass), dest(_dest), op1(_op1), op2(_op2)
    {
        setVfpMicroFlags(mode, flags);
    }

    std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
};

}

#endif //__ARCH_ARM_INSTS_VFP_HH__