vfp.hh (11321:02e930db812d) vfp.hh (11671:520509f3e66c)
1/*
2 * Copyright (c) 2010-2013 ARM Limited
3 * All rights reserved
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder. You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions are
16 * met: redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer;
18 * redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution;
21 * neither the name of the copyright holders nor the names of its
22 * contributors may be used to endorse or promote products derived from
23 * this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 *
37 * Authors: Gabe Black
38 */
39
40#ifndef __ARCH_ARM_INSTS_VFP_HH__
41#define __ARCH_ARM_INSTS_VFP_HH__
42
#include <fenv.h>

#include <cmath>
#include <cstring>

#include "arch/arm/insts/misc.hh"
#include "arch/arm/miscregs.hh"
49
50namespace ArmISA
51{
52
// Position of a VFP instruction within a macroop expansion; consumed by
// setVfpMicroFlags() below to set the matching StaticInst flags.
enum VfpMicroMode {
    VfpNotAMicroop,   // stand-alone instruction, not part of a macroop
    VfpMicroop,       // interior microop of a macroop
    VfpFirstMicroop,  // first microop of a macroop
    VfpLastMicroop    // last microop of a macroop
};
59
60template<class T>
61static inline void
62setVfpMicroFlags(VfpMicroMode mode, T &flags)
63{
64 switch (mode) {
65 case VfpMicroop:
66 flags[StaticInst::IsMicroop] = true;
67 break;
68 case VfpFirstMicroop:
69 flags[StaticInst::IsMicroop] =
70 flags[StaticInst::IsFirstMicroop] = true;
71 break;
72 case VfpLastMicroop:
73 flags[StaticInst::IsMicroop] =
74 flags[StaticInst::IsLastMicroop] = true;
75 break;
76 case VfpNotAMicroop:
77 break;
78 }
79 if (mode == VfpMicroop || mode == VfpFirstMicroop) {
80 flags[StaticInst::IsDelayedCommit] = true;
81 }
82}
83
// Host floating point environment exception bits (<fenv.h>), renamed
// for readability at the use sites below.
enum FeExceptionBit
{
    FeDivByZero = FE_DIVBYZERO,
    FeInexact = FE_INEXACT,
    FeInvalid = FE_INVALID,
    FeOverflow = FE_OVERFLOW,
    FeUnderflow = FE_UNDERFLOW,
    FeAllExceptions = FE_ALL_EXCEPT
};
93
// Host rounding modes (<fenv.h>) under friendlier names; values are
// whatever the host C library defines, not architectural encodings.
enum FeRoundingMode
{
    FeRoundDown = FE_DOWNWARD,
    FeRoundNearest = FE_TONEAREST,
    FeRoundZero = FE_TOWARDZERO,
    FeRoundUpward = FE_UPWARD
};
101
// Architectural rounding modes as encoded in FPSCR.RMode. VfpRoundAway
// (ties away from zero) has no host equivalent and is emulated in
// vfpFpToFixed().
enum VfpRoundingMode
{
    VfpRoundNearest = 0,
    VfpRoundUpward = 1,
    VfpRoundDown = 2,
    VfpRoundZero = 3,
    VfpRoundAway = 4
};
110
111static inline float bitsToFp(uint64_t, float);
112static inline double bitsToFp(uint64_t, double);
113static inline uint32_t fpToBits(float);
114static inline uint64_t fpToBits(double);
115
116template <class fpType>
117static inline bool
118flushToZero(fpType &op)
119{
120 fpType junk = 0.0;
121 if (std::fpclassify(op) == FP_SUBNORMAL) {
122 uint64_t bitMask = ULL(0x1) << (sizeof(fpType) * 8 - 1);
123 op = bitsToFp(fpToBits(op) & bitMask, junk);
124 return true;
125 }
126 return false;
127}
128
// Flush both operands; both are always examined. Returns true when
// either one was denormal.
template <class fpType>
static inline bool
flushToZero(fpType &op1, fpType &op2)
{
    const bool first = flushToZero(op1);
    const bool second = flushToZero(op2);
    return first || second;
}
137
138template <class fpType>
139static inline void
140vfpFlushToZero(FPSCR &fpscr, fpType &op)
141{
142 if (fpscr.fz == 1 && flushToZero(op)) {
143 fpscr.idc = 1;
144 }
145}
146
147template <class fpType>
148static inline void
149vfpFlushToZero(FPSCR &fpscr, fpType &op1, fpType &op2)
150{
151 vfpFlushToZero(fpscr, op1);
152 vfpFlushToZero(fpscr, op2);
153}
154
// Raw IEEE-754 bit pattern of a single precision value.
// Uses memcpy instead of a union: reading the inactive union member is
// undefined behaviour in C++, while memcpy is well-defined type punning
// and compiles to the same code.
static inline uint32_t
fpToBits(float fp)
{
    uint32_t bits;
    std::memcpy(&bits, &fp, sizeof(bits));
    return bits;
}
166
// Raw IEEE-754 bit pattern of a double precision value.
// memcpy replaces the original union: inactive-member reads are UB in
// C++; memcpy is the defined way to type-pun.
static inline uint64_t
fpToBits(double fp)
{
    uint64_t bits;
    std::memcpy(&bits, &fp, sizeof(bits));
    return bits;
}
178
// Reinterpret the low 32 bits of "bits" as a float. The junk argument
// exists only to select this overload; its value is ignored.
// memcpy replaces the original union (inactive-member reads are UB in
// C++); the uint64_t -> uint32_t narrowing matches the old behaviour of
// storing into the union's 32 bit member.
static inline float
bitsToFp(uint64_t bits, float junk)
{
    const uint32_t lowBits = bits;
    float fp;
    std::memcpy(&fp, &lowBits, sizeof(fp));
    return fp;
}
190
// Reinterpret a 64 bit pattern as a double. The junk argument exists
// only to select this overload; its value is ignored.
// memcpy replaces the original union to avoid undefined behaviour.
static inline double
bitsToFp(uint64_t bits, double junk)
{
    double fp;
    std::memcpy(&fp, &bits, sizeof(fp));
    return fp;
}
202
// Returns true if val is a signaling NaN: a NaN whose quiet bit (the
// top fraction bit) is clear.
template <class fpType>
static bool
isSnan(fpType val)
{
    const bool single = (sizeof(fpType) == sizeof(float));
    // Exponent bits plus the quiet bit; all of these are set in a qNaN.
    const uint64_t qnan =
        single ? 0x7fc00000 : ULL(0x7ff8000000000000);
    return std::isnan(val) && ((fpToBits(val) & qnan) != qnan);
}
212
213typedef int VfpSavedState;
214
215VfpSavedState prepFpState(uint32_t rMode);
216void finishVfp(FPSCR &fpscr, VfpSavedState state, bool flush, FPSCR mask = FpscrExcMask);
217
218template <class fpType>
219fpType fixDest(FPSCR fpscr, fpType val, fpType op1);
220
221template <class fpType>
222fpType fixDest(FPSCR fpscr, fpType val, fpType op1, fpType op2);
223
224template <class fpType>
225fpType fixDivDest(FPSCR fpscr, fpType val, fpType op1, fpType op2);
226
227float fixFpDFpSDest(FPSCR fpscr, double val);
228double fixFpSFpDDest(FPSCR fpscr, float val);
229
230uint16_t vcvtFpSFpH(FPSCR &fpscr, bool flush, bool defaultNan,
231 uint32_t rMode, bool ahp, float op);
232uint16_t vcvtFpDFpH(FPSCR &fpscr, bool flush, bool defaultNan,
233 uint32_t rMode, bool ahp, double op);
234
235float vcvtFpHFpS(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op);
236double vcvtFpHFpD(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op);
237
238static inline double
239makeDouble(uint32_t low, uint32_t high)
240{
241 double junk = 0.0;
242 return bitsToFp((uint64_t)low | ((uint64_t)high << 32), junk);
243}
244
245static inline uint32_t
246lowFromDouble(double val)
247{
248 return fpToBits(val);
249}
250
251static inline uint32_t
252highFromDouble(double val)
253{
254 return fpToBits(val) >> 32;
255}
256
// Replace the host FP exception flags with exactly the given set.
static inline void
setFPExceptions(int exceptions) {
    feclearexcept(FeAllExceptions);
    feraiseexcept(exceptions);
}
262
// Convert a floating point value to a fixed point integer of the given
// width, scaling by 2^imm first. Out-of-range results saturate, and the
// matching host FP exception flags are installed via setFPExceptions()
// for the caller to read back. When useRmode is false the currently
// installed host rounding mode is used instead of roundMode.
template <typename T>
uint64_t
vfpFpToFixed(T val, bool isSigned, uint8_t width, uint8_t imm, bool
             useRmode = true, VfpRoundingMode roundMode = VfpRoundZero,
             bool aarch64 = false)
{
    int rmode;
    bool roundAwayFix = false;

    if (!useRmode) {
        rmode = fegetround();
    } else {
        // Map the architectural rounding mode onto a host one.
        switch (roundMode)
        {
          case VfpRoundNearest:
            rmode = FeRoundNearest;
            break;
          case VfpRoundUpward:
            rmode = FeRoundUpward;
            break;
          case VfpRoundDown:
            rmode = FeRoundDown;
            break;
          case VfpRoundZero:
            rmode = FeRoundZero;
            break;
          case VfpRoundAway:
            // There is no equivalent rounding mode, use round down and we'll
            // fix it later
            rmode = FeRoundDown;
            roundAwayFix = true;
            break;
          default:
            panic("Unsupported roundMode %d\n", roundMode);
        }
    }
    // The empty asm statements are compiler barriers: they stop the
    // compiler from moving the FP operations across the fesetround() /
    // feclearexcept() calls.
    __asm__ __volatile__("" : "=m" (rmode) : "m" (rmode));
    // Apply the fixed point scale factor in round-to-nearest mode.
    fesetround(FeRoundNearest);
    val = val * pow(2.0, imm);
    __asm__ __volatile__("" : "=m" (val) : "m" (val));
    fesetround(rmode);
    feclearexcept(FeAllExceptions);
    __asm__ __volatile__("" : "=m" (val) : "m" (val));
    T origVal = val;
    val = rint(val);
    __asm__ __volatile__("" : "=m" (val) : "m" (val));

    int exceptions = fetestexcept(FeAllExceptions);

    int fpType = std::fpclassify(val);
    if (fpType == FP_SUBNORMAL || fpType == FP_NAN) {
        if (fpType == FP_NAN) {
            exceptions |= FeInvalid;
        }
        val = 0.0;
    } else if (origVal != val) {
        // rint() changed the value; patch up the cases where host
        // rounding can disagree with the architectural mode.
        switch (rmode) {
          case FeRoundNearest:
            if (origVal - val > 0.5)
                val += 1.0;
            else if (val - origVal > 0.5)
                val -= 1.0;
            break;
          case FeRoundDown:
            if (roundAwayFix) {
                // The ordering on the subtraction looks a bit odd in that we
                // don't do the obvious origVal - val, instead we do
                // -(val - origVal). This is required to get the correct bit
                // exact behaviour when very close to the 0.5 threshold.
                volatile T error = val;
                error -= origVal;
                error = -error;
                if ( (error > 0.5) ||
                    ((error == 0.5) && (val >= 0)) )
                    val += 1.0;
            } else {
                if (origVal < val)
                    val -= 1.0;
            }
            break;
          case FeRoundUpward:
            if (origVal > val)
                val += 1.0;
            break;
        }
        exceptions |= FeInexact;
    }

    __asm__ __volatile__("" : "=m" (val) : "m" (val));

    if (isSigned) {
        bool outOfRange = false;
        int64_t result = (int64_t) val;
        uint64_t finalVal;

        if (!aarch64) {
            // AArch32: truncate to the requested width first...
            if (width == 16) {
                finalVal = (int16_t)val;
            } else if (width == 32) {
                finalVal =(int32_t)val;
            } else if (width == 64) {
                finalVal = result;
            } else {
                panic("Unsupported width %d\n", width);
            }

            // ...then check if value is in range, saturating if not.
            int64_t minVal = ~mask(width-1);
            if ((double)val < minVal) {
                outOfRange = true;
                finalVal = minVal;
            }
            int64_t maxVal = mask(width-1);
            if ((double)val > maxVal) {
                outOfRange = true;
                finalVal = maxVal;
            }
        } else {
            bool isNeg = val < 0;
            finalVal = result & mask(width);
            // If the result is supposed to be less than 64 bits check that the
            // upper bits that got thrown away are just sign extension bits
            if (width != 64) {
                outOfRange = ((uint64_t) result >> (width - 1)) !=
                             (isNeg ? mask(64-width+1) : 0);
            }
            // If the original floating point value doesn't match the
            // integer version we are also out of range. So create a saturated
            // result.
            if (isNeg) {
                outOfRange |= val < result;
                if (outOfRange) {
                    finalVal = 1LL << (width-1);
                }
            } else {
                outOfRange |= val > result;
                if (outOfRange) {
                    finalVal = mask(width-1);
                }
            }
        }

        // Raise an exception if the value was out of range
        if (outOfRange) {
            exceptions |= FeInvalid;
            exceptions &= ~FeInexact;
        }
        setFPExceptions(exceptions);
        return finalVal;
    } else {
        // Unsigned conversion: negative inputs saturate to zero.
        if ((double)val < 0) {
            exceptions |= FeInvalid;
            exceptions &= ~FeInexact;
            setFPExceptions(exceptions);
            return 0;
        }

        uint64_t result = ((uint64_t) val) & mask(width);
        if (val > result) {
            // Doesn't fit in width bits: saturate to the maximum.
            exceptions |= FeInvalid;
            exceptions &= ~FeInexact;
            setFPExceptions(exceptions);
            return mask(width);
        }

        setFPExceptions(exceptions);
        return result;
    }
};
432
433
434float vfpUFixedToFpS(bool flush, bool defaultNan,
435 uint64_t val, uint8_t width, uint8_t imm);
436float vfpSFixedToFpS(bool flush, bool defaultNan,
437 int64_t val, uint8_t width, uint8_t imm);
438
439double vfpUFixedToFpD(bool flush, bool defaultNan,
440 uint64_t val, uint8_t width, uint8_t imm);
441double vfpSFixedToFpD(bool flush, bool defaultNan,
442 int64_t val, uint8_t width, uint8_t imm);
443
444float fprSqrtEstimate(FPSCR &fpscr, float op);
445uint32_t unsignedRSqrtEstimate(uint32_t op);
446
447float fpRecipEstimate(FPSCR &fpscr, float op);
448uint32_t unsignedRecipEstimate(uint32_t op);
449
// Base class for VFP macroops that expand into per-register microops.
class VfpMacroOp : public PredMacroOp
{
  public:
    // True when the register is one of the first eight of its
    // 32-register bank (the "scalar bank").
    static bool
    inScalarBank(IntRegIndex idx)
    {
        return (idx % 32) < 8;
    }

  protected:
    bool wide;  // true when operating on double-width (64 bit) registers

    VfpMacroOp(const char *mnem, ExtMachInst _machInst,
               OpClass __opClass, bool _wide) :
        PredMacroOp(mnem, _machInst, __opClass), wide(_wide)
    {}

    // Register-index stepping helpers for successive microops; defined
    // out of line.
    IntRegIndex addStride(IntRegIndex idx, unsigned stride);
    void nextIdxs(IntRegIndex &dest, IntRegIndex &op1, IntRegIndex &op2);
    void nextIdxs(IntRegIndex &dest, IntRegIndex &op1);
    void nextIdxs(IntRegIndex &dest);
};
472
// Sum of two values; used as a function-pointer kernel by binaryOp().
// Fix: dropped the spurious semicolon after the function body (a
// non-standard empty declaration at namespace scope).
template <typename T>
static inline T
fpAdd(T a, T b)
{
    return a + b;
}
479
// Difference of two values; used as a function-pointer kernel by
// binaryOp(). Fix: dropped the spurious semicolon after the body.
template <typename T>
static inline T
fpSub(T a, T b)
{
    return a - b;
}
486
// Single precision addition kernel.
static inline float
fpAddS(float a, float b)
{
    const float sum = a + b;
    return sum;
}
492
// Double precision addition kernel.
static inline double
fpAddD(double a, double b)
{
    const double sum = a + b;
    return sum;
}
498
// Single precision subtraction kernel.
static inline float
fpSubS(float a, float b)
{
    const float diff = a - b;
    return diff;
}
504
// Double precision subtraction kernel.
static inline double
fpSubD(double a, double b)
{
    const double diff = a - b;
    return diff;
}
510
// Single precision division kernel.
static inline float
fpDivS(float a, float b)
{
    const float quotient = a / b;
    return quotient;
}
516
// Double precision division kernel.
static inline double
fpDivD(double a, double b)
{
    const double quotient = a / b;
    return quotient;
}
522
// Quotient of two values; used as a function-pointer kernel by
// binaryOp(). Fix: dropped the spurious semicolon after the body.
template <typename T>
static inline T
fpDiv(T a, T b)
{
    return a / b;
}
529
// FMULX: like an ordinary multiply, except that infinity * zero (in
// either operand order) returns 2.0 with the exclusive-or of the
// operand signs instead of producing a NaN.
// Fixes: replaced the hand-rolled sign-bit extraction with
// std::signbit (identical result, works for any float width) and
// dropped the spurious semicolon after the function body.
template <typename T>
static inline T
fpMulX(T a, T b)
{
    const bool inf1 = (std::fpclassify(a) == FP_INFINITE);
    const bool inf2 = (std::fpclassify(b) == FP_INFINITE);
    const bool zero1 = (std::fpclassify(a) == FP_ZERO);
    const bool zero2 = (std::fpclassify(b) == FP_ZERO);
    if ((inf1 && zero2) || (zero1 && inf2)) {
        // Result sign is sign(a) XOR sign(b).
        if (std::signbit(a) != std::signbit(b))
            return (T)(-2.0);
        else
            return (T)(2.0);
    } else {
        return (a * b);
    }
}
562
563
// Product of two values; used as a function-pointer kernel by
// binaryOp(). Fix: dropped the spurious semicolon after the body.
template <typename T>
static inline T
fpMul(T a, T b)
{
    return a * b;
}
570
// Single precision multiplication kernel.
static inline float
fpMulS(float a, float b)
{
    const float product = a * b;
    return product;
}
576
// Double precision multiplication kernel.
static inline double
fpMulD(double a, double b)
{
    const double product = a * b;
    return product;
}
582
// Fused multiply-add: op1 * op2 + addend with a single rounding.
// @todo remove this when all calls to it have been replaced with the new fplib implementation
template <typename T>
static inline T
fpMulAdd(T op1, T op2, T addend)
{
    T result;

    if (sizeof(T) == sizeof(float))
        result = fmaf(op1, op2, addend);
    else
        result = fma(op1, op2, addend);

    // ARM doesn't generate signed NaNs from this operation, so clear
    // the sign bit when the NaN was produced here rather than
    // propagated from an operand.
    if (std::isnan(result) && !std::isnan(op1) &&
        !std::isnan(op2) && !std::isnan(addend))
    {
        uint64_t bitMask = ULL(0x1) << ((sizeof(T) * 8) - 1);
        result = bitsToFp(fpToBits(result) & ~bitMask, op1);
    }
    return result;
}
604
605template <typename T>
606static inline T
607fpRIntX(T a, FPSCR &fpscr)
608{
609 T rVal;
610
611 rVal = rint(a);
612 if (rVal != a && !std::isnan(a))
613 fpscr.ixc = 1;
614 return (rVal);
615};
616
// maxNM-style max: a quiet NaN operand loses to a number, but a
// signaling NaN is returned so the caller can process it.
template <typename T>
static inline T
fpMaxNum(T a, T b)
{
    const bool single = (sizeof(T) == sizeof(float));
    // Exponent bits plus the quiet bit; all set in every quiet NaN.
    const uint64_t qnan = single ? 0x7fc00000 : ULL(0x7ff8000000000000);

    if (std::isnan(a))
        return ((fpToBits(a) & qnan) == qnan) ? b : a;
    if (std::isnan(b))
        return ((fpToBits(b) & qnan) == qnan) ? a : b;
    // Handle comparisons of +0 and -0: +0 beats -0, which fmax() alone
    // does not guarantee.
    if (!std::signbit(a) && std::signbit(b))
        return a;
    return fmax(a, b);
};
633
634template <typename T>
635static inline T
636fpMax(T a, T b)
637{
638 if (std::isnan(a))
639 return a;
640 if (std::isnan(b))
641 return b;
642 return fpMaxNum<T>(a, b);
643};
644
// minNM-style min: a quiet NaN operand loses to a number, but a
// signaling NaN is returned so the caller can process it.
template <typename T>
static inline T
fpMinNum(T a, T b)
{
    const bool single = (sizeof(T) == sizeof(float));
    // Exponent bits plus the quiet bit; all set in every quiet NaN.
    const uint64_t qnan = single ? 0x7fc00000 : ULL(0x7ff8000000000000);

    if (std::isnan(a))
        return ((fpToBits(a) & qnan) == qnan) ? b : a;
    if (std::isnan(b))
        return ((fpToBits(b) & qnan) == qnan) ? a : b;
    // Handle comparisons of +0 and -0: -0 beats +0, which fmin() alone
    // does not guarantee.
    if (std::signbit(a) && !std::signbit(b))
        return a;
    return fmin(a, b);
};
661
662template <typename T>
663static inline T
664fpMin(T a, T b)
665{
666 if (std::isnan(a))
667 return a;
668 if (std::isnan(b))
669 return b;
670 return fpMinNum<T>(a, b);
671};
672
673template <typename T>
674static inline T
675fpRSqrts(T a, T b)
676{
677 int fpClassA = std::fpclassify(a);
678 int fpClassB = std::fpclassify(b);
679 T aXb;
680 int fpClassAxB;
681
682 if ((fpClassA == FP_ZERO && fpClassB == FP_INFINITE) ||
683 (fpClassA == FP_INFINITE && fpClassB == FP_ZERO)) {
684 return 1.5;
685 }
686 aXb = a*b;
687 fpClassAxB = std::fpclassify(aXb);
688 if (fpClassAxB == FP_SUBNORMAL) {
689 feraiseexcept(FeUnderflow);
690 return 1.5;
691 }
692 return (3.0 - (a * b)) / 2.0;
693};
694
695template <typename T>
696static inline T
697fpRecps(T a, T b)
698{
699 int fpClassA = std::fpclassify(a);
700 int fpClassB = std::fpclassify(b);
701 T aXb;
702 int fpClassAxB;
703
704 if ((fpClassA == FP_ZERO && fpClassB == FP_INFINITE) ||
705 (fpClassA == FP_INFINITE && fpClassB == FP_ZERO)) {
706 return 2.0;
707 }
708 aXb = a*b;
709 fpClassAxB = std::fpclassify(aXb);
710 if (fpClassAxB == FP_SUBNORMAL) {
711 feraiseexcept(FeUnderflow);
712 return 2.0;
713 }
714 return 2.0 - (a * b);
715};
716
717
718static inline float
719fpRSqrtsS(float a, float b)
720{
721 int fpClassA = std::fpclassify(a);
722 int fpClassB = std::fpclassify(b);
723 float aXb;
724 int fpClassAxB;
725
726 if ((fpClassA == FP_ZERO && fpClassB == FP_INFINITE) ||
727 (fpClassA == FP_INFINITE && fpClassB == FP_ZERO)) {
728 return 1.5;
729 }
730 aXb = a*b;
731 fpClassAxB = std::fpclassify(aXb);
732 if (fpClassAxB == FP_SUBNORMAL) {
733 feraiseexcept(FeUnderflow);
734 return 1.5;
735 }
736 return (3.0 - (a * b)) / 2.0;
737}
738
739static inline float
740fpRecpsS(float a, float b)
741{
742 int fpClassA = std::fpclassify(a);
743 int fpClassB = std::fpclassify(b);
744 float aXb;
745 int fpClassAxB;
746
747 if ((fpClassA == FP_ZERO && fpClassB == FP_INFINITE) ||
748 (fpClassA == FP_INFINITE && fpClassB == FP_ZERO)) {
749 return 2.0;
750 }
751 aXb = a*b;
752 fpClassAxB = std::fpclassify(aXb);
753 if (fpClassAxB == FP_SUBNORMAL) {
754 feraiseexcept(FeUnderflow);
755 return 2.0;
756 }
757 return 2.0 - (a * b);
758}
759
// Round to nearest, ties to even. round() rounds ties away from zero,
// so exact .5 ties are corrected using the parity of the truncated
// input.
// NOTE(review): the (int) casts assume |a| fits in an int; larger
// magnitudes would overflow here — confirm callers only pass values in
// range.
template <typename T>
static inline T
roundNEven(T a) {
    T val;

    val = round(a);
    // a - val == 0.5: round() went down (a negative tie rounded away
    // from zero); step back up when the truncated value is even.
    if (a - val == 0.5) {
        if ( (((int) a) & 1) == 0 ) val += 1.0;
    }
    // a - val == -0.5: a positive tie rounded up; step back down when
    // the truncated value is even.
    else if (a - val == -0.5) {
        if ( (((int) a) & 1) == 0 ) val -= 1.0;
    }
    return val;
}
774
775
776
// Common base class for predicated VFP/NEON floating point
// instructions: provides NaN processing, rounding-mode-aware operation
// wrappers and microcode-aware PC advancement for subclasses.
class FpOp : public PredOp
{
  protected:
    FpOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass) :
        PredOp(mnem, _machInst, __opClass)
    {}

    // doOp overloads are overridden by concrete instructions; the base
    // versions must never be reached.
    virtual float
    doOp(float op1, float op2) const
    {
        panic("Unimplemented version of doOp called.\n");
    }

    virtual float
    doOp(float op1) const
    {
        panic("Unimplemented version of doOp called.\n");
    }

    virtual double
    doOp(double op1, double op2) const
    {
        panic("Unimplemented version of doOp called.\n");
    }

    virtual double
    doOp(double op1) const
    {
        panic("Unimplemented version of doOp called.\n");
    }

    // Reassemble a double from its two 32 bit register halves.
    double
    dbl(uint32_t low, uint32_t high) const
    {
        double junk = 0.0;
        return bitsToFp((uint64_t)low | ((uint64_t)high << 32), junk);
    }

    // Low 32 bits of a double's bit pattern.
    uint32_t
    dblLow(double val) const
    {
        return fpToBits(val);
    }

    // High 32 bits of a double's bit pattern.
    uint32_t
    dblHi(double val) const
    {
        return fpToBits(val) >> 32;
    }

    // NaN handling for two-operand instructions; sets done when a NaN
    // operand fully determines the result. Defined out of line.
    template <class fpType>
    fpType
    processNans(FPSCR &fpscr, bool &done, bool defaultNan,
                fpType op1, fpType op2) const;

    // Run func under the requested rounding/flush settings, updating
    // fpscr with any exceptions. Defined out of line.
    template <class fpType>
    fpType
    ternaryOp(FPSCR &fpscr, fpType op1, fpType op2, fpType op3,
              fpType (*func)(fpType, fpType, fpType),
              bool flush, bool defaultNan, uint32_t rMode) const;

    template <class fpType>
    fpType
    binaryOp(FPSCR &fpscr, fpType op1, fpType op2,
             fpType (*func)(fpType, fpType),
             bool flush, bool defaultNan, uint32_t rMode) const;

    template <class fpType>
    fpType
    unaryOp(FPSCR &fpscr, fpType op1,
            fpType (*func)(fpType),
            bool flush, uint32_t rMode) const;

    // Advance the PC past this instruction, stepping micro-PCs for
    // microcoded instructions.
    void
    advancePC(PCState &pcState) const
    {
        if (flags[IsLastMicroop]) {
            pcState.uEnd();
        } else if (flags[IsMicroop]) {
            pcState.uAdvance();
        } else {
            pcState.advance();
        }
    }

    // Square-root helpers wrapping the host sqrtf/sqrt via unaryOp.
    float
    fpSqrt (FPSCR fpscr,float x) const
    {
        return unaryOp(fpscr,x,sqrtf,fpscr.fz,fpscr.rMode);
    }

    double
    fpSqrt (FPSCR fpscr,double x) const
    {
        return unaryOp(fpscr,x,sqrt,fpscr.fz,fpscr.rMode);
    }
};
878
// Base for conditional FP compare instructions: two source registers,
// a predicate condition, and default flags (defCc) used when the
// condition fails. (Presumably FCCMP/FCCMPE — confirm against the
// decoder.)
class FpCondCompRegOp : public FpOp
{
  protected:
    IntRegIndex op1, op2;    // source registers
    ConditionCode condCode;  // predicate condition
    uint8_t defCc;           // flags used when the condition is false

    FpCondCompRegOp(const char *mnem, ExtMachInst _machInst,
                    OpClass __opClass, IntRegIndex _op1, IntRegIndex _op2,
                    ConditionCode _condCode, uint8_t _defCc) :
        FpOp(mnem, _machInst, __opClass),
        op1(_op1), op2(_op2), condCode(_condCode), defCc(_defCc)
    {}

    std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
};
895
// Base for conditional FP select instructions: chooses between op1 and
// op2 into dest based on condCode. (Presumably FCSEL — confirm against
// the decoder.)
class FpCondSelOp : public FpOp
{
  protected:
    IntRegIndex dest, op1, op2;  // destination and source registers
    ConditionCode condCode;      // predicate condition

    FpCondSelOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
                IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2,
                ConditionCode _condCode) :
        FpOp(mnem, _machInst, __opClass),
        dest(_dest), op1(_op1), op2(_op2), condCode(_condCode)
    {}

    std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
};
911
// FP instruction with one destination and one source register.
class FpRegRegOp : public FpOp
{
  protected:
    IntRegIndex dest;
    IntRegIndex op1;

    FpRegRegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
               IntRegIndex _dest, IntRegIndex _op1,
               VfpMicroMode mode = VfpNotAMicroop) :
        FpOp(mnem, _machInst, __opClass), dest(_dest), op1(_op1)
    {
        // Mark microop/delayed-commit flags for macroop expansions.
        setVfpMicroFlags(mode, flags);
    }

    std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
};
928
// FP instruction with one destination register and an immediate.
class FpRegImmOp : public FpOp
{
  protected:
    IntRegIndex dest;
    uint64_t imm;

    FpRegImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
               IntRegIndex _dest, uint64_t _imm,
               VfpMicroMode mode = VfpNotAMicroop) :
        FpOp(mnem, _machInst, __opClass), dest(_dest), imm(_imm)
    {
        // Mark microop/delayed-commit flags for macroop expansions.
        setVfpMicroFlags(mode, flags);
    }

    std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
};
945
// FP instruction with a destination, one source register and an
// immediate.
class FpRegRegImmOp : public FpOp
{
  protected:
    IntRegIndex dest;
    IntRegIndex op1;
    uint64_t imm;

    FpRegRegImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
                  IntRegIndex _dest, IntRegIndex _op1,
                  uint64_t _imm, VfpMicroMode mode = VfpNotAMicroop) :
        FpOp(mnem, _machInst, __opClass), dest(_dest), op1(_op1), imm(_imm)
    {
        // Mark microop/delayed-commit flags for macroop expansions.
        setVfpMicroFlags(mode, flags);
    }

    std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
};
963
// FP instruction with a destination and two source registers.
class FpRegRegRegOp : public FpOp
{
  protected:
    IntRegIndex dest;
    IntRegIndex op1;
    IntRegIndex op2;

    FpRegRegRegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
                  IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2,
                  VfpMicroMode mode = VfpNotAMicroop) :
        FpOp(mnem, _machInst, __opClass), dest(_dest), op1(_op1), op2(_op2)
    {
        // Mark microop/delayed-commit flags for macroop expansions.
        setVfpMicroFlags(mode, flags);
    }

    std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
};
981
1/*
2 * Copyright (c) 2010-2013 ARM Limited
3 * All rights reserved
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder. You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions are
16 * met: redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer;
18 * redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution;
21 * neither the name of the copyright holders nor the names of its
22 * contributors may be used to endorse or promote products derived from
23 * this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 *
37 * Authors: Gabe Black
38 */
39
40#ifndef __ARCH_ARM_INSTS_VFP_HH__
41#define __ARCH_ARM_INSTS_VFP_HH__
42
43#include <fenv.h>
44
45#include <cmath>
46
47#include "arch/arm/insts/misc.hh"
48#include "arch/arm/miscregs.hh"
49
50namespace ArmISA
51{
52
53enum VfpMicroMode {
54 VfpNotAMicroop,
55 VfpMicroop,
56 VfpFirstMicroop,
57 VfpLastMicroop
58};
59
60template<class T>
61static inline void
62setVfpMicroFlags(VfpMicroMode mode, T &flags)
63{
64 switch (mode) {
65 case VfpMicroop:
66 flags[StaticInst::IsMicroop] = true;
67 break;
68 case VfpFirstMicroop:
69 flags[StaticInst::IsMicroop] =
70 flags[StaticInst::IsFirstMicroop] = true;
71 break;
72 case VfpLastMicroop:
73 flags[StaticInst::IsMicroop] =
74 flags[StaticInst::IsLastMicroop] = true;
75 break;
76 case VfpNotAMicroop:
77 break;
78 }
79 if (mode == VfpMicroop || mode == VfpFirstMicroop) {
80 flags[StaticInst::IsDelayedCommit] = true;
81 }
82}
83
84enum FeExceptionBit
85{
86 FeDivByZero = FE_DIVBYZERO,
87 FeInexact = FE_INEXACT,
88 FeInvalid = FE_INVALID,
89 FeOverflow = FE_OVERFLOW,
90 FeUnderflow = FE_UNDERFLOW,
91 FeAllExceptions = FE_ALL_EXCEPT
92};
93
94enum FeRoundingMode
95{
96 FeRoundDown = FE_DOWNWARD,
97 FeRoundNearest = FE_TONEAREST,
98 FeRoundZero = FE_TOWARDZERO,
99 FeRoundUpward = FE_UPWARD
100};
101
102enum VfpRoundingMode
103{
104 VfpRoundNearest = 0,
105 VfpRoundUpward = 1,
106 VfpRoundDown = 2,
107 VfpRoundZero = 3,
108 VfpRoundAway = 4
109};
110
111static inline float bitsToFp(uint64_t, float);
112static inline double bitsToFp(uint64_t, double);
113static inline uint32_t fpToBits(float);
114static inline uint64_t fpToBits(double);
115
116template <class fpType>
117static inline bool
118flushToZero(fpType &op)
119{
120 fpType junk = 0.0;
121 if (std::fpclassify(op) == FP_SUBNORMAL) {
122 uint64_t bitMask = ULL(0x1) << (sizeof(fpType) * 8 - 1);
123 op = bitsToFp(fpToBits(op) & bitMask, junk);
124 return true;
125 }
126 return false;
127}
128
129template <class fpType>
130static inline bool
131flushToZero(fpType &op1, fpType &op2)
132{
133 bool flush1 = flushToZero(op1);
134 bool flush2 = flushToZero(op2);
135 return flush1 || flush2;
136}
137
138template <class fpType>
139static inline void
140vfpFlushToZero(FPSCR &fpscr, fpType &op)
141{
142 if (fpscr.fz == 1 && flushToZero(op)) {
143 fpscr.idc = 1;
144 }
145}
146
147template <class fpType>
148static inline void
149vfpFlushToZero(FPSCR &fpscr, fpType &op1, fpType &op2)
150{
151 vfpFlushToZero(fpscr, op1);
152 vfpFlushToZero(fpscr, op2);
153}
154
155static inline uint32_t
156fpToBits(float fp)
157{
158 union
159 {
160 float fp;
161 uint32_t bits;
162 } val;
163 val.fp = fp;
164 return val.bits;
165}
166
167static inline uint64_t
168fpToBits(double fp)
169{
170 union
171 {
172 double fp;
173 uint64_t bits;
174 } val;
175 val.fp = fp;
176 return val.bits;
177}
178
179static inline float
180bitsToFp(uint64_t bits, float junk)
181{
182 union
183 {
184 float fp;
185 uint32_t bits;
186 } val;
187 val.bits = bits;
188 return val.fp;
189}
190
191static inline double
192bitsToFp(uint64_t bits, double junk)
193{
194 union
195 {
196 double fp;
197 uint64_t bits;
198 } val;
199 val.bits = bits;
200 return val.fp;
201}
202
203template <class fpType>
204static bool
205isSnan(fpType val)
206{
207 const bool single = (sizeof(fpType) == sizeof(float));
208 const uint64_t qnan =
209 single ? 0x7fc00000 : ULL(0x7ff8000000000000);
210 return std::isnan(val) && ((fpToBits(val) & qnan) != qnan);
211}
212
213typedef int VfpSavedState;
214
215VfpSavedState prepFpState(uint32_t rMode);
216void finishVfp(FPSCR &fpscr, VfpSavedState state, bool flush, FPSCR mask = FpscrExcMask);
217
218template <class fpType>
219fpType fixDest(FPSCR fpscr, fpType val, fpType op1);
220
221template <class fpType>
222fpType fixDest(FPSCR fpscr, fpType val, fpType op1, fpType op2);
223
224template <class fpType>
225fpType fixDivDest(FPSCR fpscr, fpType val, fpType op1, fpType op2);
226
227float fixFpDFpSDest(FPSCR fpscr, double val);
228double fixFpSFpDDest(FPSCR fpscr, float val);
229
230uint16_t vcvtFpSFpH(FPSCR &fpscr, bool flush, bool defaultNan,
231 uint32_t rMode, bool ahp, float op);
232uint16_t vcvtFpDFpH(FPSCR &fpscr, bool flush, bool defaultNan,
233 uint32_t rMode, bool ahp, double op);
234
235float vcvtFpHFpS(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op);
236double vcvtFpHFpD(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op);
237
238static inline double
239makeDouble(uint32_t low, uint32_t high)
240{
241 double junk = 0.0;
242 return bitsToFp((uint64_t)low | ((uint64_t)high << 32), junk);
243}
244
245static inline uint32_t
246lowFromDouble(double val)
247{
248 return fpToBits(val);
249}
250
251static inline uint32_t
252highFromDouble(double val)
253{
254 return fpToBits(val) >> 32;
255}
256
257static inline void
258setFPExceptions(int exceptions) {
259 feclearexcept(FeAllExceptions);
260 feraiseexcept(exceptions);
261}
262
// Convert a floating-point value to a fixed-point integer, honouring a VFP
// rounding mode and ARM saturation semantics.
//
//   val       value to convert (float or double)
//   isSigned  produce a signed (two's complement) result when true
//   width     destination width in bits (16, 32 or 64)
//   imm       number of fraction bits: val is pre-scaled by 2^imm
//   useRmode  when false, keep the host's current rounding mode
//   roundMode VFP rounding mode applied when useRmode is true
//   aarch64   select AArch64 (rather than AArch32) saturation handling
//
// Returns the (possibly saturated) result in the low `width` bits of the
// uint64_t, and raises the corresponding host FP exception flags.
template <typename T>
uint64_t
vfpFpToFixed(T val, bool isSigned, uint8_t width, uint8_t imm, bool
             useRmode = true, VfpRoundingMode roundMode = VfpRoundZero,
             bool aarch64 = false)
{
    int rmode;
    bool roundAwayFix = false;

    if (!useRmode) {
        rmode = fegetround();
    } else {
        // Map the VFP rounding mode onto its host fenv equivalent.
        switch (roundMode)
        {
          case VfpRoundNearest:
            rmode = FeRoundNearest;
            break;
          case VfpRoundUpward:
            rmode = FeRoundUpward;
            break;
          case VfpRoundDown:
            rmode = FeRoundDown;
            break;
          case VfpRoundZero:
            rmode = FeRoundZero;
            break;
          case VfpRoundAway:
            // There is no equivalent host rounding mode, use round down and
            // we'll fix it later (see the roundAwayFix handling below).
            rmode = FeRoundDown;
            roundAwayFix = true;
            break;
          default:
            panic("Unsupported roundMode %d\n", roundMode);
        }
    }
    // The empty asm statements below are compiler barriers: they stop the
    // compiler reordering the FP operations across the fesetround /
    // feclearexcept calls, so each step really executes in the intended
    // rounding-mode/exception-flag context.
    __asm__ __volatile__("" : "=m" (rmode) : "m" (rmode));
    // Scale by 2^imm under round-to-nearest so the scaling itself does not
    // pick up a directed-rounding error.
    fesetround(FeRoundNearest);
    val = val * pow(2.0, imm);
    __asm__ __volatile__("" : "=m" (val) : "m" (val));
    fesetround(rmode);
    feclearexcept(FeAllExceptions);
    __asm__ __volatile__("" : "=m" (val) : "m" (val));
    T origVal = val;
    val = rint(val);
    __asm__ __volatile__("" : "=m" (val) : "m" (val));

    int exceptions = fetestexcept(FeAllExceptions);

    int fpType = std::fpclassify(val);
    if (fpType == FP_SUBNORMAL || fpType == FP_NAN) {
        // NaNs convert to zero and signal Invalid; subnormals flush to zero.
        if (fpType == FP_NAN) {
            exceptions |= FeInvalid;
        }
        val = 0.0;
    } else if (origVal != val) {
        // rint() changed the value; patch up the cases where the host's
        // rounding differs from the required ARM behaviour.
        switch (rmode) {
          case FeRoundNearest:
            if (origVal - val > 0.5)
                val += 1.0;
            else if (val - origVal > 0.5)
                val -= 1.0;
            break;
          case FeRoundDown:
            if (roundAwayFix) {
                // The ordering on the subtraction looks a bit odd in that we
                // don't do the obvious origVal - val, instead we do
                // -(val - origVal). This is required to get the correct bit
                // exact behaviour when very close to the 0.5 threshold.
                volatile T error = val;
                error -= origVal;
                error = -error;
                if ( (error > 0.5) ||
                    ((error == 0.5) && (val >= 0)) )
                    val += 1.0;
            } else {
                if (origVal < val)
                    val -= 1.0;
            }
            break;
          case FeRoundUpward:
            if (origVal > val)
                val += 1.0;
            break;
        }
        exceptions |= FeInexact;
    }

    __asm__ __volatile__("" : "=m" (val) : "m" (val));

    if (isSigned) {
        bool outOfRange = false;
        int64_t result = (int64_t) val;
        uint64_t finalVal;

        if (!aarch64) {
            // AArch32: narrow to the destination width, then saturate
            // against the signed min/max for that width.
            if (width == 16) {
                finalVal = (int16_t)val;
            } else if (width == 32) {
                finalVal =(int32_t)val;
            } else if (width == 64) {
                finalVal = result;
            } else {
                panic("Unsupported width %d\n", width);
            }

            // check if value is in range
            int64_t minVal = ~mask(width-1);
            if ((double)val < minVal) {
                outOfRange = true;
                finalVal = minVal;
            }
            int64_t maxVal = mask(width-1);
            if ((double)val > maxVal) {
                outOfRange = true;
                finalVal = maxVal;
            }
        } else {
            bool isNeg = val < 0;
            finalVal = result & mask(width);
            // If the result is supposed to be less than 64 bits check that the
            // upper bits that got thrown away are just sign extension bits
            if (width != 64) {
                outOfRange = ((uint64_t) result >> (width - 1)) !=
                    (isNeg ? mask(64-width+1) : 0);
            }
            // If the original floating point value doesn't match the integer
            // version, we are also out of range, so create a saturated
            // result.
            if (isNeg) {
                outOfRange |= val < result;
                if (outOfRange) {
                    finalVal = 1LL << (width-1);
                }
            } else {
                outOfRange |= val > result;
                if (outOfRange) {
                    finalVal = mask(width-1);
                }
            }
        }

        // Raise an exception if the value was out of range
        if (outOfRange) {
            exceptions |= FeInvalid;
            exceptions &= ~FeInexact;
        }
        setFPExceptions(exceptions);
        return finalVal;
    } else {
        // Unsigned destination: negative inputs saturate to zero...
        if ((double)val < 0) {
            exceptions |= FeInvalid;
            exceptions &= ~FeInexact;
            setFPExceptions(exceptions);
            return 0;
        }

        uint64_t result = ((uint64_t) val) & mask(width);
        // ...and values beyond the destination range saturate to all-ones.
        if (val > result) {
            exceptions |= FeInvalid;
            exceptions &= ~FeInexact;
            setFPExceptions(exceptions);
            return mask(width);
        }

        setFPExceptions(exceptions);
        return result;
    }
};
432
433
// Fixed-point -> floating-point conversions and Advanced SIMD estimate
// helpers. Definitions are not in this chunk; descriptions inferred from
// names/signatures — confirm against the implementations.

// Convert an unsigned/signed fixed-point value (`width` bits, `imm`
// fraction bits) to single precision.
float vfpUFixedToFpS(bool flush, bool defaultNan,
        uint64_t val, uint8_t width, uint8_t imm);
float vfpSFixedToFpS(bool flush, bool defaultNan,
        int64_t val, uint8_t width, uint8_t imm);

// Same conversions, producing double precision.
double vfpUFixedToFpD(bool flush, bool defaultNan,
        uint64_t val, uint8_t width, uint8_t imm);
double vfpSFixedToFpD(bool flush, bool defaultNan,
        int64_t val, uint8_t width, uint8_t imm);

// Reciprocal square root estimate (FP and unsigned fixed-point forms).
float fprSqrtEstimate(FPSCR &fpscr, float op);
uint32_t unsignedRSqrtEstimate(uint32_t op);

// Reciprocal estimate (FP and unsigned fixed-point forms).
float fpRecipEstimate(FPSCR &fpscr, float op);
uint32_t unsignedRecipEstimate(uint32_t op);
449
// Base class for VFP macro-ops (instructions expanded into several
// micro-ops). Provides register-index stepping helpers used while emitting
// the micro-op sequence.
class VfpMacroOp : public PredMacroOp
{
  public:
    // True when the register lies in the "scalar bank": the first 8
    // registers of each 32-register bank.
    static bool
    inScalarBank(IntRegIndex idx)
    {
        return (idx % 32) < 8;
    }

  protected:
    // NOTE(review): presumably true for double-width (64-bit) operations,
    // affecting how indices are stepped — confirm in addStride/nextIdxs.
    bool wide;

    VfpMacroOp(const char *mnem, ExtMachInst _machInst,
               OpClass __opClass, bool _wide) :
        PredMacroOp(mnem, _machInst, __opClass), wide(_wide)
    {}

    // Advance a register index by `stride`; overloads below step all the
    // operand indices of a micro-op at once. (Defined elsewhere.)
    IntRegIndex addStride(IntRegIndex idx, unsigned stride);
    void nextIdxs(IntRegIndex &dest, IntRegIndex &op1, IntRegIndex &op2);
    void nextIdxs(IntRegIndex &dest, IntRegIndex &op1);
    void nextIdxs(IntRegIndex &dest);
};
472
// Generic IEEE-754 addition of two like-typed operands.
template <typename T>
static inline T
fpAdd(T lhs, T rhs)
{
    const T sum = lhs + rhs;
    return sum;
}
479
// Generic IEEE-754 subtraction: lhs - rhs.
template <typename T>
static inline T
fpSub(T lhs, T rhs)
{
    const T diff = lhs - rhs;
    return diff;
}
486
// Single-precision addition.
static inline float
fpAddS(float lhs, float rhs)
{
    const float sum = lhs + rhs;
    return sum;
}
492
// Double-precision addition.
static inline double
fpAddD(double lhs, double rhs)
{
    const double sum = lhs + rhs;
    return sum;
}
498
// Single-precision subtraction: lhs - rhs.
static inline float
fpSubS(float lhs, float rhs)
{
    const float diff = lhs - rhs;
    return diff;
}
504
// Double-precision subtraction: lhs - rhs.
static inline double
fpSubD(double lhs, double rhs)
{
    const double diff = lhs - rhs;
    return diff;
}
510
// Single-precision division: lhs / rhs.
static inline float
fpDivS(float lhs, float rhs)
{
    const float quot = lhs / rhs;
    return quot;
}
516
// Double-precision division: lhs / rhs.
static inline double
fpDivD(double lhs, double rhs)
{
    const double quot = lhs / rhs;
    return quot;
}
522
// Generic IEEE-754 division: lhs / rhs.
template <typename T>
static inline T
fpDiv(T lhs, T rhs)
{
    const T quot = lhs / rhs;
    return quot;
}
529
// FMULX: identical to a * b except that (±0 × ±Inf) and (±Inf × ±0), which
// would produce a NaN under IEEE rules, instead yield ±2.0 with the sign
// given by the exclusive-or of the operand signs.
template <typename T>
static inline T
fpMulX(T a, T b)
{
    const bool aInf  = std::fpclassify(a) == FP_INFINITE;
    const bool bInf  = std::fpclassify(b) == FP_INFINITE;
    const bool aZero = std::fpclassify(a) == FP_ZERO;
    const bool bZero = std::fpclassify(b) == FP_ZERO;

    if ((aInf && bZero) || (aZero && bInf)) {
        const bool signsDiffer = std::signbit(a) != std::signbit(b);
        return signsDiffer ? (T)(-2.0) : (T)(2.0);
    }
    return a * b;
}
562
563
// Generic IEEE-754 multiplication.
template <typename T>
static inline T
fpMul(T lhs, T rhs)
{
    const T prod = lhs * rhs;
    return prod;
}
570
// Single-precision multiplication.
static inline float
fpMulS(float lhs, float rhs)
{
    const float prod = lhs * rhs;
    return prod;
}
576
// Double-precision multiplication.
static inline double
fpMulD(double lhs, double rhs)
{
    const double prod = lhs * rhs;
    return prod;
}
582
583template <typename T>
584static inline T
585// @todo remove this when all calls to it have been replaced with the new fplib implementation
586fpMulAdd(T op1, T op2, T addend)
587{
588 T result;
589
590 if (sizeof(T) == sizeof(float))
591 result = fmaf(op1, op2, addend);
592 else
593 result = fma(op1, op2, addend);
594
595 // ARM doesn't generate signed nan's from this opperation, so fix up the result
596 if (std::isnan(result) && !std::isnan(op1) &&
597 !std::isnan(op2) && !std::isnan(addend))
598 {
599 uint64_t bitMask = ULL(0x1) << ((sizeof(T) * 8) - 1);
600 result = bitsToFp(fpToBits(result) & ~bitMask, op1);
601 }
602 return result;
603}
604
605template <typename T>
606static inline T
607fpRIntX(T a, FPSCR &fpscr)
608{
609 T rVal;
610
611 rVal = rint(a);
612 if (rVal != a && !std::isnan(a))
613 fpscr.ixc = 1;
614 return (rVal);
615};
616
617template <typename T>
618static inline T
619fpMaxNum(T a, T b)
620{
621 const bool single = (sizeof(T) == sizeof(float));
622 const uint64_t qnan = single ? 0x7fc00000 : ULL(0x7ff8000000000000);
623
624 if (std::isnan(a))
625 return ((fpToBits(a) & qnan) == qnan) ? b : a;
626 if (std::isnan(b))
627 return ((fpToBits(b) & qnan) == qnan) ? a : b;
628 // Handle comparisons of +0 and -0.
629 if (!std::signbit(a) && std::signbit(b))
630 return a;
631 return fmax(a, b);
632};
633
634template <typename T>
635static inline T
636fpMax(T a, T b)
637{
638 if (std::isnan(a))
639 return a;
640 if (std::isnan(b))
641 return b;
642 return fpMaxNum<T>(a, b);
643};
644
645template <typename T>
646static inline T
647fpMinNum(T a, T b)
648{
649 const bool single = (sizeof(T) == sizeof(float));
650 const uint64_t qnan = single ? 0x7fc00000 : ULL(0x7ff8000000000000);
651
652 if (std::isnan(a))
653 return ((fpToBits(a) & qnan) == qnan) ? b : a;
654 if (std::isnan(b))
655 return ((fpToBits(b) & qnan) == qnan) ? a : b;
656 // Handle comparisons of +0 and -0.
657 if (std::signbit(a) && !std::signbit(b))
658 return a;
659 return fmin(a, b);
660};
661
662template <typename T>
663static inline T
664fpMin(T a, T b)
665{
666 if (std::isnan(a))
667 return a;
668 if (std::isnan(b))
669 return b;
670 return fpMinNum<T>(a, b);
671};
672
673template <typename T>
674static inline T
675fpRSqrts(T a, T b)
676{
677 int fpClassA = std::fpclassify(a);
678 int fpClassB = std::fpclassify(b);
679 T aXb;
680 int fpClassAxB;
681
682 if ((fpClassA == FP_ZERO && fpClassB == FP_INFINITE) ||
683 (fpClassA == FP_INFINITE && fpClassB == FP_ZERO)) {
684 return 1.5;
685 }
686 aXb = a*b;
687 fpClassAxB = std::fpclassify(aXb);
688 if (fpClassAxB == FP_SUBNORMAL) {
689 feraiseexcept(FeUnderflow);
690 return 1.5;
691 }
692 return (3.0 - (a * b)) / 2.0;
693};
694
695template <typename T>
696static inline T
697fpRecps(T a, T b)
698{
699 int fpClassA = std::fpclassify(a);
700 int fpClassB = std::fpclassify(b);
701 T aXb;
702 int fpClassAxB;
703
704 if ((fpClassA == FP_ZERO && fpClassB == FP_INFINITE) ||
705 (fpClassA == FP_INFINITE && fpClassB == FP_ZERO)) {
706 return 2.0;
707 }
708 aXb = a*b;
709 fpClassAxB = std::fpclassify(aXb);
710 if (fpClassAxB == FP_SUBNORMAL) {
711 feraiseexcept(FeUnderflow);
712 return 2.0;
713 }
714 return 2.0 - (a * b);
715};
716
717
718static inline float
719fpRSqrtsS(float a, float b)
720{
721 int fpClassA = std::fpclassify(a);
722 int fpClassB = std::fpclassify(b);
723 float aXb;
724 int fpClassAxB;
725
726 if ((fpClassA == FP_ZERO && fpClassB == FP_INFINITE) ||
727 (fpClassA == FP_INFINITE && fpClassB == FP_ZERO)) {
728 return 1.5;
729 }
730 aXb = a*b;
731 fpClassAxB = std::fpclassify(aXb);
732 if (fpClassAxB == FP_SUBNORMAL) {
733 feraiseexcept(FeUnderflow);
734 return 1.5;
735 }
736 return (3.0 - (a * b)) / 2.0;
737}
738
739static inline float
740fpRecpsS(float a, float b)
741{
742 int fpClassA = std::fpclassify(a);
743 int fpClassB = std::fpclassify(b);
744 float aXb;
745 int fpClassAxB;
746
747 if ((fpClassA == FP_ZERO && fpClassB == FP_INFINITE) ||
748 (fpClassA == FP_INFINITE && fpClassB == FP_ZERO)) {
749 return 2.0;
750 }
751 aXb = a*b;
752 fpClassAxB = std::fpclassify(aXb);
753 if (fpClassAxB == FP_SUBNORMAL) {
754 feraiseexcept(FeUnderflow);
755 return 2.0;
756 }
757 return 2.0 - (a * b);
758}
759
// Round to the nearest integral value, ties to even.
//
// std::round() rounds halfway cases away from zero; when the input was
// exactly halfway and that landed on an odd integer, pull the result back
// by one (toward zero) to get ties-to-even.
//
// The previous implementation tested parity with `((int) a) & 1`, which is
// undefined behaviour/overflow for |a| >= 2^31 even though doubles carry
// .5 fractions up to 2^52; std::fmod on the rounded value is parity-safe
// at every magnitude.
template <typename T>
static inline T
roundNEven(T a) {
    T val = std::round(a);
    if (std::fabs(a - val) == (T)0.5 && std::fmod(val, (T)2.0) != (T)0.0) {
        // Halfway case that rounded to an odd integer: step back toward a.
        val -= std::copysign((T)1.0, val);
    }
    return val;
}
774
775
776
// Base class for floating-point instructions. Provides the doOp() hooks
// subclasses override for their operand width/count, helpers to
// (de)compose doubles from 32-bit register halves, NaN-handling and
// unary/binary/ternary operation wrappers, and micro-op aware PC
// advancement.
class FpOp : public PredOp
{
  protected:
    FpOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass) :
        PredOp(mnem, _machInst, __opClass)
    {}

    // doOp() overloads: a subclass overrides the variant(s) matching its
    // operand count and precision; calling an un-overridden one panics.
    virtual float
    doOp(float op1, float op2) const
    {
        panic("Unimplemented version of doOp called.\n");
    }

    virtual float
    doOp(float op1) const
    {
        panic("Unimplemented version of doOp called.\n");
    }

    virtual double
    doOp(double op1, double op2) const
    {
        panic("Unimplemented version of doOp called.\n");
    }

    virtual double
    doOp(double op1) const
    {
        panic("Unimplemented version of doOp called.\n");
    }

    // Reassemble a double from its low/high 32-bit register halves.
    double
    dbl(uint32_t low, uint32_t high) const
    {
        double junk = 0.0;
        return bitsToFp((uint64_t)low | ((uint64_t)high << 32), junk);
    }

    // Low 32 bits of a double's raw encoding.
    uint32_t
    dblLow(double val) const
    {
        return fpToBits(val);
    }

    // High 32 bits of a double's raw encoding.
    uint32_t
    dblHi(double val) const
    {
        return fpToBits(val) >> 32;
    }

    // NOTE(review): presumably computes the NaN-propagation result for a
    // binary op and sets `done` when a NaN decided the outcome — definition
    // is not in this chunk; confirm there.
    template <class fpType>
    fpType
    processNans(FPSCR &fpscr, bool &done, bool defaultNan,
                fpType op1, fpType op2) const;

    // Wrappers that run `func` under FPSCR-directed flush-to-zero,
    // default-NaN and rounding-mode behaviour (defined elsewhere).
    template <class fpType>
    fpType
    ternaryOp(FPSCR &fpscr, fpType op1, fpType op2, fpType op3,
              fpType (*func)(fpType, fpType, fpType),
              bool flush, bool defaultNan, uint32_t rMode) const;

    template <class fpType>
    fpType
    binaryOp(FPSCR &fpscr, fpType op1, fpType op2,
             fpType (*func)(fpType, fpType),
             bool flush, bool defaultNan, uint32_t rMode) const;

    template <class fpType>
    fpType
    unaryOp(FPSCR &fpscr, fpType op1,
            fpType (*func)(fpType),
            bool flush, uint32_t rMode) const;

    // Micro-op aware PC advance: the last micro-op ends the macro-op,
    // other micro-ops advance the micro-PC, plain instructions advance the
    // PC normally.
    void
    advancePC(PCState &pcState) const
    {
        if (flags[IsLastMicroop]) {
            pcState.uEnd();
        } else if (flags[IsMicroop]) {
            pcState.uAdvance();
        } else {
            pcState.advance();
        }
    }

    // Square root routed through unaryOp so FPSCR flush-to-zero and
    // rounding-mode settings are applied.
    float
    fpSqrt (FPSCR fpscr,float x) const
    {

        return unaryOp(fpscr,x,sqrtf,fpscr.fz,fpscr.rMode);

    }

    double
    fpSqrt (FPSCR fpscr,double x) const
    {

        return unaryOp(fpscr,x,sqrt,fpscr.fz,fpscr.rMode);

    }
};
878
// Field container for FP conditional-compare instructions: two source
// registers, the governing condition, and a default flags value used when
// the condition fails. Execution semantics are implemented elsewhere.
class FpCondCompRegOp : public FpOp
{
  protected:
    // Source floating-point register indices.
    IntRegIndex op1, op2;
    // Condition guarding the compare.
    ConditionCode condCode;
    // Default condition-flags value (presumably an NZCV nibble — confirm).
    uint8_t defCc;

    FpCondCompRegOp(const char *mnem, ExtMachInst _machInst,
                    OpClass __opClass, IntRegIndex _op1, IntRegIndex _op2,
                    ConditionCode _condCode, uint8_t _defCc) :
        FpOp(mnem, _machInst, __opClass),
        op1(_op1), op2(_op2), condCode(_condCode), defCc(_defCc)
    {}

    std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
};
895
// Field container for FP conditional-select instructions: destination, two
// sources and the selecting condition. Execution semantics are implemented
// elsewhere.
class FpCondSelOp : public FpOp
{
  protected:
    // Destination and source floating-point register indices.
    IntRegIndex dest, op1, op2;
    // Condition that chooses between the sources.
    ConditionCode condCode;

    FpCondSelOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
                IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2,
                ConditionCode _condCode) :
        FpOp(mnem, _machInst, __opClass),
        dest(_dest), op1(_op1), op2(_op2), condCode(_condCode)
    {}

    std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
};
911
// FP instruction with a destination and one source register; `mode` marks
// it as a VFP micro-op when it is part of a macro-op expansion.
class FpRegRegOp : public FpOp
{
  protected:
    IntRegIndex dest;
    IntRegIndex op1;

    FpRegRegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
               IntRegIndex _dest, IntRegIndex _op1,
               VfpMicroMode mode = VfpNotAMicroop) :
        FpOp(mnem, _machInst, __opClass), dest(_dest), op1(_op1)
    {
        setVfpMicroFlags(mode, flags);
    }

    std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
};
928
// FP instruction with a destination register and an immediate operand;
// `mode` marks it as a VFP micro-op when part of a macro-op expansion.
class FpRegImmOp : public FpOp
{
  protected:
    IntRegIndex dest;
    uint64_t imm;

    FpRegImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
               IntRegIndex _dest, uint64_t _imm,
               VfpMicroMode mode = VfpNotAMicroop) :
        FpOp(mnem, _machInst, __opClass), dest(_dest), imm(_imm)
    {
        setVfpMicroFlags(mode, flags);
    }

    std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
};
945
// FP instruction with a destination, one source register and an immediate;
// `mode` marks it as a VFP micro-op when part of a macro-op expansion.
class FpRegRegImmOp : public FpOp
{
  protected:
    IntRegIndex dest;
    IntRegIndex op1;
    uint64_t imm;

    FpRegRegImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
                  IntRegIndex _dest, IntRegIndex _op1,
                  uint64_t _imm, VfpMicroMode mode = VfpNotAMicroop) :
        FpOp(mnem, _machInst, __opClass), dest(_dest), op1(_op1), imm(_imm)
    {
        setVfpMicroFlags(mode, flags);
    }

    std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
};
963
// FP instruction with a destination and two source registers; `mode` marks
// it as a VFP micro-op when part of a macro-op expansion.
class FpRegRegRegOp : public FpOp
{
  protected:
    IntRegIndex dest;
    IntRegIndex op1;
    IntRegIndex op2;

    FpRegRegRegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
                  IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2,
                  VfpMicroMode mode = VfpNotAMicroop) :
        FpOp(mnem, _machInst, __opClass), dest(_dest), op1(_op1), op2(_op2)
    {
        setVfpMicroFlags(mode, flags);
    }

    std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
};
981
// FP instruction with a destination, two source registers and a condition;
// `mode` marks it as a VFP micro-op when part of a macro-op expansion.
class FpRegRegRegCondOp : public FpOp
{
  protected:
    IntRegIndex dest;
    IntRegIndex op1;
    IntRegIndex op2;
    ConditionCode cond;

    FpRegRegRegCondOp(const char *mnem, ExtMachInst _machInst,
                      OpClass __opClass, IntRegIndex _dest, IntRegIndex _op1,
                      IntRegIndex _op2, ConditionCode _cond,
                      VfpMicroMode mode = VfpNotAMicroop) :
        FpOp(mnem, _machInst, __opClass), dest(_dest), op1(_op1), op2(_op2),
        cond(_cond)
    {
        setVfpMicroFlags(mode, flags);
    }

    std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
};
1002
// FP instruction with a destination and three source registers; `mode`
// marks it as a VFP micro-op when part of a macro-op expansion.
class FpRegRegRegRegOp : public FpOp
{
  protected:
    IntRegIndex dest;
    IntRegIndex op1;
    IntRegIndex op2;
    IntRegIndex op3;

    FpRegRegRegRegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
                     IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2,
                     IntRegIndex _op3, VfpMicroMode mode = VfpNotAMicroop) :
        FpOp(mnem, _machInst, __opClass), dest(_dest), op1(_op1), op2(_op2),
        op3(_op3)
    {
        setVfpMicroFlags(mode, flags);
    }

    std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
};
1001
// FP instruction with a destination, two source registers and an immediate;
// `mode` marks it as a VFP micro-op when part of a macro-op expansion.
class FpRegRegRegImmOp : public FpOp
{
  protected:
    IntRegIndex dest;
    IntRegIndex op1;
    IntRegIndex op2;
    uint64_t imm;

    FpRegRegRegImmOp(const char *mnem, ExtMachInst _machInst,
                     OpClass __opClass, IntRegIndex _dest,
                     IntRegIndex _op1, IntRegIndex _op2,
                     uint64_t _imm, VfpMicroMode mode = VfpNotAMicroop) :
        FpOp(mnem, _machInst, __opClass),
        dest(_dest), op1(_op1), op2(_op2), imm(_imm)
    {
        setVfpMicroFlags(mode, flags);
    }

    std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
};
1022
1023}
1024
1025#endif //__ARCH_ARM_INSTS_VFP_HH__
// NOTE(review): this definition appears to be an exact duplicate of the
// FpRegRegRegRegOp defined earlier in this file view — it looks like a
// merge/extraction artifact. Confirm only one copy exists in the real
// header; two definitions in one translation unit will not compile.
//
// FP instruction with a destination and three source registers; `mode`
// marks it as a VFP micro-op when part of a macro-op expansion.
class FpRegRegRegRegOp : public FpOp
{
  protected:
    IntRegIndex dest;
    IntRegIndex op1;
    IntRegIndex op2;
    IntRegIndex op3;

    FpRegRegRegRegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
                     IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2,
                     IntRegIndex _op3, VfpMicroMode mode = VfpNotAMicroop) :
        FpOp(mnem, _machInst, __opClass), dest(_dest), op1(_op1), op2(_op2),
        op3(_op3)
    {
        setVfpMicroFlags(mode, flags);
    }

    std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
};
1022
// NOTE(review): this definition appears to be an exact duplicate of the
// FpRegRegRegImmOp defined earlier in this file view — it looks like a
// merge/extraction artifact. Confirm only one copy exists in the real
// header; two definitions in one translation unit will not compile.
//
// FP instruction with a destination, two source registers and an immediate;
// `mode` marks it as a VFP micro-op when part of a macro-op expansion.
class FpRegRegRegImmOp : public FpOp
{
  protected:
    IntRegIndex dest;
    IntRegIndex op1;
    IntRegIndex op2;
    uint64_t imm;

    FpRegRegRegImmOp(const char *mnem, ExtMachInst _machInst,
                     OpClass __opClass, IntRegIndex _dest,
                     IntRegIndex _op1, IntRegIndex _op2,
                     uint64_t _imm, VfpMicroMode mode = VfpNotAMicroop) :
        FpOp(mnem, _machInst, __opClass),
        dest(_dest), op1(_op1), op2(_op2), imm(_imm)
    {
        setVfpMicroFlags(mode, flags);
    }

    std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
};
1043
1044}
1045
1046#endif //__ARCH_ARM_INSTS_VFP_HH__