vfp.cc (9918:2c7219e2d999) vfp.cc (10037:5cac77888310)
1/*
1/*
2 * Copyright (c) 2010 ARM Limited
2 * Copyright (c) 2010-2013 ARM Limited
3 * All rights reserved
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder. You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated

--- 30 unchanged lines hidden (view full) ---

41
42/*
43 * The asm statements below are to keep gcc from reordering code. Otherwise
44 * the rounding mode might be set after the operation it was intended for, the
45 * exception bits read before it, etc.
46 */
47
48std::string
3 * All rights reserved
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder. You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated

--- 30 unchanged lines hidden (view full) ---

41
42/*
43 * The asm statements below are to keep gcc from reordering code. Otherwise
44 * the rounding mode might be set after the operation it was intended for, the
45 * exception bits read before it, etc.
46 */
47
48std::string
49FpCondCompRegOp::generateDisassembly(
50 Addr pc, const SymbolTable *symtab) const
51{
52 std::stringstream ss;
53 printMnemonic(ss, "", false);
54 printReg(ss, op1);
55 ccprintf(ss, ", ");
56 printReg(ss, op2);
57 ccprintf(ss, ", #%d", defCc);
58 ccprintf(ss, ", ");
59 printCondition(ss, condCode, true);
60 return ss.str();
61}
62
63std::string
64FpCondSelOp::generateDisassembly(
65 Addr pc, const SymbolTable *symtab) const
66{
67 std::stringstream ss;
68 printMnemonic(ss, "", false);
69 printReg(ss, dest);
70 ccprintf(ss, ", ");
71 printReg(ss, op1);
72 ccprintf(ss, ", ");
73 printReg(ss, op2);
74 ccprintf(ss, ", ");
75 printCondition(ss, condCode, true);
76 return ss.str();
77}
78
79std::string
49FpRegRegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
50{
51 std::stringstream ss;
52 printMnemonic(ss);
53 printReg(ss, dest + FP_Reg_Base);
54 ss << ", ";
55 printReg(ss, op1 + FP_Reg_Base);
56 return ss.str();

--- 30 unchanged lines hidden (view full) ---

87 ss << ", ";
88 printReg(ss, op1 + FP_Reg_Base);
89 ss << ", ";
90 printReg(ss, op2 + FP_Reg_Base);
91 return ss.str();
92}
93
94std::string
80FpRegRegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
81{
82 std::stringstream ss;
83 printMnemonic(ss);
84 printReg(ss, dest + FP_Reg_Base);
85 ss << ", ";
86 printReg(ss, op1 + FP_Reg_Base);
87 return ss.str();

--- 30 unchanged lines hidden (view full) ---

118 ss << ", ";
119 printReg(ss, op1 + FP_Reg_Base);
120 ss << ", ";
121 printReg(ss, op2 + FP_Reg_Base);
122 return ss.str();
123}
124
125std::string
126FpRegRegRegRegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
127{
128 std::stringstream ss;
129 printMnemonic(ss);
130 printReg(ss, dest + FP_Reg_Base);
131 ss << ", ";
132 printReg(ss, op1 + FP_Reg_Base);
133 ss << ", ";
134 printReg(ss, op2 + FP_Reg_Base);
135 ss << ", ";
136 printReg(ss, op3 + FP_Reg_Base);
137 return ss.str();
138}
139
140std::string
95FpRegRegRegImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
96{
97 std::stringstream ss;
98 printMnemonic(ss);
99 printReg(ss, dest + FP_Reg_Base);
100 ss << ", ";
101 printReg(ss, op1 + FP_Reg_Base);
102 ss << ", ";

--- 23 unchanged lines hidden (view full) ---

126 case VfpRoundZero:
127 fesetround(FeRoundZero);
128 break;
129 }
130 return roundingMode;
131}
132
133void
141FpRegRegRegImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
142{
143 std::stringstream ss;
144 printMnemonic(ss);
145 printReg(ss, dest + FP_Reg_Base);
146 ss << ", ";
147 printReg(ss, op1 + FP_Reg_Base);
148 ss << ", ";

--- 23 unchanged lines hidden (view full) ---

172 case VfpRoundZero:
173 fesetround(FeRoundZero);
174 break;
175 }
176 return roundingMode;
177}
178
179void
134finishVfp(FPSCR &fpscr, VfpSavedState state, bool flush)
180finishVfp(FPSCR &fpscr, VfpSavedState state, bool flush, FPSCR mask)
135{
136 int exceptions = fetestexcept(FeAllExceptions);
137 bool underflow = false;
181{
182 int exceptions = fetestexcept(FeAllExceptions);
183 bool underflow = false;
138 if (exceptions & FeInvalid) {
184 if ((exceptions & FeInvalid) && mask.ioc) {
139 fpscr.ioc = 1;
140 }
185 fpscr.ioc = 1;
186 }
141 if (exceptions & FeDivByZero) {
187 if ((exceptions & FeDivByZero) && mask.dzc) {
142 fpscr.dzc = 1;
143 }
188 fpscr.dzc = 1;
189 }
144 if (exceptions & FeOverflow) {
190 if ((exceptions & FeOverflow) && mask.ofc) {
145 fpscr.ofc = 1;
146 }
147 if (exceptions & FeUnderflow) {
148 underflow = true;
191 fpscr.ofc = 1;
192 }
193 if (exceptions & FeUnderflow) {
194 underflow = true;
149 fpscr.ufc = 1;
195 if (mask.ufc)
196 fpscr.ufc = 1;
150 }
197 }
151 if ((exceptions & FeInexact) && !(underflow && flush)) {
198 if ((exceptions & FeInexact) && !(underflow && flush) && mask.ixc) {
152 fpscr.ixc = 1;
153 }
154 fesetround(state);
155}
156
157template <class fpType>
158fpType
159fixDest(bool flush, bool defaultNan, fpType val, fpType op1)

--- 164 unchanged lines hidden (view full) ---

324 mid = temp;
325 }
326 }
327 __asm__ __volatile__("" :: "m" (temp));
328 }
329 return mid;
330}
331
199 fpscr.ixc = 1;
200 }
201 fesetround(state);
202}
203
204template <class fpType>
205fpType
206fixDest(bool flush, bool defaultNan, fpType val, fpType op1)

--- 164 unchanged lines hidden (view full) ---

371 mid = temp;
372 }
373 }
374 __asm__ __volatile__("" :: "m" (temp));
375 }
376 return mid;
377}
378
332uint16_t
333vcvtFpSFpH(FPSCR &fpscr, bool flush, bool defaultNan,
334 uint32_t rMode, bool ahp, float op)
379static inline uint16_t
380vcvtFpFpH(FPSCR &fpscr, bool flush, bool defaultNan,
381 uint32_t rMode, bool ahp, uint64_t opBits, bool isDouble)
335{
382{
336 uint32_t opBits = fpToBits(op);
383 uint32_t mWidth;
384 uint32_t eWidth;
385 uint32_t eHalfRange;
386 uint32_t sBitPos;
387
388 if (isDouble) {
389 mWidth = 52;
390 eWidth = 11;
391 } else {
392 mWidth = 23;
393 eWidth = 8;
394 }
395 sBitPos = eWidth + mWidth;
396 eHalfRange = (1 << (eWidth-1)) - 1;
397
337 // Extract the operand.
398 // Extract the operand.
338 bool neg = bits(opBits, 31);
339 uint32_t exponent = bits(opBits, 30, 23);
340 uint32_t oldMantissa = bits(opBits, 22, 0);
341 uint32_t mantissa = oldMantissa >> (23 - 10);
399 bool neg = bits(opBits, sBitPos);
400 uint32_t exponent = bits(opBits, sBitPos-1, mWidth);
401 uint64_t oldMantissa = bits(opBits, mWidth-1, 0);
402 uint32_t mantissa = oldMantissa >> (mWidth - 10);
342 // Do the conversion.
403 // Do the conversion.
343 uint32_t extra = oldMantissa & mask(23 - 10);
344 if (exponent == 0xff) {
404 uint64_t extra = oldMantissa & mask(mWidth - 10);
405 if (exponent == mask(eWidth)) {
345 if (oldMantissa != 0) {
346 // Nans.
347 if (bits(mantissa, 9) == 0) {
348 // Signalling nan.
349 fpscr.ioc = 1;
350 }
351 if (ahp) {
352 mantissa = 0;

--- 21 unchanged lines hidden (view full) ---

374 // Zero, don't need to do anything.
375 } else {
376 // Normalized or denormalized numbers.
377
378 bool inexact = (extra != 0);
379
380 if (exponent == 0) {
381 // Denormalized.
406 if (oldMantissa != 0) {
407 // Nans.
408 if (bits(mantissa, 9) == 0) {
409 // Signalling nan.
410 fpscr.ioc = 1;
411 }
412 if (ahp) {
413 mantissa = 0;

--- 21 unchanged lines hidden (view full) ---

435 // Zero, don't need to do anything.
436 } else {
437 // Normalized or denormalized numbers.
438
439 bool inexact = (extra != 0);
440
441 if (exponent == 0) {
442 // Denormalized.
382
383 // If flush to zero is on, this shouldn't happen.
384 assert(!flush);
385
386 // Check for underflow
387 if (inexact || fpscr.ufe)
388 fpscr.ufc = 1;
389
390 // Handle rounding.

--- 11 unchanged lines hidden (view full) ---

402 mantissa = 0;
403 exponent = 1;
404 }
405 } else {
406 // Normalized.
407
408 // We need to track the dropped bits differently since
409 // more can be dropped by denormalizing.
443 // If flush to zero is on, this shouldn't happen.
444 assert(!flush);
445
446 // Check for underflow
447 if (inexact || fpscr.ufe)
448 fpscr.ufc = 1;
449
450 // Handle rounding.

--- 11 unchanged lines hidden (view full) ---

462 mantissa = 0;
463 exponent = 1;
464 }
465 } else {
466 // Normalized.
467
468 // We need to track the dropped bits differently since
469 // more can be dropped by denormalizing.
410 bool topOne = bits(extra, 12);
411 bool restZeros = bits(extra, 11, 0) == 0;
470 bool topOne = bits(extra, mWidth - 10 - 1);
471 bool restZeros = bits(extra, mWidth - 10 - 2, 0) == 0;
412
472
413 if (exponent <= (127 - 15)) {
473 if (exponent <= (eHalfRange - 15)) {
414 // The result is too small. Denormalize.
415 mantissa |= (1 << 10);
474 // The result is too small. Denormalize.
475 mantissa |= (1 << 10);
416 while (mantissa && exponent <= (127 - 15)) {
476 while (mantissa && exponent <= (eHalfRange - 15)) {
417 restZeros = restZeros && !topOne;
418 topOne = bits(mantissa, 0);
419 mantissa = mantissa >> 1;
420 exponent++;
421 }
422 if (topOne || !restZeros)
423 inexact = true;
424 exponent = 0;
425 } else {
426 // Change bias.
477 restZeros = restZeros && !topOne;
478 topOne = bits(mantissa, 0);
479 mantissa = mantissa >> 1;
480 exponent++;
481 }
482 if (topOne || !restZeros)
483 inexact = true;
484 exponent = 0;
485 } else {
486 // Change bias.
427 exponent -= (127 - 15);
487 exponent -= (eHalfRange - 15);
428 }
429
430 if (exponent == 0 && (inexact || fpscr.ufe)) {
431 // Underflow
432 fpscr.ufc = 1;
433 }
434
435 // Handle rounding.

--- 47 unchanged lines hidden (view full) ---

483 // Reassemble and install the result.
484 uint32_t result = bits(mantissa, 9, 0);
485 replaceBits(result, 14, 10, exponent);
486 if (neg)
487 result |= (1 << 15);
488 return result;
489}
490
488 }
489
490 if (exponent == 0 && (inexact || fpscr.ufe)) {
491 // Underflow
492 fpscr.ufc = 1;
493 }
494
495 // Handle rounding.

--- 47 unchanged lines hidden (view full) ---

543 // Reassemble and install the result.
544 uint32_t result = bits(mantissa, 9, 0);
545 replaceBits(result, 14, 10, exponent);
546 if (neg)
547 result |= (1 << 15);
548 return result;
549}
550
491float
492vcvtFpHFpS(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op)
551uint16_t
552vcvtFpSFpH(FPSCR &fpscr, bool flush, bool defaultNan,
553 uint32_t rMode, bool ahp, float op)
493{
554{
494 float junk = 0.0;
555 uint64_t opBits = fpToBits(op);
556 return vcvtFpFpH(fpscr, flush, defaultNan, rMode, ahp, opBits, false);
557}
558
559uint16_t
560vcvtFpDFpH(FPSCR &fpscr, bool flush, bool defaultNan,
561 uint32_t rMode, bool ahp, double op)
562{
563 uint64_t opBits = fpToBits(op);
564 return vcvtFpFpH(fpscr, flush, defaultNan, rMode, ahp, opBits, true);
565}
566
567static inline uint64_t
568vcvtFpHFp(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op, bool isDouble)
569{
570 uint32_t mWidth;
571 uint32_t eWidth;
572 uint32_t eHalfRange;
573 uint32_t sBitPos;
574
575 if (isDouble) {
576 mWidth = 52;
577 eWidth = 11;
578 } else {
579 mWidth = 23;
580 eWidth = 8;
581 }
582 sBitPos = eWidth + mWidth;
583 eHalfRange = (1 << (eWidth-1)) - 1;
584
495 // Extract the bitfields.
496 bool neg = bits(op, 15);
497 uint32_t exponent = bits(op, 14, 10);
585 // Extract the bitfields.
586 bool neg = bits(op, 15);
587 uint32_t exponent = bits(op, 14, 10);
498 uint32_t mantissa = bits(op, 9, 0);
588 uint64_t mantissa = bits(op, 9, 0);
499 // Do the conversion.
500 if (exponent == 0) {
501 if (mantissa != 0) {
502 // Normalize the value.
589 // Do the conversion.
590 if (exponent == 0) {
591 if (mantissa != 0) {
592 // Normalize the value.
503 exponent = exponent + (127 - 15) + 1;
593 exponent = exponent + (eHalfRange - 15) + 1;
504 while (mantissa < (1 << 10)) {
505 mantissa = mantissa << 1;
506 exponent--;
507 }
508 }
594 while (mantissa < (1 << 10)) {
595 mantissa = mantissa << 1;
596 exponent--;
597 }
598 }
509 mantissa = mantissa << (23 - 10);
599 mantissa = mantissa << (mWidth - 10);
510 } else if (exponent == 0x1f && !ahp) {
511 // Infinities and nans.
600 } else if (exponent == 0x1f && !ahp) {
601 // Infinities and nans.
512 exponent = 0xff;
602 exponent = mask(eWidth);
513 if (mantissa != 0) {
514 // Nans.
603 if (mantissa != 0) {
604 // Nans.
515 mantissa = mantissa << (23 - 10);
516 if (bits(mantissa, 22) == 0) {
605 mantissa = mantissa << (mWidth - 10);
606 if (bits(mantissa, mWidth-1) == 0) {
517 // Signalling nan.
518 fpscr.ioc = 1;
607 // Signalling nan.
608 fpscr.ioc = 1;
519 mantissa |= (1 << 22);
609 mantissa |= (((uint64_t) 1) << (mWidth-1));
520 }
521 if (defaultNan) {
610 }
611 if (defaultNan) {
522 mantissa &= ~mask(22);
612 mantissa &= ~mask(mWidth-1);
523 neg = false;
524 }
525 }
526 } else {
613 neg = false;
614 }
615 }
616 } else {
527 exponent = exponent + (127 - 15);
528 mantissa = mantissa << (23 - 10);
617 exponent = exponent + (eHalfRange - 15);
618 mantissa = mantissa << (mWidth - 10);
529 }
530 // Reassemble the result.
619 }
620 // Reassemble the result.
531 uint32_t result = bits(mantissa, 22, 0);
532 replaceBits(result, 30, 23, exponent);
533 if (neg)
534 result |= (1 << 31);
621 uint64_t result = bits(mantissa, mWidth-1, 0);
622 replaceBits(result, sBitPos-1, mWidth, exponent);
623 if (neg) {
624 result |= (((uint64_t) 1) << sBitPos);
625 }
626 return result;
627}
628
629double
630vcvtFpHFpD(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op)
631{
632 double junk = 0.0;
633 uint64_t result;
634
635 result = vcvtFpHFp(fpscr, defaultNan, ahp, op, true);
535 return bitsToFp(result, junk);
536}
537
636 return bitsToFp(result, junk);
637}
638
538uint64_t
539vfpFpSToFixed(float val, bool isSigned, bool half,
540 uint8_t imm, bool rzero)
639float
640vcvtFpHFpS(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op)
541{
641{
542 int rmode = rzero ? FeRoundZero : fegetround();
543 __asm__ __volatile__("" : "=m" (rmode) : "m" (rmode));
544 fesetround(FeRoundNearest);
545 val = val * powf(2.0, imm);
546 __asm__ __volatile__("" : "=m" (val) : "m" (val));
547 fesetround(rmode);
548 feclearexcept(FeAllExceptions);
549 __asm__ __volatile__("" : "=m" (val) : "m" (val));
550 float origVal = val;
551 val = rintf(val);
552 int fpType = std::fpclassify(val);
553 if (fpType == FP_SUBNORMAL || fpType == FP_NAN) {
554 if (fpType == FP_NAN) {
555 feraiseexcept(FeInvalid);
556 }
557 val = 0.0;
558 } else if (origVal != val) {
559 switch (rmode) {
560 case FeRoundNearest:
561 if (origVal - val > 0.5)
562 val += 1.0;
563 else if (val - origVal > 0.5)
564 val -= 1.0;
565 break;
566 case FeRoundDown:
567 if (origVal < val)
568 val -= 1.0;
569 break;
570 case FeRoundUpward:
571 if (origVal > val)
572 val += 1.0;
573 break;
574 }
575 feraiseexcept(FeInexact);
576 }
642 float junk = 0.0;
643 uint64_t result;
577
644
578 if (isSigned) {
579 if (half) {
580 if ((double)val < (int16_t)(1 << 15)) {
581 feraiseexcept(FeInvalid);
582 feclearexcept(FeInexact);
583 return (int16_t)(1 << 15);
584 }
585 if ((double)val > (int16_t)mask(15)) {
586 feraiseexcept(FeInvalid);
587 feclearexcept(FeInexact);
588 return (int16_t)mask(15);
589 }
590 return (int16_t)val;
591 } else {
592 if ((double)val < (int32_t)(1 << 31)) {
593 feraiseexcept(FeInvalid);
594 feclearexcept(FeInexact);
595 return (int32_t)(1 << 31);
596 }
597 if ((double)val > (int32_t)mask(31)) {
598 feraiseexcept(FeInvalid);
599 feclearexcept(FeInexact);
600 return (int32_t)mask(31);
601 }
602 return (int32_t)val;
603 }
604 } else {
605 if (half) {
606 if ((double)val < 0) {
607 feraiseexcept(FeInvalid);
608 feclearexcept(FeInexact);
609 return 0;
610 }
611 if ((double)val > (mask(16))) {
612 feraiseexcept(FeInvalid);
613 feclearexcept(FeInexact);
614 return mask(16);
615 }
616 return (uint16_t)val;
617 } else {
618 if ((double)val < 0) {
619 feraiseexcept(FeInvalid);
620 feclearexcept(FeInexact);
621 return 0;
622 }
623 if ((double)val > (mask(32))) {
624 feraiseexcept(FeInvalid);
625 feclearexcept(FeInexact);
626 return mask(32);
627 }
628 return (uint32_t)val;
629 }
630 }
645 result = vcvtFpHFp(fpscr, defaultNan, ahp, op, false);
646 return bitsToFp(result, junk);
631}
632
633float
634vfpUFixedToFpS(bool flush, bool defaultNan,
647}
648
649float
650vfpUFixedToFpS(bool flush, bool defaultNan,
635 uint32_t val, bool half, uint8_t imm)
651 uint64_t val, uint8_t width, uint8_t imm)
636{
637 fesetround(FeRoundNearest);
652{
653 fesetround(FeRoundNearest);
638 if (half)
654 if (width == 16)
639 val = (uint16_t)val;
655 val = (uint16_t)val;
656 else if (width == 32)
657 val = (uint32_t)val;
658 else if (width != 64)
659 panic("Unsupported width %d", width);
640 float scale = powf(2.0, imm);
641 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
642 feclearexcept(FeAllExceptions);
643 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
644 return fixDivDest(flush, defaultNan, val / scale, (float)val, scale);
645}
646
647float
648vfpSFixedToFpS(bool flush, bool defaultNan,
660 float scale = powf(2.0, imm);
661 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
662 feclearexcept(FeAllExceptions);
663 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
664 return fixDivDest(flush, defaultNan, val / scale, (float)val, scale);
665}
666
667float
668vfpSFixedToFpS(bool flush, bool defaultNan,
649 int32_t val, bool half, uint8_t imm)
669 int64_t val, uint8_t width, uint8_t imm)
650{
651 fesetround(FeRoundNearest);
670{
671 fesetround(FeRoundNearest);
652 if (half)
672 if (width == 16)
653 val = sext<16>(val & mask(16));
673 val = sext<16>(val & mask(16));
674 else if (width == 32)
675 val = sext<32>(val & mask(32));
676 else if (width != 64)
677 panic("Unsupported width %d", width);
678
654 float scale = powf(2.0, imm);
655 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
656 feclearexcept(FeAllExceptions);
657 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
658 return fixDivDest(flush, defaultNan, val / scale, (float)val, scale);
659}
660
679 float scale = powf(2.0, imm);
680 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
681 feclearexcept(FeAllExceptions);
682 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
683 return fixDivDest(flush, defaultNan, val / scale, (float)val, scale);
684}
685
661uint64_t
662vfpFpDToFixed(double val, bool isSigned, bool half,
663 uint8_t imm, bool rzero)
664{
665 int rmode = rzero ? FeRoundZero : fegetround();
666 fesetround(FeRoundNearest);
667 val = val * pow(2.0, imm);
668 __asm__ __volatile__("" : "=m" (val) : "m" (val));
669 fesetround(rmode);
670 feclearexcept(FeAllExceptions);
671 __asm__ __volatile__("" : "=m" (val) : "m" (val));
672 double origVal = val;
673 val = rint(val);
674 int fpType = std::fpclassify(val);
675 if (fpType == FP_SUBNORMAL || fpType == FP_NAN) {
676 if (fpType == FP_NAN) {
677 feraiseexcept(FeInvalid);
678 }
679 val = 0.0;
680 } else if (origVal != val) {
681 switch (rmode) {
682 case FeRoundNearest:
683 if (origVal - val > 0.5)
684 val += 1.0;
685 else if (val - origVal > 0.5)
686 val -= 1.0;
687 break;
688 case FeRoundDown:
689 if (origVal < val)
690 val -= 1.0;
691 break;
692 case FeRoundUpward:
693 if (origVal > val)
694 val += 1.0;
695 break;
696 }
697 feraiseexcept(FeInexact);
698 }
699 if (isSigned) {
700 if (half) {
701 if (val < (int16_t)(1 << 15)) {
702 feraiseexcept(FeInvalid);
703 feclearexcept(FeInexact);
704 return (int16_t)(1 << 15);
705 }
706 if (val > (int16_t)mask(15)) {
707 feraiseexcept(FeInvalid);
708 feclearexcept(FeInexact);
709 return (int16_t)mask(15);
710 }
711 return (int16_t)val;
712 } else {
713 if (val < (int32_t)(1 << 31)) {
714 feraiseexcept(FeInvalid);
715 feclearexcept(FeInexact);
716 return (int32_t)(1 << 31);
717 }
718 if (val > (int32_t)mask(31)) {
719 feraiseexcept(FeInvalid);
720 feclearexcept(FeInexact);
721 return (int32_t)mask(31);
722 }
723 return (int32_t)val;
724 }
725 } else {
726 if (half) {
727 if (val < 0) {
728 feraiseexcept(FeInvalid);
729 feclearexcept(FeInexact);
730 return 0;
731 }
732 if (val > mask(16)) {
733 feraiseexcept(FeInvalid);
734 feclearexcept(FeInexact);
735 return mask(16);
736 }
737 return (uint16_t)val;
738 } else {
739 if (val < 0) {
740 feraiseexcept(FeInvalid);
741 feclearexcept(FeInexact);
742 return 0;
743 }
744 if (val > mask(32)) {
745 feraiseexcept(FeInvalid);
746 feclearexcept(FeInexact);
747 return mask(32);
748 }
749 return (uint32_t)val;
750 }
751 }
752}
753
754double
755vfpUFixedToFpD(bool flush, bool defaultNan,
686
687double
688vfpUFixedToFpD(bool flush, bool defaultNan,
756 uint32_t val, bool half, uint8_t imm)
689 uint64_t val, uint8_t width, uint8_t imm)
757{
758 fesetround(FeRoundNearest);
690{
691 fesetround(FeRoundNearest);
759 if (half)
692 if (width == 16)
760 val = (uint16_t)val;
693 val = (uint16_t)val;
694 else if (width == 32)
695 val = (uint32_t)val;
696 else if (width != 64)
697 panic("Unsupported width %d", width);
698
761 double scale = pow(2.0, imm);
762 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
763 feclearexcept(FeAllExceptions);
764 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
765 return fixDivDest(flush, defaultNan, val / scale, (double)val, scale);
766}
767
768double
769vfpSFixedToFpD(bool flush, bool defaultNan,
699 double scale = pow(2.0, imm);
700 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
701 feclearexcept(FeAllExceptions);
702 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
703 return fixDivDest(flush, defaultNan, val / scale, (double)val, scale);
704}
705
706double
707vfpSFixedToFpD(bool flush, bool defaultNan,
770 int32_t val, bool half, uint8_t imm)
708 int64_t val, uint8_t width, uint8_t imm)
771{
772 fesetround(FeRoundNearest);
709{
710 fesetround(FeRoundNearest);
773 if (half)
711 if (width == 16)
774 val = sext<16>(val & mask(16));
712 val = sext<16>(val & mask(16));
713 else if (width == 32)
714 val = sext<32>(val & mask(32));
715 else if (width != 64)
716 panic("Unsupported width %d", width);
717
775 double scale = pow(2.0, imm);
776 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
777 feclearexcept(FeAllExceptions);
778 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
779 return fixDivDest(flush, defaultNan, val / scale, (double)val, scale);
780}
781
782// This function implements a magic formula taken from the architecture

--- 188 unchanged lines hidden (view full) ---

971
972template
973float FpOp::processNans(FPSCR &fpscr, bool &done, bool defaultNan,
974 float op1, float op2) const;
975template
976double FpOp::processNans(FPSCR &fpscr, bool &done, bool defaultNan,
977 double op1, double op2) const;
978
718 double scale = pow(2.0, imm);
719 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
720 feclearexcept(FeAllExceptions);
721 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
722 return fixDivDest(flush, defaultNan, val / scale, (double)val, scale);
723}
724
725// This function implements a magic formula taken from the architecture

--- 188 unchanged lines hidden (view full) ---

914
915template
916float FpOp::processNans(FPSCR &fpscr, bool &done, bool defaultNan,
917 float op1, float op2) const;
918template
919double FpOp::processNans(FPSCR &fpscr, bool &done, bool defaultNan,
920 double op1, double op2) const;
921
922// @TODO remove this function when we've finished switching all FMA code to use the new FPLIB
979template <class fpType>
980fpType
923template <class fpType>
924fpType
925FpOp::ternaryOp(FPSCR &fpscr, fpType op1, fpType op2, fpType op3,
926 fpType (*func)(fpType, fpType, fpType),
927 bool flush, bool defaultNan, uint32_t rMode) const
928{
929 const bool single = (sizeof(fpType) == sizeof(float));
930 fpType junk = 0.0;
931
932 if (flush && (flushToZero(op1, op2) || flushToZero(op3)))
933 fpscr.idc = 1;
934 VfpSavedState state = prepFpState(rMode);
935 __asm__ __volatile__ ("" : "=m" (op1), "=m" (op2), "=m" (op3), "=m" (state)
936 : "m" (op1), "m" (op2), "m" (op3), "m" (state));
937 fpType dest = func(op1, op2, op3);
938 __asm__ __volatile__ ("" : "=m" (dest) : "m" (dest));
939
940 int fpClass = std::fpclassify(dest);
941 // Get NAN behavior right. This varies between x86 and ARM.
942 if (fpClass == FP_NAN) {
943 const uint64_t qnan =
944 single ? 0x7fc00000 : ULL(0x7ff8000000000000);
945 const bool nan1 = std::isnan(op1);
946 const bool nan2 = std::isnan(op2);
947 const bool nan3 = std::isnan(op3);
948 const bool signal1 = nan1 && ((fpToBits(op1) & qnan) != qnan);
949 const bool signal2 = nan2 && ((fpToBits(op2) & qnan) != qnan);
950 const bool signal3 = nan3 && ((fpToBits(op3) & qnan) != qnan);
951 if ((!nan1 && !nan2 && !nan3) || (defaultNan == 1)) {
952 dest = bitsToFp(qnan, junk);
953 } else if (signal1) {
954 dest = bitsToFp(fpToBits(op1) | qnan, junk);
955 } else if (signal2) {
956 dest = bitsToFp(fpToBits(op2) | qnan, junk);
957 } else if (signal3) {
958 dest = bitsToFp(fpToBits(op3) | qnan, junk);
959 } else if (nan1) {
960 dest = op1;
961 } else if (nan2) {
962 dest = op2;
963 } else if (nan3) {
964 dest = op3;
965 }
966 } else if (flush && flushToZero(dest)) {
967 feraiseexcept(FeUnderflow);
968 } else if ((
969 (single && (dest == bitsToFp(0x00800000, junk) ||
970 dest == bitsToFp(0x80800000, junk))) ||
971 (!single &&
972 (dest == bitsToFp(ULL(0x0010000000000000), junk) ||
973 dest == bitsToFp(ULL(0x8010000000000000), junk)))
974 ) && rMode != VfpRoundZero) {
975 /*
976 * Correct for the fact that underflow is detected -before- rounding
977 * in ARM and -after- rounding in x86.
978 */
979 fesetround(FeRoundZero);
980 __asm__ __volatile__ ("" : "=m" (op1), "=m" (op2), "=m" (op3)
981 : "m" (op1), "m" (op2), "m" (op3));
982 fpType temp = func(op1, op2, op2);
983 __asm__ __volatile__ ("" : "=m" (temp) : "m" (temp));
984 if (flush && flushToZero(temp)) {
985 dest = temp;
986 }
987 }
988 finishVfp(fpscr, state, flush);
989 return dest;
990}
991
992template
993float FpOp::ternaryOp(FPSCR &fpscr, float op1, float op2, float op3,
994 float (*func)(float, float, float),
995 bool flush, bool defaultNan, uint32_t rMode) const;
996template
997double FpOp::ternaryOp(FPSCR &fpscr, double op1, double op2, double op3,
998 double (*func)(double, double, double),
999 bool flush, bool defaultNan, uint32_t rMode) const;
1000
1001template <class fpType>
1002fpType
981FpOp::binaryOp(FPSCR &fpscr, fpType op1, fpType op2,
982 fpType (*func)(fpType, fpType),
983 bool flush, bool defaultNan, uint32_t rMode) const
984{
985 const bool single = (sizeof(fpType) == sizeof(float));
986 fpType junk = 0.0;
987
988 if (flush && flushToZero(op1, op2))

--- 164 unchanged lines hidden ---
1003FpOp::binaryOp(FPSCR &fpscr, fpType op1, fpType op2,
1004 fpType (*func)(fpType, fpType),
1005 bool flush, bool defaultNan, uint32_t rMode) const
1006{
1007 const bool single = (sizeof(fpType) == sizeof(float));
1008 fpType junk = 0.0;
1009
1010 if (flush && flushToZero(op1, op2))

--- 164 unchanged lines hidden ---