vfp.cc (9918:2c7219e2d999) | vfp.cc (10037:5cac77888310) |
---|---|
1/* | 1/* |
2 * Copyright (c) 2010 ARM Limited | 2 * Copyright (c) 2010-2013 ARM Limited |
3 * All rights reserved 4 * 5 * The license below extends only to copyright in the software and shall 6 * not be construed as granting a license to any other intellectual 7 * property including but not limited to intellectual property relating 8 * to a hardware implementation of the functionality of the software 9 * licensed hereunder. You may use the software subject to the license 10 * terms below provided that you ensure that this notice is replicated --- 30 unchanged lines hidden (view full) --- 41 42/* 43 * The asm statements below are to keep gcc from reordering code. Otherwise 44 * the rounding mode might be set after the operation it was intended for, the 45 * exception bits read before it, etc. 46 */ 47 48std::string | 3 * All rights reserved 4 * 5 * The license below extends only to copyright in the software and shall 6 * not be construed as granting a license to any other intellectual 7 * property including but not limited to intellectual property relating 8 * to a hardware implementation of the functionality of the software 9 * licensed hereunder. You may use the software subject to the license 10 * terms below provided that you ensure that this notice is replicated --- 30 unchanged lines hidden (view full) --- 41 42/* 43 * The asm statements below are to keep gcc from reordering code. Otherwise 44 * the rounding mode might be set after the operation it was intended for, the 45 * exception bits read before it, etc. 46 */ 47 48std::string |
49FpCondCompRegOp::generateDisassembly( 50 Addr pc, const SymbolTable *symtab) const 51{ 52 std::stringstream ss; 53 printMnemonic(ss, "", false); 54 printReg(ss, op1); 55 ccprintf(ss, ", "); 56 printReg(ss, op2); 57 ccprintf(ss, ", #%d", defCc); 58 ccprintf(ss, ", "); 59 printCondition(ss, condCode, true); 60 return ss.str(); 61} 62 63std::string 64FpCondSelOp::generateDisassembly( 65 Addr pc, const SymbolTable *symtab) const 66{ 67 std::stringstream ss; 68 printMnemonic(ss, "", false); 69 printReg(ss, dest); 70 ccprintf(ss, ", "); 71 printReg(ss, op1); 72 ccprintf(ss, ", "); 73 printReg(ss, op2); 74 ccprintf(ss, ", "); 75 printCondition(ss, condCode, true); 76 return ss.str(); 77} 78 79std::string |
|
49FpRegRegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const 50{ 51 std::stringstream ss; 52 printMnemonic(ss); 53 printReg(ss, dest + FP_Reg_Base); 54 ss << ", "; 55 printReg(ss, op1 + FP_Reg_Base); 56 return ss.str(); --- 30 unchanged lines hidden (view full) --- 87 ss << ", "; 88 printReg(ss, op1 + FP_Reg_Base); 89 ss << ", "; 90 printReg(ss, op2 + FP_Reg_Base); 91 return ss.str(); 92} 93 94std::string | 80FpRegRegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const 81{ 82 std::stringstream ss; 83 printMnemonic(ss); 84 printReg(ss, dest + FP_Reg_Base); 85 ss << ", "; 86 printReg(ss, op1 + FP_Reg_Base); 87 return ss.str(); --- 30 unchanged lines hidden (view full) --- 118 ss << ", "; 119 printReg(ss, op1 + FP_Reg_Base); 120 ss << ", "; 121 printReg(ss, op2 + FP_Reg_Base); 122 return ss.str(); 123} 124 125std::string |
126FpRegRegRegRegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const 127{ 128 std::stringstream ss; 129 printMnemonic(ss); 130 printReg(ss, dest + FP_Reg_Base); 131 ss << ", "; 132 printReg(ss, op1 + FP_Reg_Base); 133 ss << ", "; 134 printReg(ss, op2 + FP_Reg_Base); 135 ss << ", "; 136 printReg(ss, op3 + FP_Reg_Base); 137 return ss.str(); 138} 139 140std::string |
|
95FpRegRegRegImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const 96{ 97 std::stringstream ss; 98 printMnemonic(ss); 99 printReg(ss, dest + FP_Reg_Base); 100 ss << ", "; 101 printReg(ss, op1 + FP_Reg_Base); 102 ss << ", "; --- 23 unchanged lines hidden (view full) --- 126 case VfpRoundZero: 127 fesetround(FeRoundZero); 128 break; 129 } 130 return roundingMode; 131} 132 133void | 141FpRegRegRegImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const 142{ 143 std::stringstream ss; 144 printMnemonic(ss); 145 printReg(ss, dest + FP_Reg_Base); 146 ss << ", "; 147 printReg(ss, op1 + FP_Reg_Base); 148 ss << ", "; --- 23 unchanged lines hidden (view full) --- 172 case VfpRoundZero: 173 fesetround(FeRoundZero); 174 break; 175 } 176 return roundingMode; 177} 178 179void |
134finishVfp(FPSCR &fpscr, VfpSavedState state, bool flush) | 180finishVfp(FPSCR &fpscr, VfpSavedState state, bool flush, FPSCR mask) |
135{ 136 int exceptions = fetestexcept(FeAllExceptions); 137 bool underflow = false; | 181{ 182 int exceptions = fetestexcept(FeAllExceptions); 183 bool underflow = false; |
138 if (exceptions & FeInvalid) { | 184 if ((exceptions & FeInvalid) && mask.ioc) { |
139 fpscr.ioc = 1; 140 } | 185 fpscr.ioc = 1; 186 } |
141 if (exceptions & FeDivByZero) { | 187 if ((exceptions & FeDivByZero) && mask.dzc) { |
142 fpscr.dzc = 1; 143 } | 188 fpscr.dzc = 1; 189 } |
144 if (exceptions & FeOverflow) { | 190 if ((exceptions & FeOverflow) && mask.ofc) { |
145 fpscr.ofc = 1; 146 } 147 if (exceptions & FeUnderflow) { 148 underflow = true; | 191 fpscr.ofc = 1; 192 } 193 if (exceptions & FeUnderflow) { 194 underflow = true; |
149 fpscr.ufc = 1; | 195 if (mask.ufc) 196 fpscr.ufc = 1; |
150 } | 197 } |
151 if ((exceptions & FeInexact) && !(underflow && flush)) { | 198 if ((exceptions & FeInexact) && !(underflow && flush) && mask.ixc) { |
152 fpscr.ixc = 1; 153 } 154 fesetround(state); 155} 156 157template <class fpType> 158fpType 159fixDest(bool flush, bool defaultNan, fpType val, fpType op1) --- 164 unchanged lines hidden (view full) --- 324 mid = temp; 325 } 326 } 327 __asm__ __volatile__("" :: "m" (temp)); 328 } 329 return mid; 330} 331 | 199 fpscr.ixc = 1; 200 } 201 fesetround(state); 202} 203 204template <class fpType> 205fpType 206fixDest(bool flush, bool defaultNan, fpType val, fpType op1) --- 164 unchanged lines hidden (view full) --- 371 mid = temp; 372 } 373 } 374 __asm__ __volatile__("" :: "m" (temp)); 375 } 376 return mid; 377} 378 |
332uint16_t 333vcvtFpSFpH(FPSCR &fpscr, bool flush, bool defaultNan, 334 uint32_t rMode, bool ahp, float op) | 379static inline uint16_t 380vcvtFpFpH(FPSCR &fpscr, bool flush, bool defaultNan, 381 uint32_t rMode, bool ahp, uint64_t opBits, bool isDouble) |
335{ | 382{ |
336 uint32_t opBits = fpToBits(op); | 383 uint32_t mWidth; 384 uint32_t eWidth; 385 uint32_t eHalfRange; 386 uint32_t sBitPos; 387 388 if (isDouble) { 389 mWidth = 52; 390 eWidth = 11; 391 } else { 392 mWidth = 23; 393 eWidth = 8; 394 } 395 sBitPos = eWidth + mWidth; 396 eHalfRange = (1 << (eWidth-1)) - 1; 397 |
337 // Extract the operand. | 398 // Extract the operand. |
338 bool neg = bits(opBits, 31); 339 uint32_t exponent = bits(opBits, 30, 23); 340 uint32_t oldMantissa = bits(opBits, 22, 0); 341 uint32_t mantissa = oldMantissa >> (23 - 10); | 399 bool neg = bits(opBits, sBitPos); 400 uint32_t exponent = bits(opBits, sBitPos-1, mWidth); 401 uint64_t oldMantissa = bits(opBits, mWidth-1, 0); 402 uint32_t mantissa = oldMantissa >> (mWidth - 10); |
342 // Do the conversion. | 403 // Do the conversion. |
343 uint32_t extra = oldMantissa & mask(23 - 10); 344 if (exponent == 0xff) { | 404 uint64_t extra = oldMantissa & mask(mWidth - 10); 405 if (exponent == mask(eWidth)) { |
345 if (oldMantissa != 0) { 346 // Nans. 347 if (bits(mantissa, 9) == 0) { 348 // Signalling nan. 349 fpscr.ioc = 1; 350 } 351 if (ahp) { 352 mantissa = 0; --- 21 unchanged lines hidden (view full) --- 374 // Zero, don't need to do anything. 375 } else { 376 // Normalized or denormalized numbers. 377 378 bool inexact = (extra != 0); 379 380 if (exponent == 0) { 381 // Denormalized. | 406 if (oldMantissa != 0) { 407 // Nans. 408 if (bits(mantissa, 9) == 0) { 409 // Signalling nan. 410 fpscr.ioc = 1; 411 } 412 if (ahp) { 413 mantissa = 0; --- 21 unchanged lines hidden (view full) --- 435 // Zero, don't need to do anything. 436 } else { 437 // Normalized or denormalized numbers. 438 439 bool inexact = (extra != 0); 440 441 if (exponent == 0) { 442 // Denormalized. |
382 | |
383 // If flush to zero is on, this shouldn't happen. 384 assert(!flush); 385 386 // Check for underflow 387 if (inexact || fpscr.ufe) 388 fpscr.ufc = 1; 389 390 // Handle rounding. --- 11 unchanged lines hidden (view full) --- 402 mantissa = 0; 403 exponent = 1; 404 } 405 } else { 406 // Normalized. 407 408 // We need to track the dropped bits differently since 409 // more can be dropped by denormalizing. | 443 // If flush to zero is on, this shouldn't happen. 444 assert(!flush); 445 446 // Check for underflow 447 if (inexact || fpscr.ufe) 448 fpscr.ufc = 1; 449 450 // Handle rounding. --- 11 unchanged lines hidden (view full) --- 462 mantissa = 0; 463 exponent = 1; 464 } 465 } else { 466 // Normalized. 467 468 // We need to track the dropped bits differently since 469 // more can be dropped by denormalizing. |
410 bool topOne = bits(extra, 12); 411 bool restZeros = bits(extra, 11, 0) == 0; | 470 bool topOne = bits(extra, mWidth - 10 - 1); 471 bool restZeros = bits(extra, mWidth - 10 - 2, 0) == 0; |
412 | 472 |
413 if (exponent <= (127 - 15)) { | 473 if (exponent <= (eHalfRange - 15)) { |
414 // The result is too small. Denormalize. 415 mantissa |= (1 << 10); | 474 // The result is too small. Denormalize. 475 mantissa |= (1 << 10); |
416 while (mantissa && exponent <= (127 - 15)) { | 476 while (mantissa && exponent <= (eHalfRange - 15)) { |
417 restZeros = restZeros && !topOne; 418 topOne = bits(mantissa, 0); 419 mantissa = mantissa >> 1; 420 exponent++; 421 } 422 if (topOne || !restZeros) 423 inexact = true; 424 exponent = 0; 425 } else { 426 // Change bias. | 477 restZeros = restZeros && !topOne; 478 topOne = bits(mantissa, 0); 479 mantissa = mantissa >> 1; 480 exponent++; 481 } 482 if (topOne || !restZeros) 483 inexact = true; 484 exponent = 0; 485 } else { 486 // Change bias. |
427 exponent -= (127 - 15); | 487 exponent -= (eHalfRange - 15); |
428 } 429 430 if (exponent == 0 && (inexact || fpscr.ufe)) { 431 // Underflow 432 fpscr.ufc = 1; 433 } 434 435 // Handle rounding. --- 47 unchanged lines hidden (view full) --- 483 // Reassemble and install the result. 484 uint32_t result = bits(mantissa, 9, 0); 485 replaceBits(result, 14, 10, exponent); 486 if (neg) 487 result |= (1 << 15); 488 return result; 489} 490 | 488 } 489 490 if (exponent == 0 && (inexact || fpscr.ufe)) { 491 // Underflow 492 fpscr.ufc = 1; 493 } 494 495 // Handle rounding. --- 47 unchanged lines hidden (view full) --- 543 // Reassemble and install the result. 544 uint32_t result = bits(mantissa, 9, 0); 545 replaceBits(result, 14, 10, exponent); 546 if (neg) 547 result |= (1 << 15); 548 return result; 549} 550 |
491float 492vcvtFpHFpS(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op) | 551uint16_t 552vcvtFpSFpH(FPSCR &fpscr, bool flush, bool defaultNan, 553 uint32_t rMode, bool ahp, float op) |
493{ | 554{ |
494 float junk = 0.0; | 555 uint64_t opBits = fpToBits(op); 556 return vcvtFpFpH(fpscr, flush, defaultNan, rMode, ahp, opBits, false); 557} 558 559uint16_t 560vcvtFpDFpH(FPSCR &fpscr, bool flush, bool defaultNan, 561 uint32_t rMode, bool ahp, double op) 562{ 563 uint64_t opBits = fpToBits(op); 564 return vcvtFpFpH(fpscr, flush, defaultNan, rMode, ahp, opBits, true); 565} 566 567static inline uint64_t 568vcvtFpHFp(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op, bool isDouble) 569{ 570 uint32_t mWidth; 571 uint32_t eWidth; 572 uint32_t eHalfRange; 573 uint32_t sBitPos; 574 575 if (isDouble) { 576 mWidth = 52; 577 eWidth = 11; 578 } else { 579 mWidth = 23; 580 eWidth = 8; 581 } 582 sBitPos = eWidth + mWidth; 583 eHalfRange = (1 << (eWidth-1)) - 1; 584 |
495 // Extract the bitfields. 496 bool neg = bits(op, 15); 497 uint32_t exponent = bits(op, 14, 10); | 585 // Extract the bitfields. 586 bool neg = bits(op, 15); 587 uint32_t exponent = bits(op, 14, 10); |
498 uint32_t mantissa = bits(op, 9, 0); | 588 uint64_t mantissa = bits(op, 9, 0); |
499 // Do the conversion. 500 if (exponent == 0) { 501 if (mantissa != 0) { 502 // Normalize the value. | 589 // Do the conversion. 590 if (exponent == 0) { 591 if (mantissa != 0) { 592 // Normalize the value. |
503 exponent = exponent + (127 - 15) + 1; | 593 exponent = exponent + (eHalfRange - 15) + 1; |
504 while (mantissa < (1 << 10)) { 505 mantissa = mantissa << 1; 506 exponent--; 507 } 508 } | 594 while (mantissa < (1 << 10)) { 595 mantissa = mantissa << 1; 596 exponent--; 597 } 598 } |
509 mantissa = mantissa << (23 - 10); | 599 mantissa = mantissa << (mWidth - 10); |
510 } else if (exponent == 0x1f && !ahp) { 511 // Infinities and nans. | 600 } else if (exponent == 0x1f && !ahp) { 601 // Infinities and nans. |
512 exponent = 0xff; | 602 exponent = mask(eWidth); |
513 if (mantissa != 0) { 514 // Nans. | 603 if (mantissa != 0) { 604 // Nans. |
515 mantissa = mantissa << (23 - 10); 516 if (bits(mantissa, 22) == 0) { | 605 mantissa = mantissa << (mWidth - 10); 606 if (bits(mantissa, mWidth-1) == 0) { |
517 // Signalling nan. 518 fpscr.ioc = 1; | 607 // Signalling nan. 608 fpscr.ioc = 1; |
519 mantissa |= (1 << 22); | 609 mantissa |= (((uint64_t) 1) << (mWidth-1)); |
520 } 521 if (defaultNan) { | 610 } 611 if (defaultNan) { |
522 mantissa &= ~mask(22); | 612 mantissa &= ~mask(mWidth-1); |
523 neg = false; 524 } 525 } 526 } else { | 613 neg = false; 614 } 615 } 616 } else { |
527 exponent = exponent + (127 - 15); 528 mantissa = mantissa << (23 - 10); | 617 exponent = exponent + (eHalfRange - 15); 618 mantissa = mantissa << (mWidth - 10); |
529 } 530 // Reassemble the result. | 619 } 620 // Reassemble the result. |
531 uint32_t result = bits(mantissa, 22, 0); 532 replaceBits(result, 30, 23, exponent); 533 if (neg) 534 result |= (1 << 31); | 621 uint64_t result = bits(mantissa, mWidth-1, 0); 622 replaceBits(result, sBitPos-1, mWidth, exponent); 623 if (neg) { 624 result |= (((uint64_t) 1) << sBitPos); 625 } 626 return result; 627} 628 629double 630vcvtFpHFpD(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op) 631{ 632 double junk = 0.0; 633 uint64_t result; 634 635 result = vcvtFpHFp(fpscr, defaultNan, ahp, op, true); |
535 return bitsToFp(result, junk); 536} 537 | 636 return bitsToFp(result, junk); 637} 638 |
538uint64_t 539vfpFpSToFixed(float val, bool isSigned, bool half, 540 uint8_t imm, bool rzero) | 639float 640vcvtFpHFpS(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op) |
541{ | 641{ |
542 int rmode = rzero ? FeRoundZero : fegetround(); 543 __asm__ __volatile__("" : "=m" (rmode) : "m" (rmode)); 544 fesetround(FeRoundNearest); 545 val = val * powf(2.0, imm); 546 __asm__ __volatile__("" : "=m" (val) : "m" (val)); 547 fesetround(rmode); 548 feclearexcept(FeAllExceptions); 549 __asm__ __volatile__("" : "=m" (val) : "m" (val)); 550 float origVal = val; 551 val = rintf(val); 552 int fpType = std::fpclassify(val); 553 if (fpType == FP_SUBNORMAL || fpType == FP_NAN) { 554 if (fpType == FP_NAN) { 555 feraiseexcept(FeInvalid); 556 } 557 val = 0.0; 558 } else if (origVal != val) { 559 switch (rmode) { 560 case FeRoundNearest: 561 if (origVal - val > 0.5) 562 val += 1.0; 563 else if (val - origVal > 0.5) 564 val -= 1.0; 565 break; 566 case FeRoundDown: 567 if (origVal < val) 568 val -= 1.0; 569 break; 570 case FeRoundUpward: 571 if (origVal > val) 572 val += 1.0; 573 break; 574 } 575 feraiseexcept(FeInexact); 576 } | 642 float junk = 0.0; 643 uint64_t result; |
577 | 644 |
578 if (isSigned) { 579 if (half) { 580 if ((double)val < (int16_t)(1 << 15)) { 581 feraiseexcept(FeInvalid); 582 feclearexcept(FeInexact); 583 return (int16_t)(1 << 15); 584 } 585 if ((double)val > (int16_t)mask(15)) { 586 feraiseexcept(FeInvalid); 587 feclearexcept(FeInexact); 588 return (int16_t)mask(15); 589 } 590 return (int16_t)val; 591 } else { 592 if ((double)val < (int32_t)(1 << 31)) { 593 feraiseexcept(FeInvalid); 594 feclearexcept(FeInexact); 595 return (int32_t)(1 << 31); 596 } 597 if ((double)val > (int32_t)mask(31)) { 598 feraiseexcept(FeInvalid); 599 feclearexcept(FeInexact); 600 return (int32_t)mask(31); 601 } 602 return (int32_t)val; 603 } 604 } else { 605 if (half) { 606 if ((double)val < 0) { 607 feraiseexcept(FeInvalid); 608 feclearexcept(FeInexact); 609 return 0; 610 } 611 if ((double)val > (mask(16))) { 612 feraiseexcept(FeInvalid); 613 feclearexcept(FeInexact); 614 return mask(16); 615 } 616 return (uint16_t)val; 617 } else { 618 if ((double)val < 0) { 619 feraiseexcept(FeInvalid); 620 feclearexcept(FeInexact); 621 return 0; 622 } 623 if ((double)val > (mask(32))) { 624 feraiseexcept(FeInvalid); 625 feclearexcept(FeInexact); 626 return mask(32); 627 } 628 return (uint32_t)val; 629 } 630 } | 645 result = vcvtFpHFp(fpscr, defaultNan, ahp, op, false); 646 return bitsToFp(result, junk); |
631} 632 633float 634vfpUFixedToFpS(bool flush, bool defaultNan, | 647} 648 649float 650vfpUFixedToFpS(bool flush, bool defaultNan, |
635 uint32_t val, bool half, uint8_t imm) | 651 uint64_t val, uint8_t width, uint8_t imm) |
636{ 637 fesetround(FeRoundNearest); | 652{ 653 fesetround(FeRoundNearest); |
638 if (half) | 654 if (width == 16) |
639 val = (uint16_t)val; | 655 val = (uint16_t)val; |
656 else if (width == 32) 657 val = (uint32_t)val; 658 else if (width != 64) 659 panic("Unsupported width %d", width); |
|
640 float scale = powf(2.0, imm); 641 __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); 642 feclearexcept(FeAllExceptions); 643 __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); 644 return fixDivDest(flush, defaultNan, val / scale, (float)val, scale); 645} 646 647float 648vfpSFixedToFpS(bool flush, bool defaultNan, | 660 float scale = powf(2.0, imm); 661 __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); 662 feclearexcept(FeAllExceptions); 663 __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); 664 return fixDivDest(flush, defaultNan, val / scale, (float)val, scale); 665} 666 667float 668vfpSFixedToFpS(bool flush, bool defaultNan, |
649 int32_t val, bool half, uint8_t imm) | 669 int64_t val, uint8_t width, uint8_t imm) |
650{ 651 fesetround(FeRoundNearest); | 670{ 671 fesetround(FeRoundNearest); |
652 if (half) | 672 if (width == 16) |
653 val = sext<16>(val & mask(16)); | 673 val = sext<16>(val & mask(16)); |
674 else if (width == 32) 675 val = sext<32>(val & mask(32)); 676 else if (width != 64) 677 panic("Unsupported width %d", width); 678 |
|
654 float scale = powf(2.0, imm); 655 __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); 656 feclearexcept(FeAllExceptions); 657 __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); 658 return fixDivDest(flush, defaultNan, val / scale, (float)val, scale); 659} 660 | 679 float scale = powf(2.0, imm); 680 __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); 681 feclearexcept(FeAllExceptions); 682 __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); 683 return fixDivDest(flush, defaultNan, val / scale, (float)val, scale); 684} 685 |
661uint64_t 662vfpFpDToFixed(double val, bool isSigned, bool half, 663 uint8_t imm, bool rzero) 664{ 665 int rmode = rzero ? FeRoundZero : fegetround(); 666 fesetround(FeRoundNearest); 667 val = val * pow(2.0, imm); 668 __asm__ __volatile__("" : "=m" (val) : "m" (val)); 669 fesetround(rmode); 670 feclearexcept(FeAllExceptions); 671 __asm__ __volatile__("" : "=m" (val) : "m" (val)); 672 double origVal = val; 673 val = rint(val); 674 int fpType = std::fpclassify(val); 675 if (fpType == FP_SUBNORMAL || fpType == FP_NAN) { 676 if (fpType == FP_NAN) { 677 feraiseexcept(FeInvalid); 678 } 679 val = 0.0; 680 } else if (origVal != val) { 681 switch (rmode) { 682 case FeRoundNearest: 683 if (origVal - val > 0.5) 684 val += 1.0; 685 else if (val - origVal > 0.5) 686 val -= 1.0; 687 break; 688 case FeRoundDown: 689 if (origVal < val) 690 val -= 1.0; 691 break; 692 case FeRoundUpward: 693 if (origVal > val) 694 val += 1.0; 695 break; 696 } 697 feraiseexcept(FeInexact); 698 } 699 if (isSigned) { 700 if (half) { 701 if (val < (int16_t)(1 << 15)) { 702 feraiseexcept(FeInvalid); 703 feclearexcept(FeInexact); 704 return (int16_t)(1 << 15); 705 } 706 if (val > (int16_t)mask(15)) { 707 feraiseexcept(FeInvalid); 708 feclearexcept(FeInexact); 709 return (int16_t)mask(15); 710 } 711 return (int16_t)val; 712 } else { 713 if (val < (int32_t)(1 << 31)) { 714 feraiseexcept(FeInvalid); 715 feclearexcept(FeInexact); 716 return (int32_t)(1 << 31); 717 } 718 if (val > (int32_t)mask(31)) { 719 feraiseexcept(FeInvalid); 720 feclearexcept(FeInexact); 721 return (int32_t)mask(31); 722 } 723 return (int32_t)val; 724 } 725 } else { 726 if (half) { 727 if (val < 0) { 728 feraiseexcept(FeInvalid); 729 feclearexcept(FeInexact); 730 return 0; 731 } 732 if (val > mask(16)) { 733 feraiseexcept(FeInvalid); 734 feclearexcept(FeInexact); 735 return mask(16); 736 } 737 return (uint16_t)val; 738 } else { 739 if (val < 0) { 740 feraiseexcept(FeInvalid); 741 feclearexcept(FeInexact); 742 return 0; 743 } 744 if (val > mask(32)) { 745 feraiseexcept(FeInvalid); 746 feclearexcept(FeInexact); 747 return mask(32); 748 } 749 return (uint32_t)val; 750 } 751 } 752} | |
753 754double 755vfpUFixedToFpD(bool flush, bool defaultNan, | 686 687double 688vfpUFixedToFpD(bool flush, bool defaultNan, |
756 uint32_t val, bool half, uint8_t imm) | 689 uint64_t val, uint8_t width, uint8_t imm) |
757{ 758 fesetround(FeRoundNearest); | 690{ 691 fesetround(FeRoundNearest); |
759 if (half) | 692 if (width == 16) |
760 val = (uint16_t)val; | 693 val = (uint16_t)val; |
694 else if (width == 32) 695 val = (uint32_t)val; 696 else if (width != 64) 697 panic("Unsupported width %d", width); 698 |
|
761 double scale = pow(2.0, imm); 762 __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); 763 feclearexcept(FeAllExceptions); 764 __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); 765 return fixDivDest(flush, defaultNan, val / scale, (double)val, scale); 766} 767 768double 769vfpSFixedToFpD(bool flush, bool defaultNan, | 699 double scale = pow(2.0, imm); 700 __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); 701 feclearexcept(FeAllExceptions); 702 __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); 703 return fixDivDest(flush, defaultNan, val / scale, (double)val, scale); 704} 705 706double 707vfpSFixedToFpD(bool flush, bool defaultNan, |
770 int32_t val, bool half, uint8_t imm) | 708 int64_t val, uint8_t width, uint8_t imm) |
771{ 772 fesetround(FeRoundNearest); | 709{ 710 fesetround(FeRoundNearest); |
773 if (half) | 711 if (width == 16) |
774 val = sext<16>(val & mask(16)); | 712 val = sext<16>(val & mask(16)); |
713 else if (width == 32) 714 val = sext<32>(val & mask(32)); 715 else if (width != 64) 716 panic("Unsupported width %d", width); 717 |
|
775 double scale = pow(2.0, imm); 776 __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); 777 feclearexcept(FeAllExceptions); 778 __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); 779 return fixDivDest(flush, defaultNan, val / scale, (double)val, scale); 780} 781 782// This function implements a magic formula taken from the architecture --- 188 unchanged lines hidden (view full) --- 971 972template 973float FpOp::processNans(FPSCR &fpscr, bool &done, bool defaultNan, 974 float op1, float op2) const; 975template 976double FpOp::processNans(FPSCR &fpscr, bool &done, bool defaultNan, 977 double op1, double op2) const; 978 | 718 double scale = pow(2.0, imm); 719 __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); 720 feclearexcept(FeAllExceptions); 721 __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); 722 return fixDivDest(flush, defaultNan, val / scale, (double)val, scale); 723} 724 725// This function implements a magic formula taken from the architecture --- 188 unchanged lines hidden (view full) --- 914 915template 916float FpOp::processNans(FPSCR &fpscr, bool &done, bool defaultNan, 917 float op1, float op2) const; 918template 919double FpOp::processNans(FPSCR &fpscr, bool &done, bool defaultNan, 920 double op1, double op2) const; 921 |
922// @TODO remove this function when we've finished switching all FMA code to use the new FPLIB |
|
979template <class fpType> 980fpType | 923template <class fpType> 924fpType |
925FpOp::ternaryOp(FPSCR &fpscr, fpType op1, fpType op2, fpType op3, 926 fpType (*func)(fpType, fpType, fpType), 927 bool flush, bool defaultNan, uint32_t rMode) const 928{ 929 const bool single = (sizeof(fpType) == sizeof(float)); 930 fpType junk = 0.0; 931 932 if (flush && (flushToZero(op1, op2) || flushToZero(op3))) 933 fpscr.idc = 1; 934 VfpSavedState state = prepFpState(rMode); 935 __asm__ __volatile__ ("" : "=m" (op1), "=m" (op2), "=m" (op3), "=m" (state) 936 : "m" (op1), "m" (op2), "m" (op3), "m" (state)); 937 fpType dest = func(op1, op2, op3); 938 __asm__ __volatile__ ("" : "=m" (dest) : "m" (dest)); 939 940 int fpClass = std::fpclassify(dest); 941 // Get NAN behavior right. This varies between x86 and ARM. 942 if (fpClass == FP_NAN) { 943 const uint64_t qnan = 944 single ? 0x7fc00000 : ULL(0x7ff8000000000000); 945 const bool nan1 = std::isnan(op1); 946 const bool nan2 = std::isnan(op2); 947 const bool nan3 = std::isnan(op3); 948 const bool signal1 = nan1 && ((fpToBits(op1) & qnan) != qnan); 949 const bool signal2 = nan2 && ((fpToBits(op2) & qnan) != qnan); 950 const bool signal3 = nan3 && ((fpToBits(op3) & qnan) != qnan); 951 if ((!nan1 && !nan2 && !nan3) || (defaultNan == 1)) { 952 dest = bitsToFp(qnan, junk); 953 } else if (signal1) { 954 dest = bitsToFp(fpToBits(op1) | qnan, junk); 955 } else if (signal2) { 956 dest = bitsToFp(fpToBits(op2) | qnan, junk); 957 } else if (signal3) { 958 dest = bitsToFp(fpToBits(op3) | qnan, junk); 959 } else if (nan1) { 960 dest = op1; 961 } else if (nan2) { 962 dest = op2; 963 } else if (nan3) { 964 dest = op3; 965 } 966 } else if (flush && flushToZero(dest)) { 967 feraiseexcept(FeUnderflow); 968 } else if (( 969 (single && (dest == bitsToFp(0x00800000, junk) || 970 dest == bitsToFp(0x80800000, junk))) || 971 (!single && 972 (dest == bitsToFp(ULL(0x0010000000000000), junk) || 973 dest == bitsToFp(ULL(0x8010000000000000), junk))) 974 ) && rMode != VfpRoundZero) { 975 /* 976 * Correct for the fact that underflow is detected -before- rounding 977 * in ARM and -after- rounding in x86. 978 */ 979 fesetround(FeRoundZero); 980 __asm__ __volatile__ ("" : "=m" (op1), "=m" (op2), "=m" (op3) 981 : "m" (op1), "m" (op2), "m" (op3)); 982 fpType temp = func(op1, op2, op2); 983 __asm__ __volatile__ ("" : "=m" (temp) : "m" (temp)); 984 if (flush && flushToZero(temp)) { 985 dest = temp; 986 } 987 } 988 finishVfp(fpscr, state, flush); 989 return dest; 990} 991 992template 993float FpOp::ternaryOp(FPSCR &fpscr, float op1, float op2, float op3, 994 float (*func)(float, float, float), 995 bool flush, bool defaultNan, uint32_t rMode) const; 996template 997double FpOp::ternaryOp(FPSCR &fpscr, double op1, double op2, double op3, 998 double (*func)(double, double, double), 999 bool flush, bool defaultNan, uint32_t rMode) const; 1000 1001template <class fpType> 1002fpType |
|
981FpOp::binaryOp(FPSCR &fpscr, fpType op1, fpType op2, 982 fpType (*func)(fpType, fpType), 983 bool flush, bool defaultNan, uint32_t rMode) const 984{ 985 const bool single = (sizeof(fpType) == sizeof(float)); 986 fpType junk = 0.0; 987 988 if (flush && flushToZero(op1, op2)) --- 164 unchanged lines hidden --- | 1003FpOp::binaryOp(FPSCR &fpscr, fpType op1, fpType op2, 1004 fpType (*func)(fpType, fpType), 1005 bool flush, bool defaultNan, uint32_t rMode) const 1006{ 1007 const bool single = (sizeof(fpType) == sizeof(float)); 1008 fpType junk = 0.0; 1009 1010 if (flush && flushToZero(op1, op2)) --- 164 unchanged lines hidden --- |