Cross Reference: /gem5/src/arch/arm/insts/vfp.cc

vfp.cc (9918:2c7219e2d999)	vfp.cc (10037:5cac77888310)
1/*	1/*
2 * Copyright (c) 2010 ARM Limited	2 * Copyright (c) 2010-2013 ARM Limited
3 * All rights reserved 4 * 5 * The license below extends only to copyright in the software and shall 6 * not be construed as granting a license to any other intellectual 7 * property including but not limited to intellectual property relating 8 * to a hardware implementation of the functionality of the software 9 * licensed hereunder. You may use the software subject to the license 10 * terms below provided that you ensure that this notice is replicated --- 30 unchanged lines hidden (view full) --- 41 42/* 43 * The asm statements below are to keep gcc from reordering code. Otherwise 44 * the rounding mode might be set after the operation it was intended for, the 45 * exception bits read before it, etc. 46 */ 47 48std::string	3 * All rights reserved 4 * 5 * The license below extends only to copyright in the software and shall 6 * not be construed as granting a license to any other intellectual 7 * property including but not limited to intellectual property relating 8 * to a hardware implementation of the functionality of the software 9 * licensed hereunder. You may use the software subject to the license 10 * terms below provided that you ensure that this notice is replicated --- 30 unchanged lines hidden (view full) --- 41 42/* 43 * The asm statements below are to keep gcc from reordering code. Otherwise 44 * the rounding mode might be set after the operation it was intended for, the 45 * exception bits read before it, etc. 46 */ 47 48std::string
	49FpCondCompRegOp::generateDisassembly( 50 Addr pc, const SymbolTable symtab) const 51{ 52 std::stringstream ss; 53 printMnemonic(ss, "", false); 54 printReg(ss, op1); 55 ccprintf(ss, ", "); 56 printReg(ss, op2); 57 ccprintf(ss, ", #%d", defCc); 58 ccprintf(ss, ", "); 59 printCondition(ss, condCode, true); 60 return ss.str(); 61} 62 63std::string 64FpCondSelOp::generateDisassembly( 65 Addr pc, const SymbolTable symtab) const 66{ 67 std::stringstream ss; 68 printMnemonic(ss, "", false); 69 printReg(ss, dest); 70 ccprintf(ss, ", "); 71 printReg(ss, op1); 72 ccprintf(ss, ", "); 73 printReg(ss, op2); 74 ccprintf(ss, ", "); 75 printCondition(ss, condCode, true); 76 return ss.str(); 77} 78 79std::string
49FpRegRegOp::generateDisassembly(Addr pc, const SymbolTable symtab) const 50{ 51 std::stringstream ss; 52 printMnemonic(ss); 53 printReg(ss, dest + FP_Reg_Base); 54 ss << ", "; 55 printReg(ss, op1 + FP_Reg_Base); 56 return ss.str(); --- 30 unchanged lines hidden* (view full) --- 87 ss << ", "; 88 printReg(ss, op1 + FP_Reg_Base); 89 ss << ", "; 90 printReg(ss, op2 + FP_Reg_Base); 91 return ss.str(); 92} 93 94std::string	80FpRegRegOp::generateDisassembly(Addr pc, const SymbolTable symtab) const 81{ 82 std::stringstream ss; 83 printMnemonic(ss); 84 printReg(ss, dest + FP_Reg_Base); 85 ss << ", "; 86 printReg(ss, op1 + FP_Reg_Base); 87 return ss.str(); --- 30 unchanged lines hidden* (view full) --- 118 ss << ", "; 119 printReg(ss, op1 + FP_Reg_Base); 120 ss << ", "; 121 printReg(ss, op2 + FP_Reg_Base); 122 return ss.str(); 123} 124 125std::string
	126FpRegRegRegRegOp::generateDisassembly(Addr pc, const SymbolTable symtab) const 127{ 128* std::stringstream ss; 129 printMnemonic(ss); 130 printReg(ss, dest + FP_Reg_Base); 131 ss << ", "; 132 printReg(ss, op1 + FP_Reg_Base); 133 ss << ", "; 134 printReg(ss, op2 + FP_Reg_Base); 135 ss << ", "; 136 printReg(ss, op3 + FP_Reg_Base); 137 return ss.str(); 138} 139 140std::string
95FpRegRegRegImmOp::generateDisassembly(Addr pc, const SymbolTable symtab) const 96{ 97 std::stringstream ss; 98 printMnemonic(ss); 99 printReg(ss, dest + FP_Reg_Base); 100* ss << ", "; 101 printReg(ss, op1 + FP_Reg_Base); 102 ss << ", "; --- 23 unchanged lines hidden (view full) --- 126 case VfpRoundZero: 127 fesetround(FeRoundZero); 128 break; 129 } 130 return roundingMode; 131} 132 133void	141FpRegRegRegImmOp::generateDisassembly(Addr pc, const SymbolTable symtab) const 142{ 143* std::stringstream ss; 144 printMnemonic(ss); 145 printReg(ss, dest + FP_Reg_Base); 146 ss << ", "; 147 printReg(ss, op1 + FP_Reg_Base); 148 ss << ", "; --- 23 unchanged lines hidden (view full) --- 172 case VfpRoundZero: 173 fesetround(FeRoundZero); 174 break; 175 } 176 return roundingMode; 177} 178 179void
134finishVfp(FPSCR &fpscr, VfpSavedState state, bool flush)	180finishVfp(FPSCR &fpscr, VfpSavedState state, bool flush, FPSCR mask)
135{ 136 int exceptions = fetestexcept(FeAllExceptions); 137 bool underflow = false;	181{ 182 int exceptions = fetestexcept(FeAllExceptions); 183 bool underflow = false;
138 if (exceptions & FeInvalid) {	184 if ((exceptions & FeInvalid) && mask.ioc) {
139 fpscr.ioc = 1; 140 }	185 fpscr.ioc = 1; 186 }
141 if (exceptions & FeDivByZero) {	187 if ((exceptions & FeDivByZero) && mask.dzc) {
142 fpscr.dzc = 1; 143 }	188 fpscr.dzc = 1; 189 }
144 if (exceptions & FeOverflow) {	190 if ((exceptions & FeOverflow) && mask.ofc) {
145 fpscr.ofc = 1; 146 } 147 if (exceptions & FeUnderflow) { 148 underflow = true;	191 fpscr.ofc = 1; 192 } 193 if (exceptions & FeUnderflow) { 194 underflow = true;
149 fpscr.ufc = 1;	195 if (mask.ufc) 196 fpscr.ufc = 1;
150 }	197 }
151 if ((exceptions & FeInexact) && !(underflow && flush)) {	198 if ((exceptions & FeInexact) && !(underflow && flush) && mask.ixc) {
152 fpscr.ixc = 1; 153 } 154 fesetround(state); 155} 156 157template <class fpType> 158fpType 159fixDest(bool flush, bool defaultNan, fpType val, fpType op1) --- 164 unchanged lines hidden (view full) --- 324 mid = temp; 325 } 326 } 327 __asm__ __volatile__("" :: "m" (temp)); 328 } 329 return mid; 330} 331	199 fpscr.ixc = 1; 200 } 201 fesetround(state); 202} 203 204template <class fpType> 205fpType 206fixDest(bool flush, bool defaultNan, fpType val, fpType op1) --- 164 unchanged lines hidden (view full) --- 371 mid = temp; 372 } 373 } 374 __asm__ __volatile__("" :: "m" (temp)); 375 } 376 return mid; 377} 378
332uint16_t 333vcvtFpSFpH(FPSCR &fpscr, bool flush, bool defaultNan, 334 uint32_t rMode, bool ahp, float op)	379static inline uint16_t 380vcvtFpFpH(FPSCR &fpscr, bool flush, bool defaultNan, 381 uint32_t rMode, bool ahp, uint64_t opBits, bool isDouble)
335{	382{
336 uint32_t opBits = fpToBits(op);	383 uint32_t mWidth; 384 uint32_t eWidth; 385 uint32_t eHalfRange; 386 uint32_t sBitPos; 387 388 if (isDouble) { 389 mWidth = 52; 390 eWidth = 11; 391 } else { 392 mWidth = 23; 393 eWidth = 8; 394 } 395 sBitPos = eWidth + mWidth; 396 eHalfRange = (1 << (eWidth-1)) - 1; 397
337 // Extract the operand.	398 // Extract the operand.
338 bool neg = bits(opBits, 31); 339 uint32_t exponent = bits(opBits, 30, 23); 340 uint32_t oldMantissa = bits(opBits, 22, 0); 341 uint32_t mantissa = oldMantissa >> (23 - 10);	399 bool neg = bits(opBits, sBitPos); 400 uint32_t exponent = bits(opBits, sBitPos-1, mWidth); 401 uint64_t oldMantissa = bits(opBits, mWidth-1, 0); 402 uint32_t mantissa = oldMantissa >> (mWidth - 10);
342 // Do the conversion.	403 // Do the conversion.
343 uint32_t extra = oldMantissa & mask(23 - 10); 344 if (exponent == 0xff) {	404 uint64_t extra = oldMantissa & mask(mWidth - 10); 405 if (exponent == mask(eWidth)) {
345 if (oldMantissa != 0) { 346 // Nans. 347 if (bits(mantissa, 9) == 0) { 348 // Signalling nan. 349 fpscr.ioc = 1; 350 } 351 if (ahp) { 352 mantissa = 0; --- 21 unchanged lines hidden (view full) --- 374 // Zero, don't need to do anything. 375 } else { 376 // Normalized or denormalized numbers. 377 378 bool inexact = (extra != 0); 379 380 if (exponent == 0) { 381 // Denormalized.	406 if (oldMantissa != 0) { 407 // Nans. 408 if (bits(mantissa, 9) == 0) { 409 // Signalling nan. 410 fpscr.ioc = 1; 411 } 412 if (ahp) { 413 mantissa = 0; --- 21 unchanged lines hidden (view full) --- 435 // Zero, don't need to do anything. 436 } else { 437 // Normalized or denormalized numbers. 438 439 bool inexact = (extra != 0); 440 441 if (exponent == 0) { 442 // Denormalized.
382
383 // If flush to zero is on, this shouldn't happen. 384 assert(!flush); 385 386 // Check for underflow 387 if (inexact \|\| fpscr.ufe) 388 fpscr.ufc = 1; 389 390 // Handle rounding. --- 11 unchanged lines hidden (view full) --- 402 mantissa = 0; 403 exponent = 1; 404 } 405 } else { 406 // Normalized. 407 408 // We need to track the dropped bits differently since 409 // more can be dropped by denormalizing.	443 // If flush to zero is on, this shouldn't happen. 444 assert(!flush); 445 446 // Check for underflow 447 if (inexact \|\| fpscr.ufe) 448 fpscr.ufc = 1; 449 450 // Handle rounding. --- 11 unchanged lines hidden (view full) --- 462 mantissa = 0; 463 exponent = 1; 464 } 465 } else { 466 // Normalized. 467 468 // We need to track the dropped bits differently since 469 // more can be dropped by denormalizing.
410 bool topOne = bits(extra, 12); 411 bool restZeros = bits(extra, 11, 0) == 0;	470 bool topOne = bits(extra, mWidth - 10 - 1); 471 bool restZeros = bits(extra, mWidth - 10 - 2, 0) == 0;
412	472
413 if (exponent <= (127 - 15)) {	473 if (exponent <= (eHalfRange - 15)) {
414 // The result is too small. Denormalize. 415 mantissa \|= (1 << 10);	474 // The result is too small. Denormalize. 475 mantissa \|= (1 << 10);
416 while (mantissa && exponent <= (127 - 15)) {	476 while (mantissa && exponent <= (eHalfRange - 15)) {
417 restZeros = restZeros && !topOne; 418 topOne = bits(mantissa, 0); 419 mantissa = mantissa >> 1; 420 exponent++; 421 } 422 if (topOne \|\| !restZeros) 423 inexact = true; 424 exponent = 0; 425 } else { 426 // Change bias.	477 restZeros = restZeros && !topOne; 478 topOne = bits(mantissa, 0); 479 mantissa = mantissa >> 1; 480 exponent++; 481 } 482 if (topOne \|\| !restZeros) 483 inexact = true; 484 exponent = 0; 485 } else { 486 // Change bias.
427 exponent -= (127 - 15);	487 exponent -= (eHalfRange - 15);
428 } 429 430 if (exponent == 0 && (inexact \|\| fpscr.ufe)) { 431 // Underflow 432 fpscr.ufc = 1; 433 } 434 435 // Handle rounding. --- 47 unchanged lines hidden (view full) --- 483 // Reassemble and install the result. 484 uint32_t result = bits(mantissa, 9, 0); 485 replaceBits(result, 14, 10, exponent); 486 if (neg) 487 result \|= (1 << 15); 488 return result; 489} 490	488 } 489 490 if (exponent == 0 && (inexact \|\| fpscr.ufe)) { 491 // Underflow 492 fpscr.ufc = 1; 493 } 494 495 // Handle rounding. --- 47 unchanged lines hidden (view full) --- 543 // Reassemble and install the result. 544 uint32_t result = bits(mantissa, 9, 0); 545 replaceBits(result, 14, 10, exponent); 546 if (neg) 547 result \|= (1 << 15); 548 return result; 549} 550
491float 492vcvtFpHFpS(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op)	551uint16_t 552vcvtFpSFpH(FPSCR &fpscr, bool flush, bool defaultNan, 553 uint32_t rMode, bool ahp, float op)
493{	554{
494 float junk = 0.0;	555 uint64_t opBits = fpToBits(op); 556 return vcvtFpFpH(fpscr, flush, defaultNan, rMode, ahp, opBits, false); 557} 558 559uint16_t 560vcvtFpDFpH(FPSCR &fpscr, bool flush, bool defaultNan, 561 uint32_t rMode, bool ahp, double op) 562{ 563 uint64_t opBits = fpToBits(op); 564 return vcvtFpFpH(fpscr, flush, defaultNan, rMode, ahp, opBits, true); 565} 566 567static inline uint64_t 568vcvtFpHFp(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op, bool isDouble) 569{ 570 uint32_t mWidth; 571 uint32_t eWidth; 572 uint32_t eHalfRange; 573 uint32_t sBitPos; 574 575 if (isDouble) { 576 mWidth = 52; 577 eWidth = 11; 578 } else { 579 mWidth = 23; 580 eWidth = 8; 581 } 582 sBitPos = eWidth + mWidth; 583 eHalfRange = (1 << (eWidth-1)) - 1; 584
495 // Extract the bitfields. 496 bool neg = bits(op, 15); 497 uint32_t exponent = bits(op, 14, 10);	585 // Extract the bitfields. 586 bool neg = bits(op, 15); 587 uint32_t exponent = bits(op, 14, 10);
498 uint32_t mantissa = bits(op, 9, 0);	588 uint64_t mantissa = bits(op, 9, 0);
499 // Do the conversion. 500 if (exponent == 0) { 501 if (mantissa != 0) { 502 // Normalize the value.	589 // Do the conversion. 590 if (exponent == 0) { 591 if (mantissa != 0) { 592 // Normalize the value.
503 exponent = exponent + (127 - 15) + 1;	593 exponent = exponent + (eHalfRange - 15) + 1;
504 while (mantissa < (1 << 10)) { 505 mantissa = mantissa << 1; 506 exponent--; 507 } 508 }	594 while (mantissa < (1 << 10)) { 595 mantissa = mantissa << 1; 596 exponent--; 597 } 598 }
509 mantissa = mantissa << (23 - 10);	599 mantissa = mantissa << (mWidth - 10);
510 } else if (exponent == 0x1f && !ahp) { 511 // Infinities and nans.	600 } else if (exponent == 0x1f && !ahp) { 601 // Infinities and nans.
512 exponent = 0xff;	602 exponent = mask(eWidth);
513 if (mantissa != 0) { 514 // Nans.	603 if (mantissa != 0) { 604 // Nans.
515 mantissa = mantissa << (23 - 10); 516 if (bits(mantissa, 22) == 0) {	605 mantissa = mantissa << (mWidth - 10); 606 if (bits(mantissa, mWidth-1) == 0) {
517 // Signalling nan. 518 fpscr.ioc = 1;	607 // Signalling nan. 608 fpscr.ioc = 1;
519 mantissa \|= (1 << 22);	609 mantissa \|= (((uint64_t) 1) << (mWidth-1));
520 } 521 if (defaultNan) {	610 } 611 if (defaultNan) {
522 mantissa &= ~mask(22);	612 mantissa &= ~mask(mWidth-1);
523 neg = false; 524 } 525 } 526 } else {	613 neg = false; 614 } 615 } 616 } else {
527 exponent = exponent + (127 - 15); 528 mantissa = mantissa << (23 - 10);	617 exponent = exponent + (eHalfRange - 15); 618 mantissa = mantissa << (mWidth - 10);
529 } 530 // Reassemble the result.	619 } 620 // Reassemble the result.
531 uint32_t result = bits(mantissa, 22, 0); 532 replaceBits(result, 30, 23, exponent); 533 if (neg) 534 result \|= (1 << 31);	621 uint64_t result = bits(mantissa, mWidth-1, 0); 622 replaceBits(result, sBitPos-1, mWidth, exponent); 623 if (neg) { 624 result \|= (((uint64_t) 1) << sBitPos); 625 } 626 return result; 627} 628 629double 630vcvtFpHFpD(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op) 631{ 632 double junk = 0.0; 633 uint64_t result; 634 635 result = vcvtFpHFp(fpscr, defaultNan, ahp, op, true);
535 return bitsToFp(result, junk); 536} 537	636 return bitsToFp(result, junk); 637} 638
538uint64_t 539vfpFpSToFixed(float val, bool isSigned, bool half, 540 uint8_t imm, bool rzero)	639float 640vcvtFpHFpS(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op)
541{	641{
542 int rmode = rzero ? FeRoundZero : fegetround(); 543 __asm__ __volatile__("" : "=m" (rmode) : "m" (rmode)); 544 fesetround(FeRoundNearest); 545 val = val * powf(2.0, imm); 546 __asm__ __volatile__("" : "=m" (val) : "m" (val)); 547 fesetround(rmode); 548 feclearexcept(FeAllExceptions); 549 __asm__ __volatile__("" : "=m" (val) : "m" (val)); 550 float origVal = val; 551 val = rintf(val); 552 int fpType = std::fpclassify(val); 553 if (fpType == FP_SUBNORMAL \|\| fpType == FP_NAN) { 554 if (fpType == FP_NAN) { 555 feraiseexcept(FeInvalid); 556 } 557 val = 0.0; 558 } else if (origVal != val) { 559 switch (rmode) { 560 case FeRoundNearest: 561 if (origVal - val > 0.5) 562 val += 1.0; 563 else if (val - origVal > 0.5) 564 val -= 1.0; 565 break; 566 case FeRoundDown: 567 if (origVal < val) 568 val -= 1.0; 569 break; 570 case FeRoundUpward: 571 if (origVal > val) 572 val += 1.0; 573 break; 574 } 575 feraiseexcept(FeInexact); 576 }	642 float junk = 0.0; 643 uint64_t result;
577	644
578 if (isSigned) { 579 if (half) { 580 if ((double)val < (int16_t)(1 << 15)) { 581 feraiseexcept(FeInvalid); 582 feclearexcept(FeInexact); 583 return (int16_t)(1 << 15); 584 } 585 if ((double)val > (int16_t)mask(15)) { 586 feraiseexcept(FeInvalid); 587 feclearexcept(FeInexact); 588 return (int16_t)mask(15); 589 } 590 return (int16_t)val; 591 } else { 592 if ((double)val < (int32_t)(1 << 31)) { 593 feraiseexcept(FeInvalid); 594 feclearexcept(FeInexact); 595 return (int32_t)(1 << 31); 596 } 597 if ((double)val > (int32_t)mask(31)) { 598 feraiseexcept(FeInvalid); 599 feclearexcept(FeInexact); 600 return (int32_t)mask(31); 601 } 602 return (int32_t)val; 603 } 604 } else { 605 if (half) { 606 if ((double)val < 0) { 607 feraiseexcept(FeInvalid); 608 feclearexcept(FeInexact); 609 return 0; 610 } 611 if ((double)val > (mask(16))) { 612 feraiseexcept(FeInvalid); 613 feclearexcept(FeInexact); 614 return mask(16); 615 } 616 return (uint16_t)val; 617 } else { 618 if ((double)val < 0) { 619 feraiseexcept(FeInvalid); 620 feclearexcept(FeInexact); 621 return 0; 622 } 623 if ((double)val > (mask(32))) { 624 feraiseexcept(FeInvalid); 625 feclearexcept(FeInexact); 626 return mask(32); 627 } 628 return (uint32_t)val; 629 } 630 }	645 result = vcvtFpHFp(fpscr, defaultNan, ahp, op, false); 646 return bitsToFp(result, junk);
631} 632 633float 634vfpUFixedToFpS(bool flush, bool defaultNan,	647} 648 649float 650vfpUFixedToFpS(bool flush, bool defaultNan,
635 uint32_t val, bool half, uint8_t imm)	651 uint64_t val, uint8_t width, uint8_t imm)
636{ 637 fesetround(FeRoundNearest);	652{ 653 fesetround(FeRoundNearest);
638 if (half)	654 if (width == 16)
639 val = (uint16_t)val;	655 val = (uint16_t)val;
	656 else if (width == 32) 657 val = (uint32_t)val; 658 else if (width != 64) 659 panic("Unsupported width %d", width);
640 float scale = powf(2.0, imm); 641 __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); 642 feclearexcept(FeAllExceptions); 643 __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); 644 return fixDivDest(flush, defaultNan, val / scale, (float)val, scale); 645} 646 647float 648vfpSFixedToFpS(bool flush, bool defaultNan,	660 float scale = powf(2.0, imm); 661 __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); 662 feclearexcept(FeAllExceptions); 663 __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); 664 return fixDivDest(flush, defaultNan, val / scale, (float)val, scale); 665} 666 667float 668vfpSFixedToFpS(bool flush, bool defaultNan,
649 int32_t val, bool half, uint8_t imm)	669 int64_t val, uint8_t width, uint8_t imm)
650{ 651 fesetround(FeRoundNearest);	670{ 671 fesetround(FeRoundNearest);
652 if (half)	672 if (width == 16)
653 val = sext<16>(val & mask(16));	673 val = sext<16>(val & mask(16));
	674 else if (width == 32) 675 val = sext<32>(val & mask(32)); 676 else if (width != 64) 677 panic("Unsupported width %d", width); 678
654 float scale = powf(2.0, imm); 655 __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); 656 feclearexcept(FeAllExceptions); 657 __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); 658 return fixDivDest(flush, defaultNan, val / scale, (float)val, scale); 659} 660	679 float scale = powf(2.0, imm); 680 __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); 681 feclearexcept(FeAllExceptions); 682 __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); 683 return fixDivDest(flush, defaultNan, val / scale, (float)val, scale); 684} 685
/*
 * vfpFpDToFixed: scale a double by 2^imm, round it to an integral value and
 * saturate it into a 16- or 32-bit signed/unsigned integer range.
 *
 * Parameters:
 *   val      - value to convert (modified locally).
 *   isSigned - select the signed (int16_t/int32_t) or unsigned
 *              (uint16_t/uint32_t) result range.
 *   half     - true selects the 16-bit range, false the 32-bit range.
 *   imm      - exponent of the power-of-two scale applied before rounding.
 *   rzero    - true forces FeRoundZero for the rounding step; otherwise the
 *              mode returned by fegetround() on entry is used.
 *
 * Sequence (the order matters because it manipulates the host FP
 * environment):
 *   1. The scale multiply is done under FeRoundNearest; the selected mode is
 *      then installed and all host exception flags are cleared.
 *   2. rint() rounds the scaled value. A NaN result raises FeInvalid and is
 *      replaced by 0.0; a subnormal result is replaced by 0.0 without
 *      raising anything.
 *   3. If rounding changed the value, it is adjusted by +/-1.0 according to
 *      the selected mode and FeInexact is raised.
 *   4. Out-of-range values raise FeInvalid, clear FeInexact and return the
 *      nearest bound of the selected range.
 *
 * Returns the converted value as uint64_t. The signed paths cast to
 * int16_t/int32_t first, so negative results are sign-extended by the
 * implicit conversion to the return type.
 *
 * The empty asm statements with "m" constraints are the reordering barriers
 * described by the comment near the top of the file.
 *
 * NOTE(review): in this diff listing the function appears in the left-hand
 * (older revision) column only - confirm whether it still exists elsewhere
 * before relying on it.
 */
661uint64_t 662vfpFpDToFixed(double val, bool isSigned, bool half, 663 uint8_t imm, bool rzero) 664{ 665 int rmode = rzero ? FeRoundZero : fegetround(); 666 fesetround(FeRoundNearest); 667 val = val * pow(2.0, imm); 668 __asm__ __volatile__("" : "=m" (val) : "m" (val)); 669 fesetround(rmode); 670 feclearexcept(FeAllExceptions); 671 __asm__ __volatile__("" : "=m" (val) : "m" (val)); 672 double origVal = val; 673 val = rint(val); 674 int fpType = std::fpclassify(val); 675 if (fpType == FP_SUBNORMAL \|\| fpType == FP_NAN) { 676 if (fpType == FP_NAN) { 677 feraiseexcept(FeInvalid); 678 } 679 val = 0.0; 680 } else if (origVal != val) { 681 switch (rmode) { 682 case FeRoundNearest: 683 if (origVal - val > 0.5) 684 val += 1.0; 685 else if (val - origVal > 0.5) 686 val -= 1.0; 687 break; 688 case FeRoundDown: 689 if (origVal < val) 690 val -= 1.0; 691 break; 692 case FeRoundUpward: 693 if (origVal > val) 694 val += 1.0; 695 break; 696 } 697 feraiseexcept(FeInexact); 698 } 699 if (isSigned) { 700 if (half) { 701 if (val < (int16_t)(1 << 15)) { 702 feraiseexcept(FeInvalid); 703 feclearexcept(FeInexact); 704 return (int16_t)(1 << 15); 705 } 706 if (val > (int16_t)mask(15)) { 707 feraiseexcept(FeInvalid); 708 feclearexcept(FeInexact); 709 return (int16_t)mask(15); 710 } 711 return (int16_t)val; 712 } else { 713 if (val < (int32_t)(1 << 31)) { 714 feraiseexcept(FeInvalid); 715 feclearexcept(FeInexact); 716 return (int32_t)(1 << 31); 717 } 718 if (val > (int32_t)mask(31)) { 719 feraiseexcept(FeInvalid); 720 feclearexcept(FeInexact); 721 return (int32_t)mask(31); 722 } 723 return (int32_t)val; 724 } 725 } else { 726 if (half) { 727 if (val < 0) { 728 feraiseexcept(FeInvalid); 729 feclearexcept(FeInexact); 730 return 0; 731 } 732 if (val > mask(16)) { 733 feraiseexcept(FeInvalid); 734 feclearexcept(FeInexact); 735 return mask(16); 736 } 737 return (uint16_t)val; 738 } else { 739 if (val < 0) { 740 feraiseexcept(FeInvalid); 741 feclearexcept(FeInexact); 742 return 0; 743 } 744 if 
(val > mask(32)) { 745 feraiseexcept(FeInvalid); 746 feclearexcept(FeInexact); 747 return mask(32); 748 } 749 return (uint32_t)val; 750 } 751 } 752}
753 754double 755vfpUFixedToFpD(bool flush, bool defaultNan,	686 687double 688vfpUFixedToFpD(bool flush, bool defaultNan,
756 uint32_t val, bool half, uint8_t imm)	689 uint64_t val, uint8_t width, uint8_t imm)
757{ 758 fesetround(FeRoundNearest);	690{ 691 fesetround(FeRoundNearest);
759 if (half)	692 if (width == 16)
760 val = (uint16_t)val;	693 val = (uint16_t)val;
	694 else if (width == 32) 695 val = (uint32_t)val; 696 else if (width != 64) 697 panic("Unsupported width %d", width); 698
761 double scale = pow(2.0, imm); 762 __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); 763 feclearexcept(FeAllExceptions); 764 __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); 765 return fixDivDest(flush, defaultNan, val / scale, (double)val, scale); 766} 767 768double 769vfpSFixedToFpD(bool flush, bool defaultNan,	699 double scale = pow(2.0, imm); 700 __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); 701 feclearexcept(FeAllExceptions); 702 __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); 703 return fixDivDest(flush, defaultNan, val / scale, (double)val, scale); 704} 705 706double 707vfpSFixedToFpD(bool flush, bool defaultNan,
770 int32_t val, bool half, uint8_t imm)	708 int64_t val, uint8_t width, uint8_t imm)
771{ 772 fesetround(FeRoundNearest);	709{ 710 fesetround(FeRoundNearest);
773 if (half)	711 if (width == 16)
774 val = sext<16>(val & mask(16));	712 val = sext<16>(val & mask(16));
	713 else if (width == 32) 714 val = sext<32>(val & mask(32)); 715 else if (width != 64) 716 panic("Unsupported width %d", width); 717
775 double scale = pow(2.0, imm); 776 __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); 777 feclearexcept(FeAllExceptions); 778 __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); 779 return fixDivDest(flush, defaultNan, val / scale, (double)val, scale); 780} 781 782// This function implements a magic formula taken from the architecture --- 188 unchanged lines hidden (view full) --- 971 972template 973float FpOp::processNans(FPSCR &fpscr, bool &done, bool defaultNan, 974 float op1, float op2) const; 975template 976double FpOp::processNans(FPSCR &fpscr, bool &done, bool defaultNan, 977 double op1, double op2) const; 978	718 double scale = pow(2.0, imm); 719 __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); 720 feclearexcept(FeAllExceptions); 721 __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); 722 return fixDivDest(flush, defaultNan, val / scale, (double)val, scale); 723} 724 725// This function implements a magic formula taken from the architecture --- 188 unchanged lines hidden (view full) --- 914 915template 916float FpOp::processNans(FPSCR &fpscr, bool &done, bool defaultNan, 917 float op1, float op2) const; 918template 919double FpOp::processNans(FPSCR &fpscr, bool &done, bool defaultNan, 920 double op1, double op2) const; 921
	922// @TODO remove this function when we've finished switching all FMA code to use the new FPLIB
979template <class fpType> 980fpType	923template <class fpType> 924fpType
	925FpOp::ternaryOp(FPSCR &fpscr, fpType op1, fpType op2, fpType op3, 926 fpType (func)(fpType, fpType, fpType), 927* bool flush, bool defaultNan, uint32_t rMode) const 928{ 929 const bool single = (sizeof(fpType) == sizeof(float)); 930 fpType junk = 0.0; 931 932 if (flush && (flushToZero(op1, op2) \|\| flushToZero(op3))) 933 fpscr.idc = 1; 934 VfpSavedState state = prepFpState(rMode); 935 __asm__ __volatile__ ("" : "=m" (op1), "=m" (op2), "=m" (op3), "=m" (state) 936 : "m" (op1), "m" (op2), "m" (op3), "m" (state)); 937 fpType dest = func(op1, op2, op3); 938 __asm__ __volatile__ ("" : "=m" (dest) : "m" (dest)); 939 940 int fpClass = std::fpclassify(dest); 941 // Get NAN behavior right. This varies between x86 and ARM. 942 if (fpClass == FP_NAN) { 943 const uint64_t qnan = 944 single ? 0x7fc00000 : ULL(0x7ff8000000000000); 945 const bool nan1 = std::isnan(op1); 946 const bool nan2 = std::isnan(op2); 947 const bool nan3 = std::isnan(op3); 948 const bool signal1 = nan1 && ((fpToBits(op1) & qnan) != qnan); 949 const bool signal2 = nan2 && ((fpToBits(op2) & qnan) != qnan); 950 const bool signal3 = nan3 && ((fpToBits(op3) & qnan) != qnan); 951 if ((!nan1 && !nan2 && !nan3) \|\| (defaultNan == 1)) { 952 dest = bitsToFp(qnan, junk); 953 } else if (signal1) { 954 dest = bitsToFp(fpToBits(op1) \| qnan, junk); 955 } else if (signal2) { 956 dest = bitsToFp(fpToBits(op2) \| qnan, junk); 957 } else if (signal3) { 958 dest = bitsToFp(fpToBits(op3) \| qnan, junk); 959 } else if (nan1) { 960 dest = op1; 961 } else if (nan2) { 962 dest = op2; 963 } else if (nan3) { 964 dest = op3; 965 } 966 } else if (flush && flushToZero(dest)) { 967 feraiseexcept(FeUnderflow); 968 } else if (( 969 (single && (dest == bitsToFp(0x00800000, junk) \|\| 970 dest == bitsToFp(0x80800000, junk))) \|\| 971 (!single && 972 (dest == bitsToFp(ULL(0x0010000000000000), junk) \|\| 973 dest == bitsToFp(ULL(0x8010000000000000), junk))) 974 ) && rMode != VfpRoundZero) { 975 /* 976 * Correct for the fact that 
underflow is detected -before- rounding 977 * in ARM and -after- rounding in x86. 978 / 979* fesetround(FeRoundZero); 980 __asm__ __volatile__ ("" : "=m" (op1), "=m" (op2), "=m" (op3) 981 : "m" (op1), "m" (op2), "m" (op3)); 982 fpType temp = func(op1, op2, op2); 983 __asm__ __volatile__ ("" : "=m" (temp) : "m" (temp)); 984 if (flush && flushToZero(temp)) { 985 dest = temp; 986 } 987 } 988 finishVfp(fpscr, state, flush); 989 return dest; 990} 991 992template 993float FpOp::ternaryOp(FPSCR &fpscr, float op1, float op2, float op3, 994 float (func)(float, float, float), 995* bool flush, bool defaultNan, uint32_t rMode) const; 996template 997double FpOp::ternaryOp(FPSCR &fpscr, double op1, double op2, double op3, 998 double (func)(double, double, double), 999* bool flush, bool defaultNan, uint32_t rMode) const; 1000 1001template <class fpType> 1002fpType
981FpOp::binaryOp(FPSCR &fpscr, fpType op1, fpType op2, 982 fpType (func)(fpType, fpType), 983* bool flush, bool defaultNan, uint32_t rMode) const 984{ 985 const bool single = (sizeof(fpType) == sizeof(float)); 986 fpType junk = 0.0; 987 988 if (flush && flushToZero(op1, op2)) --- 164 unchanged lines hidden ---	1003FpOp::binaryOp(FPSCR &fpscr, fpType op1, fpType op2, 1004 fpType (func)(fpType, fpType), 1005* bool flush, bool defaultNan, uint32_t rMode) const 1006{ 1007 const bool single = (sizeof(fpType) == sizeof(float)); 1008 fpType junk = 0.0; 1009 1010 if (flush && flushToZero(op1, op2)) --- 164 unchanged lines hidden ---

1/*

3 * All rights reserved
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder. You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated

--- 30 unchanged lines hidden (view full) ---

41
42/*
43 * The asm statements below are to keep gcc from reordering code. Otherwise
44 * the rounding mode might be set after the operation it was intended for, the
45 * exception bits read before it, etc.
46 */
47
48std::string

49FpCondCompRegOp::generateDisassembly(
50 Addr pc, const SymbolTable *symtab) const
51{
52 std::stringstream ss;
53 printMnemonic(ss, "", false);
54 printReg(ss, op1);
55 ccprintf(ss, ", ");
56 printReg(ss, op2);
57 ccprintf(ss, ", #%d", defCc);
58 ccprintf(ss, ", ");
59 printCondition(ss, condCode, true);
60 return ss.str();
61}
62
63std::string
64FpCondSelOp::generateDisassembly(
65 Addr pc, const SymbolTable *symtab) const
66{
67 std::stringstream ss;
68 printMnemonic(ss, "", false);
69 printReg(ss, dest);
70 ccprintf(ss, ", ");
71 printReg(ss, op1);
72 ccprintf(ss, ", ");
73 printReg(ss, op2);
74 ccprintf(ss, ", ");
75 printCondition(ss, condCode, true);
76 return ss.str();
77}
78
79std::string

49FpRegRegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
50{
51 std::stringstream ss;
52 printMnemonic(ss);
53 printReg(ss, dest + FP_Reg_Base);
54 ss << ", ";
55 printReg(ss, op1 + FP_Reg_Base);
56 return ss.str();

--- 30 unchanged lines hidden (view full) ---

87 ss << ", ";
88 printReg(ss, op1 + FP_Reg_Base);
89 ss << ", ";
90 printReg(ss, op2 + FP_Reg_Base);
91 return ss.str();
92}
93
94std::string

80FpRegRegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
81{
82 std::stringstream ss;
83 printMnemonic(ss);
84 printReg(ss, dest + FP_Reg_Base);
85 ss << ", ";
86 printReg(ss, op1 + FP_Reg_Base);
87 return ss.str();

--- 30 unchanged lines hidden (view full) ---

118 ss << ", ";
119 printReg(ss, op1 + FP_Reg_Base);
120 ss << ", ";
121 printReg(ss, op2 + FP_Reg_Base);
122 return ss.str();
123}
124
125std::string

126FpRegRegRegRegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
127{
128 std::stringstream ss;
129 printMnemonic(ss);
130 printReg(ss, dest + FP_Reg_Base);
131 ss << ", ";
132 printReg(ss, op1 + FP_Reg_Base);
133 ss << ", ";
134 printReg(ss, op2 + FP_Reg_Base);
135 ss << ", ";
136 printReg(ss, op3 + FP_Reg_Base);
137 return ss.str();
138}
139
140std::string

95FpRegRegRegImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
96{
97 std::stringstream ss;
98 printMnemonic(ss);
99 printReg(ss, dest + FP_Reg_Base);
100 ss << ", ";
101 printReg(ss, op1 + FP_Reg_Base);
102 ss << ", ";

--- 23 unchanged lines hidden (view full) ---

126 case VfpRoundZero:
127 fesetround(FeRoundZero);
128 break;
129 }
130 return roundingMode;
131}
132
133void

141FpRegRegRegImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
142{
143 std::stringstream ss;
144 printMnemonic(ss);
145 printReg(ss, dest + FP_Reg_Base);
146 ss << ", ";
147 printReg(ss, op1 + FP_Reg_Base);
148 ss << ", ";

--- 23 unchanged lines hidden (view full) ---

172 case VfpRoundZero:
173 fesetround(FeRoundZero);
174 break;
175 }
176 return roundingMode;
177}
178
179void

134finishVfp(FPSCR &fpscr, VfpSavedState state, bool flush)

180finishVfp(FPSCR &fpscr, VfpSavedState state, bool flush, FPSCR mask)

135{
136 int exceptions = fetestexcept(FeAllExceptions);
137 bool underflow = false;

181{
182 int exceptions = fetestexcept(FeAllExceptions);
183 bool underflow = false;

138 if (exceptions & FeInvalid) {

184 if ((exceptions & FeInvalid) && mask.ioc) {

139 fpscr.ioc = 1;
140 }

185 fpscr.ioc = 1;
186 }

141 if (exceptions & FeDivByZero) {

187 if ((exceptions & FeDivByZero) && mask.dzc) {

142 fpscr.dzc = 1;
143 }

188 fpscr.dzc = 1;
189 }

144 if (exceptions & FeOverflow) {

190 if ((exceptions & FeOverflow) && mask.ofc) {

145 fpscr.ofc = 1;
146 }
147 if (exceptions & FeUnderflow) {
148 underflow = true;

191 fpscr.ofc = 1;
192 }
193 if (exceptions & FeUnderflow) {
194 underflow = true;

149 fpscr.ufc = 1;

195 if (mask.ufc)
196 fpscr.ufc = 1;

150 }

197 }

151 if ((exceptions & FeInexact) && !(underflow && flush)) {

198 if ((exceptions & FeInexact) && !(underflow && flush) && mask.ixc) {

152 fpscr.ixc = 1;
153 }
154 fesetround(state);
155}
156
157template <class fpType>
158fpType
159fixDest(bool flush, bool defaultNan, fpType val, fpType op1)

--- 164 unchanged lines hidden (view full) ---

324 mid = temp;
325 }
326 }
327 __asm__ __volatile__("" :: "m" (temp));
328 }
329 return mid;
330}
331

199 fpscr.ixc = 1;
200 }
201 fesetround(state);
202}
203
204template <class fpType>
205fpType
206fixDest(bool flush, bool defaultNan, fpType val, fpType op1)

--- 164 unchanged lines hidden (view full) ---

371 mid = temp;
372 }
373 }
374 __asm__ __volatile__("" :: "m" (temp));
375 }
376 return mid;
377}
378

332uint16_t
333vcvtFpSFpH(FPSCR &fpscr, bool flush, bool defaultNan,
334 uint32_t rMode, bool ahp, float op)

379static inline uint16_t
380vcvtFpFpH(FPSCR &fpscr, bool flush, bool defaultNan,
381 uint32_t rMode, bool ahp, uint64_t opBits, bool isDouble)

335{

382{

336 uint32_t opBits = fpToBits(op);

383 uint32_t mWidth;
384 uint32_t eWidth;
385 uint32_t eHalfRange;
386 uint32_t sBitPos;
387
388 if (isDouble) {
389 mWidth = 52;
390 eWidth = 11;
391 } else {
392 mWidth = 23;
393 eWidth = 8;
394 }
395 sBitPos = eWidth + mWidth;
396 eHalfRange = (1 << (eWidth-1)) - 1;
397

337 // Extract the operand.

398 // Extract the operand.

338 bool neg = bits(opBits, 31);
339 uint32_t exponent = bits(opBits, 30, 23);
340 uint32_t oldMantissa = bits(opBits, 22, 0);
341 uint32_t mantissa = oldMantissa >> (23 - 10);

399 bool neg = bits(opBits, sBitPos);
400 uint32_t exponent = bits(opBits, sBitPos-1, mWidth);
401 uint64_t oldMantissa = bits(opBits, mWidth-1, 0);
402 uint32_t mantissa = oldMantissa >> (mWidth - 10);

342 // Do the conversion.

403 // Do the conversion.

343 uint32_t extra = oldMantissa & mask(23 - 10);
344 if (exponent == 0xff) {

404 uint64_t extra = oldMantissa & mask(mWidth - 10);
405 if (exponent == mask(eWidth)) {

345 if (oldMantissa != 0) {
346 // Nans.
347 if (bits(mantissa, 9) == 0) {
348 // Signalling nan.
349 fpscr.ioc = 1;
350 }
351 if (ahp) {
352 mantissa = 0;

--- 21 unchanged lines hidden (view full) ---

374 // Zero, don't need to do anything.
375 } else {
376 // Normalized or denormalized numbers.
377
378 bool inexact = (extra != 0);
379
380 if (exponent == 0) {
381 // Denormalized.

406 if (oldMantissa != 0) {
407 // Nans.
408 if (bits(mantissa, 9) == 0) {
409 // Signalling nan.
410 fpscr.ioc = 1;
411 }
412 if (ahp) {
413 mantissa = 0;

--- 21 unchanged lines hidden (view full) ---

435 // Zero, don't need to do anything.
436 } else {
437 // Normalized or denormalized numbers.
438
439 bool inexact = (extra != 0);
440
441 if (exponent == 0) {
442 // Denormalized.

382

383 // If flush to zero is on, this shouldn't happen.
384 assert(!flush);
385
386 // Check for underflow
387 if (inexact || fpscr.ufe)
388 fpscr.ufc = 1;
389
390 // Handle rounding.

--- 11 unchanged lines hidden (view full) ---

402 mantissa = 0;
403 exponent = 1;
404 }
405 } else {
406 // Normalized.
407
408 // We need to track the dropped bits differently since
409 // more can be dropped by denormalizing.

443 // If flush to zero is on, this shouldn't happen.
444 assert(!flush);
445
446 // Check for underflow
447 if (inexact || fpscr.ufe)
448 fpscr.ufc = 1;
449
450 // Handle rounding.

--- 11 unchanged lines hidden (view full) ---

462 mantissa = 0;
463 exponent = 1;
464 }
465 } else {
466 // Normalized.
467
468 // We need to track the dropped bits differently since
469 // more can be dropped by denormalizing.

410 bool topOne = bits(extra, 12);
411 bool restZeros = bits(extra, 11, 0) == 0;

470 bool topOne = bits(extra, mWidth - 10 - 1);
471 bool restZeros = bits(extra, mWidth - 10 - 2, 0) == 0;

412

472

413 if (exponent <= (127 - 15)) {

473 if (exponent <= (eHalfRange - 15)) {

414 // The result is too small. Denormalize.
415 mantissa |= (1 << 10);

474 // The result is too small. Denormalize.
475 mantissa |= (1 << 10);

416 while (mantissa && exponent <= (127 - 15)) {

476 while (mantissa && exponent <= (eHalfRange - 15)) {

417 restZeros = restZeros && !topOne;
418 topOne = bits(mantissa, 0);
419 mantissa = mantissa >> 1;
420 exponent++;
421 }
422 if (topOne || !restZeros)
423 inexact = true;
424 exponent = 0;
425 } else {
426 // Change bias.

477 restZeros = restZeros && !topOne;
478 topOne = bits(mantissa, 0);
479 mantissa = mantissa >> 1;
480 exponent++;
481 }
482 if (topOne || !restZeros)
483 inexact = true;
484 exponent = 0;
485 } else {
486 // Change bias.

427 exponent -= (127 - 15);

487 exponent -= (eHalfRange - 15);

428 }
429
430 if (exponent == 0 && (inexact || fpscr.ufe)) {
431 // Underflow
432 fpscr.ufc = 1;
433 }
434
435 // Handle rounding.

--- 47 unchanged lines hidden (view full) ---

483 // Reassemble and install the result.
484 uint32_t result = bits(mantissa, 9, 0);
485 replaceBits(result, 14, 10, exponent);
486 if (neg)
487 result |= (1 << 15);
488 return result;
489}
490

488 }
489
490 if (exponent == 0 && (inexact || fpscr.ufe)) {
491 // Underflow
492 fpscr.ufc = 1;
493 }
494
495 // Handle rounding.

--- 47 unchanged lines hidden (view full) ---

543 // Reassemble and install the result.
544 uint32_t result = bits(mantissa, 9, 0);
545 replaceBits(result, 14, 10, exponent);
546 if (neg)
547 result |= (1 << 15);
548 return result;
549}
550

491float
492vcvtFpHFpS(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op)

551uint16_t
552vcvtFpSFpH(FPSCR &fpscr, bool flush, bool defaultNan,
553 uint32_t rMode, bool ahp, float op)

493{

554{

494 float junk = 0.0;

555 uint64_t opBits = fpToBits(op);
556 return vcvtFpFpH(fpscr, flush, defaultNan, rMode, ahp, opBits, false);
557}
558
559uint16_t
560vcvtFpDFpH(FPSCR &fpscr, bool flush, bool defaultNan,
561 uint32_t rMode, bool ahp, double op)
562{
563 uint64_t opBits = fpToBits(op);
564 return vcvtFpFpH(fpscr, flush, defaultNan, rMode, ahp, opBits, true);
565}
566
567static inline uint64_t
568vcvtFpHFp(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op, bool isDouble)
569{
570 uint32_t mWidth;
571 uint32_t eWidth;
572 uint32_t eHalfRange;
573 uint32_t sBitPos;
574
575 if (isDouble) {
576 mWidth = 52;
577 eWidth = 11;
578 } else {
579 mWidth = 23;
580 eWidth = 8;
581 }
582 sBitPos = eWidth + mWidth;
583 eHalfRange = (1 << (eWidth-1)) - 1;
584

495 // Extract the bitfields.
496 bool neg = bits(op, 15);
497 uint32_t exponent = bits(op, 14, 10);

585 // Extract the bitfields.
586 bool neg = bits(op, 15);
587 uint32_t exponent = bits(op, 14, 10);

498 uint32_t mantissa = bits(op, 9, 0);

588 uint64_t mantissa = bits(op, 9, 0);

499 // Do the conversion.
500 if (exponent == 0) {
501 if (mantissa != 0) {
502 // Normalize the value.

589 // Do the conversion.
590 if (exponent == 0) {
591 if (mantissa != 0) {
592 // Normalize the value.

503 exponent = exponent + (127 - 15) + 1;

593 exponent = exponent + (eHalfRange - 15) + 1;

504 while (mantissa < (1 << 10)) {
505 mantissa = mantissa << 1;
506 exponent--;
507 }
508 }

594 while (mantissa < (1 << 10)) {
595 mantissa = mantissa << 1;
596 exponent--;
597 }
598 }

509 mantissa = mantissa << (23 - 10);

599 mantissa = mantissa << (mWidth - 10);

510 } else if (exponent == 0x1f && !ahp) {
511 // Infinities and nans.

600 } else if (exponent == 0x1f && !ahp) {
601 // Infinities and nans.

512 exponent = 0xff;

602 exponent = mask(eWidth);

513 if (mantissa != 0) {
514 // Nans.

603 if (mantissa != 0) {
604 // Nans.

515 mantissa = mantissa << (23 - 10);
516 if (bits(mantissa, 22) == 0) {

605 mantissa = mantissa << (mWidth - 10);
606 if (bits(mantissa, mWidth-1) == 0) {

517 // Signalling nan.
518 fpscr.ioc = 1;

607 // Signalling nan.
608 fpscr.ioc = 1;

519 mantissa |= (1 << 22);

609 mantissa |= (((uint64_t) 1) << (mWidth-1));

520 }
521 if (defaultNan) {

610 }
611 if (defaultNan) {

522 mantissa &= ~mask(22);

612 mantissa &= ~mask(mWidth-1);

523 neg = false;
524 }
525 }
526 } else {

613 neg = false;
614 }
615 }
616 } else {

527 exponent = exponent + (127 - 15);
528 mantissa = mantissa << (23 - 10);

617 exponent = exponent + (eHalfRange - 15);
618 mantissa = mantissa << (mWidth - 10);

529 }
530 // Reassemble the result.

619 }
620 // Reassemble the result.

531 uint32_t result = bits(mantissa, 22, 0);
532 replaceBits(result, 30, 23, exponent);
533 if (neg)
534 result |= (1 << 31);

621 uint64_t result = bits(mantissa, mWidth-1, 0);
622 replaceBits(result, sBitPos-1, mWidth, exponent);
623 if (neg) {
624 result |= (((uint64_t) 1) << sBitPos);
625 }
626 return result;
627}
628
629double
630vcvtFpHFpD(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op)
631{
632 double junk = 0.0;
633 uint64_t result;
634
635 result = vcvtFpHFp(fpscr, defaultNan, ahp, op, true);

535 return bitsToFp(result, junk);
536}
537

636 return bitsToFp(result, junk);
637}
638

538uint64_t
539vfpFpSToFixed(float val, bool isSigned, bool half,
540 uint8_t imm, bool rzero)

639float
640vcvtFpHFpS(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op)

541{

641{

542 int rmode = rzero ? FeRoundZero : fegetround();
543 __asm__ __volatile__("" : "=m" (rmode) : "m" (rmode));
544 fesetround(FeRoundNearest);
545 val = val * powf(2.0, imm);
546 __asm__ __volatile__("" : "=m" (val) : "m" (val));
547 fesetround(rmode);
548 feclearexcept(FeAllExceptions);
549 __asm__ __volatile__("" : "=m" (val) : "m" (val));
550 float origVal = val;
551 val = rintf(val);
552 int fpType = std::fpclassify(val);
553 if (fpType == FP_SUBNORMAL || fpType == FP_NAN) {
554 if (fpType == FP_NAN) {
555 feraiseexcept(FeInvalid);
556 }
557 val = 0.0;
558 } else if (origVal != val) {
559 switch (rmode) {
560 case FeRoundNearest:
561 if (origVal - val > 0.5)
562 val += 1.0;
563 else if (val - origVal > 0.5)
564 val -= 1.0;
565 break;
566 case FeRoundDown:
567 if (origVal < val)
568 val -= 1.0;
569 break;
570 case FeRoundUpward:
571 if (origVal > val)
572 val += 1.0;
573 break;
574 }
575 feraiseexcept(FeInexact);
576 }

642 float junk = 0.0;
643 uint64_t result;

577

644

578 if (isSigned) {
579 if (half) {
580 if ((double)val < (int16_t)(1 << 15)) {
581 feraiseexcept(FeInvalid);
582 feclearexcept(FeInexact);
583 return (int16_t)(1 << 15);
584 }
585 if ((double)val > (int16_t)mask(15)) {
586 feraiseexcept(FeInvalid);
587 feclearexcept(FeInexact);
588 return (int16_t)mask(15);
589 }
590 return (int16_t)val;
591 } else {
592 if ((double)val < (int32_t)(1 << 31)) {
593 feraiseexcept(FeInvalid);
594 feclearexcept(FeInexact);
595 return (int32_t)(1 << 31);
596 }
597 if ((double)val > (int32_t)mask(31)) {
598 feraiseexcept(FeInvalid);
599 feclearexcept(FeInexact);
600 return (int32_t)mask(31);
601 }
602 return (int32_t)val;
603 }
604 } else {
605 if (half) {
606 if ((double)val < 0) {
607 feraiseexcept(FeInvalid);
608 feclearexcept(FeInexact);
609 return 0;
610 }
611 if ((double)val > (mask(16))) {
612 feraiseexcept(FeInvalid);
613 feclearexcept(FeInexact);
614 return mask(16);
615 }
616 return (uint16_t)val;
617 } else {
618 if ((double)val < 0) {
619 feraiseexcept(FeInvalid);
620 feclearexcept(FeInexact);
621 return 0;
622 }
623 if ((double)val > (mask(32))) {
624 feraiseexcept(FeInvalid);
625 feclearexcept(FeInexact);
626 return mask(32);
627 }
628 return (uint32_t)val;
629 }
630 }

645 result = vcvtFpHFp(fpscr, defaultNan, ahp, op, false);
646 return bitsToFp(result, junk);

631}
632
633float
634vfpUFixedToFpS(bool flush, bool defaultNan,

647}
648
649float
650vfpUFixedToFpS(bool flush, bool defaultNan,

635 uint32_t val, bool half, uint8_t imm)

651 uint64_t val, uint8_t width, uint8_t imm)

636{
637 fesetround(FeRoundNearest);

652{
653 fesetround(FeRoundNearest);

638 if (half)

654 if (width == 16)

639 val = (uint16_t)val;

655 val = (uint16_t)val;

656 else if (width == 32)
657 val = (uint32_t)val;
658 else if (width != 64)
659 panic("Unsupported width %d", width);

640 float scale = powf(2.0, imm);
641 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
642 feclearexcept(FeAllExceptions);
643 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
644 return fixDivDest(flush, defaultNan, val / scale, (float)val, scale);
645}
646
647float
648vfpSFixedToFpS(bool flush, bool defaultNan,

660 float scale = powf(2.0, imm);
661 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
662 feclearexcept(FeAllExceptions);
663 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
664 return fixDivDest(flush, defaultNan, val / scale, (float)val, scale);
665}
666
667float
668vfpSFixedToFpS(bool flush, bool defaultNan,

649 int32_t val, bool half, uint8_t imm)

669 int64_t val, uint8_t width, uint8_t imm)

650{
651 fesetround(FeRoundNearest);

670{
671 fesetround(FeRoundNearest);

652 if (half)

672 if (width == 16)

653 val = sext<16>(val & mask(16));

673 val = sext<16>(val & mask(16));

674 else if (width == 32)
675 val = sext<32>(val & mask(32));
676 else if (width != 64)
677 panic("Unsupported width %d", width);
678

654 float scale = powf(2.0, imm);
655 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
656 feclearexcept(FeAllExceptions);
657 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
658 return fixDivDest(flush, defaultNan, val / scale, (float)val, scale);
659}
660

679 float scale = powf(2.0, imm);
680 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
681 feclearexcept(FeAllExceptions);
682 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
683 return fixDivDest(flush, defaultNan, val / scale, (float)val, scale);
684}
685

661uint64_t
662vfpFpDToFixed(double val, bool isSigned, bool half,
663 uint8_t imm, bool rzero)
664{
665 int rmode = rzero ? FeRoundZero : fegetround();
666 fesetround(FeRoundNearest);
667 val = val * pow(2.0, imm);
668 __asm__ __volatile__("" : "=m" (val) : "m" (val));
669 fesetround(rmode);
670 feclearexcept(FeAllExceptions);
671 __asm__ __volatile__("" : "=m" (val) : "m" (val));
672 double origVal = val;
673 val = rint(val);
674 int fpType = std::fpclassify(val);
675 if (fpType == FP_SUBNORMAL || fpType == FP_NAN) {
676 if (fpType == FP_NAN) {
677 feraiseexcept(FeInvalid);
678 }
679 val = 0.0;
680 } else if (origVal != val) {
681 switch (rmode) {
682 case FeRoundNearest:
683 if (origVal - val > 0.5)
684 val += 1.0;
685 else if (val - origVal > 0.5)
686 val -= 1.0;
687 break;
688 case FeRoundDown:
689 if (origVal < val)
690 val -= 1.0;
691 break;
692 case FeRoundUpward:
693 if (origVal > val)
694 val += 1.0;
695 break;
696 }
697 feraiseexcept(FeInexact);
698 }
699 if (isSigned) {
700 if (half) {
701 if (val < (int16_t)(1 << 15)) {
702 feraiseexcept(FeInvalid);
703 feclearexcept(FeInexact);
704 return (int16_t)(1 << 15);
705 }
706 if (val > (int16_t)mask(15)) {
707 feraiseexcept(FeInvalid);
708 feclearexcept(FeInexact);
709 return (int16_t)mask(15);
710 }
711 return (int16_t)val;
712 } else {
713 if (val < (int32_t)(1 << 31)) {
714 feraiseexcept(FeInvalid);
715 feclearexcept(FeInexact);
716 return (int32_t)(1 << 31);
717 }
718 if (val > (int32_t)mask(31)) {
719 feraiseexcept(FeInvalid);
720 feclearexcept(FeInexact);
721 return (int32_t)mask(31);
722 }
723 return (int32_t)val;
724 }
725 } else {
726 if (half) {
727 if (val < 0) {
728 feraiseexcept(FeInvalid);
729 feclearexcept(FeInexact);
730 return 0;
731 }
732 if (val > mask(16)) {
733 feraiseexcept(FeInvalid);
734 feclearexcept(FeInexact);
735 return mask(16);
736 }
737 return (uint16_t)val;
738 } else {
739 if (val < 0) {
740 feraiseexcept(FeInvalid);
741 feclearexcept(FeInexact);
742 return 0;
743 }
744 if (val > mask(32)) {
745 feraiseexcept(FeInvalid);
746 feclearexcept(FeInexact);
747 return mask(32);
748 }
749 return (uint32_t)val;
750 }
751 }
752}

753
754double
755vfpUFixedToFpD(bool flush, bool defaultNan,

686
687double
688vfpUFixedToFpD(bool flush, bool defaultNan,

756 uint32_t val, bool half, uint8_t imm)

689 uint64_t val, uint8_t width, uint8_t imm)

757{
758 fesetround(FeRoundNearest);

690{
691 fesetround(FeRoundNearest);

759 if (half)

692 if (width == 16)

760 val = (uint16_t)val;

693 val = (uint16_t)val;

694 else if (width == 32)
695 val = (uint32_t)val;
696 else if (width != 64)
697 panic("Unsupported width %d", width);
698

761 double scale = pow(2.0, imm);
762 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
763 feclearexcept(FeAllExceptions);
764 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
765 return fixDivDest(flush, defaultNan, val / scale, (double)val, scale);
766}
767
768double
769vfpSFixedToFpD(bool flush, bool defaultNan,

699 double scale = pow(2.0, imm);
700 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
701 feclearexcept(FeAllExceptions);
702 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
703 return fixDivDest(flush, defaultNan, val / scale, (double)val, scale);
704}
705
706double
707vfpSFixedToFpD(bool flush, bool defaultNan,

770 int32_t val, bool half, uint8_t imm)

708 int64_t val, uint8_t width, uint8_t imm)

771{
772 fesetround(FeRoundNearest);

709{
710 fesetround(FeRoundNearest);

773 if (half)

711 if (width == 16)

774 val = sext<16>(val & mask(16));

712 val = sext<16>(val & mask(16));

713 else if (width == 32)
714 val = sext<32>(val & mask(32));
715 else if (width != 64)
716 panic("Unsupported width %d", width);
717

775 double scale = pow(2.0, imm);
776 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
777 feclearexcept(FeAllExceptions);
778 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
779 return fixDivDest(flush, defaultNan, val / scale, (double)val, scale);
780}
781
782// This function implements a magic formula taken from the architecture

--- 188 unchanged lines hidden (view full) ---

971
// Explicit instantiations of FpOp::processNans for float and double
// operands.
972template
973float FpOp::processNans(FPSCR &fpscr, bool &done, bool defaultNan,
974 float op1, float op2) const;
975template
976double FpOp::processNans(FPSCR &fpscr, bool &done, bool defaultNan,
977 double op1, double op2) const;
978

718 double scale = pow(2.0, imm);
719 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
720 feclearexcept(FeAllExceptions);
721 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
722 return fixDivDest(flush, defaultNan, val / scale, (double)val, scale);
723}
724
725// This function implements a magic formula taken from the architecture

--- 188 unchanged lines hidden (view full) ---

914
// Explicit instantiations of FpOp::processNans for float and double
// operands.
915template
916float FpOp::processNans(FPSCR &fpscr, bool &done, bool defaultNan,
917 float op1, float op2) const;
918template
919double FpOp::processNans(FPSCR &fpscr, bool &done, bool defaultNan,
920 double op1, double op2) const;
921

922// @TODO remove this function when we've finished switching all FMA code to use the new FPLIB

979template <class fpType>
980fpType

923template <class fpType>
924fpType

925FpOp::ternaryOp(FPSCR &fpscr, fpType op1, fpType op2, fpType op3,
926 fpType (*func)(fpType, fpType, fpType),
927 bool flush, bool defaultNan, uint32_t rMode) const
928{
929 const bool single = (sizeof(fpType) == sizeof(float));
930 fpType junk = 0.0;
931
932 if (flush && (flushToZero(op1, op2) || flushToZero(op3)))
933 fpscr.idc = 1;
934 VfpSavedState state = prepFpState(rMode);
935 __asm__ __volatile__ ("" : "=m" (op1), "=m" (op2), "=m" (op3), "=m" (state)
936 : "m" (op1), "m" (op2), "m" (op3), "m" (state));
937 fpType dest = func(op1, op2, op3);
938 __asm__ __volatile__ ("" : "=m" (dest) : "m" (dest));
939
940 int fpClass = std::fpclassify(dest);
941 // Get NAN behavior right. This varies between x86 and ARM.
942 if (fpClass == FP_NAN) {
943 const uint64_t qnan =
944 single ? 0x7fc00000 : ULL(0x7ff8000000000000);
945 const bool nan1 = std::isnan(op1);
946 const bool nan2 = std::isnan(op2);
947 const bool nan3 = std::isnan(op3);
948 const bool signal1 = nan1 && ((fpToBits(op1) & qnan) != qnan);
949 const bool signal2 = nan2 && ((fpToBits(op2) & qnan) != qnan);
950 const bool signal3 = nan3 && ((fpToBits(op3) & qnan) != qnan);
951 if ((!nan1 && !nan2 && !nan3) || (defaultNan == 1)) {
952 dest = bitsToFp(qnan, junk);
953 } else if (signal1) {
954 dest = bitsToFp(fpToBits(op1) | qnan, junk);
955 } else if (signal2) {
956 dest = bitsToFp(fpToBits(op2) | qnan, junk);
957 } else if (signal3) {
958 dest = bitsToFp(fpToBits(op3) | qnan, junk);
959 } else if (nan1) {
960 dest = op1;
961 } else if (nan2) {
962 dest = op2;
963 } else if (nan3) {
964 dest = op3;
965 }
966 } else if (flush && flushToZero(dest)) {
967 feraiseexcept(FeUnderflow);
968 } else if ((
969 (single && (dest == bitsToFp(0x00800000, junk) ||
970 dest == bitsToFp(0x80800000, junk))) ||
971 (!single &&
972 (dest == bitsToFp(ULL(0x0010000000000000), junk) ||
973 dest == bitsToFp(ULL(0x8010000000000000), junk)))
974 ) && rMode != VfpRoundZero) {
975 /*
976 * Correct for the fact that underflow is detected -before- rounding
977 * in ARM and -after- rounding in x86.
978 */
979 fesetround(FeRoundZero);
980 __asm__ __volatile__ ("" : "=m" (op1), "=m" (op2), "=m" (op3)
981 : "m" (op1), "m" (op2), "m" (op3));
982 fpType temp = func(op1, op2, op2);
983 __asm__ __volatile__ ("" : "=m" (temp) : "m" (temp));
984 if (flush && flushToZero(temp)) {
985 dest = temp;
986 }
987 }
988 finishVfp(fpscr, state, flush);
989 return dest;
990}
991
// Explicit instantiations of FpOp::ternaryOp for float and double operands.
992template
993float FpOp::ternaryOp(FPSCR &fpscr, float op1, float op2, float op3,
994 float (*func)(float, float, float),
995 bool flush, bool defaultNan, uint32_t rMode) const;
996template
997double FpOp::ternaryOp(FPSCR &fpscr, double op1, double op2, double op3,
998 double (*func)(double, double, double),
999 bool flush, bool defaultNan, uint32_t rMode) const;
1000
1001template <class fpType>
1002fpType