Cross Reference: /gem5/src/arch/arm/insts/vfp.hh

vfp.hh (7398:063002e7106b)	vfp.hh (7430:db3e376f35d1)
1/* 2 * Copyright (c) 2010 ARM Limited 3 * All rights reserved 4 * 5 * The license below extends only to copyright in the software and shall 6 * not be construed as granting a license to any other intellectual 7 * property including but not limited to intellectual property relating 8 * to a hardware implementation of the functionality of the software --- 180 unchanged lines hidden (view full) --- 189 uint64_t bits; 190 } val; 191 val.bits = bits; 192 return val.fp; 193} 194 195typedef int VfpSavedState; 196	1/* 2 * Copyright (c) 2010 ARM Limited 3 * All rights reserved 4 * 5 * The license below extends only to copyright in the software and shall 6 * not be construed as granting a license to any other intellectual 7 * property including but not limited to intellectual property relating 8 * to a hardware implementation of the functionality of the software --- 180 unchanged lines hidden (view full) --- 189 uint64_t bits; 190 } val; 191 val.bits = bits; 192 return val.fp; 193} 194 195typedef int VfpSavedState; 196
197static inline VfpSavedState 198prepFpState(uint32_t rMode) 199{ 200 int roundingMode = fegetround(); 201 feclearexcept(FeAllExceptions); 202 switch (rMode) { 203 case VfpRoundNearest: 204 fesetround(FeRoundNearest); 205 break; 206 case VfpRoundUpward: 207 fesetround(FeRoundUpward); 208 break; 209 case VfpRoundDown: 210 fesetround(FeRoundDown); 211 break; 212 case VfpRoundZero: 213 fesetround(FeRoundZero); 214 break; 215 } 216 return roundingMode; 217}	197VfpSavedState prepFpState(uint32_t rMode); 198void finishVfp(FPSCR &fpscr, VfpSavedState state);
218	199
219static inline void 220finishVfp(FPSCR &fpscr, VfpSavedState state) 221{ 222 int exceptions = fetestexcept(FeAllExceptions); 223 bool underflow = false; 224 if (exceptions & FeInvalid) { 225 fpscr.ioc = 1; 226 } 227 if (exceptions & FeDivByZero) { 228 fpscr.dzc = 1; 229 } 230 if (exceptions & FeOverflow) { 231 fpscr.ofc = 1; 232 } 233 if (exceptions & FeUnderflow) { 234 underflow = true; 235 fpscr.ufc = 1; 236 } 237 if ((exceptions & FeInexact) && !(underflow && fpscr.fz)) { 238 fpscr.ixc = 1; 239 } 240 fesetround(state); 241} 242
243template <class fpType>	200template <class fpType>
244static inline fpType 245fixDest(FPSCR fpscr, fpType val, fpType op1) 246{ 247 int fpClass = std::fpclassify(val); 248 fpType junk = 0.0; 249 if (fpClass == FP_NAN) { 250 const bool single = (sizeof(val) == sizeof(float)); 251 const uint64_t qnan = single ? 0x7fc00000 : ULL(0x7ff8000000000000); 252 const bool nan = std::isnan(op1); 253 if (!nan \|\| (fpscr.dn == 1)) { 254 val = bitsToFp(qnan, junk); 255 } else if (nan) { 256 val = bitsToFp(fpToBits(op1) \| qnan, junk); 257 } 258 } else if (fpClass == FP_SUBNORMAL && fpscr.fz == 1) { 259 // Turn val into a zero with the correct sign; 260 uint64_t bitMask = ULL(0x1) << (sizeof(fpType) * 8 - 1); 261 val = bitsToFp(fpToBits(val) & bitMask, junk); 262 feclearexcept(FeInexact); 263 feraiseexcept(FeUnderflow); 264 } 265 return val; 266}	201fpType fixDest(FPSCR fpscr, fpType val, fpType op1);
267 268template <class fpType>	202 203template <class fpType>
269static inline fpType 270fixDest(FPSCR fpscr, fpType val, fpType op1, fpType op2) 271{ 272 int fpClass = std::fpclassify(val); 273 fpType junk = 0.0; 274 if (fpClass == FP_NAN) { 275 const bool single = (sizeof(val) == sizeof(float)); 276 const uint64_t qnan = single ? 0x7fc00000 : ULL(0x7ff8000000000000); 277 const bool nan1 = std::isnan(op1); 278 const bool nan2 = std::isnan(op2); 279 const bool signal1 = nan1 && ((fpToBits(op1) & qnan) != qnan); 280 const bool signal2 = nan2 && ((fpToBits(op2) & qnan) != qnan); 281 if ((!nan1 && !nan2) \|\| (fpscr.dn == 1)) { 282 val = bitsToFp(qnan, junk); 283 } else if (signal1) { 284 val = bitsToFp(fpToBits(op1) \| qnan, junk); 285 } else if (signal2) { 286 val = bitsToFp(fpToBits(op2) \| qnan, junk); 287 } else if (nan1) { 288 val = op1; 289 } else if (nan2) { 290 val = op2; 291 } 292 } else if (fpClass == FP_SUBNORMAL && fpscr.fz == 1) { 293 // Turn val into a zero with the correct sign; 294 uint64_t bitMask = ULL(0x1) << (sizeof(fpType) * 8 - 1); 295 val = bitsToFp(fpToBits(val) & bitMask, junk); 296 feclearexcept(FeInexact); 297 feraiseexcept(FeUnderflow); 298 } 299 return val; 300}	204fpType fixDest(FPSCR fpscr, fpType val, fpType op1, fpType op2);
301 302template <class fpType>	205 206template <class fpType>
303static inline fpType 304fixDivDest(FPSCR fpscr, fpType val, fpType op1, fpType op2) 305{ 306 fpType mid = fixDest(fpscr, val, op1, op2); 307 const bool single = (sizeof(fpType) == sizeof(float)); 308 const fpType junk = 0.0; 309 if ((single && (val == bitsToFp(0x00800000, junk) \|\| 310 val == bitsToFp(0x80800000, junk))) \|\| 311 (!single && (val == bitsToFp(ULL(0x0010000000000000), junk) \|\| 312 val == bitsToFp(ULL(0x8010000000000000), junk))) 313 ) { 314 __asm__ __volatile__("" : "=m" (op1) : "m" (op1)); 315 fesetround(FeRoundZero); 316 fpType temp = 0.0; 317 __asm__ __volatile__("" : "=m" (temp) : "m" (temp)); 318 temp = op1 / op2; 319 if (flushToZero(temp)) { 320 feraiseexcept(FeUnderflow); 321 if (fpscr.fz) { 322 feclearexcept(FeInexact); 323 mid = temp; 324 } 325 } 326 __asm__ __volatile__("" :: "m" (temp)); 327 } 328 return mid; 329}	207fpType fixDivDest(FPSCR fpscr, fpType val, fpType op1, fpType op2);
330	208
331static inline float 332fixFpDFpSDest(FPSCR fpscr, double val) 333{ 334 const float junk = 0.0; 335 float op1 = 0.0; 336 if (std::isnan(val)) { 337 uint64_t valBits = fpToBits(val); 338 uint32_t op1Bits = bits(valBits, 50, 29) \| 339 (mask(9) << 22) \| 340 (bits(valBits, 63) << 31); 341 op1 = bitsToFp(op1Bits, junk); 342 } 343 float mid = fixDest(fpscr, (float)val, op1); 344 if (fpscr.fz && fetestexcept(FeUnderflow \| FeInexact) == 345 (FeUnderflow \| FeInexact)) { 346 feclearexcept(FeInexact); 347 } 348 if (mid == bitsToFp(0x00800000, junk) \|\| 349 mid == bitsToFp(0x80800000, junk)) { 350 __asm__ __volatile__("" : "=m" (val) : "m" (val)); 351 fesetround(FeRoundZero); 352 float temp = 0.0; 353 __asm__ __volatile__("" : "=m" (temp) : "m" (temp)); 354 temp = val; 355 if (flushToZero(temp)) { 356 feraiseexcept(FeUnderflow); 357 if (fpscr.fz) { 358 feclearexcept(FeInexact); 359 mid = temp; 360 } 361 } 362 __asm__ __volatile__("" :: "m" (temp)); 363 } 364 return mid; 365}	209float fixFpDFpSDest(FPSCR fpscr, double val); 210double fixFpSFpDDest(FPSCR fpscr, float val);
366	211
367static inline double 368fixFpSFpDDest(FPSCR fpscr, float val) 369{ 370 const double junk = 0.0; 371 double op1 = 0.0; 372 if (std::isnan(val)) { 373 uint32_t valBits = fpToBits(val); 374 uint64_t op1Bits = ((uint64_t)bits(valBits, 21, 0) << 29) \| 375 (mask(12) << 51) \| 376 ((uint64_t)bits(valBits, 31) << 63); 377 op1 = bitsToFp(op1Bits, junk); 378 } 379 double mid = fixDest(fpscr, (double)val, op1); 380 if (mid == bitsToFp(ULL(0x0010000000000000), junk) \|\| 381 mid == bitsToFp(ULL(0x8010000000000000), junk)) { 382 __asm__ __volatile__("" : "=m" (val) : "m" (val)); 383 fesetround(FeRoundZero); 384 double temp = 0.0; 385 __asm__ __volatile__("" : "=m" (temp) : "m" (temp)); 386 temp = val; 387 if (flushToZero(temp)) { 388 feraiseexcept(FeUnderflow); 389 if (fpscr.fz) { 390 feclearexcept(FeInexact); 391 mid = temp; 392 } 393 } 394 __asm__ __volatile__("" :: "m" (temp)); 395 } 396 return mid; 397}	212float vcvtFpSFpH(FPSCR &fpscr, float op, float dest, bool top); 213float vcvtFpHFpS(FPSCR &fpscr, float op, bool top);
398	214
399static inline float 400vcvtFpSFpH(FPSCR &fpscr, float op, float dest, bool top) 401{ 402 float junk = 0.0; 403 uint32_t destBits = fpToBits(dest); 404 uint32_t opBits = fpToBits(op); 405 // Extract the operand. 406 bool neg = bits(opBits, 31); 407 uint32_t exponent = bits(opBits, 30, 23); 408 uint32_t oldMantissa = bits(opBits, 22, 0); 409 uint32_t mantissa = oldMantissa >> (23 - 10); 410 // Do the conversion. 411 uint32_t extra = oldMantissa & mask(23 - 10); 412 if (exponent == 0xff) { 413 if (oldMantissa != 0) { 414 // Nans. 415 if (bits(mantissa, 9) == 0) { 416 // Signalling nan. 417 fpscr.ioc = 1; 418 } 419 if (fpscr.ahp) { 420 mantissa = 0; 421 exponent = 0; 422 fpscr.ioc = 1; 423 } else if (fpscr.dn) { 424 mantissa = (1 << 9); 425 exponent = 0x1f; 426 neg = false; 427 } else { 428 exponent = 0x1f; 429 mantissa \|= (1 << 9); 430 } 431 } else { 432 // Infinities. 433 exponent = 0x1F; 434 if (fpscr.ahp) { 435 fpscr.ioc = 1; 436 mantissa = 0x3ff; 437 } else { 438 mantissa = 0; 439 } 440 } 441 } else if (exponent == 0 && oldMantissa == 0) { 442 // Zero, don't need to do anything. 443 } else { 444 // Normalized or denormalized numbers. 445 446 bool inexact = (extra != 0); 447 448 if (exponent == 0) { 449 // Denormalized. 450 451 // If flush to zero is on, this shouldn't happen. 452 assert(fpscr.fz == 0); 453 454 // Check for underflow 455 if (inexact \|\| fpscr.ufe) 456 fpscr.ufc = 1; 457 458 // Handle rounding. 459 unsigned mode = fpscr.rMode; 460 if ((mode == VfpRoundUpward && !neg && extra) \|\| 461 (mode == VfpRoundDown && neg && extra) \|\| 462 (mode == VfpRoundNearest && 463 (extra > (1 << 9) \|\| 464 (extra == (1 << 9) && bits(mantissa, 0))))) { 465 mantissa++; 466 } 467 468 // See if the number became normalized after rounding. 469 if (mantissa == (1 << 10)) { 470 mantissa = 0; 471 exponent = 1; 472 } 473 } else { 474 // Normalized. 475 476 // We need to track the dropped bits differently since 477 // more can be dropped by denormalizing. 
478 bool topOne = bits(extra, 12); 479 bool restZeros = bits(extra, 11, 0) == 0; 480 481 if (exponent <= (127 - 15)) { 482 // The result is too small. Denormalize. 483 mantissa \|= (1 << 10); 484 while (mantissa && exponent <= (127 - 15)) { 485 restZeros = restZeros && !topOne; 486 topOne = bits(mantissa, 0); 487 mantissa = mantissa >> 1; 488 exponent++; 489 } 490 if (topOne \|\| !restZeros) 491 inexact = true; 492 exponent = 0; 493 } else { 494 // Change bias. 495 exponent -= (127 - 15); 496 } 497 498 if (exponent == 0 && (inexact \|\| fpscr.ufe)) { 499 // Underflow 500 fpscr.ufc = 1; 501 } 502 503 // Handle rounding. 504 unsigned mode = fpscr.rMode; 505 bool nonZero = topOne \|\| !restZeros; 506 if ((mode == VfpRoundUpward && !neg && nonZero) \|\| 507 (mode == VfpRoundDown && neg && nonZero) \|\| 508 (mode == VfpRoundNearest && topOne && 509 (!restZeros \|\| bits(mantissa, 0)))) { 510 mantissa++; 511 } 512 513 // See if we rounded up and need to bump the exponent. 514 if (mantissa == (1 << 10)) { 515 mantissa = 0; 516 exponent++; 517 } 518 519 // Deal with overflow 520 if (fpscr.ahp) { 521 if (exponent >= 0x20) { 522 exponent = 0x1f; 523 mantissa = 0x3ff; 524 fpscr.ioc = 1; 525 // Supress inexact exception. 526 inexact = false; 527 } 528 } else { 529 if (exponent >= 0x1f) { 530 if ((mode == VfpRoundNearest) \|\| 531 (mode == VfpRoundUpward && !neg) \|\| 532 (mode == VfpRoundDown && neg)) { 533 // Overflow to infinity. 534 exponent = 0x1f; 535 mantissa = 0; 536 } else { 537 // Overflow to max normal. 538 exponent = 0x1e; 539 mantissa = 0x3ff; 540 } 541 fpscr.ofc = 1; 542 inexact = true; 543 } 544 } 545 } 546 547 if (inexact) { 548 fpscr.ixc = 1; 549 } 550 } 551 // Reassemble and install the result. 
552 uint32_t result = bits(mantissa, 9, 0); 553 replaceBits(result, 14, 10, exponent); 554 if (neg) 555 result \|= (1 << 15); 556 if (top) 557 replaceBits(destBits, 31, 16, result); 558 else 559 replaceBits(destBits, 15, 0, result); 560 return bitsToFp(destBits, junk); 561} 562 563static inline float 564vcvtFpHFpS(FPSCR &fpscr, float op, bool top) 565{ 566 float junk = 0.0; 567 uint32_t opBits = fpToBits(op); 568 // Extract the operand. 569 if (top) 570 opBits = bits(opBits, 31, 16); 571 else 572 opBits = bits(opBits, 15, 0); 573 // Extract the bitfields. 574 bool neg = bits(opBits, 15); 575 uint32_t exponent = bits(opBits, 14, 10); 576 uint32_t mantissa = bits(opBits, 9, 0); 577 // Do the conversion. 578 if (exponent == 0) { 579 if (mantissa != 0) { 580 // Normalize the value. 581 exponent = exponent + (127 - 15) + 1; 582 while (mantissa < (1 << 10)) { 583 mantissa = mantissa << 1; 584 exponent--; 585 } 586 } 587 mantissa = mantissa << (23 - 10); 588 } else if (exponent == 0x1f && !fpscr.ahp) { 589 // Infinities and nans. 590 exponent = 0xff; 591 if (mantissa != 0) { 592 // Nans. 593 mantissa = mantissa << (23 - 10); 594 if (bits(mantissa, 22) == 0) { 595 // Signalling nan. 596 fpscr.ioc = 1; 597 mantissa \|= (1 << 22); 598 } 599 if (fpscr.dn) { 600 mantissa &= ~mask(22); 601 neg = false; 602 } 603 } 604 } else { 605 exponent = exponent + (127 - 15); 606 mantissa = mantissa << (23 - 10); 607 } 608 // Reassemble the result. 609 uint32_t result = bits(mantissa, 22, 0); 610 replaceBits(result, 30, 23, exponent); 611 if (neg) 612 result \|= (1 << 31); 613 return bitsToFp(result, junk); 614} 615
616static inline double 617makeDouble(uint32_t low, uint32_t high) 618{ 619 double junk = 0.0; 620 return bitsToFp((uint64_t)low \| ((uint64_t)high << 32), junk); 621} 622 623static inline uint32_t 624lowFromDouble(double val) 625{ 626 return fpToBits(val); 627} 628 629static inline uint32_t 630highFromDouble(double val) 631{ 632 return fpToBits(val) >> 32; 633} 634	215static inline double 216makeDouble(uint32_t low, uint32_t high) 217{ 218 double junk = 0.0; 219 return bitsToFp((uint64_t)low \| ((uint64_t)high << 32), junk); 220} 221 222static inline uint32_t 223lowFromDouble(double val) 224{ 225 return fpToBits(val); 226} 227 228static inline uint32_t 229highFromDouble(double val) 230{ 231 return fpToBits(val) >> 32; 232} 233
635static inline uint64_t 636vfpFpSToFixed(float val, bool isSigned, bool half, 637 uint8_t imm, bool rzero = true) 638{ 639 int rmode = rzero ? FeRoundZero : fegetround(); 640 __asm__ __volatile__("" : "=m" (rmode) : "m" (rmode)); 641 fesetround(FeRoundNearest); 642 val = val * powf(2.0, imm); 643 __asm__ __volatile__("" : "=m" (val) : "m" (val)); 644 fesetround(rmode); 645 feclearexcept(FeAllExceptions); 646 __asm__ __volatile__("" : "=m" (val) : "m" (val)); 647 float origVal = val; 648 val = rintf(val); 649 int fpType = std::fpclassify(val); 650 if (fpType == FP_SUBNORMAL \|\| fpType == FP_NAN) { 651 if (fpType == FP_NAN) { 652 feraiseexcept(FeInvalid); 653 } 654 val = 0.0; 655 } else if (origVal != val) { 656 switch (rmode) { 657 case FeRoundNearest: 658 if (origVal - val > 0.5) 659 val += 1.0; 660 else if (val - origVal > 0.5) 661 val -= 1.0; 662 break; 663 case FeRoundDown: 664 if (origVal < val) 665 val -= 1.0; 666 break; 667 case FeRoundUpward: 668 if (origVal > val) 669 val += 1.0; 670 break; 671 } 672 feraiseexcept(FeInexact); 673 }	234uint64_t vfpFpSToFixed(float val, bool isSigned, bool half, 235 uint8_t imm, bool rzero = true); 236float vfpUFixedToFpS(FPSCR fpscr, uint32_t val, bool half, uint8_t imm); 237float vfpSFixedToFpS(FPSCR fpscr, int32_t val, bool half, uint8_t imm);
674	238
675 if (isSigned) { 676 if (half) { 677 if ((double)val < (int16_t)(1 << 15)) { 678 feraiseexcept(FeInvalid); 679 feclearexcept(FeInexact); 680 return (int16_t)(1 << 15); 681 } 682 if ((double)val > (int16_t)mask(15)) { 683 feraiseexcept(FeInvalid); 684 feclearexcept(FeInexact); 685 return (int16_t)mask(15); 686 } 687 return (int16_t)val; 688 } else { 689 if ((double)val < (int32_t)(1 << 31)) { 690 feraiseexcept(FeInvalid); 691 feclearexcept(FeInexact); 692 return (int32_t)(1 << 31); 693 } 694 if ((double)val > (int32_t)mask(31)) { 695 feraiseexcept(FeInvalid); 696 feclearexcept(FeInexact); 697 return (int32_t)mask(31); 698 } 699 return (int32_t)val; 700 } 701 } else { 702 if (half) { 703 if ((double)val < 0) { 704 feraiseexcept(FeInvalid); 705 feclearexcept(FeInexact); 706 return 0; 707 } 708 if ((double)val > (mask(16))) { 709 feraiseexcept(FeInvalid); 710 feclearexcept(FeInexact); 711 return mask(16); 712 } 713 return (uint16_t)val; 714 } else { 715 if ((double)val < 0) { 716 feraiseexcept(FeInvalid); 717 feclearexcept(FeInexact); 718 return 0; 719 } 720 if ((double)val > (mask(32))) { 721 feraiseexcept(FeInvalid); 722 feclearexcept(FeInexact); 723 return mask(32); 724 } 725 return (uint32_t)val; 726 } 727 } 728}	239uint64_t vfpFpDToFixed(double val, bool isSigned, bool half, 240 uint8_t imm, bool rzero = true); 241double vfpUFixedToFpD(FPSCR fpscr, uint32_t val, bool half, uint8_t imm); 242double vfpSFixedToFpD(FPSCR fpscr, int32_t val, bool half, uint8_t imm);
729	243
730static inline float 731vfpUFixedToFpS(FPSCR fpscr, uint32_t val, bool half, uint8_t imm) 732{ 733 fesetround(FeRoundNearest); 734 if (half) 735 val = (uint16_t)val; 736 float scale = powf(2.0, imm); 737 __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); 738 feclearexcept(FeAllExceptions); 739 __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); 740 return fixDivDest(fpscr, val / scale, (float)val, scale); 741} 742 743static inline float 744vfpSFixedToFpS(FPSCR fpscr, int32_t val, bool half, uint8_t imm) 745{ 746 fesetround(FeRoundNearest); 747 if (half) 748 val = sext<16>(val & mask(16)); 749 float scale = powf(2.0, imm); 750 __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); 751 feclearexcept(FeAllExceptions); 752 __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); 753 return fixDivDest(fpscr, val / scale, (float)val, scale); 754} 755 756static inline uint64_t 757vfpFpDToFixed(double val, bool isSigned, bool half, 758 uint8_t imm, bool rzero = true) 759{ 760 int rmode = rzero ? 
FeRoundZero : fegetround(); 761 fesetround(FeRoundNearest); 762 val = val * pow(2.0, imm); 763 __asm__ __volatile__("" : "=m" (val) : "m" (val)); 764 fesetround(rmode); 765 feclearexcept(FeAllExceptions); 766 __asm__ __volatile__("" : "=m" (val) : "m" (val)); 767 double origVal = val; 768 val = rint(val); 769 int fpType = std::fpclassify(val); 770 if (fpType == FP_SUBNORMAL \|\| fpType == FP_NAN) { 771 if (fpType == FP_NAN) { 772 feraiseexcept(FeInvalid); 773 } 774 val = 0.0; 775 } else if (origVal != val) { 776 switch (rmode) { 777 case FeRoundNearest: 778 if (origVal - val > 0.5) 779 val += 1.0; 780 else if (val - origVal > 0.5) 781 val -= 1.0; 782 break; 783 case FeRoundDown: 784 if (origVal < val) 785 val -= 1.0; 786 break; 787 case FeRoundUpward: 788 if (origVal > val) 789 val += 1.0; 790 break; 791 } 792 feraiseexcept(FeInexact); 793 } 794 if (isSigned) { 795 if (half) { 796 if (val < (int16_t)(1 << 15)) { 797 feraiseexcept(FeInvalid); 798 feclearexcept(FeInexact); 799 return (int16_t)(1 << 15); 800 } 801 if (val > (int16_t)mask(15)) { 802 feraiseexcept(FeInvalid); 803 feclearexcept(FeInexact); 804 return (int16_t)mask(15); 805 } 806 return (int16_t)val; 807 } else { 808 if (val < (int32_t)(1 << 31)) { 809 feraiseexcept(FeInvalid); 810 feclearexcept(FeInexact); 811 return (int32_t)(1 << 31); 812 } 813 if (val > (int32_t)mask(31)) { 814 feraiseexcept(FeInvalid); 815 feclearexcept(FeInexact); 816 return (int32_t)mask(31); 817 } 818 return (int32_t)val; 819 } 820 } else { 821 if (half) { 822 if (val < 0) { 823 feraiseexcept(FeInvalid); 824 feclearexcept(FeInexact); 825 return 0; 826 } 827 if (val > mask(16)) { 828 feraiseexcept(FeInvalid); 829 feclearexcept(FeInexact); 830 return mask(16); 831 } 832 return (uint16_t)val; 833 } else { 834 if (val < 0) { 835 feraiseexcept(FeInvalid); 836 feclearexcept(FeInexact); 837 return 0; 838 } 839 if (val > mask(32)) { 840 feraiseexcept(FeInvalid); 841 feclearexcept(FeInexact); 842 return mask(32); 843 } 844 return 
(uint32_t)val; 845 } 846 } 847} 848 849static inline double 850vfpUFixedToFpD(FPSCR fpscr, uint32_t val, bool half, uint8_t imm) 851{ 852 fesetround(FeRoundNearest); 853 if (half) 854 val = (uint16_t)val; 855 double scale = pow(2.0, imm); 856 __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); 857 feclearexcept(FeAllExceptions); 858 __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); 859 return fixDivDest(fpscr, val / scale, (double)val, scale); 860} 861 862static inline double 863vfpSFixedToFpD(FPSCR fpscr, int32_t val, bool half, uint8_t imm) 864{ 865 fesetround(FeRoundNearest); 866 if (half) 867 val = sext<16>(val & mask(16)); 868 double scale = pow(2.0, imm); 869 __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); 870 feclearexcept(FeAllExceptions); 871 __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); 872 return fixDivDest(fpscr, val / scale, (double)val, scale); 873} 874
875class VfpMacroOp : public PredMacroOp 876{ 877 public: 878 static bool 879 inScalarBank(IntRegIndex idx) 880 { 881 return (idx % 32) < 8; 882 } 883 884 protected: 885 bool wide; 886 887 VfpMacroOp(const char mnem, ExtMachInst _machInst, 888* OpClass __opClass, bool _wide) : 889 PredMacroOp(mnem, _machInst, __opClass), wide(_wide) 890 {} 891	244class VfpMacroOp : public PredMacroOp 245{ 246 public: 247 static bool 248 inScalarBank(IntRegIndex idx) 249 { 250 return (idx % 32) < 8; 251 } 252 253 protected: 254 bool wide; 255 256 VfpMacroOp(const char mnem, ExtMachInst _machInst, 257* OpClass __opClass, bool _wide) : 258 PredMacroOp(mnem, _machInst, __opClass), wide(_wide) 259 {} 260
892 IntRegIndex 893 addStride(IntRegIndex idx, unsigned stride) 894 { 895 if (wide) { 896 stride = 2; 897* } 898 unsigned offset = idx % 8; 899 idx = (IntRegIndex)(idx - offset); 900 offset += stride; 901 idx = (IntRegIndex)(idx + (offset % 8)); 902 return idx; 903 } 904 905 void 906 nextIdxs(IntRegIndex &dest, IntRegIndex &op1, IntRegIndex &op2) 907 { 908 unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2; 909 assert(!inScalarBank(dest)); 910 dest = addStride(dest, stride); 911 op1 = addStride(op1, stride); 912 if (!inScalarBank(op2)) { 913 op2 = addStride(op2, stride); 914 } 915 } 916 917 void 918 nextIdxs(IntRegIndex &dest, IntRegIndex &op1) 919 { 920 unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2; 921 assert(!inScalarBank(dest)); 922 dest = addStride(dest, stride); 923 if (!inScalarBank(op1)) { 924 op1 = addStride(op1, stride); 925 } 926 } 927 928 void 929 nextIdxs(IntRegIndex &dest) 930 { 931 unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2; 932 assert(!inScalarBank(dest)); 933 dest = addStride(dest, stride); 934 }	261 IntRegIndex addStride(IntRegIndex idx, unsigned stride); 262 void nextIdxs(IntRegIndex &dest, IntRegIndex &op1, IntRegIndex &op2); 263 void nextIdxs(IntRegIndex &dest, IntRegIndex &op1); 264 void nextIdxs(IntRegIndex &dest);
935}; 936 937static inline float 938fpAddS(float a, float b) 939{ 940 return a + b; 941} 942 --- 88 unchanged lines hidden (view full) --- 1031 { 1032 return fpToBits(val) >> 32; 1033 } 1034 1035 template <class fpType> 1036 fpType 1037 binaryOp(FPSCR &fpscr, fpType op1, fpType op2, 1038 fpType (*func)(fpType, fpType),	265}; 266 267static inline float 268fpAddS(float a, float b) 269{ 270 return a + b; 271} 272 --- 88 unchanged lines hidden (view full) --- 361 { 362 return fpToBits(val) >> 32; 363 } 364 365 template <class fpType> 366 fpType 367 binaryOp(FPSCR &fpscr, fpType op1, fpType op2, 368 fpType (*func)(fpType, fpType),
1039 bool flush, uint32_t rMode) const 1040 { 1041 const bool single = (sizeof(fpType) == sizeof(float)); 1042 fpType junk = 0.0;	369 bool flush, uint32_t rMode) const;
1043	370
1044 if (flush && flushToZero(op1, op2)) 1045 fpscr.idc = 1; 1046 VfpSavedState state = prepFpState(rMode); 1047 __asm__ __volatile__ ("" : "=m" (op1), "=m" (op2), "=m" (state) 1048 : "m" (op1), "m" (op2), "m" (state)); 1049 fpType dest = func(op1, op2); 1050 __asm__ __volatile__ ("" : "=m" (dest) : "m" (dest)); 1051 1052 int fpClass = std::fpclassify(dest); 1053 // Get NAN behavior right. This varies between x86 and ARM. 1054 if (fpClass == FP_NAN) { 1055 const bool single = (sizeof(fpType) == sizeof(float)); 1056 const uint64_t qnan = 1057 single ? 0x7fc00000 : ULL(0x7ff8000000000000); 1058 const bool nan1 = std::isnan(op1); 1059 const bool nan2 = std::isnan(op2); 1060 const bool signal1 = nan1 && ((fpToBits(op1) & qnan) != qnan); 1061 const bool signal2 = nan2 && ((fpToBits(op2) & qnan) != qnan); 1062 if ((!nan1 && !nan2) \|\| (fpscr.dn == 1)) { 1063 dest = bitsToFp(qnan, junk); 1064 } else if (signal1) { 1065 dest = bitsToFp(fpToBits(op1) \| qnan, junk); 1066 } else if (signal2) { 1067 dest = bitsToFp(fpToBits(op2) \| qnan, junk); 1068 } else if (nan1) { 1069 dest = op1; 1070 } else if (nan2) { 1071 dest = op2; 1072 } 1073 } else if (flush && flushToZero(dest)) { 1074 feraiseexcept(FeUnderflow); 1075 } else if (( 1076 (single && (dest == bitsToFp(0x00800000, junk) \|\| 1077 dest == bitsToFp(0x80800000, junk))) \|\| 1078 (!single && 1079 (dest == bitsToFp(ULL(0x0010000000000000), junk) \|\| 1080 dest == bitsToFp(ULL(0x8010000000000000), junk))) 1081 ) && rMode != VfpRoundZero) { 1082 /* 1083 * Correct for the fact that underflow is detected -before- rounding 1084 * in ARM and -after- rounding in x86. 1085 / 1086* fesetround(FeRoundZero); 1087 __asm__ __volatile__ ("" : "=m" (op1), "=m" (op2) 1088 : "m" (op1), "m" (op2)); 1089 fpType temp = func(op1, op2); 1090 __asm__ __volatile__ ("" : "=m" (temp) : "m" (temp)); 1091 if (flush && flushToZero(temp)) { 1092 dest = temp; 1093 } 1094 } 1095 finishVfp(fpscr, state); 1096 return dest; 1097 } 1098
1099 template <class fpType> 1100 fpType 1101 unaryOp(FPSCR &fpscr, fpType op1, 1102 fpType (*func)(fpType),	371 template <class fpType> 372 fpType 373 unaryOp(FPSCR &fpscr, fpType op1, 374 fpType (*func)(fpType),
1103 bool flush, uint32_t rMode) const 1104 { 1105 const bool single = (sizeof(fpType) == sizeof(float)); 1106 fpType junk = 0.0; 1107 1108 if (flush && flushToZero(op1)) 1109 fpscr.idc = 1; 1110 VfpSavedState state = prepFpState(rMode); 1111 __asm__ __volatile__ ("" : "=m" (op1), "=m" (state) 1112 : "m" (op1), "m" (state)); 1113 fpType dest = func(op1); 1114 __asm__ __volatile__ ("" : "=m" (dest) : "m" (dest)); 1115 1116 int fpClass = std::fpclassify(dest); 1117 // Get NAN behavior right. This varies between x86 and ARM. 1118 if (fpClass == FP_NAN) { 1119 const bool single = (sizeof(fpType) == sizeof(float)); 1120 const uint64_t qnan = 1121 single ? 0x7fc00000 : ULL(0x7ff8000000000000); 1122 const bool nan = std::isnan(op1); 1123 if (!nan \|\| fpscr.dn == 1) { 1124 dest = bitsToFp(qnan, junk); 1125 } else if (nan) { 1126 dest = bitsToFp(fpToBits(op1) \| qnan, junk); 1127 } 1128 } else if (flush && flushToZero(dest)) { 1129 feraiseexcept(FeUnderflow); 1130 } else if (( 1131 (single && (dest == bitsToFp(0x00800000, junk) \|\| 1132 dest == bitsToFp(0x80800000, junk))) \|\| 1133 (!single && 1134 (dest == bitsToFp(ULL(0x0010000000000000), junk) \|\| 1135 dest == bitsToFp(ULL(0x8010000000000000), junk))) 1136 ) && rMode != VfpRoundZero) { 1137 /* 1138 * Correct for the fact that underflow is detected -before- rounding 1139 * in ARM and -after- rounding in x86. 1140 / 1141* fesetround(FeRoundZero); 1142 __asm__ __volatile__ ("" : "=m" (op1) : "m" (op1)); 1143 fpType temp = func(op1); 1144 __asm__ __volatile__ ("" : "=m" (temp) : "m" (temp)); 1145 if (flush && flushToZero(temp)) { 1146 dest = temp; 1147 } 1148 } 1149 finishVfp(fpscr, state); 1150 return dest; 1151 }	375 bool flush, uint32_t rMode) const;
1152}; 1153 1154class FpRegRegOp : public FpOp 1155{ 1156 protected: 1157 IntRegIndex dest; 1158 IntRegIndex op1; 1159 --- 67 unchanged lines hidden ---	376}; 377 378class FpRegRegOp : public FpOp 379{ 380 protected: 381 IntRegIndex dest; 382 IntRegIndex op1; 383 --- 67 unchanged lines hidden ---

1/*
2 * Copyright (c) 2010 ARM Limited
3 * All rights reserved
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software

--- 180 unchanged lines hidden (view full) ---

189 uint64_t bits;
190 } val;
191 val.bits = bits;
192 return val.fp;
193}
194
195typedef int VfpSavedState;
196

/**
 * Prepare the host floating-point environment for emulating one VFP
 * operation.
 *
 * Records the host rounding mode currently in effect, clears the host
 * exception flags selected by FeAllExceptions, and then switches the host
 * rounding mode to the one requested by rMode.
 *
 * @param rMode Requested rounding mode; one of VfpRoundNearest,
 *              VfpRoundUpward, VfpRoundDown or VfpRoundZero. Any other
 *              value leaves the host rounding mode unchanged (the switch
 *              has no default case).
 * @return The host rounding mode that was active on entry, as reported by
 *         fegetround(). finishVfp() accepts this value and reinstates it.
 */
197static inline VfpSavedState
198prepFpState(uint32_t rMode)
199{
    // Remember the caller's rounding mode before it is overwritten below.
200 int roundingMode = fegetround();
    // Drop stale flags so that only the upcoming operation's are observed.
201 feclearexcept(FeAllExceptions);
202 switch (rMode) {
203 case VfpRoundNearest:
204 fesetround(FeRoundNearest);
205 break;
206 case VfpRoundUpward:
207 fesetround(FeRoundUpward);
208 break;
209 case VfpRoundDown:
210 fesetround(FeRoundDown);
211 break;
212 case VfpRoundZero:
213 fesetround(FeRoundZero);
214 break;
215 }
216 return roundingMode;
217}

197VfpSavedState prepFpState(uint32_t rMode);
198void finishVfp(FPSCR &fpscr, VfpSavedState state);

218

199

/**
 * Transfer the host floating-point exception flags into the FPSCR and
 * restore the host rounding mode.
 *
 * Each host flag that is currently raised sets the corresponding FPSCR bit
 * (ioc, dzc, ofc, ufc, ixc). Bits are only ever set here, never cleared, so
 * whatever was already set in fpscr is preserved.
 *
 * @param fpscr FPSCR to update in place.
 * @param state Host rounding mode to reinstate via fesetround(); the type
 *              matches what prepFpState() returns.
 */
219static inline void
220finishVfp(FPSCR &fpscr, VfpSavedState state)
221{
222 int exceptions = fetestexcept(FeAllExceptions);
223 bool underflow = false;
224 if (exceptions & FeInvalid) {
225 fpscr.ioc = 1;
226 }
227 if (exceptions & FeDivByZero) {
228 fpscr.dzc = 1;
229 }
230 if (exceptions & FeOverflow) {
231 fpscr.ofc = 1;
232 }
233 if (exceptions & FeUnderflow) {
234 underflow = true;
235 fpscr.ufc = 1;
236 }
    // Inexact is suppressed when it accompanies an underflow while fpscr.fz
    // is set; in every other case a raised inexact flag is recorded.
237 if ((exceptions & FeInexact) && !(underflow && fpscr.fz)) {
238 fpscr.ixc = 1;
239 }
240 fesetround(state);
241}
242

/**
 * Adjust the host-computed result of a one-operand operation.
 *
 * - If val is a NaN: the result becomes the fixed qnan bit pattern
 *   (0x7fc00000 when fpType is float-sized, 0x7ff8000000000000 otherwise)
 *   when op1 is not a NaN or fpscr.dn == 1; otherwise it becomes op1's bit
 *   pattern with the qnan bits OR'ed in.
 * - If val is subnormal and fpscr.fz == 1: the result becomes a zero that
 *   keeps only val's top (sign) bit, the host inexact flag is cleared and
 *   the host underflow flag is raised.
 * - Otherwise val is returned unchanged.
 *
 * @param fpscr Supplies the dn and fz control bits read above (by value).
 * @param val   Result produced by the host FPU.
 * @param op1   The operation's source operand, consulted only for NaNs.
 * @return The adjusted result.
 */
243template <class fpType>

200template <class fpType>

244static inline fpType
245fixDest(FPSCR fpscr, fpType val, fpType op1)
246{
247 int fpClass = std::fpclassify(val);
    // NOTE(review): junk is only ever passed as the second argument of
    // bitsToFp; presumably it just selects the return type - confirm.
248 fpType junk = 0.0;
249 if (fpClass == FP_NAN) {
250 const bool single = (sizeof(val) == sizeof(float));
251 const uint64_t qnan = single ? 0x7fc00000 : ULL(0x7ff8000000000000);
252 const bool nan = std::isnan(op1);
253 if (!nan || (fpscr.dn == 1)) {
254 val = bitsToFp(qnan, junk);
    // This condition is always true here: the branch above already
    // handled the !nan case.
255 } else if (nan) {
256 val = bitsToFp(fpToBits(op1) | qnan, junk);
257 }
258 } else if (fpClass == FP_SUBNORMAL && fpscr.fz == 1) {
259 // Turn val into a zero with the correct sign;
260 uint64_t bitMask = ULL(0x1) << (sizeof(fpType) * 8 - 1);
261 val = bitsToFp(fpToBits(val) & bitMask, junk);
262 feclearexcept(FeInexact);
263 feraiseexcept(FeUnderflow);
264 }
265 return val;
266}

201fpType fixDest(FPSCR fpscr, fpType val, fpType op1);

267
/**
 * Adjust the host-computed result of a two-operand operation.
 *
 * NaN results are replaced according to the first matching rule:
 *   1. neither operand is a NaN, or fpscr.dn == 1 -> the fixed qnan bit
 *      pattern (0x7fc00000 when fpType is float-sized,
 *      0x7ff8000000000000 otherwise);
 *   2. op1 is a NaN lacking some of the qnan bits -> op1 | qnan;
 *   3. op2 is a NaN lacking some of the qnan bits -> op2 | qnan;
 *   4. op1 is a NaN -> op1 unchanged;
 *   5. op2 is a NaN -> op2 unchanged.
 * A subnormal result with fpscr.fz == 1 becomes a zero that keeps only
 * val's top (sign) bit; the host inexact flag is cleared and the host
 * underflow flag is raised. Any other result is returned unchanged.
 *
 * @param fpscr Supplies the dn and fz control bits read above (by value).
 * @param val   Result produced by the host FPU.
 * @param op1   First source operand, consulted only for NaN results.
 * @param op2   Second source operand, consulted only for NaN results.
 * @return The adjusted result.
 */
268template <class fpType>

202
203template <class fpType>

269static inline fpType
270fixDest(FPSCR fpscr, fpType val, fpType op1, fpType op2)
271{
272 int fpClass = std::fpclassify(val);
    // NOTE(review): junk is only ever passed as the second argument of
    // bitsToFp; presumably it just selects the return type - confirm.
273 fpType junk = 0.0;
274 if (fpClass == FP_NAN) {
275 const bool single = (sizeof(val) == sizeof(float));
276 const uint64_t qnan = single ? 0x7fc00000 : ULL(0x7ff8000000000000);
277 const bool nan1 = std::isnan(op1);
278 const bool nan2 = std::isnan(op2);
    // An operand counts as "signalling" when it is a NaN whose bit
    // pattern does not contain every bit of qnan.
279 const bool signal1 = nan1 && ((fpToBits(op1) & qnan) != qnan);
280 const bool signal2 = nan2 && ((fpToBits(op2) & qnan) != qnan);
281 if ((!nan1 && !nan2) || (fpscr.dn == 1)) {
282 val = bitsToFp(qnan, junk);
283 } else if (signal1) {
284 val = bitsToFp(fpToBits(op1) | qnan, junk);
285 } else if (signal2) {
286 val = bitsToFp(fpToBits(op2) | qnan, junk);
287 } else if (nan1) {
288 val = op1;
289 } else if (nan2) {
290 val = op2;
291 }
292 } else if (fpClass == FP_SUBNORMAL && fpscr.fz == 1) {
293 // Turn val into a zero with the correct sign;
294 uint64_t bitMask = ULL(0x1) << (sizeof(fpType) * 8 - 1);
295 val = bitsToFp(fpToBits(val) & bitMask, junk);
296 feclearexcept(FeInexact);
297 feraiseexcept(FeUnderflow);
298 }
299 return val;
300}

204fpType fixDest(FPSCR fpscr, fpType val, fpType op1, fpType op2);

301
/**
 * Adjust the host-computed result of a division op1 / op2.
 *
 * First applies the two-operand fixDest(). Then, if the unadjusted val is
 * exactly the bit pattern 0x00800000 / 0x80800000 (float-sized fpType) or
 * 0x0010000000000000 / 0x8010000000000000 (otherwise) - i.e. exponent
 * field 1 with a zero fraction, either sign - the division is repeated
 * with the host rounding toward zero. If flushToZero() reports true for
 * that second quotient, the host underflow flag is raised and, when
 * fpscr.fz is set, the host inexact flag is cleared and the second
 * quotient replaces the result.
 *
 * NOTE(review): the rounding mode set by fesetround(FeRoundZero) is not
 * restored in this function; presumably the caller restores it - confirm.
 *
 * @param fpscr Supplies the control bits (by value).
 * @param val   Quotient produced by the host FPU.
 * @param op1   Dividend.
 * @param op2   Divisor.
 * @return The adjusted quotient.
 */
302template <class fpType>

205
206template <class fpType>

303static inline fpType
304fixDivDest(FPSCR fpscr, fpType val, fpType op1, fpType op2)
305{
306 fpType mid = fixDest(fpscr, val, op1, op2);
307 const bool single = (sizeof(fpType) == sizeof(float));
308 const fpType junk = 0.0;
    // The comparison uses the raw val, not the fixDest()-adjusted mid.
309 if ((single && (val == bitsToFp(0x00800000, junk) ||
310 val == bitsToFp(0x80800000, junk))) ||
311 (!single && (val == bitsToFp(ULL(0x0010000000000000), junk) ||
312 val == bitsToFp(ULL(0x8010000000000000), junk)))
313 ) {
    // Empty asm with memory constraints on op1/temp: presumably keeps the
    // compiler from moving or folding the division across the
    // fesetround() call - TODO confirm.
314 __asm__ __volatile__("" : "=m" (op1) : "m" (op1));
315 fesetround(FeRoundZero);
316 fpType temp = 0.0;
317 __asm__ __volatile__("" : "=m" (temp) : "m" (temp));
318 temp = op1 / op2;
    // NOTE(review): flushToZero() is defined outside this view; it looks
    // like it may modify temp and return whether it did - confirm.
319 if (flushToZero(temp)) {
320 feraiseexcept(FeUnderflow);
321 if (fpscr.fz) {
322 feclearexcept(FeInexact);
323 mid = temp;
324 }
325 }
326 __asm__ __volatile__("" :: "m" (temp));
327 }
328 return mid;
329}

207fpType fixDivDest(FPSCR fpscr, fpType val, fpType op1, fpType op2);

330

208

// fixFpDFpSDest: fix up the result of converting the double `val` to float.
//
// For a NaN input, a float bit pattern is assembled from bits 50..29 and
// bit 63 of the double and handed to fixDest() as the operand. Afterwards
// the inexact flag is cleared when fpscr.fz is set and both underflow and
// inexact are raised, and results with biased exponent 1 / zero mantissa
// are re-converted under FeRoundZero to check for a flush.
//
// Returns the fixed-up float. The host rounding mode is left at FeRoundZero
// on the re-conversion path.
331static inline float
332fixFpDFpSDest(FPSCR fpscr, double val)
333{
334 const float junk = 0.0;
335 float op1 = 0.0;
336 if (std::isnan(val)) {
337 uint64_t valBits = fpToBits(val);
// Bits 50..29 of the double land in the low bits of the float, bit 63 is
// moved to bit 31, and mask(9) << 22 is OR-ed in (presumably setting bits
// 30..22, i.e. an all-ones exponent plus the top mantissa bit -- confirm
// against mask()).
338 uint32_t op1Bits = bits(valBits, 50, 29) |
339 (mask(9) << 22) |
340 (bits(valBits, 63) << 31);
341 op1 = bitsToFp(op1Bits, junk);
342 }
343 float mid = fixDest(fpscr, (float)val, op1);
// Only when both flags are set together is the inexact flag dropped.
344 if (fpscr.fz && fetestexcept(FeUnderflow | FeInexact) ==
345 (FeUnderflow | FeInexact)) {
346 feclearexcept(FeInexact);
347 }
348 if (mid == bitsToFp(0x00800000, junk) ||
349 mid == bitsToFp(0x80800000, junk)) {
// Empty asm statements with memory operands; presumably they stop the
// compiler from reordering the conversion around fesetround().
350 __asm__ __volatile__("" : "=m" (val) : "m" (val));
351 fesetround(FeRoundZero);
352 float temp = 0.0;
353 __asm__ __volatile__("" : "=m" (temp) : "m" (temp));
// Repeat the double -> float conversion, now rounding toward zero.
354 temp = val;
355 if (flushToZero(temp)) {
356 feraiseexcept(FeUnderflow);
357 if (fpscr.fz) {
358 feclearexcept(FeInexact);
359 mid = temp;
360 }
361 }
362 __asm__ __volatile__("" :: "m" (temp));
363 }
364 return mid;
365}

209float fixFpDFpSDest(FPSCR fpscr, double val);
210double fixFpSFpDDest(FPSCR fpscr, float val);

366

211

// fixFpSFpDDest: fix up the result of converting the float `val` to double.
//
// For a NaN input, a double bit pattern is assembled from bits 21..0 and
// bit 31 of the float and handed to fixDest() as the operand. Results with
// biased exponent 1 / zero mantissa are re-converted under FeRoundZero and
// tested with flushToZero().
//
// Returns the fixed-up double. The host rounding mode is left at
// FeRoundZero on the re-conversion path.
367static inline double
368fixFpSFpDDest(FPSCR fpscr, float val)
369{
370 const double junk = 0.0;
371 double op1 = 0.0;
372 if (std::isnan(val)) {
373 uint32_t valBits = fpToBits(val);
// Bits 21..0 of the float are shifted up by 29, bit 31 is moved to bit 63,
// and mask(12) << 51 is OR-ed in (presumably bits 62..51 -- confirm against
// mask()).
374 uint64_t op1Bits = ((uint64_t)bits(valBits, 21, 0) << 29) |
375 (mask(12) << 51) |
376 ((uint64_t)bits(valBits, 31) << 63);
377 op1 = bitsToFp(op1Bits, junk);
378 }
379 double mid = fixDest(fpscr, (double)val, op1);
380 if (mid == bitsToFp(ULL(0x0010000000000000), junk) ||
381 mid == bitsToFp(ULL(0x8010000000000000), junk)) {
// Empty asm statements with memory operands; presumably they stop the
// compiler from reordering the conversion around fesetround().
382 __asm__ __volatile__("" : "=m" (val) : "m" (val));
383 fesetround(FeRoundZero);
384 double temp = 0.0;
385 __asm__ __volatile__("" : "=m" (temp) : "m" (temp));
// Repeat the float -> double conversion, now rounding toward zero.
386 temp = val;
387 if (flushToZero(temp)) {
388 feraiseexcept(FeUnderflow);
389 if (fpscr.fz) {
390 feclearexcept(FeInexact);
391 mid = temp;
392 }
393 }
394 __asm__ __volatile__("" :: "m" (temp));
395 }
396 return mid;
397}

212float vcvtFpSFpH(FPSCR &fpscr, float op, float dest, bool top);
213float vcvtFpHFpS(FPSCR &fpscr, float op, bool top);

398

214

// vcvtFpSFpH: convert the single-precision value `op` to a 16-bit
// half-precision encoding, done entirely with integer bit manipulation.
//
// fpscr - status/control register, passed by reference; the ioc, ufc, ofc
//         and ixc flags are set here, and ahp, dn, fz, ufe and rMode are
//         read.
// op    - value to convert.
// dest  - value whose bits are preserved in the half that is not written.
// top   - when true the result is stored in bits 31..16 of `dest`,
//         otherwise in bits 15..0.
//
// Returns `dest` with the selected 16 bits replaced by the converted value.
399static inline float
400vcvtFpSFpH(FPSCR &fpscr, float op, float dest, bool top)
401{
402 float junk = 0.0;
403 uint32_t destBits = fpToBits(dest);
404 uint32_t opBits = fpToBits(op);
405 // Extract the operand.
406 bool neg = bits(opBits, 31);
407 uint32_t exponent = bits(opBits, 30, 23);
408 uint32_t oldMantissa = bits(opBits, 22, 0);
// Keep the top 10 of the 23 mantissa bits.
409 uint32_t mantissa = oldMantissa >> (23 - 10);
410 // Do the conversion.
// `extra` is the part of the mantissa dropped by the shift above
// (presumably the low 13 bits, given mask(23 - 10)).
411 uint32_t extra = oldMantissa & mask(23 - 10);
412 if (exponent == 0xff) {
413 if (oldMantissa != 0) {
414 // Nans.
415 if (bits(mantissa, 9) == 0) {
416 // Signalling nan.
417 fpscr.ioc = 1;
418 }
419 if (fpscr.ahp) {
// With ahp set, a NaN input produces a zero exponent and mantissa and
// raises ioc.
420 mantissa = 0;
421 exponent = 0;
422 fpscr.ioc = 1;
423 } else if (fpscr.dn) {
// With dn set, the result is the fixed pattern exponent 0x1f, mantissa
// bit 9 only, sign cleared.
424 mantissa = (1 << 9);
425 exponent = 0x1f;
426 neg = false;
427 } else {
// Otherwise keep the truncated payload and force mantissa bit 9 on.
428 exponent = 0x1f;
429 mantissa |= (1 << 9);
430 }
431 } else {
432 // Infinities.
433 exponent = 0x1F;
434 if (fpscr.ahp) {
// With ahp set, infinity becomes exponent 0x1f with an all-ones mantissa
// and raises ioc.
435 fpscr.ioc = 1;
436 mantissa = 0x3ff;
437 } else {
438 mantissa = 0;
439 }
440 }
441 } else if (exponent == 0 && oldMantissa == 0) {
442 // Zero, don't need to do anything.
443 } else {
444 // Normalized or denormalized numbers.
445
446 bool inexact = (extra != 0);
447
448 if (exponent == 0) {
449 // Denormalized.
450
451 // If flush to zero is on, this shouldn't happen.
452 assert(fpscr.fz == 0);
453
454 // Check for underflow
455 if (inexact || fpscr.ufe)
456 fpscr.ufc = 1;
457
458 // Handle rounding.
// NOTE(review): `extra` comes from mask(23 - 10), yet the tie point tested
// for round-to-nearest here is (1 << 9), whereas the normalized path below
// uses bit 12 of `extra` as the top dropped bit. Confirm this is intended.
459 unsigned mode = fpscr.rMode;
460 if ((mode == VfpRoundUpward && !neg && extra) ||
461 (mode == VfpRoundDown && neg && extra) ||
462 (mode == VfpRoundNearest &&
463 (extra > (1 << 9) ||
464 (extra == (1 << 9) && bits(mantissa, 0))))) {
465 mantissa++;
466 }
467
468 // See if the number became normalized after rounding.
469 if (mantissa == (1 << 10)) {
470 mantissa = 0;
471 exponent = 1;
472 }
473 } else {
474 // Normalized.
475
476 // We need to track the dropped bits differently since
477 // more can be dropped by denormalizing.
478 bool topOne = bits(extra, 12);
479 bool restZeros = bits(extra, 11, 0) == 0;
480
481 if (exponent <= (127 - 15)) {
482 // The result is too small. Denormalize.
// Make the implicit leading one explicit (bit 10), then shift right one
// place per exponent step, tracking the last bit shifted out in `topOne`
// and whether everything below it was zero in `restZeros`.
483 mantissa |= (1 << 10);
484 while (mantissa && exponent <= (127 - 15)) {
485 restZeros = restZeros && !topOne;
486 topOne = bits(mantissa, 0);
487 mantissa = mantissa >> 1;
488 exponent++;
489 }
490 if (topOne || !restZeros)
491 inexact = true;
492 exponent = 0;
493 } else {
494 // Change bias.
495 exponent -= (127 - 15);
496 }
497
498 if (exponent == 0 && (inexact || fpscr.ufe)) {
499 // Underflow
500 fpscr.ufc = 1;
501 }
502
503 // Handle rounding.
504 unsigned mode = fpscr.rMode;
505 bool nonZero = topOne || !restZeros;
506 if ((mode == VfpRoundUpward && !neg && nonZero) ||
507 (mode == VfpRoundDown && neg && nonZero) ||
508 (mode == VfpRoundNearest && topOne &&
509 (!restZeros || bits(mantissa, 0)))) {
510 mantissa++;
511 }
512
513 // See if we rounded up and need to bump the exponent.
514 if (mantissa == (1 << 10)) {
515 mantissa = 0;
516 exponent++;
517 }
518
519 // Deal with overflow
520 if (fpscr.ahp) {
// With ahp set the limit is exponent 0x20; overflow saturates to exponent
// 0x1f with an all-ones mantissa and raises ioc.
521 if (exponent >= 0x20) {
522 exponent = 0x1f;
523 mantissa = 0x3ff;
524 fpscr.ioc = 1;
525 // Suppress inexact exception.
526 inexact = false;
527 }
528 } else {
529 if (exponent >= 0x1f) {
530 if ((mode == VfpRoundNearest) ||
531 (mode == VfpRoundUpward && !neg) ||
532 (mode == VfpRoundDown && neg)) {
533 // Overflow to infinity.
534 exponent = 0x1f;
535 mantissa = 0;
536 } else {
537 // Overflow to max normal.
538 exponent = 0x1e;
539 mantissa = 0x3ff;
540 }
541 fpscr.ofc = 1;
542 inexact = true;
543 }
544 }
545 }
546
547 if (inexact) {
548 fpscr.ixc = 1;
549 }
550 }
551 // Reassemble and install the result.
// Layout of the 16-bit result: mantissa in bits 9..0, exponent in bits
// 14..10, sign in bit 15.
552 uint32_t result = bits(mantissa, 9, 0);
553 replaceBits(result, 14, 10, exponent);
554 if (neg)
555 result |= (1 << 15);
556 if (top)
557 replaceBits(destBits, 31, 16, result);
558 else
559 replaceBits(destBits, 15, 0, result);
560 return bitsToFp(destBits, junk);
561}
562
// vcvtFpHFpS: convert a 16-bit half-precision encoding held in one half of
// `op` to a single-precision value.
//
// fpscr - status/control register, passed by reference; ioc is set for a
//         signalling NaN, and ahp and dn are read.
// op    - value whose bits contain the half-precision operand.
// top   - when true the operand is bits 31..16 of `op`, otherwise bits 15..0.
//
// Returns the single-precision value built from the widened fields.
563static inline float
564vcvtFpHFpS(FPSCR &fpscr, float op, bool top)
565{
566 float junk = 0.0;
567 uint32_t opBits = fpToBits(op);
568 // Extract the operand.
569 if (top)
570 opBits = bits(opBits, 31, 16);
571 else
572 opBits = bits(opBits, 15, 0);
573 // Extract the bitfields.
574 bool neg = bits(opBits, 15);
575 uint32_t exponent = bits(opBits, 14, 10);
576 uint32_t mantissa = bits(opBits, 9, 0);
577 // Do the conversion.
578 if (exponent == 0) {
579 if (mantissa != 0) {
580 // Normalize the value.
// Start one above the re-biased exponent and shift left until bit 10 is
// set, decrementing the exponent for each shift.
581 exponent = exponent + (127 - 15) + 1;
582 while (mantissa < (1 << 10)) {
583 mantissa = mantissa << 1;
584 exponent--;
585 }
586 }
587 mantissa = mantissa << (23 - 10);
588 } else if (exponent == 0x1f && !fpscr.ahp) {
589 // Infinities and nans.
590 exponent = 0xff;
591 if (mantissa != 0) {
592 // Nans.
593 mantissa = mantissa << (23 - 10);
594 if (bits(mantissa, 22) == 0) {
595 // Signalling nan.
596 fpscr.ioc = 1;
597 mantissa |= (1 << 22);
598 }
599 if (fpscr.dn) {
// With dn set, the bits selected by mask(22) are cleared and the sign is
// dropped.
600 mantissa &= ~mask(22);
601 neg = false;
602 }
603 }
604 } else {
// Every other encoding (including exponent 0x1f when ahp is set) is
// re-biased and widened directly.
605 exponent = exponent + (127 - 15);
606 mantissa = mantissa << (23 - 10);
607 }
608 // Reassemble the result.
// Only bits 22..0 of the mantissa are kept, so bit 23 set by the
// normalization loop above is not stored.
609 uint32_t result = bits(mantissa, 22, 0);
610 replaceBits(result, 30, 23, exponent);
611 if (neg)
612 result |= (1 << 31);
613 return bitsToFp(result, junk);
614}
615

// makeDouble: build a double from two 32-bit halves of its bit pattern.
// `low` supplies bits 31..0 and `high` supplies bits 63..32.
616static inline double
617makeDouble(uint32_t low, uint32_t high)
618{
// `junk` only supplies the type for bitsToFp().
619 double junk = 0.0;
620 return bitsToFp((uint64_t)low | ((uint64_t)high << 32), junk);
621}
622
// lowFromDouble: return the low 32 bits of the bit pattern of `val`; the
// upper bits are dropped by the conversion to the uint32_t return type.
623static inline uint32_t
624lowFromDouble(double val)
625{
626 return fpToBits(val);
627}
628
// highFromDouble: return the bit pattern of `val` shifted right by 32,
// i.e. its upper half.
629static inline uint32_t
630highFromDouble(double val)
631{
632 return fpToBits(val) >> 32;
633}
634

// makeDouble: build a double from two 32-bit halves of its bit pattern.
// `low` supplies bits 31..0 and `high` supplies bits 63..32.
215static inline double
216makeDouble(uint32_t low, uint32_t high)
217{
// `junk` only supplies the type for bitsToFp().
218 double junk = 0.0;
219 return bitsToFp((uint64_t)low | ((uint64_t)high << 32), junk);
220}
221
// lowFromDouble: return the low 32 bits of the bit pattern of `val`; the
// upper bits are dropped by the conversion to the uint32_t return type.
222static inline uint32_t
223lowFromDouble(double val)
224{
225 return fpToBits(val);
226}
227
// highFromDouble: return the bit pattern of `val` shifted right by 32,
// i.e. its upper half.
228static inline uint32_t
229highFromDouble(double val)
230{
231 return fpToBits(val) >> 32;
232}
233

// vfpFpSToFixed: convert the float `val` to a fixed-point integer.
//
// val      - value to convert.
// isSigned - selects the signed (int16_t / int32_t) or unsigned
//            (uint16_t / uint32_t) result range.
// half     - selects the 16-bit range when true, the 32-bit range otherwise.
// imm      - the value is multiplied by 2^imm before rounding.
// rzero    - when true the integer rounding uses FeRoundZero, otherwise the
//            host rounding mode current on entry.
//
// Out-of-range values saturate to the nearest bound, raising FeInvalid and
// clearing FeInexact. A NaN raises FeInvalid and converts to 0. Host
// exception flags are cleared part-way through, and the host rounding mode
// is left set to `rmode` on return.
//
// Returns the integer widened to uint64_t; signed results pass through
// int16_t / int32_t first.
635static inline uint64_t
636vfpFpSToFixed(float val, bool isSigned, bool half,
637 uint8_t imm, bool rzero = true)
638{
639 int rmode = rzero ? FeRoundZero : fegetround();
// Empty asm statements with memory operands; presumably they keep the
// compiler from reordering the arithmetic around the fenv calls.
640 __asm__ __volatile__("" : "=m" (rmode) : "m" (rmode));
// The scaling multiply is always done with round-to-nearest.
641 fesetround(FeRoundNearest);
642 val = val * powf(2.0, imm);
643 __asm__ __volatile__("" : "=m" (val) : "m" (val));
644 fesetround(rmode);
645 feclearexcept(FeAllExceptions);
646 __asm__ __volatile__("" : "=m" (val) : "m" (val));
647 float origVal = val;
// rintf() rounds to an integer using the rounding mode just installed.
648 val = rintf(val);
649 int fpType = std::fpclassify(val);
650 if (fpType == FP_SUBNORMAL || fpType == FP_NAN) {
651 if (fpType == FP_NAN) {
652 feraiseexcept(FeInvalid);
653 }
654 val = 0.0;
655 } else if (origVal != val) {
// The value changed when rounded: adjust by one where the rounded value
// sits on the wrong side for `rmode`, then flag the result as inexact.
656 switch (rmode) {
657 case FeRoundNearest:
658 if (origVal - val > 0.5)
659 val += 1.0;
660 else if (val - origVal > 0.5)
661 val -= 1.0;
662 break;
663 case FeRoundDown:
664 if (origVal < val)
665 val -= 1.0;
666 break;
667 case FeRoundUpward:
668 if (origVal > val)
669 val += 1.0;
670 break;
671 }
672 feraiseexcept(FeInexact);
673 }

234uint64_t vfpFpSToFixed(float val, bool isSigned, bool half,
235 uint8_t imm, bool rzero = true);
236float vfpUFixedToFpS(FPSCR fpscr, uint32_t val, bool half, uint8_t imm);
237float vfpSFixedToFpS(FPSCR fpscr, int32_t val, bool half, uint8_t imm);

674

238

// Range checks compare in double so the integer bounds are represented
// exactly.
675 if (isSigned) {
676 if (half) {
677 if ((double)val < (int16_t)(1 << 15)) {
678 feraiseexcept(FeInvalid);
679 feclearexcept(FeInexact);
680 return (int16_t)(1 << 15);
681 }
682 if ((double)val > (int16_t)mask(15)) {
683 feraiseexcept(FeInvalid);
684 feclearexcept(FeInexact);
685 return (int16_t)mask(15);
686 }
687 return (int16_t)val;
688 } else {
// NOTE(review): (1 << 31) shifts into the sign bit of int; the code relies
// on this yielding the most negative int32_t -- confirm for the toolchain.
689 if ((double)val < (int32_t)(1 << 31)) {
690 feraiseexcept(FeInvalid);
691 feclearexcept(FeInexact);
692 return (int32_t)(1 << 31);
693 }
694 if ((double)val > (int32_t)mask(31)) {
695 feraiseexcept(FeInvalid);
696 feclearexcept(FeInexact);
697 return (int32_t)mask(31);
698 }
699 return (int32_t)val;
700 }
701 } else {
702 if (half) {
703 if ((double)val < 0) {
704 feraiseexcept(FeInvalid);
705 feclearexcept(FeInexact);
706 return 0;
707 }
708 if ((double)val > (mask(16))) {
709 feraiseexcept(FeInvalid);
710 feclearexcept(FeInexact);
711 return mask(16);
712 }
713 return (uint16_t)val;
714 } else {
715 if ((double)val < 0) {
716 feraiseexcept(FeInvalid);
717 feclearexcept(FeInexact);
718 return 0;
719 }
720 if ((double)val > (mask(32))) {
721 feraiseexcept(FeInvalid);
722 feclearexcept(FeInexact);
723 return mask(32);
724 }
725 return (uint32_t)val;
726 }
727 }
728}

239uint64_t vfpFpDToFixed(double val, bool isSigned, bool half,
240 uint8_t imm, bool rzero = true);
241double vfpUFixedToFpD(FPSCR fpscr, uint32_t val, bool half, uint8_t imm);
242double vfpSFixedToFpD(FPSCR fpscr, int32_t val, bool half, uint8_t imm);

729

243

// vfpUFixedToFpS: convert an unsigned fixed-point value to float by
// dividing it by 2^imm.
//
// val  - fixed-point input; truncated to 16 bits when `half` is true.
// imm  - exponent of the power-of-two divisor.
//
// Sets the host rounding mode to FeRoundNearest and clears the host
// exception flags before the division; the quotient is post-processed by
// fixDivDest(), whose result is returned.
730static inline float
731vfpUFixedToFpS(FPSCR fpscr, uint32_t val, bool half, uint8_t imm)
732{
733 fesetround(FeRoundNearest);
734 if (half)
735 val = (uint16_t)val;
736 float scale = powf(2.0, imm);
// Empty asm statements with memory operands; presumably they keep the
// division from being moved across feclearexcept().
737 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
738 feclearexcept(FeAllExceptions);
739 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
740 return fixDivDest(fpscr, val / scale, (float)val, scale);
741}
742
// vfpSFixedToFpS: convert a signed fixed-point value to float by dividing
// it by 2^imm.
//
// val  - fixed-point input; when `half` is true its low 16 bits are
//        sign-extended via sext<16>().
// imm  - exponent of the power-of-two divisor.
//
// Sets the host rounding mode to FeRoundNearest and clears the host
// exception flags before the division; the quotient is post-processed by
// fixDivDest(), whose result is returned.
743static inline float
744vfpSFixedToFpS(FPSCR fpscr, int32_t val, bool half, uint8_t imm)
745{
746 fesetround(FeRoundNearest);
747 if (half)
748 val = sext<16>(val & mask(16));
749 float scale = powf(2.0, imm);
// Empty asm statements with memory operands; presumably they keep the
// division from being moved across feclearexcept().
750 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
751 feclearexcept(FeAllExceptions);
752 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
753 return fixDivDest(fpscr, val / scale, (float)val, scale);
754}
755
// vfpFpDToFixed: convert the double `val` to a fixed-point integer.
//
// val      - value to convert.
// isSigned - selects the signed (int16_t / int32_t) or unsigned
//            (uint16_t / uint32_t) result range.
// half     - selects the 16-bit range when true, the 32-bit range otherwise.
// imm      - the value is multiplied by 2^imm before rounding.
// rzero    - when true the integer rounding uses FeRoundZero, otherwise the
//            host rounding mode current on entry.
//
// Out-of-range values saturate to the nearest bound, raising FeInvalid and
// clearing FeInexact. A NaN raises FeInvalid and converts to 0. Host
// exception flags are cleared part-way through, and the host rounding mode
// is left set to `rmode` on return.
//
// Returns the integer widened to uint64_t; signed results pass through
// int16_t / int32_t first.
756static inline uint64_t
757vfpFpDToFixed(double val, bool isSigned, bool half,
758 uint8_t imm, bool rzero = true)
759{
760 int rmode = rzero ? FeRoundZero : fegetround();
// The scaling multiply is always done with round-to-nearest.
761 fesetround(FeRoundNearest);
762 val = val * pow(2.0, imm);
// Empty asm statements with memory operands; presumably they keep the
// compiler from reordering the arithmetic around the fenv calls.
763 __asm__ __volatile__("" : "=m" (val) : "m" (val));
764 fesetround(rmode);
765 feclearexcept(FeAllExceptions);
766 __asm__ __volatile__("" : "=m" (val) : "m" (val));
767 double origVal = val;
// rint() rounds to an integer using the rounding mode just installed.
768 val = rint(val);
769 int fpType = std::fpclassify(val);
770 if (fpType == FP_SUBNORMAL || fpType == FP_NAN) {
771 if (fpType == FP_NAN) {
772 feraiseexcept(FeInvalid);
773 }
774 val = 0.0;
775 } else if (origVal != val) {
// The value changed when rounded: adjust by one where the rounded value
// sits on the wrong side for `rmode`, then flag the result as inexact.
776 switch (rmode) {
777 case FeRoundNearest:
778 if (origVal - val > 0.5)
779 val += 1.0;
780 else if (val - origVal > 0.5)
781 val -= 1.0;
782 break;
783 case FeRoundDown:
784 if (origVal < val)
785 val -= 1.0;
786 break;
787 case FeRoundUpward:
788 if (origVal > val)
789 val += 1.0;
790 break;
791 }
792 feraiseexcept(FeInexact);
793 }
794 if (isSigned) {
795 if (half) {
796 if (val < (int16_t)(1 << 15)) {
797 feraiseexcept(FeInvalid);
798 feclearexcept(FeInexact);
799 return (int16_t)(1 << 15);
800 }
801 if (val > (int16_t)mask(15)) {
802 feraiseexcept(FeInvalid);
803 feclearexcept(FeInexact);
804 return (int16_t)mask(15);
805 }
806 return (int16_t)val;
807 } else {
// NOTE(review): (1 << 31) shifts into the sign bit of int; the code relies
// on this yielding the most negative int32_t -- confirm for the toolchain.
808 if (val < (int32_t)(1 << 31)) {
809 feraiseexcept(FeInvalid);
810 feclearexcept(FeInexact);
811 return (int32_t)(1 << 31);
812 }
813 if (val > (int32_t)mask(31)) {
814 feraiseexcept(FeInvalid);
815 feclearexcept(FeInexact);
816 return (int32_t)mask(31);
817 }
818 return (int32_t)val;
819 }
820 } else {
821 if (half) {
822 if (val < 0) {
823 feraiseexcept(FeInvalid);
824 feclearexcept(FeInexact);
825 return 0;
826 }
827 if (val > mask(16)) {
828 feraiseexcept(FeInvalid);
829 feclearexcept(FeInexact);
830 return mask(16);
831 }
832 return (uint16_t)val;
833 } else {
834 if (val < 0) {
835 feraiseexcept(FeInvalid);
836 feclearexcept(FeInexact);
837 return 0;
838 }
839 if (val > mask(32)) {
840 feraiseexcept(FeInvalid);
841 feclearexcept(FeInexact);
842 return mask(32);
843 }
844 return (uint32_t)val;
845 }
846 }
847}
848
// vfpUFixedToFpD: convert an unsigned fixed-point value to double by
// dividing it by 2^imm.
//
// val  - fixed-point input; truncated to 16 bits when `half` is true.
// imm  - exponent of the power-of-two divisor.
//
// Sets the host rounding mode to FeRoundNearest and clears the host
// exception flags before the division; the quotient is post-processed by
// fixDivDest(), whose result is returned.
849static inline double
850vfpUFixedToFpD(FPSCR fpscr, uint32_t val, bool half, uint8_t imm)
851{
852 fesetround(FeRoundNearest);
853 if (half)
854 val = (uint16_t)val;
855 double scale = pow(2.0, imm);
// Empty asm statements with memory operands; presumably they keep the
// division from being moved across feclearexcept().
856 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
857 feclearexcept(FeAllExceptions);
858 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
859 return fixDivDest(fpscr, val / scale, (double)val, scale);
860}
861
// vfpSFixedToFpD: convert a signed fixed-point value to double by dividing
// it by 2^imm.
//
// val  - fixed-point input; when `half` is true its low 16 bits are
//        sign-extended via sext<16>().
// imm  - exponent of the power-of-two divisor.
//
// Sets the host rounding mode to FeRoundNearest and clears the host
// exception flags before the division; the quotient is post-processed by
// fixDivDest(), whose result is returned.
862static inline double
863vfpSFixedToFpD(FPSCR fpscr, int32_t val, bool half, uint8_t imm)
864{
865 fesetround(FeRoundNearest);
866 if (half)
867 val = sext<16>(val & mask(16));
868 double scale = pow(2.0, imm);
// Empty asm statements with memory operands; presumably they keep the
// division from being moved across feclearexcept().
869 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
870 feclearexcept(FeAllExceptions);
871 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
872 return fixDivDest(fpscr, val / scale, (double)val, scale);
873}
874

// VfpMacroOp: base class, derived from PredMacroOp, carrying helpers that
// step register indices by a stride. (This listing interleaves two
// revisions of the class; the numbered prefixes identify each revision's
// lines.)
875class VfpMacroOp : public PredMacroOp
876{
877 public:
// True when the index, taken modulo 32, is one of the first eight.
878 static bool
879 inScalarBank(IntRegIndex idx)
880 {
881 return (idx % 32) < 8;
882 }
883
884 protected:
// When set, addStride() doubles the stride it is given.
885 bool wide;
886
887 VfpMacroOp(const char *mnem, ExtMachInst _machInst,
888 OpClass __opClass, bool _wide) :
889 PredMacroOp(mnem, _machInst, __opClass), wide(_wide)
890 {}
891

244class VfpMacroOp : public PredMacroOp
245{
246 public:
// True when the index, taken modulo 32, is one of the first eight.
247 static bool
248 inScalarBank(IntRegIndex idx)
249 {
250 return (idx % 32) < 8;
251 }
252
253 protected:
254 bool wide;
255
256 VfpMacroOp(const char *mnem, ExtMachInst _machInst,
257 OpClass __opClass, bool _wide) :
258 PredMacroOp(mnem, _machInst, __opClass), wide(_wide)
259 {}
260

// Advance `idx` by `stride` (doubled when `wide`) within its aligned group
// of eight indices; the offset wraps modulo 8 so the result stays in the
// same group.
892 IntRegIndex
893 addStride(IntRegIndex idx, unsigned stride)
894 {
895 if (wide) {
896 stride *= 2;
897 }
898 unsigned offset = idx % 8;
899 idx = (IntRegIndex)(idx - offset);
900 offset += stride;
901 idx = (IntRegIndex)(idx + (offset % 8));
902 return idx;
903 }
904
// Step dest and op1 by the stride selected by machInst.fpscrStride (1 when
// it is 0, otherwise 2); op2 is stepped only when it is not in the scalar
// bank. Asserts that dest is not in the scalar bank.
905 void
906 nextIdxs(IntRegIndex &dest, IntRegIndex &op1, IntRegIndex &op2)
907 {
908 unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2;
909 assert(!inScalarBank(dest));
910 dest = addStride(dest, stride);
911 op1 = addStride(op1, stride);
912 if (!inScalarBank(op2)) {
913 op2 = addStride(op2, stride);
914 }
915 }
916
// Two-register form: dest is always stepped; op1 is stepped only when it is
// not in the scalar bank.
917 void
918 nextIdxs(IntRegIndex &dest, IntRegIndex &op1)
919 {
920 unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2;
921 assert(!inScalarBank(dest));
922 dest = addStride(dest, stride);
923 if (!inScalarBank(op1)) {
924 op1 = addStride(op1, stride);
925 }
926 }
927
// Single-register form: step dest only.
928 void
929 nextIdxs(IntRegIndex &dest)
930 {
931 unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2;
932 assert(!inScalarBank(dest));
933 dest = addStride(dest, stride);
934 }

261 IntRegIndex addStride(IntRegIndex idx, unsigned stride);
262 void nextIdxs(IntRegIndex &dest, IntRegIndex &op1, IntRegIndex &op2);
263 void nextIdxs(IntRegIndex &dest, IntRegIndex &op1);
264 void nextIdxs(IntRegIndex &dest);

935};
936
// fpAddS: return the single-precision sum a + b.
937static inline float
938fpAddS(float a, float b)
939{
940 return a + b;
941}
942

--- 88 unchanged lines hidden (view full) ---

1031 {
1032 return fpToBits(val) >> 32;
1033 }
1034
1035 template <class fpType>
1036 fpType
1037 binaryOp(FPSCR &fpscr, fpType op1, fpType op2,
1038 fpType (*func)(fpType, fpType),

265};
266
// fpAddS: return the single-precision sum a + b.
267static inline float
268fpAddS(float a, float b)
269{
270 return a + b;
271}
272

--- 88 unchanged lines hidden (view full) ---

361 {
362 return fpToBits(val) >> 32;
363 }
364
365 template <class fpType>
366 fpType
367 binaryOp(FPSCR &fpscr, fpType op1, fpType op2,
368 fpType (*func)(fpType, fpType),

1039 bool flush, uint32_t rMode) const
1040 {
1041 const bool single = (sizeof(fpType) == sizeof(float));
1042 fpType junk = 0.0;

369 bool flush, uint32_t rMode) const;

1043

370

1044 if (flush && flushToZero(op1, op2))
1045 fpscr.idc = 1;
1046 VfpSavedState state = prepFpState(rMode);
1047 __asm__ __volatile__ ("" : "=m" (op1), "=m" (op2), "=m" (state)
1048 : "m" (op1), "m" (op2), "m" (state));
1049 fpType dest = func(op1, op2);
1050 __asm__ __volatile__ ("" : "=m" (dest) : "m" (dest));
1051
1052 int fpClass = std::fpclassify(dest);
1053 // Get NAN behavior right. This varies between x86 and ARM.
1054 if (fpClass == FP_NAN) {
1055 const bool single = (sizeof(fpType) == sizeof(float));
1056 const uint64_t qnan =
1057 single ? 0x7fc00000 : ULL(0x7ff8000000000000);
1058 const bool nan1 = std::isnan(op1);
1059 const bool nan2 = std::isnan(op2);
1060 const bool signal1 = nan1 && ((fpToBits(op1) & qnan) != qnan);
1061 const bool signal2 = nan2 && ((fpToBits(op2) & qnan) != qnan);
1062 if ((!nan1 && !nan2) || (fpscr.dn == 1)) {
1063 dest = bitsToFp(qnan, junk);
1064 } else if (signal1) {
1065 dest = bitsToFp(fpToBits(op1) | qnan, junk);
1066 } else if (signal2) {
1067 dest = bitsToFp(fpToBits(op2) | qnan, junk);
1068 } else if (nan1) {
1069 dest = op1;
1070 } else if (nan2) {
1071 dest = op2;
1072 }
1073 } else if (flush && flushToZero(dest)) {
1074 feraiseexcept(FeUnderflow);
1075 } else if ((
1076 (single && (dest == bitsToFp(0x00800000, junk) ||
1077 dest == bitsToFp(0x80800000, junk))) ||
1078 (!single &&
1079 (dest == bitsToFp(ULL(0x0010000000000000), junk) ||
1080 dest == bitsToFp(ULL(0x8010000000000000), junk)))
1081 ) && rMode != VfpRoundZero) {
1082 /*
1083 * Correct for the fact that underflow is detected -before- rounding
1084 * in ARM and -after- rounding in x86.
1085 */
1086 fesetround(FeRoundZero);
1087 __asm__ __volatile__ ("" : "=m" (op1), "=m" (op2)
1088 : "m" (op1), "m" (op2));
1089 fpType temp = func(op1, op2);
1090 __asm__ __volatile__ ("" : "=m" (temp) : "m" (temp));
1091 if (flush && flushToZero(temp)) {
1092 dest = temp;
1093 }
1094 }
1095 finishVfp(fpscr, state);
1096 return dest;
1097 }
1098

// unaryOp: apply the one-operand host function `func` to op1 and adjust
// the result and exception flags.
//
// fpscr - status/control register, passed by reference; idc is set here
//         and dn is read; it is also handed to finishVfp().
// op1   - operand.
// func  - host function computing the result.
// flush - when true, flushToZero() is applied to the operand and tested on
//         the result.
// rMode - rounding mode passed to prepFpState().
//
// Returns the adjusted result. The state returned by prepFpState() is
// passed to finishVfp() before returning.
1099 template <class fpType>
1100 fpType
1101 unaryOp(FPSCR &fpscr, fpType op1,
1102 fpType (*func)(fpType),

371 template <class fpType>
372 fpType
373 unaryOp(FPSCR &fpscr, fpType op1,
374 fpType (*func)(fpType),

1103 bool flush, uint32_t rMode) const
1104 {
1105 const bool single = (sizeof(fpType) == sizeof(float));
// `junk` only supplies the type for bitsToFp().
1106 fpType junk = 0.0;
1107
1108 if (flush && flushToZero(op1))
1109 fpscr.idc = 1;
1110 VfpSavedState state = prepFpState(rMode);
// Empty asm statements with memory operands; presumably they keep the call
// to func() from being moved across the fenv changes.
1111 __asm__ __volatile__ ("" : "=m" (op1), "=m" (state)
1112 : "m" (op1), "m" (state));
1113 fpType dest = func(op1);
1114 __asm__ __volatile__ ("" : "=m" (dest) : "m" (dest));
1115
1116 int fpClass = std::fpclassify(dest);
1117 // Get NAN behavior right. This varies between x86 and ARM.
1118 if (fpClass == FP_NAN) {
// This inner `single` shadows the one declared at the top of the function
// and has the same value.
1119 const bool single = (sizeof(fpType) == sizeof(float));
1120 const uint64_t qnan =
1121 single ? 0x7fc00000 : ULL(0x7ff8000000000000);
1122 const bool nan = std::isnan(op1);
// A NaN result from a non-NaN operand, or any NaN result when fpscr.dn is
// 1, becomes the `qnan` pattern; otherwise the operand's bits are kept
// with the `qnan` bits OR-ed in.
1123 if (!nan || fpscr.dn == 1) {
1124 dest = bitsToFp(qnan, junk);
1125 } else if (nan) {
1126 dest = bitsToFp(fpToBits(op1) | qnan, junk);
1127 }
1128 } else if (flush && flushToZero(dest)) {
1129 feraiseexcept(FeUnderflow);
1130 } else if ((
1131 (single && (dest == bitsToFp(0x00800000, junk) ||
1132 dest == bitsToFp(0x80800000, junk))) ||
1133 (!single &&
1134 (dest == bitsToFp(ULL(0x0010000000000000), junk) ||
1135 dest == bitsToFp(ULL(0x8010000000000000), junk)))
1136 ) && rMode != VfpRoundZero) {
1137 /*
1138 * Correct for the fact that underflow is detected -before- rounding
1139 * in ARM and -after- rounding in x86.
1140 */
// Recompute with round-toward-zero; the recomputed value replaces the
// result only when flushing is enabled and flushToZero() reports true.
1141 fesetround(FeRoundZero);
1142 __asm__ __volatile__ ("" : "=m" (op1) : "m" (op1));
1143 fpType temp = func(op1);
1144 __asm__ __volatile__ ("" : "=m" (temp) : "m" (temp));
1145 if (flush && flushToZero(temp)) {
1146 dest = temp;
1147 }
1148 }
1149 finishVfp(fpscr, state);
1150 return dest;
1151 }

375 bool flush, uint32_t rMode) const;