// vfp.cc revision 7434:dd5a09b86b14
/*
 * Copyright (c) 2010 ARM Limited
 * All rights reserved
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder. You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT 29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 36 * 37 * Authors: Gabe Black 38 */ 39 40#include "arch/arm/insts/vfp.hh" 41 42/* 43 * The asm statements below are to keep gcc from reordering code. Otherwise 44 * the rounding mode might be set after the operation it was intended for, the 45 * exception bits read before it, etc. 46 */ 47 48std::string 49FpRegRegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const 50{ 51 std::stringstream ss; 52 printMnemonic(ss); 53 printReg(ss, dest + FP_Base_DepTag); 54 ss << ", "; 55 printReg(ss, op1 + FP_Base_DepTag); 56 return ss.str(); 57} 58 59std::string 60FpRegImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const 61{ 62 std::stringstream ss; 63 printMnemonic(ss); 64 printReg(ss, dest + FP_Base_DepTag); 65 ccprintf(ss, ", #%d", imm); 66 return ss.str(); 67} 68 69std::string 70FpRegRegImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const 71{ 72 std::stringstream ss; 73 printMnemonic(ss); 74 printReg(ss, dest + FP_Base_DepTag); 75 ss << ", "; 76 printReg(ss, op1 + FP_Base_DepTag); 77 ccprintf(ss, ", #%d", imm); 78 return ss.str(); 79} 80 81std::string 82FpRegRegRegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const 83{ 84 std::stringstream ss; 85 printMnemonic(ss); 86 printReg(ss, dest + FP_Base_DepTag); 87 ss << ", "; 88 printReg(ss, op1 + FP_Base_DepTag); 89 ss << ", "; 90 printReg(ss, op2 + FP_Base_DepTag); 91 return ss.str(); 92} 93 94namespace ArmISA 95{ 96 97VfpSavedState 
prepFpState(uint32_t rMode)
{
    // Remember the host's rounding mode so it can be restored later.
    int roundingMode = fegetround();
    feclearexcept(FeAllExceptions);
    // Translate the VFP rounding mode into the host fenv equivalent.
    switch (rMode) {
      case VfpRoundNearest:
        fesetround(FeRoundNearest);
        break;
      case VfpRoundUpward:
        fesetround(FeRoundUpward);
        break;
      case VfpRoundDown:
        fesetround(FeRoundDown);
        break;
      case VfpRoundZero:
        fesetround(FeRoundZero);
        break;
    }
    return roundingMode;
}

// Harvest the host FP exception flags raised since prepFpState() into the
// FPSCR cumulative flag bits, then restore the saved host rounding mode.
void
finishVfp(FPSCR &fpscr, VfpSavedState state)
{
    int exceptions = fetestexcept(FeAllExceptions);
    bool underflow = false;
    if (exceptions & FeInvalid) {
        fpscr.ioc = 1;
    }
    if (exceptions & FeDivByZero) {
        fpscr.dzc = 1;
    }
    if (exceptions & FeOverflow) {
        fpscr.ofc = 1;
    }
    if (exceptions & FeUnderflow) {
        underflow = true;
        fpscr.ufc = 1;
    }
    // In flush-to-zero mode an underflowed result was forced to an exact
    // zero, so don't also record inexact in that case.
    if ((exceptions & FeInexact) && !(underflow && fpscr.fz)) {
        fpscr.ixc = 1;
    }
    fesetround(state);
}

// Correct a unary operation's result for ARM semantics: produce the
// default/propagated quiet NaN per FPSCR.DN, and flush a subnormal result
// to a signed zero when flush-to-zero (FPSCR.FZ) is enabled.
template <class fpType>
fpType
fixDest(FPSCR fpscr, fpType val, fpType op1)
{
    int fpClass = std::fpclassify(val);
    fpType junk = 0.0;
    if (fpClass == FP_NAN) {
        const bool single = (sizeof(val) == sizeof(float));
        // Quiet-NaN bit pattern for the operand width.
        const uint64_t qnan = single ?
0x7fc00000 : ULL(0x7ff8000000000000);
        const bool nan = std::isnan(op1);
        // Default NaN when the operand wasn't a NaN or default-NaN mode is
        // on; otherwise quiet the operand's NaN and pass its payload through.
        if (!nan || (fpscr.dn == 1)) {
            val = bitsToFp(qnan, junk);
        } else if (nan) {
            val = bitsToFp(fpToBits(op1) | qnan, junk);
        }
    } else if (fpClass == FP_SUBNORMAL && fpscr.fz == 1) {
        // Turn val into a zero with the correct sign;
        uint64_t bitMask = ULL(0x1) << (sizeof(fpType) * 8 - 1);
        val = bitsToFp(fpToBits(val) & bitMask, junk);
        // The flushed result is an exact zero: report underflow, not inexact.
        feclearexcept(FeInexact);
        feraiseexcept(FeUnderflow);
    }
    return val;
}

template
float fixDest<float>(FPSCR fpscr, float val, float op1);
template
double fixDest<double>(FPSCR fpscr, double val, double op1);

// Binary-operation flavor of fixDest(): NaN propagation follows ARM's
// priority (signalling NaNs first, then op1's quiet NaN, then op2's),
// with the same flush-to-zero handling for subnormal results.
template <class fpType>
fpType
fixDest(FPSCR fpscr, fpType val, fpType op1, fpType op2)
{
    int fpClass = std::fpclassify(val);
    fpType junk = 0.0;
    if (fpClass == FP_NAN) {
        const bool single = (sizeof(val) == sizeof(float));
        const uint64_t qnan = single ? 0x7fc00000 : ULL(0x7ff8000000000000);
        const bool nan1 = std::isnan(op1);
        const bool nan2 = std::isnan(op2);
        // A NaN whose quiet bit is clear is a signalling NaN.
        const bool signal1 = nan1 && ((fpToBits(op1) & qnan) != qnan);
        const bool signal2 = nan2 && ((fpToBits(op2) & qnan) != qnan);
        if ((!nan1 && !nan2) || (fpscr.dn == 1)) {
            val = bitsToFp(qnan, junk);
        } else if (signal1) {
            val = bitsToFp(fpToBits(op1) | qnan, junk);
        } else if (signal2) {
            val = bitsToFp(fpToBits(op2) | qnan, junk);
        } else if (nan1) {
            val = op1;
        } else if (nan2) {
            val = op2;
        }
    } else if (fpClass == FP_SUBNORMAL && fpscr.fz == 1) {
        // Turn val into a zero with the correct sign;
        uint64_t bitMask = ULL(0x1) << (sizeof(fpType) * 8 - 1);
        val = bitsToFp(fpToBits(val) & bitMask, junk);
        feclearexcept(FeInexact);
        feraiseexcept(FeUnderflow);
    }
    return val;
}

template
float fixDest<float>(FPSCR fpscr, float val, float op1, float op2);
template
double fixDest<double>(FPSCR fpscr, double val, double
op1, double op2);

// Fix the result of a divide: in addition to fixDest(), a result that
// landed exactly on the smallest normal may really be an underflow that
// the host rounded up, so redo the divide rounding toward zero to check.
template <class fpType>
fpType
fixDivDest(FPSCR fpscr, fpType val, fpType op1, fpType op2)
{
    fpType mid = fixDest(fpscr, val, op1, op2);
    const bool single = (sizeof(fpType) == sizeof(float));
    const fpType junk = 0.0;
    // +/- smallest normal magnitude for the operand width?
    if ((single && (val == bitsToFp(0x00800000, junk) ||
                    val == bitsToFp(0x80800000, junk))) ||
        (!single && (val == bitsToFp(ULL(0x0010000000000000), junk) ||
                     val == bitsToFp(ULL(0x8010000000000000), junk)))
        ) {
        // The asm barriers pin the divide between the rounding-mode change
        // and the flush check so the compiler can't reorder them.
        __asm__ __volatile__("" : "=m" (op1) : "m" (op1));
        fesetround(FeRoundZero);
        fpType temp = 0.0;
        __asm__ __volatile__("" : "=m" (temp) : "m" (temp));
        temp = op1 / op2;
        if (flushToZero(temp)) {
            feraiseexcept(FeUnderflow);
            if (fpscr.fz) {
                feclearexcept(FeInexact);
                mid = temp;
            }
        }
        __asm__ __volatile__("" :: "m" (temp));
    }
    return mid;
}

template
float fixDivDest<float>(FPSCR fpscr, float val, float op1, float op2);
template
double fixDivDest<double>(FPSCR fpscr, double val, double op1, double op2);

// Narrow a double to single precision with ARM semantics: carry NaN
// payloads across, honor flush-to-zero, and correct for the host
// detecting underflow after rounding rather than before (as ARM does).
float
fixFpDFpSDest(FPSCR fpscr, double val)
{
    const float junk = 0.0;
    float op1 = 0.0;
    if (std::isnan(val)) {
        uint64_t valBits = fpToBits(val);
        // Move the top of the double's NaN payload into a single-precision
        // quiet NaN, preserving the sign bit.
        uint32_t op1Bits = bits(valBits, 50, 29) |
            (mask(9) << 22) |
            (bits(valBits, 63) << 31);
        op1 = bitsToFp(op1Bits, junk);
    }
    float mid = fixDest(fpscr, (float)val, op1);
    // In FZ mode a flushed result is an exact zero; drop the inexact flag.
    if (fpscr.fz && fetestexcept(FeUnderflow | FeInexact) ==
        (FeUnderflow | FeInexact)) {
        feclearexcept(FeInexact);
    }
    // Result exactly +/- smallest single normal: reconvert toward zero to
    // expose an underflow hidden by round-to-nearest.
    if (mid == bitsToFp(0x00800000, junk) ||
        mid == bitsToFp(0x80800000, junk)) {
        __asm__ __volatile__("" : "=m" (val) : "m" (val));
        fesetround(FeRoundZero);
        float temp = 0.0;
        __asm__ __volatile__("" : "=m" (temp) : "m" (temp));
        temp = val;
        if (flushToZero(temp)) {
            feraiseexcept(FeUnderflow);
            if (fpscr.fz) {
                feclearexcept(FeInexact);
                mid = temp;
            }
        }
        __asm__ __volatile__("" ::
"m" (temp)); 278 } 279 return mid; 280} 281 282double 283fixFpSFpDDest(FPSCR fpscr, float val) 284{ 285 const double junk = 0.0; 286 double op1 = 0.0; 287 if (std::isnan(val)) { 288 uint32_t valBits = fpToBits(val); 289 uint64_t op1Bits = ((uint64_t)bits(valBits, 21, 0) << 29) | 290 (mask(12) << 51) | 291 ((uint64_t)bits(valBits, 31) << 63); 292 op1 = bitsToFp(op1Bits, junk); 293 } 294 double mid = fixDest(fpscr, (double)val, op1); 295 if (mid == bitsToFp(ULL(0x0010000000000000), junk) || 296 mid == bitsToFp(ULL(0x8010000000000000), junk)) { 297 __asm__ __volatile__("" : "=m" (val) : "m" (val)); 298 fesetround(FeRoundZero); 299 double temp = 0.0; 300 __asm__ __volatile__("" : "=m" (temp) : "m" (temp)); 301 temp = val; 302 if (flushToZero(temp)) { 303 feraiseexcept(FeUnderflow); 304 if (fpscr.fz) { 305 feclearexcept(FeInexact); 306 mid = temp; 307 } 308 } 309 __asm__ __volatile__("" :: "m" (temp)); 310 } 311 return mid; 312} 313 314float 315vcvtFpSFpH(FPSCR &fpscr, float op, float dest, bool top) 316{ 317 float junk = 0.0; 318 uint32_t destBits = fpToBits(dest); 319 uint32_t opBits = fpToBits(op); 320 // Extract the operand. 321 bool neg = bits(opBits, 31); 322 uint32_t exponent = bits(opBits, 30, 23); 323 uint32_t oldMantissa = bits(opBits, 22, 0); 324 uint32_t mantissa = oldMantissa >> (23 - 10); 325 // Do the conversion. 326 uint32_t extra = oldMantissa & mask(23 - 10); 327 if (exponent == 0xff) { 328 if (oldMantissa != 0) { 329 // Nans. 330 if (bits(mantissa, 9) == 0) { 331 // Signalling nan. 332 fpscr.ioc = 1; 333 } 334 if (fpscr.ahp) { 335 mantissa = 0; 336 exponent = 0; 337 fpscr.ioc = 1; 338 } else if (fpscr.dn) { 339 mantissa = (1 << 9); 340 exponent = 0x1f; 341 neg = false; 342 } else { 343 exponent = 0x1f; 344 mantissa |= (1 << 9); 345 } 346 } else { 347 // Infinities. 
exponent = 0x1F;
            if (fpscr.ahp) {
                // No infinities in the alternative format: saturate instead.
                fpscr.ioc = 1;
                mantissa = 0x3ff;
            } else {
                mantissa = 0;
            }
        }
    } else if (exponent == 0 && oldMantissa == 0) {
        // Zero, don't need to do anything.
    } else {
        // Normalized or denormalized numbers.

        bool inexact = (extra != 0);

        if (exponent == 0) {
            // Denormalized.

            // If flush to zero is on, this shouldn't happen.
            assert(fpscr.fz == 0);

            // Check for underflow
            if (inexact || fpscr.ufe)
                fpscr.ufc = 1;

            // Handle rounding.
            unsigned mode = fpscr.rMode;
            // Round up on: toward +inf with dropped bits, toward -inf with
            // dropped bits (negative value), or nearest-even past halfway.
            if ((mode == VfpRoundUpward && !neg && extra) ||
                (mode == VfpRoundDown && neg && extra) ||
                (mode == VfpRoundNearest &&
                 (extra > (1 << 9) ||
                  (extra == (1 << 9) && bits(mantissa, 0))))) {
                mantissa++;
            }

            // See if the number became normalized after rounding.
            if (mantissa == (1 << 10)) {
                mantissa = 0;
                exponent = 1;
            }
        } else {
            // Normalized.

            // We need to track the dropped bits differently since
            // more can be dropped by denormalizing.
            bool topOne = bits(extra, 12);
            bool restZeros = bits(extra, 11, 0) == 0;

            if (exponent <= (127 - 15)) {
                // The result is too small. Denormalize.
                mantissa |= (1 << 10);
                while (mantissa && exponent <= (127 - 15)) {
                    restZeros = restZeros && !topOne;
                    topOne = bits(mantissa, 0);
                    mantissa = mantissa >> 1;
                    exponent++;
                }
                if (topOne || !restZeros)
                    inexact = true;
                exponent = 0;
            } else {
                // Change bias.
                exponent -= (127 - 15);
            }

            if (exponent == 0 && (inexact || fpscr.ufe)) {
                // Underflow
                fpscr.ufc = 1;
            }

            // Handle rounding.
unsigned mode = fpscr.rMode;
            // Anything dropped at or below the round position?
            bool nonZero = topOne || !restZeros;
            if ((mode == VfpRoundUpward && !neg && nonZero) ||
                (mode == VfpRoundDown && neg && nonZero) ||
                (mode == VfpRoundNearest && topOne &&
                 (!restZeros || bits(mantissa, 0)))) {
                mantissa++;
            }

            // See if we rounded up and need to bump the exponent.
            if (mantissa == (1 << 10)) {
                mantissa = 0;
                exponent++;
            }

            // Deal with overflow
            if (fpscr.ahp) {
                if (exponent >= 0x20) {
                    // Alternative format saturates to max magnitude.
                    exponent = 0x1f;
                    mantissa = 0x3ff;
                    fpscr.ioc = 1;
                    // Suppress inexact exception.
                    inexact = false;
                }
            } else {
                if (exponent >= 0x1f) {
                    if ((mode == VfpRoundNearest) ||
                        (mode == VfpRoundUpward && !neg) ||
                        (mode == VfpRoundDown && neg)) {
                        // Overflow to infinity.
                        exponent = 0x1f;
                        mantissa = 0;
                    } else {
                        // Overflow to max normal.
                        exponent = 0x1e;
                        mantissa = 0x3ff;
                    }
                    fpscr.ofc = 1;
                    inexact = true;
                }
            }
        }

        if (inexact) {
            fpscr.ixc = 1;
        }
    }
    // Reassemble and install the result.
    uint32_t result = bits(mantissa, 9, 0);
    replaceBits(result, 14, 10, exponent);
    if (neg)
        result |= (1 << 15);
    // Deposit into the requested half of the destination register.
    if (top)
        replaceBits(destBits, 31, 16, result);
    else
        replaceBits(destBits, 15, 0, result);
    return bitsToFp(destBits, junk);
}

// Convert half precision (taken from the top or bottom half of op's bit
// pattern) to single precision, honoring AHP and default-NaN modes.
float
vcvtFpHFpS(FPSCR &fpscr, float op, bool top)
{
    float junk = 0.0;
    uint32_t opBits = fpToBits(op);
    // Extract the operand.
    if (top)
        opBits = bits(opBits, 31, 16);
    else
        opBits = bits(opBits, 15, 0);
    // Extract the bitfields.
    bool neg = bits(opBits, 15);
    uint32_t exponent = bits(opBits, 14, 10);
    uint32_t mantissa = bits(opBits, 9, 0);
    // Do the conversion.
    if (exponent == 0) {
        if (mantissa != 0) {
            // Normalize the value.
exponent = exponent + (127 - 15) + 1;
            while (mantissa < (1 << 10)) {
                mantissa = mantissa << 1;
                exponent--;
            }
        }
        mantissa = mantissa << (23 - 10);
    } else if (exponent == 0x1f && !fpscr.ahp) {
        // Infinities and nans.
        exponent = 0xff;
        if (mantissa != 0) {
            // Nans.
            mantissa = mantissa << (23 - 10);
            if (bits(mantissa, 22) == 0) {
                // Signalling nan.
                fpscr.ioc = 1;
                // Quiet it in the result.
                mantissa |= (1 << 22);
            }
            if (fpscr.dn) {
                // Default NaN mode squashes the payload.
                mantissa &= ~mask(22);
                neg = false;
            }
        }
    } else {
        // Normal number (or AHP-mode large value): rebias the exponent.
        exponent = exponent + (127 - 15);
        mantissa = mantissa << (23 - 10);
    }
    // Reassemble the result.
    uint32_t result = bits(mantissa, 22, 0);
    replaceBits(result, 30, 23, exponent);
    if (neg)
        result |= (1 << 31);
    return bitsToFp(result, junk);
}

// Convert a single-precision value to a (possibly half-width) fixed-point
// integer scaled by 2^imm. rzero forces round-toward-zero, otherwise the
// host's current rounding mode applies. Saturates out-of-range values and
// records flags through the host FP exception state.
uint64_t
vfpFpSToFixed(float val, bool isSigned, bool half,
              uint8_t imm, bool rzero)
{
    int rmode = rzero ? FeRoundZero : fegetround();
    __asm__ __volatile__("" : "=m" (rmode) : "m" (rmode));
    // The scaling multiply is always done round-to-nearest.
    fesetround(FeRoundNearest);
    val = val * powf(2.0, imm);
    __asm__ __volatile__("" : "=m" (val) : "m" (val));
    fesetround(rmode);
    feclearexcept(FeAllExceptions);
    __asm__ __volatile__("" : "=m" (val) : "m" (val));
    float origVal = val;
    // rintf() rounds in the currently installed (target) rounding mode.
    val = rintf(val);
    int fpType = std::fpclassify(val);
    if (fpType == FP_SUBNORMAL || fpType == FP_NAN) {
        if (fpType == FP_NAN) {
            feraiseexcept(FeInvalid);
        }
        val = 0.0;
    } else if (origVal != val) {
        // Patch up host/ARM disagreements on rounding, then mark inexact.
        switch (rmode) {
          case FeRoundNearest:
            if (origVal - val > 0.5)
                val += 1.0;
            else if (val - origVal > 0.5)
                val -= 1.0;
            break;
          case FeRoundDown:
            if (origVal < val)
                val -= 1.0;
            break;
          case FeRoundUpward:
            if (origVal > val)
                val += 1.0;
            break;
        }
        feraiseexcept(FeInexact);
    }

    if (isSigned) {
        if (half) {
            // Saturate to the int16 range; saturation is an invalid op.
            if ((double)val < (int16_t)(1 << 15)) {
                feraiseexcept(FeInvalid);
feclearexcept(FeInexact);
                return (int16_t)(1 << 15);
            }
            if ((double)val > (int16_t)mask(15)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return (int16_t)mask(15);
            }
            return (int16_t)val;
        } else {
            // Saturate to the int32 range.
            if ((double)val < (int32_t)(1 << 31)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return (int32_t)(1 << 31);
            }
            if ((double)val > (int32_t)mask(31)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return (int32_t)mask(31);
            }
            return (int32_t)val;
        }
    } else {
        if (half) {
            // Saturate to the uint16 range.
            if ((double)val < 0) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return 0;
            }
            if ((double)val > (mask(16))) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return mask(16);
            }
            return (uint16_t)val;
        } else {
            // Saturate to the uint32 range.
            if ((double)val < 0) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return 0;
            }
            if ((double)val > (mask(32))) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return mask(32);
            }
            return (uint32_t)val;
        }
    }
}

// Convert an unsigned (possibly half-width) fixed-point value scaled by
// 2^imm to single precision.
float
vfpUFixedToFpS(FPSCR fpscr, uint32_t val, bool half, uint8_t imm)
{
    fesetround(FeRoundNearest);
    if (half)
        val = (uint16_t)val;
    float scale = powf(2.0, imm);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    feclearexcept(FeAllExceptions);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    // The divide undoes the fixed-point scaling; fixDivDest applies the
    // ARM underflow/NaN corrections.
    return fixDivDest(fpscr, val / scale, (float)val, scale);
}

// Convert a signed (possibly half-width) fixed-point value scaled by
// 2^imm to single precision.
float
vfpSFixedToFpS(FPSCR fpscr, int32_t val, bool half, uint8_t imm)
{
    fesetround(FeRoundNearest);
    if (half)
        val = sext<16>(val & mask(16));
    float scale = powf(2.0, imm);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    feclearexcept(FeAllExceptions);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    return fixDivDest(fpscr, val / scale, (float)val, scale);
}

// Double-precision counterpart of vfpFpSToFixed(); same algorithm.
uint64_t
vfpFpDToFixed(double
val, bool isSigned, bool half,
              uint8_t imm, bool rzero)
{
    int rmode = rzero ? FeRoundZero : fegetround();
    // Scale round-to-nearest, then round to integer in the target mode.
    fesetround(FeRoundNearest);
    val = val * pow(2.0, imm);
    __asm__ __volatile__("" : "=m" (val) : "m" (val));
    fesetround(rmode);
    feclearexcept(FeAllExceptions);
    __asm__ __volatile__("" : "=m" (val) : "m" (val));
    double origVal = val;
    // rint() rounds in the currently installed rounding mode.
    val = rint(val);
    int fpType = std::fpclassify(val);
    if (fpType == FP_SUBNORMAL || fpType == FP_NAN) {
        if (fpType == FP_NAN) {
            feraiseexcept(FeInvalid);
        }
        val = 0.0;
    } else if (origVal != val) {
        // Patch up host/ARM rounding disagreements, then mark inexact.
        switch (rmode) {
          case FeRoundNearest:
            if (origVal - val > 0.5)
                val += 1.0;
            else if (val - origVal > 0.5)
                val -= 1.0;
            break;
          case FeRoundDown:
            if (origVal < val)
                val -= 1.0;
            break;
          case FeRoundUpward:
            if (origVal > val)
                val += 1.0;
            break;
        }
        feraiseexcept(FeInexact);
    }
    if (isSigned) {
        if (half) {
            // Saturate to the int16 range; saturation is an invalid op.
            if (val < (int16_t)(1 << 15)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return (int16_t)(1 << 15);
            }
            if (val > (int16_t)mask(15)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return (int16_t)mask(15);
            }
            return (int16_t)val;
        } else {
            // Saturate to the int32 range.
            if (val < (int32_t)(1 << 31)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return (int32_t)(1 << 31);
            }
            if (val > (int32_t)mask(31)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return (int32_t)mask(31);
            }
            return (int32_t)val;
        }
    } else {
        if (half) {
            // Saturate to the uint16 range.
            if (val < 0) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return 0;
            }
            if (val > mask(16)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return mask(16);
            }
            return (uint16_t)val;
        } else {
            // Saturate to the uint32 range.
            if (val < 0) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return 0;
            }
            if (val > mask(32)) {
feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return mask(32);
            }
            return (uint32_t)val;
        }
    }
}

// Convert an unsigned (possibly half-width) fixed-point value scaled by
// 2^imm to double precision.
double
vfpUFixedToFpD(FPSCR fpscr, uint32_t val, bool half, uint8_t imm)
{
    fesetround(FeRoundNearest);
    if (half)
        val = (uint16_t)val;
    double scale = pow(2.0, imm);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    feclearexcept(FeAllExceptions);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    // Divide undoes the fixed-point scaling; fixDivDest applies ARM fixups.
    return fixDivDest(fpscr, val / scale, (double)val, scale);
}

// Convert a signed (possibly half-width) fixed-point value scaled by
// 2^imm to double precision.
double
vfpSFixedToFpD(FPSCR fpscr, int32_t val, bool half, uint8_t imm)
{
    fesetround(FeRoundNearest);
    if (half)
        val = sext<16>(val & mask(16));
    double scale = pow(2.0, imm);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    feclearexcept(FeAllExceptions);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    return fixDivDest(fpscr, val / scale, (double)val, scale);
}

// Run a two-operand FP operation under the requested rounding mode with
// ARM NaN and flush-to-zero semantics, accumulating exception flags into
// fpscr via finishVfp().
template <class fpType>
fpType
FpOp::binaryOp(FPSCR &fpscr, fpType op1, fpType op2,
               fpType (*func)(fpType, fpType),
               bool flush, uint32_t rMode) const
{
    const bool single = (sizeof(fpType) == sizeof(float));
    fpType junk = 0.0;

    // Flushing a subnormal input raises the input-denormal flag.
    if (flush && flushToZero(op1, op2))
        fpscr.idc = 1;
    VfpSavedState state = prepFpState(rMode);
    // Barrier: the rounding mode must be installed before func runs.
    __asm__ __volatile__ ("" : "=m" (op1), "=m" (op2), "=m" (state)
                             : "m" (op1), "m" (op2), "m" (state));
    fpType dest = func(op1, op2);
    __asm__ __volatile__ ("" : "=m" (dest) : "m" (dest));

    int fpClass = std::fpclassify(dest);
    // Get NAN behavior right. This varies between x86 and ARM.
    if (fpClass == FP_NAN) {
        const bool single = (sizeof(fpType) == sizeof(float));
        // Quiet-NaN bit pattern for the operand width.
        const uint64_t qnan =
            single ?
0x7fc00000 : ULL(0x7ff8000000000000);
        const bool nan1 = std::isnan(op1);
        const bool nan2 = std::isnan(op2);
        // A NaN whose quiet bit is clear is a signalling NaN.
        const bool signal1 = nan1 && ((fpToBits(op1) & qnan) != qnan);
        const bool signal2 = nan2 && ((fpToBits(op2) & qnan) != qnan);
        // ARM priority: default NaN, then quieted signalling NaNs, then
        // propagated quiet NaNs (op1 before op2).
        if ((!nan1 && !nan2) || (fpscr.dn == 1)) {
            dest = bitsToFp(qnan, junk);
        } else if (signal1) {
            dest = bitsToFp(fpToBits(op1) | qnan, junk);
        } else if (signal2) {
            dest = bitsToFp(fpToBits(op2) | qnan, junk);
        } else if (nan1) {
            dest = op1;
        } else if (nan2) {
            dest = op2;
        }
    } else if (flush && flushToZero(dest)) {
        feraiseexcept(FeUnderflow);
    } else if ((
                // Result landed exactly on +/- smallest normal: maybe a
                // hidden underflow (see comment below).
                (single && (dest == bitsToFp(0x00800000, junk) ||
                            dest == bitsToFp(0x80800000, junk))) ||
                (!single &&
                 (dest == bitsToFp(ULL(0x0010000000000000), junk) ||
                  dest == bitsToFp(ULL(0x8010000000000000), junk)))
               ) && rMode != VfpRoundZero) {
        /*
         * Correct for the fact that underflow is detected -before- rounding
         * in ARM and -after- rounding in x86.
*/
        fesetround(FeRoundZero);
        // Redo the operation rounding toward zero to see if it underflows.
        __asm__ __volatile__ ("" : "=m" (op1), "=m" (op2)
                                 : "m" (op1), "m" (op2));
        fpType temp = func(op1, op2);
        __asm__ __volatile__ ("" : "=m" (temp) : "m" (temp));
        if (flush && flushToZero(temp)) {
            dest = temp;
        }
    }
    // Fold the host exception flags into FPSCR and restore rounding mode.
    finishVfp(fpscr, state);
    return dest;
}

template
float FpOp::binaryOp(FPSCR &fpscr, float op1, float op2,
                     float (*func)(float, float),
                     bool flush, uint32_t rMode) const;
template
double FpOp::binaryOp(FPSCR &fpscr, double op1, double op2,
                      double (*func)(double, double),
                      bool flush, uint32_t rMode) const;

// One-operand analogue of binaryOp(): same rounding-mode setup, NaN
// handling, flush-to-zero behavior, and underflow correction.
template <class fpType>
fpType
FpOp::unaryOp(FPSCR &fpscr, fpType op1, fpType (*func)(fpType),
              bool flush, uint32_t rMode) const
{
    const bool single = (sizeof(fpType) == sizeof(float));
    fpType junk = 0.0;

    // Flushing a subnormal input raises the input-denormal flag.
    if (flush && flushToZero(op1))
        fpscr.idc = 1;
    VfpSavedState state = prepFpState(rMode);
    // Barrier: the rounding mode must be installed before func runs.
    __asm__ __volatile__ ("" : "=m" (op1), "=m" (state)
                             : "m" (op1), "m" (state));
    fpType dest = func(op1);
    __asm__ __volatile__ ("" : "=m" (dest) : "m" (dest));

    int fpClass = std::fpclassify(dest);
    // Get NAN behavior right. This varies between x86 and ARM.
    if (fpClass == FP_NAN) {
        const bool single = (sizeof(fpType) == sizeof(float));
        const uint64_t qnan =
            single ?
0x7fc00000 : ULL(0x7ff8000000000000);
        const bool nan = std::isnan(op1);
        // Default NaN if the operand wasn't a NaN or default-NaN mode is
        // on; otherwise quiet and propagate the operand's NaN.
        if (!nan || fpscr.dn == 1) {
            dest = bitsToFp(qnan, junk);
        } else if (nan) {
            dest = bitsToFp(fpToBits(op1) | qnan, junk);
        }
    } else if (flush && flushToZero(dest)) {
        feraiseexcept(FeUnderflow);
    } else if ((
                // Result landed exactly on +/- smallest normal: possibly a
                // hidden underflow (see comment below).
                (single && (dest == bitsToFp(0x00800000, junk) ||
                            dest == bitsToFp(0x80800000, junk))) ||
                (!single &&
                 (dest == bitsToFp(ULL(0x0010000000000000), junk) ||
                  dest == bitsToFp(ULL(0x8010000000000000), junk)))
               ) && rMode != VfpRoundZero) {
        /*
         * Correct for the fact that underflow is detected -before- rounding
         * in ARM and -after- rounding in x86.
         */
        fesetround(FeRoundZero);
        __asm__ __volatile__ ("" : "=m" (op1) : "m" (op1));
        fpType temp = func(op1);
        __asm__ __volatile__ ("" : "=m" (temp) : "m" (temp));
        if (flush && flushToZero(temp)) {
            dest = temp;
        }
    }
    // Fold the host exception flags into FPSCR and restore rounding mode.
    finishVfp(fpscr, state);
    return dest;
}

template
float FpOp::unaryOp(FPSCR &fpscr, float op1, float (*func)(float),
                    bool flush, uint32_t rMode) const;
template
double FpOp::unaryOp(FPSCR &fpscr, double op1, double (*func)(double),
                     bool flush, uint32_t rMode) const;

// Advance a VFP register index by the vector stride, wrapping within its
// 8-register bank. Wide (double) operations consume two indices per step.
IntRegIndex
VfpMacroOp::addStride(IntRegIndex idx, unsigned stride)
{
    if (wide) {
        stride *= 2;
    }
    unsigned offset = idx % 8;
    idx = (IntRegIndex)(idx - offset);
    offset += stride;
    // Wrap around within the bank.
    idx = (IntRegIndex)(idx + (offset % 8));
    return idx;
}

// Step dest/op1/op2 to the next registers of a vector operation; an
// operand sitting in the scalar bank does not advance.
void
VfpMacroOp::nextIdxs(IntRegIndex &dest, IntRegIndex &op1, IntRegIndex &op2)
{
    // FPSCR stride field 0 means stride 1, otherwise stride 2.
    unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2;
    assert(!inScalarBank(dest));
    dest = addStride(dest, stride);
    op1 = addStride(op1, stride);
    if (!inScalarBank(op2)) {
        op2 = addStride(op2, stride);
    }
}

// Two-operand variant of nextIdxs().
void
VfpMacroOp::nextIdxs(IntRegIndex &dest, IntRegIndex &op1)
{
    unsigned stride = (machInst.fpscrStride == 0) ?
1 : 2;
    assert(!inScalarBank(dest));
    dest = addStride(dest, stride);
    // A scalar-bank source operand does not advance.
    if (!inScalarBank(op1)) {
        op1 = addStride(op1, stride);
    }
}

// Destination-only variant of nextIdxs().
void
VfpMacroOp::nextIdxs(IntRegIndex &dest)
{
    unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2;
    assert(!inScalarBank(dest));
    dest = addStride(dest, stride);
}

}