42std::string 43FpRegRegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const 44{ 45 std::stringstream ss; 46 printMnemonic(ss); 47 printReg(ss, dest + FP_Base_DepTag); 48 ss << ", "; 49 printReg(ss, op1 + FP_Base_DepTag); 50 return ss.str(); 51} 52 53std::string 54FpRegImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const 55{ 56 std::stringstream ss; 57 printMnemonic(ss); 58 printReg(ss, dest + FP_Base_DepTag); 59 ccprintf(ss, ", #%d", imm); 60 return ss.str(); 61} 62 63std::string 64FpRegRegImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const 65{ 66 std::stringstream ss; 67 printMnemonic(ss); 68 printReg(ss, dest + FP_Base_DepTag); 69 ss << ", "; 70 printReg(ss, op1 + FP_Base_DepTag); 71 ccprintf(ss, ", #%d", imm); 72 return ss.str(); 73} 74 75std::string 76FpRegRegRegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const 77{ 78 std::stringstream ss; 79 printMnemonic(ss); 80 printReg(ss, dest + FP_Base_DepTag); 81 ss << ", "; 82 printReg(ss, op1 + FP_Base_DepTag); 83 ss << ", "; 84 printReg(ss, op2 + FP_Base_DepTag); 85 return ss.str(); 86} 87 88namespace ArmISA 89{ 90 91VfpSavedState 92prepFpState(uint32_t rMode) 93{ 94 int roundingMode = fegetround(); 95 feclearexcept(FeAllExceptions); 96 switch (rMode) { 97 case VfpRoundNearest: 98 fesetround(FeRoundNearest); 99 break; 100 case VfpRoundUpward: 101 fesetround(FeRoundUpward); 102 break; 103 case VfpRoundDown: 104 fesetround(FeRoundDown); 105 break; 106 case VfpRoundZero: 107 fesetround(FeRoundZero); 108 break; 109 } 110 return roundingMode; 111} 112 113void 114finishVfp(FPSCR &fpscr, VfpSavedState state) 115{ 116 int exceptions = fetestexcept(FeAllExceptions); 117 bool underflow = false; 118 if (exceptions & FeInvalid) { 119 fpscr.ioc = 1; 120 } 121 if (exceptions & FeDivByZero) { 122 fpscr.dzc = 1; 123 } 124 if (exceptions & FeOverflow) { 125 fpscr.ofc = 1; 126 } 127 if (exceptions & FeUnderflow) { 128 underflow = true; 129 fpscr.ufc = 1; 130 } 131 if 
((exceptions & FeInexact) && !(underflow && fpscr.fz)) { 132 fpscr.ixc = 1; 133 } 134 fesetround(state); 135} 136 137template <class fpType> 138fpType 139fixDest(FPSCR fpscr, fpType val, fpType op1) 140{ 141 int fpClass = std::fpclassify(val); 142 fpType junk = 0.0; 143 if (fpClass == FP_NAN) { 144 const bool single = (sizeof(val) == sizeof(float)); 145 const uint64_t qnan = single ? 0x7fc00000 : ULL(0x7ff8000000000000); 146 const bool nan = std::isnan(op1); 147 if (!nan || (fpscr.dn == 1)) { 148 val = bitsToFp(qnan, junk); 149 } else if (nan) { 150 val = bitsToFp(fpToBits(op1) | qnan, junk); 151 } 152 } else if (fpClass == FP_SUBNORMAL && fpscr.fz == 1) { 153 // Turn val into a zero with the correct sign; 154 uint64_t bitMask = ULL(0x1) << (sizeof(fpType) * 8 - 1); 155 val = bitsToFp(fpToBits(val) & bitMask, junk); 156 feclearexcept(FeInexact); 157 feraiseexcept(FeUnderflow); 158 } 159 return val; 160} 161 162template 163float fixDest<float>(FPSCR fpscr, float val, float op1); 164template 165double fixDest<double>(FPSCR fpscr, double val, double op1); 166 167template <class fpType> 168fpType 169fixDest(FPSCR fpscr, fpType val, fpType op1, fpType op2) 170{ 171 int fpClass = std::fpclassify(val); 172 fpType junk = 0.0; 173 if (fpClass == FP_NAN) { 174 const bool single = (sizeof(val) == sizeof(float)); 175 const uint64_t qnan = single ? 
0x7fc00000 : ULL(0x7ff8000000000000); 176 const bool nan1 = std::isnan(op1); 177 const bool nan2 = std::isnan(op2); 178 const bool signal1 = nan1 && ((fpToBits(op1) & qnan) != qnan); 179 const bool signal2 = nan2 && ((fpToBits(op2) & qnan) != qnan); 180 if ((!nan1 && !nan2) || (fpscr.dn == 1)) { 181 val = bitsToFp(qnan, junk); 182 } else if (signal1) { 183 val = bitsToFp(fpToBits(op1) | qnan, junk); 184 } else if (signal2) { 185 val = bitsToFp(fpToBits(op2) | qnan, junk); 186 } else if (nan1) { 187 val = op1; 188 } else if (nan2) { 189 val = op2; 190 } 191 } else if (fpClass == FP_SUBNORMAL && fpscr.fz == 1) { 192 // Turn val into a zero with the correct sign; 193 uint64_t bitMask = ULL(0x1) << (sizeof(fpType) * 8 - 1); 194 val = bitsToFp(fpToBits(val) & bitMask, junk); 195 feclearexcept(FeInexact); 196 feraiseexcept(FeUnderflow); 197 } 198 return val; 199} 200 201template 202float fixDest<float>(FPSCR fpscr, float val, float op1, float op2); 203template 204double fixDest<double>(FPSCR fpscr, double val, double op1, double op2); 205 206template <class fpType> 207fpType 208fixDivDest(FPSCR fpscr, fpType val, fpType op1, fpType op2) 209{ 210 fpType mid = fixDest(fpscr, val, op1, op2); 211 const bool single = (sizeof(fpType) == sizeof(float)); 212 const fpType junk = 0.0; 213 if ((single && (val == bitsToFp(0x00800000, junk) || 214 val == bitsToFp(0x80800000, junk))) || 215 (!single && (val == bitsToFp(ULL(0x0010000000000000), junk) || 216 val == bitsToFp(ULL(0x8010000000000000), junk))) 217 ) { 218 __asm__ __volatile__("" : "=m" (op1) : "m" (op1)); 219 fesetround(FeRoundZero); 220 fpType temp = 0.0; 221 __asm__ __volatile__("" : "=m" (temp) : "m" (temp)); 222 temp = op1 / op2; 223 if (flushToZero(temp)) { 224 feraiseexcept(FeUnderflow); 225 if (fpscr.fz) { 226 feclearexcept(FeInexact); 227 mid = temp; 228 } 229 } 230 __asm__ __volatile__("" :: "m" (temp)); 231 } 232 return mid; 233} 234 235template 236float fixDivDest<float>(FPSCR fpscr, float val, float op1, float 
op2); 237template 238double fixDivDest<double>(FPSCR fpscr, double val, double op1, double op2); 239 240float 241fixFpDFpSDest(FPSCR fpscr, double val) 242{ 243 const float junk = 0.0; 244 float op1 = 0.0; 245 if (std::isnan(val)) { 246 uint64_t valBits = fpToBits(val); 247 uint32_t op1Bits = bits(valBits, 50, 29) | 248 (mask(9) << 22) | 249 (bits(valBits, 63) << 31); 250 op1 = bitsToFp(op1Bits, junk); 251 } 252 float mid = fixDest(fpscr, (float)val, op1); 253 if (fpscr.fz && fetestexcept(FeUnderflow | FeInexact) == 254 (FeUnderflow | FeInexact)) { 255 feclearexcept(FeInexact); 256 } 257 if (mid == bitsToFp(0x00800000, junk) || 258 mid == bitsToFp(0x80800000, junk)) { 259 __asm__ __volatile__("" : "=m" (val) : "m" (val)); 260 fesetround(FeRoundZero); 261 float temp = 0.0; 262 __asm__ __volatile__("" : "=m" (temp) : "m" (temp)); 263 temp = val; 264 if (flushToZero(temp)) { 265 feraiseexcept(FeUnderflow); 266 if (fpscr.fz) { 267 feclearexcept(FeInexact); 268 mid = temp; 269 } 270 } 271 __asm__ __volatile__("" :: "m" (temp)); 272 } 273 return mid; 274} 275 276double 277fixFpSFpDDest(FPSCR fpscr, float val) 278{ 279 const double junk = 0.0; 280 double op1 = 0.0; 281 if (std::isnan(val)) { 282 uint32_t valBits = fpToBits(val); 283 uint64_t op1Bits = ((uint64_t)bits(valBits, 21, 0) << 29) | 284 (mask(12) << 51) | 285 ((uint64_t)bits(valBits, 31) << 63); 286 op1 = bitsToFp(op1Bits, junk); 287 } 288 double mid = fixDest(fpscr, (double)val, op1); 289 if (mid == bitsToFp(ULL(0x0010000000000000), junk) || 290 mid == bitsToFp(ULL(0x8010000000000000), junk)) { 291 __asm__ __volatile__("" : "=m" (val) : "m" (val)); 292 fesetround(FeRoundZero); 293 double temp = 0.0; 294 __asm__ __volatile__("" : "=m" (temp) : "m" (temp)); 295 temp = val; 296 if (flushToZero(temp)) { 297 feraiseexcept(FeUnderflow); 298 if (fpscr.fz) { 299 feclearexcept(FeInexact); 300 mid = temp; 301 } 302 } 303 __asm__ __volatile__("" :: "m" (temp)); 304 } 305 return mid; 306} 307 308float 309vcvtFpSFpH(FPSCR 
&fpscr, float op, float dest, bool top) 310{ 311 float junk = 0.0; 312 uint32_t destBits = fpToBits(dest); 313 uint32_t opBits = fpToBits(op); 314 // Extract the operand. 315 bool neg = bits(opBits, 31); 316 uint32_t exponent = bits(opBits, 30, 23); 317 uint32_t oldMantissa = bits(opBits, 22, 0); 318 uint32_t mantissa = oldMantissa >> (23 - 10); 319 // Do the conversion. 320 uint32_t extra = oldMantissa & mask(23 - 10); 321 if (exponent == 0xff) { 322 if (oldMantissa != 0) { 323 // Nans. 324 if (bits(mantissa, 9) == 0) { 325 // Signalling nan. 326 fpscr.ioc = 1; 327 } 328 if (fpscr.ahp) { 329 mantissa = 0; 330 exponent = 0; 331 fpscr.ioc = 1; 332 } else if (fpscr.dn) { 333 mantissa = (1 << 9); 334 exponent = 0x1f; 335 neg = false; 336 } else { 337 exponent = 0x1f; 338 mantissa |= (1 << 9); 339 } 340 } else { 341 // Infinities. 342 exponent = 0x1F; 343 if (fpscr.ahp) { 344 fpscr.ioc = 1; 345 mantissa = 0x3ff; 346 } else { 347 mantissa = 0; 348 } 349 } 350 } else if (exponent == 0 && oldMantissa == 0) { 351 // Zero, don't need to do anything. 352 } else { 353 // Normalized or denormalized numbers. 354 355 bool inexact = (extra != 0); 356 357 if (exponent == 0) { 358 // Denormalized. 359 360 // If flush to zero is on, this shouldn't happen. 361 assert(fpscr.fz == 0); 362 363 // Check for underflow 364 if (inexact || fpscr.ufe) 365 fpscr.ufc = 1; 366 367 // Handle rounding. 368 unsigned mode = fpscr.rMode; 369 if ((mode == VfpRoundUpward && !neg && extra) || 370 (mode == VfpRoundDown && neg && extra) || 371 (mode == VfpRoundNearest && 372 (extra > (1 << 9) || 373 (extra == (1 << 9) && bits(mantissa, 0))))) { 374 mantissa++; 375 } 376 377 // See if the number became normalized after rounding. 378 if (mantissa == (1 << 10)) { 379 mantissa = 0; 380 exponent = 1; 381 } 382 } else { 383 // Normalized. 384 385 // We need to track the dropped bits differently since 386 // more can be dropped by denormalizing. 
387 bool topOne = bits(extra, 12); 388 bool restZeros = bits(extra, 11, 0) == 0; 389 390 if (exponent <= (127 - 15)) { 391 // The result is too small. Denormalize. 392 mantissa |= (1 << 10); 393 while (mantissa && exponent <= (127 - 15)) { 394 restZeros = restZeros && !topOne; 395 topOne = bits(mantissa, 0); 396 mantissa = mantissa >> 1; 397 exponent++; 398 } 399 if (topOne || !restZeros) 400 inexact = true; 401 exponent = 0; 402 } else { 403 // Change bias. 404 exponent -= (127 - 15); 405 } 406 407 if (exponent == 0 && (inexact || fpscr.ufe)) { 408 // Underflow 409 fpscr.ufc = 1; 410 } 411 412 // Handle rounding. 413 unsigned mode = fpscr.rMode; 414 bool nonZero = topOne || !restZeros; 415 if ((mode == VfpRoundUpward && !neg && nonZero) || 416 (mode == VfpRoundDown && neg && nonZero) || 417 (mode == VfpRoundNearest && topOne && 418 (!restZeros || bits(mantissa, 0)))) { 419 mantissa++; 420 } 421 422 // See if we rounded up and need to bump the exponent. 423 if (mantissa == (1 << 10)) { 424 mantissa = 0; 425 exponent++; 426 } 427 428 // Deal with overflow 429 if (fpscr.ahp) { 430 if (exponent >= 0x20) { 431 exponent = 0x1f; 432 mantissa = 0x3ff; 433 fpscr.ioc = 1; 434 // Supress inexact exception. 435 inexact = false; 436 } 437 } else { 438 if (exponent >= 0x1f) { 439 if ((mode == VfpRoundNearest) || 440 (mode == VfpRoundUpward && !neg) || 441 (mode == VfpRoundDown && neg)) { 442 // Overflow to infinity. 443 exponent = 0x1f; 444 mantissa = 0; 445 } else { 446 // Overflow to max normal. 447 exponent = 0x1e; 448 mantissa = 0x3ff; 449 } 450 fpscr.ofc = 1; 451 inexact = true; 452 } 453 } 454 } 455 456 if (inexact) { 457 fpscr.ixc = 1; 458 } 459 } 460 // Reassemble and install the result. 
461 uint32_t result = bits(mantissa, 9, 0); 462 replaceBits(result, 14, 10, exponent); 463 if (neg) 464 result |= (1 << 15); 465 if (top) 466 replaceBits(destBits, 31, 16, result); 467 else 468 replaceBits(destBits, 15, 0, result); 469 return bitsToFp(destBits, junk); 470} 471 472float 473vcvtFpHFpS(FPSCR &fpscr, float op, bool top) 474{ 475 float junk = 0.0; 476 uint32_t opBits = fpToBits(op); 477 // Extract the operand. 478 if (top) 479 opBits = bits(opBits, 31, 16); 480 else 481 opBits = bits(opBits, 15, 0); 482 // Extract the bitfields. 483 bool neg = bits(opBits, 15); 484 uint32_t exponent = bits(opBits, 14, 10); 485 uint32_t mantissa = bits(opBits, 9, 0); 486 // Do the conversion. 487 if (exponent == 0) { 488 if (mantissa != 0) { 489 // Normalize the value. 490 exponent = exponent + (127 - 15) + 1; 491 while (mantissa < (1 << 10)) { 492 mantissa = mantissa << 1; 493 exponent--; 494 } 495 } 496 mantissa = mantissa << (23 - 10); 497 } else if (exponent == 0x1f && !fpscr.ahp) { 498 // Infinities and nans. 499 exponent = 0xff; 500 if (mantissa != 0) { 501 // Nans. 502 mantissa = mantissa << (23 - 10); 503 if (bits(mantissa, 22) == 0) { 504 // Signalling nan. 505 fpscr.ioc = 1; 506 mantissa |= (1 << 22); 507 } 508 if (fpscr.dn) { 509 mantissa &= ~mask(22); 510 neg = false; 511 } 512 } 513 } else { 514 exponent = exponent + (127 - 15); 515 mantissa = mantissa << (23 - 10); 516 } 517 // Reassemble the result. 518 uint32_t result = bits(mantissa, 22, 0); 519 replaceBits(result, 30, 23, exponent); 520 if (neg) 521 result |= (1 << 31); 522 return bitsToFp(result, junk); 523} 524 525uint64_t 526vfpFpSToFixed(float val, bool isSigned, bool half, 527 uint8_t imm, bool rzero) 528{ 529 int rmode = rzero ? 
FeRoundZero : fegetround(); 530 __asm__ __volatile__("" : "=m" (rmode) : "m" (rmode)); 531 fesetround(FeRoundNearest); 532 val = val * powf(2.0, imm); 533 __asm__ __volatile__("" : "=m" (val) : "m" (val)); 534 fesetround(rmode); 535 feclearexcept(FeAllExceptions); 536 __asm__ __volatile__("" : "=m" (val) : "m" (val)); 537 float origVal = val; 538 val = rintf(val); 539 int fpType = std::fpclassify(val); 540 if (fpType == FP_SUBNORMAL || fpType == FP_NAN) { 541 if (fpType == FP_NAN) { 542 feraiseexcept(FeInvalid); 543 } 544 val = 0.0; 545 } else if (origVal != val) { 546 switch (rmode) { 547 case FeRoundNearest: 548 if (origVal - val > 0.5) 549 val += 1.0; 550 else if (val - origVal > 0.5) 551 val -= 1.0; 552 break; 553 case FeRoundDown: 554 if (origVal < val) 555 val -= 1.0; 556 break; 557 case FeRoundUpward: 558 if (origVal > val) 559 val += 1.0; 560 break; 561 } 562 feraiseexcept(FeInexact); 563 } 564 565 if (isSigned) { 566 if (half) { 567 if ((double)val < (int16_t)(1 << 15)) { 568 feraiseexcept(FeInvalid); 569 feclearexcept(FeInexact); 570 return (int16_t)(1 << 15); 571 } 572 if ((double)val > (int16_t)mask(15)) { 573 feraiseexcept(FeInvalid); 574 feclearexcept(FeInexact); 575 return (int16_t)mask(15); 576 } 577 return (int16_t)val; 578 } else { 579 if ((double)val < (int32_t)(1 << 31)) { 580 feraiseexcept(FeInvalid); 581 feclearexcept(FeInexact); 582 return (int32_t)(1 << 31); 583 } 584 if ((double)val > (int32_t)mask(31)) { 585 feraiseexcept(FeInvalid); 586 feclearexcept(FeInexact); 587 return (int32_t)mask(31); 588 } 589 return (int32_t)val; 590 } 591 } else { 592 if (half) { 593 if ((double)val < 0) { 594 feraiseexcept(FeInvalid); 595 feclearexcept(FeInexact); 596 return 0; 597 } 598 if ((double)val > (mask(16))) { 599 feraiseexcept(FeInvalid); 600 feclearexcept(FeInexact); 601 return mask(16); 602 } 603 return (uint16_t)val; 604 } else { 605 if ((double)val < 0) { 606 feraiseexcept(FeInvalid); 607 feclearexcept(FeInexact); 608 return 0; 609 } 610 if 
((double)val > (mask(32))) { 611 feraiseexcept(FeInvalid); 612 feclearexcept(FeInexact); 613 return mask(32); 614 } 615 return (uint32_t)val; 616 } 617 } 618} 619 620float 621vfpUFixedToFpS(FPSCR fpscr, uint32_t val, bool half, uint8_t imm) 622{ 623 fesetround(FeRoundNearest); 624 if (half) 625 val = (uint16_t)val; 626 float scale = powf(2.0, imm); 627 __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); 628 feclearexcept(FeAllExceptions); 629 __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); 630 return fixDivDest(fpscr, val / scale, (float)val, scale); 631} 632 633float 634vfpSFixedToFpS(FPSCR fpscr, int32_t val, bool half, uint8_t imm) 635{ 636 fesetround(FeRoundNearest); 637 if (half) 638 val = sext<16>(val & mask(16)); 639 float scale = powf(2.0, imm); 640 __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); 641 feclearexcept(FeAllExceptions); 642 __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); 643 return fixDivDest(fpscr, val / scale, (float)val, scale); 644} 645 646uint64_t 647vfpFpDToFixed(double val, bool isSigned, bool half, 648 uint8_t imm, bool rzero) 649{ 650 int rmode = rzero ? 
FeRoundZero : fegetround(); 651 fesetround(FeRoundNearest); 652 val = val * pow(2.0, imm); 653 __asm__ __volatile__("" : "=m" (val) : "m" (val)); 654 fesetround(rmode); 655 feclearexcept(FeAllExceptions); 656 __asm__ __volatile__("" : "=m" (val) : "m" (val)); 657 double origVal = val; 658 val = rint(val); 659 int fpType = std::fpclassify(val); 660 if (fpType == FP_SUBNORMAL || fpType == FP_NAN) { 661 if (fpType == FP_NAN) { 662 feraiseexcept(FeInvalid); 663 } 664 val = 0.0; 665 } else if (origVal != val) { 666 switch (rmode) { 667 case FeRoundNearest: 668 if (origVal - val > 0.5) 669 val += 1.0; 670 else if (val - origVal > 0.5) 671 val -= 1.0; 672 break; 673 case FeRoundDown: 674 if (origVal < val) 675 val -= 1.0; 676 break; 677 case FeRoundUpward: 678 if (origVal > val) 679 val += 1.0; 680 break; 681 } 682 feraiseexcept(FeInexact); 683 } 684 if (isSigned) { 685 if (half) { 686 if (val < (int16_t)(1 << 15)) { 687 feraiseexcept(FeInvalid); 688 feclearexcept(FeInexact); 689 return (int16_t)(1 << 15); 690 } 691 if (val > (int16_t)mask(15)) { 692 feraiseexcept(FeInvalid); 693 feclearexcept(FeInexact); 694 return (int16_t)mask(15); 695 } 696 return (int16_t)val; 697 } else { 698 if (val < (int32_t)(1 << 31)) { 699 feraiseexcept(FeInvalid); 700 feclearexcept(FeInexact); 701 return (int32_t)(1 << 31); 702 } 703 if (val > (int32_t)mask(31)) { 704 feraiseexcept(FeInvalid); 705 feclearexcept(FeInexact); 706 return (int32_t)mask(31); 707 } 708 return (int32_t)val; 709 } 710 } else { 711 if (half) { 712 if (val < 0) { 713 feraiseexcept(FeInvalid); 714 feclearexcept(FeInexact); 715 return 0; 716 } 717 if (val > mask(16)) { 718 feraiseexcept(FeInvalid); 719 feclearexcept(FeInexact); 720 return mask(16); 721 } 722 return (uint16_t)val; 723 } else { 724 if (val < 0) { 725 feraiseexcept(FeInvalid); 726 feclearexcept(FeInexact); 727 return 0; 728 } 729 if (val > mask(32)) { 730 feraiseexcept(FeInvalid); 731 feclearexcept(FeInexact); 732 return mask(32); 733 } 734 return 
(uint32_t)val; 735 } 736 } 737} 738 739double 740vfpUFixedToFpD(FPSCR fpscr, uint32_t val, bool half, uint8_t imm) 741{ 742 fesetround(FeRoundNearest); 743 if (half) 744 val = (uint16_t)val; 745 double scale = pow(2.0, imm); 746 __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); 747 feclearexcept(FeAllExceptions); 748 __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); 749 return fixDivDest(fpscr, val / scale, (double)val, scale); 750} 751 752double 753vfpSFixedToFpD(FPSCR fpscr, int32_t val, bool half, uint8_t imm) 754{ 755 fesetround(FeRoundNearest); 756 if (half) 757 val = sext<16>(val & mask(16)); 758 double scale = pow(2.0, imm); 759 __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); 760 feclearexcept(FeAllExceptions); 761 __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); 762 return fixDivDest(fpscr, val / scale, (double)val, scale); 763} 764 765template <class fpType> 766fpType 767FpOp::binaryOp(FPSCR &fpscr, fpType op1, fpType op2, 768 fpType (*func)(fpType, fpType), 769 bool flush, uint32_t rMode) const 770{ 771 const bool single = (sizeof(fpType) == sizeof(float)); 772 fpType junk = 0.0; 773 774 if (flush && flushToZero(op1, op2)) 775 fpscr.idc = 1; 776 VfpSavedState state = prepFpState(rMode); 777 __asm__ __volatile__ ("" : "=m" (op1), "=m" (op2), "=m" (state) 778 : "m" (op1), "m" (op2), "m" (state)); 779 fpType dest = func(op1, op2); 780 __asm__ __volatile__ ("" : "=m" (dest) : "m" (dest)); 781 782 int fpClass = std::fpclassify(dest); 783 // Get NAN behavior right. This varies between x86 and ARM. 784 if (fpClass == FP_NAN) { 785 const bool single = (sizeof(fpType) == sizeof(float)); 786 const uint64_t qnan = 787 single ? 
0x7fc00000 : ULL(0x7ff8000000000000); 788 const bool nan1 = std::isnan(op1); 789 const bool nan2 = std::isnan(op2); 790 const bool signal1 = nan1 && ((fpToBits(op1) & qnan) != qnan); 791 const bool signal2 = nan2 && ((fpToBits(op2) & qnan) != qnan); 792 if ((!nan1 && !nan2) || (fpscr.dn == 1)) { 793 dest = bitsToFp(qnan, junk); 794 } else if (signal1) { 795 dest = bitsToFp(fpToBits(op1) | qnan, junk); 796 } else if (signal2) { 797 dest = bitsToFp(fpToBits(op2) | qnan, junk); 798 } else if (nan1) { 799 dest = op1; 800 } else if (nan2) { 801 dest = op2; 802 } 803 } else if (flush && flushToZero(dest)) { 804 feraiseexcept(FeUnderflow); 805 } else if (( 806 (single && (dest == bitsToFp(0x00800000, junk) || 807 dest == bitsToFp(0x80800000, junk))) || 808 (!single && 809 (dest == bitsToFp(ULL(0x0010000000000000), junk) || 810 dest == bitsToFp(ULL(0x8010000000000000), junk))) 811 ) && rMode != VfpRoundZero) { 812 /* 813 * Correct for the fact that underflow is detected -before- rounding 814 * in ARM and -after- rounding in x86. 
815 */ 816 fesetround(FeRoundZero); 817 __asm__ __volatile__ ("" : "=m" (op1), "=m" (op2) 818 : "m" (op1), "m" (op2)); 819 fpType temp = func(op1, op2); 820 __asm__ __volatile__ ("" : "=m" (temp) : "m" (temp)); 821 if (flush && flushToZero(temp)) { 822 dest = temp; 823 } 824 } 825 finishVfp(fpscr, state); 826 return dest; 827} 828 829template 830float FpOp::binaryOp(FPSCR &fpscr, float op1, float op2, 831 float (*func)(float, float), 832 bool flush, uint32_t rMode) const; 833template 834double FpOp::binaryOp(FPSCR &fpscr, double op1, double op2, 835 double (*func)(double, double), 836 bool flush, uint32_t rMode) const; 837 838template <class fpType> 839fpType 840FpOp::unaryOp(FPSCR &fpscr, fpType op1, fpType (*func)(fpType), 841 bool flush, uint32_t rMode) const 842{ 843 const bool single = (sizeof(fpType) == sizeof(float)); 844 fpType junk = 0.0; 845 846 if (flush && flushToZero(op1)) 847 fpscr.idc = 1; 848 VfpSavedState state = prepFpState(rMode); 849 __asm__ __volatile__ ("" : "=m" (op1), "=m" (state) 850 : "m" (op1), "m" (state)); 851 fpType dest = func(op1); 852 __asm__ __volatile__ ("" : "=m" (dest) : "m" (dest)); 853 854 int fpClass = std::fpclassify(dest); 855 // Get NAN behavior right. This varies between x86 and ARM. 856 if (fpClass == FP_NAN) { 857 const bool single = (sizeof(fpType) == sizeof(float)); 858 const uint64_t qnan = 859 single ? 
0x7fc00000 : ULL(0x7ff8000000000000); 860 const bool nan = std::isnan(op1); 861 if (!nan || fpscr.dn == 1) { 862 dest = bitsToFp(qnan, junk); 863 } else if (nan) { 864 dest = bitsToFp(fpToBits(op1) | qnan, junk); 865 } 866 } else if (flush && flushToZero(dest)) { 867 feraiseexcept(FeUnderflow); 868 } else if (( 869 (single && (dest == bitsToFp(0x00800000, junk) || 870 dest == bitsToFp(0x80800000, junk))) || 871 (!single && 872 (dest == bitsToFp(ULL(0x0010000000000000), junk) || 873 dest == bitsToFp(ULL(0x8010000000000000), junk))) 874 ) && rMode != VfpRoundZero) { 875 /* 876 * Correct for the fact that underflow is detected -before- rounding 877 * in ARM and -after- rounding in x86. 878 */ 879 fesetround(FeRoundZero); 880 __asm__ __volatile__ ("" : "=m" (op1) : "m" (op1)); 881 fpType temp = func(op1); 882 __asm__ __volatile__ ("" : "=m" (temp) : "m" (temp)); 883 if (flush && flushToZero(temp)) { 884 dest = temp; 885 } 886 } 887 finishVfp(fpscr, state); 888 return dest; 889} 890 891template 892float FpOp::unaryOp(FPSCR &fpscr, float op1, float (*func)(float), 893 bool flush, uint32_t rMode) const; 894template 895double FpOp::unaryOp(FPSCR &fpscr, double op1, double (*func)(double), 896 bool flush, uint32_t rMode) const; 897 898IntRegIndex 899VfpMacroOp::addStride(IntRegIndex idx, unsigned stride) 900{ 901 if (wide) { 902 stride *= 2; 903 } 904 unsigned offset = idx % 8; 905 idx = (IntRegIndex)(idx - offset); 906 offset += stride; 907 idx = (IntRegIndex)(idx + (offset % 8)); 908 return idx; 909} 910 911void 912VfpMacroOp::nextIdxs(IntRegIndex &dest, IntRegIndex &op1, IntRegIndex &op2) 913{ 914 unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2; 915 assert(!inScalarBank(dest)); 916 dest = addStride(dest, stride); 917 op1 = addStride(op1, stride); 918 if (!inScalarBank(op2)) { 919 op2 = addStride(op2, stride); 920 } 921} 922 923void 924VfpMacroOp::nextIdxs(IntRegIndex &dest, IntRegIndex &op1) 925{ 926 unsigned stride = (machInst.fpscrStride == 0) ? 
1 : 2; 927 assert(!inScalarBank(dest)); 928 dest = addStride(dest, stride); 929 if (!inScalarBank(op1)) { 930 op1 = addStride(op1, stride); 931 } 932} 933 934void 935VfpMacroOp::nextIdxs(IntRegIndex &dest) 936{ 937 unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2; 938 assert(!inScalarBank(dest)); 939 dest = addStride(dest, stride); 940} 941 942}
| 48std::string 49FpRegRegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const 50{ 51 std::stringstream ss; 52 printMnemonic(ss); 53 printReg(ss, dest + FP_Base_DepTag); 54 ss << ", "; 55 printReg(ss, op1 + FP_Base_DepTag); 56 return ss.str(); 57} 58 59std::string 60FpRegImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const 61{ 62 std::stringstream ss; 63 printMnemonic(ss); 64 printReg(ss, dest + FP_Base_DepTag); 65 ccprintf(ss, ", #%d", imm); 66 return ss.str(); 67} 68 69std::string 70FpRegRegImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const 71{ 72 std::stringstream ss; 73 printMnemonic(ss); 74 printReg(ss, dest + FP_Base_DepTag); 75 ss << ", "; 76 printReg(ss, op1 + FP_Base_DepTag); 77 ccprintf(ss, ", #%d", imm); 78 return ss.str(); 79} 80 81std::string 82FpRegRegRegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const 83{ 84 std::stringstream ss; 85 printMnemonic(ss); 86 printReg(ss, dest + FP_Base_DepTag); 87 ss << ", "; 88 printReg(ss, op1 + FP_Base_DepTag); 89 ss << ", "; 90 printReg(ss, op2 + FP_Base_DepTag); 91 return ss.str(); 92} 93 94namespace ArmISA 95{ 96 97VfpSavedState 98prepFpState(uint32_t rMode) 99{ 100 int roundingMode = fegetround(); 101 feclearexcept(FeAllExceptions); 102 switch (rMode) { 103 case VfpRoundNearest: 104 fesetround(FeRoundNearest); 105 break; 106 case VfpRoundUpward: 107 fesetround(FeRoundUpward); 108 break; 109 case VfpRoundDown: 110 fesetround(FeRoundDown); 111 break; 112 case VfpRoundZero: 113 fesetround(FeRoundZero); 114 break; 115 } 116 return roundingMode; 117} 118 119void 120finishVfp(FPSCR &fpscr, VfpSavedState state) 121{ 122 int exceptions = fetestexcept(FeAllExceptions); 123 bool underflow = false; 124 if (exceptions & FeInvalid) { 125 fpscr.ioc = 1; 126 } 127 if (exceptions & FeDivByZero) { 128 fpscr.dzc = 1; 129 } 130 if (exceptions & FeOverflow) { 131 fpscr.ofc = 1; 132 } 133 if (exceptions & FeUnderflow) { 134 underflow = true; 135 fpscr.ufc = 1; 136 } 137 
if ((exceptions & FeInexact) && !(underflow && fpscr.fz)) { 138 fpscr.ixc = 1; 139 } 140 fesetround(state); 141} 142 143template <class fpType> 144fpType 145fixDest(FPSCR fpscr, fpType val, fpType op1) 146{ 147 int fpClass = std::fpclassify(val); 148 fpType junk = 0.0; 149 if (fpClass == FP_NAN) { 150 const bool single = (sizeof(val) == sizeof(float)); 151 const uint64_t qnan = single ? 0x7fc00000 : ULL(0x7ff8000000000000); 152 const bool nan = std::isnan(op1); 153 if (!nan || (fpscr.dn == 1)) { 154 val = bitsToFp(qnan, junk); 155 } else if (nan) { 156 val = bitsToFp(fpToBits(op1) | qnan, junk); 157 } 158 } else if (fpClass == FP_SUBNORMAL && fpscr.fz == 1) { 159 // Turn val into a zero with the correct sign; 160 uint64_t bitMask = ULL(0x1) << (sizeof(fpType) * 8 - 1); 161 val = bitsToFp(fpToBits(val) & bitMask, junk); 162 feclearexcept(FeInexact); 163 feraiseexcept(FeUnderflow); 164 } 165 return val; 166} 167 168template 169float fixDest<float>(FPSCR fpscr, float val, float op1); 170template 171double fixDest<double>(FPSCR fpscr, double val, double op1); 172 173template <class fpType> 174fpType 175fixDest(FPSCR fpscr, fpType val, fpType op1, fpType op2) 176{ 177 int fpClass = std::fpclassify(val); 178 fpType junk = 0.0; 179 if (fpClass == FP_NAN) { 180 const bool single = (sizeof(val) == sizeof(float)); 181 const uint64_t qnan = single ? 
0x7fc00000 : ULL(0x7ff8000000000000); 182 const bool nan1 = std::isnan(op1); 183 const bool nan2 = std::isnan(op2); 184 const bool signal1 = nan1 && ((fpToBits(op1) & qnan) != qnan); 185 const bool signal2 = nan2 && ((fpToBits(op2) & qnan) != qnan); 186 if ((!nan1 && !nan2) || (fpscr.dn == 1)) { 187 val = bitsToFp(qnan, junk); 188 } else if (signal1) { 189 val = bitsToFp(fpToBits(op1) | qnan, junk); 190 } else if (signal2) { 191 val = bitsToFp(fpToBits(op2) | qnan, junk); 192 } else if (nan1) { 193 val = op1; 194 } else if (nan2) { 195 val = op2; 196 } 197 } else if (fpClass == FP_SUBNORMAL && fpscr.fz == 1) { 198 // Turn val into a zero with the correct sign; 199 uint64_t bitMask = ULL(0x1) << (sizeof(fpType) * 8 - 1); 200 val = bitsToFp(fpToBits(val) & bitMask, junk); 201 feclearexcept(FeInexact); 202 feraiseexcept(FeUnderflow); 203 } 204 return val; 205} 206 207template 208float fixDest<float>(FPSCR fpscr, float val, float op1, float op2); 209template 210double fixDest<double>(FPSCR fpscr, double val, double op1, double op2); 211 212template <class fpType> 213fpType 214fixDivDest(FPSCR fpscr, fpType val, fpType op1, fpType op2) 215{ 216 fpType mid = fixDest(fpscr, val, op1, op2); 217 const bool single = (sizeof(fpType) == sizeof(float)); 218 const fpType junk = 0.0; 219 if ((single && (val == bitsToFp(0x00800000, junk) || 220 val == bitsToFp(0x80800000, junk))) || 221 (!single && (val == bitsToFp(ULL(0x0010000000000000), junk) || 222 val == bitsToFp(ULL(0x8010000000000000), junk))) 223 ) { 224 __asm__ __volatile__("" : "=m" (op1) : "m" (op1)); 225 fesetround(FeRoundZero); 226 fpType temp = 0.0; 227 __asm__ __volatile__("" : "=m" (temp) : "m" (temp)); 228 temp = op1 / op2; 229 if (flushToZero(temp)) { 230 feraiseexcept(FeUnderflow); 231 if (fpscr.fz) { 232 feclearexcept(FeInexact); 233 mid = temp; 234 } 235 } 236 __asm__ __volatile__("" :: "m" (temp)); 237 } 238 return mid; 239} 240 241template 242float fixDivDest<float>(FPSCR fpscr, float val, float op1, float 
op2);
template
double fixDivDest<double>(FPSCR fpscr, double val, double op1, double op2);

// Narrow a double result to single precision, fixing up NaN payloads and
// the underflow-at-the-smallest-normal corner case so the x86 host result
// matches ARM VFP semantics.
float
fixFpDFpSDest(FPSCR fpscr, double val)
{
    const float junk = 0.0;
    float op1 = 0.0;
    if (std::isnan(val)) {
        uint64_t valBits = fpToBits(val);
        // Move the double's NaN payload/sign into the corresponding
        // single-precision bit positions so fixDest can propagate it.
        uint32_t op1Bits = bits(valBits, 50, 29) |
            (mask(9) << 22) |
            (bits(valBits, 63) << 31);
        op1 = bitsToFp(op1Bits, junk);
    }
    float mid = fixDest(fpscr, (float)val, op1);
    // With flush-to-zero, an underflowed result is exact from ARM's point
    // of view, so drop the host's inexact flag.
    if (fpscr.fz && fetestexcept(FeUnderflow | FeInexact) ==
        (FeUnderflow | FeInexact)) {
        feclearexcept(FeInexact);
    }
    // If the rounded result landed exactly on +/- the smallest normal,
    // ARM (which detects underflow before rounding) may still consider it
    // an underflow. Redo the conversion with round-to-zero to see whether
    // the pre-rounding value was subnormal.
    if (mid == bitsToFp(0x00800000, junk) ||
        mid == bitsToFp(0x80800000, junk)) {
        // Empty asm with a memory operand: a barrier that stops the
        // compiler from caching/reordering val across fesetround.
        __asm__ __volatile__("" : "=m" (val) : "m" (val));
        fesetround(FeRoundZero);
        float temp = 0.0;
        __asm__ __volatile__("" : "=m" (temp) : "m" (temp));
        temp = val;
        if (flushToZero(temp)) {
            feraiseexcept(FeUnderflow);
            if (fpscr.fz) {
                feclearexcept(FeInexact);
                mid = temp;
            }
        }
        __asm__ __volatile__("" :: "m" (temp));
    }
    return mid;
}

// Widen a single-precision value to double, fixing up NaN payloads and the
// smallest-normal underflow corner case (mirror of fixFpDFpSDest).
double
fixFpSFpDDest(FPSCR fpscr, float val)
{
    const double junk = 0.0;
    double op1 = 0.0;
    if (std::isnan(val)) {
        uint32_t valBits = fpToBits(val);
        // Move the single's NaN payload/sign into double-precision bit
        // positions for fixDest.
        uint64_t op1Bits = ((uint64_t)bits(valBits, 21, 0) << 29) |
            (mask(12) << 51) |
            ((uint64_t)bits(valBits, 31) << 63);
        op1 = bitsToFp(op1Bits, junk);
    }
    double mid = fixDest(fpscr, (double)val, op1);
    // Same smallest-normal / round-to-zero recheck as fixFpDFpSDest, for
    // the double-precision boundary values.
    if (mid == bitsToFp(ULL(0x0010000000000000), junk) ||
        mid == bitsToFp(ULL(0x8010000000000000), junk)) {
        __asm__ __volatile__("" : "=m" (val) : "m" (val));
        fesetround(FeRoundZero);
        double temp = 0.0;
        __asm__ __volatile__("" : "=m" (temp) : "m" (temp));
        temp = val;
        if (flushToZero(temp)) {
            feraiseexcept(FeUnderflow);
            if (fpscr.fz) {
                feclearexcept(FeInexact);
                mid = temp;
            }
        }
        __asm__ __volatile__("" :: "m" (temp));
    }
    return mid;
}

// Convert a single-precision value (op) to half precision, done entirely in
// integer arithmetic on the bit patterns, and install the 16-bit result in
// the top or bottom half of dest. Sets FPSCR cumulative exception flags
// (ioc/ufc/ofc/ixc) as the conversion requires; honors AHP (alternative
// half-precision), DN (default NaN), FZ and the rounding mode.
float
vcvtFpSFpH(FPSCR &fpscr, float op, float dest, bool top)
{
    float junk = 0.0;
    uint32_t destBits = fpToBits(dest);
    uint32_t opBits = fpToBits(op);
    // Extract the operand fields: sign, 8-bit exponent, 23-bit mantissa.
    bool neg = bits(opBits, 31);
    uint32_t exponent = bits(opBits, 30, 23);
    uint32_t oldMantissa = bits(opBits, 22, 0);
    // Keep the top 10 mantissa bits (half precision width)...
    uint32_t mantissa = oldMantissa >> (23 - 10);
    // ...and remember the 13 dropped bits for rounding/inexact decisions.
    uint32_t extra = oldMantissa & mask(23 - 10);
    if (exponent == 0xff) {
        if (oldMantissa != 0) {
            // NaNs.
            if (bits(mantissa, 9) == 0) {
                // Signalling NaN: raise invalid-operation.
                fpscr.ioc = 1;
            }
            if (fpscr.ahp) {
                // AHP format has no NaNs; result is zero + IOC.
                mantissa = 0;
                exponent = 0;
                fpscr.ioc = 1;
            } else if (fpscr.dn) {
                // Default NaN.
                mantissa = (1 << 9);
                exponent = 0x1f;
                neg = false;
            } else {
                // Propagate payload, quieted.
                exponent = 0x1f;
                mantissa |= (1 << 9);
            }
        } else {
            // Infinities.
            exponent = 0x1F;
            if (fpscr.ahp) {
                // AHP has no infinity; saturate to max magnitude + IOC.
                fpscr.ioc = 1;
                mantissa = 0x3ff;
            } else {
                mantissa = 0;
            }
        }
    } else if (exponent == 0 && oldMantissa == 0) {
        // Zero, don't need to do anything.
    } else {
        // Normalized or denormalized numbers.

        bool inexact = (extra != 0);

        if (exponent == 0) {
            // Denormalized single-precision input.

            // If flush to zero is on, this shouldn't happen (the input
            // would have been flushed already).
            assert(fpscr.fz == 0);

            // Check for underflow.
            if (inexact || fpscr.ufe)
                fpscr.ufc = 1;

            // Handle rounding: round up / down by sign and mode, or
            // round-to-nearest-even on the dropped bits.
            unsigned mode = fpscr.rMode;
            if ((mode == VfpRoundUpward && !neg && extra) ||
                (mode == VfpRoundDown && neg && extra) ||
                (mode == VfpRoundNearest &&
                 (extra > (1 << 9) ||
                  (extra == (1 << 9) && bits(mantissa, 0))))) {
                mantissa++;
            }

            // See if the number became normalized after rounding.
            if (mantissa == (1 << 10)) {
                mantissa = 0;
                exponent = 1;
            }
        } else {
            // Normalized input.

            // We need to track the dropped bits differently since
            // more can be dropped by denormalizing.
            bool topOne = bits(extra, 12);
            bool restZeros = bits(extra, 11, 0) == 0;

            if (exponent <= (127 - 15)) {
                // The result is too small for a normal half. Denormalize,
                // shifting mantissa bits out while tracking the guard
                // (topOne) and sticky (restZeros) information.
                mantissa |= (1 << 10);
                while (mantissa && exponent <= (127 - 15)) {
                    restZeros = restZeros && !topOne;
                    topOne = bits(mantissa, 0);
                    mantissa = mantissa >> 1;
                    exponent++;
                }
                if (topOne || !restZeros)
                    inexact = true;
                exponent = 0;
            } else {
                // Change bias from single (127) to half (15).
                exponent -= (127 - 15);
            }

            if (exponent == 0 && (inexact || fpscr.ufe)) {
                // Underflow.
                fpscr.ufc = 1;
            }

            // Handle rounding using the guard/sticky bits computed above.
            unsigned mode = fpscr.rMode;
            bool nonZero = topOne || !restZeros;
            if ((mode == VfpRoundUpward && !neg && nonZero) ||
                (mode == VfpRoundDown && neg && nonZero) ||
                (mode == VfpRoundNearest && topOne &&
                 (!restZeros || bits(mantissa, 0)))) {
                mantissa++;
            }

            // See if we rounded up and need to bump the exponent.
            if (mantissa == (1 << 10)) {
                mantissa = 0;
                exponent++;
            }

            // Deal with overflow. AHP format (exponent field is all
            // magnitude, max 0x1f) saturates with IOC; IEEE half
            // overflows to infinity or max-normal depending on the
            // rounding direction, with OFC.
            if (fpscr.ahp) {
                if (exponent >= 0x20) {
                    exponent = 0x1f;
                    mantissa = 0x3ff;
                    fpscr.ioc = 1;
                    // Suppress inexact exception.
                    inexact = false;
                }
            } else {
                if (exponent >= 0x1f) {
                    if ((mode == VfpRoundNearest) ||
                        (mode == VfpRoundUpward && !neg) ||
                        (mode == VfpRoundDown && neg)) {
                        // Overflow to infinity.
                        exponent = 0x1f;
                        mantissa = 0;
                    } else {
                        // Overflow to max normal.
                        exponent = 0x1e;
                        mantissa = 0x3ff;
                    }
                    fpscr.ofc = 1;
                    inexact = true;
                }
            }
        }

        if (inexact) {
            fpscr.ixc = 1;
        }
    }
    // Reassemble the half-precision value and install it in the selected
    // half of dest, leaving the other half untouched.
    uint32_t result = bits(mantissa, 9, 0);
    replaceBits(result, 14, 10, exponent);
    if (neg)
        result |= (1 << 15);
    if (top)
        replaceBits(destBits, 31, 16, result);
    else
        replaceBits(destBits, 15, 0, result);
    return bitsToFp(destBits, junk);
}

// Convert the half-precision value held in the top or bottom 16 bits of op
// to single precision. Exact (every half value is representable as a
// single), so only IOC can be set, for signalling NaN inputs.
float
vcvtFpHFpS(FPSCR &fpscr, float op, bool top)
{
    float junk = 0.0;
    uint32_t opBits = fpToBits(op);
    // Extract the operand half-word.
    if (top)
        opBits = bits(opBits, 31, 16);
    else
        opBits = bits(opBits, 15, 0);
    // Extract the bitfields: sign, 5-bit exponent, 10-bit mantissa.
    bool neg = bits(opBits, 15);
    uint32_t exponent = bits(opBits, 14, 10);
    uint32_t mantissa = bits(opBits, 9, 0);
    // Do the conversion.
    if (exponent == 0) {
        if (mantissa != 0) {
            // Denormal half: normalize the value (every half denormal is
            // a single-precision normal).
            exponent = exponent + (127 - 15) + 1;
            while (mantissa < (1 << 10)) {
                mantissa = mantissa << 1;
                exponent--;
            }
        }
        mantissa = mantissa << (23 - 10);
    } else if (exponent == 0x1f && !fpscr.ahp) {
        // Infinities and NaNs (in AHP mode 0x1f is just a big exponent).
        exponent = 0xff;
        if (mantissa != 0) {
            // NaNs.
            mantissa = mantissa << (23 - 10);
            if (bits(mantissa, 22) == 0) {
                // Signalling NaN: raise IOC and quiet it.
                fpscr.ioc = 1;
                mantissa |= (1 << 22);
            }
            if (fpscr.dn) {
                // Default NaN: clear the payload below the quiet bit.
                mantissa &= ~mask(22);
                neg = false;
            }
        }
    } else {
        // Normal number: rebias the exponent, widen the mantissa.
        exponent = exponent + (127 - 15);
        mantissa = mantissa << (23 - 10);
    }
    // Reassemble the single-precision result.
    uint32_t result = bits(mantissa, 22, 0);
    replaceBits(result, 30, 23, exponent);
    if (neg)
        result |= (1 << 31);
    return bitsToFp(result, junk);
}

// Convert a single-precision value to a fixed-point integer, scaled by
// 2^imm, with saturation. half selects 16-bit vs 32-bit results, isSigned
// selects signed vs unsigned, rzero forces round-toward-zero (otherwise the
// current host rounding mode is used). Raises FE flags via feraiseexcept so
// finishVfp can fold them into FPSCR.
uint64_t
vfpFpSToFixed(float val, bool isSigned, bool half,
              uint8_t imm, bool rzero)
{
    int rmode = rzero ? FeRoundZero : fegetround();
    __asm__ __volatile__("" : "=m" (rmode) : "m" (rmode));
    // Do the scaling multiply in round-to-nearest, then restore the
    // requested mode before rounding to integer. The barriers keep the
    // compiler from moving the multiply across the mode switches.
    fesetround(FeRoundNearest);
    val = val * powf(2.0, imm);
    __asm__ __volatile__("" : "=m" (val) : "m" (val));
    fesetround(rmode);
    feclearexcept(FeAllExceptions);
    __asm__ __volatile__("" : "=m" (val) : "m" (val));
    float origVal = val;
    val = rintf(val);
    int fpType = std::fpclassify(val);
    if (fpType == FP_SUBNORMAL || fpType == FP_NAN) {
        // NaN converts to 0 with invalid-operation; subnormals to 0.
        if (fpType == FP_NAN) {
            feraiseexcept(FeInvalid);
        }
        val = 0.0;
    } else if (origVal != val) {
        // rint rounded; correct the result so ties/directed rounding match
        // ARM semantics.
        // NOTE(review): the FeRoundNearest fixups look like they adjust
        // results the host rounded the "wrong" way on ties -- confirm
        // against the ARM ARM round-to-nearest definition.
        switch (rmode) {
          case FeRoundNearest:
            if (origVal - val > 0.5)
                val += 1.0;
            else if (val - origVal > 0.5)
                val -= 1.0;
            break;
          case FeRoundDown:
            if (origVal < val)
                val -= 1.0;
            break;
          case FeRoundUpward:
            if (origVal > val)
                val += 1.0;
            break;
        }
        feraiseexcept(FeInexact);
    }

    // Saturate to the destination range. Comparisons are done in double so
    // the integer bounds are represented exactly. Out-of-range input
    // raises invalid and suppresses inexact.
    if (isSigned) {
        if (half) {
            // (int16_t)(1 << 15) is INT16_MIN (-32768).
            if ((double)val < (int16_t)(1 << 15)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return (int16_t)(1 << 15);
            }
            if ((double)val > (int16_t)mask(15)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return (int16_t)mask(15);
            }
            return (int16_t)val;
        } else {
            // NOTE(review): 1 << 31 on a signed int is formally UB/
            // implementation-defined; works as INT32_MIN on the supported
            // hosts but std::numeric_limits<int32_t>::min() would be safer.
            if ((double)val < (int32_t)(1 << 31)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return (int32_t)(1 << 31);
            }
            if ((double)val > (int32_t)mask(31)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return (int32_t)mask(31);
            }
            return (int32_t)val;
        }
    } else {
        if (half) {
            if ((double)val < 0) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return 0;
            }
            if ((double)val > (mask(16))) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return mask(16);
            }
            return (uint16_t)val;
        } else {
            if ((double)val < 0) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return 0;
            }
            if ((double)val > (mask(32))) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return mask(32);
            }
            return (uint32_t)val;
        }
    }
}

// Convert an unsigned fixed-point value (scaled by 2^imm) to single
// precision. half means only the low 16 bits of val are significant.
float
vfpUFixedToFpS(FPSCR fpscr, uint32_t val, bool half, uint8_t imm)
{
    fesetround(FeRoundNearest);
    if (half)
        val = (uint16_t)val;
    float scale = powf(2.0, imm);
    // Barriers: keep powf's result and flag state ordered around
    // feclearexcept.
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    feclearexcept(FeAllExceptions);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    return fixDivDest(fpscr, val / scale, (float)val, scale);
}

// Convert a signed fixed-point value (scaled by 2^imm) to single
// precision. half means val holds a sign-extended 16-bit quantity.
float
vfpSFixedToFpS(FPSCR fpscr, int32_t val, bool half, uint8_t imm)
{
    fesetround(FeRoundNearest);
    if (half)
        val = sext<16>(val & mask(16));
    float scale = powf(2.0, imm);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    feclearexcept(FeAllExceptions);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    return fixDivDest(fpscr, val / scale, (float)val, scale);
}

// Double-precision counterpart of vfpFpSToFixed: convert to a scaled
// fixed-point integer with saturation and FE flag reporting.
uint64_t
vfpFpDToFixed(double val, bool isSigned, bool half,
              uint8_t imm, bool rzero)
{
    int rmode = rzero ? FeRoundZero : fegetround();
    // Scale in round-to-nearest, then switch to the requested mode for the
    // integer rounding (same dance as vfpFpSToFixed).
    fesetround(FeRoundNearest);
    val = val * pow(2.0, imm);
    __asm__ __volatile__("" : "=m" (val) : "m" (val));
    fesetround(rmode);
    feclearexcept(FeAllExceptions);
    __asm__ __volatile__("" : "=m" (val) : "m" (val));
    double origVal = val;
    val = rint(val);
    int fpType = std::fpclassify(val);
    if (fpType == FP_SUBNORMAL || fpType == FP_NAN) {
        // NaN -> 0 with invalid; subnormal -> 0.
        if (fpType == FP_NAN) {
            feraiseexcept(FeInvalid);
        }
        val = 0.0;
    } else if (origVal != val) {
        // Post-correct the host's rounding; see note in vfpFpSToFixed.
        switch (rmode) {
          case FeRoundNearest:
            if (origVal - val > 0.5)
                val += 1.0;
            else if (val - origVal > 0.5)
                val -= 1.0;
            break;
          case FeRoundDown:
            if (origVal < val)
                val -= 1.0;
            break;
          case FeRoundUpward:
            if (origVal > val)
                val += 1.0;
            break;
        }
        feraiseexcept(FeInexact);
    }
    // Saturate to the destination range (invalid + suppressed inexact on
    // overflow), as in vfpFpSToFixed but comparing in double directly.
    if (isSigned) {
        if (half) {
            if (val < (int16_t)(1 << 15)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return (int16_t)(1 << 15);
            }
            if (val > (int16_t)mask(15)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return (int16_t)mask(15);
            }
            return (int16_t)val;
        } else {
            // NOTE(review): same 1 << 31 signed-shift concern as the
            // single-precision version.
            if (val < (int32_t)(1 << 31)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return (int32_t)(1 << 31);
            }
            if (val > (int32_t)mask(31)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return (int32_t)mask(31);
            }
            return (int32_t)val;
        }
    } else {
        if (half) {
            if (val < 0) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return 0;
            }
            if (val > mask(16)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return mask(16);
            }
            return (uint16_t)val;
        } else {
            if (val < 0) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return 0;
            }
            if (val > mask(32)) {
                feraiseexcept(FeInvalid);
                feclearexcept(FeInexact);
                return mask(32);
            }
            return (uint32_t)val;
        }
    }
}

// Convert an unsigned fixed-point value (scaled by 2^imm) to double
// precision. half means only the low 16 bits are significant.
double
vfpUFixedToFpD(FPSCR fpscr, uint32_t val, bool half, uint8_t imm)
{
    fesetround(FeRoundNearest);
    if (half)
        val = (uint16_t)val;
    double scale = pow(2.0, imm);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    feclearexcept(FeAllExceptions);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    return fixDivDest(fpscr, val / scale, (double)val, scale);
}

// Convert a signed fixed-point value (scaled by 2^imm) to double
// precision. half means val holds a sign-extended 16-bit quantity.
double
vfpSFixedToFpD(FPSCR fpscr, int32_t val, bool half, uint8_t imm)
{
    fesetround(FeRoundNearest);
    if (half)
        val = sext<16>(val & mask(16));
    double scale = pow(2.0, imm);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    feclearexcept(FeAllExceptions);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    return fixDivDest(fpscr, val / scale, (double)val, scale);
}

// Run a two-operand FP operation (func) under the rounding mode rMode with
// ARM VFP semantics on an x86 host: optional input flush-to-zero (sets
// IDC), ARM NaN propagation rules (quiet/signalling, default-NaN mode),
// output flush-to-zero (raises underflow), and the ARM
// before-rounding-underflow correction. Updates fpscr via finishVfp.
template <class fpType>
fpType
FpOp::binaryOp(FPSCR &fpscr, fpType op1, fpType op2,
               fpType (*func)(fpType, fpType),
               bool flush, uint32_t rMode) const
{
    const bool single = (sizeof(fpType) == sizeof(float));
    fpType junk = 0.0;

    if (flush && flushToZero(op1, op2))
        fpscr.idc = 1;
    VfpSavedState state = prepFpState(rMode);
    // Barrier so the operands and saved state are materialized before the
    // rounding mode is in effect.
    __asm__ __volatile__ ("" : "=m" (op1), "=m" (op2), "=m" (state)
                             : "m" (op1), "m" (op2), "m" (state));
    fpType dest = func(op1, op2);
    __asm__ __volatile__ ("" : "=m" (dest) : "m" (dest));

    int fpClass = std::fpclassify(dest);
    // Get NAN behavior right. This varies between x86 and ARM.
    if (fpClass == FP_NAN) {
        // NOTE(review): this inner 'single' shadows the one above
        // (intentionally or not); both compute the same value.
        const bool single = (sizeof(fpType) == sizeof(float));
        const uint64_t qnan =
            single ? 0x7fc00000 : ULL(0x7ff8000000000000);
        const bool nan1 = std::isnan(op1);
        const bool nan2 = std::isnan(op2);
        // Signalling if the quiet bit isn't fully set.
        const bool signal1 = nan1 && ((fpToBits(op1) & qnan) != qnan);
        const bool signal2 = nan2 && ((fpToBits(op2) & qnan) != qnan);
        if ((!nan1 && !nan2) || (fpscr.dn == 1)) {
            // NaN generated by the op itself, or default-NaN mode.
            dest = bitsToFp(qnan, junk);
        } else if (signal1) {
            dest = bitsToFp(fpToBits(op1) | qnan, junk);
        } else if (signal2) {
            dest = bitsToFp(fpToBits(op2) | qnan, junk);
        } else if (nan1) {
            dest = op1;
        } else if (nan2) {
            dest = op2;
        }
    } else if (flush && flushToZero(dest)) {
        feraiseexcept(FeUnderflow);
    } else if ((
                (single && (dest == bitsToFp(0x00800000, junk) ||
                     dest == bitsToFp(0x80800000, junk))) ||
                (!single &&
                    (dest == bitsToFp(ULL(0x0010000000000000), junk) ||
                     dest == bitsToFp(ULL(0x8010000000000000), junk)))
               ) && rMode != VfpRoundZero) {
        /*
         * Correct for the fact that underflow is detected -before- rounding
         * in ARM and -after- rounding in x86.
         */
        // Redo the operation in round-to-zero; if that result flushes, the
        // pre-rounding value was subnormal, so use it.
        fesetround(FeRoundZero);
        __asm__ __volatile__ ("" : "=m" (op1), "=m" (op2)
                                 : "m" (op1), "m" (op2));
        fpType temp = func(op1, op2);
        __asm__ __volatile__ ("" : "=m" (temp) : "m" (temp));
        if (flush && flushToZero(temp)) {
            dest = temp;
        }
    }
    finishVfp(fpscr, state);
    return dest;
}

template
float FpOp::binaryOp(FPSCR &fpscr, float op1, float op2,
                     float (*func)(float, float),
                     bool flush, uint32_t rMode) const;
template
double FpOp::binaryOp(FPSCR &fpscr, double op1, double op2,
                      double (*func)(double, double),
                      bool flush, uint32_t rMode) const;

// One-operand counterpart of binaryOp: same flush-to-zero, NaN and
// underflow-correction handling for a unary FP operation.
template <class fpType>
fpType
FpOp::unaryOp(FPSCR &fpscr, fpType op1, fpType (*func)(fpType),
              bool flush, uint32_t rMode) const
{
    const bool single = (sizeof(fpType) == sizeof(float));
    fpType junk = 0.0;

    if (flush && flushToZero(op1))
        fpscr.idc = 1;
    VfpSavedState state = prepFpState(rMode);
    __asm__ __volatile__ ("" : "=m" (op1), "=m" (state)
                             : "m" (op1), "m" (state));
    fpType dest = func(op1);
    __asm__ __volatile__ ("" : "=m" (dest) : "m" (dest));

    int fpClass = std::fpclassify(dest);
    // Get NAN behavior right. This varies between x86 and ARM.
    if (fpClass == FP_NAN) {
        const bool single = (sizeof(fpType) == sizeof(float));
        const uint64_t qnan =
            single ? 0x7fc00000 : ULL(0x7ff8000000000000);
        const bool nan = std::isnan(op1);
        if (!nan || fpscr.dn == 1) {
            // Op-generated NaN or default-NaN mode.
            dest = bitsToFp(qnan, junk);
        } else if (nan) {
            // Propagate the operand's payload, quieted.
            dest = bitsToFp(fpToBits(op1) | qnan, junk);
        }
    } else if (flush && flushToZero(dest)) {
        feraiseexcept(FeUnderflow);
    } else if ((
                (single && (dest == bitsToFp(0x00800000, junk) ||
                     dest == bitsToFp(0x80800000, junk))) ||
                (!single &&
                    (dest == bitsToFp(ULL(0x0010000000000000), junk) ||
                     dest == bitsToFp(ULL(0x8010000000000000), junk)))
               ) && rMode != VfpRoundZero) {
        /*
         * Correct for the fact that underflow is detected -before- rounding
         * in ARM and -after- rounding in x86.
         */
        fesetround(FeRoundZero);
        __asm__ __volatile__ ("" : "=m" (op1) : "m" (op1));
        fpType temp = func(op1);
        __asm__ __volatile__ ("" : "=m" (temp) : "m" (temp));
        if (flush && flushToZero(temp)) {
            dest = temp;
        }
    }
    finishVfp(fpscr, state);
    return dest;
}

template
float FpOp::unaryOp(FPSCR &fpscr, float op1, float (*func)(float),
                    bool flush, uint32_t rMode) const;
template
double FpOp::unaryOp(FPSCR &fpscr, double op1, double (*func)(double),
                     bool flush, uint32_t rMode) const;

// Advance a VFP register index by stride, wrapping within its 8-register
// bank (short-vector addressing). wide doubles the stride.
// NOTE(review): 'wide' presumably marks double-precision operands -- the
// flag is declared outside this view; confirm.
IntRegIndex
VfpMacroOp::addStride(IntRegIndex idx, unsigned stride)
{
    if (wide) {
        stride *= 2;
    }
    unsigned offset = idx % 8;
    idx = (IntRegIndex)(idx - offset);
    offset += stride;
    // Wrap the offset within the bank, keeping the bank base.
    idx = (IntRegIndex)(idx + (offset % 8));
    return idx;
}

// Step dest/op1/op2 to the next element of a short vector. dest must not be
// in the scalar bank; a scalar-bank op2 stays fixed (scalar broadcast).
void
VfpMacroOp::nextIdxs(IntRegIndex &dest, IntRegIndex &op1, IntRegIndex &op2)
{
    unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2;
    assert(!inScalarBank(dest));
    dest = addStride(dest, stride);
    op1 = addStride(op1, stride);
    if (!inScalarBank(op2)) {
        op2 = addStride(op2, stride);
    }
}

// Two-register variant: step dest and (if vector) op1.
void
VfpMacroOp::nextIdxs(IntRegIndex &dest, IntRegIndex &op1)
{
    unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2;
    assert(!inScalarBank(dest));
    dest = addStride(dest, stride);
    if (!inScalarBank(op1)) {
        op1 = addStride(op1, stride);
    }
}

// One-register variant: step dest only.
void
VfpMacroOp::nextIdxs(IntRegIndex &dest)
{
    unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2;
    assert(!inScalarBank(dest));
    dest = addStride(dest, stride);
}

} // namespace ArmISA
|