// vfp.hh revision 7386
/*
 * Copyright (c) 2010 ARM Limited
 * All rights reserved
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder.  You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT 29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 36 * 37 * Authors: Gabe Black 38 */ 39 40#ifndef __ARCH_ARM_INSTS_VFP_HH__ 41#define __ARCH_ARM_INSTS_VFP_HH__ 42 43#include "arch/arm/insts/misc.hh" 44#include "arch/arm/miscregs.hh" 45#include <fenv.h> 46#include <cmath> 47 48namespace ArmISA 49{ 50 51enum VfpMicroMode { 52 VfpNotAMicroop, 53 VfpMicroop, 54 VfpFirstMicroop, 55 VfpLastMicroop 56}; 57 58template<class T> 59static inline void 60setVfpMicroFlags(VfpMicroMode mode, T &flags) 61{ 62 switch (mode) { 63 case VfpMicroop: 64 flags[StaticInst::IsMicroop] = true; 65 break; 66 case VfpFirstMicroop: 67 flags[StaticInst::IsMicroop] = 68 flags[StaticInst::IsFirstMicroop] = true; 69 break; 70 case VfpLastMicroop: 71 flags[StaticInst::IsMicroop] = 72 flags[StaticInst::IsLastMicroop] = true; 73 break; 74 case VfpNotAMicroop: 75 break; 76 } 77 if (mode == VfpMicroop || mode == VfpFirstMicroop) { 78 flags[StaticInst::IsDelayedCommit] = true; 79 } 80} 81 82enum FeExceptionBit 83{ 84 FeDivByZero = FE_DIVBYZERO, 85 FeInexact = FE_INEXACT, 86 FeInvalid = FE_INVALID, 87 FeOverflow = FE_OVERFLOW, 88 FeUnderflow = FE_UNDERFLOW, 89 FeAllExceptions = FE_ALL_EXCEPT 90}; 91 92enum FeRoundingMode 93{ 94 FeRoundDown = FE_DOWNWARD, 95 FeRoundNearest = FE_TONEAREST, 96 FeRoundZero = FE_TOWARDZERO, 97 FeRoundUpward = FE_UPWARD 98}; 99 100enum VfpRoundingMode 101{ 102 VfpRoundNearest = 0, 103 VfpRoundUpward = 1, 104 VfpRoundDown = 2, 105 VfpRoundZero = 3 106}; 107 108template <class fpType> 
109static inline void 110vfpFlushToZero(uint32_t &_fpscr, fpType &op) 111{ 112 FPSCR fpscr = _fpscr; 113 if (fpscr.fz == 1 && (std::fpclassify(op) == FP_SUBNORMAL)) { 114 fpscr.idc = 1; 115 op = 0; 116 } 117 _fpscr = fpscr; 118} 119 120template <class fpType> 121static inline void 122vfpFlushToZero(uint32_t &fpscr, fpType &op1, fpType &op2) 123{ 124 vfpFlushToZero(fpscr, op1); 125 vfpFlushToZero(fpscr, op2); 126} 127 128static inline uint32_t 129fpToBits(float fp) 130{ 131 union 132 { 133 float fp; 134 uint32_t bits; 135 } val; 136 val.fp = fp; 137 return val.bits; 138} 139 140static inline uint64_t 141fpToBits(double fp) 142{ 143 union 144 { 145 double fp; 146 uint64_t bits; 147 } val; 148 val.fp = fp; 149 return val.bits; 150} 151 152static inline float 153bitsToFp(uint64_t bits, float junk) 154{ 155 union 156 { 157 float fp; 158 uint32_t bits; 159 } val; 160 val.bits = bits; 161 return val.fp; 162} 163 164static inline double 165bitsToFp(uint64_t bits, double junk) 166{ 167 union 168 { 169 double fp; 170 uint64_t bits; 171 } val; 172 val.bits = bits; 173 return val.fp; 174} 175 176template <class fpType> 177static inline fpType 178fixDest(FPSCR fpscr, fpType val, fpType op1) 179{ 180 int fpClass = std::fpclassify(val); 181 fpType junk = 0.0; 182 if (fpClass == FP_NAN) { 183 const bool single = (sizeof(val) == sizeof(float)); 184 const uint64_t qnan = single ? 
0x7fc00000 : ULL(0x7ff8000000000000); 185 const bool nan = std::isnan(op1); 186 if (!nan || (fpscr.dn == 1)) { 187 val = bitsToFp(qnan, junk); 188 } else if (nan) { 189 val = bitsToFp(fpToBits(op1) | qnan, junk); 190 } 191 } else if (fpClass == FP_SUBNORMAL && fpscr.fz == 1) { 192 // Turn val into a zero with the correct sign; 193 uint64_t bitMask = ULL(0x1) << (sizeof(fpType) * 8 - 1); 194 val = bitsToFp(fpToBits(val) & bitMask, junk); 195 feraiseexcept(FeUnderflow); 196 } 197 return val; 198} 199 200template <class fpType> 201static inline fpType 202fixDest(FPSCR fpscr, fpType val, fpType op1, fpType op2) 203{ 204 int fpClass = std::fpclassify(val); 205 fpType junk = 0.0; 206 if (fpClass == FP_NAN) { 207 const bool single = (sizeof(val) == sizeof(float)); 208 const uint64_t qnan = single ? 0x7fc00000 : ULL(0x7ff8000000000000); 209 const bool nan1 = std::isnan(op1); 210 const bool nan2 = std::isnan(op2); 211 const bool signal1 = nan1 && ((fpToBits(op1) & qnan) != qnan); 212 const bool signal2 = nan2 && ((fpToBits(op2) & qnan) != qnan); 213 if ((!nan1 && !nan2) || (fpscr.dn == 1)) { 214 val = bitsToFp(qnan, junk); 215 } else if (signal1) { 216 val = bitsToFp(fpToBits(op1) | qnan, junk); 217 } else if (signal2) { 218 val = bitsToFp(fpToBits(op2) | qnan, junk); 219 } else if (nan1) { 220 val = op1; 221 } else if (nan2) { 222 val = op2; 223 } 224 } else if (fpClass == FP_SUBNORMAL && fpscr.fz == 1) { 225 // Turn val into a zero with the correct sign; 226 uint64_t bitMask = ULL(0x1) << (sizeof(fpType) * 8 - 1); 227 val = bitsToFp(fpToBits(val) & bitMask, junk); 228 feraiseexcept(FeUnderflow); 229 } 230 return val; 231} 232 233template <class fpType> 234static inline fpType 235fixMultDest(FPSCR fpscr, fpType val, fpType op1, fpType op2) 236{ 237 fpType mid = fixDest(fpscr, val, op1, op2); 238 const bool single = (sizeof(fpType) == sizeof(float)); 239 const fpType junk = 0.0; 240 if ((single && (val == bitsToFp(0x00800000, junk) || 241 val == bitsToFp(0x80800000, junk))) 
|| 242 (!single && (val == bitsToFp(ULL(0x0010000000000000), junk) || 243 val == bitsToFp(ULL(0x8010000000000000), junk))) 244 ) { 245 __asm__ __volatile__("" : "=m" (op1) : "m" (op1)); 246 fesetround(FeRoundZero); 247 fpType temp = 0.0; 248 __asm__ __volatile__("" : "=m" (temp) : "m" (temp)); 249 temp = op1 * op2; 250 if (!std::isnormal(temp)) { 251 feraiseexcept(FeUnderflow); 252 } 253 __asm__ __volatile__("" :: "m" (temp)); 254 } 255 return mid; 256} 257 258template <class fpType> 259static inline fpType 260fixDivDest(FPSCR fpscr, fpType val, fpType op1, fpType op2) 261{ 262 fpType mid = fixDest(fpscr, val, op1, op2); 263 const bool single = (sizeof(fpType) == sizeof(float)); 264 const fpType junk = 0.0; 265 if ((single && (val == bitsToFp(0x00800000, junk) || 266 val == bitsToFp(0x80800000, junk))) || 267 (!single && (val == bitsToFp(ULL(0x0010000000000000), junk) || 268 val == bitsToFp(ULL(0x8010000000000000), junk))) 269 ) { 270 __asm__ __volatile__("" : "=m" (op1) : "m" (op1)); 271 fesetround(FeRoundZero); 272 fpType temp = 0.0; 273 __asm__ __volatile__("" : "=m" (temp) : "m" (temp)); 274 temp = op1 / op2; 275 if (!std::isnormal(temp)) { 276 feraiseexcept(FeUnderflow); 277 } 278 __asm__ __volatile__("" :: "m" (temp)); 279 } 280 return mid; 281} 282 283static inline float 284fixFpDFpSDest(FPSCR fpscr, double val) 285{ 286 const float junk = 0.0; 287 float op1 = 0.0; 288 if (std::isnan(val)) { 289 uint64_t valBits = fpToBits(val); 290 uint32_t op1Bits = bits(valBits, 50, 29) | 291 (mask(9) << 22) | 292 (bits(valBits, 63) << 31); 293 op1 = bitsToFp(op1Bits, junk); 294 } 295 float mid = fixDest(fpscr, (float)val, op1); 296 if (mid == bitsToFp(0x00800000, junk) || 297 mid == bitsToFp(0x80800000, junk)) { 298 __asm__ __volatile__("" : "=m" (val) : "m" (val)); 299 fesetround(FeRoundZero); 300 float temp = 0.0; 301 __asm__ __volatile__("" : "=m" (temp) : "m" (temp)); 302 temp = val; 303 if (!std::isnormal(temp)) { 304 feraiseexcept(FeUnderflow); 305 } 306 __asm__ 
__volatile__("" :: "m" (temp)); 307 } 308 return mid; 309} 310 311static inline uint64_t 312vfpFpSToFixed(float val, bool isSigned, bool half, uint8_t imm) 313{ 314 fesetround(FeRoundZero); 315 val = val * powf(2.0, imm); 316 __asm__ __volatile__("" : "=m" (val) : "m" (val)); 317 feclearexcept(FeAllExceptions); 318 __asm__ __volatile__("" : "=m" (val) : "m" (val)); 319 float origVal = val; 320 val = rintf(val); 321 int fpType = std::fpclassify(val); 322 if (fpType == FP_SUBNORMAL || fpType == FP_NAN) { 323 if (fpType == FP_NAN) { 324 feraiseexcept(FeInvalid); 325 } 326 val = 0.0; 327 } else if (origVal != val) { 328 feraiseexcept(FeInexact); 329 } 330 331 if (isSigned) { 332 if (half) { 333 if ((double)val < (int16_t)(1 << 15)) { 334 feraiseexcept(FeInvalid); 335 feclearexcept(FeInexact); 336 return (int16_t)(1 << 15); 337 } 338 if ((double)val > (int16_t)mask(15)) { 339 feraiseexcept(FeInvalid); 340 feclearexcept(FeInexact); 341 return (int16_t)mask(15); 342 } 343 return (int16_t)val; 344 } else { 345 if ((double)val < (int32_t)(1 << 31)) { 346 feraiseexcept(FeInvalid); 347 feclearexcept(FeInexact); 348 return (int32_t)(1 << 31); 349 } 350 if ((double)val > (int32_t)mask(31)) { 351 feraiseexcept(FeInvalid); 352 feclearexcept(FeInexact); 353 return (int32_t)mask(31); 354 } 355 return (int32_t)val; 356 } 357 } else { 358 if (half) { 359 if ((double)val < 0) { 360 feraiseexcept(FeInvalid); 361 feclearexcept(FeInexact); 362 return 0; 363 } 364 if ((double)val > (mask(16))) { 365 feraiseexcept(FeInvalid); 366 feclearexcept(FeInexact); 367 return mask(16); 368 } 369 return (uint16_t)val; 370 } else { 371 if ((double)val < 0) { 372 feraiseexcept(FeInvalid); 373 feclearexcept(FeInexact); 374 return 0; 375 } 376 if ((double)val > (mask(32))) { 377 feraiseexcept(FeInvalid); 378 feclearexcept(FeInexact); 379 return mask(32); 380 } 381 return (uint32_t)val; 382 } 383 } 384} 385 386static inline float 387vfpUFixedToFpS(FPSCR fpscr, uint32_t val, bool half, uint8_t imm) 388{ 
389 fesetround(FeRoundNearest); 390 if (half) 391 val = (uint16_t)val; 392 float scale = powf(2.0, imm); 393 __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); 394 feclearexcept(FeAllExceptions); 395 __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); 396 return fixDivDest(fpscr, val / scale, (float)val, scale); 397} 398 399static inline float 400vfpSFixedToFpS(FPSCR fpscr, int32_t val, bool half, uint8_t imm) 401{ 402 fesetround(FeRoundNearest); 403 if (half) 404 val = sext<16>(val & mask(16)); 405 float scale = powf(2.0, imm); 406 __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); 407 feclearexcept(FeAllExceptions); 408 __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); 409 return fixDivDest(fpscr, val / scale, (float)val, scale); 410} 411 412static inline uint64_t 413vfpFpDToFixed(double val, bool isSigned, bool half, uint8_t imm) 414{ 415 fesetround(FeRoundNearest); 416 val = val * pow(2.0, imm); 417 __asm__ __volatile__("" : "=m" (val) : "m" (val)); 418 fesetround(FeRoundZero); 419 feclearexcept(FeAllExceptions); 420 __asm__ __volatile__("" : "=m" (val) : "m" (val)); 421 double origVal = val; 422 val = rint(val); 423 int fpType = std::fpclassify(val); 424 if (fpType == FP_SUBNORMAL || fpType == FP_NAN) { 425 if (fpType == FP_NAN) { 426 feraiseexcept(FeInvalid); 427 } 428 val = 0.0; 429 } else if (origVal != val) { 430 feraiseexcept(FeInexact); 431 } 432 if (isSigned) { 433 if (half) { 434 if (val < (int16_t)(1 << 15)) { 435 feraiseexcept(FeInvalid); 436 feclearexcept(FeInexact); 437 return (int16_t)(1 << 15); 438 } 439 if (val > (int16_t)mask(15)) { 440 feraiseexcept(FeInvalid); 441 feclearexcept(FeInexact); 442 return (int16_t)mask(15); 443 } 444 return (int16_t)val; 445 } else { 446 if (val < (int32_t)(1 << 31)) { 447 feraiseexcept(FeInvalid); 448 feclearexcept(FeInexact); 449 return (int32_t)(1 << 31); 450 } 451 if (val > (int32_t)mask(31)) { 452 feraiseexcept(FeInvalid); 453 feclearexcept(FeInexact); 454 return (int32_t)mask(31); 455 } 456 
return (int32_t)val; 457 } 458 } else { 459 if (half) { 460 if (val < 0) { 461 feraiseexcept(FeInvalid); 462 feclearexcept(FeInexact); 463 return 0; 464 } 465 if (val > mask(16)) { 466 feraiseexcept(FeInvalid); 467 feclearexcept(FeInexact); 468 return mask(16); 469 } 470 return (uint16_t)val; 471 } else { 472 if (val < 0) { 473 feraiseexcept(FeInvalid); 474 feclearexcept(FeInexact); 475 return 0; 476 } 477 if (val > mask(32)) { 478 feraiseexcept(FeInvalid); 479 feclearexcept(FeInexact); 480 return mask(32); 481 } 482 return (uint32_t)val; 483 } 484 } 485} 486 487static inline double 488vfpUFixedToFpD(FPSCR fpscr, uint32_t val, bool half, uint8_t imm) 489{ 490 fesetround(FeRoundNearest); 491 if (half) 492 val = (uint16_t)val; 493 double scale = pow(2.0, imm); 494 __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); 495 feclearexcept(FeAllExceptions); 496 __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); 497 return fixDivDest(fpscr, val / scale, (double)val, scale); 498} 499 500static inline double 501vfpSFixedToFpD(FPSCR fpscr, int32_t val, bool half, uint8_t imm) 502{ 503 fesetround(FeRoundNearest); 504 if (half) 505 val = sext<16>(val & mask(16)); 506 double scale = pow(2.0, imm); 507 __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); 508 feclearexcept(FeAllExceptions); 509 __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); 510 return fixDivDest(fpscr, val / scale, (double)val, scale); 511} 512 513typedef int VfpSavedState; 514 515static inline VfpSavedState 516prepVfpFpscr(FPSCR fpscr) 517{ 518 int roundingMode = fegetround(); 519 feclearexcept(FeAllExceptions); 520 switch (fpscr.rMode) { 521 case VfpRoundNearest: 522 fesetround(FeRoundNearest); 523 break; 524 case VfpRoundUpward: 525 fesetround(FeRoundUpward); 526 break; 527 case VfpRoundDown: 528 fesetround(FeRoundDown); 529 break; 530 case VfpRoundZero: 531 fesetround(FeRoundZero); 532 break; 533 } 534 return roundingMode; 535} 536 537static inline FPSCR 538setVfpFpscr(FPSCR fpscr, VfpSavedState 
state) 539{ 540 int exceptions = fetestexcept(FeAllExceptions); 541 if (exceptions & FeInvalid) { 542 fpscr.ioc = 1; 543 } 544 if (exceptions & FeDivByZero) { 545 fpscr.dzc = 1; 546 } 547 if (exceptions & FeOverflow) { 548 fpscr.ofc = 1; 549 } 550 if (exceptions & FeUnderflow) { 551 fpscr.ufc = 1; 552 } 553 if (exceptions & FeInexact) { 554 fpscr.ixc = 1; 555 } 556 fesetround(state); 557 return fpscr; 558} 559 560class VfpMacroOp : public PredMacroOp 561{ 562 public: 563 static bool 564 inScalarBank(IntRegIndex idx) 565 { 566 return (idx % 32) < 8; 567 } 568 569 protected: 570 bool wide; 571 572 VfpMacroOp(const char *mnem, ExtMachInst _machInst, 573 OpClass __opClass, bool _wide) : 574 PredMacroOp(mnem, _machInst, __opClass), wide(_wide) 575 {} 576 577 IntRegIndex 578 addStride(IntRegIndex idx, unsigned stride) 579 { 580 if (wide) { 581 stride *= 2; 582 } 583 unsigned offset = idx % 8; 584 idx = (IntRegIndex)(idx - offset); 585 offset += stride; 586 idx = (IntRegIndex)(idx + (offset % 8)); 587 return idx; 588 } 589 590 void 591 nextIdxs(IntRegIndex &dest, IntRegIndex &op1, IntRegIndex &op2) 592 { 593 unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2; 594 assert(!inScalarBank(dest)); 595 dest = addStride(dest, stride); 596 op1 = addStride(op1, stride); 597 if (!inScalarBank(op2)) { 598 op2 = addStride(op2, stride); 599 } 600 } 601 602 void 603 nextIdxs(IntRegIndex &dest, IntRegIndex &op1) 604 { 605 unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2; 606 assert(!inScalarBank(dest)); 607 dest = addStride(dest, stride); 608 if (!inScalarBank(op1)) { 609 op1 = addStride(op1, stride); 610 } 611 } 612 613 void 614 nextIdxs(IntRegIndex &dest) 615 { 616 unsigned stride = (machInst.fpscrStride == 0) ? 
1 : 2; 617 assert(!inScalarBank(dest)); 618 dest = addStride(dest, stride); 619 } 620}; 621 622class VfpRegRegOp : public RegRegOp 623{ 624 protected: 625 VfpRegRegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, 626 IntRegIndex _dest, IntRegIndex _op1, 627 VfpMicroMode mode = VfpNotAMicroop) : 628 RegRegOp(mnem, _machInst, __opClass, _dest, _op1) 629 { 630 setVfpMicroFlags(mode, flags); 631 } 632}; 633 634class VfpRegImmOp : public RegImmOp 635{ 636 protected: 637 VfpRegImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, 638 IntRegIndex _dest, uint64_t _imm, 639 VfpMicroMode mode = VfpNotAMicroop) : 640 RegImmOp(mnem, _machInst, __opClass, _dest, _imm) 641 { 642 setVfpMicroFlags(mode, flags); 643 } 644}; 645 646class VfpRegRegImmOp : public RegRegImmOp 647{ 648 protected: 649 VfpRegRegImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, 650 IntRegIndex _dest, IntRegIndex _op1, 651 uint64_t _imm, VfpMicroMode mode = VfpNotAMicroop) : 652 RegRegImmOp(mnem, _machInst, __opClass, _dest, _op1, _imm) 653 { 654 setVfpMicroFlags(mode, flags); 655 } 656}; 657 658class VfpRegRegRegOp : public RegRegRegOp 659{ 660 protected: 661 VfpRegRegRegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, 662 IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2, 663 VfpMicroMode mode = VfpNotAMicroop) : 664 RegRegRegOp(mnem, _machInst, __opClass, _dest, _op1, _op2) 665 { 666 setVfpMicroFlags(mode, flags); 667 } 668}; 669 670} 671 672#endif //__ARCH_ARM_INSTS_VFP_HH__ 673