vfp.hh (11321:02e930db812d) vfp.hh (11671:520509f3e66c)
1/*
2 * Copyright (c) 2010-2013 ARM Limited
3 * All rights reserved
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder. You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions are
16 * met: redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer;
18 * redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution;
21 * neither the name of the copyright holders nor the names of its
22 * contributors may be used to endorse or promote products derived from
23 * this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 *
37 * Authors: Gabe Black
38 */
39
40#ifndef __ARCH_ARM_INSTS_VFP_HH__
41#define __ARCH_ARM_INSTS_VFP_HH__
42
#include <fenv.h>

#include <cmath>
#include <cstring>

#include "arch/arm/insts/misc.hh"
#include "arch/arm/miscregs.hh"
49
50namespace ArmISA
51{
52
// Position of a VFP instruction within a macroop expansion; consumed by
// setVfpMicroFlags() below to set the matching StaticInst flags.
enum VfpMicroMode {
    VfpNotAMicroop,   // stand-alone instruction, not part of a macroop
    VfpMicroop,       // interior microop of a macroop
    VfpFirstMicroop,  // first microop of a macroop
    VfpLastMicroop    // last microop of a macroop
};
59
60template<class T>
61static inline void
62setVfpMicroFlags(VfpMicroMode mode, T &flags)
63{
64 switch (mode) {
65 case VfpMicroop:
66 flags[StaticInst::IsMicroop] = true;
67 break;
68 case VfpFirstMicroop:
69 flags[StaticInst::IsMicroop] =
70 flags[StaticInst::IsFirstMicroop] = true;
71 break;
72 case VfpLastMicroop:
73 flags[StaticInst::IsMicroop] =
74 flags[StaticInst::IsLastMicroop] = true;
75 break;
76 case VfpNotAMicroop:
77 break;
78 }
79 if (mode == VfpMicroop || mode == VfpFirstMicroop) {
80 flags[StaticInst::IsDelayedCommit] = true;
81 }
82}
83
// Host floating point environment exception bits (<fenv.h>), renamed
// for readability at the use sites below.
enum FeExceptionBit
{
    FeDivByZero = FE_DIVBYZERO,
    FeInexact = FE_INEXACT,
    FeInvalid = FE_INVALID,
    FeOverflow = FE_OVERFLOW,
    FeUnderflow = FE_UNDERFLOW,
    FeAllExceptions = FE_ALL_EXCEPT
};
93
// Host rounding modes (<fenv.h>) under friendlier names; values are
// whatever the host C library defines, not architectural encodings.
enum FeRoundingMode
{
    FeRoundDown = FE_DOWNWARD,
    FeRoundNearest = FE_TONEAREST,
    FeRoundZero = FE_TOWARDZERO,
    FeRoundUpward = FE_UPWARD
};
101
// Architectural rounding modes as encoded in FPSCR.RMode. VfpRoundAway
// (ties away from zero) has no host equivalent and is emulated in
// vfpFpToFixed().
enum VfpRoundingMode
{
    VfpRoundNearest = 0,
    VfpRoundUpward = 1,
    VfpRoundDown = 2,
    VfpRoundZero = 3,
    VfpRoundAway = 4
};
110
111static inline float bitsToFp(uint64_t, float);
112static inline double bitsToFp(uint64_t, double);
113static inline uint32_t fpToBits(float);
114static inline uint64_t fpToBits(double);
115
116template <class fpType>
117static inline bool
118flushToZero(fpType &op)
119{
120 fpType junk = 0.0;
121 if (std::fpclassify(op) == FP_SUBNORMAL) {
122 uint64_t bitMask = ULL(0x1) << (sizeof(fpType) * 8 - 1);
123 op = bitsToFp(fpToBits(op) & bitMask, junk);
124 return true;
125 }
126 return false;
127}
128
// Flush both operands; both are always examined. Returns true when
// either one was denormal.
template <class fpType>
static inline bool
flushToZero(fpType &op1, fpType &op2)
{
    const bool first = flushToZero(op1);
    const bool second = flushToZero(op2);
    return first || second;
}
137
138template <class fpType>
139static inline void
140vfpFlushToZero(FPSCR &fpscr, fpType &op)
141{
142 if (fpscr.fz == 1 && flushToZero(op)) {
143 fpscr.idc = 1;
144 }
145}
146
147template <class fpType>
148static inline void
149vfpFlushToZero(FPSCR &fpscr, fpType &op1, fpType &op2)
150{
151 vfpFlushToZero(fpscr, op1);
152 vfpFlushToZero(fpscr, op2);
153}
154
// Raw IEEE-754 bit pattern of a single precision value.
// Uses memcpy instead of a union: reading the inactive union member is
// undefined behaviour in C++, while memcpy is well-defined type punning
// and compiles to the same code.
static inline uint32_t
fpToBits(float fp)
{
    uint32_t bits;
    std::memcpy(&bits, &fp, sizeof(bits));
    return bits;
}
166
// Raw IEEE-754 bit pattern of a double precision value.
// memcpy replaces the original union: inactive-member reads are UB in
// C++; memcpy is the defined way to type-pun.
static inline uint64_t
fpToBits(double fp)
{
    uint64_t bits;
    std::memcpy(&bits, &fp, sizeof(bits));
    return bits;
}
178
// Reinterpret the low 32 bits of "bits" as a float. The junk argument
// exists only to select this overload; its value is ignored.
// memcpy replaces the original union (inactive-member reads are UB in
// C++); the uint64_t -> uint32_t narrowing matches the old behaviour of
// storing into the union's 32 bit member.
static inline float
bitsToFp(uint64_t bits, float junk)
{
    const uint32_t lowBits = bits;
    float fp;
    std::memcpy(&fp, &lowBits, sizeof(fp));
    return fp;
}
190
// Reinterpret a 64 bit pattern as a double. The junk argument exists
// only to select this overload; its value is ignored.
// memcpy replaces the original union to avoid undefined behaviour.
static inline double
bitsToFp(uint64_t bits, double junk)
{
    double fp;
    std::memcpy(&fp, &bits, sizeof(fp));
    return fp;
}
202
// Returns true if val is a signaling NaN: a NaN whose quiet bit (the
// top fraction bit) is clear.
template <class fpType>
static bool
isSnan(fpType val)
{
    const bool single = (sizeof(fpType) == sizeof(float));
    // Exponent bits plus the quiet bit; all of these are set in a qNaN.
    const uint64_t qnan =
        single ? 0x7fc00000 : ULL(0x7ff8000000000000);
    return std::isnan(val) && ((fpToBits(val) & qnan) != qnan);
}
212
213typedef int VfpSavedState;
214
215VfpSavedState prepFpState(uint32_t rMode);
216void finishVfp(FPSCR &fpscr, VfpSavedState state, bool flush, FPSCR mask = FpscrExcMask);
217
218template <class fpType>
219fpType fixDest(FPSCR fpscr, fpType val, fpType op1);
220
221template <class fpType>
222fpType fixDest(FPSCR fpscr, fpType val, fpType op1, fpType op2);
223
224template <class fpType>
225fpType fixDivDest(FPSCR fpscr, fpType val, fpType op1, fpType op2);
226
227float fixFpDFpSDest(FPSCR fpscr, double val);
228double fixFpSFpDDest(FPSCR fpscr, float val);
229
230uint16_t vcvtFpSFpH(FPSCR &fpscr, bool flush, bool defaultNan,
231 uint32_t rMode, bool ahp, float op);
232uint16_t vcvtFpDFpH(FPSCR &fpscr, bool flush, bool defaultNan,
233 uint32_t rMode, bool ahp, double op);
234
235float vcvtFpHFpS(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op);
236double vcvtFpHFpD(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op);
237
238static inline double
239makeDouble(uint32_t low, uint32_t high)
240{
241 double junk = 0.0;
242 return bitsToFp((uint64_t)low | ((uint64_t)high << 32), junk);
243}
244
245static inline uint32_t
246lowFromDouble(double val)
247{
248 return fpToBits(val);
249}
250
251static inline uint32_t
252highFromDouble(double val)
253{
254 return fpToBits(val) >> 32;
255}
256
// Replace the host FP exception flags with exactly the given set.
static inline void
setFPExceptions(int exceptions) {
    feclearexcept(FeAllExceptions);
    feraiseexcept(exceptions);
}
262
// Convert a floating point value to a fixed point integer of the given
// width, scaling by 2^imm first. Out-of-range results saturate, and the
// matching host FP exception flags are installed via setFPExceptions()
// for the caller to read back. When useRmode is false the currently
// installed host rounding mode is used instead of roundMode.
template <typename T>
uint64_t
vfpFpToFixed(T val, bool isSigned, uint8_t width, uint8_t imm, bool
             useRmode = true, VfpRoundingMode roundMode = VfpRoundZero,
             bool aarch64 = false)
{
    int rmode;
    bool roundAwayFix = false;

    if (!useRmode) {
        rmode = fegetround();
    } else {
        // Map the architectural rounding mode onto a host one.
        switch (roundMode)
        {
          case VfpRoundNearest:
            rmode = FeRoundNearest;
            break;
          case VfpRoundUpward:
            rmode = FeRoundUpward;
            break;
          case VfpRoundDown:
            rmode = FeRoundDown;
            break;
          case VfpRoundZero:
            rmode = FeRoundZero;
            break;
          case VfpRoundAway:
            // There is no equivalent rounding mode, use round down and we'll
            // fix it later
            rmode = FeRoundDown;
            roundAwayFix = true;
            break;
          default:
            panic("Unsupported roundMode %d\n", roundMode);
        }
    }
    // The empty asm statements are compiler barriers: they stop the
    // compiler from moving the FP operations across the fesetround() /
    // feclearexcept() calls.
    __asm__ __volatile__("" : "=m" (rmode) : "m" (rmode));
    // Apply the fixed point scale factor in round-to-nearest mode.
    fesetround(FeRoundNearest);
    val = val * pow(2.0, imm);
    __asm__ __volatile__("" : "=m" (val) : "m" (val));
    fesetround(rmode);
    feclearexcept(FeAllExceptions);
    __asm__ __volatile__("" : "=m" (val) : "m" (val));
    T origVal = val;
    val = rint(val);
    __asm__ __volatile__("" : "=m" (val) : "m" (val));

    int exceptions = fetestexcept(FeAllExceptions);

    int fpType = std::fpclassify(val);
    if (fpType == FP_SUBNORMAL || fpType == FP_NAN) {
        if (fpType == FP_NAN) {
            exceptions |= FeInvalid;
        }
        val = 0.0;
    } else if (origVal != val) {
        // rint() changed the value; patch up the cases where host
        // rounding can disagree with the architectural mode.
        switch (rmode) {
          case FeRoundNearest:
            if (origVal - val > 0.5)
                val += 1.0;
            else if (val - origVal > 0.5)
                val -= 1.0;
            break;
          case FeRoundDown:
            if (roundAwayFix) {
                // The ordering on the subtraction looks a bit odd in that we
                // don't do the obvious origVal - val, instead we do
                // -(val - origVal). This is required to get the correct bit
                // exact behaviour when very close to the 0.5 threshold.
                volatile T error = val;
                error -= origVal;
                error = -error;
                if ( (error > 0.5) ||
                    ((error == 0.5) && (val >= 0)) )
                    val += 1.0;
            } else {
                if (origVal < val)
                    val -= 1.0;
            }
            break;
          case FeRoundUpward:
            if (origVal > val)
                val += 1.0;
            break;
        }
        exceptions |= FeInexact;
    }

    __asm__ __volatile__("" : "=m" (val) : "m" (val));

    if (isSigned) {
        bool outOfRange = false;
        int64_t result = (int64_t) val;
        uint64_t finalVal;

        if (!aarch64) {
            // AArch32: truncate to the requested width first...
            if (width == 16) {
                finalVal = (int16_t)val;
            } else if (width == 32) {
                finalVal =(int32_t)val;
            } else if (width == 64) {
                finalVal = result;
            } else {
                panic("Unsupported width %d\n", width);
            }

            // ...then check if value is in range, saturating if not.
            int64_t minVal = ~mask(width-1);
            if ((double)val < minVal) {
                outOfRange = true;
                finalVal = minVal;
            }
            int64_t maxVal = mask(width-1);
            if ((double)val > maxVal) {
                outOfRange = true;
                finalVal = maxVal;
            }
        } else {
            bool isNeg = val < 0;
            finalVal = result & mask(width);
            // If the result is supposed to be less than 64 bits check that the
            // upper bits that got thrown away are just sign extension bits
            if (width != 64) {
                outOfRange = ((uint64_t) result >> (width - 1)) !=
                             (isNeg ? mask(64-width+1) : 0);
            }
            // If the original floating point value doesn't match the
            // integer version we are also out of range. So create a saturated
            // result.
            if (isNeg) {
                outOfRange |= val < result;
                if (outOfRange) {
                    finalVal = 1LL << (width-1);
                }
            } else {
                outOfRange |= val > result;
                if (outOfRange) {
                    finalVal = mask(width-1);
                }
            }
        }

        // Raise an exception if the value was out of range
        if (outOfRange) {
            exceptions |= FeInvalid;
            exceptions &= ~FeInexact;
        }
        setFPExceptions(exceptions);
        return finalVal;
    } else {
        // Unsigned conversion: negative inputs saturate to zero.
        if ((double)val < 0) {
            exceptions |= FeInvalid;
            exceptions &= ~FeInexact;
            setFPExceptions(exceptions);
            return 0;
        }

        uint64_t result = ((uint64_t) val) & mask(width);
        if (val > result) {
            // Doesn't fit in width bits: saturate to the maximum.
            exceptions |= FeInvalid;
            exceptions &= ~FeInexact;
            setFPExceptions(exceptions);
            return mask(width);
        }

        setFPExceptions(exceptions);
        return result;
    }
};
432
433
434float vfpUFixedToFpS(bool flush, bool defaultNan,
435 uint64_t val, uint8_t width, uint8_t imm);
436float vfpSFixedToFpS(bool flush, bool defaultNan,
437 int64_t val, uint8_t width, uint8_t imm);
438
439double vfpUFixedToFpD(bool flush, bool defaultNan,
440 uint64_t val, uint8_t width, uint8_t imm);
441double vfpSFixedToFpD(bool flush, bool defaultNan,
442 int64_t val, uint8_t width, uint8_t imm);
443
444float fprSqrtEstimate(FPSCR &fpscr, float op);
445uint32_t unsignedRSqrtEstimate(uint32_t op);
446
447float fpRecipEstimate(FPSCR &fpscr, float op);
448uint32_t unsignedRecipEstimate(uint32_t op);
449
// Base class for VFP macroops that expand into per-register microops.
class VfpMacroOp : public PredMacroOp
{
  public:
    // True when the register is one of the first eight of its
    // 32-register bank (the "scalar bank").
    static bool
    inScalarBank(IntRegIndex idx)
    {
        return (idx % 32) < 8;
    }

  protected:
    bool wide;  // true when operating on double-width (64 bit) registers

    VfpMacroOp(const char *mnem, ExtMachInst _machInst,
               OpClass __opClass, bool _wide) :
        PredMacroOp(mnem, _machInst, __opClass), wide(_wide)
    {}

    // Register-index stepping helpers for successive microops; defined
    // out of line.
    IntRegIndex addStride(IntRegIndex idx, unsigned stride);
    void nextIdxs(IntRegIndex &dest, IntRegIndex &op1, IntRegIndex &op2);
    void nextIdxs(IntRegIndex &dest, IntRegIndex &op1);
    void nextIdxs(IntRegIndex &dest);
};
472
// Sum of two values; used as a function-pointer kernel by binaryOp().
// Fix: dropped the spurious semicolon after the function body (a
// non-standard empty declaration at namespace scope).
template <typename T>
static inline T
fpAdd(T a, T b)
{
    return a + b;
}
479
// Difference of two values; used as a function-pointer kernel by
// binaryOp(). Fix: dropped the spurious semicolon after the body.
template <typename T>
static inline T
fpSub(T a, T b)
{
    return a - b;
}
486
// Single precision addition kernel.
static inline float
fpAddS(float a, float b)
{
    const float sum = a + b;
    return sum;
}
492
// Double precision addition kernel.
static inline double
fpAddD(double a, double b)
{
    const double sum = a + b;
    return sum;
}
498
// Single precision subtraction kernel.
static inline float
fpSubS(float a, float b)
{
    const float diff = a - b;
    return diff;
}
504
// Double precision subtraction kernel.
static inline double
fpSubD(double a, double b)
{
    const double diff = a - b;
    return diff;
}
510
// Single precision division kernel.
static inline float
fpDivS(float a, float b)
{
    const float quotient = a / b;
    return quotient;
}
516
// Double precision division kernel.
static inline double
fpDivD(double a, double b)
{
    const double quotient = a / b;
    return quotient;
}
522
// Quotient of two values; used as a function-pointer kernel by
// binaryOp(). Fix: dropped the spurious semicolon after the body.
template <typename T>
static inline T
fpDiv(T a, T b)
{
    return a / b;
}
529
// FMULX: like an ordinary multiply, except that infinity * zero (in
// either operand order) returns 2.0 with the exclusive-or of the
// operand signs instead of producing a NaN.
// Fixes: replaced the hand-rolled sign-bit extraction with
// std::signbit (identical result, works for any float width) and
// dropped the spurious semicolon after the function body.
template <typename T>
static inline T
fpMulX(T a, T b)
{
    const bool inf1 = (std::fpclassify(a) == FP_INFINITE);
    const bool inf2 = (std::fpclassify(b) == FP_INFINITE);
    const bool zero1 = (std::fpclassify(a) == FP_ZERO);
    const bool zero2 = (std::fpclassify(b) == FP_ZERO);
    if ((inf1 && zero2) || (zero1 && inf2)) {
        // Result sign is sign(a) XOR sign(b).
        if (std::signbit(a) != std::signbit(b))
            return (T)(-2.0);
        else
            return (T)(2.0);
    } else {
        return (a * b);
    }
}
562
563
// Product of two values; used as a function-pointer kernel by
// binaryOp(). Fix: dropped the spurious semicolon after the body.
template <typename T>
static inline T
fpMul(T a, T b)
{
    return a * b;
}
570
// Single precision multiplication kernel.
static inline float
fpMulS(float a, float b)
{
    const float product = a * b;
    return product;
}
576
// Double precision multiplication kernel.
static inline double
fpMulD(double a, double b)
{
    const double product = a * b;
    return product;
}
582
// Fused multiply-add: op1 * op2 + addend with a single rounding.
// @todo remove this when all calls to it have been replaced with the new fplib implementation
template <typename T>
static inline T
fpMulAdd(T op1, T op2, T addend)
{
    T result;

    if (sizeof(T) == sizeof(float))
        result = fmaf(op1, op2, addend);
    else
        result = fma(op1, op2, addend);

    // ARM doesn't generate signed NaNs from this operation, so clear
    // the sign bit when the NaN was produced here rather than
    // propagated from an operand.
    if (std::isnan(result) && !std::isnan(op1) &&
        !std::isnan(op2) && !std::isnan(addend))
    {
        uint64_t bitMask = ULL(0x1) << ((sizeof(T) * 8) - 1);
        result = bitsToFp(fpToBits(result) & ~bitMask, op1);
    }
    return result;
}
604
605template <typename T>
606static inline T
607fpRIntX(T a, FPSCR &fpscr)
608{
609 T rVal;
610
611 rVal = rint(a);
612 if (rVal != a && !std::isnan(a))
613 fpscr.ixc = 1;
614 return (rVal);
615};
616
// maxNM-style max: a quiet NaN operand loses to a number, but a
// signaling NaN is returned so the caller can process it.
template <typename T>
static inline T
fpMaxNum(T a, T b)
{
    const bool single = (sizeof(T) == sizeof(float));
    // Exponent bits plus the quiet bit; all set in every quiet NaN.
    const uint64_t qnan = single ? 0x7fc00000 : ULL(0x7ff8000000000000);

    if (std::isnan(a))
        return ((fpToBits(a) & qnan) == qnan) ? b : a;
    if (std::isnan(b))
        return ((fpToBits(b) & qnan) == qnan) ? a : b;
    // Handle comparisons of +0 and -0: +0 beats -0, which fmax() alone
    // does not guarantee.
    if (!std::signbit(a) && std::signbit(b))
        return a;
    return fmax(a, b);
};
633
634template <typename T>
635static inline T
636fpMax(T a, T b)
637{
638 if (std::isnan(a))
639 return a;
640 if (std::isnan(b))
641 return b;
642 return fpMaxNum<T>(a, b);
643};
644
// minNM-style min: a quiet NaN operand loses to a number, but a
// signaling NaN is returned so the caller can process it.
template <typename T>
static inline T
fpMinNum(T a, T b)
{
    const bool single = (sizeof(T) == sizeof(float));
    // Exponent bits plus the quiet bit; all set in every quiet NaN.
    const uint64_t qnan = single ? 0x7fc00000 : ULL(0x7ff8000000000000);

    if (std::isnan(a))
        return ((fpToBits(a) & qnan) == qnan) ? b : a;
    if (std::isnan(b))
        return ((fpToBits(b) & qnan) == qnan) ? a : b;
    // Handle comparisons of +0 and -0: -0 beats +0, which fmin() alone
    // does not guarantee.
    if (std::signbit(a) && !std::signbit(b))
        return a;
    return fmin(a, b);
};
661
662template <typename T>
663static inline T
664fpMin(T a, T b)
665{
666 if (std::isnan(a))
667 return a;
668 if (std::isnan(b))
669 return b;
670 return fpMinNum<T>(a, b);
671};
672
673template <typename T>
674static inline T
675fpRSqrts(T a, T b)
676{
677 int fpClassA = std::fpclassify(a);
678 int fpClassB = std::fpclassify(b);
679 T aXb;
680 int fpClassAxB;
681
682 if ((fpClassA == FP_ZERO && fpClassB == FP_INFINITE) ||
683 (fpClassA == FP_INFINITE && fpClassB == FP_ZERO)) {
684 return 1.5;
685 }
686 aXb = a*b;
687 fpClassAxB = std::fpclassify(aXb);
688 if (fpClassAxB == FP_SUBNORMAL) {
689 feraiseexcept(FeUnderflow);
690 return 1.5;
691 }
692 return (3.0 - (a * b)) / 2.0;
693};
694
695template <typename T>
696static inline T
697fpRecps(T a, T b)
698{
699 int fpClassA = std::fpclassify(a);
700 int fpClassB = std::fpclassify(b);
701 T aXb;
702 int fpClassAxB;
703
704 if ((fpClassA == FP_ZERO && fpClassB == FP_INFINITE) ||
705 (fpClassA == FP_INFINITE && fpClassB == FP_ZERO)) {
706 return 2.0;
707 }
708 aXb = a*b;
709 fpClassAxB = std::fpclassify(aXb);
710 if (fpClassAxB == FP_SUBNORMAL) {
711 feraiseexcept(FeUnderflow);
712 return 2.0;
713 }
714 return 2.0 - (a * b);
715};
716
717
718static inline float
719fpRSqrtsS(float a, float b)
720{
721 int fpClassA = std::fpclassify(a);
722 int fpClassB = std::fpclassify(b);
723 float aXb;
724 int fpClassAxB;
725
726 if ((fpClassA == FP_ZERO && fpClassB == FP_INFINITE) ||
727 (fpClassA == FP_INFINITE && fpClassB == FP_ZERO)) {
728 return 1.5;
729 }
730 aXb = a*b;
731 fpClassAxB = std::fpclassify(aXb);
732 if (fpClassAxB == FP_SUBNORMAL) {
733 feraiseexcept(FeUnderflow);
734 return 1.5;
735 }
736 return (3.0 - (a * b)) / 2.0;
737}
738
739static inline float
740fpRecpsS(float a, float b)
741{
742 int fpClassA = std::fpclassify(a);
743 int fpClassB = std::fpclassify(b);
744 float aXb;
745 int fpClassAxB;
746
747 if ((fpClassA == FP_ZERO && fpClassB == FP_INFINITE) ||
748 (fpClassA == FP_INFINITE && fpClassB == FP_ZERO)) {
749 return 2.0;
750 }
751 aXb = a*b;
752 fpClassAxB = std::fpclassify(aXb);
753 if (fpClassAxB == FP_SUBNORMAL) {
754 feraiseexcept(FeUnderflow);
755 return 2.0;
756 }
757 return 2.0 - (a * b);
758}
759
// Round to nearest, ties to even. round() rounds ties away from zero,
// so exact .5 ties are corrected using the parity of the truncated
// input.
// NOTE(review): the (int) casts assume |a| fits in an int; larger
// magnitudes would overflow here — confirm callers only pass values in
// range.
template <typename T>
static inline T
roundNEven(T a) {
    T val;

    val = round(a);
    // a - val == 0.5: round() went down (a negative tie rounded away
    // from zero); step back up when the truncated value is even.
    if (a - val == 0.5) {
        if ( (((int) a) & 1) == 0 ) val += 1.0;
    }
    // a - val == -0.5: a positive tie rounded up; step back down when
    // the truncated value is even.
    else if (a - val == -0.5) {
        if ( (((int) a) & 1) == 0 ) val -= 1.0;
    }
    return val;
}
774
775
776
// Common base class for predicated VFP/NEON floating point
// instructions: provides NaN processing, rounding-mode-aware operation
// wrappers and microcode-aware PC advancement for subclasses.
class FpOp : public PredOp
{
  protected:
    FpOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass) :
        PredOp(mnem, _machInst, __opClass)
    {}

    // doOp overloads are overridden by concrete instructions; the base
    // versions must never be reached.
    virtual float
    doOp(float op1, float op2) const
    {
        panic("Unimplemented version of doOp called.\n");
    }

    virtual float
    doOp(float op1) const
    {
        panic("Unimplemented version of doOp called.\n");
    }

    virtual double
    doOp(double op1, double op2) const
    {
        panic("Unimplemented version of doOp called.\n");
    }

    virtual double
    doOp(double op1) const
    {
        panic("Unimplemented version of doOp called.\n");
    }

    // Reassemble a double from its two 32 bit register halves.
    double
    dbl(uint32_t low, uint32_t high) const
    {
        double junk = 0.0;
        return bitsToFp((uint64_t)low | ((uint64_t)high << 32), junk);
    }

    // Low 32 bits of a double's bit pattern.
    uint32_t
    dblLow(double val) const
    {
        return fpToBits(val);
    }

    // High 32 bits of a double's bit pattern.
    uint32_t
    dblHi(double val) const
    {
        return fpToBits(val) >> 32;
    }

    // NaN handling for two-operand instructions; sets done when a NaN
    // operand fully determines the result. Defined out of line.
    template <class fpType>
    fpType
    processNans(FPSCR &fpscr, bool &done, bool defaultNan,
                fpType op1, fpType op2) const;

    // Run func under the requested rounding/flush settings, updating
    // fpscr with any exceptions. Defined out of line.
    template <class fpType>
    fpType
    ternaryOp(FPSCR &fpscr, fpType op1, fpType op2, fpType op3,
              fpType (*func)(fpType, fpType, fpType),
              bool flush, bool defaultNan, uint32_t rMode) const;

    template <class fpType>
    fpType
    binaryOp(FPSCR &fpscr, fpType op1, fpType op2,
             fpType (*func)(fpType, fpType),
             bool flush, bool defaultNan, uint32_t rMode) const;

    template <class fpType>
    fpType
    unaryOp(FPSCR &fpscr, fpType op1,
            fpType (*func)(fpType),
            bool flush, uint32_t rMode) const;

    // Advance the PC past this instruction, stepping micro-PCs for
    // microcoded instructions.
    void
    advancePC(PCState &pcState) const
    {
        if (flags[IsLastMicroop]) {
            pcState.uEnd();
        } else if (flags[IsMicroop]) {
            pcState.uAdvance();
        } else {
            pcState.advance();
        }
    }

    // Square-root helpers wrapping the host sqrtf/sqrt via unaryOp.
    float
    fpSqrt (FPSCR fpscr,float x) const
    {
        return unaryOp(fpscr,x,sqrtf,fpscr.fz,fpscr.rMode);
    }

    double
    fpSqrt (FPSCR fpscr,double x) const
    {
        return unaryOp(fpscr,x,sqrt,fpscr.fz,fpscr.rMode);
    }
};
878
// Base for conditional FP compare instructions: two source registers,
// a predicate condition, and default flags (defCc) used when the
// condition fails. (Presumably FCCMP/FCCMPE — confirm against the
// decoder.)
class FpCondCompRegOp : public FpOp
{
  protected:
    IntRegIndex op1, op2;    // source registers
    ConditionCode condCode;  // predicate condition
    uint8_t defCc;           // flags used when the condition is false

    FpCondCompRegOp(const char *mnem, ExtMachInst _machInst,
                    OpClass __opClass, IntRegIndex _op1, IntRegIndex _op2,
                    ConditionCode _condCode, uint8_t _defCc) :
        FpOp(mnem, _machInst, __opClass),
        op1(_op1), op2(_op2), condCode(_condCode), defCc(_defCc)
    {}

    std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
};
895
// Base for conditional FP select instructions: chooses between op1 and
// op2 into dest based on condCode. (Presumably FCSEL — confirm against
// the decoder.)
class FpCondSelOp : public FpOp
{
  protected:
    IntRegIndex dest, op1, op2;  // destination and source registers
    ConditionCode condCode;      // predicate condition

    FpCondSelOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
                IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2,
                ConditionCode _condCode) :
        FpOp(mnem, _machInst, __opClass),
        dest(_dest), op1(_op1), op2(_op2), condCode(_condCode)
    {}

    std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
};
911
// FP instruction with one destination and one source register.
class FpRegRegOp : public FpOp
{
  protected:
    IntRegIndex dest;
    IntRegIndex op1;

    FpRegRegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
               IntRegIndex _dest, IntRegIndex _op1,
               VfpMicroMode mode = VfpNotAMicroop) :
        FpOp(mnem, _machInst, __opClass), dest(_dest), op1(_op1)
    {
        // Mark microop/delayed-commit flags for macroop expansions.
        setVfpMicroFlags(mode, flags);
    }

    std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
};
928
// FP instruction with one destination register and an immediate.
class FpRegImmOp : public FpOp
{
  protected:
    IntRegIndex dest;
    uint64_t imm;

    FpRegImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
               IntRegIndex _dest, uint64_t _imm,
               VfpMicroMode mode = VfpNotAMicroop) :
        FpOp(mnem, _machInst, __opClass), dest(_dest), imm(_imm)
    {
        // Mark microop/delayed-commit flags for macroop expansions.
        setVfpMicroFlags(mode, flags);
    }

    std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
};
945
// FP instruction with a destination, one source register and an
// immediate.
class FpRegRegImmOp : public FpOp
{
  protected:
    IntRegIndex dest;
    IntRegIndex op1;
    uint64_t imm;

    FpRegRegImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
                  IntRegIndex _dest, IntRegIndex _op1,
                  uint64_t _imm, VfpMicroMode mode = VfpNotAMicroop) :
        FpOp(mnem, _machInst, __opClass), dest(_dest), op1(_op1), imm(_imm)
    {
        // Mark microop/delayed-commit flags for macroop expansions.
        setVfpMicroFlags(mode, flags);
    }

    std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
};
963
// FP instruction with a destination and two source registers.
class FpRegRegRegOp : public FpOp
{
  protected:
    IntRegIndex dest;
    IntRegIndex op1;
    IntRegIndex op2;

    FpRegRegRegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
                  IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2,
                  VfpMicroMode mode = VfpNotAMicroop) :
        FpOp(mnem, _machInst, __opClass), dest(_dest), op1(_op1), op2(_op2)
    {
        // Mark microop/delayed-commit flags for macroop expansions.
        setVfpMicroFlags(mode, flags);
    }

    std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
};
981
1/*
2 * Copyright (c) 2010-2013 ARM Limited
3 * All rights reserved
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder. You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions are
16 * met: redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer;
18 * redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution;
21 * neither the name of the copyright holders nor the names of its
22 * contributors may be used to endorse or promote products derived from
23 * this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 *
37 * Authors: Gabe Black
38 */
39
40#ifndef __ARCH_ARM_INSTS_VFP_HH__
41#define __ARCH_ARM_INSTS_VFP_HH__
42
43#include <fenv.h>
44
45#include <cmath>
46
47#include "arch/arm/insts/misc.hh"
48#include "arch/arm/miscregs.hh"
49
50namespace ArmISA
51{
52
53enum VfpMicroMode {
54 VfpNotAMicroop,
55 VfpMicroop,
56 VfpFirstMicroop,
57 VfpLastMicroop
58};
59
60template<class T>
61static inline void
62setVfpMicroFlags(VfpMicroMode mode, T &flags)
63{
64 switch (mode) {
65 case VfpMicroop:
66 flags[StaticInst::IsMicroop] = true;
67 break;
68 case VfpFirstMicroop:
69 flags[StaticInst::IsMicroop] =
70 flags[StaticInst::IsFirstMicroop] = true;
71 break;
72 case VfpLastMicroop:
73 flags[StaticInst::IsMicroop] =
74 flags[StaticInst::IsLastMicroop] = true;
75 break;
76 case VfpNotAMicroop:
77 break;
78 }
79 if (mode == VfpMicroop || mode == VfpFirstMicroop) {
80 flags[StaticInst::IsDelayedCommit] = true;
81 }
82}
83
84enum FeExceptionBit
85{
86 FeDivByZero = FE_DIVBYZERO,
87 FeInexact = FE_INEXACT,
88 FeInvalid = FE_INVALID,
89 FeOverflow = FE_OVERFLOW,
90 FeUnderflow = FE_UNDERFLOW,
91 FeAllExceptions = FE_ALL_EXCEPT
92};
93
94enum FeRoundingMode
95{
96 FeRoundDown = FE_DOWNWARD,
97 FeRoundNearest = FE_TONEAREST,
98 FeRoundZero = FE_TOWARDZERO,
99 FeRoundUpward = FE_UPWARD
100};
101
102enum VfpRoundingMode
103{
104 VfpRoundNearest = 0,
105 VfpRoundUpward = 1,
106 VfpRoundDown = 2,
107 VfpRoundZero = 3,
108 VfpRoundAway = 4
109};
110
111static inline float bitsToFp(uint64_t, float);
112static inline double bitsToFp(uint64_t, double);
113static inline uint32_t fpToBits(float);
114static inline uint64_t fpToBits(double);
115
116template <class fpType>
117static inline bool
118flushToZero(fpType &op)
119{
120 fpType junk = 0.0;
121 if (std::fpclassify(op) == FP_SUBNORMAL) {
122 uint64_t bitMask = ULL(0x1) << (sizeof(fpType) * 8 - 1);
123 op = bitsToFp(fpToBits(op) & bitMask, junk);
124 return true;
125 }
126 return false;
127}
128
129template <class fpType>
130static inline bool
131flushToZero(fpType &op1, fpType &op2)
132{
133 bool flush1 = flushToZero(op1);
134 bool flush2 = flushToZero(op2);
135 return flush1 || flush2;
136}
137
138template <class fpType>
139static inline void
140vfpFlushToZero(FPSCR &fpscr, fpType &op)
141{
142 if (fpscr.fz == 1 && flushToZero(op)) {
143 fpscr.idc = 1;
144 }
145}
146
147template <class fpType>
148static inline void
149vfpFlushToZero(FPSCR &fpscr, fpType &op1, fpType &op2)
150{
151 vfpFlushToZero(fpscr, op1);
152 vfpFlushToZero(fpscr, op2);
153}
154
155static inline uint32_t
156fpToBits(float fp)
157{
158 union
159 {
160 float fp;
161 uint32_t bits;
162 } val;
163 val.fp = fp;
164 return val.bits;
165}
166
167static inline uint64_t
168fpToBits(double fp)
169{
170 union
171 {
172 double fp;
173 uint64_t bits;
174 } val;
175 val.fp = fp;
176 return val.bits;
177}
178
179static inline float
180bitsToFp(uint64_t bits, float junk)
181{
182 union
183 {
184 float fp;
185 uint32_t bits;
186 } val;
187 val.bits = bits;
188 return val.fp;
189}
190
191static inline double
192bitsToFp(uint64_t bits, double junk)
193{
194 union
195 {
196 double fp;
197 uint64_t bits;
198 } val;
199 val.bits = bits;
200 return val.fp;
201}
202
203template <class fpType>
204static bool
205isSnan(fpType val)
206{
207 const bool single = (sizeof(fpType) == sizeof(float));
208 const uint64_t qnan =
209 single ? 0x7fc00000 : ULL(0x7ff8000000000000);
210 return std::isnan(val) && ((fpToBits(val) & qnan) != qnan);
211}
212
213typedef int VfpSavedState;
214
215VfpSavedState prepFpState(uint32_t rMode);
216void finishVfp(FPSCR &fpscr, VfpSavedState state, bool flush, FPSCR mask = FpscrExcMask);
217
218template <class fpType>
219fpType fixDest(FPSCR fpscr, fpType val, fpType op1);
220
221template <class fpType>
222fpType fixDest(FPSCR fpscr, fpType val, fpType op1, fpType op2);
223
224template <class fpType>
225fpType fixDivDest(FPSCR fpscr, fpType val, fpType op1, fpType op2);
226
227float fixFpDFpSDest(FPSCR fpscr, double val);
228double fixFpSFpDDest(FPSCR fpscr, float val);
229
230uint16_t vcvtFpSFpH(FPSCR &fpscr, bool flush, bool defaultNan,
231 uint32_t rMode, bool ahp, float op);
232uint16_t vcvtFpDFpH(FPSCR &fpscr, bool flush, bool defaultNan,
233 uint32_t rMode, bool ahp, double op);
234
235float vcvtFpHFpS(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op);
236double vcvtFpHFpD(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op);
237
238static inline double
239makeDouble(uint32_t low, uint32_t high)
240{
241 double junk = 0.0;
242 return bitsToFp((uint64_t)low | ((uint64_t)high << 32), junk);
243}
244
245static inline uint32_t
246lowFromDouble(double val)
247{
248 return fpToBits(val);
249}
250
251static inline uint32_t
252highFromDouble(double val)
253{
254 return fpToBits(val) >> 32;
255}
256
257static inline void
258setFPExceptions(int exceptions) {
259 feclearexcept(FeAllExceptions);
260 feraiseexcept(exceptions);
261}
262
// Convert a floating-point value to a fixed-point integer, honouring a VFP
// rounding mode and ARM saturation semantics.
//
//   val       value to convert (float or double)
//   isSigned  produce a signed (two's complement) result when true
//   width     destination width in bits (16, 32 or 64)
//   imm       number of fraction bits: val is pre-scaled by 2^imm
//   useRmode  when false, keep the host's current rounding mode
//   roundMode VFP rounding mode applied when useRmode is true
//   aarch64   select AArch64 (rather than AArch32) saturation handling
//
// Returns the (possibly saturated) result in the low `width` bits of the
// uint64_t, and raises the corresponding host FP exception flags.
template <typename T>
uint64_t
vfpFpToFixed(T val, bool isSigned, uint8_t width, uint8_t imm, bool
             useRmode = true, VfpRoundingMode roundMode = VfpRoundZero,
             bool aarch64 = false)
{
    int rmode;
    bool roundAwayFix = false;

    if (!useRmode) {
        rmode = fegetround();
    } else {
        // Map the VFP rounding mode onto its host fenv equivalent.
        switch (roundMode)
        {
          case VfpRoundNearest:
            rmode = FeRoundNearest;
            break;
          case VfpRoundUpward:
            rmode = FeRoundUpward;
            break;
          case VfpRoundDown:
            rmode = FeRoundDown;
            break;
          case VfpRoundZero:
            rmode = FeRoundZero;
            break;
          case VfpRoundAway:
            // There is no equivalent host rounding mode, use round down and
            // we'll fix it later (see the roundAwayFix handling below).
            rmode = FeRoundDown;
            roundAwayFix = true;
            break;
          default:
            panic("Unsupported roundMode %d\n", roundMode);
        }
    }
    // The empty asm statements below are compiler barriers: they stop the
    // compiler reordering the FP operations across the fesetround /
    // feclearexcept calls, so each step really executes in the intended
    // rounding-mode/exception-flag context.
    __asm__ __volatile__("" : "=m" (rmode) : "m" (rmode));
    // Scale by 2^imm under round-to-nearest so the scaling itself does not
    // pick up a directed-rounding error.
    fesetround(FeRoundNearest);
    val = val * pow(2.0, imm);
    __asm__ __volatile__("" : "=m" (val) : "m" (val));
    fesetround(rmode);
    feclearexcept(FeAllExceptions);
    __asm__ __volatile__("" : "=m" (val) : "m" (val));
    T origVal = val;
    val = rint(val);
    __asm__ __volatile__("" : "=m" (val) : "m" (val));

    int exceptions = fetestexcept(FeAllExceptions);

    int fpType = std::fpclassify(val);
    if (fpType == FP_SUBNORMAL || fpType == FP_NAN) {
        // NaNs convert to zero and signal Invalid; subnormals flush to zero.
        if (fpType == FP_NAN) {
            exceptions |= FeInvalid;
        }
        val = 0.0;
    } else if (origVal != val) {
        // rint() changed the value; patch up the cases where the host's
        // rounding differs from the required ARM behaviour.
        switch (rmode) {
          case FeRoundNearest:
            if (origVal - val > 0.5)
                val += 1.0;
            else if (val - origVal > 0.5)
                val -= 1.0;
            break;
          case FeRoundDown:
            if (roundAwayFix) {
                // The ordering on the subtraction looks a bit odd in that we
                // don't do the obvious origVal - val, instead we do
                // -(val - origVal). This is required to get the correct bit
                // exact behaviour when very close to the 0.5 threshold.
                volatile T error = val;
                error -= origVal;
                error = -error;
                if ( (error > 0.5) ||
                    ((error == 0.5) && (val >= 0)) )
                    val += 1.0;
            } else {
                if (origVal < val)
                    val -= 1.0;
            }
            break;
          case FeRoundUpward:
            if (origVal > val)
                val += 1.0;
            break;
        }
        exceptions |= FeInexact;
    }

    __asm__ __volatile__("" : "=m" (val) : "m" (val));

    if (isSigned) {
        bool outOfRange = false;
        int64_t result = (int64_t) val;
        uint64_t finalVal;

        if (!aarch64) {
            // AArch32: narrow to the destination width, then saturate
            // against the signed min/max for that width.
            if (width == 16) {
                finalVal = (int16_t)val;
            } else if (width == 32) {
                finalVal =(int32_t)val;
            } else if (width == 64) {
                finalVal = result;
            } else {
                panic("Unsupported width %d\n", width);
            }

            // check if value is in range
            int64_t minVal = ~mask(width-1);
            if ((double)val < minVal) {
                outOfRange = true;
                finalVal = minVal;
            }
            int64_t maxVal = mask(width-1);
            if ((double)val > maxVal) {
                outOfRange = true;
                finalVal = maxVal;
            }
        } else {
            bool isNeg = val < 0;
            finalVal = result & mask(width);
            // If the result is supposed to be less than 64 bits check that the
            // upper bits that got thrown away are just sign extension bits
            if (width != 64) {
                outOfRange = ((uint64_t) result >> (width - 1)) !=
                    (isNeg ? mask(64-width+1) : 0);
            }
            // If the original floating point value doesn't match the integer
            // version, we are also out of range, so create a saturated
            // result.
            if (isNeg) {
                outOfRange |= val < result;
                if (outOfRange) {
                    finalVal = 1LL << (width-1);
                }
            } else {
                outOfRange |= val > result;
                if (outOfRange) {
                    finalVal = mask(width-1);
                }
            }
        }

        // Raise an exception if the value was out of range
        if (outOfRange) {
            exceptions |= FeInvalid;
            exceptions &= ~FeInexact;
        }
        setFPExceptions(exceptions);
        return finalVal;
    } else {
        // Unsigned destination: negative inputs saturate to zero...
        if ((double)val < 0) {
            exceptions |= FeInvalid;
            exceptions &= ~FeInexact;
            setFPExceptions(exceptions);
            return 0;
        }

        uint64_t result = ((uint64_t) val) & mask(width);
        // ...and values beyond the destination range saturate to all-ones.
        if (val > result) {
            exceptions |= FeInvalid;
            exceptions &= ~FeInexact;
            setFPExceptions(exceptions);
            return mask(width);
        }

        setFPExceptions(exceptions);
        return result;
    }
};
432
433
// Fixed-point -> floating-point conversions and Advanced SIMD estimate
// helpers. Definitions are not in this chunk; descriptions inferred from
// names/signatures — confirm against the implementations.

// Convert an unsigned/signed fixed-point value (`width` bits, `imm`
// fraction bits) to single precision.
float vfpUFixedToFpS(bool flush, bool defaultNan,
        uint64_t val, uint8_t width, uint8_t imm);
float vfpSFixedToFpS(bool flush, bool defaultNan,
        int64_t val, uint8_t width, uint8_t imm);

// Same conversions, producing double precision.
double vfpUFixedToFpD(bool flush, bool defaultNan,
        uint64_t val, uint8_t width, uint8_t imm);
double vfpSFixedToFpD(bool flush, bool defaultNan,
        int64_t val, uint8_t width, uint8_t imm);

// Reciprocal square root estimate (FP and unsigned fixed-point forms).
float fprSqrtEstimate(FPSCR &fpscr, float op);
uint32_t unsignedRSqrtEstimate(uint32_t op);

// Reciprocal estimate (FP and unsigned fixed-point forms).
float fpRecipEstimate(FPSCR &fpscr, float op);
uint32_t unsignedRecipEstimate(uint32_t op);
449
// Base class for VFP macro-ops (instructions expanded into several
// micro-ops). Provides register-index stepping helpers used while emitting
// the micro-op sequence.
class VfpMacroOp : public PredMacroOp
{
  public:
    // True when the register lies in the "scalar bank": the first 8
    // registers of each 32-register bank.
    static bool
    inScalarBank(IntRegIndex idx)
    {
        return (idx % 32) < 8;
    }

  protected:
    // NOTE(review): presumably true for double-width (64-bit) operations,
    // affecting how indices are stepped — confirm in addStride/nextIdxs.
    bool wide;

    VfpMacroOp(const char *mnem, ExtMachInst _machInst,
               OpClass __opClass, bool _wide) :
        PredMacroOp(mnem, _machInst, __opClass), wide(_wide)
    {}

    // Advance a register index by `stride`; overloads below step all the
    // operand indices of a micro-op at once. (Defined elsewhere.)
    IntRegIndex addStride(IntRegIndex idx, unsigned stride);
    void nextIdxs(IntRegIndex &dest, IntRegIndex &op1, IntRegIndex &op2);
    void nextIdxs(IntRegIndex &dest, IntRegIndex &op1);
    void nextIdxs(IntRegIndex &dest);
};
472
// Generic IEEE-754 addition of two like-typed operands.
template <typename T>
static inline T
fpAdd(T lhs, T rhs)
{
    const T sum = lhs + rhs;
    return sum;
}
479
// Generic IEEE-754 subtraction: lhs - rhs.
template <typename T>
static inline T
fpSub(T lhs, T rhs)
{
    const T diff = lhs - rhs;
    return diff;
}
486
// Single-precision addition.
static inline float
fpAddS(float lhs, float rhs)
{
    const float sum = lhs + rhs;
    return sum;
}
492
// Double-precision addition.
static inline double
fpAddD(double lhs, double rhs)
{
    const double sum = lhs + rhs;
    return sum;
}
498
// Single-precision subtraction: lhs - rhs.
static inline float
fpSubS(float lhs, float rhs)
{
    const float diff = lhs - rhs;
    return diff;
}
504
// Double-precision subtraction: lhs - rhs.
static inline double
fpSubD(double lhs, double rhs)
{
    const double diff = lhs - rhs;
    return diff;
}
510
// Single-precision division: lhs / rhs.
static inline float
fpDivS(float lhs, float rhs)
{
    const float quot = lhs / rhs;
    return quot;
}
516
// Double-precision division: lhs / rhs.
static inline double
fpDivD(double lhs, double rhs)
{
    const double quot = lhs / rhs;
    return quot;
}
522
// Generic IEEE-754 division: lhs / rhs.
template <typename T>
static inline T
fpDiv(T lhs, T rhs)
{
    const T quot = lhs / rhs;
    return quot;
}
529
// FMULX: identical to a * b except that (±0 × ±Inf) and (±Inf × ±0), which
// would produce a NaN under IEEE rules, instead yield ±2.0 with the sign
// given by the exclusive-or of the operand signs.
template <typename T>
static inline T
fpMulX(T a, T b)
{
    const bool aInf  = std::fpclassify(a) == FP_INFINITE;
    const bool bInf  = std::fpclassify(b) == FP_INFINITE;
    const bool aZero = std::fpclassify(a) == FP_ZERO;
    const bool bZero = std::fpclassify(b) == FP_ZERO;

    if ((aInf && bZero) || (aZero && bInf)) {
        const bool signsDiffer = std::signbit(a) != std::signbit(b);
        return signsDiffer ? (T)(-2.0) : (T)(2.0);
    }
    return a * b;
}
562
563
// Generic IEEE-754 multiplication.
template <typename T>
static inline T
fpMul(T lhs, T rhs)
{
    const T prod = lhs * rhs;
    return prod;
}
570
// Single-precision multiplication.
static inline float
fpMulS(float lhs, float rhs)
{
    const float prod = lhs * rhs;
    return prod;
}
576
// Double-precision multiplication.
static inline double
fpMulD(double lhs, double rhs)
{
    const double prod = lhs * rhs;
    return prod;
}
582
583template <typename T>
584static inline T
585// @todo remove this when all calls to it have been replaced with the new fplib implementation
586fpMulAdd(T op1, T op2, T addend)
587{
588 T result;
589
590 if (sizeof(T) == sizeof(float))
591 result = fmaf(op1, op2, addend);
592 else
593 result = fma(op1, op2, addend);
594
595 // ARM doesn't generate signed nan's from this opperation, so fix up the result
596 if (std::isnan(result) && !std::isnan(op1) &&
597 !std::isnan(op2) && !std::isnan(addend))
598 {
599 uint64_t bitMask = ULL(0x1) << ((sizeof(T) * 8) - 1);
600 result = bitsToFp(fpToBits(result) & ~bitMask, op1);
601 }
602 return result;
603}
604
605template <typename T>
606static inline T
607fpRIntX(T a, FPSCR &fpscr)
608{
609 T rVal;
610
611 rVal = rint(a);
612 if (rVal != a && !std::isnan(a))
613 fpscr.ixc = 1;
614 return (rVal);
615};
616
617template <typename T>
618static inline T
619fpMaxNum(T a, T b)
620{
621 const bool single = (sizeof(T) == sizeof(float));
622 const uint64_t qnan = single ? 0x7fc00000 : ULL(0x7ff8000000000000);
623
624 if (std::isnan(a))
625 return ((fpToBits(a) & qnan) == qnan) ? b : a;
626 if (std::isnan(b))
627 return ((fpToBits(b) & qnan) == qnan) ? a : b;
628 // Handle comparisons of +0 and -0.
629 if (!std::signbit(a) && std::signbit(b))
630 return a;
631 return fmax(a, b);
632};
633
634template <typename T>
635static inline T
636fpMax(T a, T b)
637{
638 if (std::isnan(a))
639 return a;
640 if (std::isnan(b))
641 return b;
642 return fpMaxNum<T>(a, b);
643};
644
645template <typename T>
646static inline T
647fpMinNum(T a, T b)
648{
649 const bool single = (sizeof(T) == sizeof(float));
650 const uint64_t qnan = single ? 0x7fc00000 : ULL(0x7ff8000000000000);
651
652 if (std::isnan(a))
653 return ((fpToBits(a) & qnan) == qnan) ? b : a;
654 if (std::isnan(b))
655 return ((fpToBits(b) & qnan) == qnan) ? a : b;
656 // Handle comparisons of +0 and -0.
657 if (std::signbit(a) && !std::signbit(b))
658 return a;
659 return fmin(a, b);
660};
661
662template <typename T>
663static inline T
664fpMin(T a, T b)
665{
666 if (std::isnan(a))
667 return a;
668 if (std::isnan(b))
669 return b;
670 return fpMinNum<T>(a, b);
671};
672
673template <typename T>
674static inline T
675fpRSqrts(T a, T b)
676{
677 int fpClassA = std::fpclassify(a);
678 int fpClassB = std::fpclassify(b);
679 T aXb;
680 int fpClassAxB;
681
682 if ((fpClassA == FP_ZERO && fpClassB == FP_INFINITE) ||
683 (fpClassA == FP_INFINITE && fpClassB == FP_ZERO)) {
684 return 1.5;
685 }
686 aXb = a*b;
687 fpClassAxB = std::fpclassify(aXb);
688 if (fpClassAxB == FP_SUBNORMAL) {
689 feraiseexcept(FeUnderflow);
690 return 1.5;
691 }
692 return (3.0 - (a * b)) / 2.0;
693};
694
695template <typename T>
696static inline T
697fpRecps(T a, T b)
698{
699 int fpClassA = std::fpclassify(a);
700 int fpClassB = std::fpclassify(b);
701 T aXb;
702 int fpClassAxB;
703
704 if ((fpClassA == FP_ZERO && fpClassB == FP_INFINITE) ||
705 (fpClassA == FP_INFINITE && fpClassB == FP_ZERO)) {
706 return 2.0;
707 }
708 aXb = a*b;
709 fpClassAxB = std::fpclassify(aXb);
710 if (fpClassAxB == FP_SUBNORMAL) {
711 feraiseexcept(FeUnderflow);
712 return 2.0;
713 }
714 return 2.0 - (a * b);
715};
716
717
718static inline float
719fpRSqrtsS(float a, float b)
720{
721 int fpClassA = std::fpclassify(a);
722 int fpClassB = std::fpclassify(b);
723 float aXb;
724 int fpClassAxB;
725
726 if ((fpClassA == FP_ZERO && fpClassB == FP_INFINITE) ||
727 (fpClassA == FP_INFINITE && fpClassB == FP_ZERO)) {
728 return 1.5;
729 }
730 aXb = a*b;
731 fpClassAxB = std::fpclassify(aXb);
732 if (fpClassAxB == FP_SUBNORMAL) {
733 feraiseexcept(FeUnderflow);
734 return 1.5;
735 }
736 return (3.0 - (a * b)) / 2.0;
737}
738
739static inline float
740fpRecpsS(float a, float b)
741{
742 int fpClassA = std::fpclassify(a);
743 int fpClassB = std::fpclassify(b);
744 float aXb;
745 int fpClassAxB;
746
747 if ((fpClassA == FP_ZERO && fpClassB == FP_INFINITE) ||
748 (fpClassA == FP_INFINITE && fpClassB == FP_ZERO)) {
749 return 2.0;
750 }
751 aXb = a*b;
752 fpClassAxB = std::fpclassify(aXb);
753 if (fpClassAxB == FP_SUBNORMAL) {
754 feraiseexcept(FeUnderflow);
755 return 2.0;
756 }
757 return 2.0 - (a * b);
758}
759
// Round to the nearest integral value, ties to even.
//
// std::round() rounds halfway cases away from zero; when the input was
// exactly halfway and that landed on an odd integer, pull the result back
// by one (toward zero) to get ties-to-even.
//
// The previous implementation tested parity with `((int) a) & 1`, which is
// undefined behaviour/overflow for |a| >= 2^31 even though doubles carry
// .5 fractions up to 2^52; std::fmod on the rounded value is parity-safe
// at every magnitude.
template <typename T>
static inline T
roundNEven(T a) {
    T val = std::round(a);
    if (std::fabs(a - val) == (T)0.5 && std::fmod(val, (T)2.0) != (T)0.0) {
        // Halfway case that rounded to an odd integer: step back toward a.
        val -= std::copysign((T)1.0, val);
    }
    return val;
}
774
775
776
// Base class for floating-point instructions. Provides the doOp() hooks
// subclasses override for their operand width/count, helpers to
// (de)compose doubles from 32-bit register halves, NaN-handling and
// unary/binary/ternary operation wrappers, and micro-op aware PC
// advancement.
class FpOp : public PredOp
{
  protected:
    FpOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass) :
        PredOp(mnem, _machInst, __opClass)
    {}

    // doOp() overloads: a subclass overrides the variant(s) matching its
    // operand count and precision; calling an un-overridden one panics.
    virtual float
    doOp(float op1, float op2) const
    {
        panic("Unimplemented version of doOp called.\n");
    }

    virtual float
    doOp(float op1) const
    {
        panic("Unimplemented version of doOp called.\n");
    }

    virtual double
    doOp(double op1, double op2) const
    {
        panic("Unimplemented version of doOp called.\n");
    }

    virtual double
    doOp(double op1) const
    {
        panic("Unimplemented version of doOp called.\n");
    }

    // Reassemble a double from its low/high 32-bit register halves.
    double
    dbl(uint32_t low, uint32_t high) const
    {
        double junk = 0.0;
        return bitsToFp((uint64_t)low | ((uint64_t)high << 32), junk);
    }

    // Low 32 bits of a double's raw encoding.
    uint32_t
    dblLow(double val) const
    {
        return fpToBits(val);
    }

    // High 32 bits of a double's raw encoding.
    uint32_t
    dblHi(double val) const
    {
        return fpToBits(val) >> 32;
    }

    // NOTE(review): presumably computes the NaN-propagation result for a
    // binary op and sets `done` when a NaN decided the outcome — definition
    // is not in this chunk; confirm there.
    template <class fpType>
    fpType
    processNans(FPSCR &fpscr, bool &done, bool defaultNan,
                fpType op1, fpType op2) const;

    // Wrappers that run `func` under FPSCR-directed flush-to-zero,
    // default-NaN and rounding-mode behaviour (defined elsewhere).
    template <class fpType>
    fpType
    ternaryOp(FPSCR &fpscr, fpType op1, fpType op2, fpType op3,
              fpType (*func)(fpType, fpType, fpType),
              bool flush, bool defaultNan, uint32_t rMode) const;

    template <class fpType>
    fpType
    binaryOp(FPSCR &fpscr, fpType op1, fpType op2,
             fpType (*func)(fpType, fpType),
             bool flush, bool defaultNan, uint32_t rMode) const;

    template <class fpType>
    fpType
    unaryOp(FPSCR &fpscr, fpType op1,
            fpType (*func)(fpType),
            bool flush, uint32_t rMode) const;

    // Micro-op aware PC advance: the last micro-op ends the macro-op,
    // other micro-ops advance the micro-PC, plain instructions advance the
    // PC normally.
    void
    advancePC(PCState &pcState) const
    {
        if (flags[IsLastMicroop]) {
            pcState.uEnd();
        } else if (flags[IsMicroop]) {
            pcState.uAdvance();
        } else {
            pcState.advance();
        }
    }

    // Square root routed through unaryOp so FPSCR flush-to-zero and
    // rounding-mode settings are applied.
    float
    fpSqrt (FPSCR fpscr,float x) const
    {

        return unaryOp(fpscr,x,sqrtf,fpscr.fz,fpscr.rMode);

    }

    double
    fpSqrt (FPSCR fpscr,double x) const
    {

        return unaryOp(fpscr,x,sqrt,fpscr.fz,fpscr.rMode);

    }
};
878
// Field container for FP conditional-compare instructions: two source
// registers, the governing condition, and a default flags value used when
// the condition fails. Execution semantics are implemented elsewhere.
class FpCondCompRegOp : public FpOp
{
  protected:
    // Source floating-point register indices.
    IntRegIndex op1, op2;
    // Condition guarding the compare.
    ConditionCode condCode;
    // Default condition-flags value (presumably an NZCV nibble — confirm).
    uint8_t defCc;

    FpCondCompRegOp(const char *mnem, ExtMachInst _machInst,
                    OpClass __opClass, IntRegIndex _op1, IntRegIndex _op2,
                    ConditionCode _condCode, uint8_t _defCc) :
        FpOp(mnem, _machInst, __opClass),
        op1(_op1), op2(_op2), condCode(_condCode), defCc(_defCc)
    {}

    std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
};
895
// Field container for FP conditional-select instructions: destination, two
// sources and the selecting condition. Execution semantics are implemented
// elsewhere.
class FpCondSelOp : public FpOp
{
  protected:
    // Destination and source floating-point register indices.
    IntRegIndex dest, op1, op2;
    // Condition that chooses between the sources.
    ConditionCode condCode;

    FpCondSelOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
                IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2,
                ConditionCode _condCode) :
        FpOp(mnem, _machInst, __opClass),
        dest(_dest), op1(_op1), op2(_op2), condCode(_condCode)
    {}

    std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
};
911
// FP instruction with a destination and one source register; `mode` marks
// it as a VFP micro-op when it is part of a macro-op expansion.
class FpRegRegOp : public FpOp
{
  protected:
    IntRegIndex dest;
    IntRegIndex op1;

    FpRegRegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
               IntRegIndex _dest, IntRegIndex _op1,
               VfpMicroMode mode = VfpNotAMicroop) :
        FpOp(mnem, _machInst, __opClass), dest(_dest), op1(_op1)
    {
        setVfpMicroFlags(mode, flags);
    }

    std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
};
928
// FP instruction with a destination register and an immediate operand;
// `mode` marks it as a VFP micro-op when part of a macro-op expansion.
class FpRegImmOp : public FpOp
{
  protected:
    IntRegIndex dest;
    uint64_t imm;

    FpRegImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
               IntRegIndex _dest, uint64_t _imm,
               VfpMicroMode mode = VfpNotAMicroop) :
        FpOp(mnem, _machInst, __opClass), dest(_dest), imm(_imm)
    {
        setVfpMicroFlags(mode, flags);
    }

    std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
};
945
// FP instruction with a destination, one source register and an immediate;
// `mode` marks it as a VFP micro-op when part of a macro-op expansion.
class FpRegRegImmOp : public FpOp
{
  protected:
    IntRegIndex dest;
    IntRegIndex op1;
    uint64_t imm;

    FpRegRegImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
                  IntRegIndex _dest, IntRegIndex _op1,
                  uint64_t _imm, VfpMicroMode mode = VfpNotAMicroop) :
        FpOp(mnem, _machInst, __opClass), dest(_dest), op1(_op1), imm(_imm)
    {
        setVfpMicroFlags(mode, flags);
    }

    std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
};
963
// FP instruction with a destination and two source registers; `mode` marks
// it as a VFP micro-op when part of a macro-op expansion.
class FpRegRegRegOp : public FpOp
{
  protected:
    IntRegIndex dest;
    IntRegIndex op1;
    IntRegIndex op2;

    FpRegRegRegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
                  IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2,
                  VfpMicroMode mode = VfpNotAMicroop) :
        FpOp(mnem, _machInst, __opClass), dest(_dest), op1(_op1), op2(_op2)
    {
        setVfpMicroFlags(mode, flags);
    }

    std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
};
981
// FP instruction with a destination, two source registers and a condition;
// `mode` marks it as a VFP micro-op when part of a macro-op expansion.
class FpRegRegRegCondOp : public FpOp
{
  protected:
    IntRegIndex dest;
    IntRegIndex op1;
    IntRegIndex op2;
    ConditionCode cond;

    FpRegRegRegCondOp(const char *mnem, ExtMachInst _machInst,
                      OpClass __opClass, IntRegIndex _dest, IntRegIndex _op1,
                      IntRegIndex _op2, ConditionCode _cond,
                      VfpMicroMode mode = VfpNotAMicroop) :
        FpOp(mnem, _machInst, __opClass), dest(_dest), op1(_op1), op2(_op2),
        cond(_cond)
    {
        setVfpMicroFlags(mode, flags);
    }

    std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
};
1002
// FP instruction with a destination and three source registers; `mode`
// marks it as a VFP micro-op when part of a macro-op expansion.
class FpRegRegRegRegOp : public FpOp
{
  protected:
    IntRegIndex dest;
    IntRegIndex op1;
    IntRegIndex op2;
    IntRegIndex op3;

    FpRegRegRegRegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
                     IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2,
                     IntRegIndex _op3, VfpMicroMode mode = VfpNotAMicroop) :
        FpOp(mnem, _machInst, __opClass), dest(_dest), op1(_op1), op2(_op2),
        op3(_op3)
    {
        setVfpMicroFlags(mode, flags);
    }

    std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
};
1001
// FP instruction with a destination, two source registers and an immediate;
// `mode` marks it as a VFP micro-op when part of a macro-op expansion.
class FpRegRegRegImmOp : public FpOp
{
  protected:
    IntRegIndex dest;
    IntRegIndex op1;
    IntRegIndex op2;
    uint64_t imm;

    FpRegRegRegImmOp(const char *mnem, ExtMachInst _machInst,
                     OpClass __opClass, IntRegIndex _dest,
                     IntRegIndex _op1, IntRegIndex _op2,
                     uint64_t _imm, VfpMicroMode mode = VfpNotAMicroop) :
        FpOp(mnem, _machInst, __opClass),
        dest(_dest), op1(_op1), op2(_op2), imm(_imm)
    {
        setVfpMicroFlags(mode, flags);
    }

    std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
};
1022
1023}
1024
1025#endif //__ARCH_ARM_INSTS_VFP_HH__
// NOTE(review): this definition appears to be an exact duplicate of the
// FpRegRegRegRegOp defined earlier in this file view — it looks like a
// merge/extraction artifact. Confirm only one copy exists in the real
// header; two definitions in one translation unit will not compile.
//
// FP instruction with a destination and three source registers; `mode`
// marks it as a VFP micro-op when part of a macro-op expansion.
class FpRegRegRegRegOp : public FpOp
{
  protected:
    IntRegIndex dest;
    IntRegIndex op1;
    IntRegIndex op2;
    IntRegIndex op3;

    FpRegRegRegRegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
                     IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2,
                     IntRegIndex _op3, VfpMicroMode mode = VfpNotAMicroop) :
        FpOp(mnem, _machInst, __opClass), dest(_dest), op1(_op1), op2(_op2),
        op3(_op3)
    {
        setVfpMicroFlags(mode, flags);
    }

    std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
};
1022
// NOTE(review): this definition appears to be an exact duplicate of the
// FpRegRegRegImmOp defined earlier in this file view — it looks like a
// merge/extraction artifact. Confirm only one copy exists in the real
// header; two definitions in one translation unit will not compile.
//
// FP instruction with a destination, two source registers and an immediate;
// `mode` marks it as a VFP micro-op when part of a macro-op expansion.
class FpRegRegRegImmOp : public FpOp
{
  protected:
    IntRegIndex dest;
    IntRegIndex op1;
    IntRegIndex op2;
    uint64_t imm;

    FpRegRegRegImmOp(const char *mnem, ExtMachInst _machInst,
                     OpClass __opClass, IntRegIndex _dest,
                     IntRegIndex _op1, IntRegIndex _op2,
                     uint64_t _imm, VfpMicroMode mode = VfpNotAMicroop) :
        FpOp(mnem, _machInst, __opClass),
        dest(_dest), op1(_op1), op2(_op2), imm(_imm)
    {
        setVfpMicroFlags(mode, flags);
    }

    std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
};
1043
1044}
1045
1046#endif //__ARCH_ARM_INSTS_VFP_HH__