// vfp.hh revision 7384
1/* 2 * Copyright (c) 2010 ARM Limited 3 * All rights reserved 4 * 5 * The license below extends only to copyright in the software and shall 6 * not be construed as granting a license to any other intellectual 7 * property including but not limited to intellectual property relating 8 * to a hardware implementation of the functionality of the software 9 * licensed hereunder. You may use the software subject to the license 10 * terms below provided that you ensure that this notice is replicated 11 * unmodified and in its entirety in all distributions of the software, 12 * modified or unmodified, in source code or in binary form. 13 * 14 * Redistribution and use in source and binary forms, with or without 15 * modification, are permitted provided that the following conditions are 16 * met: redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer; 18 * redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution; 21 * neither the name of the copyright holders nor the names of its 22 * contributors may be used to endorse or promote products derived from 23 * this software without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 28 * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 36 * 37 * Authors: Gabe Black 38 */ 39 40#ifndef __ARCH_ARM_INSTS_VFP_HH__ 41#define __ARCH_ARM_INSTS_VFP_HH__ 42 43#include "arch/arm/insts/misc.hh" 44#include "arch/arm/miscregs.hh" 45#include <fenv.h> 46#include <cmath> 47 48namespace ArmISA 49{ 50 51enum VfpMicroMode { 52 VfpNotAMicroop, 53 VfpMicroop, 54 VfpFirstMicroop, 55 VfpLastMicroop 56}; 57 58template<class T> 59static inline void 60setVfpMicroFlags(VfpMicroMode mode, T &flags) 61{ 62 switch (mode) { 63 case VfpMicroop: 64 flags[StaticInst::IsMicroop] = true; 65 break; 66 case VfpFirstMicroop: 67 flags[StaticInst::IsMicroop] = 68 flags[StaticInst::IsFirstMicroop] = true; 69 break; 70 case VfpLastMicroop: 71 flags[StaticInst::IsMicroop] = 72 flags[StaticInst::IsLastMicroop] = true; 73 break; 74 case VfpNotAMicroop: 75 break; 76 } 77 if (mode == VfpMicroop || mode == VfpFirstMicroop) { 78 flags[StaticInst::IsDelayedCommit] = true; 79 } 80} 81 82enum FeExceptionBit 83{ 84 FeDivByZero = FE_DIVBYZERO, 85 FeInexact = FE_INEXACT, 86 FeInvalid = FE_INVALID, 87 FeOverflow = FE_OVERFLOW, 88 FeUnderflow = FE_UNDERFLOW, 89 FeAllExceptions = FE_ALL_EXCEPT 90}; 91 92enum FeRoundingMode 93{ 94 FeRoundDown = FE_DOWNWARD, 95 FeRoundNearest = FE_TONEAREST, 96 FeRoundZero = FE_TOWARDZERO, 97 FeRoundUpward = FE_UPWARD 98}; 99 100enum VfpRoundingMode 101{ 102 VfpRoundNearest = 0, 103 VfpRoundUpward = 1, 104 VfpRoundDown = 2, 105 VfpRoundZero = 3 106}; 107 108template <class fpType> 
109static inline void 110vfpFlushToZero(uint32_t &_fpscr, fpType &op) 111{ 112 FPSCR fpscr = _fpscr; 113 if (fpscr.fz == 1 && (std::fpclassify(op) == FP_SUBNORMAL)) { 114 fpscr.idc = 1; 115 op = 0; 116 } 117 _fpscr = fpscr; 118} 119 120template <class fpType> 121static inline void 122vfpFlushToZero(uint32_t &fpscr, fpType &op1, fpType &op2) 123{ 124 vfpFlushToZero(fpscr, op1); 125 vfpFlushToZero(fpscr, op2); 126} 127 128static inline uint32_t 129fpToBits(float fp) 130{ 131 union 132 { 133 float fp; 134 uint32_t bits; 135 } val; 136 val.fp = fp; 137 return val.bits; 138} 139 140static inline uint64_t 141fpToBits(double fp) 142{ 143 union 144 { 145 double fp; 146 uint64_t bits; 147 } val; 148 val.fp = fp; 149 return val.bits; 150} 151 152static inline float 153bitsToFp(uint64_t bits, float junk) 154{ 155 union 156 { 157 float fp; 158 uint32_t bits; 159 } val; 160 val.bits = bits; 161 return val.fp; 162} 163 164static inline double 165bitsToFp(uint64_t bits, double junk) 166{ 167 union 168 { 169 double fp; 170 uint64_t bits; 171 } val; 172 val.bits = bits; 173 return val.fp; 174} 175 176template <class fpType> 177static inline fpType 178fixNan(FPSCR fpscr, fpType val, fpType op1, fpType op2) 179{ 180 if (std::isnan(val)) { 181 const bool single = (sizeof(val) == sizeof(float)); 182 const uint64_t qnan = single ? 
0x7fc00000 : ULL(0x7ff8000000000000); 183 const bool nan1 = std::isnan(op1); 184 const bool nan2 = std::isnan(op2); 185 const bool signal1 = nan1 && ((fpToBits(op1) & qnan) != qnan); 186 const bool signal2 = nan2 && ((fpToBits(op2) & qnan) != qnan); 187 fpType junk = 0.0; 188 if ((!nan1 && !nan2) || (fpscr.dn == 1)) { 189 val = bitsToFp(qnan, junk); 190 } else if (signal1) { 191 val = bitsToFp(fpToBits(op1) | qnan, junk); 192 } else if (signal2) { 193 val = bitsToFp(fpToBits(op2) | qnan, junk); 194 } else if (nan1) { 195 val = op1; 196 } else if (nan2) { 197 val = op2; 198 } 199 } 200 return val; 201} 202 203static inline uint64_t 204vfpFpSToFixed(float val, bool isSigned, bool half, uint8_t imm) 205{ 206 fesetround(FeRoundZero); 207 val = val * powf(2.0, imm); 208 __asm__ __volatile__("" : "=m" (val) : "m" (val)); 209 feclearexcept(FeAllExceptions); 210 __asm__ __volatile__("" : "=m" (val) : "m" (val)); 211 float origVal = val; 212 val = rintf(val); 213 int fpType = std::fpclassify(val); 214 if (fpType == FP_SUBNORMAL || fpType == FP_NAN) { 215 if (fpType == FP_NAN) { 216 feraiseexcept(FeInvalid); 217 } 218 val = 0.0; 219 } else if (origVal != val) { 220 feraiseexcept(FeInexact); 221 } 222 223 if (isSigned) { 224 if (half) { 225 if ((double)val < (int16_t)(1 << 15)) { 226 feraiseexcept(FeInvalid); 227 feclearexcept(FeInexact); 228 return (int16_t)(1 << 15); 229 } 230 if ((double)val > (int16_t)mask(15)) { 231 feraiseexcept(FeInvalid); 232 feclearexcept(FeInexact); 233 return (int16_t)mask(15); 234 } 235 return (int16_t)val; 236 } else { 237 if ((double)val < (int32_t)(1 << 31)) { 238 feraiseexcept(FeInvalid); 239 feclearexcept(FeInexact); 240 return (int32_t)(1 << 31); 241 } 242 if ((double)val > (int32_t)mask(31)) { 243 feraiseexcept(FeInvalid); 244 feclearexcept(FeInexact); 245 return (int32_t)mask(31); 246 } 247 return (int32_t)val; 248 } 249 } else { 250 if (half) { 251 if ((double)val < 0) { 252 feraiseexcept(FeInvalid); 253 feclearexcept(FeInexact); 254 
return 0; 255 } 256 if ((double)val > (mask(16))) { 257 feraiseexcept(FeInvalid); 258 feclearexcept(FeInexact); 259 return mask(16); 260 } 261 return (uint16_t)val; 262 } else { 263 if ((double)val < 0) { 264 feraiseexcept(FeInvalid); 265 feclearexcept(FeInexact); 266 return 0; 267 } 268 if ((double)val > (mask(32))) { 269 feraiseexcept(FeInvalid); 270 feclearexcept(FeInexact); 271 return mask(32); 272 } 273 return (uint32_t)val; 274 } 275 } 276} 277 278static inline float 279vfpUFixedToFpS(uint32_t val, bool half, uint8_t imm) 280{ 281 fesetround(FeRoundNearest); 282 if (half) 283 val = (uint16_t)val; 284 float scale = powf(2.0, imm); 285 __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); 286 feclearexcept(FeAllExceptions); 287 __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); 288 return val / scale; 289} 290 291static inline float 292vfpSFixedToFpS(int32_t val, bool half, uint8_t imm) 293{ 294 fesetround(FeRoundNearest); 295 if (half) 296 val = sext<16>(val & mask(16)); 297 float scale = powf(2.0, imm); 298 __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); 299 feclearexcept(FeAllExceptions); 300 __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); 301 return val / scale; 302} 303 304static inline uint64_t 305vfpFpDToFixed(double val, bool isSigned, bool half, uint8_t imm) 306{ 307 fesetround(FeRoundNearest); 308 val = val * pow(2.0, imm); 309 __asm__ __volatile__("" : "=m" (val) : "m" (val)); 310 fesetround(FeRoundZero); 311 feclearexcept(FeAllExceptions); 312 __asm__ __volatile__("" : "=m" (val) : "m" (val)); 313 double origVal = val; 314 val = rint(val); 315 int fpType = std::fpclassify(val); 316 if (fpType == FP_SUBNORMAL || fpType == FP_NAN) { 317 if (fpType == FP_NAN) { 318 feraiseexcept(FeInvalid); 319 } 320 val = 0.0; 321 } else if (origVal != val) { 322 feraiseexcept(FeInexact); 323 } 324 if (isSigned) { 325 if (half) { 326 if (val < (int16_t)(1 << 15)) { 327 feraiseexcept(FeInvalid); 328 feclearexcept(FeInexact); 329 return (int16_t)(1 << 
15); 330 } 331 if (val > (int16_t)mask(15)) { 332 feraiseexcept(FeInvalid); 333 feclearexcept(FeInexact); 334 return (int16_t)mask(15); 335 } 336 return (int16_t)val; 337 } else { 338 if (val < (int32_t)(1 << 31)) { 339 feraiseexcept(FeInvalid); 340 feclearexcept(FeInexact); 341 return (int32_t)(1 << 31); 342 } 343 if (val > (int32_t)mask(31)) { 344 feraiseexcept(FeInvalid); 345 feclearexcept(FeInexact); 346 return (int32_t)mask(31); 347 } 348 return (int32_t)val; 349 } 350 } else { 351 if (half) { 352 if (val < 0) { 353 feraiseexcept(FeInvalid); 354 feclearexcept(FeInexact); 355 return 0; 356 } 357 if (val > mask(16)) { 358 feraiseexcept(FeInvalid); 359 feclearexcept(FeInexact); 360 return mask(16); 361 } 362 return (uint16_t)val; 363 } else { 364 if (val < 0) { 365 feraiseexcept(FeInvalid); 366 feclearexcept(FeInexact); 367 return 0; 368 } 369 if (val > mask(32)) { 370 feraiseexcept(FeInvalid); 371 feclearexcept(FeInexact); 372 return mask(32); 373 } 374 return (uint32_t)val; 375 } 376 } 377} 378 379static inline double 380vfpUFixedToFpD(uint32_t val, bool half, uint8_t imm) 381{ 382 fesetround(FeRoundNearest); 383 if (half) 384 val = (uint16_t)val; 385 double scale = pow(2.0, imm); 386 __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); 387 feclearexcept(FeAllExceptions); 388 __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); 389 return val / scale; 390} 391 392static inline double 393vfpSFixedToFpD(int32_t val, bool half, uint8_t imm) 394{ 395 fesetround(FeRoundNearest); 396 if (half) 397 val = sext<16>(val & mask(16)); 398 double scale = pow(2.0, imm); 399 __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); 400 feclearexcept(FeAllExceptions); 401 __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); 402 return val / scale; 403} 404 405typedef int VfpSavedState; 406 407static inline VfpSavedState 408prepVfpFpscr(FPSCR fpscr) 409{ 410 int roundingMode = fegetround(); 411 feclearexcept(FeAllExceptions); 412 switch (fpscr.rMode) { 413 case 
VfpRoundNearest: 414 fesetround(FeRoundNearest); 415 break; 416 case VfpRoundUpward: 417 fesetround(FeRoundUpward); 418 break; 419 case VfpRoundDown: 420 fesetround(FeRoundDown); 421 break; 422 case VfpRoundZero: 423 fesetround(FeRoundZero); 424 break; 425 } 426 return roundingMode; 427} 428 429static inline FPSCR 430setVfpFpscr(FPSCR fpscr, VfpSavedState state) 431{ 432 int exceptions = fetestexcept(FeAllExceptions); 433 if (exceptions & FeInvalid) { 434 fpscr.ioc = 1; 435 } 436 if (exceptions & FeDivByZero) { 437 fpscr.dzc = 1; 438 } 439 if (exceptions & FeOverflow) { 440 fpscr.ofc = 1; 441 } 442 if (exceptions & FeUnderflow) { 443 fpscr.ufc = 1; 444 } 445 if (exceptions & FeInexact) { 446 fpscr.ixc = 1; 447 } 448 fesetround(state); 449 return fpscr; 450} 451 452class VfpMacroOp : public PredMacroOp 453{ 454 public: 455 static bool 456 inScalarBank(IntRegIndex idx) 457 { 458 return (idx % 32) < 8; 459 } 460 461 protected: 462 bool wide; 463 464 VfpMacroOp(const char *mnem, ExtMachInst _machInst, 465 OpClass __opClass, bool _wide) : 466 PredMacroOp(mnem, _machInst, __opClass), wide(_wide) 467 {} 468 469 IntRegIndex 470 addStride(IntRegIndex idx, unsigned stride) 471 { 472 if (wide) { 473 stride *= 2; 474 } 475 unsigned offset = idx % 8; 476 idx = (IntRegIndex)(idx - offset); 477 offset += stride; 478 idx = (IntRegIndex)(idx + (offset % 8)); 479 return idx; 480 } 481 482 void 483 nextIdxs(IntRegIndex &dest, IntRegIndex &op1, IntRegIndex &op2) 484 { 485 unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2; 486 assert(!inScalarBank(dest)); 487 dest = addStride(dest, stride); 488 op1 = addStride(op1, stride); 489 if (!inScalarBank(op2)) { 490 op2 = addStride(op2, stride); 491 } 492 } 493 494 void 495 nextIdxs(IntRegIndex &dest, IntRegIndex &op1) 496 { 497 unsigned stride = (machInst.fpscrStride == 0) ? 
1 : 2; 498 assert(!inScalarBank(dest)); 499 dest = addStride(dest, stride); 500 if (!inScalarBank(op1)) { 501 op1 = addStride(op1, stride); 502 } 503 } 504 505 void 506 nextIdxs(IntRegIndex &dest) 507 { 508 unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2; 509 assert(!inScalarBank(dest)); 510 dest = addStride(dest, stride); 511 } 512}; 513 514class VfpRegRegOp : public RegRegOp 515{ 516 protected: 517 VfpRegRegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, 518 IntRegIndex _dest, IntRegIndex _op1, 519 VfpMicroMode mode = VfpNotAMicroop) : 520 RegRegOp(mnem, _machInst, __opClass, _dest, _op1) 521 { 522 setVfpMicroFlags(mode, flags); 523 } 524}; 525 526class VfpRegImmOp : public RegImmOp 527{ 528 protected: 529 VfpRegImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, 530 IntRegIndex _dest, uint64_t _imm, 531 VfpMicroMode mode = VfpNotAMicroop) : 532 RegImmOp(mnem, _machInst, __opClass, _dest, _imm) 533 { 534 setVfpMicroFlags(mode, flags); 535 } 536}; 537 538class VfpRegRegImmOp : public RegRegImmOp 539{ 540 protected: 541 VfpRegRegImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, 542 IntRegIndex _dest, IntRegIndex _op1, 543 uint64_t _imm, VfpMicroMode mode = VfpNotAMicroop) : 544 RegRegImmOp(mnem, _machInst, __opClass, _dest, _op1, _imm) 545 { 546 setVfpMicroFlags(mode, flags); 547 } 548}; 549 550class VfpRegRegRegOp : public RegRegRegOp 551{ 552 protected: 553 VfpRegRegRegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, 554 IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2, 555 VfpMicroMode mode = VfpNotAMicroop) : 556 RegRegRegOp(mnem, _machInst, __opClass, _dest, _op1, _op2) 557 { 558 setVfpMicroFlags(mode, flags); 559 } 560}; 561 562} 563 564#endif //__ARCH_ARM_INSTS_VFP_HH__ 565