#include "fplib.hh"

namespace ArmISA
{

#define FPLIB_RN 0
#define FPLIB_RP 1
#define FPLIB_RM 2
#define FPLIB_RZ 3
#define FPLIB_FZ 4
#define FPLIB_DN 8
#define FPLIB_AHP 16
#define FPLIB_FZ16 32

#define FPLIB_IDC 128 // Input Denormal
#define FPLIB_IXC 16  // Inexact
#define FPLIB_UFC 8   // Underflow
#define FPLIB_OFC 4   // Overflow
#define FPLIB_DZC 2   // Division by Zero
#define FPLIB_IOC 1   // Invalid Operation

#define FP16_BITS 16
#define FP32_BITS 32
#define FP64_BITS 64

#define FP16_EXP_BITS 5
#define FP32_EXP_BITS 8
#define FP64_EXP_BITS 11

#define FP16_EXP_BIAS 15
#define FP32_EXP_BIAS 127
#define FP64_EXP_BIAS 1023

#define FP16_EXP_INF ((1ULL << FP16_EXP_BITS) - 1)
#define FP32_EXP_INF ((1ULL << FP32_EXP_BITS) - 1)
#define FP64_EXP_INF ((1ULL << FP64_EXP_BITS) - 1)

#define FP16_MANT_BITS (FP16_BITS - FP16_EXP_BITS - 1)
#define FP32_MANT_BITS (FP32_BITS - FP32_EXP_BITS - 1)
#define FP64_MANT_BITS (FP64_BITS - FP64_EXP_BITS - 1)

#define FP16_EXP(x) ((x) >> FP16_MANT_BITS & ((1ULL << FP16_EXP_BITS) - 1))
#define FP32_EXP(x) ((x) >> FP32_MANT_BITS & ((1ULL << FP32_EXP_BITS) - 1))
#define FP64_EXP(x) ((x) >> FP64_MANT_BITS & ((1ULL << FP64_EXP_BITS) - 1))

#define FP16_MANT(x) ((x) & ((1ULL << FP16_MANT_BITS) - 1))
#define FP32_MANT(x) ((x) & ((1ULL << FP32_MANT_BITS) - 1))
#define FP64_MANT(x) ((x) & ((1ULL << FP64_MANT_BITS) - 1))

static inline uint16_t
lsl16(uint16_t x, uint32_t shift)
{
    return shift < 16 ? x << shift : 0;
}

static inline uint16_t
lsr16(uint16_t x, uint32_t shift)
{
    return shift < 16 ? x >> shift : 0;
}

static inline uint32_t
lsl32(uint32_t x, uint32_t shift)
{
    return shift < 32 ? x << shift : 0;
}

static inline uint32_t
lsr32(uint32_t x, uint32_t shift)
{
    return shift < 32 ? x >> shift : 0;
}

static inline uint64_t
lsl64(uint64_t x, uint32_t shift)
{
    return shift < 64 ? x << shift : 0;
}

static inline uint64_t
lsr64(uint64_t x, uint32_t shift)
{
    return shift < 64 ? x >> shift : 0;
}
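// The lsl/lsr helpers above return 0 once the shift reaches the operand
// width; a plain C++ shift by >= the width would be undefined behaviour.
// The 128-bit variants below extend the same convention to a pair of
// 64-bit halves, with x0/r0 holding the low half and x1/r1 the high half.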
static inline void
lsl128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift)
{
    if (shift == 0) {
        *r1 = x1;
        *r0 = x0;
    } else if (shift < 64) {
        *r1 = x1 << shift | x0 >> (64 - shift);
        *r0 = x0 << shift;
    } else if (shift < 128) {
        *r1 = x0 << (shift - 64);
        *r0 = 0;
    } else {
        *r1 = 0;
        *r0 = 0;
    }
}

static inline void
lsr128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift)
{
    if (shift == 0) {
        *r1 = x1;
        *r0 = x0;
    } else if (shift < 64) {
        *r0 = x0 >> shift | x1 << (64 - shift);
        *r1 = x1 >> shift;
    } else if (shift < 128) {
        *r0 = x1 >> (shift - 64);
        *r1 = 0;
    } else {
        *r0 = 0;
        *r1 = 0;
    }
}
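// mul62x62 below computes the full product of two operands that fit in
// 62 bits. Each operand is split into 31-bit halves and the Karatsuba
// identity a1*b0 + a0*b1 = (a0 + a1)*(b0 + b1) - a0*b0 - a1*b1 reduces
// the work to three multiplications. The partial sums s0..s2 cannot
// overflow because the halves are limited to 31 bits; the high result
// word is s2 >> 2 since s2 is already positioned at bit 62.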
static inline void
mul62x62(uint64_t *x0, uint64_t *x1, uint64_t a, uint64_t b)
{
    uint32_t mask = ((uint32_t)1 << 31) - 1;
    uint64_t a0 = a & mask;
    uint64_t a1 = a >> 31 & mask;
    uint64_t b0 = b & mask;
    uint64_t b1 = b >> 31 & mask;
    uint64_t p0 = a0 * b0;
    uint64_t p2 = a1 * b1;
    uint64_t p1 = (a0 + a1) * (b0 + b1) - p0 - p2;
    uint64_t s0 = p0;
    uint64_t s1 = (s0 >> 31) + p1;
    uint64_t s2 = (s1 >> 31) + p2;
    *x0 = (s0 & mask) | (s1 & mask) << 31 | s2 << 62;
    *x1 = s2 >> 2;
}

static inline void
mul64x32(uint64_t *x0, uint64_t *x1, uint64_t a, uint32_t b)
{
    uint64_t t0 = (uint64_t)(uint32_t)a * b;
    uint64_t t1 = (t0 >> 32) + (a >> 32) * b;
    *x0 = t1 << 32 | (uint32_t)t0;
    *x1 = t1 >> 32;
}

static inline void
add128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0,
       uint64_t b1)
{
    *x0 = a0 + b0;
    *x1 = a1 + b1 + (*x0 < a0);
}

static inline void
sub128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0,
       uint64_t b1)
{
    *x0 = a0 - b0;
    *x1 = a1 - b1 - (*x0 > a0);
}

static inline int
cmp128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1)
{
    return (a1 < b1 ? -1 : a1 > b1 ? 1 : a0 < b0 ? -1 : a0 > b0 ? 1 : 0);
}

static inline uint16_t
fp16_normalise(uint16_t mnt, int *exp)
{
    int shift;

    if (!mnt) {
        return 0;
    }

    for (shift = 8; shift; shift >>= 1) {
        if (!(mnt >> (16 - shift))) {
            mnt <<= shift;
            *exp -= shift;
        }
    }
    return mnt;
}

static inline uint32_t
fp32_normalise(uint32_t mnt, int *exp)
{
    int shift;

    if (!mnt) {
        return 0;
    }

    for (shift = 16; shift; shift >>= 1) {
        if (!(mnt >> (32 - shift))) {
            mnt <<= shift;
            *exp -= shift;
        }
    }
    return mnt;
}

static inline uint64_t
fp64_normalise(uint64_t mnt, int *exp)
{
    int shift;

    if (!mnt) {
        return 0;
    }

    for (shift = 32; shift; shift >>= 1) {
        if (!(mnt >> (64 - shift))) {
            mnt <<= shift;
            *exp -= shift;
        }
    }
    return mnt;
}

static inline void
fp128_normalise(uint64_t *mnt0, uint64_t *mnt1, int *exp)
{
    uint64_t x0 = *mnt0;
    uint64_t x1 = *mnt1;
    int shift;

    if (!x0 && !x1) {
        return;
    }

    if (!x1) {
        x1 = x0;
        x0 = 0;
        *exp -= 64;
    }

    for (shift = 32; shift; shift >>= 1) {
        if (!(x1 >> (64 - shift))) {
            x1 = x1 << shift | x0 >> (64 - shift);
            x0 <<= shift;
            *exp -= shift;
        }
    }

    *mnt0 = x0;
    *mnt1 = x1;
}
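// Packing and unpacking use the IEEE 754 layout: sign in the top bit, a
// biased exponent below it, and the fraction in the low bits (1/5/10 for
// fp16, 1/8/23 for fp32, 1/11/52 for fp64). Unpacking makes the implicit
// leading one explicit for normal numbers; for subnormals the exponent is
// bumped to 1 instead, so all finite values share the form
// mnt * 2^(exp - bias - MANT_BITS).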
static inline uint16_t
fp16_pack(uint16_t sgn, uint16_t exp, uint16_t mnt)
{
    return sgn << (FP16_BITS - 1) | exp << FP16_MANT_BITS | FP16_MANT(mnt);
}

static inline uint32_t
fp32_pack(uint32_t sgn, uint32_t exp, uint32_t mnt)
{
    return sgn << (FP32_BITS - 1) | exp << FP32_MANT_BITS | FP32_MANT(mnt);
}

static inline uint64_t
fp64_pack(uint64_t sgn, uint64_t exp, uint64_t mnt)
{
    return sgn << (FP64_BITS - 1) | exp << FP64_MANT_BITS | FP64_MANT(mnt);
}

static inline uint16_t
fp16_zero(int sgn)
{
    return fp16_pack(sgn, 0, 0);
}

static inline uint32_t
fp32_zero(int sgn)
{
    return fp32_pack(sgn, 0, 0);
}

static inline uint64_t
fp64_zero(int sgn)
{
    return fp64_pack(sgn, 0, 0);
}

static inline uint16_t
fp16_max_normal(int sgn)
{
    return fp16_pack(sgn, FP16_EXP_INF - 1, -1);
}

static inline uint32_t
fp32_max_normal(int sgn)
{
    return fp32_pack(sgn, FP32_EXP_INF - 1, -1);
}

static inline uint64_t
fp64_max_normal(int sgn)
{
    return fp64_pack(sgn, FP64_EXP_INF - 1, -1);
}

static inline uint16_t
fp16_infinity(int sgn)
{
    return fp16_pack(sgn, FP16_EXP_INF, 0);
}

static inline uint32_t
fp32_infinity(int sgn)
{
    return fp32_pack(sgn, FP32_EXP_INF, 0);
}

static inline uint64_t
fp64_infinity(int sgn)
{
    return fp64_pack(sgn, FP64_EXP_INF, 0);
}

static inline uint16_t
fp16_defaultNaN()
{
    return fp16_pack(0, FP16_EXP_INF, 1ULL << (FP16_MANT_BITS - 1));
}

static inline uint32_t
fp32_defaultNaN()
{
    return fp32_pack(0, FP32_EXP_INF, 1ULL << (FP32_MANT_BITS - 1));
}

static inline uint64_t
fp64_defaultNaN()
{
    return fp64_pack(0, FP64_EXP_INF, 1ULL << (FP64_MANT_BITS - 1));
}

static inline void
fp16_unpack(int *sgn, int *exp, uint16_t *mnt, uint16_t x, int mode,
            int *flags)
{
    *sgn = x >> (FP16_BITS - 1);
    *exp = FP16_EXP(x);
    *mnt = FP16_MANT(x);

    // Handle subnormals:
    if (*exp) {
        *mnt |= 1ULL << FP16_MANT_BITS;
    } else {
        ++*exp;
        // IDC (Input Denormal) is not set in this case.
        if (mode & FPLIB_FZ16)
            *mnt = 0;
    }
}

static inline void
fp32_unpack(int *sgn, int *exp, uint32_t *mnt, uint32_t x, int mode,
            int *flags)
{
    *sgn = x >> (FP32_BITS - 1);
    *exp = FP32_EXP(x);
    *mnt = FP32_MANT(x);

    // Handle subnormals:
    if (*exp) {
        *mnt |= 1ULL << FP32_MANT_BITS;
    } else {
        ++*exp;
        if ((mode & FPLIB_FZ) && *mnt) {
            *flags |= FPLIB_IDC;
            *mnt = 0;
        }
    }
}

static inline void
fp64_unpack(int *sgn, int *exp, uint64_t *mnt, uint64_t x, int mode,
            int *flags)
{
    *sgn = x >> (FP64_BITS - 1);
    *exp = FP64_EXP(x);
    *mnt = FP64_MANT(x);

    // Handle subnormals:
    if (*exp) {
        *mnt |= 1ULL << FP64_MANT_BITS;
    } else {
        ++*exp;
        if ((mode & FPLIB_FZ) && *mnt) {
            *flags |= FPLIB_IDC;
            *mnt = 0;
        }
    }
}

static inline int
fp16_is_NaN(int exp, uint16_t mnt)
{
    return exp == FP16_EXP_INF && FP16_MANT(mnt);
}

static inline int
fp32_is_NaN(int exp, uint32_t mnt)
{
    return exp == FP32_EXP_INF && FP32_MANT(mnt);
}

static inline int
fp64_is_NaN(int exp, uint64_t mnt)
{
    return exp == FP64_EXP_INF && FP64_MANT(mnt);
}

static inline int
fp16_is_signalling_NaN(int exp, uint16_t mnt)
{
    return fp16_is_NaN(exp, mnt) && !(mnt >> (FP16_MANT_BITS - 1) & 1);
}

static inline int
fp32_is_signalling_NaN(int exp, uint32_t mnt)
{
    return fp32_is_NaN(exp, mnt) && !(mnt >> (FP32_MANT_BITS - 1) & 1);
}

static inline int
fp64_is_signalling_NaN(int exp, uint64_t mnt)
{
    return fp64_is_NaN(exp, mnt) && !(mnt >> (FP64_MANT_BITS - 1) & 1);
}

static inline int
fp16_is_quiet_NaN(int exp, uint16_t mnt)
{
    return exp == FP16_EXP_INF && (mnt >> (FP16_MANT_BITS - 1) & 1);
}

static inline int
fp32_is_quiet_NaN(int exp, uint32_t mnt)
{
    return exp == FP32_EXP_INF && (mnt >> (FP32_MANT_BITS - 1) & 1);
}

static inline int
fp64_is_quiet_NaN(int exp, uint64_t mnt)
{
    return exp == FP64_EXP_INF && (mnt >> (FP64_MANT_BITS - 1) & 1);
}

static inline int
fp16_is_infinity(int exp, uint16_t mnt)
{
    return exp == FP16_EXP_INF && !FP16_MANT(mnt);
}

static inline int
fp32_is_infinity(int exp, uint32_t mnt)
{
    return exp == FP32_EXP_INF && !FP32_MANT(mnt);
}

static inline int
fp64_is_infinity(int exp, uint64_t mnt)
{
    return exp == FP64_EXP_INF && !FP64_MANT(mnt);
}

static inline uint16_t
fp16_process_NaN(uint16_t a, int mode, int *flags)
{
    if (!(a >> (FP16_MANT_BITS - 1) & 1)) {
        *flags |= FPLIB_IOC;
        a |= 1ULL << (FP16_MANT_BITS - 1);
    }
    return mode & FPLIB_DN ? fp16_defaultNaN() : a;
}

static inline uint32_t
fp32_process_NaN(uint32_t a, int mode, int *flags)
{
    if (!(a >> (FP32_MANT_BITS - 1) & 1)) {
        *flags |= FPLIB_IOC;
        a |= 1ULL << (FP32_MANT_BITS - 1);
    }
    return mode & FPLIB_DN ? fp32_defaultNaN() : a;
}

static inline uint64_t
fp64_process_NaN(uint64_t a, int mode, int *flags)
{
    if (!(a >> (FP64_MANT_BITS - 1) & 1)) {
        *flags |= FPLIB_IOC;
        a |= 1ULL << (FP64_MANT_BITS - 1);
    }
    return mode & FPLIB_DN ? fp64_defaultNaN() : a;
}
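// NaN propagation for two- and three-operand ops follows the Arm rules:
// signalling NaNs take priority over quiet NaNs, operands are examined in
// argument order, a propagated signalling NaN is quietened (top fraction
// bit set, IOC raised), and FPCR.DN replaces the result with the default
// NaN. A return value of 0 means no NaN was found; 0 is never a NaN
// encoding, so it is safe as an "absent" marker.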
static uint16_t
fp16_process_NaNs(uint16_t a, uint16_t b, int mode, int *flags)
{
    int a_exp = FP16_EXP(a);
    uint16_t a_mnt = FP16_MANT(a);
    int b_exp = FP16_EXP(b);
    uint16_t b_mnt = FP16_MANT(b);

    // Handle signalling NaNs:
    if (fp16_is_signalling_NaN(a_exp, a_mnt))
        return fp16_process_NaN(a, mode, flags);
    if (fp16_is_signalling_NaN(b_exp, b_mnt))
        return fp16_process_NaN(b, mode, flags);

    // Handle quiet NaNs:
    if (fp16_is_NaN(a_exp, a_mnt))
        return fp16_process_NaN(a, mode, flags);
    if (fp16_is_NaN(b_exp, b_mnt))
        return fp16_process_NaN(b, mode, flags);

    return 0;
}

static uint32_t
fp32_process_NaNs(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_exp = FP32_EXP(a);
    uint32_t a_mnt = FP32_MANT(a);
    int b_exp = FP32_EXP(b);
    uint32_t b_mnt = FP32_MANT(b);

    // Handle signalling NaNs:
    if (fp32_is_signalling_NaN(a_exp, a_mnt))
        return fp32_process_NaN(a, mode, flags);
    if (fp32_is_signalling_NaN(b_exp, b_mnt))
        return fp32_process_NaN(b, mode, flags);

    // Handle quiet NaNs:
    if (fp32_is_NaN(a_exp, a_mnt))
        return fp32_process_NaN(a, mode, flags);
    if (fp32_is_NaN(b_exp, b_mnt))
        return fp32_process_NaN(b, mode, flags);

    return 0;
}

static uint64_t
fp64_process_NaNs(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_exp = FP64_EXP(a);
    uint64_t a_mnt = FP64_MANT(a);
    int b_exp = FP64_EXP(b);
    uint64_t b_mnt = FP64_MANT(b);

    // Handle signalling NaNs:
    if (fp64_is_signalling_NaN(a_exp, a_mnt))
        return fp64_process_NaN(a, mode, flags);
    if (fp64_is_signalling_NaN(b_exp, b_mnt))
        return fp64_process_NaN(b, mode, flags);

    // Handle quiet NaNs:
    if (fp64_is_NaN(a_exp, a_mnt))
        return fp64_process_NaN(a, mode, flags);
    if (fp64_is_NaN(b_exp, b_mnt))
        return fp64_process_NaN(b, mode, flags);

    return 0;
}

static uint16_t
fp16_process_NaNs3(uint16_t a, uint16_t b, uint16_t c, int mode, int *flags)
{
    int a_exp = FP16_EXP(a);
    uint16_t a_mnt = FP16_MANT(a);
    int b_exp = FP16_EXP(b);
    uint16_t b_mnt = FP16_MANT(b);
    int c_exp = FP16_EXP(c);
    uint16_t c_mnt = FP16_MANT(c);

    // Handle signalling NaNs:
    if (fp16_is_signalling_NaN(a_exp, a_mnt))
        return fp16_process_NaN(a, mode, flags);
    if (fp16_is_signalling_NaN(b_exp, b_mnt))
        return fp16_process_NaN(b, mode, flags);
    if (fp16_is_signalling_NaN(c_exp, c_mnt))
        return fp16_process_NaN(c, mode, flags);

    // Handle quiet NaNs:
    if (fp16_is_NaN(a_exp, a_mnt))
        return fp16_process_NaN(a, mode, flags);
    if (fp16_is_NaN(b_exp, b_mnt))
        return fp16_process_NaN(b, mode, flags);
    if (fp16_is_NaN(c_exp, c_mnt))
        return fp16_process_NaN(c, mode, flags);

    return 0;
}

static uint32_t
fp32_process_NaNs3(uint32_t a, uint32_t b, uint32_t c, int mode, int *flags)
{
    int a_exp = FP32_EXP(a);
    uint32_t a_mnt = FP32_MANT(a);
    int b_exp = FP32_EXP(b);
    uint32_t b_mnt = FP32_MANT(b);
    int c_exp = FP32_EXP(c);
    uint32_t c_mnt = FP32_MANT(c);

    // Handle signalling NaNs:
    if (fp32_is_signalling_NaN(a_exp, a_mnt))
        return fp32_process_NaN(a, mode, flags);
    if (fp32_is_signalling_NaN(b_exp, b_mnt))
        return fp32_process_NaN(b, mode, flags);
    if (fp32_is_signalling_NaN(c_exp, c_mnt))
        return fp32_process_NaN(c, mode, flags);

    // Handle quiet NaNs:
    if (fp32_is_NaN(a_exp, a_mnt))
        return fp32_process_NaN(a, mode, flags);
    if (fp32_is_NaN(b_exp, b_mnt))
        return fp32_process_NaN(b, mode, flags);
    if (fp32_is_NaN(c_exp, c_mnt))
        return fp32_process_NaN(c, mode, flags);

    return 0;
}
static uint64_t
fp64_process_NaNs3(uint64_t a, uint64_t b, uint64_t c, int mode, int *flags)
{
    int a_exp = FP64_EXP(a);
    uint64_t a_mnt = FP64_MANT(a);
    int b_exp = FP64_EXP(b);
    uint64_t b_mnt = FP64_MANT(b);
    int c_exp = FP64_EXP(c);
    uint64_t c_mnt = FP64_MANT(c);

    // Handle signalling NaNs:
    if (fp64_is_signalling_NaN(a_exp, a_mnt))
        return fp64_process_NaN(a, mode, flags);
    if (fp64_is_signalling_NaN(b_exp, b_mnt))
        return fp64_process_NaN(b, mode, flags);
    if (fp64_is_signalling_NaN(c_exp, c_mnt))
        return fp64_process_NaN(c, mode, flags);

    // Handle quiet NaNs:
    if (fp64_is_NaN(a_exp, a_mnt))
        return fp64_process_NaN(a, mode, flags);
    if (fp64_is_NaN(b_exp, b_mnt))
        return fp64_process_NaN(b, mode, flags);
    if (fp64_is_NaN(c_exp, c_mnt))
        return fp64_process_NaN(c, mode, flags);

    return 0;
}
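// The rounding helpers take a significand with extra precision and derive
// a 2-bit "error" code: 0 = exact, 1 = less than half an ULP off,
// 2 = exactly half an ULP, 3 = more than half. Round-to-nearest-even
// rounds up on error 3, or on error 2 when int_mant is odd (the tie case
// resolved toward an even result); directed modes round up whenever any
// error remains in the appropriate direction for the sign. The two
// post-increment checks handle the carry out of the mantissa, which can
// promote a subnormal to normal or bump the exponent.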
static uint16_t
fp16_round_(int sgn, int exp, uint16_t mnt, int rm, int mode, int *flags)
{
    int biased_exp; // non-negative exponent value for result
    uint16_t int_mant; // mantissa for result, less than (2 << FP16_MANT_BITS)
    int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5

    assert(rm != FPRounding_TIEAWAY);

    // Flush to zero:
    if ((mode & FPLIB_FZ16) && exp < 1) {
        *flags |= FPLIB_UFC;
        return fp16_zero(sgn);
    }

    // The bottom FP16_EXP_BITS bits of mnt are orred together:
    mnt = (4ULL << FP16_MANT_BITS | mnt >> (FP16_EXP_BITS - 1) |
           ((mnt & ((1ULL << FP16_EXP_BITS) - 1)) != 0));

    if (exp > 0) {
        biased_exp = exp;
        int_mant = mnt >> 2;
        error = mnt & 3;
    } else {
        biased_exp = 0;
        int_mant = lsr16(mnt, 3 - exp);
        error = (lsr16(mnt, 1 - exp) & 3) | !!(mnt & (lsl16(1, 1 - exp) - 1));
    }

    if (!biased_exp && error) { // xx should also check fpscr_val<11>
        *flags |= FPLIB_UFC;
    }

    // Round up:
    if ((rm == FPLIB_RN && (error == 3 ||
                            (error == 2 && (int_mant & 1)))) ||
        (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) {
        ++int_mant;
        if (int_mant == 1ULL << FP16_MANT_BITS) {
            // Rounded up from denormalized to normalized
            biased_exp = 1;
        }
        if (int_mant == 2ULL << FP16_MANT_BITS) {
            // Rounded up to next exponent
            ++biased_exp;
            int_mant >>= 1;
        }
    }

    // Handle rounding to odd aka Von Neumann rounding:
    if (error && rm == FPRounding_ODD)
        int_mant |= 1;

    // Handle overflow:
    if (!(mode & FPLIB_AHP)) {
        if (biased_exp >= (int)FP16_EXP_INF) {
            *flags |= FPLIB_OFC | FPLIB_IXC;
            if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) ||
                (rm == FPLIB_RM && sgn)) {
                return fp16_infinity(sgn);
            } else {
                return fp16_max_normal(sgn);
            }
        }
    } else {
        if (biased_exp >= (int)FP16_EXP_INF + 1) {
            *flags |= FPLIB_IOC;
            return fp16_pack(sgn, FP16_EXP_INF, -1);
        }
    }

    if (error) {
        *flags |= FPLIB_IXC;
    }

    return fp16_pack(sgn, biased_exp, int_mant);
}

static uint16_t
fp16_round(int sgn, int exp, uint16_t mnt, int mode, int *flags)
{
    return fp16_round_(sgn, exp, mnt, mode & 3, mode, flags);
}

static uint32_t
fp32_round_(int sgn, int exp, uint32_t mnt, int rm, int mode, int *flags)
{
    int biased_exp; // non-negative exponent value for result
    uint32_t int_mant; // mantissa for result, less than (2 << FP32_MANT_BITS)
    int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5

    assert(rm != FPRounding_TIEAWAY);

    // Flush to zero:
    if ((mode & FPLIB_FZ) && exp < 1) {
        *flags |= FPLIB_UFC;
        return fp32_zero(sgn);
    }

    // The bottom FP32_EXP_BITS bits of mnt are orred together:
    mnt = (4ULL << FP32_MANT_BITS | mnt >> (FP32_EXP_BITS - 1) |
           ((mnt & ((1ULL << FP32_EXP_BITS) - 1)) != 0));

    if (exp > 0) {
        biased_exp = exp;
        int_mant = mnt >> 2;
        error = mnt & 3;
    } else {
        biased_exp = 0;
        int_mant = lsr32(mnt, 3 - exp);
        error = (lsr32(mnt, 1 - exp) & 3) | !!(mnt & (lsl32(1, 1 - exp) - 1));
    }

    if (!biased_exp && error) { // xx should also check fpscr_val<11>
        *flags |= FPLIB_UFC;
    }

    // Round up:
    if ((rm == FPLIB_RN && (error == 3 ||
                            (error == 2 && (int_mant & 1)))) ||
        (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) {
        ++int_mant;
        if (int_mant == 1ULL << FP32_MANT_BITS) {
            // Rounded up from denormalized to normalized
            biased_exp = 1;
        }
        if (int_mant == 2ULL << FP32_MANT_BITS) {
            // Rounded up to next exponent
            ++biased_exp;
            int_mant >>= 1;
        }
    }

    // Handle rounding to odd aka Von Neumann rounding:
    if (error && rm == FPRounding_ODD)
        int_mant |= 1;

    // Handle overflow:
    if (biased_exp >= (int)FP32_EXP_INF) {
        *flags |= FPLIB_OFC | FPLIB_IXC;
        if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) ||
            (rm == FPLIB_RM && sgn)) {
            return fp32_infinity(sgn);
        } else {
            return fp32_max_normal(sgn);
        }
    }

    if (error) {
        *flags |= FPLIB_IXC;
    }

    return fp32_pack(sgn, biased_exp, int_mant);
}

static uint32_t
fp32_round(int sgn, int exp, uint32_t mnt, int mode, int *flags)
{
    return fp32_round_(sgn, exp, mnt, mode & 3, mode, flags);
}
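// Round-to-odd (FPRounding_ODD above) sets the low result bit whenever any
// error remains instead of rounding away. That preserves the sticky
// information in the result, so a subsequent narrowing rounding gives the
// same answer as a single direct rounding would have (no double-rounding
// error).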
static uint64_t
fp64_round_(int sgn, int exp, uint64_t mnt, int rm, int mode, int *flags)
{
    int biased_exp; // non-negative exponent value for result
    uint64_t int_mant; // mantissa for result, less than (2 << FP64_MANT_BITS)
    int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5

    assert(rm != FPRounding_TIEAWAY);

    // Flush to zero:
    if ((mode & FPLIB_FZ) && exp < 1) {
        *flags |= FPLIB_UFC;
        return fp64_zero(sgn);
    }

    // The bottom FP64_EXP_BITS bits of mnt are orred together:
    mnt = (4ULL << FP64_MANT_BITS | mnt >> (FP64_EXP_BITS - 1) |
           ((mnt & ((1ULL << FP64_EXP_BITS) - 1)) != 0));

    if (exp > 0) {
        biased_exp = exp;
        int_mant = mnt >> 2;
        error = mnt & 3;
    } else {
        biased_exp = 0;
        int_mant = lsr64(mnt, 3 - exp);
        error = (lsr64(mnt, 1 - exp) & 3) | !!(mnt & (lsl64(1, 1 - exp) - 1));
    }

    if (!biased_exp && error) { // xx should also check fpscr_val<11>
        *flags |= FPLIB_UFC;
    }

    // Round up:
    if ((rm == FPLIB_RN && (error == 3 ||
                            (error == 2 && (int_mant & 1)))) ||
        (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) {
        ++int_mant;
        if (int_mant == 1ULL << FP64_MANT_BITS) {
            // Rounded up from denormalized to normalized
            biased_exp = 1;
        }
        if (int_mant == 2ULL << FP64_MANT_BITS) {
            // Rounded up to next exponent
            ++biased_exp;
            int_mant >>= 1;
        }
    }

    // Handle rounding to odd aka Von Neumann rounding:
    if (error && rm == FPRounding_ODD)
        int_mant |= 1;

    // Handle overflow:
    if (biased_exp >= (int)FP64_EXP_INF) {
        *flags |= FPLIB_OFC | FPLIB_IXC;
        if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) ||
            (rm == FPLIB_RM && sgn)) {
            return fp64_infinity(sgn);
        } else {
            return fp64_max_normal(sgn);
        }
    }

    if (error) {
        *flags |= FPLIB_IXC;
    }

    return fp64_pack(sgn, biased_exp, int_mant);
}

static uint64_t
fp64_round(int sgn, int exp, uint64_t mnt, int mode, int *flags)
{
    return fp64_round_(sgn, exp, mnt, mode & 3, mode, flags);
}
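// The compare helpers work on the unpacked sign/exponent/significand form.
// Because the encoding is sign-magnitude, ordering reduces to: different
// signs order by sign, then by exponent, then by significand, with the
// sense flipped for negative operands (the a_sgn ^ ... expressions). Both
// zeroes compare equal regardless of sign. Any NaN makes the ordered
// predicates (_ge, _gt) false and raises IOC; _eq and _un only raise IOC
// for signalling NaNs.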
static int
fp16_compare_eq(uint16_t a, uint16_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint16_t a_mnt, b_mnt;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp16_is_NaN(a_exp, a_mnt) ||
        fp16_is_NaN(b_exp, b_mnt)) {
        if (fp16_is_signalling_NaN(a_exp, a_mnt) ||
            fp16_is_signalling_NaN(b_exp, b_mnt))
            *flags |= FPLIB_IOC;
        return 0;
    }
    return a == b || (!a_mnt && !b_mnt);
}

static int
fp16_compare_ge(uint16_t a, uint16_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint16_t a_mnt, b_mnt;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp16_is_NaN(a_exp, a_mnt) ||
        fp16_is_NaN(b_exp, b_mnt)) {
        *flags |= FPLIB_IOC;
        return 0;
    }
    if (!a_mnt && !b_mnt)
        return 1;
    if (a_sgn != b_sgn)
        return b_sgn;
    if (a_exp != b_exp)
        return a_sgn ^ (a_exp > b_exp);
    if (a_mnt != b_mnt)
        return a_sgn ^ (a_mnt > b_mnt);
    return 1;
}

static int
fp16_compare_gt(uint16_t a, uint16_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint16_t a_mnt, b_mnt;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp16_is_NaN(a_exp, a_mnt) ||
        fp16_is_NaN(b_exp, b_mnt)) {
        *flags |= FPLIB_IOC;
        return 0;
    }
    if (!a_mnt && !b_mnt)
        return 0;
    if (a_sgn != b_sgn)
        return b_sgn;
    if (a_exp != b_exp)
        return a_sgn ^ (a_exp > b_exp);
    if (a_mnt != b_mnt)
        return a_sgn ^ (a_mnt > b_mnt);
    return 0;
}

static int
fp16_compare_un(uint16_t a, uint16_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint16_t a_mnt, b_mnt;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp16_is_NaN(a_exp, a_mnt) ||
        fp16_is_NaN(b_exp, b_mnt)) {
        if (fp16_is_signalling_NaN(a_exp, a_mnt) ||
            fp16_is_signalling_NaN(b_exp, b_mnt))
            *flags |= FPLIB_IOC;
        return 1;
    }
    return 0;
}

static int
fp32_compare_eq(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint32_t a_mnt, b_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp32_is_NaN(a_exp, a_mnt) ||
        fp32_is_NaN(b_exp, b_mnt)) {
        if (fp32_is_signalling_NaN(a_exp, a_mnt) ||
            fp32_is_signalling_NaN(b_exp, b_mnt))
            *flags |= FPLIB_IOC;
        return 0;
    }
    return a == b || (!a_mnt && !b_mnt);
}

static int
fp32_compare_ge(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint32_t a_mnt, b_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp32_is_NaN(a_exp, a_mnt) ||
        fp32_is_NaN(b_exp, b_mnt)) {
        *flags |= FPLIB_IOC;
        return 0;
    }
    if (!a_mnt && !b_mnt)
        return 1;
    if (a_sgn != b_sgn)
        return b_sgn;
    if (a_exp != b_exp)
        return a_sgn ^ (a_exp > b_exp);
    if (a_mnt != b_mnt)
        return a_sgn ^ (a_mnt > b_mnt);
    return 1;
}

static int
fp32_compare_gt(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint32_t a_mnt, b_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp32_is_NaN(a_exp, a_mnt) ||
        fp32_is_NaN(b_exp, b_mnt)) {
        *flags |= FPLIB_IOC;
        return 0;
    }
    if (!a_mnt && !b_mnt)
        return 0;
    if (a_sgn != b_sgn)
        return b_sgn;
    if (a_exp != b_exp)
        return a_sgn ^ (a_exp > b_exp);
    if (a_mnt != b_mnt)
        return a_sgn ^ (a_mnt > b_mnt);
    return 0;
}

static int
fp32_compare_un(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint32_t a_mnt, b_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp32_is_NaN(a_exp, a_mnt) ||
        fp32_is_NaN(b_exp, b_mnt)) {
        if (fp32_is_signalling_NaN(a_exp, a_mnt) ||
            fp32_is_signalling_NaN(b_exp, b_mnt))
            *flags |= FPLIB_IOC;
        return 1;
    }
    return 0;
}

static int
fp64_compare_eq(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint64_t a_mnt, b_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp64_is_NaN(a_exp, a_mnt) ||
        fp64_is_NaN(b_exp, b_mnt)) {
        if (fp64_is_signalling_NaN(a_exp, a_mnt) ||
            fp64_is_signalling_NaN(b_exp, b_mnt))
            *flags |= FPLIB_IOC;
        return 0;
    }
    return a == b || (!a_mnt && !b_mnt);
}

static int
fp64_compare_ge(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint64_t a_mnt, b_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp64_is_NaN(a_exp, a_mnt) ||
        fp64_is_NaN(b_exp, b_mnt)) {
        *flags |= FPLIB_IOC;
        return 0;
    }
    if (!a_mnt && !b_mnt)
        return 1;
    if (a_sgn != b_sgn)
        return b_sgn;
    if (a_exp != b_exp)
        return a_sgn ^ (a_exp > b_exp);
    if (a_mnt != b_mnt)
        return a_sgn ^ (a_mnt > b_mnt);
    return 1;
}

static int
fp64_compare_gt(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint64_t a_mnt, b_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp64_is_NaN(a_exp, a_mnt) ||
        fp64_is_NaN(b_exp, b_mnt)) {
        *flags |= FPLIB_IOC;
        return 0;
    }
    if (!a_mnt && !b_mnt)
        return 0;
    if (a_sgn != b_sgn)
        return b_sgn;
    if (a_exp != b_exp)
        return a_sgn ^ (a_exp > b_exp);
    if (a_mnt != b_mnt)
        return a_sgn ^ (a_mnt > b_mnt);
    return 0;
}

static int
fp64_compare_un(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint64_t a_mnt, b_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp64_is_NaN(a_exp, a_mnt) ||
        fp64_is_NaN(b_exp, b_mnt)) {
        if (fp64_is_signalling_NaN(a_exp, a_mnt) ||
            fp64_is_signalling_NaN(b_exp, b_mnt))
            *flags |= FPLIB_IOC;
        return 1;
    }
    return 0;
}
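// Addition aligns the smaller operand to the larger exponent, keeping
// three guard bits (the << 3) and ORing any bits shifted out into the
// bottom bit as a sticky flag, so the final rounding still sees whether
// the discarded tail was non-zero. Effective subtraction flips the result
// sign when the subtrahend's magnitude is larger.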
static uint16_t
fp16_add(uint16_t a, uint16_t b, int neg, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint16_t a_mnt, b_mnt, x, x_mnt;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp16_process_NaNs(a, b, mode, flags))) {
        return x;
    }

    b_sgn ^= neg;

    // Handle infinities and zeroes:
    if (a_exp == FP16_EXP_INF && b_exp == FP16_EXP_INF && a_sgn != b_sgn) {
        *flags |= FPLIB_IOC;
        return fp16_defaultNaN();
    } else if (a_exp == FP16_EXP_INF) {
        return fp16_infinity(a_sgn);
    } else if (b_exp == FP16_EXP_INF) {
        return fp16_infinity(b_sgn);
    } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) {
        return fp16_zero(a_sgn);
    }

    a_mnt <<= 3;
    b_mnt <<= 3;
    if (a_exp >= b_exp) {
        b_mnt = (lsr16(b_mnt, a_exp - b_exp) |
                 !!(b_mnt & (lsl16(1, a_exp - b_exp) - 1)));
        b_exp = a_exp;
    } else {
        a_mnt = (lsr16(a_mnt, b_exp - a_exp) |
                 !!(a_mnt & (lsl16(1, b_exp - a_exp) - 1)));
        a_exp = b_exp;
    }
    x_sgn = a_sgn;
    x_exp = a_exp;
    if (a_sgn == b_sgn) {
        x_mnt = a_mnt + b_mnt;
    } else if (a_mnt >= b_mnt) {
        x_mnt = a_mnt - b_mnt;
    } else {
        x_sgn ^= 1;
        x_mnt = b_mnt - a_mnt;
    }

    if (!x_mnt) {
        // Sign of exact zero result depends on rounding mode
        return fp16_zero((mode & 3) == 2);
    }

    x_mnt = fp16_normalise(x_mnt, &x_exp);

    return fp16_round(x_sgn, x_exp + FP16_EXP_BITS - 3, x_mnt << 1,
                      mode, flags);
}

static uint32_t
fp32_add(uint32_t a, uint32_t b, int neg, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint32_t a_mnt, b_mnt, x, x_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp32_process_NaNs(a, b, mode, flags))) {
        return x;
    }

    b_sgn ^= neg;

    // Handle infinities and zeroes:
    if (a_exp == FP32_EXP_INF && b_exp == FP32_EXP_INF && a_sgn != b_sgn) {
        *flags |= FPLIB_IOC;
        return fp32_defaultNaN();
    } else if (a_exp == FP32_EXP_INF) {
        return fp32_infinity(a_sgn);
    } else if (b_exp == FP32_EXP_INF) {
        return fp32_infinity(b_sgn);
    } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) {
        return fp32_zero(a_sgn);
    }

    a_mnt <<= 3;
    b_mnt <<= 3;
    if (a_exp >= b_exp) {
        b_mnt = (lsr32(b_mnt, a_exp - b_exp) |
                 !!(b_mnt & (lsl32(1, a_exp - b_exp) - 1)));
        b_exp = a_exp;
    } else {
        a_mnt = (lsr32(a_mnt, b_exp - a_exp) |
                 !!(a_mnt & (lsl32(1, b_exp - a_exp) - 1)));
        a_exp = b_exp;
    }
    x_sgn = a_sgn;
    x_exp = a_exp;
    if (a_sgn == b_sgn) {
        x_mnt = a_mnt + b_mnt;
    } else if (a_mnt >= b_mnt) {
        x_mnt = a_mnt - b_mnt;
    } else {
        x_sgn ^= 1;
        x_mnt = b_mnt - a_mnt;
    }

    if (!x_mnt) {
        // Sign of exact zero result depends on rounding mode
        return fp32_zero((mode & 3) == 2);
    }

    x_mnt = fp32_normalise(x_mnt, &x_exp);

    return fp32_round(x_sgn, x_exp + FP32_EXP_BITS - 3, x_mnt << 1,
                      mode, flags);
}
static uint64_t
fp64_add(uint64_t a, uint64_t b, int neg, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint64_t a_mnt, b_mnt, x, x_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp64_process_NaNs(a, b, mode, flags))) {
        return x;
    }

    b_sgn ^= neg;

    // Handle infinities and zeroes:
    if (a_exp == FP64_EXP_INF && b_exp == FP64_EXP_INF && a_sgn != b_sgn) {
        *flags |= FPLIB_IOC;
        return fp64_defaultNaN();
    } else if (a_exp == FP64_EXP_INF) {
        return fp64_infinity(a_sgn);
    } else if (b_exp == FP64_EXP_INF) {
        return fp64_infinity(b_sgn);
    } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) {
        return fp64_zero(a_sgn);
    }

    a_mnt <<= 3;
    b_mnt <<= 3;
    if (a_exp >= b_exp) {
        b_mnt = (lsr64(b_mnt, a_exp - b_exp) |
                 !!(b_mnt & (lsl64(1, a_exp - b_exp) - 1)));
        b_exp = a_exp;
    } else {
        a_mnt = (lsr64(a_mnt, b_exp - a_exp) |
                 !!(a_mnt & (lsl64(1, b_exp - a_exp) - 1)));
        a_exp = b_exp;
    }
    x_sgn = a_sgn;
    x_exp = a_exp;
    if (a_sgn == b_sgn) {
        x_mnt = a_mnt + b_mnt;
    } else if (a_mnt >= b_mnt) {
        x_mnt = a_mnt - b_mnt;
    } else {
        x_sgn ^= 1;
        x_mnt = b_mnt - a_mnt;
    }

    if (!x_mnt) {
        // Sign of exact zero result depends on rounding mode
        return fp64_zero((mode & 3) == 2);
    }

    x_mnt = fp64_normalise(x_mnt, &x_exp);

    return fp64_round(x_sgn, x_exp + FP64_EXP_BITS - 3, x_mnt << 1,
                      mode, flags);
}
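// Multiplication forms the exact double-width product of the two
// significands, normalises it in the wider type, then folds everything
// below the kept top bits into a sticky bit for rounding. The exponent
// arithmetic folds in both the unpacking offsets and the position of the
// normalised product.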
static uint16_t
fp16_mul(uint16_t a, uint16_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint16_t a_mnt, b_mnt, x;
    uint32_t x_mnt;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp16_process_NaNs(a, b, mode, flags))) {
        return x;
    }

    // Handle infinities and zeroes:
    if ((a_exp == FP16_EXP_INF && !b_mnt) ||
        (b_exp == FP16_EXP_INF && !a_mnt)) {
        *flags |= FPLIB_IOC;
        return fp16_defaultNaN();
    } else if (a_exp == FP16_EXP_INF || b_exp == FP16_EXP_INF) {
        return fp16_infinity(a_sgn ^ b_sgn);
    } else if (!a_mnt || !b_mnt) {
        return fp16_zero(a_sgn ^ b_sgn);
    }

    // Multiply and normalise:
    x_sgn = a_sgn ^ b_sgn;
    x_exp = a_exp + b_exp - FP16_EXP_BIAS + 2 * FP16_EXP_BITS + 1;
    x_mnt = (uint32_t)a_mnt * b_mnt;
    x_mnt = fp32_normalise(x_mnt, &x_exp);

    // Convert to FP16_BITS bits, collapsing error into bottom bit:
    x_mnt = lsr32(x_mnt, FP16_BITS - 1) | !!lsl32(x_mnt, FP16_BITS + 1);

    return fp16_round(x_sgn, x_exp, x_mnt, mode, flags);
}

static uint32_t
fp32_mul(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint32_t a_mnt, b_mnt, x;
    uint64_t x_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp32_process_NaNs(a, b, mode, flags))) {
        return x;
    }

    // Handle infinities and zeroes:
    if ((a_exp == FP32_EXP_INF && !b_mnt) ||
        (b_exp == FP32_EXP_INF && !a_mnt)) {
        *flags |= FPLIB_IOC;
        return fp32_defaultNaN();
    } else if (a_exp == FP32_EXP_INF || b_exp == FP32_EXP_INF) {
        return fp32_infinity(a_sgn ^ b_sgn);
    } else if (!a_mnt || !b_mnt) {
        return fp32_zero(a_sgn ^ b_sgn);
    }

    // Multiply and normalise:
    x_sgn = a_sgn ^ b_sgn;
    x_exp = a_exp + b_exp - FP32_EXP_BIAS + 2 * FP32_EXP_BITS + 1;
    x_mnt = (uint64_t)a_mnt * b_mnt;
    x_mnt = fp64_normalise(x_mnt, &x_exp);

    // Convert to FP32_BITS bits, collapsing error into bottom bit:
    x_mnt = lsr64(x_mnt, FP32_BITS - 1) | !!lsl64(x_mnt, FP32_BITS + 1);

    return fp32_round(x_sgn, x_exp, x_mnt, mode, flags);
}

static uint64_t
fp64_mul(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint64_t a_mnt, b_mnt, x;
    uint64_t x0_mnt, x1_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp64_process_NaNs(a, b, mode, flags))) {
        return x;
    }

    // Handle infinities and zeroes:
    if ((a_exp == FP64_EXP_INF && !b_mnt) ||
        (b_exp == FP64_EXP_INF && !a_mnt)) {
        *flags |= FPLIB_IOC;
        return fp64_defaultNaN();
    } else if (a_exp == FP64_EXP_INF || b_exp == FP64_EXP_INF) {
        return fp64_infinity(a_sgn ^ b_sgn);
    } else if (!a_mnt || !b_mnt) {
        return fp64_zero(a_sgn ^ b_sgn);
    }

    // Multiply and normalise:
    x_sgn = a_sgn ^ b_sgn;
    x_exp = a_exp + b_exp - FP64_EXP_BIAS + 2 * FP64_EXP_BITS + 1;
    mul62x62(&x0_mnt, &x1_mnt, a_mnt, b_mnt);
    fp128_normalise(&x0_mnt, &x1_mnt, &x_exp);

    // Convert to FP64_BITS bits, collapsing error into bottom bit:
    x0_mnt = x1_mnt << 1 | !!x0_mnt;

    return fp64_round(x_sgn, x_exp, x0_mnt, mode, flags);
}
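// The fused multiply-add routines compute a + b*c with a single rounding:
// the product is formed exactly, the addend is aligned against it at full
// width (with sticky bits for anything shifted out), and only the final
// sum is rounded. The scale argument biases the result exponent by a
// power of two just before rounding (see the x_exp + scale in the return);
// a plain fused multiply-add uses scale = 0.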
static uint16_t
fp16_muladd(uint16_t a, uint16_t b, uint16_t c, int scale,
            int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp;
    uint16_t a_mnt, b_mnt, c_mnt, x;
    uint32_t x_mnt, y_mnt;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
    fp16_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags);

    x = fp16_process_NaNs3(a, b, c, mode, flags);

    // Quiet NaN added to product of zero and infinity:
    if (fp16_is_quiet_NaN(a_exp, a_mnt) &&
        ((!b_mnt && fp16_is_infinity(c_exp, c_mnt)) ||
         (!c_mnt && fp16_is_infinity(b_exp, b_mnt)))) {
        x = fp16_defaultNaN();
        *flags |= FPLIB_IOC;
    }

    if (x) {
        return x;
    }

    // Handle infinities and zeroes:
    if ((b_exp == FP16_EXP_INF && !c_mnt) ||
        (c_exp == FP16_EXP_INF && !b_mnt) ||
        (a_exp == FP16_EXP_INF &&
         (b_exp == FP16_EXP_INF || c_exp == FP16_EXP_INF) &&
         (a_sgn != (b_sgn ^ c_sgn)))) {
        *flags |= FPLIB_IOC;
        return fp16_defaultNaN();
    }
    if (a_exp == FP16_EXP_INF)
        return fp16_infinity(a_sgn);
    if (b_exp == FP16_EXP_INF || c_exp == FP16_EXP_INF)
        return fp16_infinity(b_sgn ^ c_sgn);
    if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn))
        return fp16_zero(a_sgn);

    x_sgn = a_sgn;
    x_exp = a_exp + 2 * FP16_EXP_BITS - 3;
    x_mnt = (uint32_t)a_mnt << (FP16_MANT_BITS + 4);

    // Multiply:
    y_sgn = b_sgn ^ c_sgn;
    y_exp = b_exp + c_exp - FP16_EXP_BIAS + 2 * FP16_EXP_BITS + 1 - 3;
    y_mnt = (uint32_t)b_mnt * c_mnt << 3;
    if (!y_mnt) {
        y_exp = x_exp;
    }

    // Add:
    if (x_exp >= y_exp) {
        y_mnt = (lsr32(y_mnt, x_exp - y_exp) |
                 !!(y_mnt & (lsl32(1, x_exp - y_exp) - 1)));
        y_exp = x_exp;
    } else {
        x_mnt = (lsr32(x_mnt, y_exp - x_exp) |
                 !!(x_mnt & (lsl32(1, y_exp - x_exp) - 1)));
        x_exp = y_exp;
    }
    if (x_sgn == y_sgn) {
        x_mnt = x_mnt + y_mnt;
    } else if (x_mnt >= y_mnt) {
        x_mnt = x_mnt - y_mnt;
    } else {
        x_sgn ^= 1;
        x_mnt = y_mnt - x_mnt;
    }

    if (!x_mnt) {
        // Sign of exact zero result depends on rounding mode
        return fp16_zero((mode & 3) == 2);
    }

    // Normalise into FP16_BITS bits, collapsing error into bottom bit:
    x_mnt = fp32_normalise(x_mnt, &x_exp);
    x_mnt = x_mnt >> (FP16_BITS - 1) | !!(uint16_t)(x_mnt << 1);

    return fp16_round(x_sgn, x_exp + scale, x_mnt, mode, flags);
}

static uint32_t
fp32_muladd(uint32_t a, uint32_t b, uint32_t c, int scale,
            int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp;
    uint32_t a_mnt, b_mnt, c_mnt, x;
    uint64_t x_mnt, y_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
    fp32_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags);

    x = fp32_process_NaNs3(a, b, c, mode, flags);

    // Quiet NaN added to product of zero and infinity:
    if (fp32_is_quiet_NaN(a_exp, a_mnt) &&
        ((!b_mnt && fp32_is_infinity(c_exp, c_mnt)) ||
         (!c_mnt && fp32_is_infinity(b_exp, b_mnt)))) {
        x = fp32_defaultNaN();
        *flags |= FPLIB_IOC;
    }

    if (x) {
        return x;
    }

    // Handle infinities and zeroes:
    if ((b_exp == FP32_EXP_INF && !c_mnt) ||
        (c_exp == FP32_EXP_INF && !b_mnt) ||
        (a_exp == FP32_EXP_INF &&
         (b_exp == FP32_EXP_INF || c_exp == FP32_EXP_INF) &&
         (a_sgn != (b_sgn ^ c_sgn)))) {
        *flags |= FPLIB_IOC;
        return fp32_defaultNaN();
    }
    if (a_exp == FP32_EXP_INF)
        return fp32_infinity(a_sgn);
    if (b_exp == FP32_EXP_INF || c_exp == FP32_EXP_INF)
        return fp32_infinity(b_sgn ^ c_sgn);
    if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn))
        return fp32_zero(a_sgn);

    x_sgn = a_sgn;
    x_exp = a_exp + 2 * FP32_EXP_BITS - 3;
    x_mnt = (uint64_t)a_mnt << (FP32_MANT_BITS + 4);

    // Multiply:
    y_sgn = b_sgn ^ c_sgn;
    y_exp = b_exp + c_exp - FP32_EXP_BIAS + 2 * FP32_EXP_BITS + 1 - 3;
    y_mnt = (uint64_t)b_mnt * c_mnt << 3;
    if (!y_mnt) {
        y_exp = x_exp;
    }

    // Add:
    if (x_exp >= y_exp) {
        y_mnt = (lsr64(y_mnt, x_exp - y_exp) |
                 !!(y_mnt & (lsl64(1, x_exp - y_exp) - 1)));
        y_exp = x_exp;
    } else {
        x_mnt = (lsr64(x_mnt, y_exp - x_exp) |
                 !!(x_mnt & (lsl64(1, y_exp - x_exp) - 1)));
        x_exp = y_exp;
    }
    if (x_sgn == y_sgn) {
        x_mnt = x_mnt + y_mnt;
    } else if (x_mnt >= y_mnt) {
        x_mnt = x_mnt - y_mnt;
    } else {
        x_sgn ^= 1;
        x_mnt = y_mnt - x_mnt;
    }

    if (!x_mnt) {
        // Sign of exact zero result depends on rounding mode
        return fp32_zero((mode & 3) == 2);
    }

    // Normalise into FP32_BITS bits, collapsing error into bottom bit:
    x_mnt = fp64_normalise(x_mnt, &x_exp);
    x_mnt = x_mnt >> (FP32_BITS - 1) | !!(uint32_t)(x_mnt << 1);

    return fp32_round(x_sgn, x_exp + scale, x_mnt, mode, flags);
}
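// The fp64 version needs the 128-bit helpers: the exact product of two
// 53-bit significands is up to 106 bits, so the product, the aligned
// addend and the sum are all carried as (lo, hi) pairs until the final
// normalise collapses them back to 64 bits plus a sticky bit.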
static uint64_t
fp64_muladd(uint64_t a, uint64_t b, uint64_t c, int scale,
            int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp;
    uint64_t a_mnt, b_mnt, c_mnt, x;
    uint64_t x0_mnt, x1_mnt, y0_mnt, y1_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
    fp64_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags);

    x = fp64_process_NaNs3(a, b, c, mode, flags);

    // Quiet NaN added to product of zero and infinity:
    if (fp64_is_quiet_NaN(a_exp, a_mnt) &&
        ((!b_mnt && fp64_is_infinity(c_exp, c_mnt)) ||
         (!c_mnt && fp64_is_infinity(b_exp, b_mnt)))) {
        x = fp64_defaultNaN();
        *flags |= FPLIB_IOC;
    }

    if (x) {
        return x;
    }

    // Handle infinities and zeroes:
    if ((b_exp == FP64_EXP_INF && !c_mnt) ||
        (c_exp == FP64_EXP_INF && !b_mnt) ||
        (a_exp == FP64_EXP_INF &&
         (b_exp == FP64_EXP_INF || c_exp == FP64_EXP_INF) &&
         (a_sgn != (b_sgn ^ c_sgn)))) {
        *flags |= FPLIB_IOC;
        return fp64_defaultNaN();
    }
    if (a_exp == FP64_EXP_INF)
        return fp64_infinity(a_sgn);
    if (b_exp == FP64_EXP_INF || c_exp == FP64_EXP_INF)
        return fp64_infinity(b_sgn ^ c_sgn);
    if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn))
        return fp64_zero(a_sgn);

    x_sgn = a_sgn;
    x_exp = a_exp + FP64_EXP_BITS;
    x0_mnt = 0;
    x1_mnt = a_mnt;

    // Multiply:
    y_sgn = b_sgn ^ c_sgn;
    y_exp = b_exp + c_exp - FP64_EXP_BIAS + 2 * FP64_EXP_BITS + 1 - 3;
    mul62x62(&y0_mnt, &y1_mnt, b_mnt, c_mnt << 3);
    if (!y0_mnt && !y1_mnt) {
        y_exp = x_exp;
    }

    // Add:
    if (x_exp >= y_exp) {
        uint64_t t0, t1;
        lsl128(&t0, &t1, y0_mnt, y1_mnt,
               x_exp - y_exp < 128 ? 128 - (x_exp - y_exp) : 0);
        lsr128(&y0_mnt, &y1_mnt, y0_mnt, y1_mnt, x_exp - y_exp);
        y0_mnt |= !!(t0 | t1);
        y_exp = x_exp;
    } else {
        uint64_t t0, t1;
        lsl128(&t0, &t1, x0_mnt, x1_mnt,
               y_exp - x_exp < 128 ? 128 - (y_exp - x_exp) : 0);
        lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y_exp - x_exp);
        x0_mnt |= !!(t0 | t1);
        x_exp = y_exp;
    }
    if (x_sgn == y_sgn) {
        add128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y0_mnt, y1_mnt);
    } else if (cmp128(x0_mnt, x1_mnt, y0_mnt, y1_mnt) >= 0) {
        sub128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y0_mnt, y1_mnt);
    } else {
        x_sgn ^= 1;
        sub128(&x0_mnt, &x1_mnt, y0_mnt, y1_mnt, x0_mnt, x1_mnt);
    }

    if (!x0_mnt && !x1_mnt) {
        // Sign of exact zero result depends on rounding mode
        return fp64_zero((mode & 3) == 2);
    }

    // Normalise into FP64_BITS bits, collapsing error into bottom bit:
    fp128_normalise(&x0_mnt, &x1_mnt, &x_exp);
    x0_mnt = x1_mnt << 1 | !!x0_mnt;

    return fp64_round(x_sgn, x_exp + scale, x0_mnt, mode, flags);
}
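// fp16 and fp32 division fit in the next-wider integer type, so they use
// a plain integer divide of the shifted dividend significand; a non-zero
// remainder is detected by multiplying back and sets the sticky bit.
// fp64 has no 128-bit divide available and uses a Newton-Raphson
// reciprocal instead (see fp64_div below).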
static uint16_t
fp16_div(uint16_t a, uint16_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint16_t a_mnt, b_mnt, x;
    uint32_t x_mnt;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp16_process_NaNs(a, b, mode, flags)))
        return x;

    // Handle infinities and zeroes:
    if ((a_exp == FP16_EXP_INF && b_exp == FP16_EXP_INF) ||
        (!a_mnt && !b_mnt)) {
        *flags |= FPLIB_IOC;
        return fp16_defaultNaN();
    }
    if (a_exp == FP16_EXP_INF || !b_mnt) {
        if (a_exp != FP16_EXP_INF)
            *flags |= FPLIB_DZC;
        return fp16_infinity(a_sgn ^ b_sgn);
    }
    if (!a_mnt || b_exp == FP16_EXP_INF)
        return fp16_zero(a_sgn ^ b_sgn);

    // Divide, setting bottom bit if inexact:
    a_mnt = fp16_normalise(a_mnt, &a_exp);
    x_sgn = a_sgn ^ b_sgn;
    x_exp = a_exp - b_exp + (FP16_EXP_BIAS + FP16_BITS + 2 * FP16_EXP_BITS - 3);
    x_mnt = ((uint32_t)a_mnt << (FP16_MANT_BITS - FP16_EXP_BITS + 3)) / b_mnt;
    x_mnt |= (x_mnt * b_mnt !=
              (uint32_t)a_mnt << (FP16_MANT_BITS - FP16_EXP_BITS + 3));

    // Normalise into FP16_BITS bits, collapsing error into bottom bit:
    x_mnt = fp32_normalise(x_mnt, &x_exp);
    x_mnt = x_mnt >> (FP16_BITS - 1) | !!(uint16_t)(x_mnt << 1);

    return fp16_round(x_sgn, x_exp, x_mnt, mode, flags);
}

static uint32_t
fp32_div(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint32_t a_mnt, b_mnt, x;
    uint64_t x_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp32_process_NaNs(a, b, mode, flags)))
        return x;

    // Handle infinities and zeroes:
    if ((a_exp == FP32_EXP_INF && b_exp == FP32_EXP_INF) ||
        (!a_mnt && !b_mnt)) {
        *flags |= FPLIB_IOC;
        return fp32_defaultNaN();
    }
    if (a_exp == FP32_EXP_INF || !b_mnt) {
        if (a_exp != FP32_EXP_INF)
            *flags |= FPLIB_DZC;
        return fp32_infinity(a_sgn ^ b_sgn);
    }
    if (!a_mnt || b_exp == FP32_EXP_INF)
        return fp32_zero(a_sgn ^ b_sgn);

    // Divide, setting bottom bit if inexact:
    a_mnt = fp32_normalise(a_mnt, &a_exp);
    x_sgn = a_sgn ^ b_sgn;
    x_exp = a_exp - b_exp + (FP32_EXP_BIAS + FP32_BITS + 2 * FP32_EXP_BITS - 3);
    x_mnt = ((uint64_t)a_mnt << (FP32_MANT_BITS - FP32_EXP_BITS + 3)) / b_mnt;
    x_mnt |= (x_mnt * b_mnt !=
              (uint64_t)a_mnt << (FP32_MANT_BITS - FP32_EXP_BITS + 3));

    // Normalise into FP32_BITS bits, collapsing error into bottom bit:
    x_mnt = fp64_normalise(x_mnt, &x_exp);
    x_mnt = x_mnt >> (FP32_BITS - 1) | !!(uint32_t)(x_mnt << 1);

    return fp32_round(x_sgn, x_exp, x_mnt, mode, flags);
}
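// fp64_div builds a reciprocal of the divisor iteratively: an initial
// ~32-bit estimate from a 64/32 integer divide is refined with one
// Newton-Raphson style step (x' = x * (2 - b*x)), multiplied by the
// dividend, and then corrected by at most one ULP by multiplying the
// candidate quotient back by the divisor and comparing against the
// dividend. The comparison result c also supplies the sticky bit.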
static uint64_t
fp64_div(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp, c;
    uint64_t a_mnt, b_mnt, x, x_mnt, x0_mnt, x1_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp64_process_NaNs(a, b, mode, flags)))
        return x;

    // Handle infinities and zeroes:
    if ((a_exp == FP64_EXP_INF && b_exp == FP64_EXP_INF) ||
        (!a_mnt && !b_mnt)) {
        *flags |= FPLIB_IOC;
        return fp64_defaultNaN();
    }
    if (a_exp == FP64_EXP_INF || !b_mnt) {
        if (a_exp != FP64_EXP_INF)
            *flags |= FPLIB_DZC;
        return fp64_infinity(a_sgn ^ b_sgn);
    }
    if (!a_mnt || b_exp == FP64_EXP_INF)
        return fp64_zero(a_sgn ^ b_sgn);

    // Find reciprocal of divisor with Newton-Raphson:
    a_mnt = fp64_normalise(a_mnt, &a_exp);
    b_mnt = fp64_normalise(b_mnt, &b_exp);
    x_mnt = ~(uint64_t)0 / (b_mnt >> 31);
    mul64x32(&x0_mnt, &x1_mnt, b_mnt, x_mnt);
    sub128(&x0_mnt, &x1_mnt, 0, (uint64_t)1 << 32, x0_mnt, x1_mnt);
    lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 32);
    mul64x32(&x0_mnt, &x1_mnt, x0_mnt, x_mnt);
    lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 33);

    // Multiply by dividend:
    x_sgn = a_sgn ^ b_sgn;
    x_exp = a_exp - b_exp + FP64_EXP_BIAS + 8;
    mul62x62(&x0_mnt, &x1_mnt, x0_mnt, a_mnt >> 2);
    lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 4);
    x_mnt = x1_mnt;

    // This is an underestimate, so try adding one:
    mul62x62(&x0_mnt, &x1_mnt, b_mnt >> 2, x_mnt + 1);
    c = cmp128(x0_mnt, x1_mnt, 0, a_mnt >> 11);
    if (c <= 0) {
        ++x_mnt;
    }

    x_mnt = fp64_normalise(x_mnt, &x_exp);

    return fp64_round(x_sgn, x_exp, x_mnt << 1 | !!c, mode, flags);
}

static void
set_fpscr0(FPSCR &fpscr, int flags)
{
    if (flags & FPLIB_IDC) {
        fpscr.idc = 1;
    }
    if (flags & FPLIB_IOC) {
        fpscr.ioc = 1;
    }
    if (flags & FPLIB_DZC) {
        fpscr.dzc = 1;
    }
    if (flags & FPLIB_OFC) {
        fpscr.ofc = 1;
    }
    if (flags & FPLIB_UFC) {
        fpscr.ufc = 1;
    }
    if (flags & FPLIB_IXC) {
        fpscr.ixc = 1;
    }
}
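// Scale multiplies by 2^b without touching the significand. The clamp on
// b (to ±300 for the 16- and 32-bit versions, ±3000 for 64-bit) only
// needs to be wide enough to push any finite input past overflow or total
// underflow, so larger magnitudes behave identically after rounding.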
static uint16_t
fp16_scale(uint16_t a, int16_t b, int mode, int *flags)
{
    int a_sgn, a_exp;
    uint16_t a_mnt;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);

    // Handle NaNs:
    if (fp16_is_NaN(a_exp, a_mnt)) {
        return fp16_process_NaN(a, mode, flags);
    }

    // Handle zeroes:
    if (!a_mnt) {
        return fp16_zero(a_sgn);
    }

    // Handle infinities:
    if (a_exp == FP16_EXP_INF) {
        return fp16_infinity(a_sgn);
    }

    b = b < -300 ? -300 : b;
    b = b > 300 ? 300 : b;
    a_exp += b;
    a_mnt <<= 3;

    a_mnt = fp16_normalise(a_mnt, &a_exp);

    return fp16_round(a_sgn, a_exp + FP16_EXP_BITS - 3, a_mnt << 1,
                      mode, flags);
}

static uint32_t
fp32_scale(uint32_t a, int32_t b, int mode, int *flags)
{
    int a_sgn, a_exp;
    uint32_t a_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);

    // Handle NaNs:
    if (fp32_is_NaN(a_exp, a_mnt)) {
        return fp32_process_NaN(a, mode, flags);
    }

    // Handle zeroes:
    if (!a_mnt) {
        return fp32_zero(a_sgn);
    }

    // Handle infinities:
    if (a_exp == FP32_EXP_INF) {
        return fp32_infinity(a_sgn);
    }

    b = b < -300 ? -300 : b;
    b = b > 300 ? 300 : b;
    a_exp += b;
    a_mnt <<= 3;

    a_mnt = fp32_normalise(a_mnt, &a_exp);

    return fp32_round(a_sgn, a_exp + FP32_EXP_BITS - 3, a_mnt << 1,
                      mode, flags);
}

static uint64_t
fp64_scale(uint64_t a, int64_t b, int mode, int *flags)
{
    int a_sgn, a_exp;
    uint64_t a_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);

    // Handle NaNs:
    if (fp64_is_NaN(a_exp, a_mnt)) {
        return fp64_process_NaN(a, mode, flags);
    }

    // Handle zeroes:
    if (!a_mnt) {
        return fp64_zero(a_sgn);
    }

    // Handle infinities:
    if (a_exp == FP64_EXP_INF) {
        return fp64_infinity(a_sgn);
    }

    b = b < -3000 ? -3000 : b;
    b = b > 3000 ? 3000 : b;
    a_exp += b;
    a_mnt <<= 3;

    a_mnt = fp64_normalise(a_mnt, &a_exp);

    return fp64_round(a_sgn, a_exp + FP64_EXP_BITS - 3, a_mnt << 1,
                      mode, flags);
}
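// The square roots use the same pattern at each width: adjust the
// exponent parity so the final halving is exact, compute an initial
// estimate (the "(a * 3 + 5) / 8" seed), refine it with Heron/Newton
// steps x = (a / x + x) / 2 until the estimate has enough bits, then
// correct by at most one ULP by squaring the candidate and comparing
// against the operand. The comparison result also provides the sticky
// bit for rounding.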
static uint16_t
fp16_sqrt(uint16_t a, int mode, int *flags)
{
    int a_sgn, a_exp, x_sgn, x_exp;
    uint16_t a_mnt, x_mnt;
    uint32_t x, t0, t1;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);

    // Handle NaNs:
    if (fp16_is_NaN(a_exp, a_mnt))
        return fp16_process_NaN(a, mode, flags);

    // Handle infinities and zeroes:
    if (!a_mnt)
        return fp16_zero(a_sgn);
    if (a_exp == FP16_EXP_INF && !a_sgn)
        return fp16_infinity(a_sgn);
    if (a_sgn) {
        *flags |= FPLIB_IOC;
        return fp16_defaultNaN();
    }

    a_mnt = fp16_normalise(a_mnt, &a_exp);
    if (a_exp & 1) {
        ++a_exp;
        a_mnt >>= 1;
    }

    // x = (a * 3 + 5) / 8
    x = ((uint32_t)a_mnt << 14) + ((uint32_t)a_mnt << 13) + ((uint32_t)5 << 28);

    // x = (a / x + x) / 2; // 8-bit accuracy
    x = (((uint32_t)a_mnt << 16) / (x >> 15) + (x >> 16)) << 15;

    // x = (a / x + x) / 2; // 16-bit accuracy
    x = (((uint32_t)a_mnt << 16) / (x >> 15) + (x >> 16)) << 15;

    x_sgn = 0;
    x_exp = (a_exp + 27) >> 1;
    x_mnt = ((x - (1 << 18)) >> 19) + 1;
    t1 = (uint32_t)x_mnt * x_mnt;
    t0 = (uint32_t)a_mnt << 9;
    if (t1 > t0) {
        --x_mnt;
    }

    x_mnt = fp16_normalise(x_mnt, &x_exp);

    return fp16_round(x_sgn, x_exp, x_mnt << 1 | (t1 != t0), mode, flags);
}

static uint32_t
fp32_sqrt(uint32_t a, int mode, int *flags)
{
    int a_sgn, a_exp, x_sgn, x_exp;
    uint32_t a_mnt, x, x_mnt;
    uint64_t t0, t1;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);

    // Handle NaNs:
    if (fp32_is_NaN(a_exp, a_mnt))
        return fp32_process_NaN(a, mode, flags);

    // Handle infinities and zeroes:
    if (!a_mnt)
        return fp32_zero(a_sgn);
    if (a_exp == FP32_EXP_INF && !a_sgn)
        return fp32_infinity(a_sgn);
    if (a_sgn) {
        *flags |= FPLIB_IOC;
        return fp32_defaultNaN();
    }

    a_mnt = fp32_normalise(a_mnt, &a_exp);
    if (!(a_exp & 1)) {
        ++a_exp;
        a_mnt >>= 1;
    }

    // x = (a * 3 + 5) / 8
    x = (a_mnt >> 2) + (a_mnt >> 3) + ((uint32_t)5 << 28);

    // x = (a / x + x) / 2; // 8-bit accuracy
    x = (a_mnt / (x >> 15) + (x >> 16)) << 15;

    // x = (a / x + x) / 2; // 16-bit accuracy
    x = (a_mnt / (x >> 15) + (x >> 16)) << 15;

    // x = (a / x + x) / 2; // 32-bit accuracy
    x = ((((uint64_t)a_mnt << 32) / x) >> 2) + (x >> 1);

    x_sgn = 0;
    x_exp = (a_exp + 147) >> 1;
    x_mnt = ((x - (1 << 5)) >> 6) + 1;
    t1 = (uint64_t)x_mnt * x_mnt;
    t0 = (uint64_t)a_mnt << 19;
    if (t1 > t0) {
        --x_mnt;
    }

    x_mnt = fp32_normalise(x_mnt, &x_exp);

    return fp32_round(x_sgn, x_exp, x_mnt << 1 | (t1 != t0), mode, flags);
}

static uint64_t
fp64_sqrt(uint64_t a, int mode, int *flags)
{
    int a_sgn, a_exp, x_sgn, x_exp, c;
    uint64_t a_mnt, x_mnt, r, x0, x1;
    uint32_t x;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);

    // Handle NaNs:
    if (fp64_is_NaN(a_exp, a_mnt))
        return fp64_process_NaN(a, mode, flags);

    // Handle infinities and zeroes:
    if (!a_mnt)
        return fp64_zero(a_sgn);
    if (a_exp == FP64_EXP_INF && !a_sgn)
        return fp64_infinity(a_sgn);
    if (a_sgn) {
        *flags |= FPLIB_IOC;
        return fp64_defaultNaN();
    }

    a_mnt = fp64_normalise(a_mnt, &a_exp);
    if (a_exp & 1) {
        ++a_exp;
        a_mnt >>= 1;
    }

    // x = (a * 3 + 5) / 8
    x = (a_mnt >> 34) + (a_mnt >> 35) + ((uint32_t)5 << 28);

    // x = (a / x + x) / 2; // 8-bit accuracy
    x = ((a_mnt >> 32) / (x >> 15) + (x >> 16)) << 15;

    // x = (a / x + x) / 2; // 16-bit accuracy
    x = ((a_mnt >> 32) / (x >> 15) + (x >> 16)) << 15;

    // x = (a / x + x) / 2; // 32-bit accuracy
    x = ((a_mnt / x) >> 2) + (x >> 1);

    // r = 1 / x; // 32-bit accuracy
    r = ((uint64_t)1 << 62) / x;

    // r = r * (2 - x * r); // 64-bit accuracy
    mul64x32(&x0, &x1, -(uint64_t)x * r << 1, r);
    lsr128(&x0, &x1, x0, x1, 31);

    // x = (x + a * r) / 2; // 64-bit accuracy
    mul62x62(&x0, &x1, a_mnt >> 10, x0 >> 2);
    lsl128(&x0, &x1, x0, x1, 5);
    lsr128(&x0, &x1, x0, x1, 56);

    x0 = ((uint64_t)x << 31) + (x0 >> 1);

    x_sgn = 0;
    x_exp = (a_exp + 1053) >> 1;
    x_mnt = x0;
    x_mnt = ((x_mnt - (1 << 8)) >> 9) + 1;
    mul62x62(&x0, &x1, x_mnt, x_mnt);
    lsl128(&x0, &x1, x0, x1, 19);
    c = cmp128(x0, x1, 0, a_mnt);
    if (c > 0)
        --x_mnt;

    x_mnt = fp64_normalise(x_mnt, &x_exp);

    return fp64_round(x_sgn, x_exp, x_mnt << 1 | !!c, mode, flags);
}

static int
modeConv(FPSCR fpscr)
{
    uint32_t x = (uint32_t)fpscr;
    return (x >> 22 & 0xf) | (x >> 19 & 1 ? FPLIB_FZ16 : 0);
    // AHP bit is ignored. Only fplibConvert uses AHP.
}
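// modeConv maps FPSCR bits onto the FPLIB_* mode word: FPSCR[23:22]
// (RMode), FPSCR[24] (FZ) and FPSCR[25] (DN) land directly on the low
// mode bits, and FPSCR[19] (FZ16) is translated separately. set_fpscr
// below differs from set_fpscr0 in suppressing IXC when the only
// inexactness came from an underflow under flush-to-zero. The fplib*
// entry points that follow are thin wrappers: convert the FPSCR to a
// mode word, call the width-specific helper with a local flags
// accumulator, and merge the flags back.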
2228 fpscr.ufc = 1; 2229 } 2230 if ((flags & FPLIB_IXC) && !(underflow && fpscr.fz)) { 2231 fpscr.ixc = 1; 2232 } 2233} 2234 2235template <> 2236bool 2237fplibCompareEQ(uint16_t a, uint16_t b, FPSCR &fpscr) 2238{ 2239 int flags = 0; 2240 int x = fp16_compare_eq(a, b, modeConv(fpscr), &flags); 2241 set_fpscr(fpscr, flags); 2242 return x; 2243} 2244 2245template <> 2246bool 2247fplibCompareGE(uint16_t a, uint16_t b, FPSCR &fpscr) 2248{ 2249 int flags = 0; 2250 int x = fp16_compare_ge(a, b, modeConv(fpscr), &flags); 2251 set_fpscr(fpscr, flags); 2252 return x; 2253} 2254 2255template <> 2256bool 2257fplibCompareGT(uint16_t a, uint16_t b, FPSCR &fpscr) 2258{ 2259 int flags = 0; 2260 int x = fp16_compare_gt(a, b, modeConv(fpscr), &flags); 2261 set_fpscr(fpscr, flags); 2262 return x; 2263} 2264 2265template <> 2266bool 2267fplibCompareUN(uint16_t a, uint16_t b, FPSCR &fpscr) 2268{ 2269 int flags = 0; 2270 int x = fp16_compare_un(a, b, modeConv(fpscr), &flags); 2271 set_fpscr(fpscr, flags); 2272 return x; 2273} 2274 2275template <> 2276bool 2277fplibCompareEQ(uint32_t a, uint32_t b, FPSCR &fpscr) 2278{ 2279 int flags = 0; 2280 int x = fp32_compare_eq(a, b, modeConv(fpscr), &flags); 2281 set_fpscr(fpscr, flags); 2282 return x; 2283} 2284 2285template <> 2286bool 2287fplibCompareGE(uint32_t a, uint32_t b, FPSCR &fpscr) 2288{ 2289 int flags = 0; 2290 int x = fp32_compare_ge(a, b, modeConv(fpscr), &flags); 2291 set_fpscr(fpscr, flags); 2292 return x; 2293} 2294 2295template <> 2296bool 2297fplibCompareGT(uint32_t a, uint32_t b, FPSCR &fpscr) 2298{ 2299 int flags = 0; 2300 int x = fp32_compare_gt(a, b, modeConv(fpscr), &flags); 2301 set_fpscr(fpscr, flags); 2302 return x; 2303} 2304 2305template <> 2306bool 2307fplibCompareUN(uint32_t a, uint32_t b, FPSCR &fpscr) 2308{ 2309 int flags = 0; 2310 int x = fp32_compare_un(a, b, modeConv(fpscr), &flags); 2311 set_fpscr(fpscr, flags); 2312 return x; 2313} 2314 2315template <> 2316bool 2317fplibCompareEQ(uint64_t a, uint64_t b, FPSCR &fpscr) 2318{ 2319 int flags = 0; 2320 int x = fp64_compare_eq(a, b, modeConv(fpscr), &flags); 2321 set_fpscr(fpscr, flags); 2322 return x; 2323} 2324 2325template <> 2326bool 2327fplibCompareGE(uint64_t a, uint64_t b, FPSCR &fpscr) 2328{ 2329 int flags = 0; 2330 int x = fp64_compare_ge(a, b, modeConv(fpscr), &flags); 2331 set_fpscr(fpscr, flags); 2332 return x; 2333} 2334 2335template <> 2336bool 2337fplibCompareGT(uint64_t a, uint64_t b, FPSCR &fpscr) 2338{ 2339 int flags = 0; 2340 int x = fp64_compare_gt(a, b, modeConv(fpscr), &flags); 2341 set_fpscr(fpscr, flags); 2342 return x; 2343} 2344 2345template <> 2346bool 2347fplibCompareUN(uint64_t a, uint64_t b, FPSCR &fpscr) 2348{ 2349 int flags = 0; 2350 int x = fp64_compare_un(a, b, modeConv(fpscr), &flags); 2351 set_fpscr(fpscr, flags); 2352 return x; 2353} 2354 2355template <> 2356uint16_t 2357fplibAbs(uint16_t op) 2358{ 2359 return op & ~(1ULL << (FP16_BITS - 1)); 2360} 2361 2362template <> 2363uint32_t 2364fplibAbs(uint32_t op) 2365{ 2366 return op & ~(1ULL << (FP32_BITS - 1)); 2367} 2368 2369template <> 2370uint64_t 2371fplibAbs(uint64_t op) 2372{ 2373 return op & ~(1ULL << (FP64_BITS - 1)); 2374} 2375 2376template <> 2377uint16_t 2378fplibAdd(uint16_t op1, uint16_t op2, FPSCR &fpscr) 2379{ 2380 int flags = 0; 2381 uint16_t result = fp16_add(op1, op2, 0, modeConv(fpscr), &flags); 2382 set_fpscr0(fpscr, flags); 2383 return result; 2384} 2385 2386template <> 2387uint32_t 2388fplibAdd(uint32_t op1, uint32_t op2, FPSCR &fpscr) 2389{ 2390 int flags = 0; 2391 uint32_t result = 
fp32_add(op1, op2, 0, modeConv(fpscr), &flags); 2392 set_fpscr0(fpscr, flags); 2393 return result; 2394} 2395 2396template <> 2397uint64_t 2398fplibAdd(uint64_t op1, uint64_t op2, FPSCR &fpscr) 2399{ 2400 int flags = 0; 2401 uint64_t result = fp64_add(op1, op2, 0, modeConv(fpscr), &flags); 2402 set_fpscr0(fpscr, flags); 2403 return result; 2404} 2405 2406template <> 2407int 2408fplibCompare(uint16_t op1, uint16_t op2, bool signal_nans, FPSCR &fpscr) 2409{ 2410 int mode = modeConv(fpscr); 2411 int flags = 0; 2412 int sgn1, exp1, sgn2, exp2, result; 2413 uint16_t mnt1, mnt2; 2414 2415 fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); 2416 fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); 2417 2418 if (fp16_is_NaN(exp1, mnt1) || fp16_is_NaN(exp2, mnt2)) { 2419 result = 3; 2420 if (fp16_is_signalling_NaN(exp1, mnt1) || 2421 fp16_is_signalling_NaN(exp2, mnt2) || signal_nans) 2422 flags |= FPLIB_IOC; 2423 } else { 2424 if (op1 == op2 || (!mnt1 && !mnt2)) { 2425 result = 6; 2426 } else if (sgn1 != sgn2) { 2427 result = sgn1 ? 8 : 2; 2428 } else if (exp1 != exp2) { 2429 result = sgn1 ^ (exp1 < exp2) ? 8 : 2; 2430 } else { 2431 result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2; 2432 } 2433 } 2434 2435 set_fpscr0(fpscr, flags); 2436 2437 return result; 2438} 2439 2440template <> 2441int 2442fplibCompare(uint32_t op1, uint32_t op2, bool signal_nans, FPSCR &fpscr) 2443{ 2444 int mode = modeConv(fpscr); 2445 int flags = 0; 2446 int sgn1, exp1, sgn2, exp2, result; 2447 uint32_t mnt1, mnt2; 2448 2449 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); 2450 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); 2451 2452 if (fp32_is_NaN(exp1, mnt1) || fp32_is_NaN(exp2, mnt2)) { 2453 result = 3; 2454 if (fp32_is_signalling_NaN(exp1, mnt1) || 2455 fp32_is_signalling_NaN(exp2, mnt2) || signal_nans) 2456 flags |= FPLIB_IOC; 2457 } else { 2458 if (op1 == op2 || (!mnt1 && !mnt2)) { 2459 result = 6; 2460 } else if (sgn1 != sgn2) { 2461 result = sgn1 ? 8 : 2; 2462 } else if (exp1 != exp2) { 2463 result = sgn1 ^ (exp1 < exp2) ? 8 : 2; 2464 } else { 2465 result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2; 2466 } 2467 } 2468 2469 set_fpscr0(fpscr, flags); 2470 2471 return result; 2472} 2473 2474template <> 2475int 2476fplibCompare(uint64_t op1, uint64_t op2, bool signal_nans, FPSCR &fpscr) 2477{ 2478 int mode = modeConv(fpscr); 2479 int flags = 0; 2480 int sgn1, exp1, sgn2, exp2, result; 2481 uint64_t mnt1, mnt2; 2482 2483 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); 2484 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); 2485 2486 if (fp64_is_NaN(exp1, mnt1) || fp64_is_NaN(exp2, mnt2)) { 2487 result = 3; 2488 if (fp64_is_signalling_NaN(exp1, mnt1) || 2489 fp64_is_signalling_NaN(exp2, mnt2) || signal_nans) 2490 flags |= FPLIB_IOC; 2491 } else { 2492 if (op1 == op2 || (!mnt1 && !mnt2)) { 2493 result = 6; 2494 } else if (sgn1 != sgn2) { 2495 result = sgn1 ? 8 : 2; 2496 } else if (exp1 != exp2) { 2497 result = sgn1 ^ (exp1 < exp2) ? 8 : 2; 2498 } else { 2499 result = sgn1 ^ (mnt1 < mnt2) ? 
8 : 2; 2500 } 2501 } 2502 2503 set_fpscr0(fpscr, flags); 2504 2505 return result; 2506} 2507 2508static uint16_t 2509fp16_FPConvertNaN_32(uint32_t op) 2510{ 2511 return fp16_pack(op >> (FP32_BITS - 1), FP16_EXP_INF, 2512 1ULL << (FP16_MANT_BITS - 1) | 2513 op >> (FP32_MANT_BITS - FP16_MANT_BITS)); 2514} 2515 2516static uint16_t 2517fp16_FPConvertNaN_64(uint64_t op) 2518{ 2519 return fp16_pack(op >> (FP64_BITS - 1), FP16_EXP_INF, 2520 1ULL << (FP16_MANT_BITS - 1) | 2521 op >> (FP64_MANT_BITS - FP16_MANT_BITS)); 2522} 2523 2524static uint32_t 2525fp32_FPConvertNaN_16(uint16_t op) 2526{ 2527 return fp32_pack(op >> (FP16_BITS - 1), FP32_EXP_INF, 2528 1ULL << (FP32_MANT_BITS - 1) | 2529 (uint32_t)op << (FP32_MANT_BITS - FP16_MANT_BITS)); 2530} 2531 2532static uint32_t 2533fp32_FPConvertNaN_64(uint64_t op) 2534{ 2535 return fp32_pack(op >> (FP64_BITS - 1), FP32_EXP_INF, 2536 1ULL << (FP32_MANT_BITS - 1) | 2537 op >> (FP64_MANT_BITS - FP32_MANT_BITS)); 2538} 2539 2540static uint64_t 2541fp64_FPConvertNaN_16(uint16_t op) 2542{ 2543 return fp64_pack(op >> (FP16_BITS - 1), FP64_EXP_INF, 2544 1ULL << (FP64_MANT_BITS - 1) | 2545 (uint64_t)op << (FP64_MANT_BITS - FP16_MANT_BITS)); 2546} 2547 2548static uint64_t 2549fp64_FPConvertNaN_32(uint32_t op) 2550{ 2551 return fp64_pack(op >> (FP32_BITS - 1), FP64_EXP_INF, 2552 1ULL << (FP64_MANT_BITS - 1) | 2553 (uint64_t)op << (FP64_MANT_BITS - FP32_MANT_BITS)); 2554} 2555 2556static uint16_t 2557fp16_FPOnePointFive(int sgn) 2558{ 2559 return fp16_pack(sgn, FP16_EXP_BIAS, 1ULL << (FP16_MANT_BITS - 1)); 2560} 2561 2562static uint32_t 2563fp32_FPOnePointFive(int sgn) 2564{ 2565 return fp32_pack(sgn, FP32_EXP_BIAS, 1ULL << (FP32_MANT_BITS - 1)); 2566} 2567 2568static uint64_t 2569fp64_FPOnePointFive(int sgn) 2570{ 2571 return fp64_pack(sgn, FP64_EXP_BIAS, 1ULL << (FP64_MANT_BITS - 1)); 2572} 2573 2574static uint16_t 2575fp16_FPThree(int sgn) 2576{ 2577 return fp16_pack(sgn, FP16_EXP_BIAS + 1, 1ULL << (FP16_MANT_BITS - 1)); 2578} 2579 2580static uint32_t 2581fp32_FPThree(int sgn) 2582{ 2583 return fp32_pack(sgn, FP32_EXP_BIAS + 1, 1ULL << (FP32_MANT_BITS - 1)); 2584} 2585 2586static uint64_t 2587fp64_FPThree(int sgn) 2588{ 2589 return fp64_pack(sgn, FP64_EXP_BIAS + 1, 1ULL << (FP64_MANT_BITS - 1)); 2590} 2591 2592static uint16_t 2593fp16_FPTwo(int sgn) 2594{ 2595 return fp16_pack(sgn, FP16_EXP_BIAS + 1, 0); 2596} 2597 2598static uint32_t 2599fp32_FPTwo(int sgn) 2600{ 2601 return fp32_pack(sgn, FP32_EXP_BIAS + 1, 0); 2602} 2603 2604static uint64_t 2605fp64_FPTwo(int sgn) 2606{ 2607 return fp64_pack(sgn, FP64_EXP_BIAS + 1, 0); 2608} 2609 2610template <> 2611uint16_t 2612fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr) 2613{ 2614 int mode = modeConv(fpscr); 2615 int flags = 0; 2616 int sgn, exp; 2617 uint32_t mnt; 2618 uint16_t result; 2619 2620 // Unpack floating-point operand optionally with flush-to-zero: 2621 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags); 2622 2623 bool alt_hp = fpscr.ahp; 2624 2625 if (fp32_is_NaN(exp, mnt)) { 2626 if (alt_hp) { 2627 result = fp16_zero(sgn); 2628 } else if (fpscr.dn) { 2629 result = fp16_defaultNaN(); 2630 } else { 2631 result = fp16_FPConvertNaN_32(op); 2632 } 2633 if (!(mnt >> (FP32_MANT_BITS - 1) & 1) || alt_hp) { 2634 flags |= FPLIB_IOC; 2635 } 2636 } else if (exp == FP32_EXP_INF) { 2637 if (alt_hp) { 2638 result = ((uint16_t)sgn << (FP16_BITS - 1) | 2639 ((1ULL << (FP16_BITS - 1)) - 1)); 2640 flags |= FPLIB_IOC; 2641 } else { 2642 result = fp16_infinity(sgn); 2643 } 2644 } else if (!mnt) { 2645 result = 
fp16_zero(sgn); 2646 } else { 2647 result = 2648 fp16_round_(sgn, exp - FP32_EXP_BIAS + FP16_EXP_BIAS, 2649 mnt >> (FP32_MANT_BITS - FP16_BITS) | 2650 !!(mnt & ((1ULL << (FP32_MANT_BITS - FP16_BITS)) - 1)), 2651 rounding, (mode & 0xf) | alt_hp << 4, &flags); 2652 } 2653 2654 set_fpscr0(fpscr, flags); 2655 2656 return result; 2657} 2658 2659template <> 2660uint16_t 2661fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr) 2662{ 2663 int mode = modeConv(fpscr); 2664 int flags = 0; 2665 int sgn, exp; 2666 uint64_t mnt; 2667 uint16_t result; 2668 2669 // Unpack floating-point operand optionally with flush-to-zero: 2670 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags); 2671 2672 bool alt_hp = fpscr.ahp; 2673 2674 if (fp64_is_NaN(exp, mnt)) { 2675 if (alt_hp) { 2676 result = fp16_zero(sgn); 2677 } else if (fpscr.dn) { 2678 result = fp16_defaultNaN(); 2679 } else { 2680 result = fp16_FPConvertNaN_64(op); 2681 } 2682 if (!(mnt >> (FP64_MANT_BITS - 1) & 1) || alt_hp) { 2683 flags |= FPLIB_IOC; 2684 } 2685 } else if (exp == FP64_EXP_INF) { 2686 if (alt_hp) { 2687 result = ((uint16_t)sgn << (FP16_BITS - 1) | 2688 ((1ULL << (FP16_BITS - 1)) - 1)); 2689 flags |= FPLIB_IOC; 2690 } else { 2691 result = fp16_infinity(sgn); 2692 } 2693 } else if (!mnt) { 2694 result = fp16_zero(sgn); 2695 } else { 2696 result = 2697 fp16_round_(sgn, exp - FP64_EXP_BIAS + FP16_EXP_BIAS, 2698 mnt >> (FP64_MANT_BITS - FP16_BITS) | 2699 !!(mnt & ((1ULL << (FP64_MANT_BITS - FP16_BITS)) - 1)), 2700 rounding, (mode & 0xf) | alt_hp << 4, &flags); 2701 } 2702 2703 set_fpscr0(fpscr, flags); 2704 2705 return result; 2706} 2707 2708template <> 2709uint32_t 2710fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr) 2711{ 2712 int mode = modeConv(fpscr); 2713 int flags = 0; 2714 int sgn, exp; 2715 uint16_t mnt; 2716 uint32_t result; 2717 2718 // Unpack floating-point operand optionally with flush-to-zero: 2719 fp16_unpack(&sgn, &exp, &mnt, op, mode & 0xf, &flags); 2720 2721 if (fp16_is_NaN(exp, mnt) && !fpscr.ahp) { 2722 if (fpscr.dn) { 2723 result = fp32_defaultNaN(); 2724 } else { 2725 result = fp32_FPConvertNaN_16(op); 2726 } 2727 if (!(mnt >> (FP16_MANT_BITS - 1) & 1)) { 2728 flags |= FPLIB_IOC; 2729 } 2730 } else if (exp == FP16_EXP_INF && !fpscr.ahp) { 2731 result = fp32_infinity(sgn); 2732 } else if (!mnt) { 2733 result = fp32_zero(sgn); 2734 } else { 2735 mnt = fp16_normalise(mnt, &exp); 2736 result = fp32_pack(sgn, (exp - FP16_EXP_BIAS + 2737 FP32_EXP_BIAS + FP16_EXP_BITS), 2738 (uint32_t)mnt << (FP32_MANT_BITS - FP16_BITS + 1)); 2739 } 2740 2741 set_fpscr0(fpscr, flags); 2742 2743 return result; 2744} 2745 2746template <> 2747uint32_t 2748fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr) 2749{ 2750 int mode = modeConv(fpscr); 2751 int flags = 0; 2752 int sgn, exp; 2753 uint64_t mnt; 2754 uint32_t result; 2755 2756 // Unpack floating-point operand optionally with flush-to-zero: 2757 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags); 2758 2759 if (fp64_is_NaN(exp, mnt)) { 2760 if (fpscr.dn) { 2761 result = fp32_defaultNaN(); 2762 } else { 2763 result = fp32_FPConvertNaN_64(op); 2764 } 2765 if (!(mnt >> (FP64_MANT_BITS - 1) & 1)) { 2766 flags |= FPLIB_IOC; 2767 } 2768 } else if (exp == FP64_EXP_INF) { 2769 result = fp32_infinity(sgn); 2770 } else if (!mnt) { 2771 result = fp32_zero(sgn); 2772 } else { 2773 result = 2774 fp32_round_(sgn, exp - FP64_EXP_BIAS + FP32_EXP_BIAS, 2775 mnt >> (FP64_MANT_BITS - FP32_BITS) | 2776 !!(mnt & ((1ULL << (FP64_MANT_BITS - FP32_BITS)) - 1)), 2777 rounding, mode, &flags); 2778 
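            // The double-precision significand bits shifted out above are
            // OR-reduced into the low bit as a "sticky" bit, so fp32_round_
            // still sees that the value was inexact and can round and raise
            // IXC correctly.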
} 2779 2780 set_fpscr0(fpscr, flags); 2781 2782 return result; 2783} 2784 2785template <> 2786uint64_t 2787fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr) 2788{ 2789 int mode = modeConv(fpscr); 2790 int flags = 0; 2791 int sgn, exp; 2792 uint16_t mnt; 2793 uint64_t result; 2794 2795 // Unpack floating-point operand optionally with flush-to-zero: 2796 fp16_unpack(&sgn, &exp, &mnt, op, mode & 0xf, &flags); 2797 2798 if (fp16_is_NaN(exp, mnt) && !fpscr.ahp) { 2799 if (fpscr.dn) { 2800 result = fp64_defaultNaN(); 2801 } else { 2802 result = fp64_FPConvertNaN_16(op); 2803 } 2804 if (!(mnt >> (FP16_MANT_BITS - 1) & 1)) { 2805 flags |= FPLIB_IOC; 2806 } 2807 } else if (exp == FP16_EXP_INF && !fpscr.ahp) { 2808 result = fp64_infinity(sgn); 2809 } else if (!mnt) { 2810 result = fp64_zero(sgn); 2811 } else { 2812 mnt = fp16_normalise(mnt, &exp); 2813 result = fp64_pack(sgn, (exp - FP16_EXP_BIAS + 2814 FP64_EXP_BIAS + FP16_EXP_BITS), 2815 (uint64_t)mnt << (FP64_MANT_BITS - FP16_BITS + 1)); 2816 } 2817 2818 set_fpscr0(fpscr, flags); 2819 2820 return result; 2821} 2822 2823template <> 2824uint64_t 2825fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr) 2826{ 2827 int mode = modeConv(fpscr); 2828 int flags = 0; 2829 int sgn, exp; 2830 uint32_t mnt; 2831 uint64_t result; 2832 2833 // Unpack floating-point operand optionally with flush-to-zero: 2834 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags); 2835 2836 if (fp32_is_NaN(exp, mnt)) { 2837 if (fpscr.dn) { 2838 result = fp64_defaultNaN(); 2839 } else { 2840 result = fp64_FPConvertNaN_32(op); 2841 } 2842 if (!(mnt >> (FP32_MANT_BITS - 1) & 1)) { 2843 flags |= FPLIB_IOC; 2844 } 2845 } else if (exp == FP32_EXP_INF) { 2846 result = fp64_infinity(sgn); 2847 } else if (!mnt) { 2848 result = fp64_zero(sgn); 2849 } else { 2850 mnt = fp32_normalise(mnt, &exp); 2851 result = fp64_pack(sgn, (exp - FP32_EXP_BIAS + 2852 FP64_EXP_BIAS + FP32_EXP_BITS), 2853 (uint64_t)mnt << (FP64_MANT_BITS - FP32_BITS + 1)); 2854 } 2855 2856 set_fpscr0(fpscr, flags); 2857 2858 return result; 2859} 2860 2861template <> 2862uint16_t 2863fplibMulAdd(uint16_t addend, uint16_t op1, uint16_t op2, FPSCR &fpscr) 2864{ 2865 int flags = 0; 2866 uint16_t result = fp16_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags); 2867 set_fpscr0(fpscr, flags); 2868 return result; 2869} 2870 2871template <> 2872uint32_t 2873fplibMulAdd(uint32_t addend, uint32_t op1, uint32_t op2, FPSCR &fpscr) 2874{ 2875 int flags = 0; 2876 uint32_t result = fp32_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags); 2877 set_fpscr0(fpscr, flags); 2878 return result; 2879} 2880 2881template <> 2882uint64_t 2883fplibMulAdd(uint64_t addend, uint64_t op1, uint64_t op2, FPSCR &fpscr) 2884{ 2885 int flags = 0; 2886 uint64_t result = fp64_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags); 2887 set_fpscr0(fpscr, flags); 2888 return result; 2889} 2890 2891template <> 2892uint16_t 2893fplibDiv(uint16_t op1, uint16_t op2, FPSCR &fpscr) 2894{ 2895 int flags = 0; 2896 uint16_t result = fp16_div(op1, op2, modeConv(fpscr), &flags); 2897 set_fpscr0(fpscr, flags); 2898 return result; 2899} 2900 2901template <> 2902uint32_t 2903fplibDiv(uint32_t op1, uint32_t op2, FPSCR &fpscr) 2904{ 2905 int flags = 0; 2906 uint32_t result = fp32_div(op1, op2, modeConv(fpscr), &flags); 2907 set_fpscr0(fpscr, flags); 2908 return result; 2909} 2910 2911template <> 2912uint64_t 2913fplibDiv(uint64_t op1, uint64_t op2, FPSCR &fpscr) 2914{ 2915 int flags = 0; 2916 uint64_t result = fp64_div(op1, op2, modeConv(fpscr), &flags); 2917 
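    // Each of these wrappers follows the same pattern: modeConv() extracts
    // the FPSCR control bits, the bit-exact helper accumulates exception
    // flags in a local word, and set_fpscr0() (presumably, like set_fpscr
    // above) folds those flags back into the cumulative FPSCR status bits.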
set_fpscr0(fpscr, flags); 2918 return result; 2919} 2920 2921template <> 2922uint16_t 2923fplibExpA(uint16_t op) 2924{ 2925 static uint16_t coeff[32] = { 2926 0x0000, 2927 0x0016, 2928 0x002d, 2929 0x0045, 2930 0x005d, 2931 0x0075, 2932 0x008e, 2933 0x00a8, 2934 0x00c2, 2935 0x00dc, 2936 0x00f8, 2937 0x0114, 2938 0x0130, 2939 0x014d, 2940 0x016b, 2941 0x0189, 2942 0x01a8, 2943 0x01c8, 2944 0x01e8, 2945 0x0209, 2946 0x022b, 2947 0x024e, 2948 0x0271, 2949 0x0295, 2950 0x02ba, 2951 0x02e0, 2952 0x0306, 2953 0x032e, 2954 0x0356, 2955 0x037f, 2956 0x03a9, 2957 0x03d4 2958 }; 2959 return ((((op >> 5) & ((1 << FP16_EXP_BITS) - 1)) << FP16_MANT_BITS) | 2960 coeff[op & ((1 << 5) - 1)]); 2961} 2962 2963template <> 2964uint32_t 2965fplibExpA(uint32_t op) 2966{ 2967 static uint32_t coeff[64] = { 2968 0x000000, 2969 0x0164d2, 2970 0x02cd87, 2971 0x043a29, 2972 0x05aac3, 2973 0x071f62, 2974 0x08980f, 2975 0x0a14d5, 2976 0x0b95c2, 2977 0x0d1adf, 2978 0x0ea43a, 2979 0x1031dc, 2980 0x11c3d3, 2981 0x135a2b, 2982 0x14f4f0, 2983 0x16942d, 2984 0x1837f0, 2985 0x19e046, 2986 0x1b8d3a, 2987 0x1d3eda, 2988 0x1ef532, 2989 0x20b051, 2990 0x227043, 2991 0x243516, 2992 0x25fed7, 2993 0x27cd94, 2994 0x29a15b, 2995 0x2b7a3a, 2996 0x2d583f, 2997 0x2f3b79, 2998 0x3123f6, 2999 0x3311c4, 3000 0x3504f3, 3001 0x36fd92, 3002 0x38fbaf, 3003 0x3aff5b, 3004 0x3d08a4, 3005 0x3f179a, 3006 0x412c4d, 3007 0x4346cd, 3008 0x45672a, 3009 0x478d75, 3010 0x49b9be, 3011 0x4bec15, 3012 0x4e248c, 3013 0x506334, 3014 0x52a81e, 3015 0x54f35b, 3016 0x5744fd, 3017 0x599d16, 3018 0x5bfbb8, 3019 0x5e60f5, 3020 0x60ccdf, 3021 0x633f89, 3022 0x65b907, 3023 0x68396a, 3024 0x6ac0c7, 3025 0x6d4f30, 3026 0x6fe4ba, 3027 0x728177, 3028 0x75257d, 3029 0x77d0df, 3030 0x7a83b3, 3031 0x7d3e0c 3032 }; 3033 return ((((op >> 6) & ((1 << FP32_EXP_BITS) - 1)) << FP32_MANT_BITS) | 3034 coeff[op & ((1 << 6) - 1)]); 3035} 3036 3037template <> 3038uint64_t 3039fplibExpA(uint64_t op) 3040{ 3041 static uint64_t coeff[64] = { 3042 0x0000000000000ULL, 3043 0x02c9a3e778061ULL, 3044 0x059b0d3158574ULL, 3045 0x0874518759bc8ULL, 3046 0x0b5586cf9890fULL, 3047 0x0e3ec32d3d1a2ULL, 3048 0x11301d0125b51ULL, 3049 0x1429aaea92de0ULL, 3050 0x172b83c7d517bULL, 3051 0x1a35beb6fcb75ULL, 3052 0x1d4873168b9aaULL, 3053 0x2063b88628cd6ULL, 3054 0x2387a6e756238ULL, 3055 0x26b4565e27cddULL, 3056 0x29e9df51fdee1ULL, 3057 0x2d285a6e4030bULL, 3058 0x306fe0a31b715ULL, 3059 0x33c08b26416ffULL, 3060 0x371a7373aa9cbULL, 3061 0x3a7db34e59ff7ULL, 3062 0x3dea64c123422ULL, 3063 0x4160a21f72e2aULL, 3064 0x44e086061892dULL, 3065 0x486a2b5c13cd0ULL, 3066 0x4bfdad5362a27ULL, 3067 0x4f9b2769d2ca7ULL, 3068 0x5342b569d4f82ULL, 3069 0x56f4736b527daULL, 3070 0x5ab07dd485429ULL, 3071 0x5e76f15ad2148ULL, 3072 0x6247eb03a5585ULL, 3073 0x6623882552225ULL, 3074 0x6a09e667f3bcdULL, 3075 0x6dfb23c651a2fULL, 3076 0x71f75e8ec5f74ULL, 3077 0x75feb564267c9ULL, 3078 0x7a11473eb0187ULL, 3079 0x7e2f336cf4e62ULL, 3080 0x82589994cce13ULL, 3081 0x868d99b4492edULL, 3082 0x8ace5422aa0dbULL, 3083 0x8f1ae99157736ULL, 3084 0x93737b0cdc5e5ULL, 3085 0x97d829fde4e50ULL, 3086 0x9c49182a3f090ULL, 3087 0xa0c667b5de565ULL, 3088 0xa5503b23e255dULL, 3089 0xa9e6b5579fdbfULL, 3090 0xae89f995ad3adULL, 3091 0xb33a2b84f15fbULL, 3092 0xb7f76f2fb5e47ULL, 3093 0xbcc1e904bc1d2ULL, 3094 0xc199bdd85529cULL, 3095 0xc67f12e57d14bULL, 3096 0xcb720dcef9069ULL, 3097 0xd072d4a07897cULL, 3098 0xd5818dcfba487ULL, 3099 0xda9e603db3285ULL, 3100 0xdfc97337b9b5fULL, 3101 0xe502ee78b3ff6ULL, 3102 0xea4afa2a490daULL, 3103 0xefa1bee615a27ULL, 3104 0xf50765b6e4540ULL, 
3105 0xfa7c1819e90d8ULL 3106 }; 3107 return ((((op >> 6) & ((1 << FP64_EXP_BITS) - 1)) << FP64_MANT_BITS) | 3108 coeff[op & ((1 << 6) - 1)]); 3109} 3110 3111static uint16_t 3112fp16_repack(int sgn, int exp, uint16_t mnt) 3113{ 3114 return fp16_pack(sgn, mnt >> FP16_MANT_BITS ? exp : 0, mnt); 3115} 3116 3117static uint32_t 3118fp32_repack(int sgn, int exp, uint32_t mnt) 3119{ 3120 return fp32_pack(sgn, mnt >> FP32_MANT_BITS ? exp : 0, mnt); 3121} 3122 3123static uint64_t 3124fp64_repack(int sgn, int exp, uint64_t mnt) 3125{ 3126 return fp64_pack(sgn, mnt >> FP64_MANT_BITS ? exp : 0, mnt); 3127} 3128 3129static void 3130fp16_minmaxnum(uint16_t *op1, uint16_t *op2, int sgn) 3131{ 3132 // Treat a single quiet-NaN as +Infinity/-Infinity 3133 if (!((uint16_t)~(*op1 << 1) >> FP16_MANT_BITS) && 3134 (uint16_t)~(*op2 << 1) >> FP16_MANT_BITS) 3135 *op1 = fp16_infinity(sgn); 3136 if (!((uint16_t)~(*op2 << 1) >> FP16_MANT_BITS) && 3137 (uint16_t)~(*op1 << 1) >> FP16_MANT_BITS) 3138 *op2 = fp16_infinity(sgn); 3139} 3140 3141static void 3142fp32_minmaxnum(uint32_t *op1, uint32_t *op2, int sgn) 3143{ 3144 // Treat a single quiet-NaN as +Infinity/-Infinity 3145 if (!((uint32_t)~(*op1 << 1) >> FP32_MANT_BITS) && 3146 (uint32_t)~(*op2 << 1) >> FP32_MANT_BITS) 3147 *op1 = fp32_infinity(sgn); 3148 if (!((uint32_t)~(*op2 << 1) >> FP32_MANT_BITS) && 3149 (uint32_t)~(*op1 << 1) >> FP32_MANT_BITS) 3150 *op2 = fp32_infinity(sgn); 3151} 3152 3153static void 3154fp64_minmaxnum(uint64_t *op1, uint64_t *op2, int sgn) 3155{ 3156 // Treat a single quiet-NaN as +Infinity/-Infinity 3157 if (!((uint64_t)~(*op1 << 1) >> FP64_MANT_BITS) && 3158 (uint64_t)~(*op2 << 1) >> FP64_MANT_BITS) 3159 *op1 = fp64_infinity(sgn); 3160 if (!((uint64_t)~(*op2 << 1) >> FP64_MANT_BITS) && 3161 (uint64_t)~(*op1 << 1) >> FP64_MANT_BITS) 3162 *op2 = fp64_infinity(sgn); 3163} 3164 3165template <> 3166uint16_t 3167fplibMax(uint16_t op1, uint16_t op2, FPSCR &fpscr) 3168{ 3169 int mode = modeConv(fpscr); 3170 int flags = 0; 3171 int sgn1, exp1, sgn2, exp2; 3172 uint16_t mnt1, mnt2, x, result; 3173 3174 fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); 3175 fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); 3176 3177 if ((x = fp16_process_NaNs(op1, op2, mode, &flags))) { 3178 result = x; 3179 } else { 3180 result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ? 3181 fp16_repack(sgn1, exp1, mnt1) : 3182 fp16_repack(sgn2, exp2, mnt2)); 3183 } 3184 set_fpscr0(fpscr, flags); 3185 return result; 3186} 3187 3188template <> 3189uint32_t 3190fplibMax(uint32_t op1, uint32_t op2, FPSCR &fpscr) 3191{ 3192 int mode = modeConv(fpscr); 3193 int flags = 0; 3194 int sgn1, exp1, sgn2, exp2; 3195 uint32_t mnt1, mnt2, x, result; 3196 3197 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); 3198 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); 3199 3200 if ((x = fp32_process_NaNs(op1, op2, mode, &flags))) { 3201 result = x; 3202 } else { 3203 result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ? 
3204 fp32_repack(sgn1, exp1, mnt1) : 3205 fp32_repack(sgn2, exp2, mnt2)); 3206 } 3207 set_fpscr0(fpscr, flags); 3208 return result; 3209} 3210 3211template <> 3212uint64_t 3213fplibMax(uint64_t op1, uint64_t op2, FPSCR &fpscr) 3214{ 3215 int mode = modeConv(fpscr); 3216 int flags = 0; 3217 int sgn1, exp1, sgn2, exp2; 3218 uint64_t mnt1, mnt2, x, result; 3219 3220 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); 3221 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); 3222 3223 if ((x = fp64_process_NaNs(op1, op2, mode, &flags))) { 3224 result = x; 3225 } else { 3226 result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ? 3227 fp64_repack(sgn1, exp1, mnt1) : 3228 fp64_repack(sgn2, exp2, mnt2)); 3229 } 3230 set_fpscr0(fpscr, flags); 3231 return result; 3232} 3233 3234template <> 3235uint16_t 3236fplibMaxNum(uint16_t op1, uint16_t op2, FPSCR &fpscr) 3237{ 3238 fp16_minmaxnum(&op1, &op2, 1); 3239 return fplibMax<uint16_t>(op1, op2, fpscr); 3240} 3241 3242template <> 3243uint32_t 3244fplibMaxNum(uint32_t op1, uint32_t op2, FPSCR &fpscr) 3245{ 3246 fp32_minmaxnum(&op1, &op2, 1); 3247 return fplibMax<uint32_t>(op1, op2, fpscr); 3248} 3249 3250template <> 3251uint64_t 3252fplibMaxNum(uint64_t op1, uint64_t op2, FPSCR &fpscr) 3253{ 3254 fp64_minmaxnum(&op1, &op2, 1); 3255 return fplibMax<uint64_t>(op1, op2, fpscr); 3256} 3257 3258template <> 3259uint16_t 3260fplibMin(uint16_t op1, uint16_t op2, FPSCR &fpscr) 3261{ 3262 int mode = modeConv(fpscr); 3263 int flags = 0; 3264 int sgn1, exp1, sgn2, exp2; 3265 uint16_t mnt1, mnt2, x, result; 3266 3267 fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); 3268 fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); 3269 3270 if ((x = fp16_process_NaNs(op1, op2, mode, &flags))) { 3271 result = x; 3272 } else { 3273 result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ? 3274 fp16_repack(sgn1, exp1, mnt1) : 3275 fp16_repack(sgn2, exp2, mnt2)); 3276 } 3277 set_fpscr0(fpscr, flags); 3278 return result; 3279} 3280 3281template <> 3282uint32_t 3283fplibMin(uint32_t op1, uint32_t op2, FPSCR &fpscr) 3284{ 3285 int mode = modeConv(fpscr); 3286 int flags = 0; 3287 int sgn1, exp1, sgn2, exp2; 3288 uint32_t mnt1, mnt2, x, result; 3289 3290 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); 3291 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); 3292 3293 if ((x = fp32_process_NaNs(op1, op2, mode, &flags))) { 3294 result = x; 3295 } else { 3296 result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ? 3297 fp32_repack(sgn1, exp1, mnt1) : 3298 fp32_repack(sgn2, exp2, mnt2)); 3299 } 3300 set_fpscr0(fpscr, flags); 3301 return result; 3302} 3303 3304template <> 3305uint64_t 3306fplibMin(uint64_t op1, uint64_t op2, FPSCR &fpscr) 3307{ 3308 int mode = modeConv(fpscr); 3309 int flags = 0; 3310 int sgn1, exp1, sgn2, exp2; 3311 uint64_t mnt1, mnt2, x, result; 3312 3313 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); 3314 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); 3315 3316 if ((x = fp64_process_NaNs(op1, op2, mode, &flags))) { 3317 result = x; 3318 } else { 3319 result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ? 
3320 fp64_repack(sgn1, exp1, mnt1) : 3321 fp64_repack(sgn2, exp2, mnt2)); 3322 } 3323 set_fpscr0(fpscr, flags); 3324 return result; 3325} 3326 3327template <> 3328uint16_t 3329fplibMinNum(uint16_t op1, uint16_t op2, FPSCR &fpscr) 3330{ 3331 fp16_minmaxnum(&op1, &op2, 0); 3332 return fplibMin<uint16_t>(op1, op2, fpscr); 3333} 3334 3335template <> 3336uint32_t 3337fplibMinNum(uint32_t op1, uint32_t op2, FPSCR &fpscr) 3338{ 3339 fp32_minmaxnum(&op1, &op2, 0); 3340 return fplibMin<uint32_t>(op1, op2, fpscr); 3341} 3342 3343template <> 3344uint64_t 3345fplibMinNum(uint64_t op1, uint64_t op2, FPSCR &fpscr) 3346{ 3347 fp64_minmaxnum(&op1, &op2, 0); 3348 return fplibMin<uint64_t>(op1, op2, fpscr); 3349} 3350 3351template <> 3352uint16_t 3353fplibMul(uint16_t op1, uint16_t op2, FPSCR &fpscr) 3354{ 3355 int flags = 0; 3356 uint16_t result = fp16_mul(op1, op2, modeConv(fpscr), &flags); 3357 set_fpscr0(fpscr, flags); 3358 return result; 3359} 3360 3361template <> 3362uint32_t 3363fplibMul(uint32_t op1, uint32_t op2, FPSCR &fpscr) 3364{ 3365 int flags = 0; 3366 uint32_t result = fp32_mul(op1, op2, modeConv(fpscr), &flags); 3367 set_fpscr0(fpscr, flags); 3368 return result; 3369} 3370 3371template <> 3372uint64_t 3373fplibMul(uint64_t op1, uint64_t op2, FPSCR &fpscr) 3374{ 3375 int flags = 0; 3376 uint64_t result = fp64_mul(op1, op2, modeConv(fpscr), &flags); 3377 set_fpscr0(fpscr, flags); 3378 return result; 3379} 3380 3381template <> 3382uint16_t 3383fplibMulX(uint16_t op1, uint16_t op2, FPSCR &fpscr) 3384{ 3385 int mode = modeConv(fpscr); 3386 int flags = 0; 3387 int sgn1, exp1, sgn2, exp2; 3388 uint16_t mnt1, mnt2, result; 3389 3390 fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); 3391 fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); 3392 3393 result = fp16_process_NaNs(op1, op2, mode, &flags); 3394 if (!result) { 3395 if ((exp1 == FP16_EXP_INF && !mnt2) || 3396 (exp2 == FP16_EXP_INF && !mnt1)) { 3397 result = fp16_FPTwo(sgn1 ^ sgn2); 3398 } else if (exp1 == FP16_EXP_INF || exp2 == FP16_EXP_INF) { 3399 result = fp16_infinity(sgn1 ^ sgn2); 3400 } else if (!mnt1 || !mnt2) { 3401 result = fp16_zero(sgn1 ^ sgn2); 3402 } else { 3403 result = fp16_mul(op1, op2, mode, &flags); 3404 } 3405 } 3406 3407 set_fpscr0(fpscr, flags); 3408 3409 return result; 3410} 3411 3412template <> 3413uint32_t 3414fplibMulX(uint32_t op1, uint32_t op2, FPSCR &fpscr) 3415{ 3416 int mode = modeConv(fpscr); 3417 int flags = 0; 3418 int sgn1, exp1, sgn2, exp2; 3419 uint32_t mnt1, mnt2, result; 3420 3421 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); 3422 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); 3423 3424 result = fp32_process_NaNs(op1, op2, mode, &flags); 3425 if (!result) { 3426 if ((exp1 == FP32_EXP_INF && !mnt2) || 3427 (exp2 == FP32_EXP_INF && !mnt1)) { 3428 result = fp32_FPTwo(sgn1 ^ sgn2); 3429 } else if (exp1 == FP32_EXP_INF || exp2 == FP32_EXP_INF) { 3430 result = fp32_infinity(sgn1 ^ sgn2); 3431 } else if (!mnt1 || !mnt2) { 3432 result = fp32_zero(sgn1 ^ sgn2); 3433 } else { 3434 result = fp32_mul(op1, op2, mode, &flags); 3435 } 3436 } 3437 3438 set_fpscr0(fpscr, flags); 3439 3440 return result; 3441} 3442 3443template <> 3444uint64_t 3445fplibMulX(uint64_t op1, uint64_t op2, FPSCR &fpscr) 3446{ 3447 int mode = modeConv(fpscr); 3448 int flags = 0; 3449 int sgn1, exp1, sgn2, exp2; 3450 uint64_t mnt1, mnt2, result; 3451 3452 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); 3453 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); 3454 3455 result = fp64_process_NaNs(op1, op2, mode, &flags); 3456 
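    // When neither operand is a NaN, the branch below applies the FMULX
    // special case: infinity multiplied by zero returns 2.0 with the sign of
    // the product (fp64_FPTwo) rather than the invalid-operation default NaN
    // of an ordinary multiply; everything else falls through to fp64_mul.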
if (!result) { 3457 if ((exp1 == FP64_EXP_INF && !mnt2) || 3458 (exp2 == FP64_EXP_INF && !mnt1)) { 3459 result = fp64_FPTwo(sgn1 ^ sgn2); 3460 } else if (exp1 == FP64_EXP_INF || exp2 == FP64_EXP_INF) { 3461 result = fp64_infinity(sgn1 ^ sgn2); 3462 } else if (!mnt1 || !mnt2) { 3463 result = fp64_zero(sgn1 ^ sgn2); 3464 } else { 3465 result = fp64_mul(op1, op2, mode, &flags); 3466 } 3467 } 3468 3469 set_fpscr0(fpscr, flags); 3470 3471 return result; 3472} 3473 3474template <> 3475uint16_t 3476fplibNeg(uint16_t op) 3477{ 3478 return op ^ 1ULL << (FP16_BITS - 1); 3479} 3480 3481template <> 3482uint32_t 3483fplibNeg(uint32_t op) 3484{ 3485 return op ^ 1ULL << (FP32_BITS - 1); 3486} 3487 3488template <> 3489uint64_t 3490fplibNeg(uint64_t op) 3491{ 3492 return op ^ 1ULL << (FP64_BITS - 1); 3493} 3494 3495static const uint8_t recip_sqrt_estimate[256] = { 3496 255, 253, 251, 249, 247, 245, 243, 242, 240, 238, 236, 234, 233, 231, 229, 228, 3497 226, 224, 223, 221, 219, 218, 216, 215, 213, 212, 210, 209, 207, 206, 204, 203, 3498 201, 200, 198, 197, 196, 194, 193, 192, 190, 189, 188, 186, 185, 184, 183, 181, 3499 180, 179, 178, 176, 175, 174, 173, 172, 170, 169, 168, 167, 166, 165, 164, 163, 3500 162, 160, 159, 158, 157, 156, 155, 154, 153, 152, 151, 150, 149, 148, 147, 146, 3501 145, 144, 143, 142, 141, 140, 140, 139, 138, 137, 136, 135, 134, 133, 132, 131, 3502 131, 130, 129, 128, 127, 126, 126, 125, 124, 123, 122, 121, 121, 120, 119, 118, 3503 118, 117, 116, 115, 114, 114, 113, 112, 111, 111, 110, 109, 109, 108, 107, 106, 3504 105, 104, 103, 101, 100, 99, 97, 96, 95, 93, 92, 91, 90, 88, 87, 86, 3505 85, 84, 82, 81, 80, 79, 78, 77, 76, 75, 74, 72, 71, 70, 69, 68, 3506 67, 66, 65, 64, 63, 62, 61, 60, 60, 59, 58, 57, 56, 55, 54, 53, 3507 52, 51, 51, 50, 49, 48, 47, 46, 46, 45, 44, 43, 42, 42, 41, 40, 3508 39, 38, 38, 37, 36, 35, 35, 34, 33, 33, 32, 31, 30, 30, 29, 28, 3509 28, 27, 26, 26, 25, 24, 24, 23, 22, 22, 21, 20, 20, 19, 19, 18, 3510 17, 17, 16, 16, 15, 14, 14, 13, 13, 12, 11, 11, 10, 10, 9, 9, 3511 8, 8, 7, 6, 6, 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 0 3512}; 3513 3514template <> 3515uint16_t 3516fplibRSqrtEstimate(uint16_t op, FPSCR &fpscr) 3517{ 3518 int mode = modeConv(fpscr); 3519 int flags = 0; 3520 int sgn, exp; 3521 uint16_t mnt, result; 3522 3523 fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags); 3524 3525 if (fp16_is_NaN(exp, mnt)) { 3526 result = fp16_process_NaN(op, mode, &flags); 3527 } else if (!mnt) { 3528 result = fp16_infinity(sgn); 3529 flags |= FPLIB_DZC; 3530 } else if (sgn) { 3531 result = fp16_defaultNaN(); 3532 flags |= FPLIB_IOC; 3533 } else if (exp == FP16_EXP_INF) { 3534 result = fp16_zero(0); 3535 } else { 3536 exp += FP16_EXP_BITS; 3537 mnt = fp16_normalise(mnt, &exp); 3538 mnt = recip_sqrt_estimate[(~exp & 1) << 7 | 3539 (mnt >> (FP16_BITS - 8) & 127)]; 3540 result = fp16_pack(0, (3 * FP16_EXP_BIAS - exp - 1) >> 1, 3541 mnt << (FP16_MANT_BITS - 8)); 3542 } 3543 3544 set_fpscr0(fpscr, flags); 3545 3546 return result; 3547} 3548 3549template <> 3550uint32_t 3551fplibRSqrtEstimate(uint32_t op, FPSCR &fpscr) 3552{ 3553 int mode = modeConv(fpscr); 3554 int flags = 0; 3555 int sgn, exp; 3556 uint32_t mnt, result; 3557 3558 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags); 3559 3560 if (fp32_is_NaN(exp, mnt)) { 3561 result = fp32_process_NaN(op, mode, &flags); 3562 } else if (!mnt) { 3563 result = fp32_infinity(sgn); 3564 flags |= FPLIB_DZC; 3565 } else if (sgn) { 3566 result = fp32_defaultNaN(); 3567 flags |= FPLIB_IOC; 3568 } else if (exp == FP32_EXP_INF) { 3569 result = 
fp32_zero(0); 3570 } else { 3571 exp += FP32_EXP_BITS; 3572 mnt = fp32_normalise(mnt, &exp); 3573 mnt = recip_sqrt_estimate[(~exp & 1) << 7 | 3574 (mnt >> (FP32_BITS - 8) & 127)]; 3575 result = fp32_pack(0, (3 * FP32_EXP_BIAS - exp - 1) >> 1, 3576 mnt << (FP32_MANT_BITS - 8)); 3577 } 3578 3579 set_fpscr0(fpscr, flags); 3580 3581 return result; 3582} 3583 3584template <> 3585uint64_t 3586fplibRSqrtEstimate(uint64_t op, FPSCR &fpscr) 3587{ 3588 int mode = modeConv(fpscr); 3589 int flags = 0; 3590 int sgn, exp; 3591 uint64_t mnt, result; 3592 3593 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags); 3594 3595 if (fp64_is_NaN(exp, mnt)) { 3596 result = fp64_process_NaN(op, mode, &flags); 3597 } else if (!mnt) { 3598 result = fp64_infinity(sgn); 3599 flags |= FPLIB_DZC; 3600 } else if (sgn) { 3601 result = fp64_defaultNaN(); 3602 flags |= FPLIB_IOC; 3603 } else if (exp == FP64_EXP_INF) { 3604 result = fp32_zero(0); 3605 } else { 3606 exp += FP64_EXP_BITS; 3607 mnt = fp64_normalise(mnt, &exp); 3608 mnt = recip_sqrt_estimate[(~exp & 1) << 7 | 3609 (mnt >> (FP64_BITS - 8) & 127)]; 3610 result = fp64_pack(0, (3 * FP64_EXP_BIAS - exp - 1) >> 1, 3611 mnt << (FP64_MANT_BITS - 8)); 3612 } 3613 3614 set_fpscr0(fpscr, flags); 3615 3616 return result; 3617} 3618 3619template <> 3620uint16_t 3621fplibRSqrtStepFused(uint16_t op1, uint16_t op2, FPSCR &fpscr) 3622{ 3623 int mode = modeConv(fpscr); 3624 int flags = 0; 3625 int sgn1, exp1, sgn2, exp2; 3626 uint16_t mnt1, mnt2, result; 3627 3628 op1 = fplibNeg<uint16_t>(op1); 3629 fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); 3630 fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); 3631 3632 result = fp16_process_NaNs(op1, op2, mode, &flags); 3633 if (!result) { 3634 if ((exp1 == FP16_EXP_INF && !mnt2) || 3635 (exp2 == FP16_EXP_INF && !mnt1)) { 3636 result = fp16_FPOnePointFive(0); 3637 } else if (exp1 == FP16_EXP_INF || exp2 == FP16_EXP_INF) { 3638 result = fp16_infinity(sgn1 ^ sgn2); 3639 } else { 3640 result = fp16_muladd(fp16_FPThree(0), op1, op2, -1, mode, &flags); 3641 } 3642 } 3643 3644 set_fpscr0(fpscr, flags); 3645 3646 return result; 3647} 3648 3649template <> 3650uint32_t 3651fplibRSqrtStepFused(uint32_t op1, uint32_t op2, FPSCR &fpscr) 3652{ 3653 int mode = modeConv(fpscr); 3654 int flags = 0; 3655 int sgn1, exp1, sgn2, exp2; 3656 uint32_t mnt1, mnt2, result; 3657 3658 op1 = fplibNeg<uint32_t>(op1); 3659 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); 3660 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); 3661 3662 result = fp32_process_NaNs(op1, op2, mode, &flags); 3663 if (!result) { 3664 if ((exp1 == FP32_EXP_INF && !mnt2) || 3665 (exp2 == FP32_EXP_INF && !mnt1)) { 3666 result = fp32_FPOnePointFive(0); 3667 } else if (exp1 == FP32_EXP_INF || exp2 == FP32_EXP_INF) { 3668 result = fp32_infinity(sgn1 ^ sgn2); 3669 } else { 3670 result = fp32_muladd(fp32_FPThree(0), op1, op2, -1, mode, &flags); 3671 } 3672 } 3673 3674 set_fpscr0(fpscr, flags); 3675 3676 return result; 3677} 3678 3679template <> 3680uint64_t 3681fplibRSqrtStepFused(uint64_t op1, uint64_t op2, FPSCR &fpscr) 3682{ 3683 int mode = modeConv(fpscr); 3684 int flags = 0; 3685 int sgn1, exp1, sgn2, exp2; 3686 uint64_t mnt1, mnt2, result; 3687 3688 op1 = fplibNeg<uint64_t>(op1); 3689 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); 3690 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); 3691 3692 result = fp64_process_NaNs(op1, op2, mode, &flags); 3693 if (!result) { 3694 if ((exp1 == FP64_EXP_INF && !mnt2) || 3695 (exp2 == FP64_EXP_INF && !mnt1)) { 3696 result = 
fp64_FPOnePointFive(0); 3697 } else if (exp1 == FP64_EXP_INF || exp2 == FP64_EXP_INF) { 3698 result = fp64_infinity(sgn1 ^ sgn2); 3699 } else { 3700 result = fp64_muladd(fp64_FPThree(0), op1, op2, -1, mode, &flags); 3701 } 3702 } 3703 3704 set_fpscr0(fpscr, flags); 3705 3706 return result; 3707} 3708 3709template <> 3710uint16_t 3711fplibRecipEstimate(uint16_t op, FPSCR &fpscr) 3712{ 3713 int mode = modeConv(fpscr); 3714 int flags = 0; 3715 int sgn, exp; 3716 uint16_t mnt, result; 3717 3718 fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags); 3719 3720 if (fp16_is_NaN(exp, mnt)) { 3721 result = fp16_process_NaN(op, mode, &flags); 3722 } else if (exp == FP16_EXP_INF) { 3723 result = fp16_zero(sgn); 3724 } else if (!mnt) { 3725 result = fp16_infinity(sgn); 3726 flags |= FPLIB_DZC; 3727 } else if (!((uint16_t)(op << 1) >> (FP16_MANT_BITS - 1))) { 3728 bool overflow_to_inf = false; 3729 switch (FPCRRounding(fpscr)) { 3730 case FPRounding_TIEEVEN: 3731 overflow_to_inf = true; 3732 break; 3733 case FPRounding_POSINF: 3734 overflow_to_inf = !sgn; 3735 break; 3736 case FPRounding_NEGINF: 3737 overflow_to_inf = sgn; 3738 break; 3739 case FPRounding_ZERO: 3740 overflow_to_inf = false; 3741 break; 3742 default:
b_sgn, b_exp; 1055 uint32_t a_mnt, b_mnt; 1056 1057 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 1058 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 1059 1060 if (fp32_is_NaN(a_exp, a_mnt) || 1061 fp32_is_NaN(b_exp, b_mnt)) { 1062 *flags |= FPLIB_IOC; 1063 return 0; 1064 } 1065 if (!a_mnt && !b_mnt) 1066 return 1; 1067 if (a_sgn != b_sgn) 1068 return b_sgn; 1069 if (a_exp != b_exp) 1070 return a_sgn ^ (a_exp > b_exp); 1071 if (a_mnt != b_mnt) 1072 return a_sgn ^ (a_mnt > b_mnt); 1073 return 1; 1074} 1075 1076static int 1077fp32_compare_gt(uint32_t a, uint32_t b, int mode, int *flags) 1078{ 1079 int a_sgn, a_exp, b_sgn, b_exp; 1080 uint32_t a_mnt, b_mnt; 1081 1082 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 1083 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 1084 1085 if (fp32_is_NaN(a_exp, a_mnt) || 1086 fp32_is_NaN(b_exp, b_mnt)) { 1087 *flags |= FPLIB_IOC; 1088 return 0; 1089 } 1090 if (!a_mnt && !b_mnt) 1091 return 0; 1092 if (a_sgn != b_sgn) 1093 return b_sgn; 1094 if (a_exp != b_exp) 1095 return a_sgn ^ (a_exp > b_exp); 1096 if (a_mnt != b_mnt) 1097 return a_sgn ^ (a_mnt > b_mnt); 1098 return 0; 1099} 1100 1101static int 1102fp32_compare_un(uint32_t a, uint32_t b, int mode, int *flags) 1103{ 1104 int a_sgn, a_exp, b_sgn, b_exp; 1105 uint32_t a_mnt, b_mnt; 1106 1107 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 1108 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 1109 1110 if (fp32_is_NaN(a_exp, a_mnt) || 1111 fp32_is_NaN(b_exp, b_mnt)) { 1112 if (fp32_is_signalling_NaN(a_exp, a_mnt) || 1113 fp32_is_signalling_NaN(b_exp, b_mnt)) 1114 *flags |= FPLIB_IOC; 1115 return 1; 1116 } 1117 return 0; 1118} 1119 1120static int 1121fp64_compare_eq(uint64_t a, uint64_t b, int mode, int *flags) 1122{ 1123 int a_sgn, a_exp, b_sgn, b_exp; 1124 uint64_t a_mnt, b_mnt; 1125 1126 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 1127 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 1128 1129 if (fp64_is_NaN(a_exp, a_mnt) || 1130 fp64_is_NaN(b_exp, b_mnt)) { 1131 if (fp64_is_signalling_NaN(a_exp, a_mnt) || 1132 fp64_is_signalling_NaN(b_exp, b_mnt)) 1133 *flags |= FPLIB_IOC; 1134 return 0; 1135 } 1136 return a == b || (!a_mnt && !b_mnt); 1137} 1138 1139static int 1140fp64_compare_ge(uint64_t a, uint64_t b, int mode, int *flags) 1141{ 1142 int a_sgn, a_exp, b_sgn, b_exp; 1143 uint64_t a_mnt, b_mnt; 1144 1145 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 1146 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 1147 1148 if (fp64_is_NaN(a_exp, a_mnt) || 1149 fp64_is_NaN(b_exp, b_mnt)) { 1150 *flags |= FPLIB_IOC; 1151 return 0; 1152 } 1153 if (!a_mnt && !b_mnt) 1154 return 1; 1155 if (a_sgn != b_sgn) 1156 return b_sgn; 1157 if (a_exp != b_exp) 1158 return a_sgn ^ (a_exp > b_exp); 1159 if (a_mnt != b_mnt) 1160 return a_sgn ^ (a_mnt > b_mnt); 1161 return 1; 1162} 1163 1164static int 1165fp64_compare_gt(uint64_t a, uint64_t b, int mode, int *flags) 1166{ 1167 int a_sgn, a_exp, b_sgn, b_exp; 1168 uint64_t a_mnt, b_mnt; 1169 1170 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 1171 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 1172 1173 if (fp64_is_NaN(a_exp, a_mnt) || 1174 fp64_is_NaN(b_exp, b_mnt)) { 1175 *flags |= FPLIB_IOC; 1176 return 0; 1177 } 1178 if (!a_mnt && !b_mnt) 1179 return 0; 1180 if (a_sgn != b_sgn) 1181 return b_sgn; 1182 if (a_exp != b_exp) 1183 return a_sgn ^ (a_exp > b_exp); 1184 if (a_mnt != b_mnt) 1185 return a_sgn ^ (a_mnt > b_mnt); 1186 return 0; 1187} 1188 1189static int 1190fp64_compare_un(uint64_t a, uint64_t b, int mode, 
int *flags) 1191{ 1192 int a_sgn, a_exp, b_sgn, b_exp; 1193 uint64_t a_mnt, b_mnt; 1194 1195 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 1196 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 1197 1198 if (fp64_is_NaN(a_exp, a_mnt) || 1199 fp64_is_NaN(b_exp, b_mnt)) { 1200 if (fp64_is_signalling_NaN(a_exp, a_mnt) || 1201 fp64_is_signalling_NaN(b_exp, b_mnt)) 1202 *flags |= FPLIB_IOC; 1203 return 1; 1204 } 1205 return 0; 1206} 1207 1208static uint16_t 1209fp16_add(uint16_t a, uint16_t b, int neg, int mode, int *flags) 1210{ 1211 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp; 1212 uint16_t a_mnt, b_mnt, x, x_mnt; 1213 1214 fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 1215 fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 1216 1217 if ((x = fp16_process_NaNs(a, b, mode, flags))) { 1218 return x; 1219 } 1220 1221 b_sgn ^= neg; 1222 1223 // Handle infinities and zeroes: 1224 if (a_exp == FP16_EXP_INF && b_exp == FP16_EXP_INF && a_sgn != b_sgn) { 1225 *flags |= FPLIB_IOC; 1226 return fp16_defaultNaN(); 1227 } else if (a_exp == FP16_EXP_INF) { 1228 return fp16_infinity(a_sgn); 1229 } else if (b_exp == FP16_EXP_INF) { 1230 return fp16_infinity(b_sgn); 1231 } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) { 1232 return fp16_zero(a_sgn); 1233 } 1234 1235 a_mnt <<= 3; 1236 b_mnt <<= 3; 1237 if (a_exp >= b_exp) { 1238 b_mnt = (lsr16(b_mnt, a_exp - b_exp) | 1239 !!(b_mnt & (lsl16(1, a_exp - b_exp) - 1))); 1240 b_exp = a_exp; 1241 } else { 1242 a_mnt = (lsr16(a_mnt, b_exp - a_exp) | 1243 !!(a_mnt & (lsl16(1, b_exp - a_exp) - 1))); 1244 a_exp = b_exp; 1245 } 1246 x_sgn = a_sgn; 1247 x_exp = a_exp; 1248 if (a_sgn == b_sgn) { 1249 x_mnt = a_mnt + b_mnt; 1250 } else if (a_mnt >= b_mnt) { 1251 x_mnt = a_mnt - b_mnt; 1252 } else { 1253 x_sgn ^= 1; 1254 x_mnt = b_mnt - a_mnt; 1255 } 1256 1257 if (!x_mnt) { 1258 // Sign of exact zero result depends on rounding mode 1259 return fp16_zero((mode & 3) == 2); 1260 } 1261 1262 x_mnt = fp16_normalise(x_mnt, &x_exp); 1263 1264 return fp16_round(x_sgn, x_exp + FP16_EXP_BITS - 3, x_mnt << 1, 1265 mode, flags); 1266} 1267 1268static uint32_t 1269fp32_add(uint32_t a, uint32_t b, int neg, int mode, int *flags) 1270{ 1271 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp; 1272 uint32_t a_mnt, b_mnt, x, x_mnt; 1273 1274 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 1275 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 1276 1277 if ((x = fp32_process_NaNs(a, b, mode, flags))) { 1278 return x; 1279 } 1280 1281 b_sgn ^= neg; 1282 1283 // Handle infinities and zeroes: 1284 if (a_exp == FP32_EXP_INF && b_exp == FP32_EXP_INF && a_sgn != b_sgn) { 1285 *flags |= FPLIB_IOC; 1286 return fp32_defaultNaN(); 1287 } else if (a_exp == FP32_EXP_INF) { 1288 return fp32_infinity(a_sgn); 1289 } else if (b_exp == FP32_EXP_INF) { 1290 return fp32_infinity(b_sgn); 1291 } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) { 1292 return fp32_zero(a_sgn); 1293 } 1294 1295 a_mnt <<= 3; 1296 b_mnt <<= 3; 1297 if (a_exp >= b_exp) { 1298 b_mnt = (lsr32(b_mnt, a_exp - b_exp) | 1299 !!(b_mnt & (lsl32(1, a_exp - b_exp) - 1))); 1300 b_exp = a_exp; 1301 } else { 1302 a_mnt = (lsr32(a_mnt, b_exp - a_exp) | 1303 !!(a_mnt & (lsl32(1, b_exp - a_exp) - 1))); 1304 a_exp = b_exp; 1305 } 1306 x_sgn = a_sgn; 1307 x_exp = a_exp; 1308 if (a_sgn == b_sgn) { 1309 x_mnt = a_mnt + b_mnt; 1310 } else if (a_mnt >= b_mnt) { 1311 x_mnt = a_mnt - b_mnt; 1312 } else { 1313 x_sgn ^= 1; 1314 x_mnt = b_mnt - a_mnt; 1315 } 1316 1317 if (!x_mnt) { 1318 // Sign of exact zero result depends on rounding mode 1319 
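        // (mode & 3) is the rounding-mode field; comparing it with 2
        // (FPLIB_RM, round towards minus infinity) makes the exact zero
        // negative only in that mode, matching the IEEE 754 convention
        // that x + (-x) is +0 in every other rounding mode.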
return fp32_zero((mode & 3) == 2); 1320 } 1321 1322 x_mnt = fp32_normalise(x_mnt, &x_exp); 1323 1324 return fp32_round(x_sgn, x_exp + FP32_EXP_BITS - 3, x_mnt << 1, 1325 mode, flags); 1326} 1327 1328static uint64_t 1329fp64_add(uint64_t a, uint64_t b, int neg, int mode, int *flags) 1330{ 1331 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp; 1332 uint64_t a_mnt, b_mnt, x, x_mnt; 1333 1334 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 1335 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 1336 1337 if ((x = fp64_process_NaNs(a, b, mode, flags))) { 1338 return x; 1339 } 1340 1341 b_sgn ^= neg; 1342 1343 // Handle infinities and zeroes: 1344 if (a_exp == FP64_EXP_INF && b_exp == FP64_EXP_INF && a_sgn != b_sgn) { 1345 *flags |= FPLIB_IOC; 1346 return fp64_defaultNaN(); 1347 } else if (a_exp == FP64_EXP_INF) { 1348 return fp64_infinity(a_sgn); 1349 } else if (b_exp == FP64_EXP_INF) { 1350 return fp64_infinity(b_sgn); 1351 } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) { 1352 return fp64_zero(a_sgn); 1353 } 1354 1355 a_mnt <<= 3; 1356 b_mnt <<= 3; 1357 if (a_exp >= b_exp) { 1358 b_mnt = (lsr64(b_mnt, a_exp - b_exp) | 1359 !!(b_mnt & (lsl64(1, a_exp - b_exp) - 1))); 1360 b_exp = a_exp; 1361 } else { 1362 a_mnt = (lsr64(a_mnt, b_exp - a_exp) | 1363 !!(a_mnt & (lsl64(1, b_exp - a_exp) - 1))); 1364 a_exp = b_exp; 1365 } 1366 x_sgn = a_sgn; 1367 x_exp = a_exp; 1368 if (a_sgn == b_sgn) { 1369 x_mnt = a_mnt + b_mnt; 1370 } else if (a_mnt >= b_mnt) { 1371 x_mnt = a_mnt - b_mnt; 1372 } else { 1373 x_sgn ^= 1; 1374 x_mnt = b_mnt - a_mnt; 1375 } 1376 1377 if (!x_mnt) { 1378 // Sign of exact zero result depends on rounding mode 1379 return fp64_zero((mode & 3) == 2); 1380 } 1381 1382 x_mnt = fp64_normalise(x_mnt, &x_exp); 1383 1384 return fp64_round(x_sgn, x_exp + FP64_EXP_BITS - 3, x_mnt << 1, 1385 mode, flags); 1386} 1387 1388static uint16_t 1389fp16_mul(uint16_t a, uint16_t b, int mode, int *flags) 1390{ 1391 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp; 1392 uint16_t a_mnt, b_mnt, x; 1393 uint32_t x_mnt; 1394 1395 fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 1396 fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 1397 1398 if ((x = fp16_process_NaNs(a, b, mode, flags))) { 1399 return x; 1400 } 1401 1402 // Handle infinities and zeroes: 1403 if ((a_exp == FP16_EXP_INF && !b_mnt) || 1404 (b_exp == FP16_EXP_INF && !a_mnt)) { 1405 *flags |= FPLIB_IOC; 1406 return fp16_defaultNaN(); 1407 } else if (a_exp == FP16_EXP_INF || b_exp == FP16_EXP_INF) { 1408 return fp16_infinity(a_sgn ^ b_sgn); 1409 } else if (!a_mnt || !b_mnt) { 1410 return fp16_zero(a_sgn ^ b_sgn); 1411 } 1412 1413 // Multiply and normalise: 1414 x_sgn = a_sgn ^ b_sgn; 1415 x_exp = a_exp + b_exp - FP16_EXP_BIAS + 2 * FP16_EXP_BITS + 1; 1416 x_mnt = (uint32_t)a_mnt * b_mnt; 1417 x_mnt = fp32_normalise(x_mnt, &x_exp); 1418 1419 // Convert to FP16_BITS bits, collapsing error into bottom bit: 1420 x_mnt = lsr32(x_mnt, FP16_BITS - 1) | !!lsl32(x_mnt, FP16_BITS + 1); 1421 1422 return fp16_round(x_sgn, x_exp, x_mnt, mode, flags); 1423} 1424 1425static uint32_t 1426fp32_mul(uint32_t a, uint32_t b, int mode, int *flags) 1427{ 1428 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp; 1429 uint32_t a_mnt, b_mnt, x; 1430 uint64_t x_mnt; 1431 1432 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 1433 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 1434 1435 if ((x = fp32_process_NaNs(a, b, mode, flags))) { 1436 return x; 1437 } 1438 1439 // Handle infinities and zeroes: 1440 if ((a_exp == FP32_EXP_INF && !b_mnt) || 1441 (b_exp == 
FP32_EXP_INF && !a_mnt)) { 1442 *flags |= FPLIB_IOC; 1443 return fp32_defaultNaN(); 1444 } else if (a_exp == FP32_EXP_INF || b_exp == FP32_EXP_INF) { 1445 return fp32_infinity(a_sgn ^ b_sgn); 1446 } else if (!a_mnt || !b_mnt) { 1447 return fp32_zero(a_sgn ^ b_sgn); 1448 } 1449 1450 // Multiply and normalise: 1451 x_sgn = a_sgn ^ b_sgn; 1452 x_exp = a_exp + b_exp - FP32_EXP_BIAS + 2 * FP32_EXP_BITS + 1; 1453 x_mnt = (uint64_t)a_mnt * b_mnt; 1454 x_mnt = fp64_normalise(x_mnt, &x_exp); 1455 1456 // Convert to FP32_BITS bits, collapsing error into bottom bit: 1457 x_mnt = lsr64(x_mnt, FP32_BITS - 1) | !!lsl64(x_mnt, FP32_BITS + 1); 1458 1459 return fp32_round(x_sgn, x_exp, x_mnt, mode, flags); 1460} 1461 1462static uint64_t 1463fp64_mul(uint64_t a, uint64_t b, int mode, int *flags) 1464{ 1465 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp; 1466 uint64_t a_mnt, b_mnt, x; 1467 uint64_t x0_mnt, x1_mnt; 1468 1469 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 1470 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 1471 1472 if ((x = fp64_process_NaNs(a, b, mode, flags))) { 1473 return x; 1474 } 1475 1476 // Handle infinities and zeroes: 1477 if ((a_exp == FP64_EXP_INF && !b_mnt) || 1478 (b_exp == FP64_EXP_INF && !a_mnt)) { 1479 *flags |= FPLIB_IOC; 1480 return fp64_defaultNaN(); 1481 } else if (a_exp == FP64_EXP_INF || b_exp == FP64_EXP_INF) { 1482 return fp64_infinity(a_sgn ^ b_sgn); 1483 } else if (!a_mnt || !b_mnt) { 1484 return fp64_zero(a_sgn ^ b_sgn); 1485 } 1486 1487 // Multiply and normalise: 1488 x_sgn = a_sgn ^ b_sgn; 1489 x_exp = a_exp + b_exp - FP64_EXP_BIAS + 2 * FP64_EXP_BITS + 1; 1490 mul62x62(&x0_mnt, &x1_mnt, a_mnt, b_mnt); 1491 fp128_normalise(&x0_mnt, &x1_mnt, &x_exp); 1492 1493 // Convert to FP64_BITS bits, collapsing error into bottom bit: 1494 x0_mnt = x1_mnt << 1 | !!x0_mnt; 1495 1496 return fp64_round(x_sgn, x_exp, x0_mnt, mode, flags); 1497} 1498 1499static uint16_t 1500fp16_muladd(uint16_t a, uint16_t b, uint16_t c, int scale, 1501 int mode, int *flags) 1502{ 1503 int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp; 1504 uint16_t a_mnt, b_mnt, c_mnt, x; 1505 uint32_t x_mnt, y_mnt; 1506 1507 fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 1508 fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 1509 fp16_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags); 1510 1511 x = fp16_process_NaNs3(a, b, c, mode, flags); 1512 1513 // Quiet NaN added to product of zero and infinity: 1514 if (fp16_is_quiet_NaN(a_exp, a_mnt) && 1515 ((!b_mnt && fp16_is_infinity(c_exp, c_mnt)) || 1516 (!c_mnt && fp16_is_infinity(b_exp, b_mnt)))) { 1517 x = fp16_defaultNaN(); 1518 *flags |= FPLIB_IOC; 1519 } 1520 1521 if (x) { 1522 return x; 1523 } 1524 1525 // Handle infinities and zeroes: 1526 if ((b_exp == FP16_EXP_INF && !c_mnt) || 1527 (c_exp == FP16_EXP_INF && !b_mnt) || 1528 (a_exp == FP16_EXP_INF && 1529 (b_exp == FP16_EXP_INF || c_exp == FP16_EXP_INF) && 1530 (a_sgn != (b_sgn ^ c_sgn)))) { 1531 *flags |= FPLIB_IOC; 1532 return fp16_defaultNaN(); 1533 } 1534 if (a_exp == FP16_EXP_INF) 1535 return fp16_infinity(a_sgn); 1536 if (b_exp == FP16_EXP_INF || c_exp == FP16_EXP_INF) 1537 return fp16_infinity(b_sgn ^ c_sgn); 1538 if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn)) 1539 return fp16_zero(a_sgn); 1540 1541 x_sgn = a_sgn; 1542 x_exp = a_exp + 2 * FP16_EXP_BITS - 3; 1543 x_mnt = (uint32_t)a_mnt << (FP16_MANT_BITS + 4); 1544 1545 // Multiply: 1546 y_sgn = b_sgn ^ c_sgn; 1547 y_exp = b_exp + c_exp - FP16_EXP_BIAS + 2 * FP16_EXP_BITS + 1 - 3; 1548 y_mnt = 
(uint32_t)b_mnt * c_mnt << 3; 1549 if (!y_mnt) { 1550 y_exp = x_exp; 1551 } 1552 1553 // Add: 1554 if (x_exp >= y_exp) { 1555 y_mnt = (lsr32(y_mnt, x_exp - y_exp) | 1556 !!(y_mnt & (lsl32(1, x_exp - y_exp) - 1))); 1557 y_exp = x_exp; 1558 } else { 1559 x_mnt = (lsr32(x_mnt, y_exp - x_exp) | 1560 !!(x_mnt & (lsl32(1, y_exp - x_exp) - 1))); 1561 x_exp = y_exp; 1562 } 1563 if (x_sgn == y_sgn) { 1564 x_mnt = x_mnt + y_mnt; 1565 } else if (x_mnt >= y_mnt) { 1566 x_mnt = x_mnt - y_mnt; 1567 } else { 1568 x_sgn ^= 1; 1569 x_mnt = y_mnt - x_mnt; 1570 } 1571 1572 if (!x_mnt) { 1573 // Sign of exact zero result depends on rounding mode 1574 return fp16_zero((mode & 3) == 2); 1575 } 1576 1577 // Normalise into FP16_BITS bits, collapsing error into bottom bit: 1578 x_mnt = fp32_normalise(x_mnt, &x_exp); 1579 x_mnt = x_mnt >> (FP16_BITS - 1) | !!(uint16_t)(x_mnt << 1); 1580 1581 return fp16_round(x_sgn, x_exp + scale, x_mnt, mode, flags); 1582} 1583 1584static uint32_t 1585fp32_muladd(uint32_t a, uint32_t b, uint32_t c, int scale, 1586 int mode, int *flags) 1587{ 1588 int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp; 1589 uint32_t a_mnt, b_mnt, c_mnt, x; 1590 uint64_t x_mnt, y_mnt; 1591 1592 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 1593 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 1594 fp32_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags); 1595 1596 x = fp32_process_NaNs3(a, b, c, mode, flags); 1597 1598 // Quiet NaN added to product of zero and infinity: 1599 if (fp32_is_quiet_NaN(a_exp, a_mnt) && 1600 ((!b_mnt && fp32_is_infinity(c_exp, c_mnt)) || 1601 (!c_mnt && fp32_is_infinity(b_exp, b_mnt)))) { 1602 x = fp32_defaultNaN(); 1603 *flags |= FPLIB_IOC; 1604 } 1605 1606 if (x) { 1607 return x; 1608 } 1609 1610 // Handle infinities and zeroes: 1611 if ((b_exp == FP32_EXP_INF && !c_mnt) || 1612 (c_exp == FP32_EXP_INF && !b_mnt) || 1613 (a_exp == FP32_EXP_INF && 1614 (b_exp == FP32_EXP_INF || c_exp == FP32_EXP_INF) && 1615 (a_sgn != (b_sgn ^ c_sgn)))) { 1616 *flags |= FPLIB_IOC; 1617 return fp32_defaultNaN(); 1618 } 1619 if (a_exp == FP32_EXP_INF) 1620 return fp32_infinity(a_sgn); 1621 if (b_exp == FP32_EXP_INF || c_exp == FP32_EXP_INF) 1622 return fp32_infinity(b_sgn ^ c_sgn); 1623 if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn)) 1624 return fp32_zero(a_sgn); 1625 1626 x_sgn = a_sgn; 1627 x_exp = a_exp + 2 * FP32_EXP_BITS - 3; 1628 x_mnt = (uint64_t)a_mnt << (FP32_MANT_BITS + 4); 1629 1630 // Multiply: 1631 y_sgn = b_sgn ^ c_sgn; 1632 y_exp = b_exp + c_exp - FP32_EXP_BIAS + 2 * FP32_EXP_BITS + 1 - 3; 1633 y_mnt = (uint64_t)b_mnt * c_mnt << 3; 1634 if (!y_mnt) { 1635 y_exp = x_exp; 1636 } 1637 1638 // Add: 1639 if (x_exp >= y_exp) { 1640 y_mnt = (lsr64(y_mnt, x_exp - y_exp) | 1641 !!(y_mnt & (lsl64(1, x_exp - y_exp) - 1))); 1642 y_exp = x_exp; 1643 } else { 1644 x_mnt = (lsr64(x_mnt, y_exp - x_exp) | 1645 !!(x_mnt & (lsl64(1, y_exp - x_exp) - 1))); 1646 x_exp = y_exp; 1647 } 1648 if (x_sgn == y_sgn) { 1649 x_mnt = x_mnt + y_mnt; 1650 } else if (x_mnt >= y_mnt) { 1651 x_mnt = x_mnt - y_mnt; 1652 } else { 1653 x_sgn ^= 1; 1654 x_mnt = y_mnt - x_mnt; 1655 } 1656 1657 if (!x_mnt) { 1658 // Sign of exact zero result depends on rounding mode 1659 return fp32_zero((mode & 3) == 2); 1660 } 1661 1662 // Normalise into FP32_BITS bits, collapsing error into bottom bit: 1663 x_mnt = fp64_normalise(x_mnt, &x_exp); 1664 x_mnt = x_mnt >> (FP32_BITS - 1) | !!(uint32_t)(x_mnt << 1); 1665 1666 return fp32_round(x_sgn, x_exp + scale, x_mnt, mode, flags); 1667} 1668 
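// The fused multiply-add helpers above and below compute a + b * c with a
// single rounding: the product is formed exactly in a double-width mantissa,
// the addend and product are brought to a common exponent with any
// shifted-out bits folded into a sticky bit, and rounding happens once, in
// the final fp*_round call.  The 'scale' argument is simply added to the
// exponent before rounding, so the result is effectively
// (a + b * c) * 2^scale.  The double-precision version below differs only
// in that the full product no longer fits in a 64-bit integer, so it is
// held in a 128-bit pair and manipulated with the lsr128/add128/sub128/
// cmp128 helpers.
//
// Illustrative use, mirroring the fplibMulAdd specialisations further down
// (which pass scale = 0):
//
//   int flags = 0;
//   uint32_t r = fp32_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags);
//   set_fpscr0(fpscr, flags);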
1669static uint64_t 1670fp64_muladd(uint64_t a, uint64_t b, uint64_t c, int scale, 1671 int mode, int *flags) 1672{ 1673 int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp; 1674 uint64_t a_mnt, b_mnt, c_mnt, x; 1675 uint64_t x0_mnt, x1_mnt, y0_mnt, y1_mnt; 1676 1677 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 1678 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 1679 fp64_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags); 1680 1681 x = fp64_process_NaNs3(a, b, c, mode, flags); 1682 1683 // Quiet NaN added to product of zero and infinity: 1684 if (fp64_is_quiet_NaN(a_exp, a_mnt) && 1685 ((!b_mnt && fp64_is_infinity(c_exp, c_mnt)) || 1686 (!c_mnt && fp64_is_infinity(b_exp, b_mnt)))) { 1687 x = fp64_defaultNaN(); 1688 *flags |= FPLIB_IOC; 1689 } 1690 1691 if (x) { 1692 return x; 1693 } 1694 1695 // Handle infinities and zeroes: 1696 if ((b_exp == FP64_EXP_INF && !c_mnt) || 1697 (c_exp == FP64_EXP_INF && !b_mnt) || 1698 (a_exp == FP64_EXP_INF && 1699 (b_exp == FP64_EXP_INF || c_exp == FP64_EXP_INF) && 1700 (a_sgn != (b_sgn ^ c_sgn)))) { 1701 *flags |= FPLIB_IOC; 1702 return fp64_defaultNaN(); 1703 } 1704 if (a_exp == FP64_EXP_INF) 1705 return fp64_infinity(a_sgn); 1706 if (b_exp == FP64_EXP_INF || c_exp == FP64_EXP_INF) 1707 return fp64_infinity(b_sgn ^ c_sgn); 1708 if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn)) 1709 return fp64_zero(a_sgn); 1710 1711 x_sgn = a_sgn; 1712 x_exp = a_exp + FP64_EXP_BITS; 1713 x0_mnt = 0; 1714 x1_mnt = a_mnt; 1715 1716 // Multiply: 1717 y_sgn = b_sgn ^ c_sgn; 1718 y_exp = b_exp + c_exp - FP64_EXP_BIAS + 2 * FP64_EXP_BITS + 1 - 3; 1719 mul62x62(&y0_mnt, &y1_mnt, b_mnt, c_mnt << 3); 1720 if (!y0_mnt && !y1_mnt) { 1721 y_exp = x_exp; 1722 } 1723 1724 // Add: 1725 if (x_exp >= y_exp) { 1726 uint64_t t0, t1; 1727 lsl128(&t0, &t1, y0_mnt, y1_mnt, 1728 x_exp - y_exp < 128 ? 128 - (x_exp - y_exp) : 0); 1729 lsr128(&y0_mnt, &y1_mnt, y0_mnt, y1_mnt, x_exp - y_exp); 1730 y0_mnt |= !!(t0 | t1); 1731 y_exp = x_exp; 1732 } else { 1733 uint64_t t0, t1; 1734 lsl128(&t0, &t1, x0_mnt, x1_mnt, 1735 y_exp - x_exp < 128 ? 
128 - (y_exp - x_exp) : 0); 1736 lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y_exp - x_exp); 1737 x0_mnt |= !!(t0 | t1); 1738 x_exp = y_exp; 1739 } 1740 if (x_sgn == y_sgn) { 1741 add128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y0_mnt, y1_mnt); 1742 } else if (cmp128(x0_mnt, x1_mnt, y0_mnt, y1_mnt) >= 0) { 1743 sub128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y0_mnt, y1_mnt); 1744 } else { 1745 x_sgn ^= 1; 1746 sub128(&x0_mnt, &x1_mnt, y0_mnt, y1_mnt, x0_mnt, x1_mnt); 1747 } 1748 1749 if (!x0_mnt && !x1_mnt) { 1750 // Sign of exact zero result depends on rounding mode 1751 return fp64_zero((mode & 3) == 2); 1752 } 1753 1754 // Normalise into FP64_BITS bits, collapsing error into bottom bit: 1755 fp128_normalise(&x0_mnt, &x1_mnt, &x_exp); 1756 x0_mnt = x1_mnt << 1 | !!x0_mnt; 1757 1758 return fp64_round(x_sgn, x_exp + scale, x0_mnt, mode, flags); 1759} 1760 1761static uint16_t 1762fp16_div(uint16_t a, uint16_t b, int mode, int *flags) 1763{ 1764 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp; 1765 uint16_t a_mnt, b_mnt, x; 1766 uint32_t x_mnt; 1767 1768 fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 1769 fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 1770 1771 if ((x = fp16_process_NaNs(a, b, mode, flags))) 1772 return x; 1773 1774 // Handle infinities and zeroes: 1775 if ((a_exp == FP16_EXP_INF && b_exp == FP16_EXP_INF) || 1776 (!a_mnt && !b_mnt)) { 1777 *flags |= FPLIB_IOC; 1778 return fp16_defaultNaN(); 1779 } 1780 if (a_exp == FP16_EXP_INF || !b_mnt) { 1781 if (a_exp != FP16_EXP_INF) 1782 *flags |= FPLIB_DZC; 1783 return fp16_infinity(a_sgn ^ b_sgn); 1784 } 1785 if (!a_mnt || b_exp == FP16_EXP_INF) 1786 return fp16_zero(a_sgn ^ b_sgn); 1787 1788 // Divide, setting bottom bit if inexact: 1789 a_mnt = fp16_normalise(a_mnt, &a_exp); 1790 x_sgn = a_sgn ^ b_sgn; 1791 x_exp = a_exp - b_exp + (FP16_EXP_BIAS + FP16_BITS + 2 * FP16_EXP_BITS - 3); 1792 x_mnt = ((uint32_t)a_mnt << (FP16_MANT_BITS - FP16_EXP_BITS + 3)) / b_mnt; 1793 x_mnt |= (x_mnt * b_mnt != 1794 (uint32_t)a_mnt << (FP16_MANT_BITS - FP16_EXP_BITS + 3)); 1795 1796 // Normalise into FP16_BITS bits, collapsing error into bottom bit: 1797 x_mnt = fp32_normalise(x_mnt, &x_exp); 1798 x_mnt = x_mnt >> (FP16_BITS - 1) | !!(uint16_t)(x_mnt << 1); 1799 1800 return fp16_round(x_sgn, x_exp, x_mnt, mode, flags); 1801} 1802 1803static uint32_t 1804fp32_div(uint32_t a, uint32_t b, int mode, int *flags) 1805{ 1806 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp; 1807 uint32_t a_mnt, b_mnt, x; 1808 uint64_t x_mnt; 1809 1810 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 1811 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 1812 1813 if ((x = fp32_process_NaNs(a, b, mode, flags))) 1814 return x; 1815 1816 // Handle infinities and zeroes: 1817 if ((a_exp == FP32_EXP_INF && b_exp == FP32_EXP_INF) || 1818 (!a_mnt && !b_mnt)) { 1819 *flags |= FPLIB_IOC; 1820 return fp32_defaultNaN(); 1821 } 1822 if (a_exp == FP32_EXP_INF || !b_mnt) { 1823 if (a_exp != FP32_EXP_INF) 1824 *flags |= FPLIB_DZC; 1825 return fp32_infinity(a_sgn ^ b_sgn); 1826 } 1827 if (!a_mnt || b_exp == FP32_EXP_INF) 1828 return fp32_zero(a_sgn ^ b_sgn); 1829 1830 // Divide, setting bottom bit if inexact: 1831 a_mnt = fp32_normalise(a_mnt, &a_exp); 1832 x_sgn = a_sgn ^ b_sgn; 1833 x_exp = a_exp - b_exp + (FP32_EXP_BIAS + FP32_BITS + 2 * FP32_EXP_BITS - 3); 1834 x_mnt = ((uint64_t)a_mnt << (FP32_MANT_BITS - FP32_EXP_BITS + 3)) / b_mnt; 1835 x_mnt |= (x_mnt * b_mnt != 1836 (uint64_t)a_mnt << (FP32_MANT_BITS - FP32_EXP_BITS + 3)); 1837 1838 // Normalise into FP32_BITS bits, collapsing error 
into bottom bit: 1839 x_mnt = fp64_normalise(x_mnt, &x_exp); 1840 x_mnt = x_mnt >> (FP32_BITS - 1) | !!(uint32_t)(x_mnt << 1); 1841 1842 return fp32_round(x_sgn, x_exp, x_mnt, mode, flags); 1843} 1844 1845static uint64_t 1846fp64_div(uint64_t a, uint64_t b, int mode, int *flags) 1847{ 1848 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp, c; 1849 uint64_t a_mnt, b_mnt, x, x_mnt, x0_mnt, x1_mnt; 1850 1851 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 1852 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 1853 1854 if ((x = fp64_process_NaNs(a, b, mode, flags))) 1855 return x; 1856 1857 // Handle infinities and zeroes: 1858 if ((a_exp == FP64_EXP_INF && b_exp == FP64_EXP_INF) || 1859 (!a_mnt && !b_mnt)) { 1860 *flags |= FPLIB_IOC; 1861 return fp64_defaultNaN(); 1862 } 1863 if (a_exp == FP64_EXP_INF || !b_mnt) { 1864 if (a_exp != FP64_EXP_INF) 1865 *flags |= FPLIB_DZC; 1866 return fp64_infinity(a_sgn ^ b_sgn); 1867 } 1868 if (!a_mnt || b_exp == FP64_EXP_INF) 1869 return fp64_zero(a_sgn ^ b_sgn); 1870 1871 // Find reciprocal of divisor with Newton-Raphson: 1872 a_mnt = fp64_normalise(a_mnt, &a_exp); 1873 b_mnt = fp64_normalise(b_mnt, &b_exp); 1874 x_mnt = ~(uint64_t)0 / (b_mnt >> 31); 1875 mul64x32(&x0_mnt, &x1_mnt, b_mnt, x_mnt); 1876 sub128(&x0_mnt, &x1_mnt, 0, (uint64_t)1 << 32, x0_mnt, x1_mnt); 1877 lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 32); 1878 mul64x32(&x0_mnt, &x1_mnt, x0_mnt, x_mnt); 1879 lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 33); 1880 1881 // Multiply by dividend: 1882 x_sgn = a_sgn ^ b_sgn; 1883 x_exp = a_exp - b_exp + FP64_EXP_BIAS + 8; 1884 mul62x62(&x0_mnt, &x1_mnt, x0_mnt, a_mnt >> 2); 1885 lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 4); 1886 x_mnt = x1_mnt; 1887 1888 // This is an underestimate, so try adding one: 1889 mul62x62(&x0_mnt, &x1_mnt, b_mnt >> 2, x_mnt + 1); 1890 c = cmp128(x0_mnt, x1_mnt, 0, a_mnt >> 11); 1891 if (c <= 0) { 1892 ++x_mnt; 1893 } 1894 1895 x_mnt = fp64_normalise(x_mnt, &x_exp); 1896 1897 return fp64_round(x_sgn, x_exp, x_mnt << 1 | !!c, mode, flags); 1898} 1899 1900static void 1901set_fpscr0(FPSCR &fpscr, int flags) 1902{ 1903 if (flags & FPLIB_IDC) { 1904 fpscr.idc = 1; 1905 } 1906 if (flags & FPLIB_IOC) { 1907 fpscr.ioc = 1; 1908 } 1909 if (flags & FPLIB_DZC) { 1910 fpscr.dzc = 1; 1911 } 1912 if (flags & FPLIB_OFC) { 1913 fpscr.ofc = 1; 1914 } 1915 if (flags & FPLIB_UFC) { 1916 fpscr.ufc = 1; 1917 } 1918 if (flags & FPLIB_IXC) { 1919 fpscr.ixc = 1; 1920 } 1921} 1922 1923static uint16_t 1924fp16_scale(uint16_t a, int16_t b, int mode, int *flags) 1925{ 1926 int a_sgn, a_exp; 1927 uint16_t a_mnt; 1928 1929 fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 1930 1931 // Handle NaNs: 1932 if (fp16_is_NaN(a_exp, a_mnt)) { 1933 return fp16_process_NaN(a, mode, flags); 1934 } 1935 1936 // Handle zeroes: 1937 if (!a_mnt) { 1938 return fp16_zero(a_sgn); 1939 } 1940 1941 // Handle infinities: 1942 if (a_exp == FP16_EXP_INF) { 1943 return fp16_infinity(a_sgn); 1944 } 1945 1946 b = b < -300 ? -300 : b; 1947 b = b > 300 ? 
300 : b; 1948 a_exp += b; 1949 a_mnt <<= 3; 1950 1951 a_mnt = fp16_normalise(a_mnt, &a_exp); 1952 1953 return fp16_round(a_sgn, a_exp + FP16_EXP_BITS - 3, a_mnt << 1, 1954 mode, flags); 1955} 1956 1957static uint32_t 1958fp32_scale(uint32_t a, int32_t b, int mode, int *flags) 1959{ 1960 int a_sgn, a_exp; 1961 uint32_t a_mnt; 1962 1963 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 1964 1965 // Handle NaNs: 1966 if (fp32_is_NaN(a_exp, a_mnt)) { 1967 return fp32_process_NaN(a, mode, flags); 1968 } 1969 1970 // Handle zeroes: 1971 if (!a_mnt) { 1972 return fp32_zero(a_sgn); 1973 } 1974 1975 // Handle infinities: 1976 if (a_exp == FP32_EXP_INF) { 1977 return fp32_infinity(a_sgn); 1978 } 1979 1980 b = b < -300 ? -300 : b; 1981 b = b > 300 ? 300 : b; 1982 a_exp += b; 1983 a_mnt <<= 3; 1984 1985 a_mnt = fp32_normalise(a_mnt, &a_exp); 1986 1987 return fp32_round(a_sgn, a_exp + FP32_EXP_BITS - 3, a_mnt << 1, 1988 mode, flags); 1989} 1990 1991static uint64_t 1992fp64_scale(uint64_t a, int64_t b, int mode, int *flags) 1993{ 1994 int a_sgn, a_exp; 1995 uint64_t a_mnt; 1996 1997 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 1998 1999 // Handle NaNs: 2000 if (fp64_is_NaN(a_exp, a_mnt)) { 2001 return fp64_process_NaN(a, mode, flags); 2002 } 2003 2004 // Handle zeroes: 2005 if (!a_mnt) { 2006 return fp64_zero(a_sgn); 2007 } 2008 2009 // Handle infinities: 2010 if (a_exp == FP64_EXP_INF) { 2011 return fp64_infinity(a_sgn); 2012 } 2013 2014 b = b < -3000 ? -3000 : b; 2015 b = b > 3000 ? 3000 : b; 2016 a_exp += b; 2017 a_mnt <<= 3; 2018 2019 a_mnt = fp64_normalise(a_mnt, &a_exp); 2020 2021 return fp64_round(a_sgn, a_exp + FP64_EXP_BITS - 3, a_mnt << 1, 2022 mode, flags); 2023} 2024 2025static uint16_t 2026fp16_sqrt(uint16_t a, int mode, int *flags) 2027{ 2028 int a_sgn, a_exp, x_sgn, x_exp; 2029 uint16_t a_mnt, x_mnt; 2030 uint32_t x, t0, t1; 2031 2032 fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 2033 2034 // Handle NaNs: 2035 if (fp16_is_NaN(a_exp, a_mnt)) 2036 return fp16_process_NaN(a, mode, flags); 2037 2038 // Handle infinities and zeroes: 2039 if (!a_mnt) 2040 return fp16_zero(a_sgn); 2041 if (a_exp == FP16_EXP_INF && !a_sgn) 2042 return fp16_infinity(a_sgn); 2043 if (a_sgn) { 2044 *flags |= FPLIB_IOC; 2045 return fp16_defaultNaN(); 2046 } 2047 2048 a_mnt = fp16_normalise(a_mnt, &a_exp); 2049 if (a_exp & 1) { 2050 ++a_exp; 2051 a_mnt >>= 1; 2052 } 2053 2054 // x = (a * 3 + 5) / 8 2055 x = ((uint32_t)a_mnt << 14) + ((uint32_t)a_mnt << 13) + ((uint32_t)5 << 28); 2056 2057 // x = (a / x + x) / 2; // 8-bit accuracy 2058 x = (((uint32_t)a_mnt << 16) / (x >> 15) + (x >> 16)) << 15; 2059 2060 // x = (a / x + x) / 2; // 16-bit accuracy 2061 x = (((uint32_t)a_mnt << 16) / (x >> 15) + (x >> 16)) << 15; 2062 2063 x_sgn = 0; 2064 x_exp = (a_exp + 27) >> 1; 2065 x_mnt = ((x - (1 << 18)) >> 19) + 1; 2066 t1 = (uint32_t)x_mnt * x_mnt; 2067 t0 = (uint32_t)a_mnt << 9; 2068 if (t1 > t0) { 2069 --x_mnt; 2070 } 2071 2072 x_mnt = fp16_normalise(x_mnt, &x_exp); 2073 2074 return fp16_round(x_sgn, x_exp, x_mnt << 1 | (t1 != t0), mode, flags); 2075} 2076 2077static uint32_t 2078fp32_sqrt(uint32_t a, int mode, int *flags) 2079{ 2080 int a_sgn, a_exp, x_sgn, x_exp; 2081 uint32_t a_mnt, x, x_mnt; 2082 uint64_t t0, t1; 2083 2084 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 2085 2086 // Handle NaNs: 2087 if (fp32_is_NaN(a_exp, a_mnt)) 2088 return fp32_process_NaN(a, mode, flags); 2089 2090 // Handle infinities and zeroes: 2091 if (!a_mnt) 2092 return fp32_zero(a_sgn); 2093 if (a_exp == FP32_EXP_INF && 
!a_sgn) 2094 return fp32_infinity(a_sgn); 2095 if (a_sgn) { 2096 *flags |= FPLIB_IOC; 2097 return fp32_defaultNaN(); 2098 } 2099 2100 a_mnt = fp32_normalise(a_mnt, &a_exp); 2101 if (!(a_exp & 1)) { 2102 ++a_exp; 2103 a_mnt >>= 1; 2104 } 2105 2106 // x = (a * 3 + 5) / 8 2107 x = (a_mnt >> 2) + (a_mnt >> 3) + ((uint32_t)5 << 28); 2108 2109 // x = (a / x + x) / 2; // 8-bit accuracy 2110 x = (a_mnt / (x >> 15) + (x >> 16)) << 15; 2111 2112 // x = (a / x + x) / 2; // 16-bit accuracy 2113 x = (a_mnt / (x >> 15) + (x >> 16)) << 15; 2114 2115 // x = (a / x + x) / 2; // 32-bit accuracy 2116 x = ((((uint64_t)a_mnt << 32) / x) >> 2) + (x >> 1); 2117 2118 x_sgn = 0; 2119 x_exp = (a_exp + 147) >> 1; 2120 x_mnt = ((x - (1 << 5)) >> 6) + 1; 2121 t1 = (uint64_t)x_mnt * x_mnt; 2122 t0 = (uint64_t)a_mnt << 19; 2123 if (t1 > t0) { 2124 --x_mnt; 2125 } 2126 2127 x_mnt = fp32_normalise(x_mnt, &x_exp); 2128 2129 return fp32_round(x_sgn, x_exp, x_mnt << 1 | (t1 != t0), mode, flags); 2130} 2131 2132static uint64_t 2133fp64_sqrt(uint64_t a, int mode, int *flags) 2134{ 2135 int a_sgn, a_exp, x_sgn, x_exp, c; 2136 uint64_t a_mnt, x_mnt, r, x0, x1; 2137 uint32_t x; 2138 2139 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 2140 2141 // Handle NaNs: 2142 if (fp64_is_NaN(a_exp, a_mnt)) 2143 return fp64_process_NaN(a, mode, flags); 2144 2145 // Handle infinities and zeroes: 2146 if (!a_mnt) 2147 return fp64_zero(a_sgn); 2148 if (a_exp == FP64_EXP_INF && !a_sgn) 2149 return fp64_infinity(a_sgn); 2150 if (a_sgn) { 2151 *flags |= FPLIB_IOC; 2152 return fp64_defaultNaN(); 2153 } 2154 2155 a_mnt = fp64_normalise(a_mnt, &a_exp); 2156 if (a_exp & 1) { 2157 ++a_exp; 2158 a_mnt >>= 1; 2159 } 2160 2161 // x = (a * 3 + 5) / 8 2162 x = (a_mnt >> 34) + (a_mnt >> 35) + ((uint32_t)5 << 28); 2163 2164 // x = (a / x + x) / 2; // 8-bit accuracy 2165 x = ((a_mnt >> 32) / (x >> 15) + (x >> 16)) << 15; 2166 2167 // x = (a / x + x) / 2; // 16-bit accuracy 2168 x = ((a_mnt >> 32) / (x >> 15) + (x >> 16)) << 15; 2169 2170 // x = (a / x + x) / 2; // 32-bit accuracy 2171 x = ((a_mnt / x) >> 2) + (x >> 1); 2172 2173 // r = 1 / x; // 32-bit accuracy 2174 r = ((uint64_t)1 << 62) / x; 2175 2176 // r = r * (2 - x * r); // 64-bit accuracy 2177 mul64x32(&x0, &x1, -(uint64_t)x * r << 1, r); 2178 lsr128(&x0, &x1, x0, x1, 31); 2179 2180 // x = (x + a * r) / 2; // 64-bit accuracy 2181 mul62x62(&x0, &x1, a_mnt >> 10, x0 >> 2); 2182 lsl128(&x0, &x1, x0, x1, 5); 2183 lsr128(&x0, &x1, x0, x1, 56); 2184 2185 x0 = ((uint64_t)x << 31) + (x0 >> 1); 2186 2187 x_sgn = 0; 2188 x_exp = (a_exp + 1053) >> 1; 2189 x_mnt = x0; 2190 x_mnt = ((x_mnt - (1 << 8)) >> 9) + 1; 2191 mul62x62(&x0, &x1, x_mnt, x_mnt); 2192 lsl128(&x0, &x1, x0, x1, 19); 2193 c = cmp128(x0, x1, 0, a_mnt); 2194 if (c > 0) 2195 --x_mnt; 2196 2197 x_mnt = fp64_normalise(x_mnt, &x_exp); 2198 2199 return fp64_round(x_sgn, x_exp, x_mnt << 1 | !!c, mode, flags); 2200} 2201 2202static int 2203modeConv(FPSCR fpscr) 2204{ 2205 uint32_t x = (uint32_t)fpscr; 2206 return (x >> 22 & 0xf) | (x >> 19 & 1 ? FPLIB_FZ16 : 0); 2207 // AHP bit is ignored. Only fplibConvert uses AHP. 2208} 2209 2210static void 2211set_fpscr(FPSCR &fpscr, int flags) 2212{ 2213 // translate back to FPSCR 2214 bool underflow = false; 2215 if (flags & FPLIB_IDC) { 2216 fpscr.idc = 1; 2217 } 2218 if (flags & FPLIB_IOC) { 2219 fpscr.ioc = 1; 2220 } 2221 if (flags & FPLIB_DZC) { 2222 fpscr.dzc = 1; 2223 } 2224 if (flags & FPLIB_OFC) { 2225 fpscr.ofc = 1; 2226 } 2227 if (flags & FPLIB_UFC) { 2228 underflow = true; //xx Why is this required? 
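        // Remembering underflow here lets the inexact test below be
        // suppressed when flush-to-zero is active: a flushed result is
        // reported as underflow only, not as inexact.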
2229 fpscr.ufc = 1; 2230 } 2231 if ((flags & FPLIB_IXC) && !(underflow && fpscr.fz)) { 2232 fpscr.ixc = 1; 2233 } 2234} 2235 2236template <> 2237bool 2238fplibCompareEQ(uint16_t a, uint16_t b, FPSCR &fpscr) 2239{ 2240 int flags = 0; 2241 int x = fp16_compare_eq(a, b, modeConv(fpscr), &flags); 2242 set_fpscr(fpscr, flags); 2243 return x; 2244} 2245 2246template <> 2247bool 2248fplibCompareGE(uint16_t a, uint16_t b, FPSCR &fpscr) 2249{ 2250 int flags = 0; 2251 int x = fp16_compare_ge(a, b, modeConv(fpscr), &flags); 2252 set_fpscr(fpscr, flags); 2253 return x; 2254} 2255 2256template <> 2257bool 2258fplibCompareGT(uint16_t a, uint16_t b, FPSCR &fpscr) 2259{ 2260 int flags = 0; 2261 int x = fp16_compare_gt(a, b, modeConv(fpscr), &flags); 2262 set_fpscr(fpscr, flags); 2263 return x; 2264} 2265 2266template <> 2267bool 2268fplibCompareUN(uint16_t a, uint16_t b, FPSCR &fpscr) 2269{ 2270 int flags = 0; 2271 int x = fp16_compare_un(a, b, modeConv(fpscr), &flags); 2272 set_fpscr(fpscr, flags); 2273 return x; 2274} 2275 2276template <> 2277bool 2278fplibCompareEQ(uint32_t a, uint32_t b, FPSCR &fpscr) 2279{ 2280 int flags = 0; 2281 int x = fp32_compare_eq(a, b, modeConv(fpscr), &flags); 2282 set_fpscr(fpscr, flags); 2283 return x; 2284} 2285 2286template <> 2287bool 2288fplibCompareGE(uint32_t a, uint32_t b, FPSCR &fpscr) 2289{ 2290 int flags = 0; 2291 int x = fp32_compare_ge(a, b, modeConv(fpscr), &flags); 2292 set_fpscr(fpscr, flags); 2293 return x; 2294} 2295 2296template <> 2297bool 2298fplibCompareGT(uint32_t a, uint32_t b, FPSCR &fpscr) 2299{ 2300 int flags = 0; 2301 int x = fp32_compare_gt(a, b, modeConv(fpscr), &flags); 2302 set_fpscr(fpscr, flags); 2303 return x; 2304} 2305 2306template <> 2307bool 2308fplibCompareUN(uint32_t a, uint32_t b, FPSCR &fpscr) 2309{ 2310 int flags = 0; 2311 int x = fp32_compare_un(a, b, modeConv(fpscr), &flags); 2312 set_fpscr(fpscr, flags); 2313 return x; 2314} 2315 2316template <> 2317bool 2318fplibCompareEQ(uint64_t a, uint64_t b, FPSCR &fpscr) 2319{ 2320 int flags = 0; 2321 int x = fp64_compare_eq(a, b, modeConv(fpscr), &flags); 2322 set_fpscr(fpscr, flags); 2323 return x; 2324} 2325 2326template <> 2327bool 2328fplibCompareGE(uint64_t a, uint64_t b, FPSCR &fpscr) 2329{ 2330 int flags = 0; 2331 int x = fp64_compare_ge(a, b, modeConv(fpscr), &flags); 2332 set_fpscr(fpscr, flags); 2333 return x; 2334} 2335 2336template <> 2337bool 2338fplibCompareGT(uint64_t a, uint64_t b, FPSCR &fpscr) 2339{ 2340 int flags = 0; 2341 int x = fp64_compare_gt(a, b, modeConv(fpscr), &flags); 2342 set_fpscr(fpscr, flags); 2343 return x; 2344} 2345 2346template <> 2347bool 2348fplibCompareUN(uint64_t a, uint64_t b, FPSCR &fpscr) 2349{ 2350 int flags = 0; 2351 int x = fp64_compare_un(a, b, modeConv(fpscr), &flags); 2352 set_fpscr(fpscr, flags); 2353 return x; 2354} 2355 2356template <> 2357uint16_t 2358fplibAbs(uint16_t op) 2359{ 2360 return op & ~(1ULL << (FP16_BITS - 1)); 2361} 2362 2363template <> 2364uint32_t 2365fplibAbs(uint32_t op) 2366{ 2367 return op & ~(1ULL << (FP32_BITS - 1)); 2368} 2369 2370template <> 2371uint64_t 2372fplibAbs(uint64_t op) 2373{ 2374 return op & ~(1ULL << (FP64_BITS - 1)); 2375} 2376 2377template <> 2378uint16_t 2379fplibAdd(uint16_t op1, uint16_t op2, FPSCR &fpscr) 2380{ 2381 int flags = 0; 2382 uint16_t result = fp16_add(op1, op2, 0, modeConv(fpscr), &flags); 2383 set_fpscr0(fpscr, flags); 2384 return result; 2385} 2386 2387template <> 2388uint32_t 2389fplibAdd(uint32_t op1, uint32_t op2, FPSCR &fpscr) 2390{ 2391 int flags = 0; 2392 uint32_t result = 
fp32_add(op1, op2, 0, modeConv(fpscr), &flags); 2393 set_fpscr0(fpscr, flags); 2394 return result; 2395} 2396 2397template <> 2398uint64_t 2399fplibAdd(uint64_t op1, uint64_t op2, FPSCR &fpscr) 2400{ 2401 int flags = 0; 2402 uint64_t result = fp64_add(op1, op2, 0, modeConv(fpscr), &flags); 2403 set_fpscr0(fpscr, flags); 2404 return result; 2405} 2406 2407template <> 2408int 2409fplibCompare(uint16_t op1, uint16_t op2, bool signal_nans, FPSCR &fpscr) 2410{ 2411 int mode = modeConv(fpscr); 2412 int flags = 0; 2413 int sgn1, exp1, sgn2, exp2, result; 2414 uint16_t mnt1, mnt2; 2415 2416 fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); 2417 fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); 2418 2419 if (fp16_is_NaN(exp1, mnt1) || fp16_is_NaN(exp2, mnt2)) { 2420 result = 3; 2421 if (fp16_is_signalling_NaN(exp1, mnt1) || 2422 fp16_is_signalling_NaN(exp2, mnt2) || signal_nans) 2423 flags |= FPLIB_IOC; 2424 } else { 2425 if (op1 == op2 || (!mnt1 && !mnt2)) { 2426 result = 6; 2427 } else if (sgn1 != sgn2) { 2428 result = sgn1 ? 8 : 2; 2429 } else if (exp1 != exp2) { 2430 result = sgn1 ^ (exp1 < exp2) ? 8 : 2; 2431 } else { 2432 result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2; 2433 } 2434 } 2435 2436 set_fpscr0(fpscr, flags); 2437 2438 return result; 2439} 2440 2441template <> 2442int 2443fplibCompare(uint32_t op1, uint32_t op2, bool signal_nans, FPSCR &fpscr) 2444{ 2445 int mode = modeConv(fpscr); 2446 int flags = 0; 2447 int sgn1, exp1, sgn2, exp2, result; 2448 uint32_t mnt1, mnt2; 2449 2450 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); 2451 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); 2452 2453 if (fp32_is_NaN(exp1, mnt1) || fp32_is_NaN(exp2, mnt2)) { 2454 result = 3; 2455 if (fp32_is_signalling_NaN(exp1, mnt1) || 2456 fp32_is_signalling_NaN(exp2, mnt2) || signal_nans) 2457 flags |= FPLIB_IOC; 2458 } else { 2459 if (op1 == op2 || (!mnt1 && !mnt2)) { 2460 result = 6; 2461 } else if (sgn1 != sgn2) { 2462 result = sgn1 ? 8 : 2; 2463 } else if (exp1 != exp2) { 2464 result = sgn1 ^ (exp1 < exp2) ? 8 : 2; 2465 } else { 2466 result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2; 2467 } 2468 } 2469 2470 set_fpscr0(fpscr, flags); 2471 2472 return result; 2473} 2474 2475template <> 2476int 2477fplibCompare(uint64_t op1, uint64_t op2, bool signal_nans, FPSCR &fpscr) 2478{ 2479 int mode = modeConv(fpscr); 2480 int flags = 0; 2481 int sgn1, exp1, sgn2, exp2, result; 2482 uint64_t mnt1, mnt2; 2483 2484 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); 2485 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); 2486 2487 if (fp64_is_NaN(exp1, mnt1) || fp64_is_NaN(exp2, mnt2)) { 2488 result = 3; 2489 if (fp64_is_signalling_NaN(exp1, mnt1) || 2490 fp64_is_signalling_NaN(exp2, mnt2) || signal_nans) 2491 flags |= FPLIB_IOC; 2492 } else { 2493 if (op1 == op2 || (!mnt1 && !mnt2)) { 2494 result = 6; 2495 } else if (sgn1 != sgn2) { 2496 result = sgn1 ? 8 : 2; 2497 } else if (exp1 != exp2) { 2498 result = sgn1 ^ (exp1 < exp2) ? 8 : 2; 2499 } else { 2500 result = sgn1 ^ (mnt1 < mnt2) ? 
8 : 2; 2501 } 2502 } 2503 2504 set_fpscr0(fpscr, flags); 2505 2506 return result; 2507} 2508 2509static uint16_t 2510fp16_FPConvertNaN_32(uint32_t op) 2511{ 2512 return fp16_pack(op >> (FP32_BITS - 1), FP16_EXP_INF, 2513 1ULL << (FP16_MANT_BITS - 1) | 2514 op >> (FP32_MANT_BITS - FP16_MANT_BITS)); 2515} 2516 2517static uint16_t 2518fp16_FPConvertNaN_64(uint64_t op) 2519{ 2520 return fp16_pack(op >> (FP64_BITS - 1), FP16_EXP_INF, 2521 1ULL << (FP16_MANT_BITS - 1) | 2522 op >> (FP64_MANT_BITS - FP16_MANT_BITS)); 2523} 2524 2525static uint32_t 2526fp32_FPConvertNaN_16(uint16_t op) 2527{ 2528 return fp32_pack(op >> (FP16_BITS - 1), FP32_EXP_INF, 2529 1ULL << (FP32_MANT_BITS - 1) | 2530 (uint32_t)op << (FP32_MANT_BITS - FP16_MANT_BITS)); 2531} 2532 2533static uint32_t 2534fp32_FPConvertNaN_64(uint64_t op) 2535{ 2536 return fp32_pack(op >> (FP64_BITS - 1), FP32_EXP_INF, 2537 1ULL << (FP32_MANT_BITS - 1) | 2538 op >> (FP64_MANT_BITS - FP32_MANT_BITS)); 2539} 2540 2541static uint64_t 2542fp64_FPConvertNaN_16(uint16_t op) 2543{ 2544 return fp64_pack(op >> (FP16_BITS - 1), FP64_EXP_INF, 2545 1ULL << (FP64_MANT_BITS - 1) | 2546 (uint64_t)op << (FP64_MANT_BITS - FP16_MANT_BITS)); 2547} 2548 2549static uint64_t 2550fp64_FPConvertNaN_32(uint32_t op) 2551{ 2552 return fp64_pack(op >> (FP32_BITS - 1), FP64_EXP_INF, 2553 1ULL << (FP64_MANT_BITS - 1) | 2554 (uint64_t)op << (FP64_MANT_BITS - FP32_MANT_BITS)); 2555} 2556 2557static uint16_t 2558fp16_FPOnePointFive(int sgn) 2559{ 2560 return fp16_pack(sgn, FP16_EXP_BIAS, 1ULL << (FP16_MANT_BITS - 1)); 2561} 2562 2563static uint32_t 2564fp32_FPOnePointFive(int sgn) 2565{ 2566 return fp32_pack(sgn, FP32_EXP_BIAS, 1ULL << (FP32_MANT_BITS - 1)); 2567} 2568 2569static uint64_t 2570fp64_FPOnePointFive(int sgn) 2571{ 2572 return fp64_pack(sgn, FP64_EXP_BIAS, 1ULL << (FP64_MANT_BITS - 1)); 2573} 2574 2575static uint16_t 2576fp16_FPThree(int sgn) 2577{ 2578 return fp16_pack(sgn, FP16_EXP_BIAS + 1, 1ULL << (FP16_MANT_BITS - 1)); 2579} 2580 2581static uint32_t 2582fp32_FPThree(int sgn) 2583{ 2584 return fp32_pack(sgn, FP32_EXP_BIAS + 1, 1ULL << (FP32_MANT_BITS - 1)); 2585} 2586 2587static uint64_t 2588fp64_FPThree(int sgn) 2589{ 2590 return fp64_pack(sgn, FP64_EXP_BIAS + 1, 1ULL << (FP64_MANT_BITS - 1)); 2591} 2592 2593static uint16_t 2594fp16_FPTwo(int sgn) 2595{ 2596 return fp16_pack(sgn, FP16_EXP_BIAS + 1, 0); 2597} 2598 2599static uint32_t 2600fp32_FPTwo(int sgn) 2601{ 2602 return fp32_pack(sgn, FP32_EXP_BIAS + 1, 0); 2603} 2604 2605static uint64_t 2606fp64_FPTwo(int sgn) 2607{ 2608 return fp64_pack(sgn, FP64_EXP_BIAS + 1, 0); 2609} 2610 2611template <> 2612uint16_t 2613fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr) 2614{ 2615 int mode = modeConv(fpscr); 2616 int flags = 0; 2617 int sgn, exp; 2618 uint32_t mnt; 2619 uint16_t result; 2620 2621 // Unpack floating-point operand optionally with flush-to-zero: 2622 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags); 2623 2624 bool alt_hp = fpscr.ahp; 2625 2626 if (fp32_is_NaN(exp, mnt)) { 2627 if (alt_hp) { 2628 result = fp16_zero(sgn); 2629 } else if (fpscr.dn) { 2630 result = fp16_defaultNaN(); 2631 } else { 2632 result = fp16_FPConvertNaN_32(op); 2633 } 2634 if (!(mnt >> (FP32_MANT_BITS - 1) & 1) || alt_hp) { 2635 flags |= FPLIB_IOC; 2636 } 2637 } else if (exp == FP32_EXP_INF) { 2638 if (alt_hp) { 2639 result = ((uint16_t)sgn << (FP16_BITS - 1) | 2640 ((1ULL << (FP16_BITS - 1)) - 1)); 2641 flags |= FPLIB_IOC; 2642 } else { 2643 result = fp16_infinity(sgn); 2644 } 2645 } else if (!mnt) { 2646 result = 
fp16_zero(sgn); 2647 } else { 2648 result = 2649 fp16_round_(sgn, exp - FP32_EXP_BIAS + FP16_EXP_BIAS, 2650 mnt >> (FP32_MANT_BITS - FP16_BITS) | 2651 !!(mnt & ((1ULL << (FP32_MANT_BITS - FP16_BITS)) - 1)), 2652 rounding, (mode & 0xf) | alt_hp << 4, &flags); 2653 } 2654 2655 set_fpscr0(fpscr, flags); 2656 2657 return result; 2658} 2659 2660template <> 2661uint16_t 2662fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr) 2663{ 2664 int mode = modeConv(fpscr); 2665 int flags = 0; 2666 int sgn, exp; 2667 uint64_t mnt; 2668 uint16_t result; 2669 2670 // Unpack floating-point operand optionally with flush-to-zero: 2671 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags); 2672 2673 bool alt_hp = fpscr.ahp; 2674 2675 if (fp64_is_NaN(exp, mnt)) { 2676 if (alt_hp) { 2677 result = fp16_zero(sgn); 2678 } else if (fpscr.dn) { 2679 result = fp16_defaultNaN(); 2680 } else { 2681 result = fp16_FPConvertNaN_64(op); 2682 } 2683 if (!(mnt >> (FP64_MANT_BITS - 1) & 1) || alt_hp) { 2684 flags |= FPLIB_IOC; 2685 } 2686 } else if (exp == FP64_EXP_INF) { 2687 if (alt_hp) { 2688 result = ((uint16_t)sgn << (FP16_BITS - 1) | 2689 ((1ULL << (FP16_BITS - 1)) - 1)); 2690 flags |= FPLIB_IOC; 2691 } else { 2692 result = fp16_infinity(sgn); 2693 } 2694 } else if (!mnt) { 2695 result = fp16_zero(sgn); 2696 } else { 2697 result = 2698 fp16_round_(sgn, exp - FP64_EXP_BIAS + FP16_EXP_BIAS, 2699 mnt >> (FP64_MANT_BITS - FP16_BITS) | 2700 !!(mnt & ((1ULL << (FP64_MANT_BITS - FP16_BITS)) - 1)), 2701 rounding, (mode & 0xf) | alt_hp << 4, &flags); 2702 } 2703 2704 set_fpscr0(fpscr, flags); 2705 2706 return result; 2707} 2708 2709template <> 2710uint32_t 2711fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr) 2712{ 2713 int mode = modeConv(fpscr); 2714 int flags = 0; 2715 int sgn, exp; 2716 uint16_t mnt; 2717 uint32_t result; 2718 2719 // Unpack floating-point operand optionally with flush-to-zero: 2720 fp16_unpack(&sgn, &exp, &mnt, op, mode & 0xf, &flags); 2721 2722 if (fp16_is_NaN(exp, mnt) && !fpscr.ahp) { 2723 if (fpscr.dn) { 2724 result = fp32_defaultNaN(); 2725 } else { 2726 result = fp32_FPConvertNaN_16(op); 2727 } 2728 if (!(mnt >> (FP16_MANT_BITS - 1) & 1)) { 2729 flags |= FPLIB_IOC; 2730 } 2731 } else if (exp == FP16_EXP_INF && !fpscr.ahp) { 2732 result = fp32_infinity(sgn); 2733 } else if (!mnt) { 2734 result = fp32_zero(sgn); 2735 } else { 2736 mnt = fp16_normalise(mnt, &exp); 2737 result = fp32_pack(sgn, (exp - FP16_EXP_BIAS + 2738 FP32_EXP_BIAS + FP16_EXP_BITS), 2739 (uint32_t)mnt << (FP32_MANT_BITS - FP16_BITS + 1)); 2740 } 2741 2742 set_fpscr0(fpscr, flags); 2743 2744 return result; 2745} 2746 2747template <> 2748uint32_t 2749fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr) 2750{ 2751 int mode = modeConv(fpscr); 2752 int flags = 0; 2753 int sgn, exp; 2754 uint64_t mnt; 2755 uint32_t result; 2756 2757 // Unpack floating-point operand optionally with flush-to-zero: 2758 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags); 2759 2760 if (fp64_is_NaN(exp, mnt)) { 2761 if (fpscr.dn) { 2762 result = fp32_defaultNaN(); 2763 } else { 2764 result = fp32_FPConvertNaN_64(op); 2765 } 2766 if (!(mnt >> (FP64_MANT_BITS - 1) & 1)) { 2767 flags |= FPLIB_IOC; 2768 } 2769 } else if (exp == FP64_EXP_INF) { 2770 result = fp32_infinity(sgn); 2771 } else if (!mnt) { 2772 result = fp32_zero(sgn); 2773 } else { 2774 result = 2775 fp32_round_(sgn, exp - FP64_EXP_BIAS + FP32_EXP_BIAS, 2776 mnt >> (FP64_MANT_BITS - FP32_BITS) | 2777 !!(mnt & ((1ULL << (FP64_MANT_BITS - FP32_BITS)) - 1)), 2778 rounding, mode, &flags); 2779 
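        // In the rounded path above, the low bits of the double mantissa
        // that do not fit into the rounder's input are OR-reduced into the
        // bottom (sticky) bit, so fp32_round_ still observes whether any
        // precision was discarded and can raise the inexact flag.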
} 2780 2781 set_fpscr0(fpscr, flags); 2782 2783 return result; 2784} 2785 2786template <> 2787uint64_t 2788fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr) 2789{ 2790 int mode = modeConv(fpscr); 2791 int flags = 0; 2792 int sgn, exp; 2793 uint16_t mnt; 2794 uint64_t result; 2795 2796 // Unpack floating-point operand optionally with flush-to-zero: 2797 fp16_unpack(&sgn, &exp, &mnt, op, mode & 0xf, &flags); 2798 2799 if (fp16_is_NaN(exp, mnt) && !fpscr.ahp) { 2800 if (fpscr.dn) { 2801 result = fp64_defaultNaN(); 2802 } else { 2803 result = fp64_FPConvertNaN_16(op); 2804 } 2805 if (!(mnt >> (FP16_MANT_BITS - 1) & 1)) { 2806 flags |= FPLIB_IOC; 2807 } 2808 } else if (exp == FP16_EXP_INF && !fpscr.ahp) { 2809 result = fp64_infinity(sgn); 2810 } else if (!mnt) { 2811 result = fp64_zero(sgn); 2812 } else { 2813 mnt = fp16_normalise(mnt, &exp); 2814 result = fp64_pack(sgn, (exp - FP16_EXP_BIAS + 2815 FP64_EXP_BIAS + FP16_EXP_BITS), 2816 (uint64_t)mnt << (FP64_MANT_BITS - FP16_BITS + 1)); 2817 } 2818 2819 set_fpscr0(fpscr, flags); 2820 2821 return result; 2822} 2823 2824template <> 2825uint64_t 2826fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr) 2827{ 2828 int mode = modeConv(fpscr); 2829 int flags = 0; 2830 int sgn, exp; 2831 uint32_t mnt; 2832 uint64_t result; 2833 2834 // Unpack floating-point operand optionally with flush-to-zero: 2835 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags); 2836 2837 if (fp32_is_NaN(exp, mnt)) { 2838 if (fpscr.dn) { 2839 result = fp64_defaultNaN(); 2840 } else { 2841 result = fp64_FPConvertNaN_32(op); 2842 } 2843 if (!(mnt >> (FP32_MANT_BITS - 1) & 1)) { 2844 flags |= FPLIB_IOC; 2845 } 2846 } else if (exp == FP32_EXP_INF) { 2847 result = fp64_infinity(sgn); 2848 } else if (!mnt) { 2849 result = fp64_zero(sgn); 2850 } else { 2851 mnt = fp32_normalise(mnt, &exp); 2852 result = fp64_pack(sgn, (exp - FP32_EXP_BIAS + 2853 FP64_EXP_BIAS + FP32_EXP_BITS), 2854 (uint64_t)mnt << (FP64_MANT_BITS - FP32_BITS + 1)); 2855 } 2856 2857 set_fpscr0(fpscr, flags); 2858 2859 return result; 2860} 2861 2862template <> 2863uint16_t 2864fplibMulAdd(uint16_t addend, uint16_t op1, uint16_t op2, FPSCR &fpscr) 2865{ 2866 int flags = 0; 2867 uint16_t result = fp16_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags); 2868 set_fpscr0(fpscr, flags); 2869 return result; 2870} 2871 2872template <> 2873uint32_t 2874fplibMulAdd(uint32_t addend, uint32_t op1, uint32_t op2, FPSCR &fpscr) 2875{ 2876 int flags = 0; 2877 uint32_t result = fp32_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags); 2878 set_fpscr0(fpscr, flags); 2879 return result; 2880} 2881 2882template <> 2883uint64_t 2884fplibMulAdd(uint64_t addend, uint64_t op1, uint64_t op2, FPSCR &fpscr) 2885{ 2886 int flags = 0; 2887 uint64_t result = fp64_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags); 2888 set_fpscr0(fpscr, flags); 2889 return result; 2890} 2891 2892template <> 2893uint16_t 2894fplibDiv(uint16_t op1, uint16_t op2, FPSCR &fpscr) 2895{ 2896 int flags = 0; 2897 uint16_t result = fp16_div(op1, op2, modeConv(fpscr), &flags); 2898 set_fpscr0(fpscr, flags); 2899 return result; 2900} 2901 2902template <> 2903uint32_t 2904fplibDiv(uint32_t op1, uint32_t op2, FPSCR &fpscr) 2905{ 2906 int flags = 0; 2907 uint32_t result = fp32_div(op1, op2, modeConv(fpscr), &flags); 2908 set_fpscr0(fpscr, flags); 2909 return result; 2910} 2911 2912template <> 2913uint64_t 2914fplibDiv(uint64_t op1, uint64_t op2, FPSCR &fpscr) 2915{ 2916 int flags = 0; 2917 uint64_t result = fp64_div(op1, op2, modeConv(fpscr), &flags); 2918 
template <>
uint16_t
fplibExpA(uint16_t op)
{
    static uint16_t coeff[32] = {
        0x0000, 0x0016, 0x002d, 0x0045, 0x005d, 0x0075, 0x008e, 0x00a8,
        0x00c2, 0x00dc, 0x00f8, 0x0114, 0x0130, 0x014d, 0x016b, 0x0189,
        0x01a8, 0x01c8, 0x01e8, 0x0209, 0x022b, 0x024e, 0x0271, 0x0295,
        0x02ba, 0x02e0, 0x0306, 0x032e, 0x0356, 0x037f, 0x03a9, 0x03d4
    };
    return ((((op >> 5) & ((1 << FP16_EXP_BITS) - 1)) << FP16_MANT_BITS) |
            coeff[op & ((1 << 5) - 1)]);
}

template <>
uint32_t
fplibExpA(uint32_t op)
{
    static uint32_t coeff[64] = {
        0x000000, 0x0164d2, 0x02cd87, 0x043a29, 0x05aac3, 0x071f62,
        0x08980f, 0x0a14d5, 0x0b95c2, 0x0d1adf, 0x0ea43a, 0x1031dc,
        0x11c3d3, 0x135a2b, 0x14f4f0, 0x16942d, 0x1837f0, 0x19e046,
        0x1b8d3a, 0x1d3eda, 0x1ef532, 0x20b051, 0x227043, 0x243516,
        0x25fed7, 0x27cd94, 0x29a15b, 0x2b7a3a, 0x2d583f, 0x2f3b79,
        0x3123f6, 0x3311c4, 0x3504f3, 0x36fd92, 0x38fbaf, 0x3aff5b,
        0x3d08a4, 0x3f179a, 0x412c4d, 0x4346cd, 0x45672a, 0x478d75,
        0x49b9be, 0x4bec15, 0x4e248c, 0x506334, 0x52a81e, 0x54f35b,
        0x5744fd, 0x599d16, 0x5bfbb8, 0x5e60f5, 0x60ccdf, 0x633f89,
        0x65b907, 0x68396a, 0x6ac0c7, 0x6d4f30, 0x6fe4ba, 0x728177,
        0x75257d, 0x77d0df, 0x7a83b3, 0x7d3e0c
    };
    return ((((op >> 6) & ((1 << FP32_EXP_BITS) - 1)) << FP32_MANT_BITS) |
            coeff[op & ((1 << 6) - 1)]);
}

template <>
uint64_t
fplibExpA(uint64_t op)
{
    static uint64_t coeff[64] = {
        0x0000000000000ULL, 0x02c9a3e778061ULL, 0x059b0d3158574ULL,
        0x0874518759bc8ULL, 0x0b5586cf9890fULL, 0x0e3ec32d3d1a2ULL,
        0x11301d0125b51ULL, 0x1429aaea92de0ULL, 0x172b83c7d517bULL,
        0x1a35beb6fcb75ULL, 0x1d4873168b9aaULL, 0x2063b88628cd6ULL,
        0x2387a6e756238ULL, 0x26b4565e27cddULL, 0x29e9df51fdee1ULL,
        0x2d285a6e4030bULL, 0x306fe0a31b715ULL, 0x33c08b26416ffULL,
        0x371a7373aa9cbULL, 0x3a7db34e59ff7ULL, 0x3dea64c123422ULL,
        0x4160a21f72e2aULL, 0x44e086061892dULL, 0x486a2b5c13cd0ULL,
        0x4bfdad5362a27ULL, 0x4f9b2769d2ca7ULL, 0x5342b569d4f82ULL,
        0x56f4736b527daULL, 0x5ab07dd485429ULL, 0x5e76f15ad2148ULL,
        0x6247eb03a5585ULL, 0x6623882552225ULL, 0x6a09e667f3bcdULL,
        0x6dfb23c651a2fULL, 0x71f75e8ec5f74ULL, 0x75feb564267c9ULL,
        0x7a11473eb0187ULL, 0x7e2f336cf4e62ULL, 0x82589994cce13ULL,
        0x868d99b4492edULL, 0x8ace5422aa0dbULL, 0x8f1ae99157736ULL,
        0x93737b0cdc5e5ULL, 0x97d829fde4e50ULL, 0x9c49182a3f090ULL,
        0xa0c667b5de565ULL, 0xa5503b23e255dULL, 0xa9e6b5579fdbfULL,
        0xae89f995ad3adULL, 0xb33a2b84f15fbULL, 0xb7f76f2fb5e47ULL,
        0xbcc1e904bc1d2ULL, 0xc199bdd85529cULL, 0xc67f12e57d14bULL,
        0xcb720dcef9069ULL, 0xd072d4a07897cULL, 0xd5818dcfba487ULL,
        0xda9e603db3285ULL, 0xdfc97337b9b5fULL, 0xe502ee78b3ff6ULL,
        0xea4afa2a490daULL, 0xefa1bee615a27ULL, 0xf50765b6e4540ULL,
        0xfa7c1819e90d8ULL
    };
    return ((((op >> 6) & ((1 << FP64_EXP_BITS) - 1)) << FP64_MANT_BITS) |
            coeff[op & ((1 << 6) - 1)]);
}
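/*
 * The fplibExpA coefficient tables above hold the fraction field of
 * 2^(i/64) (2^(i/32) for the 32-entry half-precision table); e.g. the
 * fp64 entry at index 32 is the fraction of 2^0.5 = 1.6a09e667f3bcd...
 * The function simply splices the high bits of the input onto that
 * fraction as an exponent field, which is what gives FEXPA its cheap
 * first approximation of 2^x for software exponential routines.
 */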
static uint16_t
fp16_repack(int sgn, int exp, uint16_t mnt)
{
    return fp16_pack(sgn, mnt >> FP16_MANT_BITS ? exp : 0, mnt);
}

static uint32_t
fp32_repack(int sgn, int exp, uint32_t mnt)
{
    return fp32_pack(sgn, mnt >> FP32_MANT_BITS ? exp : 0, mnt);
}

static uint64_t
fp64_repack(int sgn, int exp, uint64_t mnt)
{
    return fp64_pack(sgn, mnt >> FP64_MANT_BITS ? exp : 0, mnt);
}

static void
fp16_minmaxnum(uint16_t *op1, uint16_t *op2, int sgn)
{
    // Treat a single quiet-NaN as +Infinity/-Infinity
    if (!((uint16_t)~(*op1 << 1) >> FP16_MANT_BITS) &&
        (uint16_t)~(*op2 << 1) >> FP16_MANT_BITS)
        *op1 = fp16_infinity(sgn);
    if (!((uint16_t)~(*op2 << 1) >> FP16_MANT_BITS) &&
        (uint16_t)~(*op1 << 1) >> FP16_MANT_BITS)
        *op2 = fp16_infinity(sgn);
}

static void
fp32_minmaxnum(uint32_t *op1, uint32_t *op2, int sgn)
{
    // Treat a single quiet-NaN as +Infinity/-Infinity
    if (!((uint32_t)~(*op1 << 1) >> FP32_MANT_BITS) &&
        (uint32_t)~(*op2 << 1) >> FP32_MANT_BITS)
        *op1 = fp32_infinity(sgn);
    if (!((uint32_t)~(*op2 << 1) >> FP32_MANT_BITS) &&
        (uint32_t)~(*op1 << 1) >> FP32_MANT_BITS)
        *op2 = fp32_infinity(sgn);
}

static void
fp64_minmaxnum(uint64_t *op1, uint64_t *op2, int sgn)
{
    // Treat a single quiet-NaN as +Infinity/-Infinity
    if (!((uint64_t)~(*op1 << 1) >> FP64_MANT_BITS) &&
        (uint64_t)~(*op2 << 1) >> FP64_MANT_BITS)
        *op1 = fp64_infinity(sgn);
    if (!((uint64_t)~(*op2 << 1) >> FP64_MANT_BITS) &&
        (uint64_t)~(*op1 << 1) >> FP64_MANT_BITS)
        *op2 = fp64_infinity(sgn);
}
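/*
 * fp*_minmaxnum implement the IEEE 754-2008 maxNum/minNum rule that a
 * single quiet NaN acts as "missing data": it is replaced with the
 * identity element (+/-infinity) before the ordinary max/min runs. The
 * selection expressions in fplibMax/fplibMin below rely on raw IEEE
 * encodings of same-signed values ordering like unsigned integers. A
 * sketch of the max selection (equivalent rewrite, not literal code):
 *
 *     // both positive: bigger encoding == bigger value
 *     // both negative: bigger encoding == more negative value
 *     bool take_op1 = sgn1 == sgn2 ? (sgn1 ^ (op1 > op2)) : sgn2;
 */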
template <>
uint16_t
fplibMax(uint16_t op1, uint16_t op2, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint16_t mnt1, mnt2, x, result;

    fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    if ((x = fp16_process_NaNs(op1, op2, mode, &flags))) {
        result = x;
    } else {
        result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ?
                  fp16_repack(sgn1, exp1, mnt1) :
                  fp16_repack(sgn2, exp2, mnt2));
    }
    set_fpscr0(fpscr, flags);
    return result;
}

template <>
uint32_t
fplibMax(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint32_t mnt1, mnt2, x, result;

    fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    if ((x = fp32_process_NaNs(op1, op2, mode, &flags))) {
        result = x;
    } else {
        result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ?
                  fp32_repack(sgn1, exp1, mnt1) :
                  fp32_repack(sgn2, exp2, mnt2));
    }
    set_fpscr0(fpscr, flags);
    return result;
}

template <>
uint64_t
fplibMax(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint64_t mnt1, mnt2, x, result;

    fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    if ((x = fp64_process_NaNs(op1, op2, mode, &flags))) {
        result = x;
    } else {
        result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ?
                  fp64_repack(sgn1, exp1, mnt1) :
                  fp64_repack(sgn2, exp2, mnt2));
    }
    set_fpscr0(fpscr, flags);
    return result;
}

template <>
uint16_t
fplibMaxNum(uint16_t op1, uint16_t op2, FPSCR &fpscr)
{
    fp16_minmaxnum(&op1, &op2, 1);
    return fplibMax<uint16_t>(op1, op2, fpscr);
}

template <>
uint32_t
fplibMaxNum(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    fp32_minmaxnum(&op1, &op2, 1);
    return fplibMax<uint32_t>(op1, op2, fpscr);
}

template <>
uint64_t
fplibMaxNum(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    fp64_minmaxnum(&op1, &op2, 1);
    return fplibMax<uint64_t>(op1, op2, fpscr);
}
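/*
 * Note the sign argument to fp*_minmaxnum: fplibMaxNum substitutes
 * -infinity (sgn = 1) for a lone quiet NaN so the NaN can never win the
 * max, while fplibMinNum below substitutes +infinity (sgn = 0) so it
 * can never win the min.
 */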
template <>
uint16_t
fplibMin(uint16_t op1, uint16_t op2, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint16_t mnt1, mnt2, x, result;

    fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    if ((x = fp16_process_NaNs(op1, op2, mode, &flags))) {
        result = x;
    } else {
        result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ?
                  fp16_repack(sgn1, exp1, mnt1) :
                  fp16_repack(sgn2, exp2, mnt2));
    }
    set_fpscr0(fpscr, flags);
    return result;
}

template <>
uint32_t
fplibMin(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint32_t mnt1, mnt2, x, result;

    fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    if ((x = fp32_process_NaNs(op1, op2, mode, &flags))) {
        result = x;
    } else {
        result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ?
                  fp32_repack(sgn1, exp1, mnt1) :
                  fp32_repack(sgn2, exp2, mnt2));
    }
    set_fpscr0(fpscr, flags);
    return result;
}

template <>
uint64_t
fplibMin(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint64_t mnt1, mnt2, x, result;

    fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    if ((x = fp64_process_NaNs(op1, op2, mode, &flags))) {
        result = x;
    } else {
        result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ?
                  fp64_repack(sgn1, exp1, mnt1) :
                  fp64_repack(sgn2, exp2, mnt2));
    }
    set_fpscr0(fpscr, flags);
    return result;
}

template <>
uint16_t
fplibMinNum(uint16_t op1, uint16_t op2, FPSCR &fpscr)
{
    fp16_minmaxnum(&op1, &op2, 0);
    return fplibMin<uint16_t>(op1, op2, fpscr);
}

template <>
uint32_t
fplibMinNum(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    fp32_minmaxnum(&op1, &op2, 0);
    return fplibMin<uint32_t>(op1, op2, fpscr);
}

template <>
uint64_t
fplibMinNum(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    fp64_minmaxnum(&op1, &op2, 0);
    return fplibMin<uint64_t>(op1, op2, fpscr);
}

template <>
uint16_t
fplibMul(uint16_t op1, uint16_t op2, FPSCR &fpscr)
{
    int flags = 0;
    uint16_t result = fp16_mul(op1, op2, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}

template <>
uint32_t
fplibMul(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    int flags = 0;
    uint32_t result = fp32_mul(op1, op2, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}

template <>
uint64_t
fplibMul(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    int flags = 0;
    uint64_t result = fp64_mul(op1, op2, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}
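/*
 * FPMulX (fplibMulX below) is multiply with the Arm-specific rule that
 * infinity * zero returns +/-2.0 instead of raising Invalid Operation,
 * which keeps reciprocal-estimate sequences such as x * FPRecipEstimate(x)
 * finite even for x == 0 or x == infinity. A zero return from
 * fp*_process_NaNs() means neither operand was a NaN.
 */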
template <>
uint16_t
fplibMulX(uint16_t op1, uint16_t op2, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint16_t mnt1, mnt2, result;

    fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    result = fp16_process_NaNs(op1, op2, mode, &flags);
    if (!result) {
        if ((exp1 == FP16_EXP_INF && !mnt2) ||
            (exp2 == FP16_EXP_INF && !mnt1)) {
            result = fp16_FPTwo(sgn1 ^ sgn2);
        } else if (exp1 == FP16_EXP_INF || exp2 == FP16_EXP_INF) {
            result = fp16_infinity(sgn1 ^ sgn2);
        } else if (!mnt1 || !mnt2) {
            result = fp16_zero(sgn1 ^ sgn2);
        } else {
            result = fp16_mul(op1, op2, mode, &flags);
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint32_t
fplibMulX(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint32_t mnt1, mnt2, result;

    fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    result = fp32_process_NaNs(op1, op2, mode, &flags);
    if (!result) {
        if ((exp1 == FP32_EXP_INF && !mnt2) ||
            (exp2 == FP32_EXP_INF && !mnt1)) {
            result = fp32_FPTwo(sgn1 ^ sgn2);
        } else if (exp1 == FP32_EXP_INF || exp2 == FP32_EXP_INF) {
            result = fp32_infinity(sgn1 ^ sgn2);
        } else if (!mnt1 || !mnt2) {
            result = fp32_zero(sgn1 ^ sgn2);
        } else {
            result = fp32_mul(op1, op2, mode, &flags);
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint64_t
fplibMulX(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint64_t mnt1, mnt2, result;

    fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    result = fp64_process_NaNs(op1, op2, mode, &flags);
    if (!result) {
        if ((exp1 == FP64_EXP_INF && !mnt2) ||
            (exp2 == FP64_EXP_INF && !mnt1)) {
            result = fp64_FPTwo(sgn1 ^ sgn2);
        } else if (exp1 == FP64_EXP_INF || exp2 == FP64_EXP_INF) {
            result = fp64_infinity(sgn1 ^ sgn2);
        } else if (!mnt1 || !mnt2) {
            result = fp64_zero(sgn1 ^ sgn2);
        } else {
            result = fp64_mul(op1, op2, mode, &flags);
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint16_t
fplibNeg(uint16_t op)
{
    return op ^ 1ULL << (FP16_BITS - 1);
}

template <>
uint32_t
fplibNeg(uint32_t op)
{
    return op ^ 1ULL << (FP32_BITS - 1);
}

template <>
uint64_t
fplibNeg(uint64_t op)
{
    return op ^ 1ULL << (FP64_BITS - 1);
}

static const uint8_t recip_sqrt_estimate[256] = {
    255, 253, 251, 249, 247, 245, 243, 242, 240, 238, 236, 234, 233, 231, 229, 228,
    226, 224, 223, 221, 219, 218, 216, 215, 213, 212, 210, 209, 207, 206, 204, 203,
    201, 200, 198, 197, 196, 194, 193, 192, 190, 189, 188, 186, 185, 184, 183, 181,
    180, 179, 178, 176, 175, 174, 173, 172, 170, 169, 168, 167, 166, 165, 164, 163,
    162, 160, 159, 158, 157, 156, 155, 154, 153, 152, 151, 150, 149, 148, 147, 146,
    145, 144, 143, 142, 141, 140, 140, 139, 138, 137, 136, 135, 134, 133, 132, 131,
    131, 130, 129, 128, 127, 126, 126, 125, 124, 123, 122, 121, 121, 120, 119, 118,
    118, 117, 116, 115, 114, 114, 113, 112, 111, 111, 110, 109, 109, 108, 107, 106,
    105, 104, 103, 101, 100,  99,  97,  96,  95,  93,  92,  91,  90,  88,  87,  86,
     85,  84,  82,  81,  80,  79,  78,  77,  76,  75,  74,  72,  71,  70,  69,  68,
     67,  66,  65,  64,  63,  62,  61,  60,  60,  59,  58,  57,  56,  55,  54,  53,
     52,  51,  51,  50,  49,  48,  47,  46,  46,  45,  44,  43,  42,  42,  41,  40,
     39,  38,  38,  37,  36,  35,  35,  34,  33,  33,  32,  31,  30,  30,  29,  28,
     28,  27,  26,  26,  25,  24,  24,  23,  22,  22,  21,  20,  20,  19,  19,  18,
     17,  17,  16,  16,  15,  14,  14,  13,  13,  12,  11,  11,  10,  10,   9,   9,
      8,   8,   7,   6,   6,   5,   5,   4,   4,   3,   3,   2,   2,   1,   1,   0
};
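/*
 * The 256-entry table above drives the reciprocal square root estimates
 * below: the high index bit is the parity of the input exponent,
 * selecting between operands scaled into [0.25, 0.5) and [0.5, 1.0),
 * and the low seven index bits are the top fraction bits. An entry
 * value v encodes the estimate 1 + v/256 in [1.0, 2.0), which is placed
 * in the result's fraction field via mnt << (MANT_BITS - 8).
 */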
template <>
uint16_t
fplibRSqrtEstimate(uint16_t op, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint16_t mnt, result;

    fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    if (fp16_is_NaN(exp, mnt)) {
        result = fp16_process_NaN(op, mode, &flags);
    } else if (!mnt) {
        result = fp16_infinity(sgn);
        flags |= FPLIB_DZC;
    } else if (sgn) {
        result = fp16_defaultNaN();
        flags |= FPLIB_IOC;
    } else if (exp == FP16_EXP_INF) {
        result = fp16_zero(0);
    } else {
        exp += FP16_EXP_BITS;
        mnt = fp16_normalise(mnt, &exp);
        mnt = recip_sqrt_estimate[(~exp & 1) << 7 |
                                  (mnt >> (FP16_BITS - 8) & 127)];
        result = fp16_pack(0, (3 * FP16_EXP_BIAS - exp - 1) >> 1,
                           mnt << (FP16_MANT_BITS - 8));
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint32_t
fplibRSqrtEstimate(uint32_t op, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint32_t mnt, result;

    fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    if (fp32_is_NaN(exp, mnt)) {
        result = fp32_process_NaN(op, mode, &flags);
    } else if (!mnt) {
        result = fp32_infinity(sgn);
        flags |= FPLIB_DZC;
    } else if (sgn) {
        result = fp32_defaultNaN();
        flags |= FPLIB_IOC;
    } else if (exp == FP32_EXP_INF) {
        result = fp32_zero(0);
    } else {
        exp += FP32_EXP_BITS;
        mnt = fp32_normalise(mnt, &exp);
        mnt = recip_sqrt_estimate[(~exp & 1) << 7 |
                                  (mnt >> (FP32_BITS - 8) & 127)];
        result = fp32_pack(0, (3 * FP32_EXP_BIAS - exp - 1) >> 1,
                           mnt << (FP32_MANT_BITS - 8));
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint64_t
fplibRSqrtEstimate(uint64_t op, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint64_t mnt, result;

    fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    if (fp64_is_NaN(exp, mnt)) {
        result = fp64_process_NaN(op, mode, &flags);
    } else if (!mnt) {
        result = fp64_infinity(sgn);
        flags |= FPLIB_DZC;
    } else if (sgn) {
        result = fp64_defaultNaN();
        flags |= FPLIB_IOC;
    } else if (exp == FP64_EXP_INF) {
        result = fp64_zero(0);
    } else {
        exp += FP64_EXP_BITS;
        mnt = fp64_normalise(mnt, &exp);
        mnt = recip_sqrt_estimate[(~exp & 1) << 7 |
                                  (mnt >> (FP64_BITS - 8) & 127)];
        result = fp64_pack(0, (3 * FP64_EXP_BIAS - exp - 1) >> 1,
                           mnt << (FP64_MANT_BITS - 8));
    }

    set_fpscr0(fpscr, flags);

    return result;
}
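/*
 * FPRSqrtStepFused computes (3.0 - op1*op2) / 2.0 in a single fused
 * operation: op1 is negated up front, fp*_muladd accumulates onto 3.0,
 * and the final -1 argument applies the halving as an exponent
 * adjustment. One Newton-Raphson refinement of y ~= 1/sqrt(x) is then,
 * schematically (caller-side sketch, not code from this file):
 *
 *     y' = y * FPRSqrtStepFused(x * y, y);   // y * (3 - x*y*y) / 2
 */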
template <>
uint16_t
fplibRSqrtStepFused(uint16_t op1, uint16_t op2, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint16_t mnt1, mnt2, result;

    op1 = fplibNeg<uint16_t>(op1);
    fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    result = fp16_process_NaNs(op1, op2, mode, &flags);
    if (!result) {
        if ((exp1 == FP16_EXP_INF && !mnt2) ||
            (exp2 == FP16_EXP_INF && !mnt1)) {
            result = fp16_FPOnePointFive(0);
        } else if (exp1 == FP16_EXP_INF || exp2 == FP16_EXP_INF) {
            result = fp16_infinity(sgn1 ^ sgn2);
        } else {
            result = fp16_muladd(fp16_FPThree(0), op1, op2, -1, mode, &flags);
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint32_t
fplibRSqrtStepFused(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint32_t mnt1, mnt2, result;

    op1 = fplibNeg<uint32_t>(op1);
    fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    result = fp32_process_NaNs(op1, op2, mode, &flags);
    if (!result) {
        if ((exp1 == FP32_EXP_INF && !mnt2) ||
            (exp2 == FP32_EXP_INF && !mnt1)) {
            result = fp32_FPOnePointFive(0);
        } else if (exp1 == FP32_EXP_INF || exp2 == FP32_EXP_INF) {
            result = fp32_infinity(sgn1 ^ sgn2);
        } else {
            result = fp32_muladd(fp32_FPThree(0), op1, op2, -1, mode, &flags);
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint64_t
fplibRSqrtStepFused(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint64_t mnt1, mnt2, result;

    op1 = fplibNeg<uint64_t>(op1);
    fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    result = fp64_process_NaNs(op1, op2, mode, &flags);
    if (!result) {
        if ((exp1 == FP64_EXP_INF && !mnt2) ||
            (exp2 == FP64_EXP_INF && !mnt1)) {
            result = fp64_FPOnePointFive(0);
        } else if (exp1 == FP64_EXP_INF || exp2 == FP64_EXP_INF) {
            result = fp64_infinity(sgn1 ^ sgn2);
        } else {
            result = fp64_muladd(fp64_FPThree(0), op1, op2, -1, mode, &flags);
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint16_t
fplibRecipEstimate(uint16_t op, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint16_t mnt, result;

    fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    if (fp16_is_NaN(exp, mnt)) {
        result = fp16_process_NaN(op, mode, &flags);
    } else if (exp == FP16_EXP_INF) {
        result = fp16_zero(sgn);
    } else if (!mnt) {
        result = fp16_infinity(sgn);
        flags |= FPLIB_DZC;
    } else if (!((uint16_t)(op << 1) >> (FP16_MANT_BITS - 1))) {
        bool overflow_to_inf = false;
        switch (FPCRRounding(fpscr)) {
          case FPRounding_TIEEVEN:
            overflow_to_inf = true;
            break;
          case FPRounding_POSINF:
            overflow_to_inf = !sgn;
            break;
          case FPRounding_NEGINF:
            overflow_to_inf = sgn;
            break;
          case FPRounding_ZERO:
            overflow_to_inf = false;
            break;
          default: