fplib.cc revision 10272:336c7d36ac00
1/* 2* Copyright (c) 2012-2013 ARM Limited 3* All rights reserved 4* 5* The license below extends only to copyright in the software and shall 6* not be construed as granting a license to any other intellectual 7* property including but not limited to intellectual property relating 8* to a hardware implementation of the functionality of the software 9* licensed hereunder. You may use the software subject to the license 10* terms below provided that you ensure that this notice is replicated 11* unmodified and in its entirety in all distributions of the software, 12* modified or unmodified, in source code or in binary form. 13* 14* Redistribution and use in source and binary forms, with or without 15* modification, are permitted provided that the following conditions are 16* met: redistributions of source code must retain the above copyright 17* notice, this list of conditions and the following disclaimer; 18* redistributions in binary form must reproduce the above copyright 19* notice, this list of conditions and the following disclaimer in the 20* documentation and/or other materials provided with the distribution; 21* neither the name of the copyright holders nor the names of its 22* contributors may be used to endorse or promote products derived from 23* this software without specific prior written permission. 24* 25* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 26* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 27* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 28* A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 29* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 30* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 31* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 32* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 33* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 34* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 35* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 36* 37* Authors: Edmund Grimley Evans 38* Thomas Grocutt 39*/ 40 41#include <stdint.h> 42 43#include <cassert> 44 45#include "fplib.hh" 46 47namespace ArmISA 48{ 49 50#define FPLIB_RN 0 51#define FPLIB_RP 1 52#define FPLIB_RM 2 53#define FPLIB_RZ 3 54#define FPLIB_FZ 4 55#define FPLIB_DN 8 56#define FPLIB_AHP 16 57 58#define FPLIB_IDC 128 // Input Denormal 59#define FPLIB_IXC 16 // Inexact 60#define FPLIB_UFC 8 // Underflow 61#define FPLIB_OFC 4 // Overflow 62#define FPLIB_DZC 2 // Division by Zero 63#define FPLIB_IOC 1 // Invalid Operation 64 65static inline uint16_t 66lsl16(uint16_t x, uint32_t shift) 67{ 68 return shift < 16 ? x << shift : 0; 69} 70 71static inline uint16_t 72lsr16(uint16_t x, uint32_t shift) 73{ 74 return shift < 16 ? x >> shift : 0; 75} 76 77static inline uint32_t 78lsl32(uint32_t x, uint32_t shift) 79{ 80 return shift < 32 ? x << shift : 0; 81} 82 83static inline uint32_t 84lsr32(uint32_t x, uint32_t shift) 85{ 86 return shift < 32 ? x >> shift : 0; 87} 88 89static inline uint64_t 90lsl64(uint64_t x, uint32_t shift) 91{ 92 return shift < 64 ? x << shift : 0; 93} 94 95static inline uint64_t 96lsr64(uint64_t x, uint32_t shift) 97{ 98 return shift < 64 ? 
x >> shift : 0; 99} 100 101static inline void 102lsl128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift) 103{ 104 if (shift < 64) { 105 *r1 = x1 << shift | x0 >> (64 - shift); 106 *r0 = x0 << shift; 107 } else if (shift < 128) { 108 *r1 = x0 << (shift - 64); 109 *r0 = 0; 110 } else { 111 *r1 = 0; 112 *r0 = 0; 113 } 114} 115 116static inline void 117lsr128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift) 118{ 119 if (shift < 64) { 120 *r0 = x0 >> shift | x1 << (64 - shift); 121 *r1 = x1 >> shift; 122 } else if (shift < 128) { 123 *r0 = x1 >> (shift - 64); 124 *r1 = 0; 125 } else { 126 *r0 = 0; 127 *r1 = 0; 128 } 129} 130 131static inline void 132mul62x62(uint64_t *x0, uint64_t *x1, uint64_t a, uint64_t b) 133{ 134 uint32_t mask = ((uint32_t)1 << 31) - 1; 135 uint64_t a0 = a & mask; 136 uint64_t a1 = a >> 31 & mask; 137 uint64_t b0 = b & mask; 138 uint64_t b1 = b >> 31 & mask; 139 uint64_t p0 = a0 * b0; 140 uint64_t p2 = a1 * b1; 141 uint64_t p1 = (a0 + a1) * (b0 + b1) - p0 - p2; 142 uint64_t s0 = p0; 143 uint64_t s1 = (s0 >> 31) + p1; 144 uint64_t s2 = (s1 >> 31) + p2; 145 *x0 = (s0 & mask) | (s1 & mask) << 31 | s2 << 62; 146 *x1 = s2 >> 2; 147} 148 149static inline 150void mul64x32(uint64_t *x0, uint64_t *x1, uint64_t a, uint32_t b) 151{ 152 uint64_t t0 = (uint64_t)(uint32_t)a * b; 153 uint64_t t1 = (t0 >> 32) + (a >> 32) * b; 154 *x0 = t1 << 32 | (uint32_t)t0; 155 *x1 = t1 >> 32; 156} 157 158static inline void 159add128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0, 160 uint64_t b1) 161{ 162 *x0 = a0 + b0; 163 *x1 = a1 + b1 + (*x0 < a0); 164} 165 166static inline void 167sub128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0, 168 uint64_t b1) 169{ 170 *x0 = a0 - b0; 171 *x1 = a1 - b1 - (*x0 > a0); 172} 173 174static inline int 175cmp128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1) 176{ 177 return (a1 < b1 ? -1 : a1 > b1 ? 1 : a0 < b0 ? -1 : a0 > b0 ? 
1 : 0); 178} 179 180static inline uint16_t 181fp16_normalise(uint16_t mnt, int *exp) 182{ 183 int shift; 184 185 if (!mnt) { 186 return 0; 187 } 188 189 for (shift = 8; shift; shift >>= 1) { 190 if (!(mnt >> (16 - shift))) { 191 mnt <<= shift; 192 *exp -= shift; 193 } 194 } 195 return mnt; 196} 197 198static inline uint32_t 199fp32_normalise(uint32_t mnt, int *exp) 200{ 201 int shift; 202 203 if (!mnt) { 204 return 0; 205 } 206 207 for (shift = 16; shift; shift >>= 1) { 208 if (!(mnt >> (32 - shift))) { 209 mnt <<= shift; 210 *exp -= shift; 211 } 212 } 213 return mnt; 214} 215 216static inline uint64_t 217fp64_normalise(uint64_t mnt, int *exp) 218{ 219 int shift; 220 221 if (!mnt) { 222 return 0; 223 } 224 225 for (shift = 32; shift; shift >>= 1) { 226 if (!(mnt >> (64 - shift))) { 227 mnt <<= shift; 228 *exp -= shift; 229 } 230 } 231 return mnt; 232} 233 234static inline void 235fp128_normalise(uint64_t *mnt0, uint64_t *mnt1, int *exp) 236{ 237 uint64_t x0 = *mnt0; 238 uint64_t x1 = *mnt1; 239 int shift; 240 241 if (!x0 && !x1) { 242 return; 243 } 244 245 if (!x1) { 246 x1 = x0; 247 x0 = 0; 248 *exp -= 64; 249 } 250 251 for (shift = 32; shift; shift >>= 1) { 252 if (!(x1 >> (64 - shift))) { 253 x1 = x1 << shift | x0 >> (64 - shift); 254 x0 <<= shift; 255 *exp -= shift; 256 } 257 } 258 259 *mnt0 = x0; 260 *mnt1 = x1; 261} 262 263static inline uint16_t 264fp16_pack(uint16_t sgn, uint16_t exp, uint16_t mnt) 265{ 266 return sgn << 15 | exp << 10 | (mnt & (((uint16_t)1 << 10) - 1)); 267} 268 269static inline uint32_t 270fp32_pack(uint32_t sgn, uint32_t exp, uint32_t mnt) 271{ 272 return sgn << 31 | exp << 23 | (mnt & (((uint32_t)1 << 23) - 1)); 273} 274 275static inline uint64_t 276fp64_pack(uint64_t sgn, uint64_t exp, uint64_t mnt) 277{ 278 return (uint64_t)sgn << 63 | exp << 52 | (mnt & (((uint64_t)1 << 52) - 1)); 279} 280 281static inline uint16_t 282fp16_zero(int sgn) 283{ 284 return fp16_pack(sgn, 0, 0); 285} 286 287static inline uint32_t 288fp32_zero(int sgn) 
289{ 290 return fp32_pack(sgn, 0, 0); 291} 292 293static inline uint64_t 294fp64_zero(int sgn) 295{ 296 return fp64_pack(sgn, 0, 0); 297} 298 299static inline uint16_t 300fp16_max_normal(int sgn) 301{ 302 return fp16_pack(sgn, 30, -1); 303} 304 305static inline uint32_t 306fp32_max_normal(int sgn) 307{ 308 return fp32_pack(sgn, 254, -1); 309} 310 311static inline uint64_t 312fp64_max_normal(int sgn) 313{ 314 return fp64_pack(sgn, 2046, -1); 315} 316 317static inline uint16_t 318fp16_infinity(int sgn) 319{ 320 return fp16_pack(sgn, 31, 0); 321} 322 323static inline uint32_t 324fp32_infinity(int sgn) 325{ 326 return fp32_pack(sgn, 255, 0); 327} 328 329static inline uint64_t 330fp64_infinity(int sgn) 331{ 332 return fp64_pack(sgn, 2047, 0); 333} 334 335static inline uint16_t 336fp16_defaultNaN() 337{ 338 return fp16_pack(0, 31, (uint16_t)1 << 9); 339} 340 341static inline uint32_t 342fp32_defaultNaN() 343{ 344 return fp32_pack(0, 255, (uint32_t)1 << 22); 345} 346 347static inline uint64_t 348fp64_defaultNaN() 349{ 350 return fp64_pack(0, 2047, (uint64_t)1 << 51); 351} 352 353static inline void 354fp16_unpack(int *sgn, int *exp, uint16_t *mnt, uint16_t x, int mode, 355 int *flags) 356{ 357 *sgn = x >> 15; 358 *exp = x >> 10 & 31; 359 *mnt = x & (((uint16_t)1 << 10) - 1); 360 361 // Handle subnormals: 362 if (*exp) { 363 *mnt |= (uint16_t)1 << 10; 364 } else { 365 ++*exp; 366 // There is no flush to zero in this case! 
367 } 368} 369 370static inline void 371fp32_unpack(int *sgn, int *exp, uint32_t *mnt, uint32_t x, int mode, 372 int *flags) 373{ 374 *sgn = x >> 31; 375 *exp = x >> 23 & 255; 376 *mnt = x & (((uint32_t)1 << 23) - 1); 377 378 // Handle subnormals: 379 if (*exp) { 380 *mnt |= (uint32_t)1 << 23; 381 } else { 382 ++*exp; 383 if ((mode & FPLIB_FZ) && *mnt) { 384 *flags |= FPLIB_IDC; 385 *mnt = 0; 386 } 387 } 388} 389 390static inline void 391fp64_unpack(int *sgn, int *exp, uint64_t *mnt, uint64_t x, int mode, 392 int *flags) 393{ 394 *sgn = x >> 63; 395 *exp = x >> 52 & 2047; 396 *mnt = x & (((uint64_t)1 << 52) - 1); 397 398 // Handle subnormals: 399 if (*exp) { 400 *mnt |= (uint64_t)1 << 52; 401 } else { 402 ++*exp; 403 if ((mode & FPLIB_FZ) && *mnt) { 404 *flags |= FPLIB_IDC; 405 *mnt = 0; 406 } 407 } 408} 409 410static inline uint32_t 411fp32_process_NaN(uint32_t a, int mode, int *flags) 412{ 413 if (!(a >> 22 & 1)) { 414 *flags |= FPLIB_IOC; 415 a |= (uint32_t)1 << 22; 416 } 417 return mode & FPLIB_DN ? fp32_defaultNaN() : a; 418} 419 420static inline uint64_t 421fp64_process_NaN(uint64_t a, int mode, int *flags) 422{ 423 if (!(a >> 51 & 1)) { 424 *flags |= FPLIB_IOC; 425 a |= (uint64_t)1 << 51; 426 } 427 return mode & FPLIB_DN ? 
fp64_defaultNaN() : a; 428} 429 430static uint32_t 431fp32_process_NaNs(uint32_t a, uint32_t b, int mode, int *flags) 432{ 433 int a_exp = a >> 23 & 255; 434 uint32_t a_mnt = a & (((uint32_t)1 << 23) - 1); 435 int b_exp = b >> 23 & 255; 436 uint32_t b_mnt = b & (((uint32_t)1 << 23) - 1); 437 438 // Handle signalling NaNs: 439 if (a_exp == 255 && a_mnt && !(a_mnt >> 22 & 1)) 440 return fp32_process_NaN(a, mode, flags); 441 if (b_exp == 255 && b_mnt && !(b_mnt >> 22 & 1)) 442 return fp32_process_NaN(b, mode, flags); 443 444 // Handle quiet NaNs: 445 if (a_exp == 255 && a_mnt) 446 return fp32_process_NaN(a, mode, flags); 447 if (b_exp == 255 && b_mnt) 448 return fp32_process_NaN(b, mode, flags); 449 450 return 0; 451} 452 453static uint64_t 454fp64_process_NaNs(uint64_t a, uint64_t b, int mode, int *flags) 455{ 456 int a_exp = a >> 52 & 2047; 457 uint64_t a_mnt = a & (((uint64_t)1 << 52) - 1); 458 int b_exp = b >> 52 & 2047; 459 uint64_t b_mnt = b & (((uint64_t)1 << 52) - 1); 460 461 // Handle signalling NaNs: 462 if (a_exp == 2047 && a_mnt && !(a_mnt >> 51 & 1)) 463 return fp64_process_NaN(a, mode, flags); 464 if (b_exp == 2047 && b_mnt && !(b_mnt >> 51 & 1)) 465 return fp64_process_NaN(b, mode, flags); 466 467 // Handle quiet NaNs: 468 if (a_exp == 2047 && a_mnt) 469 return fp64_process_NaN(a, mode, flags); 470 if (b_exp == 2047 && b_mnt) 471 return fp64_process_NaN(b, mode, flags); 472 473 return 0; 474} 475 476static uint32_t 477fp32_process_NaNs3(uint32_t a, uint32_t b, uint32_t c, int mode, int *flags) 478{ 479 int a_exp = a >> 23 & 255; 480 uint32_t a_mnt = a & (((uint32_t)1 << 23) - 1); 481 int b_exp = b >> 23 & 255; 482 uint32_t b_mnt = b & (((uint32_t)1 << 23) - 1); 483 int c_exp = c >> 23 & 255; 484 uint32_t c_mnt = c & (((uint32_t)1 << 23) - 1); 485 486 // Handle signalling NaNs: 487 if (a_exp == 255 && a_mnt && !(a_mnt >> 22 & 1)) 488 return fp32_process_NaN(a, mode, flags); 489 if (b_exp == 255 && b_mnt && !(b_mnt >> 22 & 1)) 490 return 
fp32_process_NaN(b, mode, flags); 491 if (c_exp == 255 && c_mnt && !(c_mnt >> 22 & 1)) 492 return fp32_process_NaN(c, mode, flags); 493 494 // Handle quiet NaNs: 495 if (a_exp == 255 && a_mnt) 496 return fp32_process_NaN(a, mode, flags); 497 if (b_exp == 255 && b_mnt) 498 return fp32_process_NaN(b, mode, flags); 499 if (c_exp == 255 && c_mnt) 500 return fp32_process_NaN(c, mode, flags); 501 502 return 0; 503} 504 505static uint64_t 506fp64_process_NaNs3(uint64_t a, uint64_t b, uint64_t c, int mode, int *flags) 507{ 508 int a_exp = a >> 52 & 2047; 509 uint64_t a_mnt = a & (((uint64_t)1 << 52) - 1); 510 int b_exp = b >> 52 & 2047; 511 uint64_t b_mnt = b & (((uint64_t)1 << 52) - 1); 512 int c_exp = c >> 52 & 2047; 513 uint64_t c_mnt = c & (((uint64_t)1 << 52) - 1); 514 515 // Handle signalling NaNs: 516 if (a_exp == 2047 && a_mnt && !(a_mnt >> 51 & 1)) 517 return fp64_process_NaN(a, mode, flags); 518 if (b_exp == 2047 && b_mnt && !(b_mnt >> 51 & 1)) 519 return fp64_process_NaN(b, mode, flags); 520 if (c_exp == 2047 && c_mnt && !(c_mnt >> 51 & 1)) 521 return fp64_process_NaN(c, mode, flags); 522 523 // Handle quiet NaNs: 524 if (a_exp == 2047 && a_mnt) 525 return fp64_process_NaN(a, mode, flags); 526 if (b_exp == 2047 && b_mnt) 527 return fp64_process_NaN(b, mode, flags); 528 if (c_exp == 2047 && c_mnt) 529 return fp64_process_NaN(c, mode, flags); 530 531 return 0; 532} 533 534static uint16_t 535fp16_round_(int sgn, int exp, uint16_t mnt, int rm, int mode, int *flags) 536{ 537 int biased_exp; // non-negative exponent value for result 538 uint16_t int_mant; // mantissa for result, less than (1 << 11) 539 int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5 540 541 assert(rm != FPRounding_TIEAWAY); 542 543 // There is no flush to zero in this case! 
544 545 // The bottom 5 bits of mnt are orred together: 546 mnt = (uint16_t)1 << 12 | mnt >> 4 | ((mnt & 31) != 0); 547 548 if (exp > 0) { 549 biased_exp = exp; 550 int_mant = mnt >> 2; 551 error = mnt & 3; 552 } else { 553 biased_exp = 0; 554 int_mant = lsr16(mnt, 3 - exp); 555 error = (lsr16(mnt, 1 - exp) & 3) | !!(mnt & (lsl16(1, 1 - exp) - 1)); 556 } 557 558 if (!biased_exp && error) { // xx should also check fpscr_val<11> 559 *flags |= FPLIB_UFC; 560 } 561 562 // Round up: 563 if ((rm == FPLIB_RN && (error == 3 || 564 (error == 2 && (int_mant & 1)))) || 565 (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) { 566 ++int_mant; 567 if (int_mant == (uint32_t)1 << 10) { 568 // Rounded up from denormalized to normalized 569 biased_exp = 1; 570 } 571 if (int_mant == (uint32_t)1 << 11) { 572 // Rounded up to next exponent 573 ++biased_exp; 574 int_mant >>= 1; 575 } 576 } 577 578 // Handle rounding to odd aka Von Neumann rounding: 579 if (error && rm == FPRounding_ODD) 580 int_mant |= 1; 581 582 // Handle overflow: 583 if (!(mode & FPLIB_AHP)) { 584 if (biased_exp >= 31) { 585 *flags |= FPLIB_OFC | FPLIB_IXC; 586 if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) || 587 (rm == FPLIB_RM && sgn)) { 588 return fp16_infinity(sgn); 589 } else { 590 return fp16_max_normal(sgn); 591 } 592 } 593 } else { 594 if (biased_exp >= 32) { 595 *flags |= FPLIB_IOC; 596 return fp16_pack(sgn, 31, -1); 597 } 598 } 599 600 if (error) { 601 *flags |= FPLIB_IXC; 602 } 603 604 return fp16_pack(sgn, biased_exp, int_mant); 605} 606 607static uint32_t 608fp32_round_(int sgn, int exp, uint32_t mnt, int rm, int mode, int *flags) 609{ 610 int biased_exp; // non-negative exponent value for result 611 uint32_t int_mant; // mantissa for result, less than (1 << 24) 612 int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5 613 614 assert(rm != FPRounding_TIEAWAY); 615 616 // Flush to zero: 617 if ((mode & FPLIB_FZ) && exp < 1) { 618 *flags |= FPLIB_UFC; 619 return 
fp32_zero(sgn); 620 } 621 622 // The bottom 8 bits of mnt are orred together: 623 mnt = (uint32_t)1 << 25 | mnt >> 7 | ((mnt & 255) != 0); 624 625 if (exp > 0) { 626 biased_exp = exp; 627 int_mant = mnt >> 2; 628 error = mnt & 3; 629 } else { 630 biased_exp = 0; 631 int_mant = lsr32(mnt, 3 - exp); 632 error = (lsr32(mnt, 1 - exp) & 3) | !!(mnt & (lsl32(1, 1 - exp) - 1)); 633 } 634 635 if (!biased_exp && error) { // xx should also check fpscr_val<11> 636 *flags |= FPLIB_UFC; 637 } 638 639 // Round up: 640 if ((rm == FPLIB_RN && (error == 3 || 641 (error == 2 && (int_mant & 1)))) || 642 (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) { 643 ++int_mant; 644 if (int_mant == (uint32_t)1 << 23) { 645 // Rounded up from denormalized to normalized 646 biased_exp = 1; 647 } 648 if (int_mant == (uint32_t)1 << 24) { 649 // Rounded up to next exponent 650 ++biased_exp; 651 int_mant >>= 1; 652 } 653 } 654 655 // Handle rounding to odd aka Von Neumann rounding: 656 if (error && rm == FPRounding_ODD) 657 int_mant |= 1; 658 659 // Handle overflow: 660 if (biased_exp >= 255) { 661 *flags |= FPLIB_OFC | FPLIB_IXC; 662 if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) || 663 (rm == FPLIB_RM && sgn)) { 664 return fp32_infinity(sgn); 665 } else { 666 return fp32_max_normal(sgn); 667 } 668 } 669 670 if (error) { 671 *flags |= FPLIB_IXC; 672 } 673 674 return fp32_pack(sgn, biased_exp, int_mant); 675} 676 677static uint32_t 678fp32_round(int sgn, int exp, uint32_t mnt, int mode, int *flags) 679{ 680 return fp32_round_(sgn, exp, mnt, mode & 3, mode, flags); 681} 682 683static uint64_t 684fp64_round_(int sgn, int exp, uint64_t mnt, int rm, int mode, int *flags) 685{ 686 int biased_exp; // non-negative exponent value for result 687 uint64_t int_mant; // mantissa for result, less than (1 << 52) 688 int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5 689 690 assert(rm != FPRounding_TIEAWAY); 691 692 // Flush to zero: 693 if ((mode & FPLIB_FZ) && exp < 1) { 
694 *flags |= FPLIB_UFC; 695 return fp64_zero(sgn); 696 } 697 698 // The bottom 11 bits of mnt are orred together: 699 mnt = (uint64_t)1 << 54 | mnt >> 10 | ((mnt & 0x3ff) != 0); 700 701 if (exp > 0) { 702 biased_exp = exp; 703 int_mant = mnt >> 2; 704 error = mnt & 3; 705 } else { 706 biased_exp = 0; 707 int_mant = lsr64(mnt, 3 - exp); 708 error = (lsr64(mnt, 1 - exp) & 3) | !!(mnt & (lsl64(1, 1 - exp) - 1)); 709 } 710 711 if (!biased_exp && error) { // xx should also check fpscr_val<11> 712 *flags |= FPLIB_UFC; 713 } 714 715 // Round up: 716 if ((rm == FPLIB_RN && (error == 3 || 717 (error == 2 && (int_mant & 1)))) || 718 (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) { 719 ++int_mant; 720 if (int_mant == (uint64_t)1 << 52) { 721 // Rounded up from denormalized to normalized 722 biased_exp = 1; 723 } 724 if (int_mant == (uint64_t)1 << 53) { 725 // Rounded up to next exponent 726 ++biased_exp; 727 int_mant >>= 1; 728 } 729 } 730 731 // Handle rounding to odd aka Von Neumann rounding: 732 if (error && rm == FPRounding_ODD) 733 int_mant |= 1; 734 735 // Handle overflow: 736 if (biased_exp >= 2047) { 737 *flags |= FPLIB_OFC | FPLIB_IXC; 738 if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) || 739 (rm == FPLIB_RM && sgn)) { 740 return fp64_infinity(sgn); 741 } else { 742 return fp64_max_normal(sgn); 743 } 744 } 745 746 if (error) { 747 *flags |= FPLIB_IXC; 748 } 749 750 return fp64_pack(sgn, biased_exp, int_mant); 751} 752 753static uint64_t 754fp64_round(int sgn, int exp, uint64_t mnt, int mode, int *flags) 755{ 756 return fp64_round_(sgn, exp, mnt, mode & 3, mode, flags); 757} 758 759static int 760fp32_compare_eq(uint32_t a, uint32_t b, int mode, int *flags) 761{ 762 int a_sgn, a_exp, b_sgn, b_exp; 763 uint32_t a_mnt, b_mnt; 764 765 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 766 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 767 768 if ((a_exp == 255 && (uint32_t)(a_mnt << 9)) || 769 (b_exp == 255 && (uint32_t)(b_mnt << 9))) { 770 if 
((a_exp == 255 && (uint32_t)(a_mnt << 9) && !(a >> 22 & 1)) || 771 (b_exp == 255 && (uint32_t)(b_mnt << 9) && !(b >> 22 & 1))) 772 *flags |= FPLIB_IOC; 773 return 0; 774 } 775 return a == b || (!a_mnt && !b_mnt); 776} 777 778static int 779fp32_compare_ge(uint32_t a, uint32_t b, int mode, int *flags) 780{ 781 int a_sgn, a_exp, b_sgn, b_exp; 782 uint32_t a_mnt, b_mnt; 783 784 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 785 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 786 787 if ((a_exp == 255 && (uint32_t)(a_mnt << 9)) || 788 (b_exp == 255 && (uint32_t)(b_mnt << 9))) { 789 *flags |= FPLIB_IOC; 790 return 0; 791 } 792 if (!a_mnt && !b_mnt) 793 return 1; 794 if (a_sgn != b_sgn) 795 return b_sgn; 796 if (a_exp != b_exp) 797 return a_sgn ^ (a_exp > b_exp); 798 if (a_mnt != b_mnt) 799 return a_sgn ^ (a_mnt > b_mnt); 800 return 1; 801} 802 803static int 804fp32_compare_gt(uint32_t a, uint32_t b, int mode, int *flags) 805{ 806 int a_sgn, a_exp, b_sgn, b_exp; 807 uint32_t a_mnt, b_mnt; 808 809 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 810 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 811 812 if ((a_exp == 255 && (uint32_t)(a_mnt << 9)) || 813 (b_exp == 255 && (uint32_t)(b_mnt << 9))) { 814 *flags |= FPLIB_IOC; 815 return 0; 816 } 817 if (!a_mnt && !b_mnt) 818 return 0; 819 if (a_sgn != b_sgn) 820 return b_sgn; 821 if (a_exp != b_exp) 822 return a_sgn ^ (a_exp > b_exp); 823 if (a_mnt != b_mnt) 824 return a_sgn ^ (a_mnt > b_mnt); 825 return 0; 826} 827 828static int 829fp64_compare_eq(uint64_t a, uint64_t b, int mode, int *flags) 830{ 831 int a_sgn, a_exp, b_sgn, b_exp; 832 uint64_t a_mnt, b_mnt; 833 834 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 835 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 836 837 if ((a_exp == 2047 && (uint64_t)(a_mnt << 12)) || 838 (b_exp == 2047 && (uint64_t)(b_mnt << 12))) { 839 if ((a_exp == 2047 && (uint64_t)(a_mnt << 12) && !(a >> 51 & 1)) || 840 (b_exp == 2047 && (uint64_t)(b_mnt << 12) && !(b 
>> 51 & 1))) 841 *flags |= FPLIB_IOC; 842 return 0; 843 } 844 return a == b || (!a_mnt && !b_mnt); 845} 846 847static int 848fp64_compare_ge(uint64_t a, uint64_t b, int mode, int *flags) 849{ 850 int a_sgn, a_exp, b_sgn, b_exp; 851 uint64_t a_mnt, b_mnt; 852 853 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 854 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 855 856 if ((a_exp == 2047 && (uint64_t)(a_mnt << 12)) || 857 (b_exp == 2047 && (uint64_t)(b_mnt << 12))) { 858 *flags |= FPLIB_IOC; 859 return 0; 860 } 861 if (!a_mnt && !b_mnt) 862 return 1; 863 if (a_sgn != b_sgn) 864 return b_sgn; 865 if (a_exp != b_exp) 866 return a_sgn ^ (a_exp > b_exp); 867 if (a_mnt != b_mnt) 868 return a_sgn ^ (a_mnt > b_mnt); 869 return 1; 870} 871 872static int 873fp64_compare_gt(uint64_t a, uint64_t b, int mode, int *flags) 874{ 875 int a_sgn, a_exp, b_sgn, b_exp; 876 uint64_t a_mnt, b_mnt; 877 878 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 879 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 880 881 if ((a_exp == 2047 && (uint64_t)(a_mnt << 12)) || 882 (b_exp == 2047 && (uint64_t)(b_mnt << 12))) { 883 *flags |= FPLIB_IOC; 884 return 0; 885 } 886 if (!a_mnt && !b_mnt) 887 return 0; 888 if (a_sgn != b_sgn) 889 return b_sgn; 890 if (a_exp != b_exp) 891 return a_sgn ^ (a_exp > b_exp); 892 if (a_mnt != b_mnt) 893 return a_sgn ^ (a_mnt > b_mnt); 894 return 0; 895} 896 897static uint32_t 898fp32_add(uint32_t a, uint32_t b, int neg, int mode, int *flags) 899{ 900 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp; 901 uint32_t a_mnt, b_mnt, x, x_mnt; 902 903 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 904 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 905 906 if ((x = fp32_process_NaNs(a, b, mode, flags))) { 907 return x; 908 } 909 910 b_sgn ^= neg; 911 912 // Handle infinities and zeroes: 913 if (a_exp == 255 && b_exp == 255 && a_sgn != b_sgn) { 914 *flags |= FPLIB_IOC; 915 return fp32_defaultNaN(); 916 } else if (a_exp == 255) { 917 return 
fp32_infinity(a_sgn); 918 } else if (b_exp == 255) { 919 return fp32_infinity(b_sgn); 920 } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) { 921 return fp32_zero(a_sgn); 922 } 923 924 a_mnt <<= 3; 925 b_mnt <<= 3; 926 if (a_exp >= b_exp) { 927 b_mnt = (lsr32(b_mnt, a_exp - b_exp) | 928 !!(b_mnt & (lsl32(1, a_exp - b_exp) - 1))); 929 b_exp = a_exp; 930 } else { 931 a_mnt = (lsr32(a_mnt, b_exp - a_exp) | 932 !!(a_mnt & (lsl32(1, b_exp - a_exp) - 1))); 933 a_exp = b_exp; 934 } 935 x_sgn = a_sgn; 936 x_exp = a_exp; 937 if (a_sgn == b_sgn) { 938 x_mnt = a_mnt + b_mnt; 939 } else if (a_mnt >= b_mnt) { 940 x_mnt = a_mnt - b_mnt; 941 } else { 942 x_sgn ^= 1; 943 x_mnt = b_mnt - a_mnt; 944 } 945 946 if (!x_mnt) { 947 // Sign of exact zero result depends on rounding mode 948 return fp32_zero((mode & 3) == 2); 949 } 950 951 x_mnt = fp32_normalise(x_mnt, &x_exp); 952 953 return fp32_round(x_sgn, x_exp + 5, x_mnt << 1, mode, flags); 954} 955 956static uint64_t 957fp64_add(uint64_t a, uint64_t b, int neg, int mode, int *flags) 958{ 959 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp; 960 uint64_t a_mnt, b_mnt, x, x_mnt; 961 962 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 963 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 964 965 if ((x = fp64_process_NaNs(a, b, mode, flags))) { 966 return x; 967 } 968 969 b_sgn ^= neg; 970 971 // Handle infinities and zeroes: 972 if (a_exp == 2047 && b_exp == 2047 && a_sgn != b_sgn) { 973 *flags |= FPLIB_IOC; 974 return fp64_defaultNaN(); 975 } else if (a_exp == 2047) { 976 return fp64_infinity(a_sgn); 977 } else if (b_exp == 2047) { 978 return fp64_infinity(b_sgn); 979 } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) { 980 return fp64_zero(a_sgn); 981 } 982 983 a_mnt <<= 3; 984 b_mnt <<= 3; 985 if (a_exp >= b_exp) { 986 b_mnt = (lsr64(b_mnt, a_exp - b_exp) | 987 !!(b_mnt & (lsl64(1, a_exp - b_exp) - 1))); 988 b_exp = a_exp; 989 } else { 990 a_mnt = (lsr64(a_mnt, b_exp - a_exp) | 991 !!(a_mnt & (lsl64(1, b_exp - a_exp) - 1))); 992 a_exp 
= b_exp; 993 } 994 x_sgn = a_sgn; 995 x_exp = a_exp; 996 if (a_sgn == b_sgn) { 997 x_mnt = a_mnt + b_mnt; 998 } else if (a_mnt >= b_mnt) { 999 x_mnt = a_mnt - b_mnt; 1000 } else { 1001 x_sgn ^= 1; 1002 x_mnt = b_mnt - a_mnt; 1003 } 1004 1005 if (!x_mnt) { 1006 // Sign of exact zero result depends on rounding mode 1007 return fp64_zero((mode & 3) == 2); 1008 } 1009 1010 x_mnt = fp64_normalise(x_mnt, &x_exp); 1011 1012 return fp64_round(x_sgn, x_exp + 8, x_mnt << 1, mode, flags); 1013} 1014 1015static uint32_t 1016fp32_mul(uint32_t a, uint32_t b, int mode, int *flags) 1017{ 1018 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp; 1019 uint32_t a_mnt, b_mnt, x; 1020 uint64_t x_mnt; 1021 1022 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 1023 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 1024 1025 if ((x = fp32_process_NaNs(a, b, mode, flags))) { 1026 return x; 1027 } 1028 1029 // Handle infinities and zeroes: 1030 if ((a_exp == 255 && !b_mnt) || (b_exp == 255 && !a_mnt)) { 1031 *flags |= FPLIB_IOC; 1032 return fp32_defaultNaN(); 1033 } else if (a_exp == 255 || b_exp == 255) { 1034 return fp32_infinity(a_sgn ^ b_sgn); 1035 } else if (!a_mnt || !b_mnt) { 1036 return fp32_zero(a_sgn ^ b_sgn); 1037 } 1038 1039 // Multiply and normalise: 1040 x_sgn = a_sgn ^ b_sgn; 1041 x_exp = a_exp + b_exp - 110; 1042 x_mnt = (uint64_t)a_mnt * b_mnt; 1043 x_mnt = fp64_normalise(x_mnt, &x_exp); 1044 1045 // Convert to 32 bits, collapsing error into bottom bit: 1046 x_mnt = lsr64(x_mnt, 31) | !!lsl64(x_mnt, 33); 1047 1048 return fp32_round(x_sgn, x_exp, x_mnt, mode, flags); 1049} 1050 1051static uint64_t 1052fp64_mul(uint64_t a, uint64_t b, int mode, int *flags) 1053{ 1054 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp; 1055 uint64_t a_mnt, b_mnt, x; 1056 uint64_t x0_mnt, x1_mnt; 1057 1058 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 1059 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 1060 1061 if ((x = fp64_process_NaNs(a, b, mode, flags))) { 1062 return x; 1063 } 1064 
1065 // Handle infinities and zeroes: 1066 if ((a_exp == 2047 && !b_mnt) || (b_exp == 2047 && !a_mnt)) { 1067 *flags |= FPLIB_IOC; 1068 return fp64_defaultNaN(); 1069 } else if (a_exp == 2047 || b_exp == 2047) { 1070 return fp64_infinity(a_sgn ^ b_sgn); 1071 } else if (!a_mnt || !b_mnt) { 1072 return fp64_zero(a_sgn ^ b_sgn); 1073 } 1074 1075 // Multiply and normalise: 1076 x_sgn = a_sgn ^ b_sgn; 1077 x_exp = a_exp + b_exp - 1000; 1078 mul62x62(&x0_mnt, &x1_mnt, a_mnt, b_mnt); 1079 fp128_normalise(&x0_mnt, &x1_mnt, &x_exp); 1080 1081 // Convert to 64 bits, collapsing error into bottom bit: 1082 x0_mnt = x1_mnt << 1 | !!x0_mnt; 1083 1084 return fp64_round(x_sgn, x_exp, x0_mnt, mode, flags); 1085} 1086 1087static uint32_t 1088fp32_muladd(uint32_t a, uint32_t b, uint32_t c, int scale, 1089 int mode, int *flags) 1090{ 1091 int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp; 1092 uint32_t a_mnt, b_mnt, c_mnt, x; 1093 uint64_t x_mnt, y_mnt; 1094 1095 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 1096 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 1097 fp32_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags); 1098 1099 x = fp32_process_NaNs3(a, b, c, mode, flags); 1100 1101 // Quiet NaN added to product of zero and infinity: 1102 if (a_exp == 255 && (a_mnt >> 22 & 1) && 1103 ((!b_mnt && c_exp == 255 && !(uint32_t)(c_mnt << 9)) || 1104 (!c_mnt && b_exp == 255 && !(uint32_t)(b_mnt << 9)))) { 1105 x = fp32_defaultNaN(); 1106 *flags |= FPLIB_IOC; 1107 } 1108 1109 if (x) { 1110 return x; 1111 } 1112 1113 // Handle infinities and zeroes: 1114 if ((b_exp == 255 && !c_mnt) || 1115 (c_exp == 255 && !b_mnt) || 1116 (a_exp == 255 && (b_exp == 255 || c_exp == 255) && 1117 (a_sgn != (b_sgn ^ c_sgn)))) { 1118 *flags |= FPLIB_IOC; 1119 return fp32_defaultNaN(); 1120 } 1121 if (a_exp == 255) 1122 return fp32_infinity(a_sgn); 1123 if (b_exp == 255 || c_exp == 255) 1124 return fp32_infinity(b_sgn ^ c_sgn); 1125 if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == 
(b_sgn ^ c_sgn)) 1126 return fp32_zero(a_sgn); 1127 1128 x_sgn = a_sgn; 1129 x_exp = a_exp + 13; 1130 x_mnt = (uint64_t)a_mnt << 27; 1131 1132 // Multiply: 1133 y_sgn = b_sgn ^ c_sgn; 1134 y_exp = b_exp + c_exp - 113; 1135 y_mnt = (uint64_t)b_mnt * c_mnt << 3; 1136 if (!y_mnt) { 1137 y_exp = x_exp; 1138 } 1139 1140 // Add: 1141 if (x_exp >= y_exp) { 1142 y_mnt = (lsr64(y_mnt, x_exp - y_exp) | 1143 !!(y_mnt & (lsl64(1, x_exp - y_exp) - 1))); 1144 y_exp = x_exp; 1145 } else { 1146 x_mnt = (lsr64(x_mnt, y_exp - x_exp) | 1147 !!(x_mnt & (lsl64(1, y_exp - x_exp) - 1))); 1148 x_exp = y_exp; 1149 } 1150 if (x_sgn == y_sgn) { 1151 x_mnt = x_mnt + y_mnt; 1152 } else if (x_mnt >= y_mnt) { 1153 x_mnt = x_mnt - y_mnt; 1154 } else { 1155 x_sgn ^= 1; 1156 x_mnt = y_mnt - x_mnt; 1157 } 1158 1159 if (!x_mnt) { 1160 // Sign of exact zero result depends on rounding mode 1161 return fp32_zero((mode & 3) == 2); 1162 } 1163 1164 // Normalise and convert to 32 bits, collapsing error into bottom bit: 1165 x_mnt = fp64_normalise(x_mnt, &x_exp); 1166 x_mnt = x_mnt >> 31 | !!(uint32_t)(x_mnt << 1); 1167 1168 return fp32_round(x_sgn, x_exp + scale, x_mnt, mode, flags); 1169} 1170 1171static uint64_t 1172fp64_muladd(uint64_t a, uint64_t b, uint64_t c, int scale, 1173 int mode, int *flags) 1174{ 1175 int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp; 1176 uint64_t a_mnt, b_mnt, c_mnt, x; 1177 uint64_t x0_mnt, x1_mnt, y0_mnt, y1_mnt; 1178 1179 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 1180 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 1181 fp64_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags); 1182 1183 x = fp64_process_NaNs3(a, b, c, mode, flags); 1184 1185 // Quiet NaN added to product of zero and infinity: 1186 if (a_exp == 2047 && (a_mnt >> 51 & 1) && 1187 ((!b_mnt && c_exp == 2047 && !(uint64_t)(c_mnt << 12)) || 1188 (!c_mnt && b_exp == 2047 && !(uint64_t)(b_mnt << 12)))) { 1189 x = fp64_defaultNaN(); 1190 *flags |= FPLIB_IOC; 1191 } 1192 1193 if (x) 
// NOTE(review): body of fp64_muladd (double-precision fused multiply-add).
// Same structure as fp32_muladd but the wide intermediate is a 128-bit value
// held in (hi, lo) pairs manipulated via the add128/sub128/lsl128/lsr128 helpers.
{ 1194 return x; 1195 } 1196 1197 // Handle infinities and zeroes: 1198 if ((b_exp == 2047 && !c_mnt) || 1199 (c_exp == 2047 && !b_mnt) || 1200 (a_exp == 2047 && (b_exp == 2047 || c_exp == 2047) && 1201 (a_sgn != (b_sgn ^ c_sgn)))) { 1202 *flags |= FPLIB_IOC; 1203 return fp64_defaultNaN(); 1204 } 1205 if (a_exp == 2047) 1206 return fp64_infinity(a_sgn); 1207 if (b_exp == 2047 || c_exp == 2047) 1208 return fp64_infinity(b_sgn ^ c_sgn); 1209 if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn)) 1210 return fp64_zero(a_sgn); 1211 1212 x_sgn = a_sgn; 1213 x_exp = a_exp + 11; 1214 x0_mnt = 0; 1215 x1_mnt = a_mnt; 1216 1217 // Multiply: 1218 y_sgn = b_sgn ^ c_sgn; 1219 y_exp = b_exp + c_exp - 1003; 1220 mul62x62(&y0_mnt, &y1_mnt, b_mnt, c_mnt << 3); 1221 if (!y0_mnt && !y1_mnt) { 1222 y_exp = x_exp; 1223 } 1224 1225 // Add: 1226 if (x_exp >= y_exp) { 1227 uint64_t t0, t1; 1228 lsl128(&t0, &t1, y0_mnt, y1_mnt, 1229 x_exp - y_exp < 128 ? 128 - (x_exp - y_exp) : 0); 1230 lsr128(&y0_mnt, &y1_mnt, y0_mnt, y1_mnt, x_exp - y_exp); 1231 y0_mnt |= !!(t0 | t1); 1232 y_exp = x_exp; 1233 } else { 1234 uint64_t t0, t1; 1235 lsl128(&t0, &t1, x0_mnt, x1_mnt, 1236 y_exp - x_exp < 128 ?
// (aligning the smaller operand; t0|t1 collects the shifted-out sticky bits)
128 - (y_exp - x_exp) : 0); 1237 lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y_exp - x_exp); 1238 x0_mnt |= !!(t0 | t1); 1239 x_exp = y_exp; 1240 } 1241 if (x_sgn == y_sgn) { 1242 add128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y0_mnt, y1_mnt); 1243 } else if (cmp128(x0_mnt, x1_mnt, y0_mnt, y1_mnt) >= 0) { 1244 sub128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y0_mnt, y1_mnt); 1245 } else { 1246 x_sgn ^= 1; 1247 sub128(&x0_mnt, &x1_mnt, y0_mnt, y1_mnt, x0_mnt, x1_mnt); 1248 } 1249 1250 if (!x0_mnt && !x1_mnt) { 1251 // Sign of exact zero result depends on rounding mode 1252 return fp64_zero((mode & 3) == 2); 1253 } 1254 1255 // Normalise and convert to 64 bits, collapsing error into bottom bit: 1256 fp128_normalise(&x0_mnt, &x1_mnt, &x_exp); 1257 x0_mnt = x1_mnt << 1 | !!x0_mnt; 1258 1259 return fp64_round(x_sgn, x_exp + scale, x0_mnt, mode, flags); 1260} 1261 1262static uint32_t 1263fp32_div(uint32_t a, uint32_t b, int mode, int *flags) 1264{ 1265 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp; 1266 uint32_t a_mnt, b_mnt, x; 1267 uint64_t x_mnt; 1268 1269 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 1270 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 1271 1272 if ((x = fp32_process_NaNs(a, b, mode, flags))) 1273 return x; 1274 1275 // Handle infinities and zeroes: 1276 if ((a_exp == 255 && b_exp == 255) || (!a_mnt && !b_mnt)) { 1277 *flags |= FPLIB_IOC; 1278 return fp32_defaultNaN(); 1279 } 1280 if (a_exp == 255 || !b_mnt) { 1281 if (a_exp != 255) 1282 *flags |= FPLIB_DZC; 1283 return fp32_infinity(a_sgn ^ b_sgn); 1284 } 1285 if (!a_mnt || b_exp == 255) 1286 return fp32_zero(a_sgn ^ b_sgn); 1287 1288 // Divide, setting bottom bit if inexact: 1289 a_mnt = fp32_normalise(a_mnt, &a_exp); 1290 x_sgn = a_sgn ^ b_sgn; 1291 x_exp = a_exp - b_exp + 172; 1292 x_mnt = ((uint64_t)a_mnt << 18) / b_mnt; 1293 x_mnt |= (x_mnt * b_mnt != (uint64_t)a_mnt << 18); 1294 1295 // Normalise and convert to 32 bits, collapsing error into bottom bit: 1296 x_mnt = fp64_normalise(x_mnt, &x_exp);
// NOTE(review): tail of fp32_div, then fp64_div (Newton-Raphson reciprocal then
// multiply-by-dividend, with a +1 correction step because the estimate is an
// underestimate), then set_fpscr0 which translates FPLIB_* flag bits into the
// cumulative FPSCR exception bits.
1297 x_mnt = x_mnt >> 31 | !!(uint32_t)(x_mnt << 1); 1298 1299 return fp32_round(x_sgn, x_exp, x_mnt, mode, flags); 1300} 1301 1302static uint64_t 1303fp64_div(uint64_t a, uint64_t b, int mode, int *flags) 1304{ 1305 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp, c; 1306 uint64_t a_mnt, b_mnt, x, x_mnt, x0_mnt, x1_mnt; 1307 1308 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 1309 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 1310 1311 if ((x = fp64_process_NaNs(a, b, mode, flags))) 1312 return x; 1313 1314 // Handle infinities and zeroes: 1315 if ((a_exp == 2047 && b_exp == 2047) || (!a_mnt && !b_mnt)) { 1316 *flags |= FPLIB_IOC; 1317 return fp64_defaultNaN(); 1318 } 1319 if (a_exp == 2047 || !b_mnt) { 1320 if (a_exp != 2047) 1321 *flags |= FPLIB_DZC; 1322 return fp64_infinity(a_sgn ^ b_sgn); 1323 } 1324 if (!a_mnt || b_exp == 2047) 1325 return fp64_zero(a_sgn ^ b_sgn); 1326 1327 // Find reciprocal of divisor with Newton-Raphson: 1328 a_mnt = fp64_normalise(a_mnt, &a_exp); 1329 b_mnt = fp64_normalise(b_mnt, &b_exp); 1330 x_mnt = ~(uint64_t)0 / (b_mnt >> 31); 1331 mul64x32(&x0_mnt, &x1_mnt, b_mnt, x_mnt); 1332 sub128(&x0_mnt, &x1_mnt, 0, (uint64_t)1 << 32, x0_mnt, x1_mnt); 1333 lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 32); 1334 mul64x32(&x0_mnt, &x1_mnt, x0_mnt, x_mnt); 1335 lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 33); 1336 1337 // Multiply by dividend: 1338 x_sgn = a_sgn ^ b_sgn; 1339 x_exp = a_exp - b_exp + 1031; 1340 mul62x62(&x0_mnt, &x1_mnt, x0_mnt, a_mnt >> 2); // xx 62x62 is enough 1341 lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 4); 1342 x_mnt = x1_mnt; 1343 1344 // This is an underestimate, so try adding one: 1345 mul62x62(&x0_mnt, &x1_mnt, b_mnt >> 2, x_mnt + 1); // xx 62x62 is enough 1346 c = cmp128(x0_mnt, x1_mnt, 0, a_mnt >> 11); 1347 if (c <= 0) { 1348 ++x_mnt; 1349 } 1350 1351 x_mnt = fp64_normalise(x_mnt, &x_exp); 1352 1353 return fp64_round(x_sgn, x_exp, x_mnt << 1 | !!c, mode, flags); 1354} 1355 1356static void 1357set_fpscr0(FPSCR
// NOTE(review): fp32_sqrt below refines an initial estimate with Newton iterations
// (comments inline give the accuracy of each step), then corrects by comparing the
// squared candidate t1 against the scaled operand t0.
&fpscr, int flags) 1358{ 1359 if (flags & FPLIB_IDC) { 1360 fpscr.idc = 1; 1361 } 1362 if (flags & FPLIB_IOC) { 1363 fpscr.ioc = 1; 1364 } 1365 if (flags & FPLIB_DZC) { 1366 fpscr.dzc = 1; 1367 } 1368 if (flags & FPLIB_OFC) { 1369 fpscr.ofc = 1; 1370 } 1371 if (flags & FPLIB_UFC) { 1372 fpscr.ufc = 1; 1373 } 1374 if (flags & FPLIB_IXC) { 1375 fpscr.ixc = 1; 1376 } 1377} 1378 1379static uint32_t 1380fp32_sqrt(uint32_t a, int mode, int *flags) 1381{ 1382 int a_sgn, a_exp, x_sgn, x_exp; 1383 uint32_t a_mnt, x, x_mnt; 1384 uint64_t t0, t1; 1385 1386 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 1387 1388 // Handle NaNs: 1389 if (a_exp == 255 && (uint32_t)(a_mnt << 9)) 1390 return fp32_process_NaN(a, mode, flags); 1391 1392 // Handle infinities and zeroes: 1393 if (!a_mnt) { 1394 return fp32_zero(a_sgn); 1395 } 1396 if (a_exp == 255 && !a_sgn) { 1397 return fp32_infinity(a_sgn); 1398 } 1399 if (a_sgn) { 1400 *flags |= FPLIB_IOC; 1401 return fp32_defaultNaN(); 1402 } 1403 1404 a_mnt = fp32_normalise(a_mnt, &a_exp); 1405 if (!(a_exp & 1)) { 1406 ++a_exp; 1407 a_mnt >>= 1; 1408 } 1409 1410 // x = (a * 3 + 5) / 8 1411 x = (a_mnt >> 2) + (a_mnt >> 3) + (5 << 28); 1412 1413 // x = (a / x + x) / 2; // 16-bit accuracy 1414 x = (a_mnt / (x >> 15) + (x >> 16)) << 15; 1415 1416 // x = (a / x + x) / 2; // 16-bit accuracy 1417 x = (a_mnt / (x >> 15) + (x >> 16)) << 15; 1418 1419 // x = (a / x + x) / 2; // 32-bit accuracy 1420 x = ((((uint64_t)a_mnt << 32) / x) >> 2) + (x >> 1); 1421 1422 x_sgn = 0; 1423 x_exp = (a_exp + 147) >> 1; 1424 x_mnt = ((x - (1 << 5)) >> 6) + 1; 1425 t1 = (uint64_t)x_mnt * x_mnt; 1426 t0 = (uint64_t)a_mnt << 19; 1427 if (t1 > t0) { 1428 --x_mnt; 1429 } 1430 1431 x_mnt = fp32_normalise(x_mnt, &x_exp); 1432 1433 return fp32_round(x_sgn, x_exp, x_mnt << 1 | (t1 != t0), mode, flags); 1434} 1435 1436static uint64_t 1437fp64_sqrt(uint64_t a, int mode, int *flags) 1438{ 1439 int a_sgn, a_exp, x_sgn, x_exp, c; 1440 uint64_t a_mnt, x_mnt, r, x0, x1; 1441
// NOTE(review): fp64_sqrt body (same scheme as fp32_sqrt, extended with a 64-bit
// reciprocal refinement step), then modeConv (extracts rounding/FZ/DN/AHP bits
// from FPSCR), set_fpscr (flag translation; suppresses IXC when an underflow is
// flushed under FZ), the fplibCompareEQ/GE/GT specializations, and fplibAbs
// (clears the sign bit).
uint32_t x; 1442 1443 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 1444 1445 // Handle NaNs: 1446 if (a_exp == 2047 && (uint64_t)(a_mnt << 12)) { 1447 return fp64_process_NaN(a, mode, flags); 1448 } 1449 1450 // Handle infinities and zeroes: 1451 if (!a_mnt) 1452 return fp64_zero(a_sgn); 1453 if (a_exp == 2047 && !a_sgn) 1454 return fp64_infinity(a_sgn); 1455 if (a_sgn) { 1456 *flags |= FPLIB_IOC; 1457 return fp64_defaultNaN(); 1458 } 1459 1460 a_mnt = fp64_normalise(a_mnt, &a_exp); 1461 if (a_exp & 1) { 1462 ++a_exp; 1463 a_mnt >>= 1; 1464 } 1465 1466 // x = (a * 3 + 5) / 8 1467 x = (a_mnt >> 34) + (a_mnt >> 35) + (5 << 28); 1468 1469 // x = (a / x + x) / 2; // 16-bit accuracy 1470 x = ((a_mnt >> 32) / (x >> 15) + (x >> 16)) << 15; 1471 1472 // x = (a / x + x) / 2; // 16-bit accuracy 1473 x = ((a_mnt >> 32) / (x >> 15) + (x >> 16)) << 15; 1474 1475 // x = (a / x + x) / 2; // 32-bit accuracy 1476 x = ((a_mnt / x) >> 2) + (x >> 1); 1477 1478 // r = 1 / x; // 32-bit accuracy 1479 r = ((uint64_t)1 << 62) / x; 1480 1481 // r = r * (2 - x * r); // 64-bit accuracy 1482 mul64x32(&x0, &x1, -(uint64_t)x * r << 1, r); 1483 lsr128(&x0, &x1, x0, x1, 31); 1484 1485 // x = (x + a * r) / 2; // 64-bit accuracy 1486 mul62x62(&x0, &x1, a_mnt >> 10, x0 >> 2); 1487 lsl128(&x0, &x1, x0, x1, 5); 1488 lsr128(&x0, &x1, x0, x1, 56); 1489 1490 x0 = ((uint64_t)x << 31) + (x0 >> 1); 1491 1492 x_sgn = 0; 1493 x_exp = (a_exp + 1053) >> 1; 1494 x_mnt = x0; 1495 x_mnt = ((x_mnt - (1 << 8)) >> 9) + 1; 1496 mul62x62(&x0, &x1, x_mnt, x_mnt); 1497 lsl128(&x0, &x1, x0, x1, 19); 1498 c = cmp128(x0, x1, 0, a_mnt); 1499 if (c > 0) 1500 --x_mnt; 1501 1502 x_mnt = fp64_normalise(x_mnt, &x_exp); 1503 1504 return fp64_round(x_sgn, x_exp, x_mnt << 1 | !!c, mode, flags); 1505} 1506 1507static int 1508modeConv(FPSCR fpscr) 1509{ 1510 return (((int) fpscr) >> 22) & 0xF; 1511} 1512 1513static void 1514set_fpscr(FPSCR &fpscr, int flags) 1515{ 1516 // translate back to FPSCR 1517 bool underflow = false;
1518 if (flags & FPLIB_IDC) { 1519 fpscr.idc = 1; 1520 } 1521 if (flags & FPLIB_IOC) { 1522 fpscr.ioc = 1; 1523 } 1524 if (flags & FPLIB_DZC) { 1525 fpscr.dzc = 1; 1526 } 1527 if (flags & FPLIB_OFC) { 1528 fpscr.ofc = 1; 1529 } 1530 if (flags & FPLIB_UFC) { 1531 underflow = true; //xx Why is this required? 1532 fpscr.ufc = 1; 1533 } 1534 if ((flags & FPLIB_IXC) && !(underflow && fpscr.fz)) { 1535 fpscr.ixc = 1; 1536 } 1537} 1538 1539template <> 1540bool 1541fplibCompareEQ(uint32_t a, uint32_t b, FPSCR &fpscr) 1542{ 1543 int flags = 0; 1544 int x = fp32_compare_eq(a, b, modeConv(fpscr), &flags); 1545 set_fpscr(fpscr, flags); 1546 return x; 1547} 1548 1549template <> 1550bool 1551fplibCompareGE(uint32_t a, uint32_t b, FPSCR &fpscr) 1552{ 1553 int flags = 0; 1554 int x = fp32_compare_ge(a, b, modeConv(fpscr), &flags); 1555 set_fpscr(fpscr, flags); 1556 return x; 1557} 1558 1559template <> 1560bool 1561fplibCompareGT(uint32_t a, uint32_t b, FPSCR &fpscr) 1562{ 1563 int flags = 0; 1564 int x = fp32_compare_gt(a, b, modeConv(fpscr), &flags); 1565 set_fpscr(fpscr, flags); 1566 return x; 1567} 1568 1569template <> 1570bool 1571fplibCompareEQ(uint64_t a, uint64_t b, FPSCR &fpscr) 1572{ 1573 int flags = 0; 1574 int x = fp64_compare_eq(a, b, modeConv(fpscr), &flags); 1575 set_fpscr(fpscr, flags); 1576 return x; 1577} 1578 1579template <> 1580bool 1581fplibCompareGE(uint64_t a, uint64_t b, FPSCR &fpscr) 1582{ 1583 int flags = 0; 1584 int x = fp64_compare_ge(a, b, modeConv(fpscr), &flags); 1585 set_fpscr(fpscr, flags); 1586 return x; 1587} 1588 1589template <> 1590bool 1591fplibCompareGT(uint64_t a, uint64_t b, FPSCR &fpscr) 1592{ 1593 int flags = 0; 1594 int x = fp64_compare_gt(a, b, modeConv(fpscr), &flags); 1595 set_fpscr(fpscr, flags); 1596 return x; 1597} 1598 1599template <> 1600uint32_t 1601fplibAbs(uint32_t op) 1602{ 1603 return op & ~((uint32_t)1 << 31); 1604} 1605 1606template <> 1607uint64_t 1608fplibAbs(uint64_t op) 1609{ 1610 return op & ~((uint64_t)1 << 63); 1611}
// NOTE(review): fplibAdd wrappers, then fplibCompare (32/64-bit) which returns the
// ARM condition-flag nibble: 3 = unordered (NaN), 6 = equal, 8 = less, 2 = greater.
// IOC is raised for any signalling NaN, or for quiet NaNs too when signal_nans is
// set. After that come the NaN payload-conversion helpers and the constant
// builders (1.5, 3.0, 2.0) used by the reciprocal/rsqrt step functions.
1612 1613 template <> 1614 uint32_t 1615 fplibAdd(uint32_t op1, uint32_t op2, FPSCR &fpscr) 1616 { 1617 int flags = 0; 1618 uint32_t result = fp32_add(op1, op2, 0, modeConv(fpscr), &flags); 1619 set_fpscr0(fpscr, flags); 1620 return result; 1621 } 1622 1623 template <> 1624 uint64_t 1625 fplibAdd(uint64_t op1, uint64_t op2, FPSCR &fpscr) 1626 { 1627 int flags = 0; 1628 uint64_t result = fp64_add(op1, op2, 0, modeConv(fpscr), &flags); 1629 set_fpscr0(fpscr, flags); 1630 return result; 1631 } 1632 1633 template <> 1634 int 1635 fplibCompare(uint32_t op1, uint32_t op2, bool signal_nans, FPSCR &fpscr) 1636 { 1637 int mode = modeConv(fpscr); 1638 int flags = 0; 1639 int sgn1, exp1, sgn2, exp2, result; 1640 uint32_t mnt1, mnt2; 1641 1642 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); 1643 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); 1644 1645 if ((exp1 == 255 && (uint32_t)(mnt1 << 9)) || 1646 (exp2 == 255 && (uint32_t)(mnt2 << 9))) { 1647 result = 3; 1648 if ((exp1 == 255 && (uint32_t)(mnt1 << 9) && !(mnt1 >> 22 & 1)) || 1649 (exp2 == 255 && (uint32_t)(mnt2 << 9) && !(mnt2 >> 22 & 1)) || 1650 signal_nans) 1651 flags |= FPLIB_IOC; 1652 } else { 1653 if (op1 == op2 || (!mnt1 && !mnt2)) { 1654 result = 6; 1655 } else if (sgn1 != sgn2) { 1656 result = sgn1 ? 8 : 2; 1657 } else if (exp1 != exp2) { 1658 result = sgn1 ^ (exp1 < exp2) ? 8 : 2; 1659 } else { 1660 result = sgn1 ^ (mnt1 < mnt2) ?
8 : 2; 1661 } 1662 } 1663 1664 set_fpscr0(fpscr, flags); 1665 1666 return result; 1667} 1668 1669template <> 1670int 1671fplibCompare(uint64_t op1, uint64_t op2, bool signal_nans, FPSCR &fpscr) 1672{ 1673 int mode = modeConv(fpscr); 1674 int flags = 0; 1675 int sgn1, exp1, sgn2, exp2, result; 1676 uint64_t mnt1, mnt2; 1677 1678 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); 1679 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); 1680 1681 if ((exp1 == 2047 && (uint64_t)(mnt1 << 12)) || 1682 (exp2 == 2047 && (uint64_t)(mnt2 << 12))) { 1683 result = 3; 1684 if ((exp1 == 2047 && (uint64_t)(mnt1 << 12) && !(mnt1 >> 51 & 1)) || 1685 (exp2 == 2047 && (uint64_t)(mnt2 << 12) && !(mnt2 >> 51 & 1)) || 1686 signal_nans) 1687 flags |= FPLIB_IOC; 1688 } else { 1689 if (op1 == op2 || (!mnt1 && !mnt2)) { 1690 result = 6; 1691 } else if (sgn1 != sgn2) { 1692 result = sgn1 ? 8 : 2; 1693 } else if (exp1 != exp2) { 1694 result = sgn1 ^ (exp1 < exp2) ? 8 : 2; 1695 } else { 1696 result = sgn1 ^ (mnt1 < mnt2) ?
8 : 2; 1697 } 1698 } 1699 1700 set_fpscr0(fpscr, flags); 1701 1702 return result; 1703} 1704 1705static uint16_t 1706fp16_FPConvertNaN_32(uint32_t op) 1707{ 1708 return fp16_pack(op >> 31, 31, (uint16_t)1 << 9 | op >> 13); 1709} 1710 1711static uint16_t 1712fp16_FPConvertNaN_64(uint64_t op) 1713{ 1714 return fp16_pack(op >> 63, 31, (uint16_t)1 << 9 | op >> 42); 1715} 1716 1717static uint32_t 1718fp32_FPConvertNaN_16(uint16_t op) 1719{ 1720 return fp32_pack(op >> 15, 255, (uint32_t)1 << 22 | (uint32_t)op << 13); 1721} 1722 1723static uint32_t 1724fp32_FPConvertNaN_64(uint64_t op) 1725{ 1726 return fp32_pack(op >> 63, 255, (uint32_t)1 << 22 | op >> 29); 1727} 1728 1729static uint64_t 1730fp64_FPConvertNaN_16(uint16_t op) 1731{ 1732 return fp64_pack(op >> 15, 2047, (uint64_t)1 << 51 | (uint64_t)op << 42); 1733} 1734 1735static uint64_t 1736fp64_FPConvertNaN_32(uint32_t op) 1737{ 1738 return fp64_pack(op >> 31, 2047, (uint64_t)1 << 51 | (uint64_t)op << 29); 1739} 1740 1741static uint32_t 1742fp32_FPOnePointFive(int sgn) 1743{ 1744 return fp32_pack(sgn, 127, (uint64_t)1 << 22); 1745} 1746 1747static uint64_t 1748fp64_FPOnePointFive(int sgn) 1749{ 1750 return fp64_pack(sgn, 1023, (uint64_t)1 << 51); 1751} 1752 1753static uint32_t 1754fp32_FPThree(int sgn) 1755{ 1756 return fp32_pack(sgn, 128, (uint64_t)1 << 22); 1757} 1758 1759static uint64_t 1760fp64_FPThree(int sgn) 1761{ 1762 return fp64_pack(sgn, 1024, (uint64_t)1 << 51); 1763} 1764 1765static uint32_t 1766fp32_FPTwo(int sgn) 1767{ 1768 return fp32_pack(sgn, 128, 0); 1769} 1770 1771static uint64_t 1772fp64_FPTwo(int sgn) 1773{ 1774 return fp64_pack(sgn, 1024, 0); 1775} 1776 1777template <> 1778uint16_t 1779fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr) 1780{ 1781 int mode = modeConv(fpscr); 1782 int flags = 0; 1783 int sgn, exp; 1784 uint32_t mnt; 1785 uint16_t result; 1786 1787 // Unpack floating-point operand optionally with flush-to-zero: 1788 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags); 1789
// NOTE(review): the fplibConvert specializations. Narrowing conversions (32/64 to
// 16-bit) honour the AHP (alternative half-precision) format — under AHP a NaN
// input becomes zero with IOC, and infinity becomes the max-magnitude pattern
// 0x7fff. Widening conversions (16/32 to 32/64-bit) are exact, so they repack via
// normalise + fp*_pack without rounding. fplibMulAdd wraps fp32/fp64_muladd.
1790 bool alt_hp = fpscr.ahp; 1791 1792 if (exp == 255 && (uint32_t)(mnt << 9)) { 1793 if (alt_hp) { 1794 result = fp16_zero(sgn); 1795 } else if (fpscr.dn) { 1796 result = fp16_defaultNaN(); 1797 } else { 1798 result = fp16_FPConvertNaN_32(op); 1799 } 1800 if (!(mnt >> 22 & 1) || alt_hp) { 1801 flags |= FPLIB_IOC; 1802 } 1803 } else if (exp == 255) { 1804 if (alt_hp) { 1805 result = sgn << 15 | (uint16_t)0x7fff; 1806 flags |= FPLIB_IOC; 1807 } else { 1808 result = fp16_infinity(sgn); 1809 } 1810 } else if (!mnt) { 1811 result = fp16_zero(sgn); 1812 } else { 1813 result = fp16_round_(sgn, exp - 127 + 15, 1814 mnt >> 7 | !!(uint32_t)(mnt << 25), 1815 rounding, mode | alt_hp << 4, &flags); 1816 } 1817 1818 set_fpscr0(fpscr, flags); 1819 1820 return result; 1821} 1822 1823template <> 1824uint16_t 1825fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr) 1826{ 1827 int mode = modeConv(fpscr); 1828 int flags = 0; 1829 int sgn, exp; 1830 uint64_t mnt; 1831 uint16_t result; 1832 1833 // Unpack floating-point operand optionally with flush-to-zero: 1834 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags); 1835 1836 bool alt_hp = fpscr.ahp; 1837 1838 if (exp == 2047 && (uint64_t)(mnt << 12)) { 1839 if (alt_hp) { 1840 result = fp16_zero(sgn); 1841 } else if (fpscr.dn) { 1842 result = fp16_defaultNaN(); 1843 } else { 1844 result = fp16_FPConvertNaN_64(op); 1845 } 1846 if (!(mnt >> 51 & 1) || alt_hp) { 1847 flags |= FPLIB_IOC; 1848 } 1849 } else if (exp == 2047) { 1850 if (alt_hp) { 1851 result = sgn << 15 | (uint16_t)0x7fff; 1852 flags |= FPLIB_IOC; 1853 } else { 1854 result = fp16_infinity(sgn); 1855 } 1856 } else if (!mnt) { 1857 result = fp16_zero(sgn); 1858 } else { 1859 result = fp16_round_(sgn, exp - 1023 + 15, 1860 mnt >> 36 | !!(uint64_t)(mnt << 28), 1861 rounding, mode | alt_hp << 4, &flags); 1862 } 1863 1864 set_fpscr0(fpscr, flags); 1865 1866 return result; 1867} 1868 1869template <> 1870uint32_t 1871fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr)
{ 1872 int mode = modeConv(fpscr); 1873 int flags = 0; 1874 int sgn, exp; 1875 uint16_t mnt; 1876 uint32_t result; 1877 1878 // Unpack floating-point operand optionally with flush-to-zero: 1879 fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags); 1880 1881 1882 if (exp == 31 && !fpscr.ahp && (uint16_t)(mnt << 6)) { 1883 if (fpscr.dn) { 1884 result = fp32_defaultNaN(); 1885 } else { 1886 result = fp32_FPConvertNaN_16(op); 1887 } 1888 if (!(mnt >> 9 & 1)) { 1889 flags |= FPLIB_IOC; 1890 } 1891 } else if (exp == 31 && !fpscr.ahp) { 1892 result = fp32_infinity(sgn); 1893 } else if (!mnt) { 1894 result = fp32_zero(sgn); 1895 } else { 1896 mnt = fp16_normalise(mnt, &exp); 1897 result = fp32_pack(sgn, exp - 15 + 127 + 5, (uint32_t)mnt << 8); 1898 } 1899 1900 set_fpscr0(fpscr, flags); 1901 1902 return result; 1903} 1904 1905template <> 1906uint32_t 1907fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr) 1908{ 1909 int mode = modeConv(fpscr); 1910 int flags = 0; 1911 int sgn, exp; 1912 uint64_t mnt; 1913 uint32_t result; 1914 1915 // Unpack floating-point operand optionally with flush-to-zero: 1916 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags); 1917 1918 if (exp == 2047 && (uint64_t)(mnt << 12)) { 1919 if (fpscr.dn) { 1920 result = fp32_defaultNaN(); 1921 } else { 1922 result = fp32_FPConvertNaN_64(op); 1923 } 1924 if (!(mnt >> 51 & 1)) { 1925 flags |= FPLIB_IOC; 1926 } 1927 } else if (exp == 2047) { 1928 result = fp32_infinity(sgn); 1929 } else if (!mnt) { 1930 result = fp32_zero(sgn); 1931 } else { 1932 result = fp32_round_(sgn, exp - 1023 + 127, 1933 mnt >> 20 | !!(uint64_t)(mnt << 44), 1934 rounding, mode, &flags); 1935 } 1936 1937 set_fpscr0(fpscr, flags); 1938 1939 return result; 1940} 1941 1942template <> 1943uint64_t 1944fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr) 1945{ 1946 int mode = modeConv(fpscr); 1947 int flags = 0; 1948 int sgn, exp; 1949 uint16_t mnt; 1950 uint64_t result; 1951 1952 // Unpack floating-point operand optionally with
flush-to-zero: 1953 fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags); 1954 1955 if (exp == 31 && !fpscr.ahp && (uint16_t)(mnt << 6)) { 1956 if (fpscr.dn) { 1957 result = fp64_defaultNaN(); 1958 } else { 1959 result = fp64_FPConvertNaN_16(op); 1960 } 1961 if (!(mnt >> 9 & 1)) { 1962 flags |= FPLIB_IOC; 1963 } 1964 } else if (exp == 31 && !fpscr.ahp) { 1965 result = fp64_infinity(sgn); 1966 } else if (!mnt) { 1967 result = fp64_zero(sgn); 1968 } else { 1969 mnt = fp16_normalise(mnt, &exp); 1970 result = fp64_pack(sgn, exp - 15 + 1023 + 5, (uint64_t)mnt << 37); 1971 } 1972 1973 set_fpscr0(fpscr, flags); 1974 1975 return result; 1976} 1977 1978template <> 1979uint64_t 1980fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr) 1981{ 1982 int mode = modeConv(fpscr); 1983 int flags = 0; 1984 int sgn, exp; 1985 uint32_t mnt; 1986 uint64_t result; 1987 1988 // Unpack floating-point operand optionally with flush-to-zero: 1989 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags); 1990 1991 if (exp == 255 && (uint32_t)(mnt << 9)) { 1992 if (fpscr.dn) { 1993 result = fp64_defaultNaN(); 1994 } else { 1995 result = fp64_FPConvertNaN_32(op); 1996 } 1997 if (!(mnt >> 22 & 1)) { 1998 flags |= FPLIB_IOC; 1999 } 2000 } else if (exp == 255) { 2001 result = fp64_infinity(sgn); 2002 } else if (!mnt) { 2003 result = fp64_zero(sgn); 2004 } else { 2005 mnt = fp32_normalise(mnt, &exp); 2006 result = fp64_pack(sgn, exp - 127 + 1023 + 8, (uint64_t)mnt << 21); 2007 } 2008 2009 set_fpscr0(fpscr, flags); 2010 2011 return result; 2012} 2013 2014template <> 2015uint32_t 2016fplibMulAdd(uint32_t addend, uint32_t op1, uint32_t op2, FPSCR &fpscr) 2017{ 2018 int flags = 0; 2019 uint32_t result = fp32_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags); 2020 set_fpscr0(fpscr, flags); 2021 return result; 2022} 2023 2024template <> 2025uint64_t 2026fplibMulAdd(uint64_t addend, uint64_t op1, uint64_t op2, FPSCR &fpscr) 2027{ 2028 int flags = 0; 2029 uint64_t result = fp64_muladd(addend, op1, op2, 0,
// NOTE(review): fplibDiv wrappers; fp32/fp64_repack (re-pack an unpacked value,
// forcing exponent 0 when the implicit bit is absent, i.e. subnormal); the
// minmaxnum helpers which implement the MaxNum/MinNum rule of treating a single
// quiet NaN operand as +/-infinity; and the fplibMax/Min specializations, which
// select via sign/exponent/mantissa ordering on the raw encodings.
modeConv(fpscr), &flags); 2030 set_fpscr0(fpscr, flags); 2031 return result; 2032} 2033 2034template <> 2035uint32_t 2036fplibDiv(uint32_t op1, uint32_t op2, FPSCR &fpscr) 2037{ 2038 int flags = 0; 2039 uint32_t result = fp32_div(op1, op2, modeConv(fpscr), &flags); 2040 set_fpscr0(fpscr, flags); 2041 return result; 2042} 2043 2044template <> 2045uint64_t 2046fplibDiv(uint64_t op1, uint64_t op2, FPSCR &fpscr) 2047{ 2048 int flags = 0; 2049 uint64_t result = fp64_div(op1, op2, modeConv(fpscr), &flags); 2050 set_fpscr0(fpscr, flags); 2051 return result; 2052} 2053 2054static uint32_t 2055fp32_repack(int sgn, int exp, uint32_t mnt) 2056{ 2057 return fp32_pack(sgn, mnt >> 23 ? exp : 0, mnt); 2058} 2059 2060static uint64_t 2061fp64_repack(int sgn, int exp, uint64_t mnt) 2062{ 2063 return fp64_pack(sgn, mnt >> 52 ? exp : 0, mnt); 2064} 2065 2066static void 2067fp32_minmaxnum(uint32_t *op1, uint32_t *op2, int sgn) 2068{ 2069 // Treat a single quiet-NaN as +Infinity/-Infinity 2070 if (!((uint32_t)~(*op1 << 1) >> 23) && (uint32_t)~(*op2 << 1) >> 23) 2071 *op1 = fp32_infinity(sgn); 2072 if (!((uint32_t)~(*op2 << 1) >> 23) && (uint32_t)~(*op1 << 1) >> 23) 2073 *op2 = fp32_infinity(sgn); 2074} 2075 2076static void 2077fp64_minmaxnum(uint64_t *op1, uint64_t *op2, int sgn) 2078{ 2079 // Treat a single quiet-NaN as +Infinity/-Infinity 2080 if (!((uint64_t)~(*op1 << 1) >> 52) && (uint64_t)~(*op2 << 1) >> 52) 2081 *op1 = fp64_infinity(sgn); 2082 if (!((uint64_t)~(*op2 << 1) >> 52) && (uint64_t)~(*op1 << 1) >> 52) 2083 *op2 = fp64_infinity(sgn); 2084} 2085 2086template <> 2087uint32_t 2088fplibMax(uint32_t op1, uint32_t op2, FPSCR &fpscr) 2089{ 2090 int mode = modeConv(fpscr); 2091 int flags = 0; 2092 int sgn1, exp1, sgn2, exp2; 2093 uint32_t mnt1, mnt2, x, result; 2094 2095 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); 2096 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); 2097 2098 if ((x = fp32_process_NaNs(op1, op2, mode, &flags))) { 2099 result = x; 2100 } else { 2101
result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ? 2102 fp32_repack(sgn1, exp1, mnt1) : 2103 fp32_repack(sgn2, exp2, mnt2)); 2104 } 2105 set_fpscr0(fpscr, flags); 2106 return result; 2107} 2108 2109template <> 2110uint64_t 2111fplibMax(uint64_t op1, uint64_t op2, FPSCR &fpscr) 2112{ 2113 int mode = modeConv(fpscr); 2114 int flags = 0; 2115 int sgn1, exp1, sgn2, exp2; 2116 uint64_t mnt1, mnt2, x, result; 2117 2118 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); 2119 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); 2120 2121 if ((x = fp64_process_NaNs(op1, op2, mode, &flags))) { 2122 result = x; 2123 } else { 2124 result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ? 2125 fp64_repack(sgn1, exp1, mnt1) : 2126 fp64_repack(sgn2, exp2, mnt2)); 2127 } 2128 set_fpscr0(fpscr, flags); 2129 return result; 2130} 2131 2132template <> 2133uint32_t 2134fplibMaxNum(uint32_t op1, uint32_t op2, FPSCR &fpscr) 2135{ 2136 fp32_minmaxnum(&op1, &op2, 1); 2137 return fplibMax<uint32_t>(op1, op2, fpscr); 2138} 2139 2140template <> 2141uint64_t 2142fplibMaxNum(uint64_t op1, uint64_t op2, FPSCR &fpscr) 2143{ 2144 fp64_minmaxnum(&op1, &op2, 1); 2145 return fplibMax<uint64_t>(op1, op2, fpscr); 2146} 2147 2148template <> 2149uint32_t 2150fplibMin(uint32_t op1, uint32_t op2, FPSCR &fpscr) 2151{ 2152 int mode = modeConv(fpscr); 2153 int flags = 0; 2154 int sgn1, exp1, sgn2, exp2; 2155 uint32_t mnt1, mnt2, x, result; 2156 2157 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); 2158 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); 2159 2160 if ((x = fp32_process_NaNs(op1, op2, mode, &flags))) { 2161 result = x; 2162 } else { 2163 result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ?
2164 fp32_repack(sgn1, exp1, mnt1) : 2165 fp32_repack(sgn2, exp2, mnt2)); 2166 } 2167 set_fpscr0(fpscr, flags); 2168 return result; 2169} 2170 2171template <> 2172uint64_t 2173fplibMin(uint64_t op1, uint64_t op2, FPSCR &fpscr) 2174{ 2175 int mode = modeConv(fpscr); 2176 int flags = 0; 2177 int sgn1, exp1, sgn2, exp2; 2178 uint64_t mnt1, mnt2, x, result; 2179 2180 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); 2181 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); 2182 2183 if ((x = fp64_process_NaNs(op1, op2, mode, &flags))) { 2184 result = x; 2185 } else { 2186 result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ? 2187 fp64_repack(sgn1, exp1, mnt1) : 2188 fp64_repack(sgn2, exp2, mnt2)); 2189 } 2190 set_fpscr0(fpscr, flags); 2191 return result; 2192} 2193 2194template <> 2195uint32_t 2196fplibMinNum(uint32_t op1, uint32_t op2, FPSCR &fpscr) 2197{ 2198 fp32_minmaxnum(&op1, &op2, 0); 2199 return fplibMin<uint32_t>(op1, op2, fpscr); 2200} 2201 2202template <> 2203uint64_t 2204fplibMinNum(uint64_t op1, uint64_t op2, FPSCR &fpscr) 2205{ 2206 fp64_minmaxnum(&op1, &op2, 0); 2207 return fplibMin<uint64_t>(op1, op2, fpscr); 2208} 2209 2210template <> 2211uint32_t 2212fplibMul(uint32_t op1, uint32_t op2, FPSCR &fpscr) 2213{ 2214 int flags = 0; 2215 uint32_t result = fp32_mul(op1, op2, modeConv(fpscr), &flags); 2216 set_fpscr0(fpscr, flags); 2217 return result; 2218} 2219 2220template <> 2221uint64_t 2222fplibMul(uint64_t op1, uint64_t op2, FPSCR &fpscr) 2223{ 2224 int flags = 0; 2225 uint64_t result = fp64_mul(op1, op2, modeConv(fpscr), &flags); 2226 set_fpscr0(fpscr, flags); 2227 return result; 2228} 2229 2230template <> 2231uint32_t 2232fplibMulX(uint32_t op1, uint32_t op2, FPSCR &fpscr) 2233{ 2234 int mode = modeConv(fpscr); 2235 int flags = 0; 2236 int sgn1, exp1, sgn2, exp2; 2237 uint32_t mnt1, mnt2, result; 2238 2239 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); 2240 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); 2241 2242 result =
// NOTE(review): fplibMulX implements FMULX semantics — 0 * infinity yields 2.0
// of the product's sign instead of a NaN. fplibNeg flips only the sign bit.
// recip_sqrt_estimate is the 256-entry lookup table used by FRSQRTE; it is
// indexed by one exponent-parity bit and 7 mantissa bits.
fp32_process_NaNs(op1, op2, mode, &flags); 2243 if (!result) { 2244 if ((exp1 == 255 && !mnt2) || (exp2 == 255 && !mnt1)) { 2245 result = fp32_FPTwo(sgn1 ^ sgn2); 2246 } else if (exp1 == 255 || exp2 == 255) { 2247 result = fp32_infinity(sgn1 ^ sgn2); 2248 } else if (!mnt1 || !mnt2) { 2249 result = fp32_zero(sgn1 ^ sgn2); 2250 } else { 2251 result = fp32_mul(op1, op2, mode, &flags); 2252 } 2253 } 2254 2255 set_fpscr0(fpscr, flags); 2256 2257 return result; 2258} 2259 2260template <> 2261uint64_t 2262fplibMulX(uint64_t op1, uint64_t op2, FPSCR &fpscr) 2263{ 2264 int mode = modeConv(fpscr); 2265 int flags = 0; 2266 int sgn1, exp1, sgn2, exp2; 2267 uint64_t mnt1, mnt2, result; 2268 2269 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); 2270 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); 2271 2272 result = fp64_process_NaNs(op1, op2, mode, &flags); 2273 if (!result) { 2274 if ((exp1 == 2047 && !mnt2) || (exp2 == 2047 && !mnt1)) { 2275 result = fp64_FPTwo(sgn1 ^ sgn2); 2276 } else if (exp1 == 2047 || exp2 == 2047) { 2277 result = fp64_infinity(sgn1 ^ sgn2); 2278 } else if (!mnt1 || !mnt2) { 2279 result = fp64_zero(sgn1 ^ sgn2); 2280 } else { 2281 result = fp64_mul(op1, op2, mode, &flags); 2282 } 2283 } 2284 2285 set_fpscr0(fpscr, flags); 2286 2287 return result; 2288} 2289 2290template <> 2291uint32_t 2292fplibNeg(uint32_t op) 2293{ 2294 return op ^ (uint32_t)1 << 31; 2295} 2296 2297template <> 2298uint64_t 2299fplibNeg(uint64_t op) 2300{ 2301 return op ^ (uint64_t)1 << 63; 2302} 2303 2304static const uint8_t recip_sqrt_estimate[256] = { 2305 255, 253, 251, 249, 247, 245, 243, 242, 240, 238, 236, 234, 233, 231, 229, 228, 2306 226, 224, 223, 221, 219, 218, 216, 215, 213, 212, 210, 209, 207, 206, 204, 203, 2307 201, 200, 198, 197, 196, 194, 193, 192, 190, 189, 188, 186, 185, 184, 183, 181, 2308 180, 179, 178, 176, 175, 174, 173, 172, 170, 169, 168, 167, 166, 165, 164, 163, 2309 162, 160, 159, 158, 157, 156, 155, 154, 153, 152, 151, 150, 149, 148, 147, 146, 2310
145, 144, 143, 142, 141, 140, 140, 139, 138, 137, 136, 135, 134, 133, 132, 131, 2311 131, 130, 129, 128, 127, 126, 126, 125, 124, 123, 122, 121, 121, 120, 119, 118, 2312 118, 117, 116, 115, 114, 114, 113, 112, 111, 111, 110, 109, 109, 108, 107, 106, 2313 105, 104, 103, 101, 100, 99, 97, 96, 95, 93, 92, 91, 90, 88, 87, 86, 2314 85, 84, 82, 81, 80, 79, 78, 77, 76, 75, 74, 72, 71, 70, 69, 68, 2315 67, 66, 65, 64, 63, 62, 61, 60, 60, 59, 58, 57, 56, 55, 54, 53, 2316 52, 51, 51, 50, 49, 48, 47, 46, 46, 45, 44, 43, 42, 42, 41, 40, 2317 39, 38, 38, 37, 36, 35, 35, 34, 33, 33, 32, 31, 30, 30, 29, 28, 2318 28, 27, 26, 26, 25, 24, 24, 23, 22, 22, 21, 20, 20, 19, 19, 18, 2319 17, 17, 16, 16, 15, 14, 14, 13, 13, 12, 11, 11, 10, 10, 9, 9, 2320 8, 8, 7, 6, 6, 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 0 2321}; 2322 2323template <> 2324uint32_t 2325fplibRSqrtEstimate(uint32_t op, FPSCR &fpscr) 2326{ 2327 int mode = modeConv(fpscr); 2328 int flags = 0; 2329 int sgn, exp; 2330 uint32_t mnt, result; 2331 2332 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags); 2333 2334 if (exp == 255 && (uint32_t)(mnt << 9)) { 2335 result = fp32_process_NaN(op, mode, &flags); 2336 } else if (!mnt) { 2337 result = fp32_infinity(sgn); 2338 flags |= FPLIB_DZC; 2339 } else if (sgn) { 2340 result = fp32_defaultNaN(); 2341 flags |= FPLIB_IOC; 2342 } else if (exp == 255) { 2343 result = fp32_zero(0); 2344 } else { 2345 exp += 8; 2346 mnt = fp32_normalise(mnt, &exp); 2347 mnt = recip_sqrt_estimate[(~exp & 1) << 7 | (mnt >> 24 & 127)]; 2348 result = fp32_pack(0, (380 - exp) >> 1, mnt << 15); 2349 } 2350 2351 set_fpscr0(fpscr, flags); 2352 2353 return result; 2354} 2355 2356template <> 2357uint64_t 2358fplibRSqrtEstimate(uint64_t op, FPSCR &fpscr) 2359{ 2360 int mode = modeConv(fpscr); 2361 int flags = 0; 2362 int sgn, exp; 2363 uint64_t mnt, result; 2364 2365 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags); 2366 2367 if (exp == 2047 && (uint64_t)(mnt << 12)) { 2368 result = fp64_process_NaN(op, mode, &flags); 2369 } else
// Body of fplibRSqrtEstimate<uint64_t> (FRSQRTE, double precision), then the
// fplibRSqrtStepFused specializations (FRSQRTS: computes (3 - op1*op2)/2 via
// fp*_muladd with scale -1, with op1 negated on entry).
// FIX(review): the +infinity-input branch (exp == 2047, sign already handled)
// used fp32_zero(0); use the width-consistent fp64_zero(0). The produced bit
// pattern is identical (+0.0 is all-zero in both formats, and the 32-bit value
// is zero-extended into the uint64_t result), so behavior is unchanged — this
// aligns the branch with every other fp64_* call in this function and with the
// matching fp32_zero(0) branch in the 32-bit specialization.
if (!mnt) { 2370 result = fp64_infinity(sgn); 2371 flags |= FPLIB_DZC; 2372 } else if (sgn) { 2373 result = fp64_defaultNaN(); 2374 flags |= FPLIB_IOC; 2375 } else if (exp == 2047) { 2376 result = fp64_zero(0); 2377 } else { 2378 exp += 11; 2379 mnt = fp64_normalise(mnt, &exp); 2380 mnt = recip_sqrt_estimate[(~exp & 1) << 7 | (mnt >> 56 & 127)]; 2381 result = fp64_pack(0, (3068 - exp) >> 1, mnt << 44); 2382 } 2383 2384 set_fpscr0(fpscr, flags); 2385 2386 return result; 2387} 2388 2389template <> 2390uint32_t 2391fplibRSqrtStepFused(uint32_t op1, uint32_t op2, FPSCR &fpscr) 2392{ 2393 int mode = modeConv(fpscr); 2394 int flags = 0; 2395 int sgn1, exp1, sgn2, exp2; 2396 uint32_t mnt1, mnt2, result; 2397 2398 op1 = fplibNeg<uint32_t>(op1); 2399 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); 2400 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); 2401 2402 result = fp32_process_NaNs(op1, op2, mode, &flags); 2403 if (!result) { 2404 if ((exp1 == 255 && !mnt2) || (exp2 == 255 && !mnt1)) { 2405 result = fp32_FPOnePointFive(0); 2406 } else if (exp1 == 255 || exp2 == 255) { 2407 result = fp32_infinity(sgn1 ^ sgn2); 2408 } else { 2409 result = fp32_muladd(fp32_FPThree(0), op1, op2, -1, mode, &flags); 2410 } 2411 } 2412 2413 set_fpscr0(fpscr, flags); 2414 2415 return result; 2416} 2417 2418template <> 2419uint64_t 2420fplibRSqrtStepFused(uint64_t op1, uint64_t op2, FPSCR &fpscr) 2421{ 2422 int mode = modeConv(fpscr); 2423 int flags = 0; 2424 int sgn1, exp1, sgn2, exp2; 2425 uint64_t mnt1, mnt2, result; 2426 2427 op1 = fplibNeg<uint64_t>(op1); 2428 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); 2429 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); 2430 2431 result = fp64_process_NaNs(op1, op2, mode, &flags); 2432 if (!result) { 2433 if ((exp1 == 2047 && !mnt2) || (exp2 == 2047 && !mnt1)) { 2434 result = fp64_FPOnePointFive(0); 2435 } else if (exp1 == 2047 || exp2 == 2047) { 2436 result = fp64_infinity(sgn1 ^ sgn2); 2437 } else { 2438 result =
fp64_muladd(fp64_FPThree(0), op1, op2, -1, mode, &flags); 2439 } 2440 } 2441 2442 set_fpscr0(fpscr, flags); 2443 2444 return result; 2445} 2446 2447template <> 2448uint32_t 2449fplibRecipStepFused(uint32_t op1, uint32_t op2, FPSCR &fpscr) 2450{ 2451 int mode = modeConv(fpscr); 2452 int flags = 0; 2453 int sgn1, exp1, sgn2, exp2; 2454 uint32_t mnt1, mnt2, result; 2455 2456 op1 = fplibNeg<uint32_t>(op1); 2457 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); 2458 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); 2459 2460 result = fp32_process_NaNs(op1, op2, mode, &flags); 2461 if (!result) { 2462 if ((exp1 == 255 && !mnt2) || (exp2 == 255 && !mnt1)) { 2463 result = fp32_FPTwo(0); 2464 } else if (exp1 == 255 || exp2 == 255) { 2465 result = fp32_infinity(sgn1 ^ sgn2); 2466 } else { 2467 result = fp32_muladd(fp32_FPTwo(0), op1, op2, 0, mode, &flags); 2468 } 2469 } 2470 2471 set_fpscr0(fpscr, flags); 2472 2473 return result; 2474} 2475 2476template <> 2477uint32_t 2478fplibRecipEstimate(uint32_t op, FPSCR &fpscr) 2479{ 2480 int mode = modeConv(fpscr); 2481 int flags = 0; 2482 int sgn, exp; 2483 uint32_t mnt, result; 2484 2485 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags); 2486 2487 if (exp == 255 && (uint32_t)(mnt << 9)) { 2488 result = fp32_process_NaN(op, mode, &flags); 2489 } else if (exp == 255) { 2490 result = fp32_zero(sgn); 2491 } else if (!mnt) { 2492 result = fp32_infinity(sgn); 2493 flags |= FPLIB_DZC; 2494 } else if (!((uint32_t)(op << 1) >> 22)) { 2495 bool overflow_to_inf = false; 2496 switch (FPCRRounding(fpscr)) { 2497 case FPRounding_TIEEVEN: 2498 overflow_to_inf = true; 2499 break; 2500 case FPRounding_POSINF: 2501 overflow_to_inf = !sgn; 2502 break; 2503 case FPRounding_NEGINF: 2504 overflow_to_inf = sgn; 2505 break; 2506 case FPRounding_ZERO: 2507 overflow_to_inf = false; 2508 break; 2509 default: 2510 assert(0); 2511 } 2512 result = overflow_to_inf ? 
fp32_infinity(sgn) : fp32_max_normal(sgn); 2513 flags |= FPLIB_OFC | FPLIB_IXC; 2514 } else if (fpscr.fz && exp >= 253) { 2515 result = fp32_zero(sgn); 2516 flags |= FPLIB_UFC; 2517 } else { 2518 exp += 8; 2519 mnt = fp32_normalise(mnt, &exp); 2520 int result_exp = 253 - exp; 2521 uint32_t fraction = (((uint32_t)1 << 19) / (mnt >> 22 | 1) + 1) >> 1; 2522 fraction <<= 15; 2523 if (result_exp == 0) { 2524 fraction >>= 1; 2525 } else if (result_exp == -1) { 2526 fraction >>= 2; 2527 result_exp = 0; 2528 } 2529 result = fp32_pack(sgn, result_exp, fraction); 2530 } 2531 2532 set_fpscr0(fpscr, flags); 2533 2534 return result; 2535} 2536 2537template <> 2538uint64_t 2539fplibRecipEstimate(uint64_t op, FPSCR &fpscr) 2540{ 2541 int mode = modeConv(fpscr); 2542 int flags = 0; 2543 int sgn, exp; 2544 uint64_t mnt, result; 2545 2546 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags); 2547 2548 if (exp == 2047 && (uint64_t)(mnt << 12)) { 2549 result = fp64_process_NaN(op, mode, &flags); 2550 } else if (exp == 2047) { 2551 result = fp64_zero(sgn); 2552 } else if (!mnt) { 2553 result = fp64_infinity(sgn); 2554 flags |= FPLIB_DZC; 2555 } else if (!((uint64_t)(op << 1) >> 51)) { 2556 bool overflow_to_inf = false; 2557 switch (FPCRRounding(fpscr)) { 2558 case FPRounding_TIEEVEN: 2559 overflow_to_inf = true; 2560 break; 2561 case FPRounding_POSINF: 2562 overflow_to_inf = !sgn; 2563 break; 2564 case FPRounding_NEGINF: 2565 overflow_to_inf = sgn; 2566 break; 2567 case FPRounding_ZERO: 2568 overflow_to_inf = false; 2569 break; 2570 default: 2571 assert(0); 2572 } 2573 result = overflow_to_inf ? 
fp64_infinity(sgn) : fp64_max_normal(sgn); 2574 flags |= FPLIB_OFC | FPLIB_IXC; 2575 } else if (fpscr.fz && exp >= 2045) { 2576 result = fp64_zero(sgn); 2577 flags |= FPLIB_UFC; 2578 } else { 2579 exp += 11; 2580 mnt = fp64_normalise(mnt, &exp); 2581 int result_exp = 2045 - exp; 2582 uint64_t fraction = (((uint32_t)1 << 19) / (mnt >> 54 | 1) + 1) >> 1; 2583 fraction <<= 44; 2584 if (result_exp == 0) { 2585 fraction >>= 1; 2586 } else if (result_exp == -1) { 2587 fraction >>= 2; 2588 result_exp = 0; 2589 } 2590 result = fp64_pack(sgn, result_exp, fraction); 2591 } 2592 2593 set_fpscr0(fpscr, flags); 2594 2595 return result; 2596} 2597 2598template <> 2599uint64_t 2600fplibRecipStepFused(uint64_t op1, uint64_t op2, FPSCR &fpscr) 2601{ 2602 int mode = modeConv(fpscr); 2603 int flags = 0; 2604 int sgn1, exp1, sgn2, exp2; 2605 uint64_t mnt1, mnt2, result; 2606 2607 op1 = fplibNeg<uint64_t>(op1); 2608 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); 2609 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); 2610 2611 result = fp64_process_NaNs(op1, op2, mode, &flags); 2612 if (!result) { 2613 if ((exp1 == 2047 && !mnt2) || (exp2 == 2047 && !mnt1)) { 2614 result = fp64_FPTwo(0); 2615 } else if (exp1 == 2047 || exp2 == 2047) { 2616 result = fp64_infinity(sgn1 ^ sgn2); 2617 } else { 2618 result = fp64_muladd(fp64_FPTwo(0), op1, op2, 0, mode, &flags); 2619 } 2620 } 2621 2622 set_fpscr0(fpscr, flags); 2623 2624 return result; 2625} 2626 2627template <> 2628uint32_t 2629fplibRecpX(uint32_t op, FPSCR &fpscr) 2630{ 2631 int mode = modeConv(fpscr); 2632 int flags = 0; 2633 int sgn, exp; 2634 uint32_t mnt, result; 2635 2636 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags); 2637 2638 if (exp == 255 && (uint32_t)(mnt << 9)) { 2639 result = fp32_process_NaN(op, mode, &flags); 2640 } 2641 else { 2642 if (!mnt) { // Zero and denormals 2643 result = fp32_pack(sgn, 254, 0); 2644 } else { // Infinities and normals 2645 result = fp32_pack(sgn, exp ^ 255, 0); 2646 } 2647 } 2648 2649 
set_fpscr0(fpscr, flags); 2650 2651 return result; 2652} 2653 2654template <> 2655uint64_t 2656fplibRecpX(uint64_t op, FPSCR &fpscr) 2657{ 2658 int mode = modeConv(fpscr); 2659 int flags = 0; 2660 int sgn, exp; 2661 uint64_t mnt, result; 2662 2663 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags); 2664 2665 if (exp == 2047 && (uint64_t)(mnt << 12)) { 2666 result = fp64_process_NaN(op, mode, &flags); 2667 } 2668 else { 2669 if (!mnt) { // Zero and denormals 2670 result = fp64_pack(sgn, 2046, 0); 2671 } else { // Infinities and normals 2672 result = fp64_pack(sgn, exp ^ 2047, 0); 2673 } 2674 } 2675 2676 set_fpscr0(fpscr, flags); 2677 2678 return result; 2679} 2680 2681template <> 2682uint32_t 2683fplibRoundInt(uint32_t op, FPRounding rounding, bool exact, FPSCR &fpscr) 2684{ 2685 int mode = modeConv(fpscr); 2686 int flags = 0; 2687 int sgn, exp; 2688 uint32_t mnt, result; 2689 2690 // Unpack using FPCR to determine if subnormals are flushed-to-zero: 2691 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags); 2692 2693 // Handle NaNs, infinities and zeroes: 2694 if (exp == 255 && (uint32_t)(mnt << 9)) { 2695 result = fp32_process_NaN(op, mode, &flags); 2696 } else if (exp == 255) { 2697 result = fp32_infinity(sgn); 2698 } else if (!mnt) { 2699 result = fp32_zero(sgn); 2700 } else if (exp >= 150) { 2701 // There are no fractional bits 2702 result = op; 2703 } else { 2704 // Truncate towards zero: 2705 uint32_t x = 150 - exp >= 32 ? 0 : mnt >> (150 - exp); 2706 int err = exp < 118 ? 
1 : 2707 (mnt << 1 >> (149 - exp) & 3) | (mnt << 2 << (exp - 118) != 0); 2708 switch (rounding) { 2709 case FPRounding_TIEEVEN: 2710 x += (err == 3 || (err == 2 && (x & 1))); 2711 break; 2712 case FPRounding_POSINF: 2713 x += err && !sgn; 2714 break; 2715 case FPRounding_NEGINF: 2716 x += err && sgn; 2717 break; 2718 case FPRounding_ZERO: 2719 break; 2720 case FPRounding_TIEAWAY: 2721 x += err >> 1; 2722 break; 2723 default: 2724 assert(0); 2725 } 2726 2727 if (x == 0) { 2728 result = fp32_zero(sgn); 2729 } else { 2730 exp = 150; 2731 mnt = fp32_normalise(x, &exp); 2732 result = fp32_pack(sgn, exp + 8, mnt >> 8); 2733 } 2734 2735 if (err && exact) 2736 flags |= FPLIB_IXC; 2737 } 2738 2739 set_fpscr0(fpscr, flags); 2740 2741 return result; 2742} 2743 2744template <> 2745uint64_t 2746fplibRoundInt(uint64_t op, FPRounding rounding, bool exact, FPSCR &fpscr) 2747{ 2748 int mode = modeConv(fpscr); 2749 int flags = 0; 2750 int sgn, exp; 2751 uint64_t mnt, result; 2752 2753 // Unpack using FPCR to determine if subnormals are flushed-to-zero: 2754 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags); 2755 2756 // Handle NaNs, infinities and zeroes: 2757 if (exp == 2047 && (uint64_t)(mnt << 12)) { 2758 result = fp64_process_NaN(op, mode, &flags); 2759 } else if (exp == 2047) { 2760 result = fp64_infinity(sgn); 2761 } else if (!mnt) { 2762 result = fp64_zero(sgn); 2763 } else if (exp >= 1075) { 2764 // There are no fractional bits 2765 result = op; 2766 } else { 2767 // Truncate towards zero: 2768 uint64_t x = 1075 - exp >= 64 ? 0 : mnt >> (1075 - exp); 2769 int err = exp < 1011 ? 
1 : 2770 (mnt << 1 >> (1074 - exp) & 3) | (mnt << 2 << (exp - 1011) != 0); 2771 switch (rounding) { 2772 case FPRounding_TIEEVEN: 2773 x += (err == 3 || (err == 2 && (x & 1))); 2774 break; 2775 case FPRounding_POSINF: 2776 x += err && !sgn; 2777 break; 2778 case FPRounding_NEGINF: 2779 x += err && sgn; 2780 break; 2781 case FPRounding_ZERO: 2782 break; 2783 case FPRounding_TIEAWAY: 2784 x += err >> 1; 2785 break; 2786 default: 2787 assert(0); 2788 } 2789 2790 if (x == 0) { 2791 result = fp64_zero(sgn); 2792 } else { 2793 exp = 1075; 2794 mnt = fp64_normalise(x, &exp); 2795 result = fp64_pack(sgn, exp + 11, mnt >> 11); 2796 } 2797 2798 if (err && exact) 2799 flags |= FPLIB_IXC; 2800 } 2801 2802 set_fpscr0(fpscr, flags); 2803 2804 return result; 2805} 2806 2807template <> 2808uint32_t 2809fplibSqrt(uint32_t op, FPSCR &fpscr) 2810{ 2811 int flags = 0; 2812 uint32_t result = fp32_sqrt(op, modeConv(fpscr), &flags); 2813 set_fpscr0(fpscr, flags); 2814 return result; 2815} 2816 2817template <> 2818uint64_t 2819fplibSqrt(uint64_t op, FPSCR &fpscr) 2820{ 2821 int flags = 0; 2822 uint64_t result = fp64_sqrt(op, modeConv(fpscr), &flags); 2823 set_fpscr0(fpscr, flags); 2824 return result; 2825} 2826 2827template <> 2828uint32_t 2829fplibSub(uint32_t op1, uint32_t op2, FPSCR &fpscr) 2830{ 2831 int flags = 0; 2832 uint32_t result = fp32_add(op1, op2, 1, modeConv(fpscr), &flags); 2833 set_fpscr0(fpscr, flags); 2834 return result; 2835} 2836 2837template <> 2838uint64_t 2839fplibSub(uint64_t op1, uint64_t op2, FPSCR &fpscr) 2840{ 2841 int flags = 0; 2842 uint64_t result = fp64_add(op1, op2, 1, modeConv(fpscr), &flags); 2843 set_fpscr0(fpscr, flags); 2844 return result; 2845} 2846 2847static uint64_t 2848FPToFixed_64(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding, 2849 int *flags) 2850{ 2851 uint64_t x; 2852 int err; 2853 2854 if (exp > 1023 + 63) { 2855 *flags = FPLIB_IOC; 2856 return ((uint64_t)!u << 63) - !sgn; 2857 } 2858 2859 x = lsr64(mnt << 11, 1023 + 63 - exp); 
2860 err = (exp > 1023 + 63 - 2 ? 0 : 2861 (lsr64(mnt << 11, 1023 + 63 - 2 - exp) & 3) | 2862 !!(mnt << 11 & (lsl64(1, 1023 + 63 - 2 - exp) - 1))); 2863 2864 switch (rounding) { 2865 case FPRounding_TIEEVEN: 2866 x += (err == 3 || (err == 2 && (x & 1))); 2867 break; 2868 case FPRounding_POSINF: 2869 x += err && !sgn; 2870 break; 2871 case FPRounding_NEGINF: 2872 x += err && sgn; 2873 break; 2874 case FPRounding_ZERO: 2875 break; 2876 case FPRounding_TIEAWAY: 2877 x += err >> 1; 2878 break; 2879 default: 2880 assert(0); 2881 } 2882 2883 if (u ? sgn && x : x > ((uint64_t)1 << 63) - !sgn) { 2884 *flags = FPLIB_IOC; 2885 return ((uint64_t)!u << 63) - !sgn; 2886 } 2887 2888 if (err) { 2889 *flags = FPLIB_IXC; 2890 } 2891 2892 return sgn ? -x : x; 2893} 2894 2895static uint32_t 2896FPToFixed_32(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding, 2897 int *flags) 2898{ 2899 uint64_t x = FPToFixed_64(sgn, exp, mnt, u, rounding, flags); 2900 if (u ? x >= (uint64_t)1 << 32 : 2901 !(x < (uint64_t)1 << 31 || 2902 (uint64_t)-x <= (uint64_t)1 << 31)) { 2903 *flags = FPLIB_IOC; 2904 x = ((uint32_t)!u << 31) - !sgn; 2905 } 2906 return x; 2907} 2908 2909template <> 2910uint32_t 2911fplibFPToFixed(uint32_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr) 2912{ 2913 int flags = 0; 2914 int sgn, exp; 2915 uint32_t mnt, result; 2916 2917 // Unpack using FPCR to determine if subnormals are flushed-to-zero: 2918 fp32_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags); 2919 2920 // If NaN, set cumulative flag or take exception: 2921 if (exp == 255 && (uint32_t)(mnt << 9)) { 2922 flags = FPLIB_IOC; 2923 result = 0; 2924 } else { 2925 result = FPToFixed_32(sgn, exp + 1023 - 127 + fbits, 2926 (uint64_t)mnt << (52 - 23), u, rounding, &flags); 2927 } 2928 2929 set_fpscr0(fpscr, flags); 2930 2931 return result; 2932} 2933 2934template <> 2935uint32_t 2936fplibFPToFixed(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr) 2937{ 2938 int flags = 0; 2939 int sgn, 
exp; 2940 uint64_t mnt; 2941 uint32_t result; 2942 2943 // Unpack using FPCR to determine if subnormals are flushed-to-zero: 2944 fp64_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags); 2945 2946 // If NaN, set cumulative flag or take exception: 2947 if (exp == 2047 && (uint64_t)(mnt << 12)) { 2948 flags = FPLIB_IOC; 2949 result = 0; 2950 } else { 2951 result = FPToFixed_32(sgn, exp + fbits, mnt, u, rounding, &flags); 2952 } 2953 2954 set_fpscr0(fpscr, flags); 2955 2956 return result; 2957} 2958 2959template <> 2960uint64_t 2961fplibFPToFixed(uint32_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr) 2962{ 2963 int flags = 0; 2964 int sgn, exp; 2965 uint32_t mnt; 2966 uint64_t result; 2967 2968 // Unpack using FPCR to determine if subnormals are flushed-to-zero: 2969 fp32_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags); 2970 2971 // If NaN, set cumulative flag or take exception: 2972 if (exp == 255 && (uint32_t)(mnt << 9)) { 2973 flags = FPLIB_IOC; 2974 result = 0; 2975 } else { 2976 result = FPToFixed_64(sgn, exp + 1023 - 127 + fbits, 2977 (uint64_t)mnt << (52 - 23), u, rounding, &flags); 2978 } 2979 2980 set_fpscr0(fpscr, flags); 2981 2982 return result; 2983} 2984 2985template <> 2986uint64_t 2987fplibFPToFixed(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr) 2988{ 2989 int flags = 0; 2990 int sgn, exp; 2991 uint64_t mnt, result; 2992 2993 // Unpack using FPCR to determine if subnormals are flushed-to-zero: 2994 fp64_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags); 2995 2996 // If NaN, set cumulative flag or take exception: 2997 if (exp == 2047 && (uint64_t)(mnt << 12)) { 2998 flags = FPLIB_IOC; 2999 result = 0; 3000 } else { 3001 result = FPToFixed_64(sgn, exp + fbits, mnt, u, rounding, &flags); 3002 } 3003 3004 set_fpscr0(fpscr, flags); 3005 3006 return result; 3007} 3008 3009static uint32_t 3010fp32_cvtf(uint64_t a, int fbits, int u, int mode, int *flags) 3011{ 3012 int x_sgn = !u && a >> 63; 3013 int x_exp = 190 - 
fbits; 3014 uint64_t x_mnt = x_sgn ? -a : a; 3015 3016 // Handle zero: 3017 if (!x_mnt) { 3018 return fp32_zero(0); 3019 } 3020 3021 // Normalise and convert to 32 bits, collapsing error into bottom bit: 3022 x_mnt = fp64_normalise(x_mnt, &x_exp); 3023 x_mnt = x_mnt >> 31 | !!(uint32_t)(x_mnt << 1); 3024 3025 return fp32_round(x_sgn, x_exp, x_mnt, mode, flags); 3026} 3027 3028static uint64_t 3029fp64_cvtf(uint64_t a, int fbits, int u, int mode, int *flags) 3030{ 3031 int x_sgn = !u && a >> 63; 3032 int x_exp = 1024 + 62 - fbits; 3033 uint64_t x_mnt = x_sgn ? -a : a; 3034 3035 // Handle zero: 3036 if (!x_mnt) { 3037 return fp64_zero(0); 3038 } 3039 3040 x_mnt = fp64_normalise(x_mnt, &x_exp); 3041 3042 return fp64_round(x_sgn, x_exp, x_mnt << 1, mode, flags); 3043} 3044 3045template <> 3046uint32_t 3047fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr) 3048{ 3049 int flags = 0; 3050 uint32_t res = fp32_cvtf(op, fbits, u, 3051 (int)rounding | ((uint32_t)fpscr >> 22 & 12), 3052 &flags); 3053 set_fpscr0(fpscr, flags); 3054 return res; 3055} 3056 3057template <> 3058uint64_t 3059fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr) 3060{ 3061 int flags = 0; 3062 uint64_t res = fp64_cvtf(op, fbits, u, 3063 (int)rounding | ((uint32_t)fpscr >> 22 & 12), 3064 &flags); 3065 set_fpscr0(fpscr, flags); 3066 return res; 3067} 3068 3069} 3070