// fplib.cc revision 11224:a7a718faaf56
/*
 * Copyright (c) 2012-2013 ARM Limited
 * All rights reserved
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder.  You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Edmund Grimley Evans
 *          Thomas Grocutt
 */

// Software emulation of ARM floating-point arithmetic: IEEE-754 half,
// single and double precision implemented with integer bit manipulation.
// Values are handled as (sign, biased exponent, mantissa) triples; the
// FPLIB_* mode bits select rounding mode and FZ/DN/AHP behaviour, and the
// FPLIB_* flag bits accumulate FPSR cumulative-exception bits.

#include <stdint.h>

#include <cassert>

#include "fplib.hh"

namespace ArmISA
{

// Rounding-mode / control bits for the "mode" argument:
#define FPLIB_RN 0   // round to nearest, ties to even
#define FPLIB_RP 1   // round towards plus infinity
#define FPLIB_RM 2   // round towards minus infinity
#define FPLIB_RZ 3   // round towards zero
#define FPLIB_FZ 4   // flush-to-zero
#define FPLIB_DN 8   // default NaN
#define FPLIB_AHP 16 // alternative half-precision format

// Cumulative exception flag bits (FPSR layout):
#define FPLIB_IDC 128 // Input Denormal
#define FPLIB_IXC 16  // Inexact
#define FPLIB_UFC 8   // Underflow
#define FPLIB_OFC 4   // Overflow
#define FPLIB_DZC 2   // Division by Zero
#define FPLIB_IOC 1   // Invalid Operation

// Shift helpers that return 0 when the shift is >= the operand width
// (a plain C++ shift of that size would be undefined behaviour).

static inline uint16_t
lsl16(uint16_t x, uint32_t shift)
{
    return shift < 16 ? x << shift : 0;
}

static inline uint16_t
lsr16(uint16_t x, uint32_t shift)
{
    return shift < 16 ? x >> shift : 0;
}

static inline uint32_t
lsl32(uint32_t x, uint32_t shift)
{
    return shift < 32 ? x << shift : 0;
}

static inline uint32_t
lsr32(uint32_t x, uint32_t shift)
{
    return shift < 32 ? x >> shift : 0;
}

static inline uint64_t
lsl64(uint64_t x, uint32_t shift)
{
    return shift < 64 ? x << shift : 0;
}

static inline uint64_t
lsr64(uint64_t x, uint32_t shift)
{
    return shift < 64 ? x >> shift : 0;
}

// 128-bit logical shift left of (x1:x0) into (*r1:*r0); shifts >= 128
// produce zero.
static inline void
lsl128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift)
{
    if (shift == 0) {
        *r1 = x1;
        *r0 = x0;
    } else if (shift < 64) {
        *r1 = x1 << shift | x0 >> (64 - shift);
        *r0 = x0 << shift;
    } else if (shift < 128) {
        *r1 = x0 << (shift - 64);
        *r0 = 0;
    } else {
        *r1 = 0;
        *r0 = 0;
    }
}

// 128-bit logical shift right of (x1:x0) into (*r1:*r0); shifts >= 128
// produce zero.
static inline void
lsr128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift)
{
    if (shift == 0) {
        *r1 = x1;
        *r0 = x0;
    } else if (shift < 64) {
        *r0 = x0 >> shift | x1 << (64 - shift);
        *r1 = x1 >> shift;
    } else if (shift < 128) {
        *r0 = x1 >> (shift - 64);
        *r1 = 0;
    } else {
        *r0 = 0;
        *r1 = 0;
    }
}

// Multiply two (up to) 62-bit values into a 128-bit product (*x1:*x0),
// using 31-bit limbs and a Karatsuba-style middle term.
// NOTE(review): the final "*x1 = s2 >> 2" relies on a and b having at
// most 62 significant bits — callers pass mantissas shifted to fit.
static inline void
mul62x62(uint64_t *x0, uint64_t *x1, uint64_t a, uint64_t b)
{
    uint32_t mask = ((uint32_t)1 << 31) - 1;
    uint64_t a0 = a & mask;
    uint64_t a1 = a >> 31 & mask;
    uint64_t b0 = b & mask;
    uint64_t b1 = b >> 31 & mask;
    uint64_t p0 = a0 * b0;
    uint64_t p2 = a1 * b1;
    uint64_t p1 = (a0 + a1) * (b0 + b1) - p0 - p2;
    uint64_t s0 = p0;
    uint64_t s1 = (s0 >> 31) + p1;
    uint64_t s2 = (s1 >> 31) + p2;
    *x0 = (s0 & mask) | (s1 & mask) << 31 | s2 << 62;
    *x1 = s2 >> 2;
}

// Multiply a 64-bit value by a 32-bit value into a 96-bit result held in
// (*x1:*x0).
static inline
void mul64x32(uint64_t *x0, uint64_t *x1, uint64_t a, uint32_t b)
{
    uint64_t t0 = (uint64_t)(uint32_t)a * b;
    uint64_t t1 = (t0 >> 32) + (a >> 32) * b;
    *x0 = t1 << 32 | (uint32_t)t0;
    *x1 = t1 >> 32;
}

// 128-bit add: (*x1:*x0) = (a1:a0) + (b1:b0), propagating the carry.
static inline void
add128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0,
       uint64_t b1)
{
    *x0 = a0 + b0;
    *x1 = a1 + b1 + (*x0 < a0);
}

// 128-bit subtract: (*x1:*x0) = (a1:a0) - (b1:b0), propagating the borrow.
static inline void
sub128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0,
       uint64_t b1)
{
    *x0 = a0 - b0;
    *x1 = a1 - b1 - (*x0 > a0);
}

// Unsigned 128-bit compare: -1, 0 or 1 as (a1:a0) <, ==, > (b1:b0).
static inline int
cmp128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1)
{
    return (a1 < b1 ? -1 : a1 > b1 ? 1 : a0 < b0 ? -1 : a0 > b0 ? 1 : 0);
}

// Shift the mantissa left until its top bit is set, adjusting *exp by the
// shift count.  Returns 0 (leaving *exp untouched) for a zero mantissa.
// The loop is a binary search over shift amounts 8,4,2,1 (16,...,1 for
// the wider variants).

static inline uint16_t
fp16_normalise(uint16_t mnt, int *exp)
{
    int shift;

    if (!mnt) {
        return 0;
    }

    for (shift = 8; shift; shift >>= 1) {
        if (!(mnt >> (16 - shift))) {
            mnt <<= shift;
            *exp -= shift;
        }
    }
    return mnt;
}

static inline uint32_t
fp32_normalise(uint32_t mnt, int *exp)
{
    int shift;

    if (!mnt) {
        return 0;
    }

    for (shift = 16; shift; shift >>= 1) {
        if (!(mnt >> (32 - shift))) {
            mnt <<= shift;
            *exp -= shift;
        }
    }
    return mnt;
}

static inline uint64_t
fp64_normalise(uint64_t mnt, int *exp)
{
    int shift;

    if (!mnt) {
        return 0;
    }

    for (shift = 32; shift; shift >>= 1) {
        if (!(mnt >> (64 - shift))) {
            mnt <<= shift;
            *exp -= shift;
        }
    }
    return mnt;
}

// 128-bit variant of the above, normalising (mnt1:mnt0) so the top bit of
// *mnt1 is set.
static inline void
fp128_normalise(uint64_t *mnt0, uint64_t *mnt1, int *exp)
{
    uint64_t x0 = *mnt0;
    uint64_t x1 = *mnt1;
    int shift;

    if (!x0 && !x1) {
        return;
    }

    if (!x1) {
        x1 = x0;
        x0 = 0;
        *exp -= 64;
    }

    for (shift = 32; shift; shift >>= 1) {
        if (!(x1 >> (64 - shift))) {
            x1 = x1 << shift | x0 >> (64 - shift);
            x0 <<= shift;
            *exp -= shift;
        }
    }

    *mnt0 = x0;
    *mnt1 = x1;
}

// Assemble sign, biased exponent and mantissa fields into the IEEE bit
// pattern (half: 1/5/10, single: 1/8/23, double: 1/11/52 bits).

static inline uint16_t
fp16_pack(uint16_t sgn, uint16_t exp, uint16_t mnt)
{
    return sgn << 15 | exp << 10 | (mnt & (((uint16_t)1 << 10) - 1));
}

static inline uint32_t
fp32_pack(uint32_t sgn, uint32_t exp, uint32_t mnt)
{
    return sgn << 31 | exp << 23 | (mnt & (((uint32_t)1 << 23) - 1));
}

static inline uint64_t
fp64_pack(uint64_t sgn, uint64_t exp, uint64_t mnt)
{
    return (uint64_t)sgn << 63 | exp << 52 | (mnt & (((uint64_t)1 << 52) - 1));
}

// Canonical special values: signed zero, largest finite magnitude,
// infinity, and the default (quiet) NaN for each format.

static inline uint16_t
fp16_zero(int sgn)
{
    return fp16_pack(sgn, 0, 0);
}

static inline uint32_t
fp32_zero(int sgn)
{
    return fp32_pack(sgn, 0, 0);
}

static inline uint64_t
fp64_zero(int sgn)
{
    return fp64_pack(sgn, 0, 0);
}

static inline uint16_t
fp16_max_normal(int sgn)
{
    // Exponent one below the NaN/inf encoding, all-ones mantissa.
    return fp16_pack(sgn, 30, -1);
}

static inline uint32_t
fp32_max_normal(int sgn)
{
    return fp32_pack(sgn, 254, -1);
}

static inline uint64_t
fp64_max_normal(int sgn)
{
    return fp64_pack(sgn, 2046, -1);
}

static inline uint16_t
fp16_infinity(int sgn)
{
    return fp16_pack(sgn, 31, 0);
}

static inline uint32_t
fp32_infinity(int sgn)
{
    return fp32_pack(sgn, 255, 0);
}

static inline uint64_t
fp64_infinity(int sgn)
{
    return fp64_pack(sgn, 2047, 0);
}

static inline uint16_t
fp16_defaultNaN()
{
    // Positive quiet NaN: only the top mantissa bit set.
    return fp16_pack(0, 31, (uint16_t)1 << 9);
}

static inline uint32_t
fp32_defaultNaN()
{
    return fp32_pack(0, 255, (uint32_t)1 << 22);
}

static inline uint64_t
fp64_defaultNaN()
{
    return fp64_pack(0, 2047, (uint64_t)1 << 51);
}

// Split a bit pattern into sign / biased exponent / mantissa.  For normal
// numbers the implicit leading 1 is made explicit; for subnormals the
// exponent is bumped to 1 so the mantissa scale is uniform.  fp32/fp64
// flush subnormal inputs to zero (raising IDC) when FPLIB_FZ is set.

static inline void
fp16_unpack(int *sgn, int *exp, uint16_t *mnt, uint16_t x, int mode,
            int *flags)
{
    *sgn = x >> 15;
    *exp = x >> 10 & 31;
    *mnt = x & (((uint16_t)1 << 10) - 1);

    // Handle subnormals:
    if (*exp) {
        *mnt |= (uint16_t)1 << 10;
    } else {
        ++*exp;
        // There is no flush to zero in this case!
    }
}

static inline void
fp32_unpack(int *sgn, int *exp, uint32_t *mnt, uint32_t x, int mode,
            int *flags)
{
    *sgn = x >> 31;
    *exp = x >> 23 & 255;
    *mnt = x & (((uint32_t)1 << 23) - 1);

    // Handle subnormals:
    if (*exp) {
        *mnt |= (uint32_t)1 << 23;
    } else {
        ++*exp;
        if ((mode & FPLIB_FZ) && *mnt) {
            *flags |= FPLIB_IDC;
            *mnt = 0;
        }
    }
}

static inline void
fp64_unpack(int *sgn, int *exp, uint64_t *mnt, uint64_t x, int mode,
            int *flags)
{
    *sgn = x >> 63;
    *exp = x >> 52 & 2047;
    *mnt = x & (((uint64_t)1 << 52) - 1);

    // Handle subnormals:
    if (*exp) {
        *mnt |= (uint64_t)1 << 52;
    } else {
        ++*exp;
        if ((mode & FPLIB_FZ) && *mnt) {
            *flags |= FPLIB_IDC;
            *mnt = 0;
        }
    }
}

// Turn a NaN operand into the NaN to return: quieten a signalling NaN
// (raising IOC), then substitute the default NaN if FPLIB_DN is set.

static inline uint32_t
fp32_process_NaN(uint32_t a, int mode, int *flags)
{
    if (!(a >> 22 & 1)) {
        *flags |= FPLIB_IOC;
        a |= (uint32_t)1 << 22;
    }
    return mode & FPLIB_DN ? fp32_defaultNaN() : a;
}

static inline uint64_t
fp64_process_NaN(uint64_t a, int mode, int *flags)
{
    if (!(a >> 51 & 1)) {
        *flags |= FPLIB_IOC;
        a |= (uint64_t)1 << 51;
    }
    return mode & FPLIB_DN ? fp64_defaultNaN() : a;
}

// Two-operand NaN propagation: signalling NaNs take priority over quiet
// NaNs, first operand over second.  Returns 0 when neither is a NaN
// (0 is not a valid NaN bit pattern, so it is a safe sentinel).

static uint32_t
fp32_process_NaNs(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_exp = a >> 23 & 255;
    uint32_t a_mnt = a & (((uint32_t)1 << 23) - 1);
    int b_exp = b >> 23 & 255;
    uint32_t b_mnt = b & (((uint32_t)1 << 23) - 1);

    // Handle signalling NaNs:
    if (a_exp == 255 && a_mnt && !(a_mnt >> 22 & 1))
        return fp32_process_NaN(a, mode, flags);
    if (b_exp == 255 && b_mnt && !(b_mnt >> 22 & 1))
        return fp32_process_NaN(b, mode, flags);

    // Handle quiet NaNs:
    if (a_exp == 255 && a_mnt)
        return fp32_process_NaN(a, mode, flags);
    if (b_exp == 255 && b_mnt)
        return fp32_process_NaN(b, mode, flags);

    return 0;
}

static uint64_t
fp64_process_NaNs(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_exp = a >> 52 & 2047;
    uint64_t a_mnt = a & (((uint64_t)1 << 52) - 1);
    int b_exp = b >> 52 & 2047;
    uint64_t b_mnt = b & (((uint64_t)1 << 52) - 1);

    // Handle signalling NaNs:
    if (a_exp == 2047 && a_mnt && !(a_mnt >> 51 & 1))
        return fp64_process_NaN(a, mode, flags);
    if (b_exp == 2047 && b_mnt && !(b_mnt >> 51 & 1))
        return fp64_process_NaN(b, mode, flags);

    // Handle quiet NaNs:
    if (a_exp == 2047 && a_mnt)
        return fp64_process_NaN(a, mode, flags);
    if (b_exp == 2047 && b_mnt)
        return fp64_process_NaN(b, mode, flags);

    return 0;
}

// Three-operand NaN propagation (for fused multiply-add), same priority
// scheme: signalling before quiet, a before b before c.

static uint32_t
fp32_process_NaNs3(uint32_t a, uint32_t b, uint32_t c, int mode, int *flags)
{
    int a_exp = a >> 23 & 255;
    uint32_t a_mnt = a & (((uint32_t)1 << 23) - 1);
    int b_exp = b >> 23 & 255;
    uint32_t b_mnt = b & (((uint32_t)1 << 23) - 1);
    int c_exp = c >> 23 & 255;
    uint32_t c_mnt = c & (((uint32_t)1 << 23) - 1);

    // Handle signalling NaNs:
    if (a_exp == 255 && a_mnt && !(a_mnt >> 22 & 1))
        return fp32_process_NaN(a, mode, flags);
    if (b_exp == 255 && b_mnt && !(b_mnt >> 22 & 1))
        return fp32_process_NaN(b, mode, flags);
    if (c_exp == 255 && c_mnt && !(c_mnt >> 22 & 1))
        return fp32_process_NaN(c, mode, flags);

    // Handle quiet NaNs:
    if (a_exp == 255 && a_mnt)
        return fp32_process_NaN(a, mode, flags);
    if (b_exp == 255 && b_mnt)
        return fp32_process_NaN(b, mode, flags);
    if (c_exp == 255 && c_mnt)
        return fp32_process_NaN(c, mode, flags);

    return 0;
}

static uint64_t
fp64_process_NaNs3(uint64_t a, uint64_t b, uint64_t c, int mode, int *flags)
{
    int a_exp = a >> 52 & 2047;
    uint64_t a_mnt = a & (((uint64_t)1 << 52) - 1);
    int b_exp = b >> 52 & 2047;
    uint64_t b_mnt = b & (((uint64_t)1 << 52) - 1);
    int c_exp = c >> 52 & 2047;
    uint64_t c_mnt = c & (((uint64_t)1 << 52) - 1);

    // Handle signalling NaNs:
    if (a_exp == 2047 && a_mnt && !(a_mnt >> 51 & 1))
        return fp64_process_NaN(a, mode, flags);
    if (b_exp == 2047 && b_mnt && !(b_mnt >> 51 & 1))
        return fp64_process_NaN(b, mode, flags);
    if (c_exp == 2047 && c_mnt && !(c_mnt >> 51 & 1))
        return fp64_process_NaN(c, mode, flags);

    // Handle quiet NaNs:
    if (a_exp == 2047 && a_mnt)
        return fp64_process_NaN(a, mode, flags);
    if (b_exp == 2047 && b_mnt)
        return fp64_process_NaN(b, mode, flags);
    if (c_exp == 2047 && c_mnt)
        return fp64_process_NaN(c, mode, flags);

    return 0;
}

// Round an internal (sgn, exp, mnt) result to a half-precision value with
// rounding mode rm (one of FPLIB_RN/RP/RM/RZ or FPRounding_ODD, declared
// elsewhere; TIEAWAY is asserted out).  mnt carries extra low-order
// precision bits that are collapsed into a 2-bit "error" used for the
// rounding decision; underflow/overflow/inexact flags are accumulated.
static uint16_t
fp16_round_(int sgn, int exp, uint16_t mnt, int rm, int mode, int *flags)
{
    int biased_exp; // non-negative exponent value for result
    uint16_t int_mant; // mantissa for result, less than (1 << 11)
    int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5

    assert(rm != FPRounding_TIEAWAY);

    // There is no flush to zero in this case!

    // The bottom 5 bits of mnt are orred together:
    mnt = (uint16_t)1 << 12 | mnt >> 4 | ((mnt & 31) != 0);

    if (exp > 0) {
        biased_exp = exp;
        int_mant = mnt >> 2;
        error = mnt & 3;
    } else {
        // Subnormal result: shift further right, folding shifted-out bits
        // into the sticky part of "error".
        biased_exp = 0;
        int_mant = lsr16(mnt, 3 - exp);
        error = (lsr16(mnt, 1 - exp) & 3) | !!(mnt & (lsl16(1, 1 - exp) - 1));
    }

    if (!biased_exp && error) { // xx should also check fpscr_val<11>
        *flags |= FPLIB_UFC;
    }

    // Round up:
    if ((rm == FPLIB_RN && (error == 3 ||
                            (error == 2 && (int_mant & 1)))) ||
        (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) {
        ++int_mant;
        if (int_mant == (uint32_t)1 << 10) {
            // Rounded up from denormalized to normalized
            biased_exp = 1;
        }
        if (int_mant == (uint32_t)1 << 11) {
            // Rounded up to next exponent
            ++biased_exp;
            int_mant >>= 1;
        }
    }

    // Handle rounding to odd aka Von Neumann rounding:
    if (error && rm == FPRounding_ODD)
        int_mant |= 1;

    // Handle overflow:
    if (!(mode & FPLIB_AHP)) {
        if (biased_exp >= 31) {
            *flags |= FPLIB_OFC | FPLIB_IXC;
            if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) ||
                (rm == FPLIB_RM && sgn)) {
                return fp16_infinity(sgn);
            } else {
                return fp16_max_normal(sgn);
            }
        }
    } else {
        // Alternative half-precision: no infinities, saturate instead.
        if (biased_exp >= 32) {
            *flags |= FPLIB_IOC;
            return fp16_pack(sgn, 31, -1);
        }
    }

    if (error) {
        *flags |= FPLIB_IXC;
    }

    return fp16_pack(sgn, biased_exp, int_mant);
}

// Single-precision rounding; same scheme as fp16_round_ but with
// flush-to-zero handling for tiny results.
static uint32_t
fp32_round_(int sgn, int exp, uint32_t mnt, int rm, int mode, int *flags)
{
    int biased_exp; // non-negative exponent value for result
    uint32_t int_mant; // mantissa for result, less than (1 << 24)
    int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5

    assert(rm != FPRounding_TIEAWAY);

    // Flush to zero:
    if ((mode & FPLIB_FZ) && exp < 1) {
        *flags |= FPLIB_UFC;
        return fp32_zero(sgn);
    }

    // The bottom 8 bits of mnt are orred together:
    mnt = (uint32_t)1 << 25 | mnt >> 7 | ((mnt & 255) != 0);

    if (exp > 0) {
        biased_exp = exp;
        int_mant = mnt >> 2;
        error = mnt & 3;
    } else {
        biased_exp = 0;
        int_mant = lsr32(mnt, 3 - exp);
        error = (lsr32(mnt, 1 - exp) & 3) | !!(mnt & (lsl32(1, 1 - exp) - 1));
    }

    if (!biased_exp && error) { // xx should also check fpscr_val<11>
        *flags |= FPLIB_UFC;
    }

    // Round up:
    if ((rm == FPLIB_RN && (error == 3 ||
                            (error == 2 && (int_mant & 1)))) ||
        (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) {
        ++int_mant;
        if (int_mant == (uint32_t)1 << 23) {
            // Rounded up from denormalized to normalized
            biased_exp = 1;
        }
        if (int_mant == (uint32_t)1 << 24) {
            // Rounded up to next exponent
            ++biased_exp;
            int_mant >>= 1;
        }
    }

    // Handle rounding to odd aka Von Neumann rounding:
    if (error && rm == FPRounding_ODD)
        int_mant |= 1;

    // Handle overflow:
    if (biased_exp >= 255) {
        *flags |= FPLIB_OFC | FPLIB_IXC;
        if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) ||
            (rm == FPLIB_RM && sgn)) {
            return fp32_infinity(sgn);
        } else {
            return fp32_max_normal(sgn);
        }
    }

    if (error) {
        *flags |= FPLIB_IXC;
    }

    return fp32_pack(sgn, biased_exp, int_mant);
}

// Convenience wrapper: take the rounding mode from the low bits of mode.
static uint32_t
fp32_round(int sgn, int exp, uint32_t mnt, int mode, int *flags)
{
    return fp32_round_(sgn, exp, mnt, mode & 3, mode, flags);
}

// Double-precision rounding; see fp32_round_.
static uint64_t
fp64_round_(int sgn, int exp, uint64_t mnt, int rm, int mode, int *flags)
{
    int biased_exp; // non-negative exponent value for result
    uint64_t int_mant; // mantissa for result, less than (1 << 52)
    int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5

    assert(rm != FPRounding_TIEAWAY);

    // Flush to zero:
    if ((mode & FPLIB_FZ) && exp < 1) {
        *flags |= FPLIB_UFC;
        return fp64_zero(sgn);
    }

    // The bottom 11 bits of mnt are orred together:
    mnt = (uint64_t)1 << 54 | mnt >> 10 | ((mnt & 0x3ff) != 0);

    if (exp > 0) {
        biased_exp = exp;
        int_mant = mnt >> 2;
        error = mnt & 3;
    } else {
        biased_exp = 0;
        int_mant = lsr64(mnt, 3 - exp);
        error = (lsr64(mnt, 1 - exp) & 3) | !!(mnt & (lsl64(1, 1 - exp) - 1));
    }

    if (!biased_exp && error) { // xx should also check fpscr_val<11>
        *flags |= FPLIB_UFC;
    }

    // Round up:
    if ((rm == FPLIB_RN && (error == 3 ||
                            (error == 2 && (int_mant & 1)))) ||
        (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) {
        ++int_mant;
        if (int_mant == (uint64_t)1 << 52) {
            // Rounded up from denormalized to normalized
            biased_exp = 1;
        }
        if (int_mant == (uint64_t)1 << 53) {
            // Rounded up to next exponent
            ++biased_exp;
            int_mant >>= 1;
        }
    }

    // Handle rounding to odd aka Von Neumann rounding:
    if (error && rm == FPRounding_ODD)
        int_mant |= 1;

    // Handle overflow:
    if (biased_exp >= 2047) {
        *flags |= FPLIB_OFC | FPLIB_IXC;
        if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) ||
            (rm == FPLIB_RM && sgn)) {
            return fp64_infinity(sgn);
        } else {
            return fp64_max_normal(sgn);
        }
    }

    if (error) {
        *flags |= FPLIB_IXC;
    }

    return fp64_pack(sgn, biased_exp, int_mant);
}

static uint64_t
fp64_round(int sgn, int exp, uint64_t mnt, int mode, int *flags)
{
    return fp64_round_(sgn, exp, mnt, mode & 3, mode, flags);
}

// Comparisons.  eq raises IOC only for signalling NaNs; ge/gt raise IOC
// for any NaN operand and return 0 (unordered).  +0 and -0 compare equal.

static int
fp32_compare_eq(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint32_t a_mnt, b_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((a_exp == 255 && (uint32_t)(a_mnt << 9)) ||
        (b_exp == 255 && (uint32_t)(b_mnt << 9))) {
        if ((a_exp == 255 && (uint32_t)(a_mnt << 9) && !(a >> 22 & 1)) ||
            (b_exp == 255 && (uint32_t)(b_mnt << 9) && !(b >> 22 & 1)))
            *flags |= FPLIB_IOC;
        return 0;
    }
    return a == b || (!a_mnt && !b_mnt);
}

static int
fp32_compare_ge(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint32_t a_mnt, b_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((a_exp == 255 && (uint32_t)(a_mnt << 9)) ||
        (b_exp == 255 && (uint32_t)(b_mnt << 9))) {
        *flags |= FPLIB_IOC;
        return 0;
    }
    if (!a_mnt && !b_mnt)
        return 1;
    if (a_sgn != b_sgn)
        return b_sgn;
    if (a_exp != b_exp)
        return a_sgn ^ (a_exp > b_exp);
    if (a_mnt != b_mnt)
        return a_sgn ^ (a_mnt > b_mnt);
    return 1;
}

static int
fp32_compare_gt(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint32_t a_mnt, b_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((a_exp == 255 && (uint32_t)(a_mnt << 9)) ||
        (b_exp == 255 && (uint32_t)(b_mnt << 9))) {
        *flags |= FPLIB_IOC;
        return 0;
    }
    if (!a_mnt && !b_mnt)
        return 0;
    if (a_sgn != b_sgn)
        return b_sgn;
    if (a_exp != b_exp)
        return a_sgn ^ (a_exp > b_exp);
    if (a_mnt != b_mnt)
        return a_sgn ^ (a_mnt > b_mnt);
    return 0;
}

static int
fp64_compare_eq(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint64_t a_mnt, b_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((a_exp == 2047 && (uint64_t)(a_mnt << 12)) ||
        (b_exp == 2047 && (uint64_t)(b_mnt << 12))) {
        if ((a_exp == 2047 && (uint64_t)(a_mnt << 12) && !(a >> 51 & 1)) ||
            (b_exp == 2047 && (uint64_t)(b_mnt << 12) && !(b >> 51 & 1)))
            *flags |= FPLIB_IOC;
        return 0;
    }
    return a == b || (!a_mnt && !b_mnt);
}

static int
fp64_compare_ge(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint64_t a_mnt, b_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((a_exp == 2047 && (uint64_t)(a_mnt << 12)) ||
        (b_exp == 2047 && (uint64_t)(b_mnt << 12))) {
        *flags |= FPLIB_IOC;
        return 0;
    }
    if (!a_mnt && !b_mnt)
        return 1;
    if (a_sgn != b_sgn)
        return b_sgn;
    if (a_exp != b_exp)
        return a_sgn ^ (a_exp > b_exp);
    if (a_mnt != b_mnt)
        return a_sgn ^ (a_mnt > b_mnt);
    return 1;
}

static int
fp64_compare_gt(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint64_t a_mnt, b_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((a_exp == 2047 && (uint64_t)(a_mnt << 12)) ||
        (b_exp == 2047 && (uint64_t)(b_mnt << 12))) {
        *flags |= FPLIB_IOC;
        return 0;
    }
    if (!a_mnt && !b_mnt)
        return 0;
    if (a_sgn != b_sgn)
        return b_sgn;
    if (a_exp != b_exp)
        return a_sgn ^ (a_exp > b_exp);
    if (a_mnt != b_mnt)
        return a_sgn ^ (a_mnt > b_mnt);
    return 0;
}

// Addition (subtraction when neg != 0, which flips b's sign).  Operands
// are aligned to the larger exponent with shifted-out bits folded into a
// sticky bit, then added or subtracted by magnitude.
static uint32_t
fp32_add(uint32_t a, uint32_t b, int neg, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint32_t a_mnt, b_mnt, x, x_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp32_process_NaNs(a, b, mode, flags))) {
        return x;
    }

    b_sgn ^= neg;

    // Handle infinities and zeroes:
    if (a_exp == 255 && b_exp == 255 && a_sgn != b_sgn) {
        *flags |= FPLIB_IOC;
        return fp32_defaultNaN();
    } else if (a_exp == 255) {
        return fp32_infinity(a_sgn);
    } else if (b_exp == 255) {
        return fp32_infinity(b_sgn);
    } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) {
        return fp32_zero(a_sgn);
    }

    a_mnt <<= 3;
    b_mnt <<= 3;
    if (a_exp >= b_exp) {
        b_mnt = (lsr32(b_mnt, a_exp - b_exp) |
                 !!(b_mnt & (lsl32(1, a_exp - b_exp) - 1)));
        b_exp = a_exp;
    } else {
        a_mnt = (lsr32(a_mnt, b_exp - a_exp) |
                 !!(a_mnt & (lsl32(1, b_exp - a_exp) - 1)));
        a_exp = b_exp;
    }
    x_sgn = a_sgn;
    x_exp = a_exp;
    if (a_sgn == b_sgn) {
        x_mnt = a_mnt + b_mnt;
    } else if (a_mnt >= b_mnt) {
        x_mnt = a_mnt - b_mnt;
    } else {
        x_sgn ^= 1;
        x_mnt = b_mnt - a_mnt;
    }

    if (!x_mnt) {
        // Sign of exact zero result depends on rounding mode
        return fp32_zero((mode & 3) == 2);
    }

    x_mnt = fp32_normalise(x_mnt, &x_exp);

    return fp32_round(x_sgn, x_exp + 5, x_mnt << 1, mode, flags);
}

static uint64_t
fp64_add(uint64_t a, uint64_t b, int neg, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint64_t a_mnt, b_mnt, x, x_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp64_process_NaNs(a, b, mode, flags))) {
        return x;
    }

    b_sgn ^= neg;

    // Handle infinities and zeroes:
    if (a_exp == 2047 && b_exp == 2047 && a_sgn != b_sgn) {
        *flags |= FPLIB_IOC;
        return fp64_defaultNaN();
    } else if (a_exp == 2047) {
        return fp64_infinity(a_sgn);
    } else if (b_exp == 2047) {
        return fp64_infinity(b_sgn);
    } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) {
        return fp64_zero(a_sgn);
    }

    a_mnt <<= 3;
    b_mnt <<= 3;
    if (a_exp >= b_exp) {
        b_mnt = (lsr64(b_mnt, a_exp - b_exp) |
                 !!(b_mnt & (lsl64(1, a_exp - b_exp) - 1)));
        b_exp = a_exp;
    } else {
        a_mnt = (lsr64(a_mnt, b_exp - a_exp) |
                 !!(a_mnt & (lsl64(1, b_exp - a_exp) - 1)));
        a_exp = b_exp;
    }
    x_sgn = a_sgn;
    x_exp = a_exp;
    if (a_sgn == b_sgn) {
        x_mnt = a_mnt + b_mnt;
    } else if (a_mnt >= b_mnt) {
        x_mnt = a_mnt - b_mnt;
    } else {
        x_sgn ^= 1;
        x_mnt = b_mnt - a_mnt;
    }

    if (!x_mnt) {
        // Sign of exact zero result depends on rounding mode
        return fp64_zero((mode & 3) == 2);
    }

    x_mnt = fp64_normalise(x_mnt, &x_exp);

    return fp64_round(x_sgn, x_exp + 8, x_mnt << 1, mode, flags);
}

// Multiplication: the full double-width mantissa product is normalised
// and then collapsed to the rounding width with a sticky low bit.
static uint32_t
fp32_mul(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint32_t a_mnt, b_mnt, x;
    uint64_t x_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp32_process_NaNs(a, b, mode, flags))) {
        return x;
    }

    // Handle infinities and zeroes:
    if ((a_exp == 255 && !b_mnt) || (b_exp == 255 && !a_mnt)) {
        *flags |= FPLIB_IOC;
        return fp32_defaultNaN();
    } else if (a_exp == 255 || b_exp == 255) {
        return fp32_infinity(a_sgn ^ b_sgn);
    } else if (!a_mnt || !b_mnt) {
        return fp32_zero(a_sgn ^ b_sgn);
    }

    // Multiply and normalise:
    x_sgn = a_sgn ^ b_sgn;
    x_exp = a_exp + b_exp - 110;
    x_mnt = (uint64_t)a_mnt * b_mnt;
    x_mnt = fp64_normalise(x_mnt, &x_exp);

    // Convert to 32 bits, collapsing error into bottom bit:
    x_mnt = lsr64(x_mnt, 31) | !!lsl64(x_mnt, 33);

    return fp32_round(x_sgn, x_exp, x_mnt, mode, flags);
}

static uint64_t
fp64_mul(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint64_t a_mnt, b_mnt, x;
    uint64_t x0_mnt, x1_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp64_process_NaNs(a, b, mode, flags))) {
        return x;
    }

    // Handle infinities and zeroes:
    if ((a_exp == 2047 && !b_mnt) || (b_exp == 2047 && !a_mnt)) {
        *flags |= FPLIB_IOC;
        return fp64_defaultNaN();
    } else if (a_exp == 2047 || b_exp == 2047) {
        return fp64_infinity(a_sgn ^ b_sgn);
    } else if (!a_mnt || !b_mnt) {
        return fp64_zero(a_sgn ^ b_sgn);
    }

    // Multiply and normalise:
    x_sgn = a_sgn ^ b_sgn;
    x_exp = a_exp + b_exp - 1000;
    mul62x62(&x0_mnt, &x1_mnt, a_mnt, b_mnt);
    fp128_normalise(&x0_mnt, &x1_mnt, &x_exp);

    // Convert to 64 bits, collapsing error into bottom bit:
    x0_mnt = x1_mnt << 1 | !!x0_mnt;

    return fp64_round(x_sgn, x_exp, x0_mnt, mode, flags);
}

// Fused multiply-add: a + b*c computed exactly before a single rounding.
// "scale" is added to the result exponent before rounding (used by
// callers elsewhere in the file; 0 for a plain fused MAC).
static uint32_t
fp32_muladd(uint32_t a, uint32_t b, uint32_t c, int scale,
            int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp;
    uint32_t a_mnt, b_mnt, c_mnt, x;
    uint64_t x_mnt, y_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
    fp32_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags);

    x = fp32_process_NaNs3(a, b, c, mode, flags);

    // Quiet NaN added to product of zero and infinity:
    if (a_exp == 255 && (a_mnt >> 22 & 1) &&
        ((!b_mnt && c_exp == 255 && !(uint32_t)(c_mnt << 9)) ||
         (!c_mnt && b_exp == 255 && !(uint32_t)(b_mnt << 9)))) {
        x = fp32_defaultNaN();
        *flags |= FPLIB_IOC;
    }

    if (x) {
        return x;
    }

    // Handle infinities and zeroes:
    if ((b_exp == 255 && !c_mnt) ||
        (c_exp == 255 && !b_mnt) ||
        (a_exp == 255 && (b_exp == 255 || c_exp == 255) &&
         (a_sgn != (b_sgn ^ c_sgn)))) {
        *flags |= FPLIB_IOC;
        return fp32_defaultNaN();
    }
    if (a_exp == 255)
        return fp32_infinity(a_sgn);
    if (b_exp == 255 || c_exp == 255)
        return fp32_infinity(b_sgn ^ c_sgn);
    if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn))
        return fp32_zero(a_sgn);

    x_sgn = a_sgn;
    x_exp = a_exp + 13;
    x_mnt = (uint64_t)a_mnt << 27;

    // Multiply:
    y_sgn = b_sgn ^ c_sgn;
    y_exp = b_exp + c_exp - 113;
    y_mnt = (uint64_t)b_mnt * c_mnt << 3;
    if (!y_mnt) {
        y_exp = x_exp;
    }

    // Add:
    if (x_exp >= y_exp) {
        y_mnt = (lsr64(y_mnt, x_exp - y_exp) |
                 !!(y_mnt & (lsl64(1, x_exp - y_exp) - 1)));
        y_exp = x_exp;
    } else {
        x_mnt = (lsr64(x_mnt, y_exp - x_exp) |
                 !!(x_mnt & (lsl64(1, y_exp - x_exp) - 1)));
        x_exp = y_exp;
    }
    if (x_sgn == y_sgn) {
        x_mnt = x_mnt + y_mnt;
    } else if (x_mnt >= y_mnt) {
        x_mnt = x_mnt - y_mnt;
    } else {
        x_sgn ^= 1;
        x_mnt = y_mnt - x_mnt;
    }

    if (!x_mnt) {
        // Sign of exact zero result depends on rounding mode
        return fp32_zero((mode & 3) == 2);
    }

    // Normalise and convert to 32 bits, collapsing error into bottom bit:
    x_mnt = fp64_normalise(x_mnt, &x_exp);
    x_mnt = x_mnt >> 31 | !!(uint32_t)(x_mnt << 1);

    return fp32_round(x_sgn, x_exp + scale, x_mnt, mode, flags);
}

// Double-precision fused multiply-add; the intermediate sum is kept in a
// 128-bit mantissa (x1:x0 / y1:y0) so a + b*c is exact before rounding.
static uint64_t
fp64_muladd(uint64_t a, uint64_t b, uint64_t c, int scale,
            int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp;
    uint64_t a_mnt, b_mnt, c_mnt, x;
    uint64_t x0_mnt, x1_mnt, y0_mnt, y1_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
    fp64_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags);

    x = fp64_process_NaNs3(a, b, c, mode, flags);

    // Quiet NaN added to product of zero and infinity:
    if (a_exp == 2047 && (a_mnt >> 51 & 1) &&
        ((!b_mnt && c_exp == 2047 && !(uint64_t)(c_mnt << 12)) ||
         (!c_mnt && b_exp == 2047 && !(uint64_t)(b_mnt << 12)))) {
        x = fp64_defaultNaN();
        *flags |= FPLIB_IOC;
    }

    if (x) {
        return x;
    }

    // Handle infinities and zeroes:
    if ((b_exp == 2047 && !c_mnt) ||
        (c_exp == 2047 && !b_mnt) ||
        (a_exp == 2047 && (b_exp == 2047 || c_exp == 2047) &&
         (a_sgn != (b_sgn ^ c_sgn)))) {
        *flags |= FPLIB_IOC;
        return fp64_defaultNaN();
    }
    if (a_exp == 2047)
        return fp64_infinity(a_sgn);
    if (b_exp == 2047 || c_exp == 2047)
        return fp64_infinity(b_sgn ^ c_sgn);
    if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn))
        return fp64_zero(a_sgn);

    x_sgn = a_sgn;
    x_exp = a_exp + 11;
    x0_mnt = 0;
    x1_mnt = a_mnt;

    // Multiply:
    y_sgn = b_sgn ^ c_sgn;
    y_exp = b_exp + c_exp - 1003;
    mul62x62(&y0_mnt, &y1_mnt, b_mnt, c_mnt << 3);
    if (!y0_mnt && !y1_mnt) {
        y_exp = x_exp;
    }

    // Add:
    if (x_exp >= y_exp) {
        uint64_t t0, t1;
        // Capture the bits that will be shifted out, to form a sticky bit.
        lsl128(&t0, &t1, y0_mnt, y1_mnt,
               x_exp - y_exp < 128 ? 128 - (x_exp - y_exp) : 0);
        lsr128(&y0_mnt, &y1_mnt, y0_mnt, y1_mnt, x_exp - y_exp);
        y0_mnt |= !!(t0 | t1);
        y_exp = x_exp;
    } else {
        uint64_t t0, t1;
        lsl128(&t0, &t1, x0_mnt, x1_mnt,
               y_exp - x_exp < 128 ? 128 - (y_exp - x_exp) : 0);
        lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y_exp - x_exp);
        x0_mnt |= !!(t0 | t1);
        x_exp = y_exp;
    }
    if (x_sgn == y_sgn) {
        add128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y0_mnt, y1_mnt);
    } else if (cmp128(x0_mnt, x1_mnt, y0_mnt, y1_mnt) >= 0) {
        sub128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y0_mnt, y1_mnt);
    } else {
        x_sgn ^= 1;
        sub128(&x0_mnt, &x1_mnt, y0_mnt, y1_mnt, x0_mnt, x1_mnt);
    }

    if (!x0_mnt && !x1_mnt) {
        // Sign of exact zero result depends on rounding mode
        return fp64_zero((mode & 3) == 2);
    }

    // Normalise and convert to 64 bits, collapsing error into bottom bit:
    fp128_normalise(&x0_mnt, &x1_mnt, &x_exp);
    x0_mnt = x1_mnt << 1 | !!x0_mnt;

    return fp64_round(x_sgn, x_exp + scale, x0_mnt, mode, flags);
}

// Division.  fp32 divides widened integer mantissas directly; fp64 first
// computes a reciprocal estimate of the divisor by Newton-Raphson and
// corrects the quotient by at most one ulp afterwards.
static uint32_t
fp32_div(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint32_t a_mnt, b_mnt, x;
    uint64_t x_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp32_process_NaNs(a, b, mode, flags)))
        return x;

    // Handle infinities and zeroes:
    if ((a_exp == 255 && b_exp == 255) || (!a_mnt && !b_mnt)) {
        *flags |= FPLIB_IOC;
        return fp32_defaultNaN();
    }
    if (a_exp == 255 || !b_mnt) {
        if (a_exp != 255)
            *flags |= FPLIB_DZC;
        return fp32_infinity(a_sgn ^ b_sgn);
    }
    if (!a_mnt || b_exp == 255)
        return fp32_zero(a_sgn ^ b_sgn);

    // Divide, setting bottom bit if inexact:
    a_mnt = fp32_normalise(a_mnt, &a_exp);
    x_sgn = a_sgn ^ b_sgn;
    x_exp = a_exp - b_exp + 172;
    x_mnt = ((uint64_t)a_mnt << 18) / b_mnt;
    x_mnt |= (x_mnt * b_mnt != (uint64_t)a_mnt << 18);

    // Normalise and convert to 32 bits, collapsing error into bottom bit:
    x_mnt = fp64_normalise(x_mnt, &x_exp);
    x_mnt = x_mnt >> 31 | !!(uint32_t)(x_mnt << 1);

    return fp32_round(x_sgn, x_exp, x_mnt, mode, flags);
}

static uint64_t
fp64_div(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp, c;
    uint64_t a_mnt, b_mnt, x, x_mnt, x0_mnt, x1_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp64_process_NaNs(a, b, mode, flags)))
        return x;

    // Handle infinities and zeroes:
    if ((a_exp == 2047 && b_exp == 2047) || (!a_mnt && !b_mnt)) {
        *flags |= FPLIB_IOC;
        return fp64_defaultNaN();
    }
    if (a_exp == 2047 || !b_mnt) {
        if (a_exp != 2047)
            *flags |= FPLIB_DZC;
        return fp64_infinity(a_sgn ^ b_sgn);
    }
    if (!a_mnt || b_exp == 2047)
        return fp64_zero(a_sgn ^ b_sgn);

    // Find reciprocal of divisor with Newton-Raphson:
    a_mnt = fp64_normalise(a_mnt, &a_exp);
    b_mnt = fp64_normalise(b_mnt, &b_exp);
    x_mnt = ~(uint64_t)0 / (b_mnt >> 31);
    mul64x32(&x0_mnt, &x1_mnt, b_mnt, x_mnt);
    sub128(&x0_mnt, &x1_mnt, 0, (uint64_t)1 << 32, x0_mnt, x1_mnt);
    lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 32);
    mul64x32(&x0_mnt, &x1_mnt, x0_mnt, x_mnt);
    lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 33);

    // Multiply by dividend:
    x_sgn = a_sgn ^ b_sgn;
    x_exp = a_exp - b_exp + 1031;
    mul62x62(&x0_mnt, &x1_mnt, x0_mnt, a_mnt >> 2); // xx 62x62 is enough
    lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 4);
    x_mnt = x1_mnt;

    // This is an underestimate, so try adding one:
    mul62x62(&x0_mnt, &x1_mnt, b_mnt >> 2, x_mnt + 1); // xx 62x62 is enough
    c = cmp128(x0_mnt, x1_mnt, 0, a_mnt >> 11);
    if (c <= 0) {
        ++x_mnt;
    }

    x_mnt = fp64_normalise(x_mnt, &x_exp);

    return fp64_round(x_sgn, x_exp, x_mnt << 1 | !!c, mode, flags);
}

static void
set_fpscr0(FPSCR
&fpscr, int flags) 1364{ 1365 if (flags & FPLIB_IDC) { 1366 fpscr.idc = 1; 1367 } 1368 if (flags & FPLIB_IOC) { 1369 fpscr.ioc = 1; 1370 } 1371 if (flags & FPLIB_DZC) { 1372 fpscr.dzc = 1; 1373 } 1374 if (flags & FPLIB_OFC) { 1375 fpscr.ofc = 1; 1376 } 1377 if (flags & FPLIB_UFC) { 1378 fpscr.ufc = 1; 1379 } 1380 if (flags & FPLIB_IXC) { 1381 fpscr.ixc = 1; 1382 } 1383} 1384 1385static uint32_t 1386fp32_sqrt(uint32_t a, int mode, int *flags) 1387{ 1388 int a_sgn, a_exp, x_sgn, x_exp; 1389 uint32_t a_mnt, x, x_mnt; 1390 uint64_t t0, t1; 1391 1392 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 1393 1394 // Handle NaNs: 1395 if (a_exp == 255 && (uint32_t)(a_mnt << 9)) 1396 return fp32_process_NaN(a, mode, flags); 1397 1398 // Handle infinities and zeroes: 1399 if (!a_mnt) { 1400 return fp32_zero(a_sgn); 1401 } 1402 if (a_exp == 255 && !a_sgn) { 1403 return fp32_infinity(a_sgn); 1404 } 1405 if (a_sgn) { 1406 *flags |= FPLIB_IOC; 1407 return fp32_defaultNaN(); 1408 } 1409 1410 a_mnt = fp32_normalise(a_mnt, &a_exp); 1411 if (!(a_exp & 1)) { 1412 ++a_exp; 1413 a_mnt >>= 1; 1414 } 1415 1416 // x = (a * 3 + 5) / 8 1417 x = (a_mnt >> 2) + (a_mnt >> 3) + (5 << 28); 1418 1419 // x = (a / x + x) / 2; // 16-bit accuracy 1420 x = (a_mnt / (x >> 15) + (x >> 16)) << 15; 1421 1422 // x = (a / x + x) / 2; // 16-bit accuracy 1423 x = (a_mnt / (x >> 15) + (x >> 16)) << 15; 1424 1425 // x = (a / x + x) / 2; // 32-bit accuracy 1426 x = ((((uint64_t)a_mnt << 32) / x) >> 2) + (x >> 1); 1427 1428 x_sgn = 0; 1429 x_exp = (a_exp + 147) >> 1; 1430 x_mnt = ((x - (1 << 5)) >> 6) + 1; 1431 t1 = (uint64_t)x_mnt * x_mnt; 1432 t0 = (uint64_t)a_mnt << 19; 1433 if (t1 > t0) { 1434 --x_mnt; 1435 } 1436 1437 x_mnt = fp32_normalise(x_mnt, &x_exp); 1438 1439 return fp32_round(x_sgn, x_exp, x_mnt << 1 | (t1 != t0), mode, flags); 1440} 1441 1442static uint64_t 1443fp64_sqrt(uint64_t a, int mode, int *flags) 1444{ 1445 int a_sgn, a_exp, x_sgn, x_exp, c; 1446 uint64_t a_mnt, x_mnt, r, x0, x1; 1447 
uint32_t x; 1448 1449 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 1450 1451 // Handle NaNs: 1452 if (a_exp == 2047 && (uint64_t)(a_mnt << 12)) { 1453 return fp64_process_NaN(a, mode, flags); 1454 } 1455 1456 // Handle infinities and zeroes: 1457 if (!a_mnt) 1458 return fp64_zero(a_sgn); 1459 if (a_exp == 2047 && !a_sgn) 1460 return fp64_infinity(a_sgn); 1461 if (a_sgn) { 1462 *flags |= FPLIB_IOC; 1463 return fp64_defaultNaN(); 1464 } 1465 1466 a_mnt = fp64_normalise(a_mnt, &a_exp); 1467 if (a_exp & 1) { 1468 ++a_exp; 1469 a_mnt >>= 1; 1470 } 1471 1472 // x = (a * 3 + 5) / 8 1473 x = (a_mnt >> 34) + (a_mnt >> 35) + (5 << 28); 1474 1475 // x = (a / x + x) / 2; // 16-bit accuracy 1476 x = ((a_mnt >> 32) / (x >> 15) + (x >> 16)) << 15; 1477 1478 // x = (a / x + x) / 2; // 16-bit accuracy 1479 x = ((a_mnt >> 32) / (x >> 15) + (x >> 16)) << 15; 1480 1481 // x = (a / x + x) / 2; // 32-bit accuracy 1482 x = ((a_mnt / x) >> 2) + (x >> 1); 1483 1484 // r = 1 / x; // 32-bit accuracy 1485 r = ((uint64_t)1 << 62) / x; 1486 1487 // r = r * (2 - x * r); // 64-bit accuracy 1488 mul64x32(&x0, &x1, -(uint64_t)x * r << 1, r); 1489 lsr128(&x0, &x1, x0, x1, 31); 1490 1491 // x = (x + a * r) / 2; // 64-bit accuracy 1492 mul62x62(&x0, &x1, a_mnt >> 10, x0 >> 2); 1493 lsl128(&x0, &x1, x0, x1, 5); 1494 lsr128(&x0, &x1, x0, x1, 56); 1495 1496 x0 = ((uint64_t)x << 31) + (x0 >> 1); 1497 1498 x_sgn = 0; 1499 x_exp = (a_exp + 1053) >> 1; 1500 x_mnt = x0; 1501 x_mnt = ((x_mnt - (1 << 8)) >> 9) + 1; 1502 mul62x62(&x0, &x1, x_mnt, x_mnt); 1503 lsl128(&x0, &x1, x0, x1, 19); 1504 c = cmp128(x0, x1, 0, a_mnt); 1505 if (c > 0) 1506 --x_mnt; 1507 1508 x_mnt = fp64_normalise(x_mnt, &x_exp); 1509 1510 return fp64_round(x_sgn, x_exp, x_mnt << 1 | !!c, mode, flags); 1511} 1512 1513static int 1514modeConv(FPSCR fpscr) 1515{ 1516 return (((int) fpscr) >> 22) & 0xF; 1517} 1518 1519static void 1520set_fpscr(FPSCR &fpscr, int flags) 1521{ 1522 // translate back to FPSCR 1523 bool underflow = false; 
1524 if (flags & FPLIB_IDC) { 1525 fpscr.idc = 1; 1526 } 1527 if (flags & FPLIB_IOC) { 1528 fpscr.ioc = 1; 1529 } 1530 if (flags & FPLIB_DZC) { 1531 fpscr.dzc = 1; 1532 } 1533 if (flags & FPLIB_OFC) { 1534 fpscr.ofc = 1; 1535 } 1536 if (flags & FPLIB_UFC) { 1537 underflow = true; //xx Why is this required? 1538 fpscr.ufc = 1; 1539 } 1540 if ((flags & FPLIB_IXC) && !(underflow && fpscr.fz)) { 1541 fpscr.ixc = 1; 1542 } 1543} 1544 1545template <> 1546bool 1547fplibCompareEQ(uint32_t a, uint32_t b, FPSCR &fpscr) 1548{ 1549 int flags = 0; 1550 int x = fp32_compare_eq(a, b, modeConv(fpscr), &flags); 1551 set_fpscr(fpscr, flags); 1552 return x; 1553} 1554 1555template <> 1556bool 1557fplibCompareGE(uint32_t a, uint32_t b, FPSCR &fpscr) 1558{ 1559 int flags = 0; 1560 int x = fp32_compare_ge(a, b, modeConv(fpscr), &flags); 1561 set_fpscr(fpscr, flags); 1562 return x; 1563} 1564 1565template <> 1566bool 1567fplibCompareGT(uint32_t a, uint32_t b, FPSCR &fpscr) 1568{ 1569 int flags = 0; 1570 int x = fp32_compare_gt(a, b, modeConv(fpscr), &flags); 1571 set_fpscr(fpscr, flags); 1572 return x; 1573} 1574 1575template <> 1576bool 1577fplibCompareEQ(uint64_t a, uint64_t b, FPSCR &fpscr) 1578{ 1579 int flags = 0; 1580 int x = fp64_compare_eq(a, b, modeConv(fpscr), &flags); 1581 set_fpscr(fpscr, flags); 1582 return x; 1583} 1584 1585template <> 1586bool 1587fplibCompareGE(uint64_t a, uint64_t b, FPSCR &fpscr) 1588{ 1589 int flags = 0; 1590 int x = fp64_compare_ge(a, b, modeConv(fpscr), &flags); 1591 set_fpscr(fpscr, flags); 1592 return x; 1593} 1594 1595template <> 1596bool 1597fplibCompareGT(uint64_t a, uint64_t b, FPSCR &fpscr) 1598{ 1599 int flags = 0; 1600 int x = fp64_compare_gt(a, b, modeConv(fpscr), &flags); 1601 set_fpscr(fpscr, flags); 1602 return x; 1603} 1604 1605template <> 1606uint32_t 1607fplibAbs(uint32_t op) 1608{ 1609 return op & ~((uint32_t)1 << 31); 1610} 1611 1612template <> 1613uint64_t 1614fplibAbs(uint64_t op) 1615{ 1616 return op & ~((uint64_t)1 << 63); 1617} 
1618 1619template <> 1620uint32_t 1621fplibAdd(uint32_t op1, uint32_t op2, FPSCR &fpscr) 1622{ 1623 int flags = 0; 1624 uint32_t result = fp32_add(op1, op2, 0, modeConv(fpscr), &flags); 1625 set_fpscr0(fpscr, flags); 1626 return result; 1627} 1628 1629template <> 1630uint64_t 1631fplibAdd(uint64_t op1, uint64_t op2, FPSCR &fpscr) 1632{ 1633 int flags = 0; 1634 uint64_t result = fp64_add(op1, op2, 0, modeConv(fpscr), &flags); 1635 set_fpscr0(fpscr, flags); 1636 return result; 1637} 1638 1639template <> 1640int 1641fplibCompare(uint32_t op1, uint32_t op2, bool signal_nans, FPSCR &fpscr) 1642{ 1643 int mode = modeConv(fpscr); 1644 int flags = 0; 1645 int sgn1, exp1, sgn2, exp2, result; 1646 uint32_t mnt1, mnt2; 1647 1648 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); 1649 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); 1650 1651 if ((exp1 == 255 && (uint32_t)(mnt1 << 9)) || 1652 (exp2 == 255 && (uint32_t)(mnt2 << 9))) { 1653 result = 3; 1654 if ((exp1 == 255 && (uint32_t)(mnt1 << 9) && !(mnt1 >> 22 & 1)) || 1655 (exp2 == 255 && (uint32_t)(mnt2 << 9) && !(mnt2 >> 22 & 1)) || 1656 signal_nans) 1657 flags |= FPLIB_IOC; 1658 } else { 1659 if (op1 == op2 || (!mnt1 && !mnt2)) { 1660 result = 6; 1661 } else if (sgn1 != sgn2) { 1662 result = sgn1 ? 8 : 2; 1663 } else if (exp1 != exp2) { 1664 result = sgn1 ^ (exp1 < exp2) ? 8 : 2; 1665 } else { 1666 result = sgn1 ^ (mnt1 < mnt2) ? 
8 : 2; 1667 } 1668 } 1669 1670 set_fpscr0(fpscr, flags); 1671 1672 return result; 1673} 1674 1675template <> 1676int 1677fplibCompare(uint64_t op1, uint64_t op2, bool signal_nans, FPSCR &fpscr) 1678{ 1679 int mode = modeConv(fpscr); 1680 int flags = 0; 1681 int sgn1, exp1, sgn2, exp2, result; 1682 uint64_t mnt1, mnt2; 1683 1684 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); 1685 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); 1686 1687 if ((exp1 == 2047 && (uint64_t)(mnt1 << 12)) || 1688 (exp2 == 2047 && (uint64_t)(mnt2 << 12))) { 1689 result = 3; 1690 if ((exp1 == 2047 && (uint64_t)(mnt1 << 12) && !(mnt1 >> 51 & 1)) || 1691 (exp2 == 2047 && (uint64_t)(mnt2 << 12) && !(mnt2 >> 51 & 1)) || 1692 signal_nans) 1693 flags |= FPLIB_IOC; 1694 } else { 1695 if (op1 == op2 || (!mnt1 && !mnt2)) { 1696 result = 6; 1697 } else if (sgn1 != sgn2) { 1698 result = sgn1 ? 8 : 2; 1699 } else if (exp1 != exp2) { 1700 result = sgn1 ^ (exp1 < exp2) ? 8 : 2; 1701 } else { 1702 result = sgn1 ^ (mnt1 < mnt2) ? 
8 : 2; 1703 } 1704 } 1705 1706 set_fpscr0(fpscr, flags); 1707 1708 return result; 1709} 1710 1711static uint16_t 1712fp16_FPConvertNaN_32(uint32_t op) 1713{ 1714 return fp16_pack(op >> 31, 31, (uint16_t)1 << 9 | op >> 13); 1715} 1716 1717static uint16_t 1718fp16_FPConvertNaN_64(uint64_t op) 1719{ 1720 return fp16_pack(op >> 63, 31, (uint16_t)1 << 9 | op >> 42); 1721} 1722 1723static uint32_t 1724fp32_FPConvertNaN_16(uint16_t op) 1725{ 1726 return fp32_pack(op >> 15, 255, (uint32_t)1 << 22 | (uint32_t)op << 13); 1727} 1728 1729static uint32_t 1730fp32_FPConvertNaN_64(uint64_t op) 1731{ 1732 return fp32_pack(op >> 63, 255, (uint32_t)1 << 22 | op >> 29); 1733} 1734 1735static uint64_t 1736fp64_FPConvertNaN_16(uint16_t op) 1737{ 1738 return fp64_pack(op >> 15, 2047, (uint64_t)1 << 51 | (uint64_t)op << 42); 1739} 1740 1741static uint64_t 1742fp64_FPConvertNaN_32(uint32_t op) 1743{ 1744 return fp64_pack(op >> 31, 2047, (uint64_t)1 << 51 | (uint64_t)op << 29); 1745} 1746 1747static uint32_t 1748fp32_FPOnePointFive(int sgn) 1749{ 1750 return fp32_pack(sgn, 127, (uint64_t)1 << 22); 1751} 1752 1753static uint64_t 1754fp64_FPOnePointFive(int sgn) 1755{ 1756 return fp64_pack(sgn, 1023, (uint64_t)1 << 51); 1757} 1758 1759static uint32_t 1760fp32_FPThree(int sgn) 1761{ 1762 return fp32_pack(sgn, 128, (uint64_t)1 << 22); 1763} 1764 1765static uint64_t 1766fp64_FPThree(int sgn) 1767{ 1768 return fp64_pack(sgn, 1024, (uint64_t)1 << 51); 1769} 1770 1771static uint32_t 1772fp32_FPTwo(int sgn) 1773{ 1774 return fp32_pack(sgn, 128, 0); 1775} 1776 1777static uint64_t 1778fp64_FPTwo(int sgn) 1779{ 1780 return fp64_pack(sgn, 1024, 0); 1781} 1782 1783template <> 1784uint16_t 1785fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr) 1786{ 1787 int mode = modeConv(fpscr); 1788 int flags = 0; 1789 int sgn, exp; 1790 uint32_t mnt; 1791 uint16_t result; 1792 1793 // Unpack floating-point operand optionally with flush-to-zero: 1794 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags); 1795 
1796 bool alt_hp = fpscr.ahp; 1797 1798 if (exp == 255 && (uint32_t)(mnt << 9)) { 1799 if (alt_hp) { 1800 result = fp16_zero(sgn); 1801 } else if (fpscr.dn) { 1802 result = fp16_defaultNaN(); 1803 } else { 1804 result = fp16_FPConvertNaN_32(op); 1805 } 1806 if (!(mnt >> 22 & 1) || alt_hp) { 1807 flags |= FPLIB_IOC; 1808 } 1809 } else if (exp == 255) { 1810 if (alt_hp) { 1811 result = sgn << 15 | (uint16_t)0x7fff; 1812 flags |= FPLIB_IOC; 1813 } else { 1814 result = fp16_infinity(sgn); 1815 } 1816 } else if (!mnt) { 1817 result = fp16_zero(sgn); 1818 } else { 1819 result = fp16_round_(sgn, exp - 127 + 15, 1820 mnt >> 7 | !!(uint32_t)(mnt << 25), 1821 rounding, mode | alt_hp << 4, &flags); 1822 } 1823 1824 set_fpscr0(fpscr, flags); 1825 1826 return result; 1827} 1828 1829template <> 1830uint16_t 1831fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr) 1832{ 1833 int mode = modeConv(fpscr); 1834 int flags = 0; 1835 int sgn, exp; 1836 uint64_t mnt; 1837 uint16_t result; 1838 1839 // Unpack floating-point operand optionally with flush-to-zero: 1840 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags); 1841 1842 bool alt_hp = fpscr.ahp; 1843 1844 if (exp == 2047 && (uint64_t)(mnt << 12)) { 1845 if (alt_hp) { 1846 result = fp16_zero(sgn); 1847 } else if (fpscr.dn) { 1848 result = fp16_defaultNaN(); 1849 } else { 1850 result = fp16_FPConvertNaN_64(op); 1851 } 1852 if (!(mnt >> 51 & 1) || alt_hp) { 1853 flags |= FPLIB_IOC; 1854 } 1855 } else if (exp == 2047) { 1856 if (alt_hp) { 1857 result = sgn << 15 | (uint16_t)0x7fff; 1858 flags |= FPLIB_IOC; 1859 } else { 1860 result = fp16_infinity(sgn); 1861 } 1862 } else if (!mnt) { 1863 result = fp16_zero(sgn); 1864 } else { 1865 result = fp16_round_(sgn, exp - 1023 + 15, 1866 mnt >> 36 | !!(uint64_t)(mnt << 28), 1867 rounding, mode | alt_hp << 4, &flags); 1868 } 1869 1870 set_fpscr0(fpscr, flags); 1871 1872 return result; 1873} 1874 1875template <> 1876uint32_t 1877fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr) 
1878{ 1879 int mode = modeConv(fpscr); 1880 int flags = 0; 1881 int sgn, exp; 1882 uint16_t mnt; 1883 uint32_t result; 1884 1885 // Unpack floating-point operand optionally with flush-to-zero: 1886 fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags); 1887 1888 if (exp == 31 && !fpscr.ahp && (uint16_t)(mnt << 6)) { 1889 if (fpscr.dn) { 1890 result = fp32_defaultNaN(); 1891 } else { 1892 result = fp32_FPConvertNaN_16(op); 1893 } 1894 if (!(mnt >> 9 & 1)) { 1895 flags |= FPLIB_IOC; 1896 } 1897 } else if (exp == 31 && !fpscr.ahp) { 1898 result = fp32_infinity(sgn); 1899 } else if (!mnt) { 1900 result = fp32_zero(sgn); 1901 } else { 1902 mnt = fp16_normalise(mnt, &exp); 1903 result = fp32_pack(sgn, exp - 15 + 127 + 5, (uint32_t)mnt << 8); 1904 } 1905 1906 set_fpscr0(fpscr, flags); 1907 1908 return result; 1909} 1910 1911template <> 1912uint32_t 1913fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr) 1914{ 1915 int mode = modeConv(fpscr); 1916 int flags = 0; 1917 int sgn, exp; 1918 uint64_t mnt; 1919 uint32_t result; 1920 1921 // Unpack floating-point operand optionally with flush-to-zero: 1922 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags); 1923 1924 if (exp == 2047 && (uint64_t)(mnt << 12)) { 1925 if (fpscr.dn) { 1926 result = fp32_defaultNaN(); 1927 } else { 1928 result = fp32_FPConvertNaN_64(op); 1929 } 1930 if (!(mnt >> 51 & 1)) { 1931 flags |= FPLIB_IOC; 1932 } 1933 } else if (exp == 2047) { 1934 result = fp32_infinity(sgn); 1935 } else if (!mnt) { 1936 result = fp32_zero(sgn); 1937 } else { 1938 result = fp32_round_(sgn, exp - 1023 + 127, 1939 mnt >> 20 | !!(uint64_t)(mnt << 44), 1940 rounding, mode, &flags); 1941 } 1942 1943 set_fpscr0(fpscr, flags); 1944 1945 return result; 1946} 1947 1948template <> 1949uint64_t 1950fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr) 1951{ 1952 int mode = modeConv(fpscr); 1953 int flags = 0; 1954 int sgn, exp; 1955 uint16_t mnt; 1956 uint64_t result; 1957 1958 // Unpack floating-point operand optionally with 
flush-to-zero: 1959 fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags); 1960 1961 if (exp == 31 && !fpscr.ahp && (uint16_t)(mnt << 6)) { 1962 if (fpscr.dn) { 1963 result = fp64_defaultNaN(); 1964 } else { 1965 result = fp64_FPConvertNaN_16(op); 1966 } 1967 if (!(mnt >> 9 & 1)) { 1968 flags |= FPLIB_IOC; 1969 } 1970 } else if (exp == 31 && !fpscr.ahp) { 1971 result = fp64_infinity(sgn); 1972 } else if (!mnt) { 1973 result = fp64_zero(sgn); 1974 } else { 1975 mnt = fp16_normalise(mnt, &exp); 1976 result = fp64_pack(sgn, exp - 15 + 1023 + 5, (uint64_t)mnt << 37); 1977 } 1978 1979 set_fpscr0(fpscr, flags); 1980 1981 return result; 1982} 1983 1984template <> 1985uint64_t 1986fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr) 1987{ 1988 int mode = modeConv(fpscr); 1989 int flags = 0; 1990 int sgn, exp; 1991 uint32_t mnt; 1992 uint64_t result; 1993 1994 // Unpack floating-point operand optionally with flush-to-zero: 1995 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags); 1996 1997 if (exp == 255 && (uint32_t)(mnt << 9)) { 1998 if (fpscr.dn) { 1999 result = fp64_defaultNaN(); 2000 } else { 2001 result = fp64_FPConvertNaN_32(op); 2002 } 2003 if (!(mnt >> 22 & 1)) { 2004 flags |= FPLIB_IOC; 2005 } 2006 } else if (exp == 255) { 2007 result = fp64_infinity(sgn); 2008 } else if (!mnt) { 2009 result = fp64_zero(sgn); 2010 } else { 2011 mnt = fp32_normalise(mnt, &exp); 2012 result = fp64_pack(sgn, exp - 127 + 1023 + 8, (uint64_t)mnt << 21); 2013 } 2014 2015 set_fpscr0(fpscr, flags); 2016 2017 return result; 2018} 2019 2020template <> 2021uint32_t 2022fplibMulAdd(uint32_t addend, uint32_t op1, uint32_t op2, FPSCR &fpscr) 2023{ 2024 int flags = 0; 2025 uint32_t result = fp32_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags); 2026 set_fpscr0(fpscr, flags); 2027 return result; 2028} 2029 2030template <> 2031uint64_t 2032fplibMulAdd(uint64_t addend, uint64_t op1, uint64_t op2, FPSCR &fpscr) 2033{ 2034 int flags = 0; 2035 uint64_t result = fp64_muladd(addend, op1, op2, 0, 
modeConv(fpscr), &flags); 2036 set_fpscr0(fpscr, flags); 2037 return result; 2038} 2039 2040template <> 2041uint32_t 2042fplibDiv(uint32_t op1, uint32_t op2, FPSCR &fpscr) 2043{ 2044 int flags = 0; 2045 uint32_t result = fp32_div(op1, op2, modeConv(fpscr), &flags); 2046 set_fpscr0(fpscr, flags); 2047 return result; 2048} 2049 2050template <> 2051uint64_t 2052fplibDiv(uint64_t op1, uint64_t op2, FPSCR &fpscr) 2053{ 2054 int flags = 0; 2055 uint64_t result = fp64_div(op1, op2, modeConv(fpscr), &flags); 2056 set_fpscr0(fpscr, flags); 2057 return result; 2058} 2059 2060static uint32_t 2061fp32_repack(int sgn, int exp, uint32_t mnt) 2062{ 2063 return fp32_pack(sgn, mnt >> 23 ? exp : 0, mnt); 2064} 2065 2066static uint64_t 2067fp64_repack(int sgn, int exp, uint64_t mnt) 2068{ 2069 return fp64_pack(sgn, mnt >> 52 ? exp : 0, mnt); 2070} 2071 2072static void 2073fp32_minmaxnum(uint32_t *op1, uint32_t *op2, int sgn) 2074{ 2075 // Treat a single quiet-NaN as +Infinity/-Infinity 2076 if (!((uint32_t)~(*op1 << 1) >> 23) && (uint32_t)~(*op2 << 1) >> 23) 2077 *op1 = fp32_infinity(sgn); 2078 if (!((uint32_t)~(*op2 << 1) >> 23) && (uint32_t)~(*op1 << 1) >> 23) 2079 *op2 = fp32_infinity(sgn); 2080} 2081 2082static void 2083fp64_minmaxnum(uint64_t *op1, uint64_t *op2, int sgn) 2084{ 2085 // Treat a single quiet-NaN as +Infinity/-Infinity 2086 if (!((uint64_t)~(*op1 << 1) >> 52) && (uint64_t)~(*op2 << 1) >> 52) 2087 *op1 = fp64_infinity(sgn); 2088 if (!((uint64_t)~(*op2 << 1) >> 52) && (uint64_t)~(*op1 << 1) >> 52) 2089 *op2 = fp64_infinity(sgn); 2090} 2091 2092template <> 2093uint32_t 2094fplibMax(uint32_t op1, uint32_t op2, FPSCR &fpscr) 2095{ 2096 int mode = modeConv(fpscr); 2097 int flags = 0; 2098 int sgn1, exp1, sgn2, exp2; 2099 uint32_t mnt1, mnt2, x, result; 2100 2101 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); 2102 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); 2103 2104 if ((x = fp32_process_NaNs(op1, op2, mode, &flags))) { 2105 result = x; 2106 } else { 2107 
result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ? 2108 fp32_repack(sgn1, exp1, mnt1) : 2109 fp32_repack(sgn2, exp2, mnt2)); 2110 } 2111 set_fpscr0(fpscr, flags); 2112 return result; 2113} 2114 2115template <> 2116uint64_t 2117fplibMax(uint64_t op1, uint64_t op2, FPSCR &fpscr) 2118{ 2119 int mode = modeConv(fpscr); 2120 int flags = 0; 2121 int sgn1, exp1, sgn2, exp2; 2122 uint64_t mnt1, mnt2, x, result; 2123 2124 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); 2125 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); 2126 2127 if ((x = fp64_process_NaNs(op1, op2, mode, &flags))) { 2128 result = x; 2129 } else { 2130 result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ? 2131 fp64_repack(sgn1, exp1, mnt1) : 2132 fp64_repack(sgn2, exp2, mnt2)); 2133 } 2134 set_fpscr0(fpscr, flags); 2135 return result; 2136} 2137 2138template <> 2139uint32_t 2140fplibMaxNum(uint32_t op1, uint32_t op2, FPSCR &fpscr) 2141{ 2142 fp32_minmaxnum(&op1, &op2, 1); 2143 return fplibMax<uint32_t>(op1, op2, fpscr); 2144} 2145 2146template <> 2147uint64_t 2148fplibMaxNum(uint64_t op1, uint64_t op2, FPSCR &fpscr) 2149{ 2150 fp64_minmaxnum(&op1, &op2, 1); 2151 return fplibMax<uint64_t>(op1, op2, fpscr); 2152} 2153 2154template <> 2155uint32_t 2156fplibMin(uint32_t op1, uint32_t op2, FPSCR &fpscr) 2157{ 2158 int mode = modeConv(fpscr); 2159 int flags = 0; 2160 int sgn1, exp1, sgn2, exp2; 2161 uint32_t mnt1, mnt2, x, result; 2162 2163 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); 2164 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); 2165 2166 if ((x = fp32_process_NaNs(op1, op2, mode, &flags))) { 2167 result = x; 2168 } else { 2169 result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ? 
2170 fp32_repack(sgn1, exp1, mnt1) : 2171 fp32_repack(sgn2, exp2, mnt2)); 2172 } 2173 set_fpscr0(fpscr, flags); 2174 return result; 2175} 2176 2177template <> 2178uint64_t 2179fplibMin(uint64_t op1, uint64_t op2, FPSCR &fpscr) 2180{ 2181 int mode = modeConv(fpscr); 2182 int flags = 0; 2183 int sgn1, exp1, sgn2, exp2; 2184 uint64_t mnt1, mnt2, x, result; 2185 2186 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); 2187 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); 2188 2189 if ((x = fp64_process_NaNs(op1, op2, mode, &flags))) { 2190 result = x; 2191 } else { 2192 result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ? 2193 fp64_repack(sgn1, exp1, mnt1) : 2194 fp64_repack(sgn2, exp2, mnt2)); 2195 } 2196 set_fpscr0(fpscr, flags); 2197 return result; 2198} 2199 2200template <> 2201uint32_t 2202fplibMinNum(uint32_t op1, uint32_t op2, FPSCR &fpscr) 2203{ 2204 fp32_minmaxnum(&op1, &op2, 0); 2205 return fplibMin<uint32_t>(op1, op2, fpscr); 2206} 2207 2208template <> 2209uint64_t 2210fplibMinNum(uint64_t op1, uint64_t op2, FPSCR &fpscr) 2211{ 2212 fp64_minmaxnum(&op1, &op2, 0); 2213 return fplibMin<uint64_t>(op1, op2, fpscr); 2214} 2215 2216template <> 2217uint32_t 2218fplibMul(uint32_t op1, uint32_t op2, FPSCR &fpscr) 2219{ 2220 int flags = 0; 2221 uint32_t result = fp32_mul(op1, op2, modeConv(fpscr), &flags); 2222 set_fpscr0(fpscr, flags); 2223 return result; 2224} 2225 2226template <> 2227uint64_t 2228fplibMul(uint64_t op1, uint64_t op2, FPSCR &fpscr) 2229{ 2230 int flags = 0; 2231 uint64_t result = fp64_mul(op1, op2, modeConv(fpscr), &flags); 2232 set_fpscr0(fpscr, flags); 2233 return result; 2234} 2235 2236template <> 2237uint32_t 2238fplibMulX(uint32_t op1, uint32_t op2, FPSCR &fpscr) 2239{ 2240 int mode = modeConv(fpscr); 2241 int flags = 0; 2242 int sgn1, exp1, sgn2, exp2; 2243 uint32_t mnt1, mnt2, result; 2244 2245 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); 2246 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); 2247 2248 result = 
fp32_process_NaNs(op1, op2, mode, &flags); 2249 if (!result) { 2250 if ((exp1 == 255 && !mnt2) || (exp2 == 255 && !mnt1)) { 2251 result = fp32_FPTwo(sgn1 ^ sgn2); 2252 } else if (exp1 == 255 || exp2 == 255) { 2253 result = fp32_infinity(sgn1 ^ sgn2); 2254 } else if (!mnt1 || !mnt2) { 2255 result = fp32_zero(sgn1 ^ sgn2); 2256 } else { 2257 result = fp32_mul(op1, op2, mode, &flags); 2258 } 2259 } 2260 2261 set_fpscr0(fpscr, flags); 2262 2263 return result; 2264} 2265 2266template <> 2267uint64_t 2268fplibMulX(uint64_t op1, uint64_t op2, FPSCR &fpscr) 2269{ 2270 int mode = modeConv(fpscr); 2271 int flags = 0; 2272 int sgn1, exp1, sgn2, exp2; 2273 uint64_t mnt1, mnt2, result; 2274 2275 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); 2276 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); 2277 2278 result = fp64_process_NaNs(op1, op2, mode, &flags); 2279 if (!result) { 2280 if ((exp1 == 2047 && !mnt2) || (exp2 == 2047 && !mnt1)) { 2281 result = fp64_FPTwo(sgn1 ^ sgn2); 2282 } else if (exp1 == 2047 || exp2 == 2047) { 2283 result = fp64_infinity(sgn1 ^ sgn2); 2284 } else if (!mnt1 || !mnt2) { 2285 result = fp64_zero(sgn1 ^ sgn2); 2286 } else { 2287 result = fp64_mul(op1, op2, mode, &flags); 2288 } 2289 } 2290 2291 set_fpscr0(fpscr, flags); 2292 2293 return result; 2294} 2295 2296template <> 2297uint32_t 2298fplibNeg(uint32_t op) 2299{ 2300 return op ^ (uint32_t)1 << 31; 2301} 2302 2303template <> 2304uint64_t 2305fplibNeg(uint64_t op) 2306{ 2307 return op ^ (uint64_t)1 << 63; 2308} 2309 2310static const uint8_t recip_sqrt_estimate[256] = { 2311 255, 253, 251, 249, 247, 245, 243, 242, 240, 238, 236, 234, 233, 231, 229, 228, 2312 226, 224, 223, 221, 219, 218, 216, 215, 213, 212, 210, 209, 207, 206, 204, 203, 2313 201, 200, 198, 197, 196, 194, 193, 192, 190, 189, 188, 186, 185, 184, 183, 181, 2314 180, 179, 178, 176, 175, 174, 173, 172, 170, 169, 168, 167, 166, 165, 164, 163, 2315 162, 160, 159, 158, 157, 156, 155, 154, 153, 152, 151, 150, 149, 148, 147, 146, 2316 
145, 144, 143, 142, 141, 140, 140, 139, 138, 137, 136, 135, 134, 133, 132, 131, 2317 131, 130, 129, 128, 127, 126, 126, 125, 124, 123, 122, 121, 121, 120, 119, 118, 2318 118, 117, 116, 115, 114, 114, 113, 112, 111, 111, 110, 109, 109, 108, 107, 106, 2319 105, 104, 103, 101, 100, 99, 97, 96, 95, 93, 92, 91, 90, 88, 87, 86, 2320 85, 84, 82, 81, 80, 79, 78, 77, 76, 75, 74, 72, 71, 70, 69, 68, 2321 67, 66, 65, 64, 63, 62, 61, 60, 60, 59, 58, 57, 56, 55, 54, 53, 2322 52, 51, 51, 50, 49, 48, 47, 46, 46, 45, 44, 43, 42, 42, 41, 40, 2323 39, 38, 38, 37, 36, 35, 35, 34, 33, 33, 32, 31, 30, 30, 29, 28, 2324 28, 27, 26, 26, 25, 24, 24, 23, 22, 22, 21, 20, 20, 19, 19, 18, 2325 17, 17, 16, 16, 15, 14, 14, 13, 13, 12, 11, 11, 10, 10, 9, 9, 2326 8, 8, 7, 6, 6, 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 0 2327}; 2328 2329template <> 2330uint32_t 2331fplibRSqrtEstimate(uint32_t op, FPSCR &fpscr) 2332{ 2333 int mode = modeConv(fpscr); 2334 int flags = 0; 2335 int sgn, exp; 2336 uint32_t mnt, result; 2337 2338 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags); 2339 2340 if (exp == 255 && (uint32_t)(mnt << 9)) { 2341 result = fp32_process_NaN(op, mode, &flags); 2342 } else if (!mnt) { 2343 result = fp32_infinity(sgn); 2344 flags |= FPLIB_DZC; 2345 } else if (sgn) { 2346 result = fp32_defaultNaN(); 2347 flags |= FPLIB_IOC; 2348 } else if (exp == 255) { 2349 result = fp32_zero(0); 2350 } else { 2351 exp += 8; 2352 mnt = fp32_normalise(mnt, &exp); 2353 mnt = recip_sqrt_estimate[(~exp & 1) << 7 | (mnt >> 24 & 127)]; 2354 result = fp32_pack(0, (380 - exp) >> 1, mnt << 15); 2355 } 2356 2357 set_fpscr0(fpscr, flags); 2358 2359 return result; 2360} 2361 2362template <> 2363uint64_t 2364fplibRSqrtEstimate(uint64_t op, FPSCR &fpscr) 2365{ 2366 int mode = modeConv(fpscr); 2367 int flags = 0; 2368 int sgn, exp; 2369 uint64_t mnt, result; 2370 2371 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags); 2372 2373 if (exp == 2047 && (uint64_t)(mnt << 12)) { 2374 result = fp64_process_NaN(op, mode, &flags); 2375 } else 
if (!mnt) { 2376 result = fp64_infinity(sgn); 2377 flags |= FPLIB_DZC; 2378 } else if (sgn) { 2379 result = fp64_defaultNaN(); 2380 flags |= FPLIB_IOC; 2381 } else if (exp == 2047) { 2382 result = fp32_zero(0); 2383 } else { 2384 exp += 11; 2385 mnt = fp64_normalise(mnt, &exp); 2386 mnt = recip_sqrt_estimate[(~exp & 1) << 7 | (mnt >> 56 & 127)]; 2387 result = fp64_pack(0, (3068 - exp) >> 1, mnt << 44); 2388 } 2389 2390 set_fpscr0(fpscr, flags); 2391 2392 return result; 2393} 2394 2395template <> 2396uint32_t 2397fplibRSqrtStepFused(uint32_t op1, uint32_t op2, FPSCR &fpscr) 2398{ 2399 int mode = modeConv(fpscr); 2400 int flags = 0; 2401 int sgn1, exp1, sgn2, exp2; 2402 uint32_t mnt1, mnt2, result; 2403 2404 op1 = fplibNeg<uint32_t>(op1); 2405 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); 2406 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); 2407 2408 result = fp32_process_NaNs(op1, op2, mode, &flags); 2409 if (!result) { 2410 if ((exp1 == 255 && !mnt2) || (exp2 == 255 && !mnt1)) { 2411 result = fp32_FPOnePointFive(0); 2412 } else if (exp1 == 255 || exp2 == 255) { 2413 result = fp32_infinity(sgn1 ^ sgn2); 2414 } else { 2415 result = fp32_muladd(fp32_FPThree(0), op1, op2, -1, mode, &flags); 2416 } 2417 } 2418 2419 set_fpscr0(fpscr, flags); 2420 2421 return result; 2422} 2423 2424template <> 2425uint64_t 2426fplibRSqrtStepFused(uint64_t op1, uint64_t op2, FPSCR &fpscr) 2427{ 2428 int mode = modeConv(fpscr); 2429 int flags = 0; 2430 int sgn1, exp1, sgn2, exp2; 2431 uint64_t mnt1, mnt2, result; 2432 2433 op1 = fplibNeg<uint64_t>(op1); 2434 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); 2435 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); 2436 2437 result = fp64_process_NaNs(op1, op2, mode, &flags); 2438 if (!result) { 2439 if ((exp1 == 2047 && !mnt2) || (exp2 == 2047 && !mnt1)) { 2440 result = fp64_FPOnePointFive(0); 2441 } else if (exp1 == 2047 || exp2 == 2047) { 2442 result = fp64_infinity(sgn1 ^ sgn2); 2443 } else { 2444 result = 
fp64_muladd(fp64_FPThree(0), op1, op2, -1, mode, &flags); 2445 } 2446 } 2447 2448 set_fpscr0(fpscr, flags); 2449 2450 return result; 2451} 2452 2453template <> 2454uint32_t 2455fplibRecipStepFused(uint32_t op1, uint32_t op2, FPSCR &fpscr) 2456{ 2457 int mode = modeConv(fpscr); 2458 int flags = 0; 2459 int sgn1, exp1, sgn2, exp2; 2460 uint32_t mnt1, mnt2, result; 2461 2462 op1 = fplibNeg<uint32_t>(op1); 2463 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); 2464 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); 2465 2466 result = fp32_process_NaNs(op1, op2, mode, &flags); 2467 if (!result) { 2468 if ((exp1 == 255 && !mnt2) || (exp2 == 255 && !mnt1)) { 2469 result = fp32_FPTwo(0); 2470 } else if (exp1 == 255 || exp2 == 255) { 2471 result = fp32_infinity(sgn1 ^ sgn2); 2472 } else { 2473 result = fp32_muladd(fp32_FPTwo(0), op1, op2, 0, mode, &flags); 2474 } 2475 } 2476 2477 set_fpscr0(fpscr, flags); 2478 2479 return result; 2480} 2481 2482template <> 2483uint32_t 2484fplibRecipEstimate(uint32_t op, FPSCR &fpscr) 2485{ 2486 int mode = modeConv(fpscr); 2487 int flags = 0; 2488 int sgn, exp; 2489 uint32_t mnt, result; 2490 2491 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags); 2492 2493 if (exp == 255 && (uint32_t)(mnt << 9)) { 2494 result = fp32_process_NaN(op, mode, &flags); 2495 } else if (exp == 255) { 2496 result = fp32_zero(sgn); 2497 } else if (!mnt) { 2498 result = fp32_infinity(sgn); 2499 flags |= FPLIB_DZC; 2500 } else if (!((uint32_t)(op << 1) >> 22)) { 2501 bool overflow_to_inf = false; 2502 switch (FPCRRounding(fpscr)) { 2503 case FPRounding_TIEEVEN: 2504 overflow_to_inf = true; 2505 break; 2506 case FPRounding_POSINF: 2507 overflow_to_inf = !sgn; 2508 break; 2509 case FPRounding_NEGINF: 2510 overflow_to_inf = sgn; 2511 break; 2512 case FPRounding_ZERO: 2513 overflow_to_inf = false; 2514 break; 2515 default: 2516 assert(0); 2517 } 2518 result = overflow_to_inf ? 
fp32_infinity(sgn) : fp32_max_normal(sgn); 2519 flags |= FPLIB_OFC | FPLIB_IXC; 2520 } else if (fpscr.fz && exp >= 253) { 2521 result = fp32_zero(sgn); 2522 flags |= FPLIB_UFC; 2523 } else { 2524 exp += 8; 2525 mnt = fp32_normalise(mnt, &exp); 2526 int result_exp = 253 - exp; 2527 uint32_t fraction = (((uint32_t)1 << 19) / (mnt >> 22 | 1) + 1) >> 1; 2528 fraction <<= 15; 2529 if (result_exp == 0) { 2530 fraction >>= 1; 2531 } else if (result_exp == -1) { 2532 fraction >>= 2; 2533 result_exp = 0; 2534 } 2535 result = fp32_pack(sgn, result_exp, fraction); 2536 } 2537 2538 set_fpscr0(fpscr, flags); 2539 2540 return result; 2541} 2542 2543template <> 2544uint64_t 2545fplibRecipEstimate(uint64_t op, FPSCR &fpscr) 2546{ 2547 int mode = modeConv(fpscr); 2548 int flags = 0; 2549 int sgn, exp; 2550 uint64_t mnt, result; 2551 2552 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags); 2553 2554 if (exp == 2047 && (uint64_t)(mnt << 12)) { 2555 result = fp64_process_NaN(op, mode, &flags); 2556 } else if (exp == 2047) { 2557 result = fp64_zero(sgn); 2558 } else if (!mnt) { 2559 result = fp64_infinity(sgn); 2560 flags |= FPLIB_DZC; 2561 } else if (!((uint64_t)(op << 1) >> 51)) { 2562 bool overflow_to_inf = false; 2563 switch (FPCRRounding(fpscr)) { 2564 case FPRounding_TIEEVEN: 2565 overflow_to_inf = true; 2566 break; 2567 case FPRounding_POSINF: 2568 overflow_to_inf = !sgn; 2569 break; 2570 case FPRounding_NEGINF: 2571 overflow_to_inf = sgn; 2572 break; 2573 case FPRounding_ZERO: 2574 overflow_to_inf = false; 2575 break; 2576 default: 2577 assert(0); 2578 } 2579 result = overflow_to_inf ? 
fp64_infinity(sgn) : fp64_max_normal(sgn); 2580 flags |= FPLIB_OFC | FPLIB_IXC; 2581 } else if (fpscr.fz && exp >= 2045) { 2582 result = fp64_zero(sgn); 2583 flags |= FPLIB_UFC; 2584 } else { 2585 exp += 11; 2586 mnt = fp64_normalise(mnt, &exp); 2587 int result_exp = 2045 - exp; 2588 uint64_t fraction = (((uint32_t)1 << 19) / (mnt >> 54 | 1) + 1) >> 1; 2589 fraction <<= 44; 2590 if (result_exp == 0) { 2591 fraction >>= 1; 2592 } else if (result_exp == -1) { 2593 fraction >>= 2; 2594 result_exp = 0; 2595 } 2596 result = fp64_pack(sgn, result_exp, fraction); 2597 } 2598 2599 set_fpscr0(fpscr, flags); 2600 2601 return result; 2602} 2603 2604template <> 2605uint64_t 2606fplibRecipStepFused(uint64_t op1, uint64_t op2, FPSCR &fpscr) 2607{ 2608 int mode = modeConv(fpscr); 2609 int flags = 0; 2610 int sgn1, exp1, sgn2, exp2; 2611 uint64_t mnt1, mnt2, result; 2612 2613 op1 = fplibNeg<uint64_t>(op1); 2614 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); 2615 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); 2616 2617 result = fp64_process_NaNs(op1, op2, mode, &flags); 2618 if (!result) { 2619 if ((exp1 == 2047 && !mnt2) || (exp2 == 2047 && !mnt1)) { 2620 result = fp64_FPTwo(0); 2621 } else if (exp1 == 2047 || exp2 == 2047) { 2622 result = fp64_infinity(sgn1 ^ sgn2); 2623 } else { 2624 result = fp64_muladd(fp64_FPTwo(0), op1, op2, 0, mode, &flags); 2625 } 2626 } 2627 2628 set_fpscr0(fpscr, flags); 2629 2630 return result; 2631} 2632 2633template <> 2634uint32_t 2635fplibRecpX(uint32_t op, FPSCR &fpscr) 2636{ 2637 int mode = modeConv(fpscr); 2638 int flags = 0; 2639 int sgn, exp; 2640 uint32_t mnt, result; 2641 2642 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags); 2643 2644 if (exp == 255 && (uint32_t)(mnt << 9)) { 2645 result = fp32_process_NaN(op, mode, &flags); 2646 } 2647 else { 2648 if (!mnt) { // Zero and denormals 2649 result = fp32_pack(sgn, 254, 0); 2650 } else { // Infinities and normals 2651 result = fp32_pack(sgn, exp ^ 255, 0); 2652 } 2653 } 2654 2655 
set_fpscr0(fpscr, flags); 2656 2657 return result; 2658} 2659 2660template <> 2661uint64_t 2662fplibRecpX(uint64_t op, FPSCR &fpscr) 2663{ 2664 int mode = modeConv(fpscr); 2665 int flags = 0; 2666 int sgn, exp; 2667 uint64_t mnt, result; 2668 2669 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags); 2670 2671 if (exp == 2047 && (uint64_t)(mnt << 12)) { 2672 result = fp64_process_NaN(op, mode, &flags); 2673 } 2674 else { 2675 if (!mnt) { // Zero and denormals 2676 result = fp64_pack(sgn, 2046, 0); 2677 } else { // Infinities and normals 2678 result = fp64_pack(sgn, exp ^ 2047, 0); 2679 } 2680 } 2681 2682 set_fpscr0(fpscr, flags); 2683 2684 return result; 2685} 2686 2687template <> 2688uint32_t 2689fplibRoundInt(uint32_t op, FPRounding rounding, bool exact, FPSCR &fpscr) 2690{ 2691 int mode = modeConv(fpscr); 2692 int flags = 0; 2693 int sgn, exp; 2694 uint32_t mnt, result; 2695 2696 // Unpack using FPCR to determine if subnormals are flushed-to-zero: 2697 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags); 2698 2699 // Handle NaNs, infinities and zeroes: 2700 if (exp == 255 && (uint32_t)(mnt << 9)) { 2701 result = fp32_process_NaN(op, mode, &flags); 2702 } else if (exp == 255) { 2703 result = fp32_infinity(sgn); 2704 } else if (!mnt) { 2705 result = fp32_zero(sgn); 2706 } else if (exp >= 150) { 2707 // There are no fractional bits 2708 result = op; 2709 } else { 2710 // Truncate towards zero: 2711 uint32_t x = 150 - exp >= 32 ? 0 : mnt >> (150 - exp); 2712 int err = exp < 118 ? 
1 : 2713 (mnt << 1 >> (149 - exp) & 3) | (mnt << 2 << (exp - 118) != 0); 2714 switch (rounding) { 2715 case FPRounding_TIEEVEN: 2716 x += (err == 3 || (err == 2 && (x & 1))); 2717 break; 2718 case FPRounding_POSINF: 2719 x += err && !sgn; 2720 break; 2721 case FPRounding_NEGINF: 2722 x += err && sgn; 2723 break; 2724 case FPRounding_ZERO: 2725 break; 2726 case FPRounding_TIEAWAY: 2727 x += err >> 1; 2728 break; 2729 default: 2730 assert(0); 2731 } 2732 2733 if (x == 0) { 2734 result = fp32_zero(sgn); 2735 } else { 2736 exp = 150; 2737 mnt = fp32_normalise(x, &exp); 2738 result = fp32_pack(sgn, exp + 8, mnt >> 8); 2739 } 2740 2741 if (err && exact) 2742 flags |= FPLIB_IXC; 2743 } 2744 2745 set_fpscr0(fpscr, flags); 2746 2747 return result; 2748} 2749 2750template <> 2751uint64_t 2752fplibRoundInt(uint64_t op, FPRounding rounding, bool exact, FPSCR &fpscr) 2753{ 2754 int mode = modeConv(fpscr); 2755 int flags = 0; 2756 int sgn, exp; 2757 uint64_t mnt, result; 2758 2759 // Unpack using FPCR to determine if subnormals are flushed-to-zero: 2760 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags); 2761 2762 // Handle NaNs, infinities and zeroes: 2763 if (exp == 2047 && (uint64_t)(mnt << 12)) { 2764 result = fp64_process_NaN(op, mode, &flags); 2765 } else if (exp == 2047) { 2766 result = fp64_infinity(sgn); 2767 } else if (!mnt) { 2768 result = fp64_zero(sgn); 2769 } else if (exp >= 1075) { 2770 // There are no fractional bits 2771 result = op; 2772 } else { 2773 // Truncate towards zero: 2774 uint64_t x = 1075 - exp >= 64 ? 0 : mnt >> (1075 - exp); 2775 int err = exp < 1011 ? 
1 : 2776 (mnt << 1 >> (1074 - exp) & 3) | (mnt << 2 << (exp - 1011) != 0); 2777 switch (rounding) { 2778 case FPRounding_TIEEVEN: 2779 x += (err == 3 || (err == 2 && (x & 1))); 2780 break; 2781 case FPRounding_POSINF: 2782 x += err && !sgn; 2783 break; 2784 case FPRounding_NEGINF: 2785 x += err && sgn; 2786 break; 2787 case FPRounding_ZERO: 2788 break; 2789 case FPRounding_TIEAWAY: 2790 x += err >> 1; 2791 break; 2792 default: 2793 assert(0); 2794 } 2795 2796 if (x == 0) { 2797 result = fp64_zero(sgn); 2798 } else { 2799 exp = 1075; 2800 mnt = fp64_normalise(x, &exp); 2801 result = fp64_pack(sgn, exp + 11, mnt >> 11); 2802 } 2803 2804 if (err && exact) 2805 flags |= FPLIB_IXC; 2806 } 2807 2808 set_fpscr0(fpscr, flags); 2809 2810 return result; 2811} 2812 2813template <> 2814uint32_t 2815fplibSqrt(uint32_t op, FPSCR &fpscr) 2816{ 2817 int flags = 0; 2818 uint32_t result = fp32_sqrt(op, modeConv(fpscr), &flags); 2819 set_fpscr0(fpscr, flags); 2820 return result; 2821} 2822 2823template <> 2824uint64_t 2825fplibSqrt(uint64_t op, FPSCR &fpscr) 2826{ 2827 int flags = 0; 2828 uint64_t result = fp64_sqrt(op, modeConv(fpscr), &flags); 2829 set_fpscr0(fpscr, flags); 2830 return result; 2831} 2832 2833template <> 2834uint32_t 2835fplibSub(uint32_t op1, uint32_t op2, FPSCR &fpscr) 2836{ 2837 int flags = 0; 2838 uint32_t result = fp32_add(op1, op2, 1, modeConv(fpscr), &flags); 2839 set_fpscr0(fpscr, flags); 2840 return result; 2841} 2842 2843template <> 2844uint64_t 2845fplibSub(uint64_t op1, uint64_t op2, FPSCR &fpscr) 2846{ 2847 int flags = 0; 2848 uint64_t result = fp64_add(op1, op2, 1, modeConv(fpscr), &flags); 2849 set_fpscr0(fpscr, flags); 2850 return result; 2851} 2852 2853static uint64_t 2854FPToFixed_64(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding, 2855 int *flags) 2856{ 2857 uint64_t x; 2858 int err; 2859 2860 if (exp > 1023 + 63) { 2861 *flags = FPLIB_IOC; 2862 return ((uint64_t)!u << 63) - !sgn; 2863 } 2864 2865 x = lsr64(mnt << 11, 1023 + 63 - exp); 
2866 err = (exp > 1023 + 63 - 2 ? 0 : 2867 (lsr64(mnt << 11, 1023 + 63 - 2 - exp) & 3) | 2868 !!(mnt << 11 & (lsl64(1, 1023 + 63 - 2 - exp) - 1))); 2869 2870 switch (rounding) { 2871 case FPRounding_TIEEVEN: 2872 x += (err == 3 || (err == 2 && (x & 1))); 2873 break; 2874 case FPRounding_POSINF: 2875 x += err && !sgn; 2876 break; 2877 case FPRounding_NEGINF: 2878 x += err && sgn; 2879 break; 2880 case FPRounding_ZERO: 2881 break; 2882 case FPRounding_TIEAWAY: 2883 x += err >> 1; 2884 break; 2885 default: 2886 assert(0); 2887 } 2888 2889 if (u ? sgn && x : x > ((uint64_t)1 << 63) - !sgn) { 2890 *flags = FPLIB_IOC; 2891 return ((uint64_t)!u << 63) - !sgn; 2892 } 2893 2894 if (err) { 2895 *flags = FPLIB_IXC; 2896 } 2897 2898 return sgn ? -x : x; 2899} 2900 2901static uint32_t 2902FPToFixed_32(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding, 2903 int *flags) 2904{ 2905 uint64_t x = FPToFixed_64(sgn, exp, mnt, u, rounding, flags); 2906 if (u ? x >= (uint64_t)1 << 32 : 2907 !(x < (uint64_t)1 << 31 || 2908 (uint64_t)-x <= (uint64_t)1 << 31)) { 2909 *flags = FPLIB_IOC; 2910 x = ((uint32_t)!u << 31) - !sgn; 2911 } 2912 return x; 2913} 2914 2915template <> 2916uint32_t 2917fplibFPToFixed(uint32_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr) 2918{ 2919 int flags = 0; 2920 int sgn, exp; 2921 uint32_t mnt, result; 2922 2923 // Unpack using FPCR to determine if subnormals are flushed-to-zero: 2924 fp32_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags); 2925 2926 // If NaN, set cumulative flag or take exception: 2927 if (exp == 255 && (uint32_t)(mnt << 9)) { 2928 flags = FPLIB_IOC; 2929 result = 0; 2930 } else { 2931 result = FPToFixed_32(sgn, exp + 1023 - 127 + fbits, 2932 (uint64_t)mnt << (52 - 23), u, rounding, &flags); 2933 } 2934 2935 set_fpscr0(fpscr, flags); 2936 2937 return result; 2938} 2939 2940template <> 2941uint32_t 2942fplibFPToFixed(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr) 2943{ 2944 int flags = 0; 2945 int sgn, 
exp; 2946 uint64_t mnt; 2947 uint32_t result; 2948 2949 // Unpack using FPCR to determine if subnormals are flushed-to-zero: 2950 fp64_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags); 2951 2952 // If NaN, set cumulative flag or take exception: 2953 if (exp == 2047 && (uint64_t)(mnt << 12)) { 2954 flags = FPLIB_IOC; 2955 result = 0; 2956 } else { 2957 result = FPToFixed_32(sgn, exp + fbits, mnt, u, rounding, &flags); 2958 } 2959 2960 set_fpscr0(fpscr, flags); 2961 2962 return result; 2963} 2964 2965template <> 2966uint64_t 2967fplibFPToFixed(uint32_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr) 2968{ 2969 int flags = 0; 2970 int sgn, exp; 2971 uint32_t mnt; 2972 uint64_t result; 2973 2974 // Unpack using FPCR to determine if subnormals are flushed-to-zero: 2975 fp32_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags); 2976 2977 // If NaN, set cumulative flag or take exception: 2978 if (exp == 255 && (uint32_t)(mnt << 9)) { 2979 flags = FPLIB_IOC; 2980 result = 0; 2981 } else { 2982 result = FPToFixed_64(sgn, exp + 1023 - 127 + fbits, 2983 (uint64_t)mnt << (52 - 23), u, rounding, &flags); 2984 } 2985 2986 set_fpscr0(fpscr, flags); 2987 2988 return result; 2989} 2990 2991template <> 2992uint64_t 2993fplibFPToFixed(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr) 2994{ 2995 int flags = 0; 2996 int sgn, exp; 2997 uint64_t mnt, result; 2998 2999 // Unpack using FPCR to determine if subnormals are flushed-to-zero: 3000 fp64_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags); 3001 3002 // If NaN, set cumulative flag or take exception: 3003 if (exp == 2047 && (uint64_t)(mnt << 12)) { 3004 flags = FPLIB_IOC; 3005 result = 0; 3006 } else { 3007 result = FPToFixed_64(sgn, exp + fbits, mnt, u, rounding, &flags); 3008 } 3009 3010 set_fpscr0(fpscr, flags); 3011 3012 return result; 3013} 3014 3015static uint32_t 3016fp32_cvtf(uint64_t a, int fbits, int u, int mode, int *flags) 3017{ 3018 int x_sgn = !u && a >> 63; 3019 int x_exp = 190 - 
fbits; 3020 uint64_t x_mnt = x_sgn ? -a : a; 3021 3022 // Handle zero: 3023 if (!x_mnt) { 3024 return fp32_zero(0); 3025 } 3026 3027 // Normalise and convert to 32 bits, collapsing error into bottom bit: 3028 x_mnt = fp64_normalise(x_mnt, &x_exp); 3029 x_mnt = x_mnt >> 31 | !!(uint32_t)(x_mnt << 1); 3030 3031 return fp32_round(x_sgn, x_exp, x_mnt, mode, flags); 3032} 3033 3034static uint64_t 3035fp64_cvtf(uint64_t a, int fbits, int u, int mode, int *flags) 3036{ 3037 int x_sgn = !u && a >> 63; 3038 int x_exp = 1024 + 62 - fbits; 3039 uint64_t x_mnt = x_sgn ? -a : a; 3040 3041 // Handle zero: 3042 if (!x_mnt) { 3043 return fp64_zero(0); 3044 } 3045 3046 x_mnt = fp64_normalise(x_mnt, &x_exp); 3047 3048 return fp64_round(x_sgn, x_exp, x_mnt << 1, mode, flags); 3049} 3050 3051template <> 3052uint32_t 3053fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr) 3054{ 3055 int flags = 0; 3056 uint32_t res = fp32_cvtf(op, fbits, u, 3057 (int)rounding | ((uint32_t)fpscr >> 22 & 12), 3058 &flags); 3059 set_fpscr0(fpscr, flags); 3060 return res; 3061} 3062 3063template <> 3064uint64_t 3065fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr) 3066{ 3067 int flags = 0; 3068 uint64_t res = fp64_cvtf(op, fbits, u, 3069 (int)rounding | ((uint32_t)fpscr >> 22 & 12), 3070 &flags); 3071 set_fpscr0(fpscr, flags); 3072 return res; 3073} 3074 3075} 3076