fplib.cc revision 10104:ff709c429b7b
1/* 2* Copyright (c) 2012-2013 ARM Limited 3* All rights reserved 4* 5* The license below extends only to copyright in the software and shall 6* not be construed as granting a license to any other intellectual 7* property including but not limited to intellectual property relating 8* to a hardware implementation of the functionality of the software 9* licensed hereunder. You may use the software subject to the license 10* terms below provided that you ensure that this notice is replicated 11* unmodified and in its entirety in all distributions of the software, 12* modified or unmodified, in source code or in binary form. 13* 14* Redistribution and use in source and binary forms, with or without 15* modification, are permitted provided that the following conditions are 16* met: redistributions of source code must retain the above copyright 17* notice, this list of conditions and the following disclaimer; 18* redistributions in binary form must reproduce the above copyright 19* notice, this list of conditions and the following disclaimer in the 20* documentation and/or other materials provided with the distribution; 21* neither the name of the copyright holders nor the names of its 22* contributors may be used to endorse or promote products derived from 23* this software without specific prior written permission. 24* 25* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 26* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 27* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 28* A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 29* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 30* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 31* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 32* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 33* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 34* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 35* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 36* 37* Authors: Edmund Grimley Evans 38* Thomas Grocutt 39*/ 40 41#include <stdint.h> 42 43#include <cassert> 44 45#include "fplib.hh" 46 47namespace ArmISA 48{ 49 50#define FPLIB_RN 0 51#define FPLIB_RP 1 52#define FPLIB_RM 2 53#define FPLIB_RZ 3 54#define FPLIB_FZ 4 55#define FPLIB_DN 8 56#define FPLIB_AHP 16 57 58#define FPLIB_IDC 128 // Input Denormal 59#define FPLIB_IXC 16 // Inexact 60#define FPLIB_UFC 8 // Underflow 61#define FPLIB_OFC 4 // Overflow 62#define FPLIB_DZC 2 // Division by Zero 63#define FPLIB_IOC 1 // Invalid Operation 64 65static inline uint16_t 66lsl16(uint16_t x, uint32_t shift) 67{ 68 return shift < 16 ? x << shift : 0; 69} 70 71static inline uint16_t 72lsr16(uint16_t x, uint32_t shift) 73{ 74 return shift < 16 ? x >> shift : 0; 75} 76 77static inline uint32_t 78lsl32(uint32_t x, uint32_t shift) 79{ 80 return shift < 32 ? x << shift : 0; 81} 82 83static inline uint32_t 84lsr32(uint32_t x, uint32_t shift) 85{ 86 return shift < 32 ? x >> shift : 0; 87} 88 89static inline uint64_t 90lsl64(uint64_t x, uint32_t shift) 91{ 92 return shift < 64 ? x << shift : 0; 93} 94 95static inline uint64_t 96lsr64(uint64_t x, uint32_t shift) 97{ 98 return shift < 64 ? 
x >> shift : 0; 99} 100 101static inline void 102lsl128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift) 103{ 104 if (shift < 64) { 105 *r1 = x1 << shift | x0 >> (64 - shift); 106 *r0 = x0 << shift; 107 } else if (shift < 128) { 108 *r1 = x0 << (shift - 64); 109 *r0 = 0; 110 } else { 111 *r1 = 0; 112 *r0 = 0; 113 } 114} 115 116static inline void 117lsr128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift) 118{ 119 if (shift < 64) { 120 *r0 = x0 >> shift | x1 << (64 - shift); 121 *r1 = x1 >> shift; 122 } else if (shift < 128) { 123 *r0 = x1 >> (shift - 64); 124 *r1 = 0; 125 } else { 126 *r0 = 0; 127 *r1 = 0; 128 } 129} 130 131static inline void 132mul62x62(uint64_t *x0, uint64_t *x1, uint64_t a, uint64_t b) 133{ 134 uint32_t mask = ((uint32_t)1 << 31) - 1; 135 uint64_t a0 = a & mask; 136 uint64_t a1 = a >> 31 & mask; 137 uint64_t b0 = b & mask; 138 uint64_t b1 = b >> 31 & mask; 139 uint64_t p0 = a0 * b0; 140 uint64_t p2 = a1 * b1; 141 uint64_t p1 = (a0 + a1) * (b0 + b1) - p0 - p2; 142 uint64_t s0 = p0; 143 uint64_t s1 = (s0 >> 31) + p1; 144 uint64_t s2 = (s1 >> 31) + p2; 145 *x0 = (s0 & mask) | (s1 & mask) << 31 | s2 << 62; 146 *x1 = s2 >> 2; 147} 148 149static inline 150void mul64x32(uint64_t *x0, uint64_t *x1, uint64_t a, uint32_t b) 151{ 152 uint64_t t0 = (uint64_t)(uint32_t)a * b; 153 uint64_t t1 = (t0 >> 32) + (a >> 32) * b; 154 *x0 = t1 << 32 | (uint32_t)t0; 155 *x1 = t1 >> 32; 156} 157 158static inline void 159mul64x64(uint64_t *x0, uint64_t *x1, uint64_t a, uint64_t b) 160{ 161 uint64_t a0 = (uint32_t)a; 162 uint64_t a1 = a >> 32; 163 uint64_t b0 = (uint32_t)b; 164 uint64_t b1 = b >> 32; 165 uint64_t t1 = (a0 * b0 >> 32) + a1 * b0; 166 uint64_t t2 = a0 * b1; 167 uint64_t x = ((uint64_t)(uint32_t)t1 + (uint32_t)t2) >> 32; 168 x += t1 >> 32; 169 x += t2 >> 32; 170 x += a1 * b1; 171 *x0 = a * b; 172 *x1 = x; 173} 174 175static inline void 176add128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0, 177 
uint64_t b1) 178{ 179 *x0 = a0 + b0; 180 *x1 = a1 + b1 + (*x0 < a0); 181} 182 183static inline void 184sub128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0, 185 uint64_t b1) 186{ 187 *x0 = a0 - b0; 188 *x1 = a1 - b1 - (*x0 > a0); 189} 190 191static inline int 192cmp128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1) 193{ 194 return (a1 < b1 ? -1 : a1 > b1 ? 1 : a0 < b0 ? -1 : a0 > b0 ? 1 : 0); 195} 196 197static inline uint16_t 198fp16_normalise(uint16_t mnt, int *exp) 199{ 200 int shift; 201 202 if (!mnt) { 203 return 0; 204 } 205 206 for (shift = 8; shift; shift >>= 1) { 207 if (!(mnt >> (16 - shift))) { 208 mnt <<= shift; 209 *exp -= shift; 210 } 211 } 212 return mnt; 213} 214 215static inline uint32_t 216fp32_normalise(uint32_t mnt, int *exp) 217{ 218 int shift; 219 220 if (!mnt) { 221 return 0; 222 } 223 224 for (shift = 16; shift; shift >>= 1) { 225 if (!(mnt >> (32 - shift))) { 226 mnt <<= shift; 227 *exp -= shift; 228 } 229 } 230 return mnt; 231} 232 233static inline uint64_t 234fp64_normalise(uint64_t mnt, int *exp) 235{ 236 int shift; 237 238 if (!mnt) { 239 return 0; 240 } 241 242 for (shift = 32; shift; shift >>= 1) { 243 if (!(mnt >> (64 - shift))) { 244 mnt <<= shift; 245 *exp -= shift; 246 } 247 } 248 return mnt; 249} 250 251static inline void 252fp128_normalise(uint64_t *mnt0, uint64_t *mnt1, int *exp) 253{ 254 uint64_t x0 = *mnt0; 255 uint64_t x1 = *mnt1; 256 int shift; 257 258 if (!x0 && !x1) { 259 return; 260 } 261 262 if (!x1) { 263 x1 = x0; 264 x0 = 0; 265 *exp -= 64; 266 } 267 268 for (shift = 32; shift; shift >>= 1) { 269 if (!(x1 >> (64 - shift))) { 270 x1 = x1 << shift | x0 >> (64 - shift); 271 x0 <<= shift; 272 *exp -= shift; 273 } 274 } 275 276 *mnt0 = x0; 277 *mnt1 = x1; 278} 279 280static inline uint16_t 281fp16_pack(uint16_t sgn, uint16_t exp, uint16_t mnt) 282{ 283 return sgn << 15 | exp << 10 | (mnt & (((uint16_t)1 << 10) - 1)); 284} 285 286static inline uint32_t 287fp32_pack(uint32_t sgn, uint32_t exp, 
uint32_t mnt) 288{ 289 return sgn << 31 | exp << 23 | (mnt & (((uint32_t)1 << 23) - 1)); 290} 291 292static inline uint64_t 293fp64_pack(uint64_t sgn, uint64_t exp, uint64_t mnt) 294{ 295 return (uint64_t)sgn << 63 | exp << 52 | (mnt & (((uint64_t)1 << 52) - 1)); 296} 297 298static inline uint16_t 299fp16_zero(int sgn) 300{ 301 return fp16_pack(sgn, 0, 0); 302} 303 304static inline uint32_t 305fp32_zero(int sgn) 306{ 307 return fp32_pack(sgn, 0, 0); 308} 309 310static inline uint64_t 311fp64_zero(int sgn) 312{ 313 return fp64_pack(sgn, 0, 0); 314} 315 316static inline uint16_t 317fp16_max_normal(int sgn) 318{ 319 return fp16_pack(sgn, 30, -1); 320} 321 322static inline uint32_t 323fp32_max_normal(int sgn) 324{ 325 return fp32_pack(sgn, 254, -1); 326} 327 328static inline uint64_t 329fp64_max_normal(int sgn) 330{ 331 return fp64_pack(sgn, 2046, -1); 332} 333 334static inline uint16_t 335fp16_infinity(int sgn) 336{ 337 return fp16_pack(sgn, 31, 0); 338} 339 340static inline uint32_t 341fp32_infinity(int sgn) 342{ 343 return fp32_pack(sgn, 255, 0); 344} 345 346static inline uint64_t 347fp64_infinity(int sgn) 348{ 349 return fp64_pack(sgn, 2047, 0); 350} 351 352static inline uint16_t 353fp16_defaultNaN() 354{ 355 return fp16_pack(0, 31, (uint16_t)1 << 9); 356} 357 358static inline uint32_t 359fp32_defaultNaN() 360{ 361 return fp32_pack(0, 255, (uint32_t)1 << 22); 362} 363 364static inline uint64_t 365fp64_defaultNaN() 366{ 367 return fp64_pack(0, 2047, (uint64_t)1 << 51); 368} 369 370static inline void 371fp16_unpack(int *sgn, int *exp, uint16_t *mnt, uint16_t x, int mode, 372 int *flags) 373{ 374 *sgn = x >> 15; 375 *exp = x >> 10 & 31; 376 *mnt = x & (((uint16_t)1 << 10) - 1); 377 378 // Handle subnormals: 379 if (*exp) { 380 *mnt |= (uint16_t)1 << 10; 381 } else { 382 ++*exp; 383 // There is no flush to zero in this case! 
384 } 385} 386 387static inline void 388fp32_unpack(int *sgn, int *exp, uint32_t *mnt, uint32_t x, int mode, 389 int *flags) 390{ 391 *sgn = x >> 31; 392 *exp = x >> 23 & 255; 393 *mnt = x & (((uint32_t)1 << 23) - 1); 394 395 // Handle subnormals: 396 if (*exp) { 397 *mnt |= (uint32_t)1 << 23; 398 } else { 399 ++*exp; 400 if ((mode & FPLIB_FZ) && *mnt) { 401 *flags |= FPLIB_IDC; 402 *mnt = 0; 403 } 404 } 405} 406 407static inline void 408fp64_unpack(int *sgn, int *exp, uint64_t *mnt, uint64_t x, int mode, 409 int *flags) 410{ 411 *sgn = x >> 63; 412 *exp = x >> 52 & 2047; 413 *mnt = x & (((uint64_t)1 << 52) - 1); 414 415 // Handle subnormals: 416 if (*exp) { 417 *mnt |= (uint64_t)1 << 52; 418 } else { 419 ++*exp; 420 if ((mode & FPLIB_FZ) && *mnt) { 421 *flags |= FPLIB_IDC; 422 *mnt = 0; 423 } 424 } 425} 426 427static inline uint32_t 428fp32_process_NaN(uint32_t a, int mode, int *flags) 429{ 430 if (!(a >> 22 & 1)) { 431 *flags |= FPLIB_IOC; 432 a |= (uint32_t)1 << 22; 433 } 434 return mode & FPLIB_DN ? fp32_defaultNaN() : a; 435} 436 437static inline uint64_t 438fp64_process_NaN(uint64_t a, int mode, int *flags) 439{ 440 if (!(a >> 51 & 1)) { 441 *flags |= FPLIB_IOC; 442 a |= (uint64_t)1 << 51; 443 } 444 return mode & FPLIB_DN ? 
fp64_defaultNaN() : a; 445} 446 447static uint32_t 448fp32_process_NaNs(uint32_t a, uint32_t b, int mode, int *flags) 449{ 450 int a_exp = a >> 23 & 255; 451 uint32_t a_mnt = a & (((uint32_t)1 << 23) - 1); 452 int b_exp = b >> 23 & 255; 453 uint32_t b_mnt = b & (((uint32_t)1 << 23) - 1); 454 455 // Handle signalling NaNs: 456 if (a_exp == 255 && a_mnt && !(a_mnt >> 22 & 1)) 457 return fp32_process_NaN(a, mode, flags); 458 if (b_exp == 255 && b_mnt && !(b_mnt >> 22 & 1)) 459 return fp32_process_NaN(b, mode, flags); 460 461 // Handle quiet NaNs: 462 if (a_exp == 255 && a_mnt) 463 return fp32_process_NaN(a, mode, flags); 464 if (b_exp == 255 && b_mnt) 465 return fp32_process_NaN(b, mode, flags); 466 467 return 0; 468} 469 470static uint64_t 471fp64_process_NaNs(uint64_t a, uint64_t b, int mode, int *flags) 472{ 473 int a_exp = a >> 52 & 2047; 474 uint64_t a_mnt = a & (((uint64_t)1 << 52) - 1); 475 int b_exp = b >> 52 & 2047; 476 uint64_t b_mnt = b & (((uint64_t)1 << 52) - 1); 477 478 // Handle signalling NaNs: 479 if (a_exp == 2047 && a_mnt && !(a_mnt >> 51 & 1)) 480 return fp64_process_NaN(a, mode, flags); 481 if (b_exp == 2047 && b_mnt && !(b_mnt >> 51 & 1)) 482 return fp64_process_NaN(b, mode, flags); 483 484 // Handle quiet NaNs: 485 if (a_exp == 2047 && a_mnt) 486 return fp64_process_NaN(a, mode, flags); 487 if (b_exp == 2047 && b_mnt) 488 return fp64_process_NaN(b, mode, flags); 489 490 return 0; 491} 492 493static uint32_t 494fp32_process_NaNs3(uint32_t a, uint32_t b, uint32_t c, int mode, int *flags) 495{ 496 int a_exp = a >> 23 & 255; 497 uint32_t a_mnt = a & (((uint32_t)1 << 23) - 1); 498 int b_exp = b >> 23 & 255; 499 uint32_t b_mnt = b & (((uint32_t)1 << 23) - 1); 500 int c_exp = c >> 23 & 255; 501 uint32_t c_mnt = c & (((uint32_t)1 << 23) - 1); 502 503 // Handle signalling NaNs: 504 if (a_exp == 255 && a_mnt && !(a_mnt >> 22 & 1)) 505 return fp32_process_NaN(a, mode, flags); 506 if (b_exp == 255 && b_mnt && !(b_mnt >> 22 & 1)) 507 return 
fp32_process_NaN(b, mode, flags); 508 if (c_exp == 255 && c_mnt && !(c_mnt >> 22 & 1)) 509 return fp32_process_NaN(c, mode, flags); 510 511 // Handle quiet NaNs: 512 if (a_exp == 255 && a_mnt) 513 return fp32_process_NaN(a, mode, flags); 514 if (b_exp == 255 && b_mnt) 515 return fp32_process_NaN(b, mode, flags); 516 if (c_exp == 255 && c_mnt) 517 return fp32_process_NaN(c, mode, flags); 518 519 return 0; 520} 521 522static uint64_t 523fp64_process_NaNs3(uint64_t a, uint64_t b, uint64_t c, int mode, int *flags) 524{ 525 int a_exp = a >> 52 & 2047; 526 uint64_t a_mnt = a & (((uint64_t)1 << 52) - 1); 527 int b_exp = b >> 52 & 2047; 528 uint64_t b_mnt = b & (((uint64_t)1 << 52) - 1); 529 int c_exp = c >> 52 & 2047; 530 uint64_t c_mnt = c & (((uint64_t)1 << 52) - 1); 531 532 // Handle signalling NaNs: 533 if (a_exp == 2047 && a_mnt && !(a_mnt >> 51 & 1)) 534 return fp64_process_NaN(a, mode, flags); 535 if (b_exp == 2047 && b_mnt && !(b_mnt >> 51 & 1)) 536 return fp64_process_NaN(b, mode, flags); 537 if (c_exp == 2047 && c_mnt && !(c_mnt >> 51 & 1)) 538 return fp64_process_NaN(c, mode, flags); 539 540 // Handle quiet NaNs: 541 if (a_exp == 2047 && a_mnt) 542 return fp64_process_NaN(a, mode, flags); 543 if (b_exp == 2047 && b_mnt) 544 return fp64_process_NaN(b, mode, flags); 545 if (c_exp == 2047 && c_mnt) 546 return fp64_process_NaN(c, mode, flags); 547 548 return 0; 549} 550 551static uint16_t 552fp16_round_(int sgn, int exp, uint16_t mnt, int rm, int mode, int *flags) 553{ 554 int biased_exp; // non-negative exponent value for result 555 uint16_t int_mant; // mantissa for result, less than (1 << 11) 556 int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5 557 558 assert(rm != FPRounding_TIEAWAY); 559 560 // There is no flush to zero in this case! 
561 562 // The bottom 5 bits of mnt are orred together: 563 mnt = (uint16_t)1 << 12 | mnt >> 4 | ((mnt & 31) != 0); 564 565 if (exp > 0) { 566 biased_exp = exp; 567 int_mant = mnt >> 2; 568 error = mnt & 3; 569 } else { 570 biased_exp = 0; 571 int_mant = lsr16(mnt, 3 - exp); 572 error = (lsr16(mnt, 1 - exp) & 3) | !!(mnt & (lsl16(1, 1 - exp) - 1)); 573 } 574 575 if (!biased_exp && error) { // xx should also check fpscr_val<11> 576 *flags |= FPLIB_UFC; 577 } 578 579 // Round up: 580 if ((rm == FPLIB_RN && (error == 3 || 581 (error == 2 && (int_mant & 1)))) || 582 (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) { 583 ++int_mant; 584 if (int_mant == (uint32_t)1 << 10) { 585 // Rounded up from denormalized to normalized 586 biased_exp = 1; 587 } 588 if (int_mant == (uint32_t)1 << 11) { 589 // Rounded up to next exponent 590 ++biased_exp; 591 int_mant >>= 1; 592 } 593 } 594 595 // Handle rounding to odd aka Von Neumann rounding: 596 if (error && rm == FPRounding_ODD) 597 int_mant |= 1; 598 599 // Handle overflow: 600 if (!(mode & FPLIB_AHP)) { 601 if (biased_exp >= 31) { 602 *flags |= FPLIB_OFC | FPLIB_IXC; 603 if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) || 604 (rm == FPLIB_RM && sgn)) { 605 return fp16_infinity(sgn); 606 } else { 607 return fp16_max_normal(sgn); 608 } 609 } 610 } else { 611 if (biased_exp >= 32) { 612 *flags |= FPLIB_IOC; 613 return fp16_pack(sgn, 31, -1); 614 } 615 } 616 617 if (error) { 618 *flags |= FPLIB_IXC; 619 } 620 621 return fp16_pack(sgn, biased_exp, int_mant); 622} 623 624static uint32_t 625fp32_round_(int sgn, int exp, uint32_t mnt, int rm, int mode, int *flags) 626{ 627 int biased_exp; // non-negative exponent value for result 628 uint32_t int_mant; // mantissa for result, less than (1 << 24) 629 int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5 630 631 assert(rm != FPRounding_TIEAWAY); 632 633 // Flush to zero: 634 if ((mode & FPLIB_FZ) && exp < 1) { 635 *flags |= FPLIB_UFC; 636 return 
fp32_zero(sgn); 637 } 638 639 // The bottom 8 bits of mnt are orred together: 640 mnt = (uint32_t)1 << 25 | mnt >> 7 | ((mnt & 255) != 0); 641 642 if (exp > 0) { 643 biased_exp = exp; 644 int_mant = mnt >> 2; 645 error = mnt & 3; 646 } else { 647 biased_exp = 0; 648 int_mant = lsr32(mnt, 3 - exp); 649 error = (lsr32(mnt, 1 - exp) & 3) | !!(mnt & (lsl32(1, 1 - exp) - 1)); 650 } 651 652 if (!biased_exp && error) { // xx should also check fpscr_val<11> 653 *flags |= FPLIB_UFC; 654 } 655 656 // Round up: 657 if ((rm == FPLIB_RN && (error == 3 || 658 (error == 2 && (int_mant & 1)))) || 659 (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) { 660 ++int_mant; 661 if (int_mant == (uint32_t)1 << 23) { 662 // Rounded up from denormalized to normalized 663 biased_exp = 1; 664 } 665 if (int_mant == (uint32_t)1 << 24) { 666 // Rounded up to next exponent 667 ++biased_exp; 668 int_mant >>= 1; 669 } 670 } 671 672 // Handle rounding to odd aka Von Neumann rounding: 673 if (error && rm == FPRounding_ODD) 674 int_mant |= 1; 675 676 // Handle overflow: 677 if (biased_exp >= 255) { 678 *flags |= FPLIB_OFC | FPLIB_IXC; 679 if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) || 680 (rm == FPLIB_RM && sgn)) { 681 return fp32_infinity(sgn); 682 } else { 683 return fp32_max_normal(sgn); 684 } 685 } 686 687 if (error) { 688 *flags |= FPLIB_IXC; 689 } 690 691 return fp32_pack(sgn, biased_exp, int_mant); 692} 693 694static uint32_t 695fp32_round(int sgn, int exp, uint32_t mnt, int mode, int *flags) 696{ 697 return fp32_round_(sgn, exp, mnt, mode & 3, mode, flags); 698} 699 700static uint64_t 701fp64_round_(int sgn, int exp, uint64_t mnt, int rm, int mode, int *flags) 702{ 703 int biased_exp; // non-negative exponent value for result 704 uint64_t int_mant; // mantissa for result, less than (1 << 52) 705 int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5 706 707 assert(rm != FPRounding_TIEAWAY); 708 709 // Flush to zero: 710 if ((mode & FPLIB_FZ) && exp < 1) { 
711 *flags |= FPLIB_UFC; 712 return fp64_zero(sgn); 713 } 714 715 // The bottom 11 bits of mnt are orred together: 716 mnt = (uint64_t)1 << 54 | mnt >> 10 | ((mnt & 0x3ff) != 0); 717 718 if (exp > 0) { 719 biased_exp = exp; 720 int_mant = mnt >> 2; 721 error = mnt & 3; 722 } else { 723 biased_exp = 0; 724 int_mant = lsr64(mnt, 3 - exp); 725 error = (lsr64(mnt, 1 - exp) & 3) | !!(mnt & (lsl64(1, 1 - exp) - 1)); 726 } 727 728 if (!biased_exp && error) { // xx should also check fpscr_val<11> 729 *flags |= FPLIB_UFC; 730 } 731 732 // Round up: 733 if ((rm == FPLIB_RN && (error == 3 || 734 (error == 2 && (int_mant & 1)))) || 735 (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) { 736 ++int_mant; 737 if (int_mant == (uint64_t)1 << 52) { 738 // Rounded up from denormalized to normalized 739 biased_exp = 1; 740 } 741 if (int_mant == (uint64_t)1 << 53) { 742 // Rounded up to next exponent 743 ++biased_exp; 744 int_mant >>= 1; 745 } 746 } 747 748 // Handle rounding to odd aka Von Neumann rounding: 749 if (error && rm == FPRounding_ODD) 750 int_mant |= 1; 751 752 // Handle overflow: 753 if (biased_exp >= 2047) { 754 *flags |= FPLIB_OFC | FPLIB_IXC; 755 if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) || 756 (rm == FPLIB_RM && sgn)) { 757 return fp64_infinity(sgn); 758 } else { 759 return fp64_max_normal(sgn); 760 } 761 } 762 763 if (error) { 764 *flags |= FPLIB_IXC; 765 } 766 767 return fp64_pack(sgn, biased_exp, int_mant); 768} 769 770static uint64_t 771fp64_round(int sgn, int exp, uint64_t mnt, int mode, int *flags) 772{ 773 return fp64_round_(sgn, exp, mnt, mode & 3, mode, flags); 774} 775 776static int 777fp32_compare_eq(uint32_t a, uint32_t b, int mode, int *flags) 778{ 779 int a_sgn, a_exp, b_sgn, b_exp; 780 uint32_t a_mnt, b_mnt; 781 782 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 783 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 784 785 if ((a_exp == 255 && (uint32_t)(a_mnt << 9)) || 786 (b_exp == 255 && (uint32_t)(b_mnt << 9))) { 787 if 
((a_exp == 255 && (uint32_t)(a_mnt << 9) && !(a >> 22 & 1)) || 788 (b_exp == 255 && (uint32_t)(b_mnt << 9) && !(b >> 22 & 1))) 789 *flags |= FPLIB_IOC; 790 return 0; 791 } 792 return a == b || (!a_mnt && !b_mnt); 793} 794 795static int 796fp32_compare_ge(uint32_t a, uint32_t b, int mode, int *flags) 797{ 798 int a_sgn, a_exp, b_sgn, b_exp; 799 uint32_t a_mnt, b_mnt; 800 801 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 802 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 803 804 if ((a_exp == 255 && (uint32_t)(a_mnt << 9)) || 805 (b_exp == 255 && (uint32_t)(b_mnt << 9))) { 806 *flags |= FPLIB_IOC; 807 return 0; 808 } 809 if (!a_mnt && !b_mnt) 810 return 1; 811 if (a_sgn != b_sgn) 812 return b_sgn; 813 if (a_exp != b_exp) 814 return a_sgn ^ (a_exp > b_exp); 815 if (a_mnt != b_mnt) 816 return a_sgn ^ (a_mnt > b_mnt); 817 return 1; 818} 819 820static int 821fp32_compare_gt(uint32_t a, uint32_t b, int mode, int *flags) 822{ 823 int a_sgn, a_exp, b_sgn, b_exp; 824 uint32_t a_mnt, b_mnt; 825 826 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 827 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 828 829 if ((a_exp == 255 && (uint32_t)(a_mnt << 9)) || 830 (b_exp == 255 && (uint32_t)(b_mnt << 9))) { 831 *flags |= FPLIB_IOC; 832 return 0; 833 } 834 if (!a_mnt && !b_mnt) 835 return 0; 836 if (a_sgn != b_sgn) 837 return b_sgn; 838 if (a_exp != b_exp) 839 return a_sgn ^ (a_exp > b_exp); 840 if (a_mnt != b_mnt) 841 return a_sgn ^ (a_mnt > b_mnt); 842 return 0; 843} 844 845static int 846fp64_compare_eq(uint64_t a, uint64_t b, int mode, int *flags) 847{ 848 int a_sgn, a_exp, b_sgn, b_exp; 849 uint64_t a_mnt, b_mnt; 850 851 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 852 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 853 854 if ((a_exp == 2047 && (uint64_t)(a_mnt << 12)) || 855 (b_exp == 2047 && (uint64_t)(b_mnt << 12))) { 856 if ((a_exp == 2047 && (uint64_t)(a_mnt << 12) && !(a >> 51 & 1)) || 857 (b_exp == 2047 && (uint64_t)(b_mnt << 12) && !(b 
>> 51 & 1))) 858 *flags |= FPLIB_IOC; 859 return 0; 860 } 861 return a == b || (!a_mnt && !b_mnt); 862} 863 864static int 865fp64_compare_ge(uint64_t a, uint64_t b, int mode, int *flags) 866{ 867 int a_sgn, a_exp, b_sgn, b_exp; 868 uint64_t a_mnt, b_mnt; 869 870 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 871 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 872 873 if ((a_exp == 2047 && (uint64_t)(a_mnt << 12)) || 874 (b_exp == 2047 && (uint64_t)(b_mnt << 12))) { 875 *flags |= FPLIB_IOC; 876 return 0; 877 } 878 if (!a_mnt && !b_mnt) 879 return 1; 880 if (a_sgn != b_sgn) 881 return b_sgn; 882 if (a_exp != b_exp) 883 return a_sgn ^ (a_exp > b_exp); 884 if (a_mnt != b_mnt) 885 return a_sgn ^ (a_mnt > b_mnt); 886 return 1; 887} 888 889static int 890fp64_compare_gt(uint64_t a, uint64_t b, int mode, int *flags) 891{ 892 int a_sgn, a_exp, b_sgn, b_exp; 893 uint64_t a_mnt, b_mnt; 894 895 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 896 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 897 898 if ((a_exp == 2047 && (uint64_t)(a_mnt << 12)) || 899 (b_exp == 2047 && (uint64_t)(b_mnt << 12))) { 900 *flags |= FPLIB_IOC; 901 return 0; 902 } 903 if (!a_mnt && !b_mnt) 904 return 0; 905 if (a_sgn != b_sgn) 906 return b_sgn; 907 if (a_exp != b_exp) 908 return a_sgn ^ (a_exp > b_exp); 909 if (a_mnt != b_mnt) 910 return a_sgn ^ (a_mnt > b_mnt); 911 return 0; 912} 913 914static uint32_t 915fp32_add(uint32_t a, uint32_t b, int neg, int mode, int *flags) 916{ 917 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp; 918 uint32_t a_mnt, b_mnt, x, x_mnt; 919 920 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 921 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 922 923 if ((x = fp32_process_NaNs(a, b, mode, flags))) { 924 return x; 925 } 926 927 b_sgn ^= neg; 928 929 // Handle infinities and zeroes: 930 if (a_exp == 255 && b_exp == 255 && a_sgn != b_sgn) { 931 *flags |= FPLIB_IOC; 932 return fp32_defaultNaN(); 933 } else if (a_exp == 255) { 934 return 
fp32_infinity(a_sgn); 935 } else if (b_exp == 255) { 936 return fp32_infinity(b_sgn); 937 } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) { 938 return fp32_zero(a_sgn); 939 } 940 941 a_mnt <<= 3; 942 b_mnt <<= 3; 943 if (a_exp >= b_exp) { 944 b_mnt = (lsr32(b_mnt, a_exp - b_exp) | 945 !!(b_mnt & (lsl32(1, a_exp - b_exp) - 1))); 946 b_exp = a_exp; 947 } else { 948 a_mnt = (lsr32(a_mnt, b_exp - a_exp) | 949 !!(a_mnt & (lsl32(1, b_exp - a_exp) - 1))); 950 a_exp = b_exp; 951 } 952 x_sgn = a_sgn; 953 x_exp = a_exp; 954 if (a_sgn == b_sgn) { 955 x_mnt = a_mnt + b_mnt; 956 } else if (a_mnt >= b_mnt) { 957 x_mnt = a_mnt - b_mnt; 958 } else { 959 x_sgn ^= 1; 960 x_mnt = b_mnt - a_mnt; 961 } 962 963 if (!x_mnt) { 964 // Sign of exact zero result depends on rounding mode 965 return fp32_zero((mode & 3) == 2); 966 } 967 968 x_mnt = fp32_normalise(x_mnt, &x_exp); 969 970 return fp32_round(x_sgn, x_exp + 5, x_mnt << 1, mode, flags); 971} 972 973static uint64_t 974fp64_add(uint64_t a, uint64_t b, int neg, int mode, int *flags) 975{ 976 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp; 977 uint64_t a_mnt, b_mnt, x, x_mnt; 978 979 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 980 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 981 982 if ((x = fp64_process_NaNs(a, b, mode, flags))) { 983 return x; 984 } 985 986 b_sgn ^= neg; 987 988 // Handle infinities and zeroes: 989 if (a_exp == 2047 && b_exp == 2047 && a_sgn != b_sgn) { 990 *flags |= FPLIB_IOC; 991 return fp64_defaultNaN(); 992 } else if (a_exp == 2047) { 993 return fp64_infinity(a_sgn); 994 } else if (b_exp == 2047) { 995 return fp64_infinity(b_sgn); 996 } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) { 997 return fp64_zero(a_sgn); 998 } 999 1000 a_mnt <<= 3; 1001 b_mnt <<= 3; 1002 if (a_exp >= b_exp) { 1003 b_mnt = (lsr64(b_mnt, a_exp - b_exp) | 1004 !!(b_mnt & (lsl64(1, a_exp - b_exp) - 1))); 1005 b_exp = a_exp; 1006 } else { 1007 a_mnt = (lsr64(a_mnt, b_exp - a_exp) | 1008 !!(a_mnt & (lsl64(1, b_exp - a_exp) - 1))); 
1009 a_exp = b_exp; 1010 } 1011 x_sgn = a_sgn; 1012 x_exp = a_exp; 1013 if (a_sgn == b_sgn) { 1014 x_mnt = a_mnt + b_mnt; 1015 } else if (a_mnt >= b_mnt) { 1016 x_mnt = a_mnt - b_mnt; 1017 } else { 1018 x_sgn ^= 1; 1019 x_mnt = b_mnt - a_mnt; 1020 } 1021 1022 if (!x_mnt) { 1023 // Sign of exact zero result depends on rounding mode 1024 return fp64_zero((mode & 3) == 2); 1025 } 1026 1027 x_mnt = fp64_normalise(x_mnt, &x_exp); 1028 1029 return fp64_round(x_sgn, x_exp + 8, x_mnt << 1, mode, flags); 1030} 1031 1032static uint32_t 1033fp32_mul(uint32_t a, uint32_t b, int mode, int *flags) 1034{ 1035 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp; 1036 uint32_t a_mnt, b_mnt, x; 1037 uint64_t x_mnt; 1038 1039 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 1040 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 1041 1042 if ((x = fp32_process_NaNs(a, b, mode, flags))) { 1043 return x; 1044 } 1045 1046 // Handle infinities and zeroes: 1047 if ((a_exp == 255 && !b_mnt) || (b_exp == 255 && !a_mnt)) { 1048 *flags |= FPLIB_IOC; 1049 return fp32_defaultNaN(); 1050 } else if (a_exp == 255 || b_exp == 255) { 1051 return fp32_infinity(a_sgn ^ b_sgn); 1052 } else if (!a_mnt || !b_mnt) { 1053 return fp32_zero(a_sgn ^ b_sgn); 1054 } 1055 1056 // Multiply and normalise: 1057 x_sgn = a_sgn ^ b_sgn; 1058 x_exp = a_exp + b_exp - 110; 1059 x_mnt = (uint64_t)a_mnt * b_mnt; 1060 x_mnt = fp64_normalise(x_mnt, &x_exp); 1061 1062 // Convert to 32 bits, collapsing error into bottom bit: 1063 x_mnt = lsr64(x_mnt, 31) | !!lsl64(x_mnt, 33); 1064 1065 return fp32_round(x_sgn, x_exp, x_mnt, mode, flags); 1066} 1067 1068static uint64_t 1069fp64_mul(uint64_t a, uint64_t b, int mode, int *flags) 1070{ 1071 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp; 1072 uint64_t a_mnt, b_mnt, x; 1073 uint64_t x0_mnt, x1_mnt; 1074 1075 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 1076 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 1077 1078 if ((x = fp64_process_NaNs(a, b, mode, flags))) { 1079 
return x; 1080 } 1081 1082 // Handle infinities and zeroes: 1083 if ((a_exp == 2047 && !b_mnt) || (b_exp == 2047 && !a_mnt)) { 1084 *flags |= FPLIB_IOC; 1085 return fp64_defaultNaN(); 1086 } else if (a_exp == 2047 || b_exp == 2047) { 1087 return fp64_infinity(a_sgn ^ b_sgn); 1088 } else if (!a_mnt || !b_mnt) { 1089 return fp64_zero(a_sgn ^ b_sgn); 1090 } 1091 1092 // Multiply and normalise: 1093 x_sgn = a_sgn ^ b_sgn; 1094 x_exp = a_exp + b_exp - 1000; 1095 mul62x62(&x0_mnt, &x1_mnt, a_mnt, b_mnt); 1096 fp128_normalise(&x0_mnt, &x1_mnt, &x_exp); 1097 1098 // Convert to 64 bits, collapsing error into bottom bit: 1099 x0_mnt = x1_mnt << 1 | !!x0_mnt; 1100 1101 return fp64_round(x_sgn, x_exp, x0_mnt, mode, flags); 1102} 1103 1104static uint32_t 1105fp32_muladd(uint32_t a, uint32_t b, uint32_t c, int scale, 1106 int mode, int *flags) 1107{ 1108 int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp; 1109 uint32_t a_mnt, b_mnt, c_mnt, x; 1110 uint64_t x_mnt, y_mnt; 1111 1112 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 1113 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 1114 fp32_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags); 1115 1116 x = fp32_process_NaNs3(a, b, c, mode, flags); 1117 1118 // Quiet NaN added to product of zero and infinity: 1119 if (a_exp == 255 && (a_mnt >> 22 & 1) && 1120 ((!b_mnt && c_exp == 255 && !(uint32_t)(c_mnt << 9)) || 1121 (!c_mnt && b_exp == 255 && !(uint32_t)(b_mnt << 9)))) { 1122 x = fp32_defaultNaN(); 1123 *flags |= FPLIB_IOC; 1124 } 1125 1126 if (x) { 1127 return x; 1128 } 1129 1130 // Handle infinities and zeroes: 1131 if ((b_exp == 255 && !c_mnt) || 1132 (c_exp == 255 && !b_mnt) || 1133 (a_exp == 255 && (b_exp == 255 || c_exp == 255) && 1134 (a_sgn != (b_sgn ^ c_sgn)))) { 1135 *flags |= FPLIB_IOC; 1136 return fp32_defaultNaN(); 1137 } 1138 if (a_exp == 255) 1139 return fp32_infinity(a_sgn); 1140 if (b_exp == 255 || c_exp == 255) 1141 return fp32_infinity(b_sgn ^ c_sgn); 1142 if (!a_mnt && (!b_mnt || 
        // (continuation of fp32_muladd, whose head precedes this chunk)
        !c_mnt) && a_sgn == (b_sgn ^ c_sgn))
        return fp32_zero(a_sgn);

    // Addend, widened to a 64-bit working mantissa:
    x_sgn = a_sgn;
    x_exp = a_exp + 13;
    x_mnt = (uint64_t)a_mnt << 27;

    // Multiply:
    y_sgn = b_sgn ^ c_sgn;
    y_exp = b_exp + c_exp - 113;
    y_mnt = (uint64_t)b_mnt * c_mnt << 3;
    if (!y_mnt) {
        y_exp = x_exp;
    }

    // Add: align the smaller operand, folding shifted-out bits into a
    // sticky bit so rounding still sees them.
    if (x_exp >= y_exp) {
        y_mnt = (lsr64(y_mnt, x_exp - y_exp) |
                 !!(y_mnt & (lsl64(1, x_exp - y_exp) - 1)));
        y_exp = x_exp;
    } else {
        x_mnt = (lsr64(x_mnt, y_exp - x_exp) |
                 !!(x_mnt & (lsl64(1, y_exp - x_exp) - 1)));
        x_exp = y_exp;
    }
    if (x_sgn == y_sgn) {
        x_mnt = x_mnt + y_mnt;
    } else if (x_mnt >= y_mnt) {
        x_mnt = x_mnt - y_mnt;
    } else {
        x_sgn ^= 1;
        x_mnt = y_mnt - x_mnt;
    }

    if (!x_mnt) {
        // Sign of exact zero result depends on rounding mode
        return fp32_zero((mode & 3) == 2);
    }

    // Normalise and convert to 32 bits, collapsing error into bottom bit:
    x_mnt = fp64_normalise(x_mnt, &x_exp);
    x_mnt = x_mnt >> 31 | !!(uint32_t)(x_mnt << 1);

    return fp32_round(x_sgn, x_exp + scale, x_mnt, mode, flags);
}

// Double-precision fused multiply-add: a + b * c with a single rounding,
// result scaled by 2^scale.  Uses a 128-bit working mantissa (x0/x1, y0/y1
// pairs).  Returns the raw IEEE-754 encoding; cumulative exception flags
// are OR-ed into *flags.
static uint64_t
fp64_muladd(uint64_t a, uint64_t b, uint64_t c, int scale,
            int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp;
    uint64_t a_mnt, b_mnt, c_mnt, x;
    uint64_t x0_mnt, x1_mnt, y0_mnt, y1_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
    fp64_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags);

    x = fp64_process_NaNs3(a, b, c, mode, flags);

    // Quiet NaN added to product of zero and infinity:
    if (a_exp == 2047 && (a_mnt >> 51 & 1) &&
        ((!b_mnt && c_exp == 2047 && !(uint64_t)(c_mnt << 12)) ||
         (!c_mnt && b_exp == 2047 && !(uint64_t)(b_mnt << 12)))) {
        x = fp64_defaultNaN();
        *flags |= FPLIB_IOC;
    }

    if (x) {
        return x;
    }

    // Handle infinities and zeroes:
    if ((b_exp == 2047 && !c_mnt) ||
        (c_exp == 2047 && !b_mnt) ||
        (a_exp == 2047 && (b_exp == 2047 || c_exp == 2047) &&
         (a_sgn != (b_sgn ^ c_sgn)))) {
        *flags |= FPLIB_IOC;
        return fp64_defaultNaN();
    }
    if (a_exp == 2047)
        return fp64_infinity(a_sgn);
    if (b_exp == 2047 || c_exp == 2047)
        return fp64_infinity(b_sgn ^ c_sgn);
    if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn))
        return fp64_zero(a_sgn);

    // Addend in 128-bit working form:
    x_sgn = a_sgn;
    x_exp = a_exp + 11;
    x0_mnt = 0;
    x1_mnt = a_mnt;

    // Multiply:
    y_sgn = b_sgn ^ c_sgn;
    y_exp = b_exp + c_exp - 1003;
    mul62x62(&y0_mnt, &y1_mnt, b_mnt, c_mnt << 3);
    if (!y0_mnt && !y1_mnt) {
        y_exp = x_exp;
    }

    // Add: align the smaller operand; bits shifted out are captured via the
    // complementary left shift (t0/t1) and folded in as a sticky bit.
    if (x_exp >= y_exp) {
        uint64_t t0, t1;
        lsl128(&t0, &t1, y0_mnt, y1_mnt,
               x_exp - y_exp < 128 ? 128 - (x_exp - y_exp) : 0);
        lsr128(&y0_mnt, &y1_mnt, y0_mnt, y1_mnt, x_exp - y_exp);
        y0_mnt |= !!(t0 | t1);
        y_exp = x_exp;
    } else {
        uint64_t t0, t1;
        lsl128(&t0, &t1, x0_mnt, x1_mnt,
               y_exp - x_exp < 128 ? 128 - (y_exp - x_exp) : 0);
        lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y_exp - x_exp);
        x0_mnt |= !!(t0 | t1);
        x_exp = y_exp;
    }
    if (x_sgn == y_sgn) {
        add128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y0_mnt, y1_mnt);
    } else if (cmp128(x0_mnt, x1_mnt, y0_mnt, y1_mnt) >= 0) {
        sub128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y0_mnt, y1_mnt);
    } else {
        x_sgn ^= 1;
        sub128(&x0_mnt, &x1_mnt, y0_mnt, y1_mnt, x0_mnt, x1_mnt);
    }

    if (!x0_mnt && !x1_mnt) {
        // Sign of exact zero result depends on rounding mode
        return fp64_zero((mode & 3) == 2);
    }

    // Normalise and convert to 64 bits, collapsing error into bottom bit:
    fp128_normalise(&x0_mnt, &x1_mnt, &x_exp);
    x0_mnt = x1_mnt << 1 | !!x0_mnt;

    return fp64_round(x_sgn, x_exp + scale, x0_mnt, mode, flags);
}

// Single-precision divide.  Exceptional operands are resolved first; the
// finite case does a widened integer divide and marks inexactness in the
// bottom (sticky) bit for rounding.
static uint32_t
fp32_div(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint32_t a_mnt, b_mnt, x;
    uint64_t x_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp32_process_NaNs(a, b, mode, flags)))
        return x;

    // Handle infinities and zeroes: inf/inf and 0/0 are invalid,
    // x/0 raises divide-by-zero, inf/x and x/inf are exact.
    if ((a_exp == 255 && b_exp == 255) || (!a_mnt && !b_mnt)) {
        *flags |= FPLIB_IOC;
        return fp32_defaultNaN();
    }
    if (a_exp == 255 || !b_mnt) {
        if (a_exp != 255)
            *flags |= FPLIB_DZC;
        return fp32_infinity(a_sgn ^ b_sgn);
    }
    if (!a_mnt || b_exp == 255)
        return fp32_zero(a_sgn ^ b_sgn);

    // Divide, setting bottom bit if inexact:
    a_mnt = fp32_normalise(a_mnt, &a_exp);
    x_sgn = a_sgn ^ b_sgn;
    x_exp = a_exp - b_exp + 172;
    x_mnt = ((uint64_t)a_mnt << 18) / b_mnt;
    x_mnt |= (x_mnt * b_mnt != (uint64_t)a_mnt << 18);

    // Normalise and convert to 32 bits, collapsing error into bottom bit:
    x_mnt = fp64_normalise(x_mnt, &x_exp);
    x_mnt = x_mnt >> 31 | !!(uint32_t)(x_mnt << 1);

    return fp32_round(x_sgn, x_exp, x_mnt, mode, flags);
}

// Double-precision divide.  The quotient mantissa is produced by a
// Newton-Raphson reciprocal refined through 128-bit fixed-point steps,
// then corrected by at most one ulp via an exact back-multiply check.
static uint64_t
fp64_div(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp, c;
    uint64_t a_mnt, b_mnt, x, x_mnt, x0_mnt, x1_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp64_process_NaNs(a, b, mode, flags)))
        return x;

    // Handle infinities and zeroes:
    if ((a_exp == 2047 && b_exp == 2047) || (!a_mnt && !b_mnt)) {
        *flags |= FPLIB_IOC;
        return fp64_defaultNaN();
    }
    if (a_exp == 2047 || !b_mnt) {
        if (a_exp != 2047)
            *flags |= FPLIB_DZC;
        return fp64_infinity(a_sgn ^ b_sgn);
    }
    if (!a_mnt || b_exp == 2047)
        return fp64_zero(a_sgn ^ b_sgn);

    // Find reciprocal of divisor with Newton-Raphson:
    a_mnt = fp64_normalise(a_mnt, &a_exp);
    b_mnt = fp64_normalise(b_mnt, &b_exp);
    x_mnt = ~(uint64_t)0 / (b_mnt >> 31);
    mul64x32(&x0_mnt, &x1_mnt, b_mnt, x_mnt);
    sub128(&x0_mnt, &x1_mnt, 0, (uint64_t)1 << 32, x0_mnt, x1_mnt);
    lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 32);
    mul64x32(&x0_mnt, &x1_mnt, x0_mnt, x_mnt);
    lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 33);

    // Multiply by dividend:
    x_sgn = a_sgn ^ b_sgn;
    x_exp = a_exp - b_exp + 1031;
    mul62x62(&x0_mnt, &x1_mnt, x0_mnt, a_mnt >> 2); // xx 62x62 is enough
    lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 4);
    x_mnt = x1_mnt;

    // This is an underestimate, so try adding one:
    mul62x62(&x0_mnt, &x1_mnt, b_mnt >> 2, x_mnt + 1); // xx 62x62 is enough
    c = cmp128(x0_mnt, x1_mnt, 0, a_mnt >> 11);
    if (c <= 0) {
        ++x_mnt;
    }

    x_mnt = fp64_normalise(x_mnt, &x_exp);

    // c != 0 marks an inexact quotient; it becomes the sticky bit.
    return fp64_round(x_sgn, x_exp, x_mnt << 1 | !!c, mode, flags);
}

// Translate internal FPLIB_* exception flags into the FPSCR cumulative
// status bits.  (Signature continues in the next chunk.)
static void
set_fpscr0(FPSCR
&fpscr, int flags)
{
    if (flags & FPLIB_IDC) {
        fpscr.idc = 1;
    }
    if (flags & FPLIB_IOC) {
        fpscr.ioc = 1;
    }
    if (flags & FPLIB_DZC) {
        fpscr.dzc = 1;
    }
    if (flags & FPLIB_OFC) {
        fpscr.ofc = 1;
    }
    if (flags & FPLIB_UFC) {
        fpscr.ufc = 1;
    }
    if (flags & FPLIB_IXC) {
        fpscr.ixc = 1;
    }
}

// Single-precision square root.  After the special cases, the root is
// approximated by Newton-Raphson iterations on a fixed-point estimate and
// then corrected by an exact squaring check (t1 vs t0).
static uint32_t
fp32_sqrt(uint32_t a, int mode, int *flags)
{
    int a_sgn, a_exp, x_sgn, x_exp;
    uint32_t a_mnt, x, x_mnt;
    uint64_t t0, t1;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);

    // Handle NaNs:
    if (a_exp == 255 && (uint32_t)(a_mnt << 9))
        return fp32_process_NaN(a, mode, flags);

    // Handle infinities and zeroes:
    if (!a_mnt) {
        return fp32_zero(a_sgn);
    }
    if (a_exp == 255 && !a_sgn) {
        return fp32_infinity(a_sgn);
    }
    if (a_sgn) {
        // sqrt of a negative (non-zero) value is invalid.
        *flags |= FPLIB_IOC;
        return fp32_defaultNaN();
    }

    // Make the exponent even so its half is exact:
    a_mnt = fp32_normalise(a_mnt, &a_exp);
    if (!(a_exp & 1)) {
        ++a_exp;
        a_mnt >>= 1;
    }

    // x = (a * 3 + 5) / 8
    x = (a_mnt >> 2) + (a_mnt >> 3) + (5 << 28);

    // x = (a / x + x) / 2; // 16-bit accuracy
    x = (a_mnt / (x >> 15) + (x >> 16)) << 15;

    // x = (a / x + x) / 2; // 16-bit accuracy
    x = (a_mnt / (x >> 15) + (x >> 16)) << 15;

    // x = (a / x + x) / 2; // 32-bit accuracy
    x = ((((uint64_t)a_mnt << 32) / x) >> 2) + (x >> 1);

    x_sgn = 0;
    x_exp = (a_exp + 147) >> 1;
    x_mnt = ((x - (1 << 5)) >> 6) + 1;
    // Exact check: does x_mnt^2 overshoot the operand?
    t1 = (uint64_t)x_mnt * x_mnt;
    t0 = (uint64_t)a_mnt << 19;
    if (t1 > t0) {
        --x_mnt;
    }

    x_mnt = fp32_normalise(x_mnt, &x_exp);

    // (t1 != t0) is the inexact/sticky bit.
    return fp32_round(x_sgn, x_exp, x_mnt << 1 | (t1 != t0), mode, flags);
}

// Double-precision square root: same scheme as fp32_sqrt but with an extra
// 64-bit Newton-Raphson refinement carried out in 128-bit fixed point.
static uint64_t
fp64_sqrt(uint64_t a, int mode, int *flags)
{
    int a_sgn, a_exp, x_sgn, x_exp, c;
    uint64_t a_mnt, x_mnt, r, x0, x1;
    uint32_t x;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);

    // Handle NaNs:
    if (a_exp == 2047 && (uint64_t)(a_mnt << 12)) {
        return fp64_process_NaN(a, mode, flags);
    }

    // Handle infinities and zeroes:
    if (!a_mnt)
        return fp64_zero(a_sgn);
    if (a_exp == 2047 && !a_sgn)
        return fp64_infinity(a_sgn);
    if (a_sgn) {
        *flags |= FPLIB_IOC;
        return fp64_defaultNaN();
    }

    a_mnt = fp64_normalise(a_mnt, &a_exp);
    if (a_exp & 1) {
        ++a_exp;
        a_mnt >>= 1;
    }

    // x = (a * 3 + 5) / 8
    x = (a_mnt >> 34) + (a_mnt >> 35) + (5 << 28);

    // x = (a / x + x) / 2; // 16-bit accuracy
    x = ((a_mnt >> 32) / (x >> 15) + (x >> 16)) << 15;

    // x = (a / x + x) / 2; // 16-bit accuracy
    x = ((a_mnt >> 32) / (x >> 15) + (x >> 16)) << 15;

    // x = (a / x + x) / 2; // 32-bit accuracy
    x = ((a_mnt / x) >> 2) + (x >> 1);

    // r = 1 / x; // 32-bit accuracy
    r = ((uint64_t)1 << 62) / x;

    // r = r * (2 - x * r); // 64-bit accuracy
    mul64x32(&x0, &x1, -(uint64_t)x * r << 1, r);
    lsr128(&x0, &x1, x0, x1, 31);

    // x = (x + a * r) / 2; // 64-bit accuracy
    mul62x62(&x0, &x1, a_mnt >> 10, x0 >> 2);
    lsl128(&x0, &x1, x0, x1, 5);
    lsr128(&x0, &x1, x0, x1, 56);

    x0 = ((uint64_t)x << 31) + (x0 >> 1);

    x_sgn = 0;
    x_exp = (a_exp + 1053) >> 1;
    x_mnt = x0;
    x_mnt = ((x_mnt - (1 << 8)) >> 9) + 1;
    // Exact check via 128-bit squaring; back off one ulp on overshoot.
    mul62x62(&x0, &x1, x_mnt, x_mnt);
    lsl128(&x0, &x1, x0, x1, 19);
    c = cmp128(x0, x1, 0, a_mnt);
    if (c > 0)
        --x_mnt;

    x_mnt = fp64_normalise(x_mnt, &x_exp);

    return fp64_round(x_sgn, x_exp, x_mnt << 1 | !!c, mode, flags);
}

// Extract the fplib mode bits (rounding mode, FZ, DN, AHP) from the
// FPSCR control word.
static int
modeConv(FPSCR fpscr)
{
    return (((int) fpscr) >> 22) & 0xF;
}

// Translate internal exception flags into FPSCR status bits; unlike
// set_fpscr0, inexact is suppressed when it only accompanied a
// flush-to-zero underflow.
static void
set_fpscr(FPSCR &fpscr, int flags)
{
    // translate back to FPSCR
    bool underflow = false;
    if (flags & FPLIB_IDC) {
        fpscr.idc = 1;
    }
    if (flags & FPLIB_IOC) {
        fpscr.ioc = 1;
    }
    if (flags & FPLIB_DZC) {
        fpscr.dzc = 1;
    }
    if (flags & FPLIB_OFC) {
        fpscr.ofc = 1;
    }
    if (flags & FPLIB_UFC) {
        underflow = true; //xx Why is this required?
        fpscr.ufc = 1;
    }
    if ((flags & FPLIB_IXC) && !(underflow && fpscr.fz)) {
        fpscr.ixc = 1;
    }
}

// Single-precision compare wrappers: delegate to the fp32_compare_*
// helpers and accumulate exception flags into the FPSCR.
template <>
bool
fplibCompareEQ(uint32_t a, uint32_t b, FPSCR &fpscr)
{
    int flags = 0;
    int x = fp32_compare_eq(a, b, modeConv(fpscr), &flags);
    set_fpscr(fpscr, flags);
    return x;
}

template <>
bool
fplibCompareGE(uint32_t a, uint32_t b, FPSCR &fpscr)
{
    int flags = 0;
    int x = fp32_compare_ge(a, b, modeConv(fpscr), &flags);
    set_fpscr(fpscr, flags);
    return x;
}

template <>
bool
fplibCompareGT(uint32_t a, uint32_t b, FPSCR &fpscr)
{
    int flags = 0;
    int x = fp32_compare_gt(a, b, modeConv(fpscr), &flags);
    set_fpscr(fpscr, flags);
    return x;
}

// Double-precision compare wrappers.
template <>
bool
fplibCompareEQ(uint64_t a, uint64_t b, FPSCR &fpscr)
{
    int flags = 0;
    int x = fp64_compare_eq(a, b, modeConv(fpscr), &flags);
    set_fpscr(fpscr, flags);
    return x;
}

template <>
bool
fplibCompareGE(uint64_t a, uint64_t b, FPSCR &fpscr)
{
    int flags = 0;
    int x = fp64_compare_ge(a, b, modeConv(fpscr), &flags);
    set_fpscr(fpscr, flags);
    return x;
}

template <>
bool
fplibCompareGT(uint64_t a, uint64_t b, FPSCR &fpscr)
{
    int flags = 0;
    int x = fp64_compare_gt(a, b, modeConv(fpscr), &flags);
    set_fpscr(fpscr, flags);
    return x;
}

// Absolute value: clear the sign bit.  Purely bitwise, so NaNs and
// denormals pass through without exceptions.
template <>
uint32_t
fplibAbs(uint32_t op)
{
    return op & ~((uint32_t)1 << 31);
}

template <>
uint64_t
fplibAbs(uint64_t op)
{
    return op & ~((uint64_t)1 << 63);
}

// Rounded addition wrappers over fp32_add/fp64_add.
template <>
uint32_t
fplibAdd(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    int flags = 0;
    uint32_t result = fp32_add(op1, op2, 0, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}

template <>
uint64_t
fplibAdd(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    int flags = 0;
    uint64_t result = fp64_add(op1, op2, 0, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}

// Three-way compare returning a 4-bit condition code:
// 3 = unordered, 6 = equal, 8 = less than, 2 = greater than
// (NOTE(review): presumably the ARM NZCV nibble for FCMP — confirm at
// the call site).  Signalling NaNs, or any NaN when signal_nans is set,
// raise Invalid Operation.
template <>
int
fplibCompare(uint32_t op1, uint32_t op2, bool signal_nans, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2, result;
    uint32_t mnt1, mnt2;

    fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    if ((exp1 == 255 && (uint32_t)(mnt1 << 9)) ||
        (exp2 == 255 && (uint32_t)(mnt2 << 9))) {
        result = 3;
        // Invalid if either NaN is signalling (quiet bit 22 clear):
        if ((exp1 == 255 && (uint32_t)(mnt1 << 9) && !(mnt1 >> 22 & 1)) ||
            (exp2 == 255 && (uint32_t)(mnt2 << 9) && !(mnt2 >> 22 & 1)) ||
            signal_nans)
            flags |= FPLIB_IOC;
    } else {
        // +0 and -0 compare equal; otherwise order by sign, then
        // exponent, then mantissa.
        if (op1 == op2 || (!mnt1 && !mnt2)) {
            result = 6;
        } else if (sgn1 != sgn2) {
            result = sgn1 ? 8 : 2;
        } else if (exp1 != exp2) {
            result = sgn1 ^ (exp1 < exp2) ? 8 : 2;
        } else {
            result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2;
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}

// Double-precision version of the three-way compare above.
template <>
int
fplibCompare(uint64_t op1, uint64_t op2, bool signal_nans, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2, result;
    uint64_t mnt1, mnt2;

    fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    if ((exp1 == 2047 && (uint64_t)(mnt1 << 12)) ||
        (exp2 == 2047 && (uint64_t)(mnt2 << 12))) {
        result = 3;
        if ((exp1 == 2047 && (uint64_t)(mnt1 << 12) && !(mnt1 >> 51 & 1)) ||
            (exp2 == 2047 && (uint64_t)(mnt2 << 12) && !(mnt2 >> 51 & 1)) ||
            signal_nans)
            flags |= FPLIB_IOC;
    } else {
        if (op1 == op2 || (!mnt1 && !mnt2)) {
            result = 6;
        } else if (sgn1 != sgn2) {
            result = sgn1 ? 8 : 2;
        } else if (exp1 != exp2) {
            result = sgn1 ^ (exp1 < exp2) ? 8 : 2;
        } else {
            result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2;
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}

// NaN payload conversion helpers: keep sign, force quiet bit, and move
// the payload to the target format's fraction field.
static uint16_t
fp16_FPConvertNaN_32(uint32_t op)
{
    return fp16_pack(op >> 31, 31, (uint16_t)1 << 9 | op >> 13);
}

static uint16_t
fp16_FPConvertNaN_64(uint64_t op)
{
    return fp16_pack(op >> 63, 31, (uint16_t)1 << 9 | op >> 42);
}

static uint32_t
fp32_FPConvertNaN_16(uint16_t op)
{
    return fp32_pack(op >> 15, 255, (uint32_t)1 << 22 | (uint32_t)op << 13);
}

static uint32_t
fp32_FPConvertNaN_64(uint64_t op)
{
    return fp32_pack(op >> 63, 255, (uint32_t)1 << 22 | op >> 29);
}

static uint64_t
fp64_FPConvertNaN_16(uint16_t op)
{
    return fp64_pack(op >> 15, 2047, (uint64_t)1 << 51 | (uint64_t)op << 42);
}

static uint64_t
fp64_FPConvertNaN_32(uint32_t op)
{
    return fp64_pack(op >> 31, 2047, (uint64_t)1 << 51 | (uint64_t)op << 29);
}

// Signed constants used by the reciprocal/rsqrt step instructions.
static uint32_t
fp32_FPOnePointFive(int sgn)
{
    return fp32_pack(sgn, 127, (uint64_t)1 << 22);
}

static uint64_t
fp64_FPOnePointFive(int sgn)
{
    return fp64_pack(sgn, 1023, (uint64_t)1 << 51);
}

static uint32_t
fp32_FPThree(int sgn)
{
    return fp32_pack(sgn, 128, (uint64_t)1 << 22);
}

static uint64_t
fp64_FPThree(int sgn)
{
    return fp64_pack(sgn, 1024, (uint64_t)1 << 51);
}

static uint32_t
fp32_FPTwo(int sgn)
{
    return fp32_pack(sgn, 128, 0);
}

static uint64_t
fp64_FPTwo(int sgn)
{
    return fp64_pack(sgn, 1024, 0);
}

// Convert single to half precision, honouring the FPSCR AHP (alternative
// half-precision) and DN (default NaN) controls.  (Body continues in the
// next chunk.)
template <>
uint16_t
fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint32_t mnt;
    uint16_t result;

    // Unpack floating-point operand optionally with flush-to-zero:
    fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
    bool alt_hp = fpscr.ahp;

    if (exp == 255 && (uint32_t)(mnt << 9)) {
        // NaN input: AHP has no NaN encoding, so produce zero there.
        if (alt_hp) {
            result = fp16_zero(sgn);
        } else if (fpscr.dn) {
            result = fp16_defaultNaN();
        } else {
            result = fp16_FPConvertNaN_32(op);
        }
        if (!(mnt >> 22 & 1) || alt_hp) {
            flags |= FPLIB_IOC;
        }
    } else if (exp == 255) {
        // Infinity: AHP has no infinity either; saturate and raise IOC.
        if (alt_hp) {
            result = sgn << 15 | (uint16_t)0x7fff;
            flags |= FPLIB_IOC;
        } else {
            result = fp16_infinity(sgn);
        }
    } else if (!mnt) {
        result = fp16_zero(sgn);
    } else {
        result = fp16_round_(sgn, exp - 127 + 15,
                             mnt >> 7 | !!(uint32_t)(mnt << 25),
                             rounding, mode | alt_hp << 4, &flags);
    }

    set_fpscr0(fpscr, flags);

    return result;
}

// Convert double to half precision (same AHP/DN handling as above).
template <>
uint16_t
fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint64_t mnt;
    uint16_t result;

    // Unpack floating-point operand optionally with flush-to-zero:
    fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    bool alt_hp = fpscr.ahp;

    if (exp == 2047 && (uint64_t)(mnt << 12)) {
        if (alt_hp) {
            result = fp16_zero(sgn);
        } else if (fpscr.dn) {
            result = fp16_defaultNaN();
        } else {
            result = fp16_FPConvertNaN_64(op);
        }
        if (!(mnt >> 51 & 1) || alt_hp) {
            flags |= FPLIB_IOC;
        }
    } else if (exp == 2047) {
        if (alt_hp) {
            result = sgn << 15 | (uint16_t)0x7fff;
            flags |= FPLIB_IOC;
        } else {
            result = fp16_infinity(sgn);
        }
    } else if (!mnt) {
        result = fp16_zero(sgn);
    } else {
        result = fp16_round_(sgn, exp - 1023 + 15,
                             mnt >> 36 | !!(uint64_t)(mnt << 28),
                             rounding, mode | alt_hp << 4, &flags);
    }

    set_fpscr0(fpscr, flags);

    return result;
}

// Convert half to single precision.  Exact widening: the finite case
// needs no rounding, only repacking.
template <>
uint32_t
fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint16_t mnt;
    uint32_t result;

    // Unpack floating-point operand optionally with flush-to-zero:
    fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    if (exp == 31 && !fpscr.ahp && (uint16_t)(mnt << 6)) {
        if (fpscr.dn) {
            result = fp32_defaultNaN();
        } else {
            result = fp32_FPConvertNaN_16(op);
        }
        if (!(mnt >> 9 & 1)) {
            flags |= FPLIB_IOC;
        }
    } else if (exp == 31 && !fpscr.ahp) {
        result = fp32_infinity(sgn);
    } else if (!mnt) {
        result = fp32_zero(sgn);
    } else {
        mnt = fp16_normalise(mnt, &exp);
        result = fp32_pack(sgn, exp - 15 + 127 + 5, (uint32_t)mnt << 8);
    }

    set_fpscr0(fpscr, flags);

    return result;
}

// Convert double to single precision (narrowing, rounds).
template <>
uint32_t
fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint64_t mnt;
    uint32_t result;

    // Unpack floating-point operand optionally with flush-to-zero:
    fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    if (exp == 2047 && (uint64_t)(mnt << 12)) {
        if (fpscr.dn) {
            result = fp32_defaultNaN();
        } else {
            result = fp32_FPConvertNaN_64(op);
        }
        if (!(mnt >> 51 & 1)) {
            flags |= FPLIB_IOC;
        }
    } else if (exp == 2047) {
        result = fp32_infinity(sgn);
    } else if (!mnt) {
        result = fp32_zero(sgn);
    } else {
        result = fp32_round_(sgn, exp - 1023 + 127,
                             mnt >> 20 | !!(uint64_t)(mnt << 44),
                             rounding, mode, &flags);
    }

    set_fpscr0(fpscr, flags);

    return result;
}

// Convert half to double precision (exact widening).
template <>
uint64_t
fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint16_t mnt;
    uint64_t result;

    // Unpack floating-point operand optionally with flush-to-zero:
    fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    if (exp == 31 && !fpscr.ahp && (uint16_t)(mnt << 6)) {
        if (fpscr.dn) {
            result = fp64_defaultNaN();
        } else {
            result = fp64_FPConvertNaN_16(op);
        }
        if (!(mnt >> 9 & 1)) {
            flags |= FPLIB_IOC;
        }
    } else if (exp == 31 && !fpscr.ahp) {
        result = fp64_infinity(sgn);
    } else if (!mnt) {
        result = fp64_zero(sgn);
    } else {
        mnt = fp16_normalise(mnt, &exp);
        result = fp64_pack(sgn, exp - 15 + 1023 + 5, (uint64_t)mnt << 37);
    }

    set_fpscr0(fpscr, flags);

    return result;
}

// Convert single to double precision (exact widening).
template <>
uint64_t
fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint32_t mnt;
    uint64_t result;

    // Unpack floating-point operand optionally with flush-to-zero:
    fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    if (exp == 255 && (uint32_t)(mnt << 9)) {
        if (fpscr.dn) {
            result = fp64_defaultNaN();
        } else {
            result = fp64_FPConvertNaN_32(op);
        }
        if (!(mnt >> 22 & 1)) {
            flags |= FPLIB_IOC;
        }
    } else if (exp == 255) {
        result = fp64_infinity(sgn);
    } else if (!mnt) {
        result = fp64_zero(sgn);
    } else {
        mnt = fp32_normalise(mnt, &exp);
        result = fp64_pack(sgn, exp - 127 + 1023 + 8, (uint64_t)mnt << 21);
    }

    set_fpscr0(fpscr, flags);

    return result;
}

// Fused multiply-add wrappers (addend + op1 * op2, single rounding).
template <>
uint32_t
fplibMulAdd(uint32_t addend, uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    int flags = 0;
    uint32_t result = fp32_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}

template <>
uint64_t
fplibMulAdd(uint64_t addend, uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    int flags = 0;
    uint64_t result = fp64_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}

// Division wrappers.
template <>
uint32_t
fplibDiv(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    int flags = 0;
    uint32_t result = fp32_div(op1, op2, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}

template <>
uint64_t
fplibDiv(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    int flags = 0;
    uint64_t result = fp64_div(op1, op2, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}

// Repack an unpacked value; a clear integer bit means the value stayed
// denormal, so the exponent field is forced to zero.
static uint32_t
fp32_repack(int sgn, int exp, uint32_t mnt)
{
    return fp32_pack(sgn, mnt >> 23 ? exp : 0, mnt);
}

static uint64_t
fp64_repack(int sgn, int exp, uint64_t mnt)
{
    return fp64_pack(sgn, mnt >> 52 ? exp : 0, mnt);
}

// For MaxNum/MinNum: a single quiet NaN operand is replaced by the
// "weakest" value (-Inf for max, +Inf for min) so the other operand wins.
static void
fp32_minmaxnum(uint32_t *op1, uint32_t *op2, int sgn)
{
    // Treat a single quiet-NaN as +Infinity/-Infinity
    if (!((uint32_t)~(*op1 << 1) >> 23) && (uint32_t)~(*op2 << 1) >> 23)
        *op1 = fp32_infinity(sgn);
    if (!((uint32_t)~(*op2 << 1) >> 23) && (uint32_t)~(*op1 << 1) >> 23)
        *op2 = fp32_infinity(sgn);
}

static void
fp64_minmaxnum(uint64_t *op1, uint64_t *op2, int sgn)
{
    // Treat a single quiet-NaN as +Infinity/-Infinity
    if (!((uint64_t)~(*op1 << 1) >> 52) && (uint64_t)~(*op2 << 1) >> 52)
        *op1 = fp64_infinity(sgn);
    if (!((uint64_t)~(*op2 << 1) >> 52) && (uint64_t)~(*op1 << 1) >> 52)
        *op2 = fp64_infinity(sgn);
}

// Maximum of two single-precision values.  (Selection expression
// continues in the next chunk.)
template <>
uint32_t
fplibMax(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint32_t mnt1, mnt2, x, result;

    fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    if ((x = fp32_process_NaNs(op1, op2, mode, &flags))) {
        result = x;
    } else {
        // Different signs: the positive one is larger.  Same sign:
        // compare the raw encodings (monotonic within a sign).
        result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ?
                  fp32_repack(sgn1, exp1, mnt1) :
                  fp32_repack(sgn2, exp2, mnt2));
    }
    set_fpscr0(fpscr, flags);
    return result;
}

// Maximum of two double-precision values.
template <>
uint64_t
fplibMax(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint64_t mnt1, mnt2, x, result;

    fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    if ((x = fp64_process_NaNs(op1, op2, mode, &flags))) {
        result = x;
    } else {
        result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ?
                  fp64_repack(sgn1, exp1, mnt1) :
                  fp64_repack(sgn2, exp2, mnt2));
    }
    set_fpscr0(fpscr, flags);
    return result;
}

// MaxNum: like Max but a lone quiet NaN loses to the numeric operand.
template <>
uint32_t
fplibMaxNum(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    fp32_minmaxnum(&op1, &op2, 1);
    return fplibMax<uint32_t>(op1, op2, fpscr);
}

template <>
uint64_t
fplibMaxNum(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    fp64_minmaxnum(&op1, &op2, 1);
    return fplibMax<uint64_t>(op1, op2, fpscr);
}

// Minimum of two single-precision values.
template <>
uint32_t
fplibMin(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint32_t mnt1, mnt2, x, result;

    fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    if ((x = fp32_process_NaNs(op1, op2, mode, &flags))) {
        result = x;
    } else {
        result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ?
                  fp32_repack(sgn1, exp1, mnt1) :
                  fp32_repack(sgn2, exp2, mnt2));
    }
    set_fpscr0(fpscr, flags);
    return result;
}

// Minimum of two double-precision values.
template <>
uint64_t
fplibMin(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint64_t mnt1, mnt2, x, result;

    fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    if ((x = fp64_process_NaNs(op1, op2, mode, &flags))) {
        result = x;
    } else {
        result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ?
                  fp64_repack(sgn1, exp1, mnt1) :
                  fp64_repack(sgn2, exp2, mnt2));
    }
    set_fpscr0(fpscr, flags);
    return result;
}

// MinNum: like Min but a lone quiet NaN loses to the numeric operand.
template <>
uint32_t
fplibMinNum(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    fp32_minmaxnum(&op1, &op2, 0);
    return fplibMin<uint32_t>(op1, op2, fpscr);
}

template <>
uint64_t
fplibMinNum(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    fp64_minmaxnum(&op1, &op2, 0);
    return fplibMin<uint64_t>(op1, op2, fpscr);
}

// Rounded multiplication wrappers.
template <>
uint32_t
fplibMul(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    int flags = 0;
    uint32_t result = fp32_mul(op1, op2, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}

template <>
uint64_t
fplibMul(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    int flags = 0;
    uint64_t result = fp64_mul(op1, op2, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}

// FMULX: like multiply, but 0 * infinity yields +/-2.0 instead of the
// Invalid-Operation default NaN.
template <>
uint32_t
fplibMulX(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint32_t mnt1, mnt2, result;

    fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    result = fp32_process_NaNs(op1, op2, mode, &flags);
    if (!result) {
        if ((exp1 == 255 && !mnt2) || (exp2 == 255 && !mnt1)) {
            result = fp32_FPTwo(sgn1 ^ sgn2);
        } else if (exp1 == 255 || exp2 == 255) {
            result = fp32_infinity(sgn1 ^ sgn2);
        } else if (!mnt1 || !mnt2) {
            result = fp32_zero(sgn1 ^ sgn2);
        } else {
            result = fp32_mul(op1, op2, mode, &flags);
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint64_t
fplibMulX(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint64_t mnt1, mnt2, result;

    fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    result = fp64_process_NaNs(op1, op2, mode, &flags);
    if (!result) {
        if ((exp1 == 2047 && !mnt2) || (exp2 == 2047 && !mnt1)) {
            result = fp64_FPTwo(sgn1 ^ sgn2);
        } else if (exp1 == 2047 || exp2 == 2047) {
            result = fp64_infinity(sgn1 ^ sgn2);
        } else if (!mnt1 || !mnt2) {
            result = fp64_zero(sgn1 ^ sgn2);
        } else {
            result = fp64_mul(op1, op2, mode, &flags);
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}

// Negation: flip the sign bit.  Purely bitwise; no exceptions.
template <>
uint32_t
fplibNeg(uint32_t op)
{
    return op ^ (uint32_t)1 << 31;
}

template <>
uint64_t
fplibNeg(uint64_t op)
{
    return op ^ (uint64_t)1 << 63;
}

// 8-bit reciprocal square root estimate table indexed by
// (exponent parity << 7) | top mantissa bits.  (Initialiser continues
// in the next chunk.)
static const uint8_t recip_sqrt_estimate[256] = {
    255, 253, 251, 249, 247, 245, 243, 242, 240, 238, 236, 234, 233, 231, 229, 228,
    226, 224, 223, 221, 219, 218, 216, 215, 213, 212, 210, 209, 207, 206, 204, 203,
    201, 200, 198, 197, 196, 194, 193, 192, 190, 189, 188, 186, 185, 184, 183, 181,
    180, 179, 178, 176, 175, 174, 173, 172, 170, 169, 168, 167, 166, 165, 164, 163,
    162, 160, 159, 158, 157, 156, 155, 154, 153, 152, 151, 150, 149, 148, 147, 146,
145, 144, 143, 142, 141, 140, 140, 139, 138, 137, 136, 135, 134, 133, 132, 131, 2328 131, 130, 129, 128, 127, 126, 126, 125, 124, 123, 122, 121, 121, 120, 119, 118, 2329 118, 117, 116, 115, 114, 114, 113, 112, 111, 111, 110, 109, 109, 108, 107, 106, 2330 105, 104, 103, 101, 100, 99, 97, 96, 95, 93, 92, 91, 90, 88, 87, 86, 2331 85, 84, 82, 81, 80, 79, 78, 77, 76, 75, 74, 72, 71, 70, 69, 68, 2332 67, 66, 65, 64, 63, 62, 61, 60, 60, 59, 58, 57, 56, 55, 54, 53, 2333 52, 51, 51, 50, 49, 48, 47, 46, 46, 45, 44, 43, 42, 42, 41, 40, 2334 39, 38, 38, 37, 36, 35, 35, 34, 33, 33, 32, 31, 30, 30, 29, 28, 2335 28, 27, 26, 26, 25, 24, 24, 23, 22, 22, 21, 20, 20, 19, 19, 18, 2336 17, 17, 16, 16, 15, 14, 14, 13, 13, 12, 11, 11, 10, 10, 9, 9, 2337 8, 8, 7, 6, 6, 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 0 2338}; 2339 2340template <> 2341uint32_t 2342fplibRSqrtEstimate(uint32_t op, FPSCR &fpscr) 2343{ 2344 int mode = modeConv(fpscr); 2345 int flags = 0; 2346 int sgn, exp; 2347 uint32_t mnt, result; 2348 2349 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags); 2350 2351 if (exp == 255 && (uint32_t)(mnt << 9)) { 2352 result = fp32_process_NaN(op, mode, &flags); 2353 } else if (!mnt) { 2354 result = fp32_infinity(sgn); 2355 flags |= FPLIB_DZC; 2356 } else if (sgn) { 2357 result = fp32_defaultNaN(); 2358 flags |= FPLIB_IOC; 2359 } else if (exp == 255) { 2360 result = fp32_zero(0); 2361 } else { 2362 exp += 8; 2363 mnt = fp32_normalise(mnt, &exp); 2364 mnt = recip_sqrt_estimate[(~exp & 1) << 7 | (mnt >> 24 & 127)]; 2365 result = fp32_pack(0, (380 - exp) >> 1, mnt << 15); 2366 } 2367 2368 set_fpscr0(fpscr, flags); 2369 2370 return result; 2371} 2372 2373template <> 2374uint64_t 2375fplibRSqrtEstimate(uint64_t op, FPSCR &fpscr) 2376{ 2377 int mode = modeConv(fpscr); 2378 int flags = 0; 2379 int sgn, exp; 2380 uint64_t mnt, result; 2381 2382 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags); 2383 2384 if (exp == 2047 && (uint64_t)(mnt << 12)) { 2385 result = fp64_process_NaN(op, mode, &flags); 2386 } else 
if (!mnt) { 2387 result = fp64_infinity(sgn); 2388 flags |= FPLIB_DZC; 2389 } else if (sgn) { 2390 result = fp64_defaultNaN(); 2391 flags |= FPLIB_IOC; 2392 } else if (exp == 2047) { 2393 result = fp32_zero(0); 2394 } else { 2395 exp += 11; 2396 mnt = fp64_normalise(mnt, &exp); 2397 mnt = recip_sqrt_estimate[(~exp & 1) << 7 | (mnt >> 56 & 127)]; 2398 result = fp64_pack(0, (3068 - exp) >> 1, mnt << 44); 2399 } 2400 2401 set_fpscr0(fpscr, flags); 2402 2403 return result; 2404} 2405 2406template <> 2407uint32_t 2408fplibRSqrtStepFused(uint32_t op1, uint32_t op2, FPSCR &fpscr) 2409{ 2410 int mode = modeConv(fpscr); 2411 int flags = 0; 2412 int sgn1, exp1, sgn2, exp2; 2413 uint32_t mnt1, mnt2, result; 2414 2415 op1 = fplibNeg<uint32_t>(op1); 2416 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); 2417 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); 2418 2419 result = fp32_process_NaNs(op1, op2, mode, &flags); 2420 if (!result) { 2421 if ((exp1 == 255 && !mnt2) || (exp2 == 255 && !mnt1)) { 2422 result = fp32_FPOnePointFive(0); 2423 } else if (exp1 == 255 || exp2 == 255) { 2424 result = fp32_infinity(sgn1 ^ sgn2); 2425 } else { 2426 result = fp32_muladd(fp32_FPThree(0), op1, op2, -1, mode, &flags); 2427 } 2428 } 2429 2430 set_fpscr0(fpscr, flags); 2431 2432 return result; 2433} 2434 2435template <> 2436uint64_t 2437fplibRSqrtStepFused(uint64_t op1, uint64_t op2, FPSCR &fpscr) 2438{ 2439 int mode = modeConv(fpscr); 2440 int flags = 0; 2441 int sgn1, exp1, sgn2, exp2; 2442 uint64_t mnt1, mnt2, result; 2443 2444 op1 = fplibNeg<uint64_t>(op1); 2445 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); 2446 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); 2447 2448 result = fp64_process_NaNs(op1, op2, mode, &flags); 2449 if (!result) { 2450 if ((exp1 == 2047 && !mnt2) || (exp2 == 2047 && !mnt1)) { 2451 result = fp64_FPOnePointFive(0); 2452 } else if (exp1 == 2047 || exp2 == 2047) { 2453 result = fp64_infinity(sgn1 ^ sgn2); 2454 } else { 2455 result = 
fp64_muladd(fp64_FPThree(0), op1, op2, -1, mode, &flags); 2456 } 2457 } 2458 2459 set_fpscr0(fpscr, flags); 2460 2461 return result; 2462} 2463 2464template <> 2465uint32_t 2466fplibRecipStepFused(uint32_t op1, uint32_t op2, FPSCR &fpscr) 2467{ 2468 int mode = modeConv(fpscr); 2469 int flags = 0; 2470 int sgn1, exp1, sgn2, exp2; 2471 uint32_t mnt1, mnt2, result; 2472 2473 op1 = fplibNeg<uint32_t>(op1); 2474 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); 2475 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); 2476 2477 result = fp32_process_NaNs(op1, op2, mode, &flags); 2478 if (!result) { 2479 if ((exp1 == 255 && !mnt2) || (exp2 == 255 && !mnt1)) { 2480 result = fp32_FPTwo(0); 2481 } else if (exp1 == 255 || exp2 == 255) { 2482 result = fp32_infinity(sgn1 ^ sgn2); 2483 } else { 2484 result = fp32_muladd(fp32_FPTwo(0), op1, op2, 0, mode, &flags); 2485 } 2486 } 2487 2488 set_fpscr0(fpscr, flags); 2489 2490 return result; 2491} 2492 2493template <> 2494uint32_t 2495fplibRecipEstimate(uint32_t op, FPSCR &fpscr) 2496{ 2497 int mode = modeConv(fpscr); 2498 int flags = 0; 2499 int sgn, exp; 2500 uint32_t mnt, result; 2501 2502 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags); 2503 2504 if (exp == 255 && (uint32_t)(mnt << 9)) { 2505 result = fp32_process_NaN(op, mode, &flags); 2506 } else if (exp == 255) { 2507 result = fp32_zero(sgn); 2508 } else if (!mnt) { 2509 result = fp32_infinity(sgn); 2510 flags |= FPLIB_DZC; 2511 } else if (!((uint32_t)(op << 1) >> 22)) { 2512 bool overflow_to_inf = false; 2513 switch (FPCRRounding(fpscr)) { 2514 case FPRounding_TIEEVEN: 2515 overflow_to_inf = true; 2516 break; 2517 case FPRounding_POSINF: 2518 overflow_to_inf = !sgn; 2519 break; 2520 case FPRounding_NEGINF: 2521 overflow_to_inf = sgn; 2522 break; 2523 case FPRounding_ZERO: 2524 overflow_to_inf = false; 2525 break; 2526 default: 2527 assert(0); 2528 } 2529 result = overflow_to_inf ? 
fp32_infinity(sgn) : fp32_max_normal(sgn); 2530 flags |= FPLIB_OFC | FPLIB_IXC; 2531 } else if (fpscr.fz && exp >= 253) { 2532 result = fp32_zero(sgn); 2533 flags |= FPLIB_UFC; 2534 } else { 2535 exp += 8; 2536 mnt = fp32_normalise(mnt, &exp); 2537 int result_exp = 253 - exp; 2538 uint32_t fraction = (((uint32_t)1 << 19) / (mnt >> 22 | 1) + 1) >> 1; 2539 fraction <<= 15; 2540 if (result_exp == 0) { 2541 fraction >>= 1; 2542 } else if (result_exp == -1) { 2543 fraction >>= 2; 2544 result_exp = 0; 2545 } 2546 result = fp32_pack(sgn, result_exp, fraction); 2547 } 2548 2549 set_fpscr0(fpscr, flags); 2550 2551 return result; 2552} 2553 2554template <> 2555uint64_t 2556fplibRecipEstimate(uint64_t op, FPSCR &fpscr) 2557{ 2558 int mode = modeConv(fpscr); 2559 int flags = 0; 2560 int sgn, exp; 2561 uint64_t mnt, result; 2562 2563 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags); 2564 2565 if (exp == 2047 && (uint64_t)(mnt << 12)) { 2566 result = fp64_process_NaN(op, mode, &flags); 2567 } else if (exp == 2047) { 2568 result = fp64_zero(sgn); 2569 } else if (!mnt) { 2570 result = fp64_infinity(sgn); 2571 flags |= FPLIB_DZC; 2572 } else if (!((uint64_t)(op << 1) >> 51)) { 2573 bool overflow_to_inf = false; 2574 switch (FPCRRounding(fpscr)) { 2575 case FPRounding_TIEEVEN: 2576 overflow_to_inf = true; 2577 break; 2578 case FPRounding_POSINF: 2579 overflow_to_inf = !sgn; 2580 break; 2581 case FPRounding_NEGINF: 2582 overflow_to_inf = sgn; 2583 break; 2584 case FPRounding_ZERO: 2585 overflow_to_inf = false; 2586 break; 2587 default: 2588 assert(0); 2589 } 2590 result = overflow_to_inf ? 
fp64_infinity(sgn) : fp64_max_normal(sgn); 2591 flags |= FPLIB_OFC | FPLIB_IXC; 2592 } else if (fpscr.fz && exp >= 2045) { 2593 result = fp64_zero(sgn); 2594 flags |= FPLIB_UFC; 2595 } else { 2596 exp += 11; 2597 mnt = fp64_normalise(mnt, &exp); 2598 int result_exp = 2045 - exp; 2599 uint64_t fraction = (((uint32_t)1 << 19) / (mnt >> 54 | 1) + 1) >> 1; 2600 fraction <<= 44; 2601 if (result_exp == 0) { 2602 fraction >>= 1; 2603 } else if (result_exp == -1) { 2604 fraction >>= 2; 2605 result_exp = 0; 2606 } 2607 result = fp64_pack(sgn, result_exp, fraction); 2608 } 2609 2610 set_fpscr0(fpscr, flags); 2611 2612 return result; 2613} 2614 2615template <> 2616uint64_t 2617fplibRecipStepFused(uint64_t op1, uint64_t op2, FPSCR &fpscr) 2618{ 2619 int mode = modeConv(fpscr); 2620 int flags = 0; 2621 int sgn1, exp1, sgn2, exp2; 2622 uint64_t mnt1, mnt2, result; 2623 2624 op1 = fplibNeg<uint64_t>(op1); 2625 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); 2626 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); 2627 2628 result = fp64_process_NaNs(op1, op2, mode, &flags); 2629 if (!result) { 2630 if ((exp1 == 2047 && !mnt2) || (exp2 == 2047 && !mnt1)) { 2631 result = fp64_FPTwo(0); 2632 } else if (exp1 == 2047 || exp2 == 2047) { 2633 result = fp64_infinity(sgn1 ^ sgn2); 2634 } else { 2635 result = fp64_muladd(fp64_FPTwo(0), op1, op2, 0, mode, &flags); 2636 } 2637 } 2638 2639 set_fpscr0(fpscr, flags); 2640 2641 return result; 2642} 2643 2644template <> 2645uint32_t 2646fplibRecpX(uint32_t op, FPSCR &fpscr) 2647{ 2648 int mode = modeConv(fpscr); 2649 int flags = 0; 2650 int sgn, exp; 2651 uint32_t mnt, result; 2652 2653 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags); 2654 2655 if (exp == 255 && (uint32_t)(mnt << 9)) { 2656 result = fp32_process_NaN(op, mode, &flags); 2657 } 2658 else { 2659 if (!mnt) { // Zero and denormals 2660 result = fp32_pack(sgn, 254, 0); 2661 } else { // Infinities and normals 2662 result = fp32_pack(sgn, exp ^ 255, 0); 2663 } 2664 } 2665 2666 
set_fpscr0(fpscr, flags); 2667 2668 return result; 2669} 2670 2671template <> 2672uint64_t 2673fplibRecpX(uint64_t op, FPSCR &fpscr) 2674{ 2675 int mode = modeConv(fpscr); 2676 int flags = 0; 2677 int sgn, exp; 2678 uint64_t mnt, result; 2679 2680 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags); 2681 2682 if (exp == 2047 && (uint64_t)(mnt << 12)) { 2683 result = fp64_process_NaN(op, mode, &flags); 2684 } 2685 else { 2686 if (!mnt) { // Zero and denormals 2687 result = fp64_pack(sgn, 2046, 0); 2688 } else { // Infinities and normals 2689 result = fp64_pack(sgn, exp ^ 2047, 0); 2690 } 2691 } 2692 2693 set_fpscr0(fpscr, flags); 2694 2695 return result; 2696} 2697 2698template <> 2699uint32_t 2700fplibRoundInt(uint32_t op, FPRounding rounding, bool exact, FPSCR &fpscr) 2701{ 2702 int mode = modeConv(fpscr); 2703 int flags = 0; 2704 int sgn, exp; 2705 uint32_t mnt, result; 2706 2707 // Unpack using FPCR to determine if subnormals are flushed-to-zero: 2708 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags); 2709 2710 // Handle NaNs, infinities and zeroes: 2711 if (exp == 255 && (uint32_t)(mnt << 9)) { 2712 result = fp32_process_NaN(op, mode, &flags); 2713 } else if (exp == 255) { 2714 result = fp32_infinity(sgn); 2715 } else if (!mnt) { 2716 result = fp32_zero(sgn); 2717 } else if (exp >= 150) { 2718 // There are no fractional bits 2719 result = op; 2720 } else { 2721 // Truncate towards zero: 2722 uint32_t x = 150 - exp >= 32 ? 0 : mnt >> (150 - exp); 2723 int err = exp < 118 ? 
1 : 2724 (mnt << 1 >> (149 - exp) & 3) | (mnt << 2 << (exp - 118) != 0); 2725 switch (rounding) { 2726 case FPRounding_TIEEVEN: 2727 x += (err == 3 || (err == 2 && (x & 1))); 2728 break; 2729 case FPRounding_POSINF: 2730 x += err && !sgn; 2731 break; 2732 case FPRounding_NEGINF: 2733 x += err && sgn; 2734 break; 2735 case FPRounding_ZERO: 2736 break; 2737 case FPRounding_TIEAWAY: 2738 x += err >> 1; 2739 break; 2740 default: 2741 assert(0); 2742 } 2743 2744 if (x == 0) { 2745 result = fp32_zero(sgn); 2746 } else { 2747 exp = 150; 2748 mnt = fp32_normalise(x, &exp); 2749 result = fp32_pack(sgn, exp + 8, mnt >> 8); 2750 } 2751 2752 if (err && exact) 2753 flags |= FPLIB_IXC; 2754 } 2755 2756 set_fpscr0(fpscr, flags); 2757 2758 return result; 2759} 2760 2761template <> 2762uint64_t 2763fplibRoundInt(uint64_t op, FPRounding rounding, bool exact, FPSCR &fpscr) 2764{ 2765 int mode = modeConv(fpscr); 2766 int flags = 0; 2767 int sgn, exp; 2768 uint64_t mnt, result; 2769 2770 // Unpack using FPCR to determine if subnormals are flushed-to-zero: 2771 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags); 2772 2773 // Handle NaNs, infinities and zeroes: 2774 if (exp == 2047 && (uint64_t)(mnt << 12)) { 2775 result = fp64_process_NaN(op, mode, &flags); 2776 } else if (exp == 2047) { 2777 result = fp64_infinity(sgn); 2778 } else if (!mnt) { 2779 result = fp64_zero(sgn); 2780 } else if (exp >= 1075) { 2781 // There are no fractional bits 2782 result = op; 2783 } else { 2784 // Truncate towards zero: 2785 uint64_t x = 1075 - exp >= 64 ? 0 : mnt >> (1075 - exp); 2786 int err = exp < 1011 ? 
1 : 2787 (mnt << 1 >> (1074 - exp) & 3) | (mnt << 2 << (exp - 1011) != 0); 2788 switch (rounding) { 2789 case FPRounding_TIEEVEN: 2790 x += (err == 3 || (err == 2 && (x & 1))); 2791 break; 2792 case FPRounding_POSINF: 2793 x += err && !sgn; 2794 break; 2795 case FPRounding_NEGINF: 2796 x += err && sgn; 2797 break; 2798 case FPRounding_ZERO: 2799 break; 2800 case FPRounding_TIEAWAY: 2801 x += err >> 1; 2802 break; 2803 default: 2804 assert(0); 2805 } 2806 2807 if (x == 0) { 2808 result = fp64_zero(sgn); 2809 } else { 2810 exp = 1075; 2811 mnt = fp64_normalise(x, &exp); 2812 result = fp64_pack(sgn, exp + 11, mnt >> 11); 2813 } 2814 2815 if (err && exact) 2816 flags |= FPLIB_IXC; 2817 } 2818 2819 set_fpscr0(fpscr, flags); 2820 2821 return result; 2822} 2823 2824template <> 2825uint32_t 2826fplibSqrt(uint32_t op, FPSCR &fpscr) 2827{ 2828 int flags = 0; 2829 uint32_t result = fp32_sqrt(op, modeConv(fpscr), &flags); 2830 set_fpscr0(fpscr, flags); 2831 return result; 2832} 2833 2834template <> 2835uint64_t 2836fplibSqrt(uint64_t op, FPSCR &fpscr) 2837{ 2838 int flags = 0; 2839 uint64_t result = fp64_sqrt(op, modeConv(fpscr), &flags); 2840 set_fpscr0(fpscr, flags); 2841 return result; 2842} 2843 2844template <> 2845uint32_t 2846fplibSub(uint32_t op1, uint32_t op2, FPSCR &fpscr) 2847{ 2848 int flags = 0; 2849 uint32_t result = fp32_add(op1, op2, 1, modeConv(fpscr), &flags); 2850 set_fpscr0(fpscr, flags); 2851 return result; 2852} 2853 2854template <> 2855uint64_t 2856fplibSub(uint64_t op1, uint64_t op2, FPSCR &fpscr) 2857{ 2858 int flags = 0; 2859 uint64_t result = fp64_add(op1, op2, 1, modeConv(fpscr), &flags); 2860 set_fpscr0(fpscr, flags); 2861 return result; 2862} 2863 2864static uint64_t 2865FPToFixed_64(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding, 2866 int *flags) 2867{ 2868 uint64_t x; 2869 int err; 2870 2871 if (exp > 1023 + 63) { 2872 *flags = FPLIB_IOC; 2873 return ((uint64_t)!u << 63) - !sgn; 2874 } 2875 2876 x = lsr64(mnt << 11, 1023 + 63 - exp); 
2877 err = (exp > 1023 + 63 - 2 ? 0 : 2878 (lsr64(mnt << 11, 1023 + 63 - 2 - exp) & 3) | 2879 !!(mnt << 11 & (lsl64(1, 1023 + 63 - 2 - exp) - 1))); 2880 2881 switch (rounding) { 2882 case FPRounding_TIEEVEN: 2883 x += (err == 3 || (err == 2 && (x & 1))); 2884 break; 2885 case FPRounding_POSINF: 2886 x += err && !sgn; 2887 break; 2888 case FPRounding_NEGINF: 2889 x += err && sgn; 2890 break; 2891 case FPRounding_ZERO: 2892 break; 2893 case FPRounding_TIEAWAY: 2894 x += err >> 1; 2895 break; 2896 default: 2897 assert(0); 2898 } 2899 2900 if (u ? sgn && x : x > ((uint64_t)1 << 63) - !sgn) { 2901 *flags = FPLIB_IOC; 2902 return ((uint64_t)!u << 63) - !sgn; 2903 } 2904 2905 if (err) { 2906 *flags = FPLIB_IXC; 2907 } 2908 2909 return sgn ? -x : x; 2910} 2911 2912static uint32_t 2913FPToFixed_32(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding, 2914 int *flags) 2915{ 2916 uint64_t x = FPToFixed_64(sgn, exp, mnt, u, rounding, flags); 2917 if (u ? x >= (uint64_t)1 << 32 : 2918 !(x < (uint64_t)1 << 31 || 2919 (uint64_t)-x <= (uint64_t)1 << 31)) { 2920 *flags = FPLIB_IOC; 2921 x = ((uint32_t)!u << 31) - !sgn; 2922 } 2923 return x; 2924} 2925 2926template <> 2927uint32_t 2928fplibFPToFixed(uint32_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr) 2929{ 2930 int flags = 0; 2931 int sgn, exp; 2932 uint32_t mnt, result; 2933 2934 // Unpack using FPCR to determine if subnormals are flushed-to-zero: 2935 fp32_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags); 2936 2937 // If NaN, set cumulative flag or take exception: 2938 if (exp == 255 && (uint32_t)(mnt << 9)) { 2939 flags = FPLIB_IOC; 2940 result = 0; 2941 } else { 2942 result = FPToFixed_32(sgn, exp + 1023 - 127 + fbits, 2943 (uint64_t)mnt << (52 - 23), u, rounding, &flags); 2944 } 2945 2946 set_fpscr0(fpscr, flags); 2947 2948 return result; 2949} 2950 2951template <> 2952uint32_t 2953fplibFPToFixed(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr) 2954{ 2955 int flags = 0; 2956 int sgn, 
exp; 2957 uint64_t mnt; 2958 uint32_t result; 2959 2960 // Unpack using FPCR to determine if subnormals are flushed-to-zero: 2961 fp64_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags); 2962 2963 // If NaN, set cumulative flag or take exception: 2964 if (exp == 2047 && (uint64_t)(mnt << 12)) { 2965 flags = FPLIB_IOC; 2966 result = 0; 2967 } else { 2968 result = FPToFixed_32(sgn, exp + fbits, mnt, u, rounding, &flags); 2969 } 2970 2971 set_fpscr0(fpscr, flags); 2972 2973 return result; 2974} 2975 2976template <> 2977uint64_t 2978fplibFPToFixed(uint32_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr) 2979{ 2980 int flags = 0; 2981 int sgn, exp; 2982 uint32_t mnt; 2983 uint64_t result; 2984 2985 // Unpack using FPCR to determine if subnormals are flushed-to-zero: 2986 fp32_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags); 2987 2988 // If NaN, set cumulative flag or take exception: 2989 if (exp == 255 && (uint32_t)(mnt << 9)) { 2990 flags = FPLIB_IOC; 2991 result = 0; 2992 } else { 2993 result = FPToFixed_64(sgn, exp + 1023 - 127 + fbits, 2994 (uint64_t)mnt << (52 - 23), u, rounding, &flags); 2995 } 2996 2997 set_fpscr0(fpscr, flags); 2998 2999 return result; 3000} 3001 3002template <> 3003uint64_t 3004fplibFPToFixed(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr) 3005{ 3006 int flags = 0; 3007 int sgn, exp; 3008 uint64_t mnt, result; 3009 3010 // Unpack using FPCR to determine if subnormals are flushed-to-zero: 3011 fp64_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags); 3012 3013 // If NaN, set cumulative flag or take exception: 3014 if (exp == 2047 && (uint64_t)(mnt << 12)) { 3015 flags = FPLIB_IOC; 3016 result = 0; 3017 } else { 3018 result = FPToFixed_64(sgn, exp + fbits, mnt, u, rounding, &flags); 3019 } 3020 3021 set_fpscr0(fpscr, flags); 3022 3023 return result; 3024} 3025 3026static uint32_t 3027fp32_cvtf(uint64_t a, int fbits, int u, int mode, int *flags) 3028{ 3029 int x_sgn = !u && a >> 63; 3030 int x_exp = 190 - 
fbits; 3031 uint64_t x_mnt = x_sgn ? -a : a; 3032 3033 // Handle zero: 3034 if (!x_mnt) { 3035 return fp32_zero(0); 3036 } 3037 3038 // Normalise and convert to 32 bits, collapsing error into bottom bit: 3039 x_mnt = fp64_normalise(x_mnt, &x_exp); 3040 x_mnt = x_mnt >> 31 | !!(uint32_t)(x_mnt << 1); 3041 3042 return fp32_round(x_sgn, x_exp, x_mnt, mode, flags); 3043} 3044 3045static uint64_t 3046fp64_cvtf(uint64_t a, int fbits, int u, int mode, int *flags) 3047{ 3048 int x_sgn = !u && a >> 63; 3049 int x_exp = 1024 + 62 - fbits; 3050 uint64_t x_mnt = x_sgn ? -a : a; 3051 3052 // Handle zero: 3053 if (!x_mnt) { 3054 return fp64_zero(0); 3055 } 3056 3057 x_mnt = fp64_normalise(x_mnt, &x_exp); 3058 3059 return fp64_round(x_sgn, x_exp, x_mnt << 1, mode, flags); 3060} 3061 3062template <> 3063uint32_t 3064fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr) 3065{ 3066 int flags = 0; 3067 uint32_t res = fp32_cvtf(op, fbits, u, 3068 (int)rounding | ((uint32_t)fpscr >> 22 & 12), 3069 &flags); 3070 set_fpscr0(fpscr, flags); 3071 return res; 3072} 3073 3074template <> 3075uint64_t 3076fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr) 3077{ 3078 int flags = 0; 3079 uint64_t res = fp64_cvtf(op, fbits, u, 3080 (int)rounding | ((uint32_t)fpscr >> 22 & 12), 3081 &flags); 3082 set_fpscr0(fpscr, flags); 3083 return res; 3084} 3085 3086} 3087