fplib.cc revision 13118:897ff5214d07
1/* 2* Copyright (c) 2012-2013, 2017-2018 ARM Limited 3* All rights reserved 4* 5* The license below extends only to copyright in the software and shall 6* not be construed as granting a license to any other intellectual 7* property including but not limited to intellectual property relating 8* to a hardware implementation of the functionality of the software 9* licensed hereunder. You may use the software subject to the license 10* terms below provided that you ensure that this notice is replicated 11* unmodified and in its entirety in all distributions of the software, 12* modified or unmodified, in source code or in binary form. 13* 14* Redistribution and use in source and binary forms, with or without 15* modification, are permitted provided that the following conditions are 16* met: redistributions of source code must retain the above copyright 17* notice, this list of conditions and the following disclaimer; 18* redistributions in binary form must reproduce the above copyright 19* notice, this list of conditions and the following disclaimer in the 20* documentation and/or other materials provided with the distribution; 21* neither the name of the copyright holders nor the names of its 22* contributors may be used to endorse or promote products derived from 23* this software without specific prior written permission. 24* 25* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 26* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 27* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 28* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 29* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 30* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 31* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 32* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 33* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 34* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 35* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 36* 37* Authors: Edmund Grimley Evans 38* Thomas Grocutt 39*/ 40 41#include <stdint.h> 42 43#include <cassert> 44 45#include "fplib.hh" 46 47namespace ArmISA 48{ 49 50#define FPLIB_RN 0 51#define FPLIB_RP 1 52#define FPLIB_RM 2 53#define FPLIB_RZ 3 54#define FPLIB_FZ 4 55#define FPLIB_DN 8 56#define FPLIB_AHP 16 57#define FPLIB_FZ16 32 58 59#define FPLIB_IDC 128 // Input Denormal 60#define FPLIB_IXC 16 // Inexact 61#define FPLIB_UFC 8 // Underflow 62#define FPLIB_OFC 4 // Overflow 63#define FPLIB_DZC 2 // Division by Zero 64#define FPLIB_IOC 1 // Invalid Operation 65 66#define FP16_BITS 16 67#define FP32_BITS 32 68#define FP64_BITS 64 69 70#define FP16_EXP_BITS 5 71#define FP32_EXP_BITS 8 72#define FP64_EXP_BITS 11 73 74#define FP16_EXP_BIAS 15 75#define FP32_EXP_BIAS 127 76#define FP64_EXP_BIAS 1023 77 78#define FP16_EXP_INF ((1ULL << FP16_EXP_BITS) - 1) 79#define FP32_EXP_INF ((1ULL << FP32_EXP_BITS) - 1) 80#define FP64_EXP_INF ((1ULL << FP64_EXP_BITS) - 1) 81 82#define FP16_MANT_BITS (FP16_BITS - FP16_EXP_BITS - 1) 83#define FP32_MANT_BITS (FP32_BITS - FP32_EXP_BITS - 1) 84#define FP64_MANT_BITS (FP64_BITS - FP64_EXP_BITS - 1) 85 86#define FP16_EXP(x) ((x) >> FP16_MANT_BITS & ((1ULL << FP16_EXP_BITS) - 1)) 87#define FP32_EXP(x) ((x) >> FP32_MANT_BITS & ((1ULL << FP32_EXP_BITS) - 1)) 88#define FP64_EXP(x) ((x) >> FP64_MANT_BITS & ((1ULL << FP64_EXP_BITS) - 1)) 89 90#define FP16_MANT(x) ((x) & ((1ULL << FP16_MANT_BITS) - 1)) 91#define FP32_MANT(x) ((x) & ((1ULL << FP32_MANT_BITS) - 1)) 92#define FP64_MANT(x) ((x) & ((1ULL << FP64_MANT_BITS) - 1)) 93 94static inline uint16_t 95lsl16(uint16_t x, uint32_t shift) 96{ 97 return shift < 16 ? x << shift : 0; 98} 99 100static inline uint16_t 101lsr16(uint16_t x, uint32_t shift) 102{ 103 return shift < 16 ? x >> shift : 0; 104} 105 106static inline uint32_t 107lsl32(uint32_t x, uint32_t shift) 108{ 109 return shift < 32 ? x << shift : 0; 110} 111 112static inline uint32_t 113lsr32(uint32_t x, uint32_t shift) 114{ 115 return shift < 32 ? x >> shift : 0; 116} 117 118static inline uint64_t 119lsl64(uint64_t x, uint32_t shift) 120{ 121 return shift < 64 ? x << shift : 0; 122} 123 124static inline uint64_t 125lsr64(uint64_t x, uint32_t shift) 126{ 127 return shift < 64 ? x >> shift : 0; 128} 129 130static inline void 131lsl128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift) 132{ 133 if (shift == 0) { 134 *r1 = x1; 135 *r0 = x0; 136 } else if (shift < 64) { 137 *r1 = x1 << shift | x0 >> (64 - shift); 138 *r0 = x0 << shift; 139 } else if (shift < 128) { 140 *r1 = x0 << (shift - 64); 141 *r0 = 0; 142 } else { 143 *r1 = 0; 144 *r0 = 0; 145 } 146} 147 148static inline void 149lsr128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift) 150{ 151 if (shift == 0) { 152 *r1 = x1; 153 *r0 = x0; 154 } else if (shift < 64) { 155 *r0 = x0 >> shift | x1 << (64 - shift); 156 *r1 = x1 >> shift; 157 } else if (shift < 128) { 158 *r0 = x1 >> (shift - 64); 159 *r1 = 0; 160 } else { 161 *r0 = 0; 162 *r1 = 0; 163 } 164} 165 166static inline void 167mul62x62(uint64_t *x0, uint64_t *x1, uint64_t a, uint64_t b) 168{ 169 uint32_t mask = ((uint32_t)1 << 31) - 1; 170 uint64_t a0 = a & mask; 171 uint64_t a1 = a >> 31 & mask; 172 uint64_t b0 = b & mask; 173 uint64_t b1 = b >> 31 & mask; 174 uint64_t p0 = a0 * b0; 175 uint64_t p2 = a1 * b1; 176 uint64_t p1 = (a0 + a1) * (b0 + b1) - p0 - p2; 177 uint64_t s0 = p0; 178 uint64_t s1 = (s0 >> 31) + p1; 179 uint64_t s2 = (s1 >> 31) + p2; 180 *x0 = (s0 & mask) | (s1 & mask) << 31 | s2 << 62; 181 *x1 = s2 >> 2; 182} 183 184static inline 185void mul64x32(uint64_t *x0, uint64_t *x1, uint64_t a, uint32_t b) 186{ 187 uint64_t t0 = (uint64_t)(uint32_t)a * b; 188 uint64_t t1 = (t0 >> 32) + (a >> 32) * b; 189 *x0 = t1 << 32 | (uint32_t)t0; 190 *x1 = t1 >> 32; 191} 192 193static inline void 194add128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0, 195 uint64_t b1) 196{ 197 *x0 = a0 + b0; 198 *x1 = a1 + b1 + (*x0 < a0); 199} 200 201static inline void 202sub128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0, 203 uint64_t b1) 204{ 205 *x0 = a0 - b0; 206 *x1 = a1 - b1 - (*x0 > a0); 207} 208 209static inline int 210cmp128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1) 211{ 212 return (a1 < b1 ? -1 : a1 > b1 ? 1 : a0 < b0 ? -1 : a0 > b0 ? 1 : 0); 213} 214 215static inline uint16_t 216fp16_normalise(uint16_t mnt, int *exp) 217{ 218 int shift; 219 220 if (!mnt) { 221 return 0; 222 } 223 224 for (shift = 8; shift; shift >>= 1) { 225 if (!(mnt >> (16 - shift))) { 226 mnt <<= shift; 227 *exp -= shift; 228 } 229 } 230 return mnt; 231} 232 233static inline uint32_t 234fp32_normalise(uint32_t mnt, int *exp) 235{ 236 int shift; 237 238 if (!mnt) { 239 return 0; 240 } 241 242 for (shift = 16; shift; shift >>= 1) { 243 if (!(mnt >> (32 - shift))) { 244 mnt <<= shift; 245 *exp -= shift; 246 } 247 } 248 return mnt; 249} 250 251static inline uint64_t 252fp64_normalise(uint64_t mnt, int *exp) 253{ 254 int shift; 255 256 if (!mnt) { 257 return 0; 258 } 259 260 for (shift = 32; shift; shift >>= 1) { 261 if (!(mnt >> (64 - shift))) { 262 mnt <<= shift; 263 *exp -= shift; 264 } 265 } 266 return mnt; 267} 268 269static inline void 270fp128_normalise(uint64_t *mnt0, uint64_t *mnt1, int *exp) 271{ 272 uint64_t x0 = *mnt0; 273 uint64_t x1 = *mnt1; 274 int shift; 275 276 if (!x0 && !x1) { 277 return; 278 } 279 280 if (!x1) { 281 x1 = x0; 282 x0 = 0; 283 *exp -= 64; 284 } 285 286 for (shift = 32; shift; shift >>= 1) { 287 if (!(x1 >> (64 - shift))) { 288 x1 = x1 << shift | x0 >> (64 - shift); 289 x0 <<= shift; 290 *exp -= shift; 291 } 292 } 293 294 *mnt0 = x0; 295 *mnt1 = x1; 296} 297 298static inline uint16_t 299fp16_pack(uint16_t sgn, uint16_t exp, uint16_t mnt) 300{ 301 return sgn << (FP16_BITS - 1) | exp << FP16_MANT_BITS | FP16_MANT(mnt); 302} 303 304static inline uint32_t 305fp32_pack(uint32_t sgn, uint32_t exp, uint32_t mnt) 306{ 307 return sgn << (FP32_BITS - 1) | exp << FP32_MANT_BITS | FP32_MANT(mnt); 308} 309 310static inline uint64_t 311fp64_pack(uint64_t sgn, uint64_t exp, uint64_t mnt) 312{ 313 return sgn << (FP64_BITS - 1) | exp << FP64_MANT_BITS | FP64_MANT(mnt); 314} 315 316static inline uint16_t 317fp16_zero(int sgn) 318{ 319 return fp16_pack(sgn, 0, 0); 320} 321 322static inline uint32_t 323fp32_zero(int sgn) 324{ 325 return fp32_pack(sgn, 0, 0); 326} 327 328static inline uint64_t 329fp64_zero(int sgn) 330{ 331 return fp64_pack(sgn, 0, 0); 332} 333 334static inline uint16_t 335fp16_max_normal(int sgn) 336{ 337 return fp16_pack(sgn, FP16_EXP_INF - 1, -1); 338} 339 340static inline uint32_t 341fp32_max_normal(int sgn) 342{ 343 return fp32_pack(sgn, FP32_EXP_INF - 1, -1); 344} 345 346static inline uint64_t 347fp64_max_normal(int sgn) 348{ 349 return fp64_pack(sgn, FP64_EXP_INF - 1, -1); 350} 351 352static inline uint16_t 353fp16_infinity(int sgn) 354{ 355 return fp16_pack(sgn, FP16_EXP_INF, 0); 356} 357 358static inline uint32_t 359fp32_infinity(int sgn) 360{ 361 return fp32_pack(sgn, FP32_EXP_INF, 0); 362} 363 364static inline uint64_t 365fp64_infinity(int sgn) 366{ 367 return fp64_pack(sgn, FP64_EXP_INF, 0); 368} 369 370static inline uint16_t 371fp16_defaultNaN() 372{ 373 return fp16_pack(0, FP16_EXP_INF, 1ULL << (FP16_MANT_BITS - 1)); 374} 375 376static inline uint32_t 377fp32_defaultNaN() 378{ 379 return fp32_pack(0, FP32_EXP_INF, 1ULL << (FP32_MANT_BITS - 1)); 380} 381 382static inline uint64_t 383fp64_defaultNaN() 384{ 385 return fp64_pack(0, FP64_EXP_INF, 1ULL << (FP64_MANT_BITS - 1)); 386} 387 388static inline void 389fp16_unpack(int *sgn, int *exp, uint16_t *mnt, uint16_t x, int mode, 390 int *flags) 391{ 392 *sgn = x >> (FP16_BITS - 1); 393 *exp = FP16_EXP(x); 394 *mnt = FP16_MANT(x); 395 396 // Handle subnormals: 397 if (*exp) { 398 *mnt |= 1ULL << FP16_MANT_BITS; 399 } else { 400 ++*exp; 401 // IDC (Input Denormal) is not set in this case. 402 if (mode & FPLIB_FZ16) 403 *mnt = 0; 404 } 405} 406 407static inline void 408fp32_unpack(int *sgn, int *exp, uint32_t *mnt, uint32_t x, int mode, 409 int *flags) 410{ 411 *sgn = x >> (FP32_BITS - 1); 412 *exp = FP32_EXP(x); 413 *mnt = FP32_MANT(x); 414 415 // Handle subnormals: 416 if (*exp) { 417 *mnt |= 1ULL << FP32_MANT_BITS; 418 } else { 419 ++*exp; 420 if ((mode & FPLIB_FZ) && *mnt) { 421 *flags |= FPLIB_IDC; 422 *mnt = 0; 423 } 424 } 425} 426 427static inline void 428fp64_unpack(int *sgn, int *exp, uint64_t *mnt, uint64_t x, int mode, 429 int *flags) 430{ 431 *sgn = x >> (FP64_BITS - 1); 432 *exp = FP64_EXP(x); 433 *mnt = FP64_MANT(x); 434 435 // Handle subnormals: 436 if (*exp) { 437 *mnt |= 1ULL << FP64_MANT_BITS; 438 } else { 439 ++*exp; 440 if ((mode & FPLIB_FZ) && *mnt) { 441 *flags |= FPLIB_IDC; 442 *mnt = 0; 443 } 444 } 445} 446 447static inline int 448fp16_is_NaN(int exp, uint16_t mnt) 449{ 450 return exp == FP16_EXP_INF && FP16_MANT(mnt); 451} 452 453static inline int 454fp32_is_NaN(int exp, uint32_t mnt) 455{ 456 return exp == FP32_EXP_INF && FP32_MANT(mnt); 457} 458 459static inline int 460fp64_is_NaN(int exp, uint64_t mnt) 461{ 462 return exp == FP64_EXP_INF && FP64_MANT(mnt); 463} 464 465static inline int 466fp16_is_signalling_NaN(int exp, uint16_t mnt) 467{ 468 return fp16_is_NaN(exp, mnt) && !(mnt >> (FP16_MANT_BITS - 1) & 1); 469} 470 471static inline int 472fp32_is_signalling_NaN(int exp, uint32_t mnt) 473{ 474 return fp32_is_NaN(exp, mnt) && !(mnt >> (FP32_MANT_BITS - 1) & 1); 475} 476 477static inline int 478fp64_is_signalling_NaN(int exp, uint64_t mnt) 479{ 480 return fp64_is_NaN(exp, mnt) && !(mnt >> (FP64_MANT_BITS - 1) & 1); 481} 482 483static inline int 484fp16_is_quiet_NaN(int exp, uint16_t mnt) 485{ 486 return exp == FP16_EXP_INF && (mnt >> (FP16_MANT_BITS - 1) & 1); 487} 488 489static inline int 490fp32_is_quiet_NaN(int exp, uint32_t mnt) 491{ 492 return exp == FP32_EXP_INF && (mnt >> (FP32_MANT_BITS - 1) & 1); 493} 494 495static inline int 496fp64_is_quiet_NaN(int exp, uint64_t mnt) 497{ 498 return exp == FP64_EXP_INF && (mnt >> (FP64_MANT_BITS - 1) & 1); 499} 500 501static inline int 502fp16_is_infinity(int exp, uint16_t mnt) 503{ 504 return exp == FP16_EXP_INF && !FP16_MANT(mnt); 505} 506 507static inline int 508fp32_is_infinity(int exp, uint32_t mnt) 509{ 510 return exp == FP32_EXP_INF && !FP32_MANT(mnt); 511} 512 513static inline int 514fp64_is_infinity(int exp, uint64_t mnt) 515{ 516 return exp == FP64_EXP_INF && !FP64_MANT(mnt); 517} 518 519static inline uint16_t 520fp16_process_NaN(uint16_t a, int mode, int *flags) 521{ 522 if (!(a >> (FP16_MANT_BITS - 1) & 1)) { 523 *flags |= FPLIB_IOC; 524 a |= 1ULL << (FP16_MANT_BITS - 1); 525 } 526 return mode & FPLIB_DN ? fp16_defaultNaN() : a; 527} 528 529static inline uint32_t 530fp32_process_NaN(uint32_t a, int mode, int *flags) 531{ 532 if (!(a >> (FP32_MANT_BITS - 1) & 1)) { 533 *flags |= FPLIB_IOC; 534 a |= 1ULL << (FP32_MANT_BITS - 1); 535 } 536 return mode & FPLIB_DN ? fp32_defaultNaN() : a; 537} 538 539static inline uint64_t 540fp64_process_NaN(uint64_t a, int mode, int *flags) 541{ 542 if (!(a >> (FP64_MANT_BITS - 1) & 1)) { 543 *flags |= FPLIB_IOC; 544 a |= 1ULL << (FP64_MANT_BITS - 1); 545 } 546 return mode & FPLIB_DN ? fp64_defaultNaN() : a; 547} 548 549static uint16_t 550fp16_process_NaNs(uint16_t a, uint16_t b, int mode, int *flags) 551{ 552 int a_exp = FP16_EXP(a); 553 uint16_t a_mnt = FP16_MANT(a); 554 int b_exp = FP16_EXP(b); 555 uint16_t b_mnt = FP16_MANT(b); 556 557 // Handle signalling NaNs: 558 if (fp16_is_signalling_NaN(a_exp, a_mnt)) 559 return fp16_process_NaN(a, mode, flags); 560 if (fp16_is_signalling_NaN(b_exp, b_mnt)) 561 return fp16_process_NaN(b, mode, flags); 562 563 // Handle quiet NaNs: 564 if (fp16_is_NaN(a_exp, a_mnt)) 565 return fp16_process_NaN(a, mode, flags); 566 if (fp16_is_NaN(b_exp, b_mnt)) 567 return fp16_process_NaN(b, mode, flags); 568 569 return 0; 570} 571 572static uint32_t 573fp32_process_NaNs(uint32_t a, uint32_t b, int mode, int *flags) 574{ 575 int a_exp = FP32_EXP(a); 576 uint32_t a_mnt = FP32_MANT(a); 577 int b_exp = FP32_EXP(b); 578 uint32_t b_mnt = FP32_MANT(b); 579 580 // Handle signalling NaNs: 581 if (fp32_is_signalling_NaN(a_exp, a_mnt)) 582 return fp32_process_NaN(a, mode, flags); 583 if (fp32_is_signalling_NaN(b_exp, b_mnt)) 584 return fp32_process_NaN(b, mode, flags); 585 586 // Handle quiet NaNs: 587 if (fp32_is_NaN(a_exp, a_mnt)) 588 return fp32_process_NaN(a, mode, flags); 589 if (fp32_is_NaN(b_exp, b_mnt)) 590 return fp32_process_NaN(b, mode, flags); 591 592 return 0; 593} 594 595static uint64_t 596fp64_process_NaNs(uint64_t a, uint64_t b, int mode, int *flags) 597{ 598 int a_exp = FP64_EXP(a); 599 uint64_t a_mnt = FP64_MANT(a); 600 int b_exp = FP64_EXP(b); 601 uint64_t b_mnt = FP64_MANT(b); 602 603 // Handle signalling NaNs: 604 if (fp64_is_signalling_NaN(a_exp, a_mnt)) 605 return fp64_process_NaN(a, mode, flags); 606 if (fp64_is_signalling_NaN(b_exp, b_mnt)) 607 return fp64_process_NaN(b, mode, flags); 608 609 // Handle quiet NaNs: 610 if (fp64_is_NaN(a_exp, a_mnt)) 611 return fp64_process_NaN(a, mode, flags); 612 if (fp64_is_NaN(b_exp, b_mnt)) 613 return fp64_process_NaN(b, mode, flags); 614 615 return 0; 616} 617 618static uint16_t 619fp16_process_NaNs3(uint16_t a, uint16_t b, uint16_t c, int mode, int *flags) 620{ 621 int a_exp = FP16_EXP(a); 622 uint16_t a_mnt = FP16_MANT(a); 623 int b_exp = FP16_EXP(b); 624 uint16_t b_mnt = FP16_MANT(b); 625 int c_exp = FP16_EXP(c); 626 uint16_t c_mnt = FP16_MANT(c); 627 628 // Handle signalling NaNs: 629 if (fp16_is_signalling_NaN(a_exp, a_mnt)) 630 return fp16_process_NaN(a, mode, flags); 631 if (fp16_is_signalling_NaN(b_exp, b_mnt)) 632 return fp16_process_NaN(b, mode, flags); 633 if (fp16_is_signalling_NaN(c_exp, c_mnt)) 634 return fp16_process_NaN(c, mode, flags); 635 636 // Handle quiet NaNs: 637 if (fp16_is_NaN(a_exp, a_mnt)) 638 return fp16_process_NaN(a, mode, flags); 639 if (fp16_is_NaN(b_exp, b_mnt)) 640 return fp16_process_NaN(b, mode, flags); 641 if (fp16_is_NaN(c_exp, c_mnt)) 642 return fp16_process_NaN(c, mode, flags); 643 644 return 0; 645} 646 647static uint32_t 648fp32_process_NaNs3(uint32_t a, uint32_t b, uint32_t c, int mode, int *flags) 649{ 650 int a_exp = FP32_EXP(a); 651 uint32_t a_mnt = FP32_MANT(a); 652 int b_exp = FP32_EXP(b); 653 uint32_t b_mnt = FP32_MANT(b); 654 int c_exp = FP32_EXP(c); 655 uint32_t c_mnt = FP32_MANT(c); 656 657 // Handle signalling NaNs: 658 if (fp32_is_signalling_NaN(a_exp, a_mnt)) 659 return fp32_process_NaN(a, mode, flags); 660 if (fp32_is_signalling_NaN(b_exp, b_mnt)) 661 return fp32_process_NaN(b, mode, flags); 662 if (fp32_is_signalling_NaN(c_exp, c_mnt)) 663 return fp32_process_NaN(c, mode, flags); 664 665 // Handle quiet NaNs: 666 if (fp32_is_NaN(a_exp, a_mnt)) 667 return fp32_process_NaN(a, mode, flags); 668 if (fp32_is_NaN(b_exp, b_mnt)) 669 return fp32_process_NaN(b, mode, flags); 670 if (fp32_is_NaN(c_exp, c_mnt)) 671 return fp32_process_NaN(c, mode, flags); 672 673 return 0; 674} 675 676static uint64_t 677fp64_process_NaNs3(uint64_t a, uint64_t b, uint64_t c, int mode, int *flags) 678{ 679 int a_exp = FP64_EXP(a); 680 uint64_t a_mnt = FP64_MANT(a); 681 int b_exp = FP64_EXP(b); 682 uint64_t b_mnt = FP64_MANT(b); 683 int c_exp = FP64_EXP(c); 684 uint64_t c_mnt = FP64_MANT(c); 685 686 // Handle signalling NaNs: 687 if (fp64_is_signalling_NaN(a_exp, a_mnt)) 688 return fp64_process_NaN(a, mode, flags); 689 if (fp64_is_signalling_NaN(b_exp, b_mnt)) 690 return fp64_process_NaN(b, mode, flags); 691 if (fp64_is_signalling_NaN(c_exp, c_mnt)) 692 return fp64_process_NaN(c, mode, flags); 693 694 // Handle quiet NaNs: 695 if (fp64_is_NaN(a_exp, a_mnt)) 696 return fp64_process_NaN(a, mode, flags); 697 if (fp64_is_NaN(b_exp, b_mnt)) 698 return fp64_process_NaN(b, mode, flags); 699 if (fp64_is_NaN(c_exp, c_mnt)) 700 return fp64_process_NaN(c, mode, flags); 701 702 return 0; 703} 704 705static uint16_t 706fp16_round_(int sgn, int exp, uint16_t mnt, int rm, int mode, int *flags) 707{ 708 int biased_exp; // non-negative exponent value for result 709 uint16_t int_mant; // mantissa for result, less than (2 << FP16_MANT_BITS) 710 int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5 711 712 assert(rm != FPRounding_TIEAWAY); 713 714 // Flush to zero: 715 if ((mode & FPLIB_FZ16) && exp < 1) { 716 *flags |= FPLIB_UFC; 717 return fp16_zero(sgn); 718 } 719 720 // The bottom FP16_EXP_BITS bits of mnt are orred together: 721 mnt = (4ULL << FP16_MANT_BITS | mnt >> (FP16_EXP_BITS - 1) | 722 ((mnt & ((1ULL << FP16_EXP_BITS) - 1)) != 0)); 723 724 if (exp > 0) { 725 biased_exp = exp; 726 int_mant = mnt >> 2; 727 error = mnt & 3; 728 } else { 729 biased_exp = 0; 730 int_mant = lsr16(mnt, 3 - exp); 731 error = (lsr16(mnt, 1 - exp) & 3) | !!(mnt & (lsl16(1, 1 - exp) - 1)); 732 } 733 734 if (!biased_exp && error) { // xx should also check fpscr_val<11> 735 *flags |= FPLIB_UFC; 736 } 737 738 // Round up: 739 if ((rm == FPLIB_RN && (error == 3 || 740 (error == 2 && (int_mant & 1)))) || 741 (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) { 742 ++int_mant; 743 if (int_mant == 1ULL << FP16_MANT_BITS) { 744 // Rounded up from denormalized to normalized 745 biased_exp = 1; 746 } 747 if (int_mant == 2ULL << FP16_MANT_BITS) { 748 // Rounded up to next exponent 749 ++biased_exp; 750 int_mant >>= 1; 751 } 752 } 753 754 // Handle rounding to odd aka Von Neumann rounding: 755 if (error && rm == FPRounding_ODD) 756 int_mant |= 1; 757 758 // Handle overflow: 759 if (!(mode & FPLIB_AHP)) { 760 if (biased_exp >= (int)FP16_EXP_INF) { 761 *flags |= FPLIB_OFC | FPLIB_IXC; 762 if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) || 763 (rm == FPLIB_RM && sgn)) { 764 return fp16_infinity(sgn); 765 } else { 766 return fp16_max_normal(sgn); 767 } 768 } 769 } else { 770 if (biased_exp >= (int)FP16_EXP_INF + 1) { 771 *flags |= FPLIB_IOC; 772 return fp16_pack(sgn, FP16_EXP_INF, -1); 773 } 774 } 775 776 if (error) { 777 *flags |= FPLIB_IXC; 778 } 779 780 return fp16_pack(sgn, biased_exp, int_mant); 781} 782 783static uint16_t 784fp16_round(int sgn, int exp, uint16_t mnt, int mode, int *flags) 785{ 786 return fp16_round_(sgn, exp, mnt, mode & 3, mode, flags); 787} 788 789static uint32_t 790fp32_round_(int sgn, int exp, uint32_t mnt, int rm, int mode, int *flags) 791{ 792 int biased_exp; // non-negative exponent value for result 793 uint32_t int_mant; // mantissa for result, less than (2 << FP32_MANT_BITS) 794 int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5 795 796 assert(rm != FPRounding_TIEAWAY); 797 798 // Flush to zero: 799 if ((mode & FPLIB_FZ) && exp < 1) { 800 *flags |= FPLIB_UFC; 801 return fp32_zero(sgn); 802 } 803 804 // The bottom FP32_EXP_BITS bits of mnt are orred together: 805 mnt = (4ULL << FP32_MANT_BITS | mnt >> (FP32_EXP_BITS - 1) | 806 ((mnt & ((1ULL << FP32_EXP_BITS) - 1)) != 0)); 807 808 if (exp > 0) { 809 biased_exp = exp; 810 int_mant = mnt >> 2; 811 error = mnt & 3; 812 } else { 813 biased_exp = 0; 814 int_mant = lsr32(mnt, 3 - exp); 815 error = (lsr32(mnt, 1 - exp) & 3) | !!(mnt & (lsl32(1, 1 - exp) - 1)); 816 } 817 818 if (!biased_exp && error) { // xx should also check fpscr_val<11> 819 *flags |= FPLIB_UFC; 820 } 821 822 // Round up: 823 if ((rm == FPLIB_RN && (error == 3 || 824 (error == 2 && (int_mant & 1)))) || 825 (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) { 826 ++int_mant; 827 if (int_mant == 1ULL << FP32_MANT_BITS) { 828 // Rounded up from denormalized to normalized 829 biased_exp = 1; 830 } 831 if (int_mant == 2ULL << FP32_MANT_BITS) { 832 // Rounded up to next exponent 833 ++biased_exp; 834 int_mant >>= 1; 835 } 836 } 837 838 // Handle rounding to odd aka Von Neumann rounding: 839 if (error && rm == FPRounding_ODD) 840 int_mant |= 1; 841 842 // Handle overflow: 843 if (biased_exp >= (int)FP32_EXP_INF) { 844 *flags |= FPLIB_OFC | FPLIB_IXC; 845 if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) || 846 (rm == FPLIB_RM && sgn)) { 847 return fp32_infinity(sgn); 848 } else { 849 return fp32_max_normal(sgn); 850 } 851 } 852 853 if (error) { 854 *flags |= FPLIB_IXC; 855 } 856 857 return fp32_pack(sgn, biased_exp, int_mant); 858} 859 860static uint32_t 861fp32_round(int sgn, int exp, uint32_t mnt, int mode, int *flags) 862{ 863 return fp32_round_(sgn, exp, mnt, mode & 3, mode, flags); 864} 865 866static uint64_t 867fp64_round_(int sgn, int exp, uint64_t mnt, int rm, int mode, int *flags) 868{ 869 int biased_exp; // non-negative exponent value for result 870 uint64_t int_mant; // mantissa for result, less than (2 << FP64_MANT_BITS) 871 int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5 872 873 assert(rm != FPRounding_TIEAWAY); 874 875 // Flush to zero: 876 if ((mode & FPLIB_FZ) && exp < 1) { 877 *flags |= FPLIB_UFC; 878 return fp64_zero(sgn); 879 } 880 881 // The bottom FP64_EXP_BITS bits of mnt are orred together: 882 mnt = (4ULL << FP64_MANT_BITS | mnt >> (FP64_EXP_BITS - 1) | 883 ((mnt & ((1ULL << FP64_EXP_BITS) - 1)) != 0)); 884 885 if (exp > 0) { 886 biased_exp = exp; 887 int_mant = mnt >> 2; 888 error = mnt & 3; 889 } else { 890 biased_exp = 0; 891 int_mant = lsr64(mnt, 3 - exp); 892 error = (lsr64(mnt, 1 - exp) & 3) | !!(mnt & (lsl64(1, 1 - exp) - 1)); 893 } 894 895 if (!biased_exp && error) { // xx should also check fpscr_val<11> 896 *flags |= FPLIB_UFC; 897 } 898 899 // Round up: 900 if ((rm == FPLIB_RN && (error == 3 || 901 (error == 2 && (int_mant & 1)))) || 902 (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) { 903 ++int_mant; 904 if (int_mant == 1ULL << FP64_MANT_BITS) { 905 // Rounded up from denormalized to normalized 906 biased_exp = 1; 907 } 908 if (int_mant == 2ULL << FP64_MANT_BITS) { 909 // Rounded up to next exponent 910 ++biased_exp; 911 int_mant >>= 1; 912 } 913 } 914 915 // Handle rounding to odd aka Von Neumann rounding: 916 if (error && rm == FPRounding_ODD) 917 int_mant |= 1; 918 919 // Handle overflow: 920 if (biased_exp >= (int)FP64_EXP_INF) { 921 *flags |= FPLIB_OFC | FPLIB_IXC; 922 if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) || 923 (rm == FPLIB_RM && sgn)) { 924 return fp64_infinity(sgn); 925 } else { 926 return fp64_max_normal(sgn); 927 } 928 } 929 930 if (error) { 931 *flags |= FPLIB_IXC; 932 } 933 934 return fp64_pack(sgn, biased_exp, int_mant); 935} 936 937static uint64_t 938fp64_round(int sgn, int exp, uint64_t mnt, int mode, int *flags) 939{ 940 return fp64_round_(sgn, exp, mnt, mode & 3, mode, flags); 941} 942 943static int 944fp16_compare_eq(uint16_t a, uint16_t b, int mode, int *flags) 945{ 946 int a_sgn, a_exp, b_sgn, b_exp; 947 uint16_t a_mnt, b_mnt; 948 949 fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 950 fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 951 952 if (fp16_is_NaN(a_exp, a_mnt) || 953 fp16_is_NaN(b_exp, b_mnt)) { 954 if (fp16_is_signalling_NaN(a_exp, a_mnt) || 955 fp16_is_signalling_NaN(b_exp, b_mnt)) 956 *flags |= FPLIB_IOC; 957 return 0; 958 } 959 return a == b || (!a_mnt && !b_mnt); 960} 961 962static int 963fp16_compare_ge(uint16_t a, uint16_t b, int mode, int *flags) 964{ 965 int a_sgn, a_exp, b_sgn, b_exp; 966 uint16_t a_mnt, b_mnt; 967 968 fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 969 fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 970 971 if (fp16_is_NaN(a_exp, a_mnt) || 972 fp16_is_NaN(b_exp, b_mnt)) { 973 *flags |= FPLIB_IOC; 974 return 0; 975 } 976 if (!a_mnt && !b_mnt) 977 return 1; 978 if (a_sgn != b_sgn) 979 return b_sgn; 980 if (a_exp != b_exp) 981 return a_sgn ^ (a_exp > b_exp); 982 if (a_mnt != b_mnt) 983 return a_sgn ^ (a_mnt > b_mnt); 984 return 1; 985} 986 987static int 988fp16_compare_gt(uint16_t a, uint16_t b, int mode, int *flags) 989{ 990 int a_sgn, a_exp, b_sgn, b_exp; 991 uint16_t a_mnt, b_mnt; 992 993 fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 994 fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 995 996 if (fp16_is_NaN(a_exp, a_mnt) || 997 fp16_is_NaN(b_exp, b_mnt)) { 998 *flags |= FPLIB_IOC; 999 return 0; 1000 } 1001 if (!a_mnt && !b_mnt) 1002 return 0; 1003 if (a_sgn != b_sgn) 1004 return b_sgn; 1005 if (a_exp != b_exp) 1006 return a_sgn ^ (a_exp > b_exp); 1007 if (a_mnt != b_mnt) 1008 return a_sgn ^ (a_mnt > b_mnt); 1009 return 0; 1010} 1011 1012static int 1013fp16_compare_un(uint16_t a, uint16_t b, int mode, int *flags) 1014{ 1015 int a_sgn, a_exp, b_sgn, b_exp; 1016 uint16_t a_mnt, b_mnt; 1017 1018 fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 1019 fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 1020 1021 if (fp16_is_NaN(a_exp, a_mnt) || 1022 fp16_is_NaN(b_exp, b_mnt)) { 1023 if (fp16_is_signalling_NaN(a_exp, a_mnt) || 1024 fp16_is_signalling_NaN(b_exp, b_mnt)) 1025 *flags |= FPLIB_IOC; 1026 return 1; 1027 } 1028 return 0; 1029} 1030 1031static int 1032fp32_compare_eq(uint32_t a, uint32_t b, int mode, int *flags) 1033{ 1034 int a_sgn, a_exp, b_sgn, b_exp; 1035 uint32_t a_mnt, b_mnt; 1036 1037 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 1038 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 1039 1040 if (fp32_is_NaN(a_exp, a_mnt) || 1041 fp32_is_NaN(b_exp, b_mnt)) { 1042 if (fp32_is_signalling_NaN(a_exp, a_mnt) || 1043 fp32_is_signalling_NaN(b_exp, b_mnt)) 1044 *flags |= FPLIB_IOC; 1045 return 0; 1046 } 1047 return a == b || (!a_mnt && !b_mnt); 1048} 1049 1050static int 1051fp32_compare_ge(uint32_t a, uint32_t b, int mode, int *flags) 1052{ 1053 int a_sgn, a_exp, b_sgn, b_exp; 1054 uint32_t a_mnt, b_mnt; 1055 1056 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 1057 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 1058 1059 if (fp32_is_NaN(a_exp, a_mnt) || 1060 fp32_is_NaN(b_exp, b_mnt)) { 1061 *flags |= FPLIB_IOC; 1062 return 0; 1063 } 1064 if (!a_mnt && !b_mnt) 1065 return 1; 1066 if (a_sgn != b_sgn) 1067 return b_sgn; 1068 if (a_exp != b_exp) 1069 return a_sgn ^ (a_exp > b_exp); 1070 if (a_mnt != b_mnt) 1071 return a_sgn ^ (a_mnt > b_mnt); 1072 return 1; 1073} 1074 1075static int 1076fp32_compare_gt(uint32_t a, uint32_t b, int mode, int *flags) 1077{ 1078 int a_sgn, a_exp, b_sgn, b_exp; 1079 uint32_t a_mnt, b_mnt; 1080 1081 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 1082 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 1083 1084 if (fp32_is_NaN(a_exp, a_mnt) || 1085 fp32_is_NaN(b_exp, b_mnt)) { 1086 *flags |= FPLIB_IOC; 1087 return 0; 1088 } 1089 if (!a_mnt && !b_mnt) 1090 return 0; 1091 if (a_sgn != b_sgn) 1092 return b_sgn; 1093 if (a_exp != b_exp) 1094 return a_sgn ^ (a_exp > b_exp); 1095 if (a_mnt != b_mnt) 1096 return a_sgn ^ (a_mnt > b_mnt); 1097 return 0; 1098} 1099 1100static int 1101fp32_compare_un(uint32_t a, uint32_t b, int mode, int *flags) 1102{ 1103 int a_sgn, a_exp, b_sgn, b_exp; 1104 uint32_t a_mnt, b_mnt; 1105 1106 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 1107 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 1108 1109 if (fp32_is_NaN(a_exp, a_mnt) || 1110 fp32_is_NaN(b_exp, b_mnt)) { 1111 if (fp32_is_signalling_NaN(a_exp, a_mnt) || 1112 fp32_is_signalling_NaN(b_exp, b_mnt)) 1113 *flags |= FPLIB_IOC; 1114 return 1; 1115 } 1116 return 0; 1117} 1118 1119static int 1120fp64_compare_eq(uint64_t a, uint64_t b, int mode, int *flags) 1121{ 1122 int a_sgn, a_exp, b_sgn, b_exp; 1123 uint64_t a_mnt, b_mnt; 1124 1125 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 1126 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 1127 1128 if (fp64_is_NaN(a_exp, a_mnt) || 1129 fp64_is_NaN(b_exp, b_mnt)) { 1130 if (fp64_is_signalling_NaN(a_exp, a_mnt) || 1131 fp64_is_signalling_NaN(b_exp, b_mnt)) 1132 *flags |= FPLIB_IOC; 1133 return 0; 1134 } 1135 return a == b || (!a_mnt && !b_mnt); 1136} 1137 1138static int 1139fp64_compare_ge(uint64_t a, uint64_t b, int mode, int *flags) 1140{ 1141 int a_sgn, a_exp, b_sgn, b_exp; 1142 uint64_t a_mnt, b_mnt; 1143 1144 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 1145 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 1146 1147 if (fp64_is_NaN(a_exp, a_mnt) || 1148 fp64_is_NaN(b_exp, b_mnt)) { 1149 *flags |= FPLIB_IOC; 1150 return 0; 1151 } 1152 if (!a_mnt && !b_mnt) 1153 return 1; 1154 if (a_sgn != b_sgn) 1155 return b_sgn; 1156 if (a_exp != b_exp) 1157 return a_sgn ^ (a_exp > b_exp); 1158 if (a_mnt != b_mnt) 1159 return a_sgn ^ (a_mnt > b_mnt); 1160 return 1; 1161} 1162 1163static int 1164fp64_compare_gt(uint64_t a, uint64_t b, int mode, int *flags) 1165{ 1166 int a_sgn, a_exp, b_sgn, b_exp; 1167 uint64_t a_mnt, b_mnt; 1168 1169 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 1170 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 1171 1172 if (fp64_is_NaN(a_exp, a_mnt) || 1173 fp64_is_NaN(b_exp, b_mnt)) { 1174 *flags |= FPLIB_IOC; 1175 return 0; 1176 } 1177 if (!a_mnt && !b_mnt) 1178 return 0; 1179 if (a_sgn != b_sgn) 1180 return b_sgn; 1181 if (a_exp != b_exp) 1182 return a_sgn ^ (a_exp > b_exp); 1183 if (a_mnt != b_mnt) 1184 return a_sgn ^ (a_mnt > b_mnt); 1185 return 0; 1186} 1187 1188static int 1189fp64_compare_un(uint64_t a, uint64_t b, int mode, int *flags) 1190{ 1191 int a_sgn, a_exp, b_sgn, b_exp; 1192 uint64_t a_mnt, b_mnt; 1193 1194 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 1195 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 1196 1197 if (fp64_is_NaN(a_exp, a_mnt) || 1198 fp64_is_NaN(b_exp, b_mnt)) { 1199 if (fp64_is_signalling_NaN(a_exp, a_mnt) || 1200 fp64_is_signalling_NaN(b_exp, b_mnt)) 1201 *flags |= FPLIB_IOC; 1202 return 1; 1203 } 1204 return 0; 1205} 1206 1207static uint16_t 1208fp16_add(uint16_t a, uint16_t b, int neg, int mode, int *flags) 1209{ 1210 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp; 1211 uint16_t a_mnt, b_mnt, x, x_mnt; 1212 1213 fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 1214 fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 1215 1216 if ((x = fp16_process_NaNs(a, b, mode, flags))) { 1217 return x; 1218 } 1219 1220 b_sgn ^= neg; 1221 1222 // Handle infinities and zeroes: 1223 if (a_exp == FP16_EXP_INF && b_exp == FP16_EXP_INF && a_sgn != b_sgn) { 1224 *flags |= FPLIB_IOC; 1225 return fp16_defaultNaN(); 1226 } else if (a_exp == FP16_EXP_INF) { 1227 return fp16_infinity(a_sgn); 1228 } else if (b_exp == FP16_EXP_INF) { 1229 return fp16_infinity(b_sgn); 1230 } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) { 1231 return fp16_zero(a_sgn); 1232 } 1233 1234 a_mnt <<= 3; 1235 b_mnt <<= 3; 1236 if (a_exp >= b_exp) { 1237 b_mnt = (lsr16(b_mnt, a_exp - b_exp) | 1238 !!(b_mnt & (lsl16(1, a_exp - b_exp) - 1))); 1239 b_exp = a_exp; 1240 } else { 1241 a_mnt = (lsr16(a_mnt, b_exp - a_exp) | 1242 !!(a_mnt & (lsl16(1, b_exp - a_exp) - 1))); 1243 a_exp = b_exp; 1244 } 1245 x_sgn = a_sgn; 1246 x_exp = a_exp; 1247 if (a_sgn == b_sgn) { 1248 x_mnt = a_mnt + b_mnt; 1249 } else if (a_mnt >= b_mnt) { 1250 x_mnt = a_mnt - b_mnt; 1251 } else { 1252 x_sgn ^= 1; 1253 x_mnt = b_mnt - a_mnt; 1254 } 1255 1256 if (!x_mnt) { 1257 // Sign of exact zero result depends on rounding mode 1258 return fp16_zero((mode & 3) == 2); 1259 } 1260 1261 x_mnt = fp16_normalise(x_mnt, &x_exp); 1262 1263 return fp16_round(x_sgn, x_exp + FP16_EXP_BITS - 3, x_mnt << 1, 1264 mode, flags); 1265} 1266 1267static uint32_t 1268fp32_add(uint32_t a, uint32_t b, int neg, int mode, int *flags) 1269{ 1270 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp; 1271 uint32_t a_mnt, b_mnt, x, x_mnt; 1272 1273 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 1274 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 1275 1276 if ((x = fp32_process_NaNs(a, b, mode, flags))) { 1277 return x; 1278 } 1279 1280 b_sgn ^= neg; 1281 1282 // Handle infinities and zeroes: 1283 if (a_exp == FP32_EXP_INF && b_exp == FP32_EXP_INF && a_sgn != b_sgn) { 1284 *flags |= FPLIB_IOC; 1285 return fp32_defaultNaN(); 1286 } else if (a_exp == FP32_EXP_INF) { 1287 return fp32_infinity(a_sgn); 1288 } else if (b_exp == FP32_EXP_INF) { 1289 return fp32_infinity(b_sgn); 1290 } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) { 1291 return fp32_zero(a_sgn); 1292 } 1293 1294 a_mnt <<= 3; 1295 b_mnt <<= 3; 1296 if (a_exp >= b_exp) { 1297 b_mnt = (lsr32(b_mnt, a_exp - b_exp) | 1298 !!(b_mnt & (lsl32(1, a_exp - b_exp) - 1))); 1299 b_exp = a_exp; 1300 } else { 1301 a_mnt = (lsr32(a_mnt, b_exp - a_exp) | 1302 !!(a_mnt & (lsl32(1, b_exp - a_exp) - 1))); 1303 a_exp = b_exp; 1304 } 1305 x_sgn = a_sgn; 1306 x_exp = a_exp; 1307 if (a_sgn == b_sgn) { 1308 x_mnt = a_mnt + b_mnt; 1309 } else if (a_mnt >= b_mnt) { 1310 x_mnt = a_mnt - b_mnt; 1311 } else { 1312 x_sgn ^= 1; 1313 x_mnt = b_mnt - a_mnt; 1314 } 1315 1316 if (!x_mnt) { 1317 // Sign of exact zero result depends on rounding mode 1318 return fp32_zero((mode & 3) == 2); 1319 } 1320 1321 x_mnt = fp32_normalise(x_mnt, &x_exp); 1322 1323 return fp32_round(x_sgn, x_exp + FP32_EXP_BITS - 3, x_mnt << 1, 1324 mode, flags); 1325} 1326 1327static uint64_t 1328fp64_add(uint64_t a, uint64_t b, int neg, int mode, int *flags) 1329{ 1330 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp; 1331 uint64_t a_mnt, b_mnt, x, x_mnt; 1332 1333 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 1334 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 1335 1336 if ((x = fp64_process_NaNs(a, b, mode, flags))) { 1337 return x; 1338 } 1339 1340 b_sgn ^= neg; 1341 1342 // Handle infinities and zeroes: 1343 if (a_exp == FP64_EXP_INF && b_exp == FP64_EXP_INF && a_sgn != b_sgn) { 1344 *flags |= FPLIB_IOC; 1345 return fp64_defaultNaN(); 1346 } else if (a_exp == FP64_EXP_INF) { 1347 return fp64_infinity(a_sgn); 1348 } else if (b_exp == FP64_EXP_INF) { 1349 return fp64_infinity(b_sgn); 1350 } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) { 1351 return fp64_zero(a_sgn); 1352 } 1353 1354 a_mnt <<= 3; 1355 b_mnt <<= 3; 1356 if (a_exp >= b_exp) { 1357 b_mnt = (lsr64(b_mnt, a_exp - b_exp) | 1358 !!(b_mnt & (lsl64(1, a_exp - b_exp) - 1))); 1359 b_exp = a_exp; 1360 } else { 1361 a_mnt = (lsr64(a_mnt, b_exp - a_exp) | 1362 !!(a_mnt & (lsl64(1, b_exp - a_exp) - 1))); 1363 a_exp = b_exp; 1364 } 1365 x_sgn = a_sgn; 1366 x_exp = a_exp; 1367 if (a_sgn == b_sgn) { 1368 x_mnt = a_mnt + b_mnt; 1369 } else if (a_mnt >= b_mnt) { 1370 x_mnt = a_mnt - b_mnt; 1371 } else { 1372 x_sgn ^= 1; 1373 x_mnt = b_mnt - a_mnt; 1374 } 1375 1376 if (!x_mnt) { 1377 // Sign of exact zero result depends on rounding mode 1378 return fp64_zero((mode & 3) == 2); 1379 } 1380 1381 x_mnt = fp64_normalise(x_mnt, &x_exp); 1382 1383 return fp64_round(x_sgn, x_exp + FP64_EXP_BITS - 3, x_mnt << 1, 1384 mode, flags); 1385} 1386 1387static uint16_t 1388fp16_mul(uint16_t a, uint16_t b, int mode, int *flags) 1389{ 1390 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp; 1391 uint16_t a_mnt, b_mnt, x; 1392 uint32_t x_mnt; 1393 1394 fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 1395 fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 1396 1397 if ((x = fp16_process_NaNs(a, b, mode, flags))) { 1398 return x; 1399 } 1400 1401 // Handle infinities and zeroes: 1402 if ((a_exp == FP16_EXP_INF && !b_mnt) || 1403 (b_exp == FP16_EXP_INF && !a_mnt)) { 1404 *flags |= FPLIB_IOC; 1405 return fp16_defaultNaN(); 1406 } else if (a_exp == FP16_EXP_INF || b_exp == FP16_EXP_INF) { 1407 return fp16_infinity(a_sgn ^ b_sgn); 1408 } else if (!a_mnt || !b_mnt) { 1409 return fp16_zero(a_sgn ^ b_sgn); 1410 } 1411 1412 // Multiply and normalise: 1413 x_sgn = a_sgn ^ b_sgn; 1414 x_exp = a_exp + b_exp - FP16_EXP_BIAS + 2 * FP16_EXP_BITS + 1; 1415 x_mnt = (uint32_t)a_mnt * b_mnt; 1416 x_mnt = fp32_normalise(x_mnt, &x_exp); 1417 1418 // Convert to FP16_BITS bits, collapsing error into bottom bit: 1419 x_mnt = lsr32(x_mnt, FP16_BITS - 1) | !!lsl32(x_mnt, FP16_BITS + 1); 1420 1421 return fp16_round(x_sgn, x_exp, x_mnt, mode, flags); 1422} 1423 1424static uint32_t 1425fp32_mul(uint32_t a, uint32_t b, int mode, int *flags) 1426{ 1427 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp; 1428 uint32_t a_mnt, b_mnt, x; 1429 uint64_t x_mnt; 1430 1431 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 1432 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 1433 1434 if ((x = fp32_process_NaNs(a, b, mode, flags))) { 1435 return x; 1436 } 1437 1438 // Handle infinities and zeroes: 1439 if ((a_exp == FP32_EXP_INF && !b_mnt) || 1440 (b_exp == FP32_EXP_INF && !a_mnt)) { 1441 *flags |= FPLIB_IOC; 1442 return fp32_defaultNaN(); 1443 } else if (a_exp == FP32_EXP_INF || b_exp == FP32_EXP_INF) { 1444 return fp32_infinity(a_sgn ^ b_sgn); 1445 } else if (!a_mnt || !b_mnt) { 1446 return fp32_zero(a_sgn ^ b_sgn); 1447 } 1448 1449 // Multiply and normalise: 1450 x_sgn = a_sgn ^ b_sgn; 1451 x_exp = a_exp + b_exp - FP32_EXP_BIAS + 2 * FP32_EXP_BITS + 1; 1452 x_mnt = (uint64_t)a_mnt * b_mnt; 1453 x_mnt = fp64_normalise(x_mnt, &x_exp); 1454 1455 // Convert to FP32_BITS bits, collapsing error into bottom bit: 1456 x_mnt = lsr64(x_mnt, FP32_BITS - 1) | !!lsl64(x_mnt, FP32_BITS + 1); 1457 1458 return fp32_round(x_sgn, x_exp, x_mnt, mode, flags); 1459} 1460 1461static uint64_t 1462fp64_mul(uint64_t a, uint64_t b, int mode, int *flags) 1463{ 1464 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp; 1465 uint64_t a_mnt, b_mnt, x; 1466 uint64_t x0_mnt, x1_mnt; 1467 1468 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 1469 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 1470 1471 if ((x = fp64_process_NaNs(a, b, mode, flags))) { 1472 return x; 1473 } 1474 1475 // Handle infinities and zeroes: 1476 if ((a_exp == FP64_EXP_INF && !b_mnt) || 1477 (b_exp == FP64_EXP_INF && !a_mnt)) { 1478 *flags |= FPLIB_IOC; 1479 return fp64_defaultNaN(); 1480 } else if (a_exp == FP64_EXP_INF || b_exp == FP64_EXP_INF) { 1481 return fp64_infinity(a_sgn ^ b_sgn); 1482 } else if (!a_mnt || !b_mnt) { 1483 return fp64_zero(a_sgn ^ b_sgn); 1484 } 1485 1486 // Multiply and normalise: 1487 x_sgn = a_sgn ^ b_sgn; 1488 x_exp = a_exp + b_exp - FP64_EXP_BIAS + 2 * FP64_EXP_BITS + 1; 1489 mul62x62(&x0_mnt, &x1_mnt, a_mnt, b_mnt); 1490 fp128_normalise(&x0_mnt, &x1_mnt, &x_exp); 1491 1492 // Convert to FP64_BITS bits, collapsing error into bottom bit: 1493 x0_mnt = x1_mnt << 1 | !!x0_mnt; 1494 1495 return fp64_round(x_sgn, x_exp, x0_mnt, mode, flags); 1496} 1497 1498static uint16_t 1499fp16_muladd(uint16_t a, uint16_t b, uint16_t c, int scale, 1500 int mode, int *flags) 1501{ 1502 int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp; 1503 uint16_t a_mnt, b_mnt, c_mnt, x; 1504 uint32_t x_mnt, y_mnt; 1505 1506 fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 1507 fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 1508 fp16_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags); 1509 1510 x = fp16_process_NaNs3(a, b, c, mode, flags); 1511 1512 // Quiet NaN added to product of zero and infinity: 1513 if (fp16_is_quiet_NaN(a_exp, a_mnt) && 1514 ((!b_mnt && fp16_is_infinity(c_exp, c_mnt)) || 1515 (!c_mnt && fp16_is_infinity(b_exp, b_mnt)))) { 1516 x = fp16_defaultNaN(); 1517 *flags |= FPLIB_IOC; 1518 } 1519 1520 if (x) { 1521 return x; 1522 } 1523 1524 // Handle infinities and zeroes: 1525 if ((b_exp == FP16_EXP_INF && !c_mnt) || 1526 (c_exp == FP16_EXP_INF && !b_mnt) || 1527 (a_exp == FP16_EXP_INF && 1528 (b_exp == FP16_EXP_INF || c_exp == FP16_EXP_INF) && 1529 (a_sgn != (b_sgn ^ c_sgn)))) { 1530 *flags |= FPLIB_IOC; 1531 return fp16_defaultNaN(); 1532 } 1533 if (a_exp == FP16_EXP_INF) 1534 return fp16_infinity(a_sgn); 1535 if (b_exp == FP16_EXP_INF || c_exp == FP16_EXP_INF) 1536 return fp16_infinity(b_sgn ^ c_sgn); 1537 if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn)) 1538 return fp16_zero(a_sgn); 1539 1540 x_sgn = a_sgn; 1541 x_exp = a_exp + 2 * FP16_EXP_BITS - 3; 1542 x_mnt = (uint32_t)a_mnt << (FP16_MANT_BITS + 4); 1543 1544 // Multiply: 1545 y_sgn = b_sgn ^ c_sgn; 1546 y_exp = b_exp + c_exp - FP16_EXP_BIAS + 2 * FP16_EXP_BITS + 1 - 3; 1547 y_mnt = (uint32_t)b_mnt * c_mnt << 3; 1548 if (!y_mnt) { 1549 y_exp = x_exp; 1550 } 1551 1552 // Add: 1553 if (x_exp >= y_exp) { 1554 y_mnt = (lsr32(y_mnt, x_exp - y_exp) | 1555 !!(y_mnt & (lsl32(1, x_exp - y_exp) - 1))); 1556 y_exp = x_exp; 1557 } else { 1558 x_mnt = (lsr32(x_mnt, y_exp - x_exp) | 1559 !!(x_mnt & (lsl32(1, y_exp - x_exp) - 1))); 1560 x_exp = y_exp; 1561 } 1562 if (x_sgn == y_sgn) { 1563 x_mnt = x_mnt + y_mnt; 1564 } else if (x_mnt >= y_mnt) { 1565 x_mnt = x_mnt - y_mnt; 1566 } else { 1567 x_sgn ^= 1; 1568 x_mnt = y_mnt - x_mnt; 1569 } 1570 1571 if (!x_mnt) { 1572 // Sign of exact zero result depends on rounding mode 1573 return fp16_zero((mode & 3) == 2); 1574 } 1575 1576 // Normalise into FP16_BITS bits, collapsing error into bottom bit: 1577 x_mnt = fp32_normalise(x_mnt, &x_exp); 1578 x_mnt = x_mnt >> (FP16_BITS - 1) | !!(uint16_t)(x_mnt << 1); 1579 1580 return fp16_round(x_sgn, x_exp + scale, x_mnt, mode, flags); 1581} 1582 1583static uint32_t 1584fp32_muladd(uint32_t a, uint32_t b, uint32_t c, int scale, 1585 int mode, int *flags) 1586{ 1587 int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp; 1588 uint32_t a_mnt, b_mnt, c_mnt, x; 1589 uint64_t x_mnt, y_mnt; 1590 1591 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 1592 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 1593 fp32_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags); 1594 1595 x = fp32_process_NaNs3(a, b, c, mode, flags); 1596 1597 // Quiet NaN added to product of zero and infinity: 1598 if (fp32_is_quiet_NaN(a_exp, a_mnt) && 1599 ((!b_mnt && fp32_is_infinity(c_exp, c_mnt)) || 1600 (!c_mnt && fp32_is_infinity(b_exp, b_mnt)))) { 1601 x = fp32_defaultNaN(); 1602 *flags |= FPLIB_IOC; 1603 } 1604 1605 if (x) { 1606 return x; 1607 } 1608 1609 // Handle infinities and zeroes: 1610 if ((b_exp == FP32_EXP_INF && !c_mnt) || 1611 (c_exp == FP32_EXP_INF && !b_mnt) || 1612 (a_exp == FP32_EXP_INF && 1613 (b_exp == FP32_EXP_INF || c_exp == FP32_EXP_INF) && 1614 (a_sgn != (b_sgn ^ c_sgn)))) { 1615 *flags |= FPLIB_IOC; 1616 return fp32_defaultNaN(); 1617 } 1618 if (a_exp == FP32_EXP_INF) 1619 return fp32_infinity(a_sgn); 1620 if (b_exp == FP32_EXP_INF || c_exp == FP32_EXP_INF) 1621 return fp32_infinity(b_sgn ^ c_sgn); 1622 if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn)) 1623 return fp32_zero(a_sgn); 1624 1625 x_sgn = a_sgn; 1626 x_exp = a_exp + 2 * FP32_EXP_BITS - 3; 1627 x_mnt = (uint64_t)a_mnt << (FP32_MANT_BITS + 4); 1628 1629 // Multiply: 1630 y_sgn = b_sgn ^ c_sgn; 1631 y_exp = b_exp + c_exp - FP32_EXP_BIAS + 2 * FP32_EXP_BITS + 1 - 3; 1632 y_mnt = (uint64_t)b_mnt * c_mnt << 3; 1633 if (!y_mnt) { 1634 y_exp = x_exp; 1635 } 1636 1637 // Add: 1638 if (x_exp >= y_exp) { 1639 y_mnt = (lsr64(y_mnt, x_exp - y_exp) | 1640 !!(y_mnt & (lsl64(1, x_exp - y_exp) - 1))); 1641 y_exp = x_exp; 1642 } else { 1643 x_mnt = (lsr64(x_mnt, y_exp - x_exp) | 1644 !!(x_mnt & (lsl64(1, y_exp - x_exp) - 1))); 1645 x_exp = y_exp; 1646 } 1647 if (x_sgn == y_sgn) { 1648 x_mnt = x_mnt + y_mnt; 1649 } else if (x_mnt >= y_mnt) { 1650 x_mnt = x_mnt - y_mnt; 1651 } else { 1652 x_sgn ^= 1; 1653 x_mnt = y_mnt - x_mnt; 1654 } 1655 1656 if (!x_mnt) { 1657 // Sign of exact zero result depends on rounding mode 1658 return fp32_zero((mode & 3) == 2); 1659 } 1660 1661 // Normalise into FP32_BITS bits, collapsing error into bottom bit: 1662 x_mnt = fp64_normalise(x_mnt, &x_exp); 1663 x_mnt = x_mnt >> (FP32_BITS - 1) | !!(uint32_t)(x_mnt << 1); 1664 1665 return fp32_round(x_sgn, x_exp + scale, x_mnt, mode, flags); 1666} 1667 1668static uint64_t 1669fp64_muladd(uint64_t a, uint64_t b, uint64_t c, int scale, 1670 int mode, int *flags) 1671{ 1672 int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp; 1673 uint64_t a_mnt, b_mnt, c_mnt, x; 1674 uint64_t x0_mnt, x1_mnt, y0_mnt, y1_mnt; 1675 1676 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 1677 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 1678 fp64_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags); 1679 1680 x = fp64_process_NaNs3(a, b, c, mode, flags); 1681 1682 // Quiet NaN added to product of zero and infinity: 1683 if (fp64_is_quiet_NaN(a_exp, a_mnt) && 1684 ((!b_mnt && fp64_is_infinity(c_exp, c_mnt)) || 1685 (!c_mnt && fp64_is_infinity(b_exp, b_mnt)))) { 1686 x = fp64_defaultNaN(); 1687 *flags |= FPLIB_IOC; 1688 } 1689 1690 if (x) { 1691 return x; 1692 } 1693 1694 // Handle infinities and zeroes: 1695 if ((b_exp == FP64_EXP_INF && !c_mnt) || 1696 (c_exp == FP64_EXP_INF && !b_mnt) || 1697 (a_exp == FP64_EXP_INF && 1698 (b_exp == FP64_EXP_INF || c_exp == FP64_EXP_INF) && 1699 (a_sgn != (b_sgn ^ c_sgn)))) { 1700 *flags |= FPLIB_IOC; 1701 return fp64_defaultNaN(); 1702 } 1703 if (a_exp == FP64_EXP_INF) 1704 return fp64_infinity(a_sgn); 1705 if (b_exp == FP64_EXP_INF || c_exp == FP64_EXP_INF) 1706 return fp64_infinity(b_sgn ^ c_sgn); 1707 if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn)) 1708 return fp64_zero(a_sgn); 1709 1710 x_sgn = a_sgn; 1711 x_exp = a_exp + FP64_EXP_BITS; 1712 x0_mnt = 0; 1713 x1_mnt = a_mnt; 1714 1715 // Multiply: 1716 y_sgn = b_sgn ^ c_sgn; 1717 y_exp = b_exp + c_exp - FP64_EXP_BIAS + 2 * FP64_EXP_BITS + 1 - 3; 1718 mul62x62(&y0_mnt, &y1_mnt, b_mnt, c_mnt << 3); 1719 if (!y0_mnt && !y1_mnt) { 1720 y_exp = x_exp; 1721 } 1722 1723 // Add: 1724 if (x_exp >= y_exp) { 1725 uint64_t t0, t1; 1726 lsl128(&t0, &t1, y0_mnt, y1_mnt, 1727 x_exp - y_exp < 128 ? 128 - (x_exp - y_exp) : 0); 1728 lsr128(&y0_mnt, &y1_mnt, y0_mnt, y1_mnt, x_exp - y_exp); 1729 y0_mnt |= !!(t0 | t1); 1730 y_exp = x_exp; 1731 } else { 1732 uint64_t t0, t1; 1733 lsl128(&t0, &t1, x0_mnt, x1_mnt, 1734 y_exp - x_exp < 128 ? 128 - (y_exp - x_exp) : 0); 1735 lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y_exp - x_exp); 1736 x0_mnt |= !!(t0 | t1); 1737 x_exp = y_exp; 1738 } 1739 if (x_sgn == y_sgn) { 1740 add128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y0_mnt, y1_mnt); 1741 } else if (cmp128(x0_mnt, x1_mnt, y0_mnt, y1_mnt) >= 0) { 1742 sub128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y0_mnt, y1_mnt); 1743 } else { 1744 x_sgn ^= 1; 1745 sub128(&x0_mnt, &x1_mnt, y0_mnt, y1_mnt, x0_mnt, x1_mnt); 1746 } 1747 1748 if (!x0_mnt && !x1_mnt) { 1749 // Sign of exact zero result depends on rounding mode 1750 return fp64_zero((mode & 3) == 2); 1751 } 1752 1753 // Normalise into FP64_BITS bits, collapsing error into bottom bit: 1754 fp128_normalise(&x0_mnt, &x1_mnt, &x_exp); 1755 x0_mnt = x1_mnt << 1 | !!x0_mnt; 1756 1757 return fp64_round(x_sgn, x_exp + scale, x0_mnt, mode, flags); 1758} 1759 1760static uint16_t 1761fp16_div(uint16_t a, uint16_t b, int mode, int *flags) 1762{ 1763 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp; 1764 uint16_t a_mnt, b_mnt, x; 1765 uint32_t x_mnt; 1766 1767 fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 1768 fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 1769 1770 if ((x = fp16_process_NaNs(a, b, mode, flags))) 1771 return x; 1772 1773 // Handle infinities and zeroes: 1774 if ((a_exp == FP16_EXP_INF && b_exp == FP16_EXP_INF) || 1775 (!a_mnt && !b_mnt)) { 1776 *flags |= FPLIB_IOC; 1777 return fp16_defaultNaN(); 1778 } 1779 if (a_exp == FP16_EXP_INF || !b_mnt) { 1780 if (a_exp != FP16_EXP_INF) 1781 *flags |= FPLIB_DZC; 1782 return fp16_infinity(a_sgn ^ b_sgn); 1783 } 1784 if (!a_mnt || b_exp == FP16_EXP_INF) 1785 return fp16_zero(a_sgn ^ b_sgn); 1786 1787 // Divide, setting bottom bit if inexact: 1788 a_mnt = fp16_normalise(a_mnt, &a_exp); 1789 x_sgn = a_sgn ^ b_sgn; 1790 x_exp = a_exp - b_exp + (FP16_EXP_BIAS + FP16_BITS + 2 * FP16_EXP_BITS - 3); 1791 x_mnt = ((uint32_t)a_mnt << (FP16_MANT_BITS - FP16_EXP_BITS + 3)) / b_mnt; 1792 x_mnt |= (x_mnt * b_mnt != 1793 (uint32_t)a_mnt << (FP16_MANT_BITS - FP16_EXP_BITS + 3)); 1794 1795 // Normalise into FP16_BITS bits, collapsing error into bottom bit: 1796 x_mnt = fp32_normalise(x_mnt, &x_exp); 1797 x_mnt = x_mnt >> (FP16_BITS - 1) | !!(uint16_t)(x_mnt << 1); 1798 1799 return fp16_round(x_sgn, x_exp, x_mnt, mode, flags); 1800} 1801 1802static uint32_t 1803fp32_div(uint32_t a, uint32_t b, int mode, int *flags) 1804{ 1805 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp; 1806 uint32_t a_mnt, b_mnt, x; 1807 uint64_t x_mnt; 1808 1809 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 1810 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 1811 1812 if ((x = fp32_process_NaNs(a, b, mode, flags))) 1813 return x; 1814 1815 // Handle infinities and zeroes: 1816 if ((a_exp == FP32_EXP_INF && b_exp == FP32_EXP_INF) || 1817 (!a_mnt && !b_mnt)) { 1818 *flags |= FPLIB_IOC; 1819 return fp32_defaultNaN(); 1820 } 1821 if (a_exp == FP32_EXP_INF || !b_mnt) { 1822 if (a_exp != FP32_EXP_INF) 1823 *flags |= FPLIB_DZC; 1824 return fp32_infinity(a_sgn ^ b_sgn); 1825 } 1826 if (!a_mnt || b_exp == FP32_EXP_INF) 1827 return fp32_zero(a_sgn ^ b_sgn); 1828 1829 // Divide, setting bottom bit if inexact: 1830 a_mnt = fp32_normalise(a_mnt, &a_exp); 1831 x_sgn = a_sgn ^ b_sgn; 1832 x_exp = a_exp - b_exp + (FP32_EXP_BIAS + FP32_BITS + 2 * FP32_EXP_BITS - 3); 1833 x_mnt = ((uint64_t)a_mnt << (FP32_MANT_BITS - FP32_EXP_BITS + 3)) / b_mnt; 1834 x_mnt |= (x_mnt * b_mnt != 1835 (uint64_t)a_mnt << (FP32_MANT_BITS - FP32_EXP_BITS + 3)); 1836 1837 // Normalise into FP32_BITS bits, collapsing error into bottom bit: 1838 x_mnt = fp64_normalise(x_mnt, &x_exp); 1839 x_mnt = x_mnt >> (FP32_BITS - 1) | !!(uint32_t)(x_mnt << 1); 1840 1841 return fp32_round(x_sgn, x_exp, x_mnt, mode, flags); 1842} 1843 1844static uint64_t 1845fp64_div(uint64_t a, uint64_t b, int mode, int *flags) 1846{ 1847 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp, c; 1848 uint64_t a_mnt, b_mnt, x, x_mnt, x0_mnt, x1_mnt; 1849 1850 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 1851 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 1852 1853 if ((x = fp64_process_NaNs(a, b, mode, flags))) 1854 return x; 1855 1856 // Handle infinities and zeroes: 1857 if ((a_exp == FP64_EXP_INF && b_exp == FP64_EXP_INF) || 1858 (!a_mnt && !b_mnt)) { 1859 *flags |= FPLIB_IOC; 1860 return fp64_defaultNaN(); 1861 } 1862 if (a_exp == FP64_EXP_INF || !b_mnt) { 1863 if (a_exp != FP64_EXP_INF) 1864 *flags |= FPLIB_DZC; 1865 return fp64_infinity(a_sgn ^ b_sgn); 1866 } 1867 if (!a_mnt || b_exp == FP64_EXP_INF) 1868 return fp64_zero(a_sgn ^ b_sgn); 1869 1870 // Find reciprocal of divisor with Newton-Raphson: 1871 a_mnt = fp64_normalise(a_mnt, &a_exp); 1872 b_mnt = fp64_normalise(b_mnt, &b_exp); 1873 x_mnt = ~(uint64_t)0 / (b_mnt >> 31); 1874 mul64x32(&x0_mnt, &x1_mnt, b_mnt, x_mnt); 1875 sub128(&x0_mnt, &x1_mnt, 0, (uint64_t)1 << 32, x0_mnt, x1_mnt); 1876 lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 32); 1877 mul64x32(&x0_mnt, &x1_mnt, x0_mnt, x_mnt); 1878 lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 33); 1879 1880 // Multiply by dividend: 1881 x_sgn = a_sgn ^ b_sgn; 1882 x_exp = a_exp - b_exp + FP64_EXP_BIAS + 8; 1883 mul62x62(&x0_mnt, &x1_mnt, x0_mnt, a_mnt >> 2); 1884 lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 4); 1885 x_mnt = x1_mnt; 1886 1887 // This is an underestimate, so try adding one: 1888 mul62x62(&x0_mnt, &x1_mnt, b_mnt >> 2, x_mnt + 1); 1889 c = cmp128(x0_mnt, x1_mnt, 0, a_mnt >> 11); 1890 if (c <= 0) { 1891 ++x_mnt; 1892 } 1893 1894 x_mnt = fp64_normalise(x_mnt, &x_exp); 1895 1896 return fp64_round(x_sgn, x_exp, x_mnt << 1 | !!c, mode, flags); 1897} 1898 1899static void 1900set_fpscr0(FPSCR &fpscr, int flags) 1901{ 1902 if (flags & FPLIB_IDC) { 1903 fpscr.idc = 1; 1904 } 1905 if (flags & FPLIB_IOC) { 1906 fpscr.ioc = 1; 1907 } 1908 if (flags & FPLIB_DZC) { 1909 fpscr.dzc = 1; 1910 } 1911 if (flags & FPLIB_OFC) { 1912 fpscr.ofc = 1; 1913 } 1914 if (flags & FPLIB_UFC) { 1915 fpscr.ufc = 1; 1916 } 1917 if (flags & FPLIB_IXC) { 1918 fpscr.ixc = 1; 1919 } 1920} 1921 1922static uint16_t 1923fp16_scale(uint16_t a, int16_t b, int mode, int *flags) 1924{ 1925 int a_sgn, a_exp; 1926 uint16_t a_mnt; 1927 1928 fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 1929 1930 // Handle NaNs: 1931 if (fp16_is_NaN(a_exp, a_mnt)) { 1932 return fp16_process_NaN(a, mode, flags); 1933 } 1934 1935 // Handle zeroes: 1936 if (!a_mnt) { 1937 return fp16_zero(a_sgn); 1938 } 1939 1940 // Handle infinities: 1941 if (a_exp == FP16_EXP_INF) { 1942 return fp16_infinity(a_sgn); 1943 } 1944 1945 b = b < -300 ? -300 : b; 1946 b = b > 300 ? 300 : b; 1947 a_exp += b; 1948 a_mnt <<= 3; 1949 1950 a_mnt = fp16_normalise(a_mnt, &a_exp); 1951 1952 return fp16_round(a_sgn, a_exp + FP16_EXP_BITS - 3, a_mnt << 1, 1953 mode, flags); 1954} 1955 1956static uint32_t 1957fp32_scale(uint32_t a, int32_t b, int mode, int *flags) 1958{ 1959 int a_sgn, a_exp; 1960 uint32_t a_mnt; 1961 1962 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 1963 1964 // Handle NaNs: 1965 if (fp32_is_NaN(a_exp, a_mnt)) { 1966 return fp32_process_NaN(a, mode, flags); 1967 } 1968 1969 // Handle zeroes: 1970 if (!a_mnt) { 1971 return fp32_zero(a_sgn); 1972 } 1973 1974 // Handle infinities: 1975 if (a_exp == FP32_EXP_INF) { 1976 return fp32_infinity(a_sgn); 1977 } 1978 1979 b = b < -300 ? -300 : b; 1980 b = b > 300 ? 300 : b; 1981 a_exp += b; 1982 a_mnt <<= 3; 1983 1984 a_mnt = fp32_normalise(a_mnt, &a_exp); 1985 1986 return fp32_round(a_sgn, a_exp + FP32_EXP_BITS - 3, a_mnt << 1, 1987 mode, flags); 1988} 1989 1990static uint64_t 1991fp64_scale(uint64_t a, int64_t b, int mode, int *flags) 1992{ 1993 int a_sgn, a_exp; 1994 uint64_t a_mnt; 1995 1996 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 1997 1998 // Handle NaNs: 1999 if (fp64_is_NaN(a_exp, a_mnt)) { 2000 return fp64_process_NaN(a, mode, flags); 2001 } 2002 2003 // Handle zeroes: 2004 if (!a_mnt) { 2005 return fp64_zero(a_sgn); 2006 } 2007 2008 // Handle infinities: 2009 if (a_exp == FP64_EXP_INF) { 2010 return fp64_infinity(a_sgn); 2011 } 2012 2013 b = b < -3000 ? -3000 : b; 2014 b = b > 3000 ? 3000 : b; 2015 a_exp += b; 2016 a_mnt <<= 3; 2017 2018 a_mnt = fp64_normalise(a_mnt, &a_exp); 2019 2020 return fp64_round(a_sgn, a_exp + FP64_EXP_BITS - 3, a_mnt << 1, 2021 mode, flags); 2022} 2023 2024static uint16_t 2025fp16_sqrt(uint16_t a, int mode, int *flags) 2026{ 2027 int a_sgn, a_exp, x_sgn, x_exp; 2028 uint16_t a_mnt, x_mnt; 2029 uint32_t x, t0, t1; 2030 2031 fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 2032 2033 // Handle NaNs: 2034 if (fp16_is_NaN(a_exp, a_mnt)) 2035 return fp16_process_NaN(a, mode, flags); 2036 2037 // Handle infinities and zeroes: 2038 if (!a_mnt) 2039 return fp16_zero(a_sgn); 2040 if (a_exp == FP16_EXP_INF && !a_sgn) 2041 return fp16_infinity(a_sgn); 2042 if (a_sgn) { 2043 *flags |= FPLIB_IOC; 2044 return fp16_defaultNaN(); 2045 } 2046 2047 a_mnt = fp16_normalise(a_mnt, &a_exp); 2048 if (a_exp & 1) { 2049 ++a_exp; 2050 a_mnt >>= 1; 2051 } 2052 2053 // x = (a * 3 + 5) / 8 2054 x = ((uint32_t)a_mnt << 14) + ((uint32_t)a_mnt << 13) + ((uint32_t)5 << 28); 2055 2056 // x = (a / x + x) / 2; // 8-bit accuracy 2057 x = (((uint32_t)a_mnt << 16) / (x >> 15) + (x >> 16)) << 15; 2058 2059 // x = (a / x + x) / 2; // 16-bit accuracy 2060 x = (((uint32_t)a_mnt << 16) / (x >> 15) + (x >> 16)) << 15; 2061 2062 x_sgn = 0; 2063 x_exp = (a_exp + 27) >> 1; 2064 x_mnt = ((x - (1 << 18)) >> 19) + 1; 2065 t1 = (uint32_t)x_mnt * x_mnt; 2066 t0 = (uint32_t)a_mnt << 9; 2067 if (t1 > t0) { 2068 --x_mnt; 2069 } 2070 2071 x_mnt = fp16_normalise(x_mnt, &x_exp); 2072 2073 return fp16_round(x_sgn, x_exp, x_mnt << 1 | (t1 != t0), mode, flags); 2074} 2075 2076static uint32_t 2077fp32_sqrt(uint32_t a, int mode, int *flags) 2078{ 2079 int a_sgn, a_exp, x_sgn, x_exp; 2080 uint32_t a_mnt, x, x_mnt; 2081 uint64_t t0, t1; 2082 2083 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 2084 2085 // Handle NaNs: 2086 if (fp32_is_NaN(a_exp, a_mnt)) 2087 return fp32_process_NaN(a, mode, flags); 2088 2089 // Handle infinities and zeroes: 2090 if (!a_mnt) 2091 return fp32_zero(a_sgn); 2092 if (a_exp == FP32_EXP_INF && !a_sgn) 2093 return fp32_infinity(a_sgn); 2094 if (a_sgn) { 2095 *flags |= FPLIB_IOC; 2096 return fp32_defaultNaN(); 2097 } 2098 2099 a_mnt = fp32_normalise(a_mnt, &a_exp); 2100 if (!(a_exp & 1)) { 2101 ++a_exp; 2102 a_mnt >>= 1; 2103 } 2104 2105 // x = (a * 3 + 5) / 8 2106 x = (a_mnt >> 2) + (a_mnt >> 3) + ((uint32_t)5 << 28); 2107 2108 // x = (a / x + x) / 2; // 8-bit accuracy 2109 x = (a_mnt / (x >> 15) + (x >> 16)) << 15; 2110 2111 // x = (a / x + x) / 2; // 16-bit accuracy 2112 x = (a_mnt / (x >> 15) + (x >> 16)) << 15; 2113 2114 // x = (a / x + x) / 2; // 32-bit accuracy 2115 x = ((((uint64_t)a_mnt << 32) / x) >> 2) + (x >> 1); 2116 2117 x_sgn = 0; 2118 x_exp = (a_exp + 147) >> 1; 2119 x_mnt = ((x - (1 << 5)) >> 6) + 1; 2120 t1 = (uint64_t)x_mnt * x_mnt; 2121 t0 = (uint64_t)a_mnt << 19; 2122 if (t1 > t0) { 2123 --x_mnt; 2124 } 2125 2126 x_mnt = fp32_normalise(x_mnt, &x_exp); 2127 2128 return fp32_round(x_sgn, x_exp, x_mnt << 1 | (t1 != t0), mode, flags); 2129} 2130 2131static uint64_t 2132fp64_sqrt(uint64_t a, int mode, int *flags) 2133{ 2134 int a_sgn, a_exp, x_sgn, x_exp, c; 2135 uint64_t a_mnt, x_mnt, r, x0, x1; 2136 uint32_t x; 2137 2138 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 2139 2140 // Handle NaNs: 2141 if (fp64_is_NaN(a_exp, a_mnt)) 2142 return fp64_process_NaN(a, mode, flags); 2143 2144 // Handle infinities and zeroes: 2145 if (!a_mnt) 2146 return fp64_zero(a_sgn); 2147 if (a_exp == FP64_EXP_INF && !a_sgn) 2148 return fp64_infinity(a_sgn); 2149 if (a_sgn) { 2150 *flags |= FPLIB_IOC; 2151 return fp64_defaultNaN(); 2152 } 2153 2154 a_mnt = fp64_normalise(a_mnt, &a_exp); 2155 if (a_exp & 1) { 2156 ++a_exp; 2157 a_mnt >>= 1; 2158 } 2159 2160 // x = (a * 3 + 5) / 8 2161 x = (a_mnt >> 34) + (a_mnt >> 35) + ((uint32_t)5 << 28); 2162 2163 // x = (a / x + x) / 2; // 8-bit accuracy 2164 x = ((a_mnt >> 32) / (x >> 15) + (x >> 16)) << 15; 2165 2166 // x = (a / x + x) / 2; // 16-bit accuracy 2167 x = ((a_mnt >> 32) / (x >> 15) + (x >> 16)) << 15; 2168 2169 // x = (a / x + x) / 2; // 32-bit accuracy 2170 x = ((a_mnt / x) >> 2) + (x >> 1); 2171 2172 // r = 1 / x; // 32-bit accuracy 2173 r = ((uint64_t)1 << 62) / x; 2174 2175 // r = r * (2 - x * r); // 64-bit accuracy 2176 mul64x32(&x0, &x1, -(uint64_t)x * r << 1, r); 2177 lsr128(&x0, &x1, x0, x1, 31); 2178 2179 // x = (x + a * r) / 2; // 64-bit accuracy 2180 mul62x62(&x0, &x1, a_mnt >> 10, x0 >> 2); 2181 lsl128(&x0, &x1, x0, x1, 5); 2182 lsr128(&x0, &x1, x0, x1, 56); 2183 2184 x0 = ((uint64_t)x << 31) + (x0 >> 1); 2185 2186 x_sgn = 0; 2187 x_exp = (a_exp + 1053) >> 1; 2188 x_mnt = x0; 2189 x_mnt = ((x_mnt - (1 << 8)) >> 9) + 1; 2190 mul62x62(&x0, &x1, x_mnt, x_mnt); 2191 lsl128(&x0, &x1, x0, x1, 19); 2192 c = cmp128(x0, x1, 0, a_mnt); 2193 if (c > 0) 2194 --x_mnt; 2195 2196 x_mnt = fp64_normalise(x_mnt, &x_exp); 2197 2198 return fp64_round(x_sgn, x_exp, x_mnt << 1 | !!c, mode, flags); 2199} 2200 2201static int 2202modeConv(FPSCR fpscr) 2203{ 2204 uint32_t x = (uint32_t)fpscr; 2205 return (x >> 22 & 0xf) | (x >> 19 & 1 ? FPLIB_FZ16 : 0); 2206 // AHP bit is ignored. Only fplibConvert uses AHP. 2207} 2208 2209static void 2210set_fpscr(FPSCR &fpscr, int flags) 2211{ 2212 // translate back to FPSCR 2213 bool underflow = false; 2214 if (flags & FPLIB_IDC) { 2215 fpscr.idc = 1; 2216 } 2217 if (flags & FPLIB_IOC) { 2218 fpscr.ioc = 1; 2219 } 2220 if (flags & FPLIB_DZC) { 2221 fpscr.dzc = 1; 2222 } 2223 if (flags & FPLIB_OFC) { 2224 fpscr.ofc = 1; 2225 } 2226 if (flags & FPLIB_UFC) { 2227 underflow = true; //xx Why is this required? 2228 fpscr.ufc = 1; 2229 } 2230 if ((flags & FPLIB_IXC) && !(underflow && fpscr.fz)) { 2231 fpscr.ixc = 1; 2232 } 2233} 2234 2235template <> 2236bool 2237fplibCompareEQ(uint16_t a, uint16_t b, FPSCR &fpscr) 2238{ 2239 int flags = 0; 2240 int x = fp16_compare_eq(a, b, modeConv(fpscr), &flags); 2241 set_fpscr(fpscr, flags); 2242 return x; 2243} 2244 2245template <> 2246bool 2247fplibCompareGE(uint16_t a, uint16_t b, FPSCR &fpscr) 2248{ 2249 int flags = 0; 2250 int x = fp16_compare_ge(a, b, modeConv(fpscr), &flags); 2251 set_fpscr(fpscr, flags); 2252 return x; 2253} 2254 2255template <> 2256bool 2257fplibCompareGT(uint16_t a, uint16_t b, FPSCR &fpscr) 2258{ 2259 int flags = 0; 2260 int x = fp16_compare_gt(a, b, modeConv(fpscr), &flags); 2261 set_fpscr(fpscr, flags); 2262 return x; 2263} 2264 2265template <> 2266bool 2267fplibCompareUN(uint16_t a, uint16_t b, FPSCR &fpscr) 2268{ 2269 int flags = 0; 2270 int x = fp16_compare_un(a, b, modeConv(fpscr), &flags); 2271 set_fpscr(fpscr, flags); 2272 return x; 2273} 2274 2275template <> 2276bool 2277fplibCompareEQ(uint32_t a, uint32_t b, FPSCR &fpscr) 2278{ 2279 int flags = 0; 2280 int x = fp32_compare_eq(a, b, modeConv(fpscr), &flags); 2281 set_fpscr(fpscr, flags); 2282 return x; 2283} 2284 2285template <> 2286bool 2287fplibCompareGE(uint32_t a, uint32_t b, FPSCR &fpscr) 2288{ 2289 int flags = 0; 2290 int x = fp32_compare_ge(a, b, modeConv(fpscr), &flags); 2291 set_fpscr(fpscr, flags); 2292 return x; 2293} 2294 2295template <> 2296bool 2297fplibCompareGT(uint32_t a, uint32_t b, FPSCR &fpscr) 2298{ 2299 int flags = 0; 2300 int x = fp32_compare_gt(a, b, modeConv(fpscr), &flags); 2301 set_fpscr(fpscr, flags); 2302 return x; 2303} 2304 2305template <> 2306bool 2307fplibCompareUN(uint32_t a, uint32_t b, FPSCR &fpscr) 2308{ 2309 int flags = 0; 2310 int x = fp32_compare_un(a, b, modeConv(fpscr), &flags); 2311 set_fpscr(fpscr, flags); 2312 return x; 2313} 2314 2315template <> 2316bool 2317fplibCompareEQ(uint64_t a, uint64_t b, FPSCR &fpscr) 2318{ 2319 int flags = 0; 2320 int x = fp64_compare_eq(a, b, modeConv(fpscr), &flags); 2321 set_fpscr(fpscr, flags); 2322 return x; 2323} 2324 2325template <> 2326bool 2327fplibCompareGE(uint64_t a, uint64_t b, FPSCR &fpscr) 2328{ 2329 int flags = 0; 2330 int x = fp64_compare_ge(a, b, modeConv(fpscr), &flags); 2331 set_fpscr(fpscr, flags); 2332 return x; 2333} 2334 2335template <> 2336bool 2337fplibCompareGT(uint64_t a, uint64_t b, FPSCR &fpscr) 2338{ 2339 int flags = 0; 2340 int x = fp64_compare_gt(a, b, modeConv(fpscr), &flags); 2341 set_fpscr(fpscr, flags); 2342 return x; 2343} 2344 2345template <> 2346bool 2347fplibCompareUN(uint64_t a, uint64_t b, FPSCR &fpscr) 2348{ 2349 int flags = 0; 2350 int x = fp64_compare_un(a, b, modeConv(fpscr), &flags); 2351 set_fpscr(fpscr, flags); 2352 return x; 2353} 2354 2355template <> 2356uint16_t 2357fplibAbs(uint16_t op) 2358{ 2359 return op & ~(1ULL << (FP16_BITS - 1)); 2360} 2361 2362template <> 2363uint32_t 2364fplibAbs(uint32_t op) 2365{ 2366 return op & ~(1ULL << (FP32_BITS - 1)); 2367} 2368 2369template <> 2370uint64_t 2371fplibAbs(uint64_t op) 2372{ 2373 return op & ~(1ULL << (FP64_BITS - 1)); 2374} 2375 2376template <> 2377uint16_t 2378fplibAdd(uint16_t op1, uint16_t op2, FPSCR &fpscr) 2379{ 2380 int flags = 0; 2381 uint16_t result = fp16_add(op1, op2, 0, modeConv(fpscr), &flags); 2382 set_fpscr0(fpscr, flags); 2383 return result; 2384} 2385 2386template <> 2387uint32_t 2388fplibAdd(uint32_t op1, uint32_t op2, FPSCR &fpscr) 2389{ 2390 int flags = 0; 2391 uint32_t result = fp32_add(op1, op2, 0, modeConv(fpscr), &flags); 2392 set_fpscr0(fpscr, flags); 2393 return result; 2394} 2395 2396template <> 2397uint64_t 2398fplibAdd(uint64_t op1, uint64_t op2, FPSCR &fpscr) 2399{ 2400 int flags = 0; 2401 uint64_t result = fp64_add(op1, op2, 0, modeConv(fpscr), &flags); 2402 set_fpscr0(fpscr, flags); 2403 return result; 2404} 2405 2406template <> 2407int 2408fplibCompare(uint16_t op1, uint16_t op2, bool signal_nans, FPSCR &fpscr) 2409{ 2410 int mode = modeConv(fpscr); 2411 int flags = 0; 2412 int sgn1, exp1, sgn2, exp2, result; 2413 uint16_t mnt1, mnt2; 2414 2415 fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); 2416 fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); 2417 2418 if (fp16_is_NaN(exp1, mnt1) || fp16_is_NaN(exp2, mnt2)) { 2419 result = 3; 2420 if (fp16_is_signalling_NaN(exp1, mnt1) || 2421 fp16_is_signalling_NaN(exp2, mnt2) || signal_nans) 2422 flags |= FPLIB_IOC; 2423 } else { 2424 if (op1 == op2 || (!mnt1 && !mnt2)) { 2425 result = 6; 2426 } else if (sgn1 != sgn2) { 2427 result = sgn1 ? 8 : 2; 2428 } else if (exp1 != exp2) { 2429 result = sgn1 ^ (exp1 < exp2) ? 8 : 2; 2430 } else { 2431 result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2; 2432 } 2433 } 2434 2435 set_fpscr0(fpscr, flags); 2436 2437 return result; 2438} 2439 2440template <> 2441int 2442fplibCompare(uint32_t op1, uint32_t op2, bool signal_nans, FPSCR &fpscr) 2443{ 2444 int mode = modeConv(fpscr); 2445 int flags = 0; 2446 int sgn1, exp1, sgn2, exp2, result; 2447 uint32_t mnt1, mnt2; 2448 2449 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); 2450 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); 2451 2452 if (fp32_is_NaN(exp1, mnt1) || fp32_is_NaN(exp2, mnt2)) { 2453 result = 3; 2454 if (fp32_is_signalling_NaN(exp1, mnt1) || 2455 fp32_is_signalling_NaN(exp2, mnt2) || signal_nans) 2456 flags |= FPLIB_IOC; 2457 } else { 2458 if (op1 == op2 || (!mnt1 && !mnt2)) { 2459 result = 6; 2460 } else if (sgn1 != sgn2) { 2461 result = sgn1 ? 8 : 2; 2462 } else if (exp1 != exp2) { 2463 result = sgn1 ^ (exp1 < exp2) ? 8 : 2; 2464 } else { 2465 result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2; 2466 } 2467 } 2468 2469 set_fpscr0(fpscr, flags); 2470 2471 return result; 2472} 2473 2474template <> 2475int 2476fplibCompare(uint64_t op1, uint64_t op2, bool signal_nans, FPSCR &fpscr) 2477{ 2478 int mode = modeConv(fpscr); 2479 int flags = 0; 2480 int sgn1, exp1, sgn2, exp2, result; 2481 uint64_t mnt1, mnt2; 2482 2483 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); 2484 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); 2485 2486 if (fp64_is_NaN(exp1, mnt1) || fp64_is_NaN(exp2, mnt2)) { 2487 result = 3; 2488 if (fp64_is_signalling_NaN(exp1, mnt1) || 2489 fp64_is_signalling_NaN(exp2, mnt2) || signal_nans) 2490 flags |= FPLIB_IOC; 2491 } else { 2492 if (op1 == op2 || (!mnt1 && !mnt2)) { 2493 result = 6; 2494 } else if (sgn1 != sgn2) { 2495 result = sgn1 ? 8 : 2; 2496 } else if (exp1 != exp2) { 2497 result = sgn1 ^ (exp1 < exp2) ? 8 : 2; 2498 } else { 2499 result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2; 2500 } 2501 } 2502 2503 set_fpscr0(fpscr, flags); 2504 2505 return result; 2506} 2507 2508static uint16_t 2509fp16_FPConvertNaN_32(uint32_t op) 2510{ 2511 return fp16_pack(op >> (FP32_BITS - 1), FP16_EXP_INF, 2512 1ULL << (FP16_MANT_BITS - 1) | 2513 op >> (FP32_MANT_BITS - FP16_MANT_BITS)); 2514} 2515 2516static uint16_t 2517fp16_FPConvertNaN_64(uint64_t op) 2518{ 2519 return fp16_pack(op >> (FP64_BITS - 1), FP16_EXP_INF, 2520 1ULL << (FP16_MANT_BITS - 1) | 2521 op >> (FP64_MANT_BITS - FP16_MANT_BITS)); 2522} 2523 2524static uint32_t 2525fp32_FPConvertNaN_16(uint16_t op) 2526{ 2527 return fp32_pack(op >> (FP16_BITS - 1), FP32_EXP_INF, 2528 1ULL << (FP32_MANT_BITS - 1) | 2529 (uint32_t)op << (FP32_MANT_BITS - FP16_MANT_BITS)); 2530} 2531 2532static uint32_t 2533fp32_FPConvertNaN_64(uint64_t op) 2534{ 2535 return fp32_pack(op >> (FP64_BITS - 1), FP32_EXP_INF, 2536 1ULL << (FP32_MANT_BITS - 1) | 2537 op >> (FP64_MANT_BITS - FP32_MANT_BITS)); 2538} 2539 2540static uint64_t 2541fp64_FPConvertNaN_16(uint16_t op) 2542{ 2543 return fp64_pack(op >> (FP16_BITS - 1), FP64_EXP_INF, 2544 1ULL << (FP64_MANT_BITS - 1) | 2545 (uint64_t)op << (FP64_MANT_BITS - FP16_MANT_BITS)); 2546} 2547 2548static uint64_t 2549fp64_FPConvertNaN_32(uint32_t op) 2550{ 2551 return fp64_pack(op >> (FP32_BITS - 1), FP64_EXP_INF, 2552 1ULL << (FP64_MANT_BITS - 1) | 2553 (uint64_t)op << (FP64_MANT_BITS - FP32_MANT_BITS)); 2554} 2555 2556static uint16_t 2557fp16_FPOnePointFive(int sgn) 2558{ 2559 return fp16_pack(sgn, FP16_EXP_BIAS, 1ULL << (FP16_MANT_BITS - 1)); 2560} 2561 2562static uint32_t 2563fp32_FPOnePointFive(int sgn) 2564{ 2565 return fp32_pack(sgn, FP32_EXP_BIAS, 1ULL << (FP32_MANT_BITS - 1)); 2566} 2567 2568static uint64_t 2569fp64_FPOnePointFive(int sgn) 2570{ 2571 return fp64_pack(sgn, FP64_EXP_BIAS, 1ULL << (FP64_MANT_BITS - 1)); 2572} 2573 2574static uint16_t 2575fp16_FPThree(int sgn) 2576{ 2577 return fp16_pack(sgn, FP16_EXP_BIAS + 1, 1ULL << (FP16_MANT_BITS - 1)); 2578} 2579 2580static uint32_t 2581fp32_FPThree(int sgn) 2582{ 2583 return fp32_pack(sgn, FP32_EXP_BIAS + 1, 1ULL << (FP32_MANT_BITS - 1)); 2584} 2585 2586static uint64_t 2587fp64_FPThree(int sgn) 2588{ 2589 return fp64_pack(sgn, FP64_EXP_BIAS + 1, 1ULL << (FP64_MANT_BITS - 1)); 2590} 2591 2592static uint16_t 2593fp16_FPTwo(int sgn) 2594{ 2595 return fp16_pack(sgn, FP16_EXP_BIAS + 1, 0); 2596} 2597 2598static uint32_t 2599fp32_FPTwo(int sgn) 2600{ 2601 return fp32_pack(sgn, FP32_EXP_BIAS + 1, 0); 2602} 2603 2604static uint64_t 2605fp64_FPTwo(int sgn) 2606{ 2607 return fp64_pack(sgn, FP64_EXP_BIAS + 1, 0); 2608} 2609 2610template <> 2611uint16_t 2612fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr) 2613{ 2614 int mode = modeConv(fpscr); 2615 int flags = 0; 2616 int sgn, exp; 2617 uint32_t mnt; 2618 uint16_t result; 2619 2620 // Unpack floating-point operand optionally with flush-to-zero: 2621 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags); 2622 2623 bool alt_hp = fpscr.ahp; 2624 2625 if (fp32_is_NaN(exp, mnt)) { 2626 if (alt_hp) { 2627 result = fp16_zero(sgn); 2628 } else if (fpscr.dn) { 2629 result = fp16_defaultNaN(); 2630 } else { 2631 result = fp16_FPConvertNaN_32(op); 2632 } 2633 if (!(mnt >> (FP32_MANT_BITS - 1) & 1) || alt_hp) { 2634 flags |= FPLIB_IOC; 2635 } 2636 } else if (exp == FP32_EXP_INF) { 2637 if (alt_hp) { 2638 result = ((uint16_t)sgn << (FP16_BITS - 1) | 2639 ((1ULL << (FP16_BITS - 1)) - 1)); 2640 flags |= FPLIB_IOC; 2641 } else { 2642 result = fp16_infinity(sgn); 2643 } 2644 } else if (!mnt) { 2645 result = fp16_zero(sgn); 2646 } else { 2647 result = 2648 fp16_round_(sgn, exp - FP32_EXP_BIAS + FP16_EXP_BIAS, 2649 mnt >> (FP32_MANT_BITS - FP16_BITS) | 2650 !!(mnt & ((1ULL << (FP32_MANT_BITS - FP16_BITS)) - 1)), 2651 rounding, (mode & 0xf) | alt_hp << 4, &flags); 2652 } 2653 2654 set_fpscr0(fpscr, flags); 2655 2656 return result; 2657} 2658 2659template <> 2660uint16_t 2661fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr) 2662{ 2663 int mode = modeConv(fpscr); 2664 int flags = 0; 2665 int sgn, exp; 2666 uint64_t mnt; 2667 uint16_t result; 2668 2669 // Unpack floating-point operand optionally with flush-to-zero: 2670 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags); 2671 2672 bool alt_hp = fpscr.ahp; 2673 2674 if (fp64_is_NaN(exp, mnt)) { 2675 if (alt_hp) { 2676 result = fp16_zero(sgn); 2677 } else if (fpscr.dn) { 2678 result = fp16_defaultNaN(); 2679 } else { 2680 result = fp16_FPConvertNaN_64(op); 2681 } 2682 if (!(mnt >> (FP64_MANT_BITS - 1) & 1) || alt_hp) { 2683 flags |= FPLIB_IOC; 2684 } 2685 } else if (exp == FP64_EXP_INF) { 2686 if (alt_hp) { 2687 result = ((uint16_t)sgn << (FP16_BITS - 1) | 2688 ((1ULL << (FP16_BITS - 1)) - 1)); 2689 flags |= FPLIB_IOC; 2690 } else { 2691 result = fp16_infinity(sgn); 2692 } 2693 } else if (!mnt) { 2694 result = fp16_zero(sgn); 2695 } else { 2696 result = 2697 fp16_round_(sgn, exp - FP64_EXP_BIAS + FP16_EXP_BIAS, 2698 mnt >> (FP64_MANT_BITS - FP16_BITS) | 2699 !!(mnt & ((1ULL << (FP64_MANT_BITS - FP16_BITS)) - 1)), 2700 rounding, (mode & 0xf) | alt_hp << 4, &flags); 2701 } 2702 2703 set_fpscr0(fpscr, flags); 2704 2705 return result; 2706} 2707 2708template <> 2709uint32_t 2710fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr) 2711{ 2712 int mode = modeConv(fpscr); 2713 int flags = 0; 2714 int sgn, exp; 2715 uint16_t mnt; 2716 uint32_t result; 2717 2718 // Unpack floating-point operand optionally with flush-to-zero: 2719 fp16_unpack(&sgn, &exp, &mnt, op, mode & 0xf, &flags); 2720 2721 if (fp16_is_NaN(exp, mnt) && !fpscr.ahp) { 2722 if (fpscr.dn) { 2723 result = fp32_defaultNaN(); 2724 } else { 2725 result = fp32_FPConvertNaN_16(op); 2726 } 2727 if (!(mnt >> (FP16_MANT_BITS - 1) & 1)) { 2728 flags |= FPLIB_IOC; 2729 } 2730 } else if (exp == FP16_EXP_INF && !fpscr.ahp) { 2731 result = fp32_infinity(sgn); 2732 } else if (!mnt) { 2733 result = fp32_zero(sgn); 2734 } else { 2735 mnt = fp16_normalise(mnt, &exp); 2736 result = fp32_pack(sgn, (exp - FP16_EXP_BIAS + 2737 FP32_EXP_BIAS + FP16_EXP_BITS), 2738 (uint32_t)mnt << (FP32_MANT_BITS - FP16_BITS + 1)); 2739 } 2740 2741 set_fpscr0(fpscr, flags); 2742 2743 return result; 2744} 2745 2746template <> 2747uint32_t 2748fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr) 2749{ 2750 int mode = modeConv(fpscr); 2751 int flags = 0; 2752 int sgn, exp; 2753 uint64_t mnt; 2754 uint32_t result; 2755 2756 // Unpack floating-point operand optionally with flush-to-zero: 2757 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags); 2758 2759 if (fp64_is_NaN(exp, mnt)) { 2760 if (fpscr.dn) { 2761 result = fp32_defaultNaN(); 2762 } else { 2763 result = fp32_FPConvertNaN_64(op); 2764 } 2765 if (!(mnt >> (FP64_MANT_BITS - 1) & 1)) { 2766 flags |= FPLIB_IOC; 2767 } 2768 } else if (exp == FP64_EXP_INF) { 2769 result = fp32_infinity(sgn); 2770 } else if (!mnt) { 2771 result = fp32_zero(sgn); 2772 } else { 2773 result = 2774 fp32_round_(sgn, exp - FP64_EXP_BIAS + FP32_EXP_BIAS, 2775 mnt >> (FP64_MANT_BITS - FP32_BITS) | 2776 !!(mnt & ((1ULL << (FP64_MANT_BITS - FP32_BITS)) - 1)), 2777 rounding, mode, &flags); 2778 } 2779 2780 set_fpscr0(fpscr, flags); 2781 2782 return result; 2783} 2784 2785template <> 2786uint64_t 2787fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr) 2788{ 2789 int mode = modeConv(fpscr); 2790 int flags = 0; 2791 int sgn, exp; 2792 uint16_t mnt; 2793 uint64_t result; 2794 2795 // Unpack floating-point operand optionally with flush-to-zero: 2796 fp16_unpack(&sgn, &exp, &mnt, op, mode & 0xf, &flags); 2797 2798 if (fp16_is_NaN(exp, mnt) && !fpscr.ahp) { 2799 if (fpscr.dn) { 2800 result = fp64_defaultNaN(); 2801 } else { 2802 result = fp64_FPConvertNaN_16(op); 2803 } 2804 if (!(mnt >> (FP16_MANT_BITS - 1) & 1)) { 2805 flags |= FPLIB_IOC; 2806 } 2807 } else if (exp == FP16_EXP_INF && !fpscr.ahp) { 2808 result = fp64_infinity(sgn); 2809 } else if (!mnt) { 2810 result = fp64_zero(sgn); 2811 } else { 2812 mnt = fp16_normalise(mnt, &exp); 2813 result = fp64_pack(sgn, (exp - FP16_EXP_BIAS + 2814 FP64_EXP_BIAS + FP16_EXP_BITS), 2815 (uint64_t)mnt << (FP64_MANT_BITS - FP16_BITS + 1)); 2816 } 2817 2818 set_fpscr0(fpscr, flags); 2819 2820 return result; 2821} 2822 2823template <> 2824uint64_t 2825fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr) 2826{ 2827 int mode = modeConv(fpscr); 2828 int flags = 0; 2829 int sgn, exp; 2830 uint32_t mnt; 2831 uint64_t result; 2832 2833 // Unpack floating-point operand optionally with flush-to-zero: 2834 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags); 2835 2836 if (fp32_is_NaN(exp, mnt)) { 2837 if (fpscr.dn) { 2838 result = fp64_defaultNaN(); 2839 } else { 2840 result = fp64_FPConvertNaN_32(op); 2841 } 2842 if (!(mnt >> (FP32_MANT_BITS - 1) & 1)) { 2843 flags |= FPLIB_IOC; 2844 } 2845 } else if (exp == FP32_EXP_INF) { 2846 result = fp64_infinity(sgn); 2847 } else if (!mnt) { 2848 result = fp64_zero(sgn); 2849 } else { 2850 mnt = fp32_normalise(mnt, &exp); 2851 result = fp64_pack(sgn, (exp - FP32_EXP_BIAS + 2852 FP64_EXP_BIAS + FP32_EXP_BITS), 2853 (uint64_t)mnt << (FP64_MANT_BITS - FP32_BITS + 1)); 2854 } 2855 2856 set_fpscr0(fpscr, flags); 2857 2858 return result; 2859} 2860 2861template <> 2862uint16_t 2863fplibMulAdd(uint16_t addend, uint16_t op1, uint16_t op2, FPSCR &fpscr) 2864{ 2865 int flags = 0; 2866 uint16_t result = fp16_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags); 2867 set_fpscr0(fpscr, flags); 2868 return result; 2869} 2870 2871template <> 2872uint32_t 2873fplibMulAdd(uint32_t addend, uint32_t op1, uint32_t op2, FPSCR &fpscr) 2874{ 2875 int flags = 0; 2876 uint32_t result = fp32_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags); 2877 set_fpscr0(fpscr, flags); 2878 return result; 2879} 2880 2881template <> 2882uint64_t 2883fplibMulAdd(uint64_t addend, uint64_t op1, uint64_t op2, FPSCR &fpscr) 2884{ 2885 int flags = 0; 2886 uint64_t result = fp64_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags); 2887 set_fpscr0(fpscr, flags); 2888 return result; 2889} 2890 2891template <> 2892uint16_t 2893fplibDiv(uint16_t op1, uint16_t op2, FPSCR &fpscr) 2894{ 2895 int flags = 0; 2896 uint16_t result = fp16_div(op1, op2, modeConv(fpscr), &flags); 2897 set_fpscr0(fpscr, flags); 2898 return result; 2899} 2900 2901template <> 2902uint32_t 2903fplibDiv(uint32_t op1, uint32_t op2, FPSCR &fpscr) 2904{ 2905 int flags = 0; 2906 uint32_t result = fp32_div(op1, op2, modeConv(fpscr), &flags); 2907 set_fpscr0(fpscr, flags); 2908 return result; 2909} 2910 2911template <> 2912uint64_t 2913fplibDiv(uint64_t op1, uint64_t op2, FPSCR &fpscr) 2914{ 2915 int flags = 0; 2916 uint64_t result = fp64_div(op1, op2, modeConv(fpscr), &flags); 2917 set_fpscr0(fpscr, flags); 2918 return result; 2919} 2920 2921template <> 2922uint16_t 2923fplibExpA(uint16_t op) 2924{ 2925 static uint16_t coeff[32] = { 2926 0x0000, 2927 0x0016, 2928 0x002d, 2929 0x0045, 2930 0x005d, 2931 0x0075, 2932 0x008e, 2933 0x00a8, 2934 0x00c2, 2935 0x00dc, 2936 0x00f8, 2937 0x0114, 2938 0x0130, 2939 0x014d, 2940 0x016b, 2941 0x0189, 2942 0x01a8, 2943 0x01c8, 2944 0x01e8, 2945 0x0209, 2946 0x022b, 2947 0x024e, 2948 0x0271, 2949 0x0295, 2950 0x02ba, 2951 0x02e0, 2952 0x0306, 2953 0x032e, 2954 0x0356, 2955 0x037f, 2956 0x03a9, 2957 0x03d4 2958 }; 2959 return ((((op >> 5) & ((1 << FP16_EXP_BITS) - 1)) << FP16_MANT_BITS) | 2960 coeff[op & ((1 << 5) - 1)]); 2961} 2962 2963template <> 2964uint32_t 2965fplibExpA(uint32_t op) 2966{ 2967 static uint32_t coeff[64] = { 2968 0x000000, 2969 0x0164d2, 2970 0x02cd87, 2971 0x043a29, 2972 0x05aac3, 2973 0x071f62, 2974 0x08980f, 2975 0x0a14d5, 2976 0x0b95c2, 2977 0x0d1adf, 2978 0x0ea43a, 2979 0x1031dc, 2980 0x11c3d3, 2981 0x135a2b, 2982 0x14f4f0, 2983 0x16942d, 2984 0x1837f0, 2985 0x19e046, 2986 0x1b8d3a, 2987 0x1d3eda, 2988 0x1ef532, 2989 0x20b051, 2990 0x227043, 2991 0x243516, 2992 0x25fed7, 2993 0x27cd94, 2994 0x29a15b, 2995 0x2b7a3a, 2996 0x2d583f, 2997 0x2f3b79, 2998 0x3123f6, 2999 0x3311c4, 3000 0x3504f3, 3001 0x36fd92, 3002 0x38fbaf, 3003 0x3aff5b, 3004 0x3d08a4, 3005 0x3f179a, 3006 0x412c4d, 3007 0x4346cd, 3008 0x45672a, 3009 0x478d75, 3010 0x49b9be, 3011 0x4bec15, 3012 0x4e248c, 3013 0x506334, 3014 0x52a81e, 3015 0x54f35b, 3016 0x5744fd, 3017 0x599d16, 3018 0x5bfbb8, 3019 0x5e60f5, 3020 0x60ccdf, 3021 0x633f89, 3022 0x65b907, 3023 0x68396a, 3024 0x6ac0c7, 3025 0x6d4f30, 3026 0x6fe4ba, 3027 0x728177, 3028 0x75257d, 3029 0x77d0df, 3030 0x7a83b3, 3031 0x7d3e0c 3032 }; 3033 return ((((op >> 6) & ((1 << FP32_EXP_BITS) - 1)) << FP32_MANT_BITS) | 3034 coeff[op & ((1 << 6) - 1)]); 3035} 3036 3037template <> 3038uint64_t 3039fplibExpA(uint64_t op) 3040{ 3041 static uint64_t coeff[64] = { 3042 0x0000000000000ULL, 3043 0x02c9a3e778061ULL, 3044 0x059b0d3158574ULL, 3045 0x0874518759bc8ULL, 3046 0x0b5586cf9890fULL, 3047 0x0e3ec32d3d1a2ULL, 3048 0x11301d0125b51ULL, 3049 0x1429aaea92de0ULL, 3050 0x172b83c7d517bULL, 3051 0x1a35beb6fcb75ULL, 3052 0x1d4873168b9aaULL, 3053 0x2063b88628cd6ULL, 3054 0x2387a6e756238ULL, 3055 0x26b4565e27cddULL, 3056 0x29e9df51fdee1ULL, 3057 0x2d285a6e4030bULL, 3058 0x306fe0a31b715ULL, 3059 0x33c08b26416ffULL, 3060 0x371a7373aa9cbULL, 3061 0x3a7db34e59ff7ULL, 3062 0x3dea64c123422ULL, 3063 0x4160a21f72e2aULL, 3064 0x44e086061892dULL, 3065 0x486a2b5c13cd0ULL, 3066 0x4bfdad5362a27ULL, 3067 0x4f9b2769d2ca7ULL, 3068 0x5342b569d4f82ULL, 3069 0x56f4736b527daULL, 3070 0x5ab07dd485429ULL, 3071 0x5e76f15ad2148ULL, 3072 0x6247eb03a5585ULL, 3073 0x6623882552225ULL, 3074 0x6a09e667f3bcdULL, 3075 0x6dfb23c651a2fULL, 3076 0x71f75e8ec5f74ULL, 3077 0x75feb564267c9ULL, 3078 0x7a11473eb0187ULL, 3079 0x7e2f336cf4e62ULL, 3080 0x82589994cce13ULL, 3081 0x868d99b4492edULL, 3082 0x8ace5422aa0dbULL, 3083 0x8f1ae99157736ULL, 3084 0x93737b0cdc5e5ULL, 3085 0x97d829fde4e50ULL, 3086 0x9c49182a3f090ULL, 3087 0xa0c667b5de565ULL, 3088 0xa5503b23e255dULL, 3089 0xa9e6b5579fdbfULL, 3090 0xae89f995ad3adULL, 3091 0xb33a2b84f15fbULL, 3092 0xb7f76f2fb5e47ULL, 3093 0xbcc1e904bc1d2ULL, 3094 0xc199bdd85529cULL, 3095 0xc67f12e57d14bULL, 3096 0xcb720dcef9069ULL, 3097 0xd072d4a07897cULL, 3098 0xd5818dcfba487ULL, 3099 0xda9e603db3285ULL, 3100 0xdfc97337b9b5fULL, 3101 0xe502ee78b3ff6ULL, 3102 0xea4afa2a490daULL, 3103 0xefa1bee615a27ULL, 3104 0xf50765b6e4540ULL, 3105 0xfa7c1819e90d8ULL 3106 }; 3107 return ((((op >> 6) & ((1 << FP64_EXP_BITS) - 1)) << FP64_MANT_BITS) | 3108 coeff[op & ((1 << 6) - 1)]); 3109} 3110 3111static uint16_t 3112fp16_repack(int sgn, int exp, uint16_t mnt) 3113{ 3114 return fp16_pack(sgn, mnt >> FP16_MANT_BITS ? exp : 0, mnt); 3115} 3116 3117static uint32_t 3118fp32_repack(int sgn, int exp, uint32_t mnt) 3119{ 3120 return fp32_pack(sgn, mnt >> FP32_MANT_BITS ? exp : 0, mnt); 3121} 3122 3123static uint64_t 3124fp64_repack(int sgn, int exp, uint64_t mnt) 3125{ 3126 return fp64_pack(sgn, mnt >> FP64_MANT_BITS ? exp : 0, mnt); 3127} 3128 3129static void 3130fp16_minmaxnum(uint16_t *op1, uint16_t *op2, int sgn) 3131{ 3132 // Treat a single quiet-NaN as +Infinity/-Infinity 3133 if (!((uint16_t)~(*op1 << 1) >> FP16_MANT_BITS) && 3134 (uint16_t)~(*op2 << 1) >> FP16_MANT_BITS) 3135 *op1 = fp16_infinity(sgn); 3136 if (!((uint16_t)~(*op2 << 1) >> FP16_MANT_BITS) && 3137 (uint16_t)~(*op1 << 1) >> FP16_MANT_BITS) 3138 *op2 = fp16_infinity(sgn); 3139} 3140 3141static void 3142fp32_minmaxnum(uint32_t *op1, uint32_t *op2, int sgn) 3143{ 3144 // Treat a single quiet-NaN as +Infinity/-Infinity 3145 if (!((uint32_t)~(*op1 << 1) >> FP32_MANT_BITS) && 3146 (uint32_t)~(*op2 << 1) >> FP32_MANT_BITS) 3147 *op1 = fp32_infinity(sgn); 3148 if (!((uint32_t)~(*op2 << 1) >> FP32_MANT_BITS) && 3149 (uint32_t)~(*op1 << 1) >> FP32_MANT_BITS) 3150 *op2 = fp32_infinity(sgn); 3151} 3152 3153static void 3154fp64_minmaxnum(uint64_t *op1, uint64_t *op2, int sgn) 3155{ 3156 // Treat a single quiet-NaN as +Infinity/-Infinity 3157 if (!((uint64_t)~(*op1 << 1) >> FP64_MANT_BITS) && 3158 (uint64_t)~(*op2 << 1) >> FP64_MANT_BITS) 3159 *op1 = fp64_infinity(sgn); 3160 if (!((uint64_t)~(*op2 << 1) >> FP64_MANT_BITS) && 3161 (uint64_t)~(*op1 << 1) >> FP64_MANT_BITS) 3162 *op2 = fp64_infinity(sgn); 3163} 3164 3165template <> 3166uint16_t 3167fplibMax(uint16_t op1, uint16_t op2, FPSCR &fpscr) 3168{ 3169 int mode = modeConv(fpscr); 3170 int flags = 0; 3171 int sgn1, exp1, sgn2, exp2; 3172 uint16_t mnt1, mnt2, x, result; 3173 3174 fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); 3175 fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); 3176 3177 if ((x = fp16_process_NaNs(op1, op2, mode, &flags))) { 3178 result = x; 3179 } else { 3180 result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ? 3181 fp16_repack(sgn1, exp1, mnt1) : 3182 fp16_repack(sgn2, exp2, mnt2)); 3183 } 3184 set_fpscr0(fpscr, flags); 3185 return result; 3186} 3187 3188template <> 3189uint32_t 3190fplibMax(uint32_t op1, uint32_t op2, FPSCR &fpscr) 3191{ 3192 int mode = modeConv(fpscr); 3193 int flags = 0; 3194 int sgn1, exp1, sgn2, exp2; 3195 uint32_t mnt1, mnt2, x, result; 3196 3197 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); 3198 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); 3199 3200 if ((x = fp32_process_NaNs(op1, op2, mode, &flags))) { 3201 result = x; 3202 } else { 3203 result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ? 3204 fp32_repack(sgn1, exp1, mnt1) : 3205 fp32_repack(sgn2, exp2, mnt2)); 3206 } 3207 set_fpscr0(fpscr, flags); 3208 return result; 3209} 3210 3211template <> 3212uint64_t 3213fplibMax(uint64_t op1, uint64_t op2, FPSCR &fpscr) 3214{ 3215 int mode = modeConv(fpscr); 3216 int flags = 0; 3217 int sgn1, exp1, sgn2, exp2; 3218 uint64_t mnt1, mnt2, x, result; 3219 3220 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); 3221 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); 3222 3223 if ((x = fp64_process_NaNs(op1, op2, mode, &flags))) { 3224 result = x; 3225 } else { 3226 result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ? 3227 fp64_repack(sgn1, exp1, mnt1) : 3228 fp64_repack(sgn2, exp2, mnt2)); 3229 } 3230 set_fpscr0(fpscr, flags); 3231 return result; 3232} 3233 3234template <> 3235uint16_t 3236fplibMaxNum(uint16_t op1, uint16_t op2, FPSCR &fpscr) 3237{ 3238 fp16_minmaxnum(&op1, &op2, 1); 3239 return fplibMax<uint16_t>(op1, op2, fpscr); 3240} 3241 3242template <> 3243uint32_t 3244fplibMaxNum(uint32_t op1, uint32_t op2, FPSCR &fpscr) 3245{ 3246 fp32_minmaxnum(&op1, &op2, 1); 3247 return fplibMax<uint32_t>(op1, op2, fpscr); 3248} 3249 3250template <> 3251uint64_t 3252fplibMaxNum(uint64_t op1, uint64_t op2, FPSCR &fpscr) 3253{ 3254 fp64_minmaxnum(&op1, &op2, 1); 3255 return fplibMax<uint64_t>(op1, op2, fpscr); 3256} 3257 3258template <> 3259uint16_t 3260fplibMin(uint16_t op1, uint16_t op2, FPSCR &fpscr) 3261{ 3262 int mode = modeConv(fpscr); 3263 int flags = 0; 3264 int sgn1, exp1, sgn2, exp2; 3265 uint16_t mnt1, mnt2, x, result; 3266 3267 fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); 3268 fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); 3269 3270 if ((x = fp16_process_NaNs(op1, op2, mode, &flags))) { 3271 result = x; 3272 } else { 3273 result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ? 3274 fp16_repack(sgn1, exp1, mnt1) : 3275 fp16_repack(sgn2, exp2, mnt2)); 3276 } 3277 set_fpscr0(fpscr, flags); 3278 return result; 3279} 3280 3281template <> 3282uint32_t 3283fplibMin(uint32_t op1, uint32_t op2, FPSCR &fpscr) 3284{ 3285 int mode = modeConv(fpscr); 3286 int flags = 0; 3287 int sgn1, exp1, sgn2, exp2; 3288 uint32_t mnt1, mnt2, x, result; 3289 3290 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); 3291 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); 3292 3293 if ((x = fp32_process_NaNs(op1, op2, mode, &flags))) { 3294 result = x; 3295 } else { 3296 result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ? 3297 fp32_repack(sgn1, exp1, mnt1) : 3298 fp32_repack(sgn2, exp2, mnt2)); 3299 } 3300 set_fpscr0(fpscr, flags); 3301 return result; 3302} 3303 3304template <> 3305uint64_t 3306fplibMin(uint64_t op1, uint64_t op2, FPSCR &fpscr) 3307{ 3308 int mode = modeConv(fpscr); 3309 int flags = 0; 3310 int sgn1, exp1, sgn2, exp2; 3311 uint64_t mnt1, mnt2, x, result; 3312 3313 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); 3314 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); 3315 3316 if ((x = fp64_process_NaNs(op1, op2, mode, &flags))) { 3317 result = x; 3318 } else { 3319 result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ? 3320 fp64_repack(sgn1, exp1, mnt1) : 3321 fp64_repack(sgn2, exp2, mnt2)); 3322 } 3323 set_fpscr0(fpscr, flags); 3324 return result; 3325} 3326 3327template <> 3328uint16_t 3329fplibMinNum(uint16_t op1, uint16_t op2, FPSCR &fpscr) 3330{ 3331 fp16_minmaxnum(&op1, &op2, 0); 3332 return fplibMin<uint16_t>(op1, op2, fpscr); 3333} 3334 3335template <> 3336uint32_t 3337fplibMinNum(uint32_t op1, uint32_t op2, FPSCR &fpscr) 3338{ 3339 fp32_minmaxnum(&op1, &op2, 0); 3340 return fplibMin<uint32_t>(op1, op2, fpscr); 3341} 3342 3343template <> 3344uint64_t 3345fplibMinNum(uint64_t op1, uint64_t op2, FPSCR &fpscr) 3346{ 3347 fp64_minmaxnum(&op1, &op2, 0); 3348 return fplibMin<uint64_t>(op1, op2, fpscr); 3349} 3350 3351template <> 3352uint16_t 3353fplibMul(uint16_t op1, uint16_t op2, FPSCR &fpscr) 3354{ 3355 int flags = 0; 3356 uint16_t result = fp16_mul(op1, op2, modeConv(fpscr), &flags); 3357 set_fpscr0(fpscr, flags); 3358 return result; 3359} 3360 3361template <> 3362uint32_t 3363fplibMul(uint32_t op1, uint32_t op2, FPSCR &fpscr) 3364{ 3365 int flags = 0; 3366 uint32_t result = fp32_mul(op1, op2, modeConv(fpscr), &flags); 3367 set_fpscr0(fpscr, flags); 3368 return result; 3369} 3370 3371template <> 3372uint64_t 3373fplibMul(uint64_t op1, uint64_t op2, FPSCR &fpscr) 3374{ 3375 int flags = 0; 3376 uint64_t result = fp64_mul(op1, op2, modeConv(fpscr), &flags); 3377 set_fpscr0(fpscr, flags); 3378 return result; 3379} 3380 3381template <> 3382uint16_t 3383fplibMulX(uint16_t op1, uint16_t op2, FPSCR &fpscr) 3384{ 3385 int mode = modeConv(fpscr); 3386 int flags = 0; 3387 int sgn1, exp1, sgn2, exp2; 3388 uint16_t mnt1, mnt2, result; 3389 3390 fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); 3391 fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); 3392 3393 result = fp16_process_NaNs(op1, op2, mode, &flags); 3394 if (!result) { 3395 if ((exp1 == FP16_EXP_INF && !mnt2) || 3396 (exp2 == FP16_EXP_INF && !mnt1)) { 3397 result = fp16_FPTwo(sgn1 ^ sgn2); 3398 } else if (exp1 == FP16_EXP_INF || exp2 == FP16_EXP_INF) { 3399 result = fp16_infinity(sgn1 ^ sgn2); 3400 } else if (!mnt1 || !mnt2) { 3401 result = fp16_zero(sgn1 ^ sgn2); 3402 } else { 3403 result = fp16_mul(op1, op2, mode, &flags); 3404 } 3405 } 3406 3407 set_fpscr0(fpscr, flags); 3408 3409 return result; 3410} 3411 3412template <> 3413uint32_t 3414fplibMulX(uint32_t op1, uint32_t op2, FPSCR &fpscr) 3415{ 3416 int mode = modeConv(fpscr); 3417 int flags = 0; 3418 int sgn1, exp1, sgn2, exp2; 3419 uint32_t mnt1, mnt2, result; 3420 3421 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); 3422 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); 3423 3424 result = fp32_process_NaNs(op1, op2, mode, &flags); 3425 if (!result) { 3426 if ((exp1 == FP32_EXP_INF && !mnt2) || 3427 (exp2 == FP32_EXP_INF && !mnt1)) { 3428 result = fp32_FPTwo(sgn1 ^ sgn2); 3429 } else if (exp1 == FP32_EXP_INF || exp2 == FP32_EXP_INF) { 3430 result = fp32_infinity(sgn1 ^ sgn2); 3431 } else if (!mnt1 || !mnt2) { 3432 result = fp32_zero(sgn1 ^ sgn2); 3433 } else { 3434 result = fp32_mul(op1, op2, mode, &flags); 3435 } 3436 } 3437 3438 set_fpscr0(fpscr, flags); 3439 3440 return result; 3441} 3442 3443template <> 3444uint64_t 3445fplibMulX(uint64_t op1, uint64_t op2, FPSCR &fpscr) 3446{ 3447 int mode = modeConv(fpscr); 3448 int flags = 0; 3449 int sgn1, exp1, sgn2, exp2; 3450 uint64_t mnt1, mnt2, result; 3451 3452 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); 3453 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); 3454 3455 result = fp64_process_NaNs(op1, op2, mode, &flags); 3456 if (!result) { 3457 if ((exp1 == FP64_EXP_INF && !mnt2) || 3458 (exp2 == FP64_EXP_INF && !mnt1)) { 3459 result = fp64_FPTwo(sgn1 ^ sgn2); 3460 } else if (exp1 == FP64_EXP_INF || exp2 == FP64_EXP_INF) { 3461 result = fp64_infinity(sgn1 ^ sgn2); 3462 } else if (!mnt1 || !mnt2) { 3463 result = fp64_zero(sgn1 ^ sgn2); 3464 } else { 3465 result = fp64_mul(op1, op2, mode, &flags); 3466 } 3467 } 3468 3469 set_fpscr0(fpscr, flags); 3470 3471 return result; 3472} 3473 3474template <> 3475uint16_t 3476fplibNeg(uint16_t op) 3477{ 3478 return op ^ 1ULL << (FP16_BITS - 1); 3479} 3480 3481template <> 3482uint32_t 3483fplibNeg(uint32_t op) 3484{ 3485 return op ^ 1ULL << (FP32_BITS - 1); 3486} 3487 3488template <> 3489uint64_t 3490fplibNeg(uint64_t op) 3491{ 3492 return op ^ 1ULL << (FP64_BITS - 1); 3493} 3494 3495static const uint8_t recip_sqrt_estimate[256] = { 3496 255, 253, 251, 249, 247, 245, 243, 242, 240, 238, 236, 234, 233, 231, 229, 228, 3497 226, 224, 223, 221, 219, 218, 216, 215, 213, 212, 210, 209, 207, 206, 204, 203, 3498 201, 200, 198, 197, 196, 194, 193, 192, 190, 189, 188, 186, 185, 184, 183, 181, 3499 180, 179, 178, 176, 175, 174, 173, 172, 170, 169, 168, 167, 166, 165, 164, 163, 3500 162, 160, 159, 158, 157, 156, 155, 154, 153, 152, 151, 150, 149, 148, 147, 146, 3501 145, 144, 143, 142, 141, 140, 140, 139, 138, 137, 136, 135, 134, 133, 132, 131, 3502 131, 130, 129, 128, 127, 126, 126, 125, 124, 123, 122, 121, 121, 120, 119, 118, 3503 118, 117, 116, 115, 114, 114, 113, 112, 111, 111, 110, 109, 109, 108, 107, 106, 3504 105, 104, 103, 101, 100, 99, 97, 96, 95, 93, 92, 91, 90, 88, 87, 86, 3505 85, 84, 82, 81, 80, 79, 78, 77, 76, 75, 74, 72, 71, 70, 69, 68, 3506 67, 66, 65, 64, 63, 62, 61, 60, 60, 59, 58, 57, 56, 55, 54, 53, 3507 52, 51, 51, 50, 49, 48, 47, 46, 46, 45, 44, 43, 42, 42, 41, 40, 3508 39, 38, 38, 37, 36, 35, 35, 34, 33, 33, 32, 31, 30, 30, 29, 28, 3509 28, 27, 26, 26, 25, 24, 24, 23, 22, 22, 21, 20, 20, 19, 19, 18, 3510 17, 17, 16, 16, 15, 14, 14, 13, 13, 12, 11, 11, 10, 10, 9, 9, 3511 8, 8, 7, 6, 6, 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 0 3512}; 3513 3514template <> 3515uint16_t 3516fplibRSqrtEstimate(uint16_t op, FPSCR &fpscr) 3517{ 3518 int mode = modeConv(fpscr); 3519 int flags = 0; 3520 int sgn, exp; 3521 uint16_t mnt, result; 3522 3523 fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags); 3524 3525 if (fp16_is_NaN(exp, mnt)) { 3526 result = fp16_process_NaN(op, mode, &flags); 3527 } else if (!mnt) { 3528 result = fp16_infinity(sgn); 3529 flags |= FPLIB_DZC; 3530 } else if (sgn) { 3531 result = fp16_defaultNaN(); 3532 flags |= FPLIB_IOC; 3533 } else if (exp == FP16_EXP_INF) { 3534 result = fp16_zero(0); 3535 } else { 3536 exp += FP16_EXP_BITS; 3537 mnt = fp16_normalise(mnt, &exp); 3538 mnt = recip_sqrt_estimate[(~exp & 1) << 7 | 3539 (mnt >> (FP16_BITS - 8) & 127)]; 3540 result = fp16_pack(0, (3 * FP16_EXP_BIAS - exp - 1) >> 1, 3541 mnt << (FP16_MANT_BITS - 8)); 3542 } 3543 3544 set_fpscr0(fpscr, flags); 3545 3546 return result; 3547} 3548 3549template <> 3550uint32_t 3551fplibRSqrtEstimate(uint32_t op, FPSCR &fpscr) 3552{ 3553 int mode = modeConv(fpscr); 3554 int flags = 0; 3555 int sgn, exp; 3556 uint32_t mnt, result; 3557 3558 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags); 3559 3560 if (fp32_is_NaN(exp, mnt)) { 3561 result = fp32_process_NaN(op, mode, &flags); 3562 } else if (!mnt) { 3563 result = fp32_infinity(sgn); 3564 flags |= FPLIB_DZC; 3565 } else if (sgn) { 3566 result = fp32_defaultNaN(); 3567 flags |= FPLIB_IOC; 3568 } else if (exp == FP32_EXP_INF) { 3569 result = fp32_zero(0); 3570 } else { 3571 exp += FP32_EXP_BITS; 3572 mnt = fp32_normalise(mnt, &exp); 3573 mnt = recip_sqrt_estimate[(~exp & 1) << 7 | 3574 (mnt >> (FP32_BITS - 8) & 127)]; 3575 result = fp32_pack(0, (3 * FP32_EXP_BIAS - exp - 1) >> 1, 3576 mnt << (FP32_MANT_BITS - 8)); 3577 } 3578 3579 set_fpscr0(fpscr, flags); 3580 3581 return result; 3582} 3583 3584template <> 3585uint64_t 3586fplibRSqrtEstimate(uint64_t op, FPSCR &fpscr) 3587{ 3588 int mode = modeConv(fpscr); 3589 int flags = 0; 3590 int sgn, exp; 3591 uint64_t mnt, result; 3592 3593 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags); 3594 3595 if (fp64_is_NaN(exp, mnt)) { 3596 result = fp64_process_NaN(op, mode, &flags); 3597 } else if (!mnt) { 3598 result = fp64_infinity(sgn); 3599 flags |= FPLIB_DZC; 3600 } else if (sgn) { 3601 result = fp64_defaultNaN(); 3602 flags |= FPLIB_IOC; 3603 } else if (exp == FP64_EXP_INF) { 3604 result = fp32_zero(0); 3605 } else { 3606 exp += FP64_EXP_BITS; 3607 mnt = fp64_normalise(mnt, &exp); 3608 mnt = recip_sqrt_estimate[(~exp & 1) << 7 | 3609 (mnt >> (FP64_BITS - 8) & 127)]; 3610 result = fp64_pack(0, (3 * FP64_EXP_BIAS - exp - 1) >> 1, 3611 mnt << (FP64_MANT_BITS - 8)); 3612 } 3613 3614 set_fpscr0(fpscr, flags); 3615 3616 return result; 3617} 3618 3619template <> 3620uint16_t 3621fplibRSqrtStepFused(uint16_t op1, uint16_t op2, FPSCR &fpscr) 3622{ 3623 int mode = modeConv(fpscr); 3624 int flags = 0; 3625 int sgn1, exp1, sgn2, exp2; 3626 uint16_t mnt1, mnt2, result; 3627 3628 op1 = fplibNeg<uint16_t>(op1); 3629 fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); 3630 fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); 3631 3632 result = fp16_process_NaNs(op1, op2, mode, &flags); 3633 if (!result) { 3634 if ((exp1 == FP16_EXP_INF && !mnt2) || 3635 (exp2 == FP16_EXP_INF && !mnt1)) { 3636 result = fp16_FPOnePointFive(0); 3637 } else if (exp1 == FP16_EXP_INF || exp2 == FP16_EXP_INF) { 3638 result = fp16_infinity(sgn1 ^ sgn2); 3639 } else { 3640 result = fp16_muladd(fp16_FPThree(0), op1, op2, -1, mode, &flags); 3641 } 3642 } 3643 3644 set_fpscr0(fpscr, flags); 3645 3646 return result; 3647} 3648 3649template <> 3650uint32_t 3651fplibRSqrtStepFused(uint32_t op1, uint32_t op2, FPSCR &fpscr) 3652{ 3653 int mode = modeConv(fpscr); 3654 int flags = 0; 3655 int sgn1, exp1, sgn2, exp2; 3656 uint32_t mnt1, mnt2, result; 3657 3658 op1 = fplibNeg<uint32_t>(op1); 3659 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); 3660 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); 3661 3662 result = fp32_process_NaNs(op1, op2, mode, &flags); 3663 if (!result) { 3664 if ((exp1 == FP32_EXP_INF && !mnt2) || 3665 (exp2 == FP32_EXP_INF && !mnt1)) { 3666 result = fp32_FPOnePointFive(0); 3667 } else if (exp1 == FP32_EXP_INF || exp2 == FP32_EXP_INF) { 3668 result = fp32_infinity(sgn1 ^ sgn2); 3669 } else { 3670 result = fp32_muladd(fp32_FPThree(0), op1, op2, -1, mode, &flags); 3671 } 3672 } 3673 3674 set_fpscr0(fpscr, flags); 3675 3676 return result; 3677} 3678 3679template <> 3680uint64_t 3681fplibRSqrtStepFused(uint64_t op1, uint64_t op2, FPSCR &fpscr) 3682{ 3683 int mode = modeConv(fpscr); 3684 int flags = 0; 3685 int sgn1, exp1, sgn2, exp2; 3686 uint64_t mnt1, mnt2, result; 3687 3688 op1 = fplibNeg<uint64_t>(op1); 3689 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); 3690 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); 3691 3692 result = fp64_process_NaNs(op1, op2, mode, &flags); 3693 if (!result) { 3694 if ((exp1 == FP64_EXP_INF && !mnt2) || 3695 (exp2 == FP64_EXP_INF && !mnt1)) { 3696 result = fp64_FPOnePointFive(0); 3697 } else if (exp1 == FP64_EXP_INF || exp2 == FP64_EXP_INF) { 3698 result = fp64_infinity(sgn1 ^ sgn2); 3699 } else { 3700 result = fp64_muladd(fp64_FPThree(0), op1, op2, -1, mode, &flags); 3701 } 3702 } 3703 3704 set_fpscr0(fpscr, flags); 3705 3706 return result; 3707} 3708 3709template <> 3710uint16_t 3711fplibRecipEstimate(uint16_t op, FPSCR &fpscr) 3712{ 3713 int mode = modeConv(fpscr); 3714 int flags = 0; 3715 int sgn, exp; 3716 uint16_t mnt, result; 3717 3718 fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags); 3719 3720 if (fp16_is_NaN(exp, mnt)) { 3721 result = fp16_process_NaN(op, mode, &flags); 3722 } else if (exp == FP16_EXP_INF) { 3723 result = fp16_zero(sgn); 3724 } else if (!mnt) { 3725 result = fp16_infinity(sgn); 3726 flags |= FPLIB_DZC; 3727 } else if (!((uint16_t)(op << 1) >> (FP16_MANT_BITS - 1))) { 3728 bool overflow_to_inf = false; 3729 switch (FPCRRounding(fpscr)) { 3730 case FPRounding_TIEEVEN: 3731 overflow_to_inf = true; 3732 break; 3733 case FPRounding_POSINF: 3734 overflow_to_inf = !sgn; 3735 break; 3736 case FPRounding_NEGINF: 3737 overflow_to_inf = sgn; 3738 break; 3739 case FPRounding_ZERO: 3740 overflow_to_inf = false; 3741 break; 3742 default: 3743 assert(0); 3744 } 3745 result = overflow_to_inf ? fp16_infinity(sgn) : fp16_max_normal(sgn); 3746 flags |= FPLIB_OFC | FPLIB_IXC; 3747 } else if (fpscr.fz16 && exp >= 2 * FP16_EXP_BIAS - 1) { 3748 result = fp16_zero(sgn); 3749 flags |= FPLIB_UFC; 3750 } else { 3751 exp += FP16_EXP_BITS; 3752 mnt = fp16_normalise(mnt, &exp); 3753 int result_exp = 2 * FP16_EXP_BIAS - 1 - exp; 3754 uint16_t fraction = (((uint32_t)1 << 19) / 3755 (mnt >> (FP16_BITS - 10) | 1) + 1) >> 1; 3756 fraction <<= FP16_MANT_BITS - 8; 3757 if (result_exp == 0) { 3758 fraction >>= 1; 3759 } else if (result_exp == -1) { 3760 fraction >>= 2; 3761 result_exp = 0; 3762 } 3763 result = fp16_pack(sgn, result_exp, fraction); 3764 } 3765 3766 set_fpscr0(fpscr, flags); 3767 3768 return result; 3769} 3770 3771template <> 3772uint32_t 3773fplibRecipEstimate(uint32_t op, FPSCR &fpscr) 3774{ 3775 int mode = modeConv(fpscr); 3776 int flags = 0; 3777 int sgn, exp; 3778 uint32_t mnt, result; 3779 3780 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags); 3781 3782 if (fp32_is_NaN(exp, mnt)) { 3783 result = fp32_process_NaN(op, mode, &flags); 3784 } else if (exp == FP32_EXP_INF) { 3785 result = fp32_zero(sgn); 3786 } else if (!mnt) { 3787 result = fp32_infinity(sgn); 3788 flags |= FPLIB_DZC; 3789 } else if (!((uint32_t)(op << 1) >> (FP32_MANT_BITS - 1))) { 3790 bool overflow_to_inf = false; 3791 switch (FPCRRounding(fpscr)) { 3792 case FPRounding_TIEEVEN: 3793 overflow_to_inf = true; 3794 break; 3795 case FPRounding_POSINF: 3796 overflow_to_inf = !sgn; 3797 break; 3798 case FPRounding_NEGINF: 3799 overflow_to_inf = sgn; 3800 break; 3801 case FPRounding_ZERO: 3802 overflow_to_inf = false; 3803 break; 3804 default: 3805 assert(0); 3806 } 3807 result = overflow_to_inf ? fp32_infinity(sgn) : fp32_max_normal(sgn); 3808 flags |= FPLIB_OFC | FPLIB_IXC; 3809 } else if (fpscr.fz && exp >= 2 * FP32_EXP_BIAS - 1) { 3810 result = fp32_zero(sgn); 3811 flags |= FPLIB_UFC; 3812 } else { 3813 exp += FP32_EXP_BITS; 3814 mnt = fp32_normalise(mnt, &exp); 3815 int result_exp = 2 * FP32_EXP_BIAS - 1 - exp; 3816 uint32_t fraction = (((uint32_t)1 << 19) / 3817 (mnt >> (FP32_BITS - 10) | 1) + 1) >> 1; 3818 fraction <<= FP32_MANT_BITS - 8; 3819 if (result_exp == 0) { 3820 fraction >>= 1; 3821 } else if (result_exp == -1) { 3822 fraction >>= 2; 3823 result_exp = 0; 3824 } 3825 result = fp32_pack(sgn, result_exp, fraction); 3826 } 3827 3828 set_fpscr0(fpscr, flags); 3829 3830 return result; 3831} 3832 3833template <> 3834uint64_t 3835fplibRecipEstimate(uint64_t op, FPSCR &fpscr) 3836{ 3837 int mode = modeConv(fpscr); 3838 int flags = 0; 3839 int sgn, exp; 3840 uint64_t mnt, result; 3841 3842 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags); 3843 3844 if (fp64_is_NaN(exp, mnt)) { 3845 result = fp64_process_NaN(op, mode, &flags); 3846 } else if (exp == FP64_EXP_INF) { 3847 result = fp64_zero(sgn); 3848 } else if (!mnt) { 3849 result = fp64_infinity(sgn); 3850 flags |= FPLIB_DZC; 3851 } else if (!((uint64_t)(op << 1) >> (FP64_MANT_BITS - 1))) { 3852 bool overflow_to_inf = false; 3853 switch (FPCRRounding(fpscr)) { 3854 case FPRounding_TIEEVEN: 3855 overflow_to_inf = true; 3856 break; 3857 case FPRounding_POSINF: 3858 overflow_to_inf = !sgn; 3859 break; 3860 case FPRounding_NEGINF: 3861 overflow_to_inf = sgn; 3862 break; 3863 case FPRounding_ZERO: 3864 overflow_to_inf = false; 3865 break; 3866 default: 3867 assert(0); 3868 } 3869 result = overflow_to_inf ? fp64_infinity(sgn) : fp64_max_normal(sgn); 3870 flags |= FPLIB_OFC | FPLIB_IXC; 3871 } else if (fpscr.fz && exp >= 2 * FP64_EXP_BIAS - 1) { 3872 result = fp64_zero(sgn); 3873 flags |= FPLIB_UFC; 3874 } else { 3875 exp += FP64_EXP_BITS; 3876 mnt = fp64_normalise(mnt, &exp); 3877 int result_exp = 2 * FP64_EXP_BIAS - 1 - exp; 3878 uint64_t fraction = (((uint32_t)1 << 19) / 3879 (mnt >> (FP64_BITS - 10) | 1) + 1) >> 1; 3880 fraction <<= FP64_MANT_BITS - 8; 3881 if (result_exp == 0) { 3882 fraction >>= 1; 3883 } else if (result_exp == -1) { 3884 fraction >>= 2; 3885 result_exp = 0; 3886 } 3887 result = fp64_pack(sgn, result_exp, fraction); 3888 } 3889 3890 set_fpscr0(fpscr, flags); 3891 3892 return result; 3893} 3894 3895template <> 3896uint16_t 3897fplibRecipStepFused(uint16_t op1, uint16_t op2, FPSCR &fpscr) 3898{ 3899 int mode = modeConv(fpscr); 3900 int flags = 0; 3901 int sgn1, exp1, sgn2, exp2; 3902 uint16_t mnt1, mnt2, result; 3903 3904 op1 = fplibNeg<uint16_t>(op1); 3905 fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); 3906 fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); 3907 3908 result = fp16_process_NaNs(op1, op2, mode, &flags); 3909 if (!result) { 3910 if ((exp1 == FP16_EXP_INF && !mnt2) || 3911 (exp2 == FP16_EXP_INF && !mnt1)) { 3912 result = fp16_FPTwo(0); 3913 } else if (exp1 == FP16_EXP_INF || exp2 == FP16_EXP_INF) { 3914 result = fp16_infinity(sgn1 ^ sgn2); 3915 } else { 3916 result = fp16_muladd(fp16_FPTwo(0), op1, op2, 0, mode, &flags); 3917 } 3918 } 3919 3920 set_fpscr0(fpscr, flags); 3921 3922 return result; 3923} 3924 3925template <> 3926uint32_t 3927fplibRecipStepFused(uint32_t op1, uint32_t op2, FPSCR &fpscr) 3928{ 3929 int mode = modeConv(fpscr); 3930 int flags = 0; 3931 int sgn1, exp1, sgn2, exp2; 3932 uint32_t mnt1, mnt2, result; 3933 3934 op1 = fplibNeg<uint32_t>(op1); 3935 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); 3936 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); 3937 3938 result = fp32_process_NaNs(op1, op2, mode, &flags); 3939 if (!result) { 3940 if ((exp1 == FP32_EXP_INF && !mnt2) || 3941 (exp2 == FP32_EXP_INF && !mnt1)) { 3942 result = fp32_FPTwo(0); 3943 } else if (exp1 == FP32_EXP_INF || exp2 == FP32_EXP_INF) { 3944 result = fp32_infinity(sgn1 ^ sgn2); 3945 } else { 3946 result = fp32_muladd(fp32_FPTwo(0), op1, op2, 0, mode, &flags); 3947 } 3948 } 3949 3950 set_fpscr0(fpscr, flags); 3951 3952 return result; 3953} 3954 3955template <> 3956uint64_t 3957fplibRecipStepFused(uint64_t op1, uint64_t op2, FPSCR &fpscr) 3958{ 3959 int mode = modeConv(fpscr); 3960 int flags = 0; 3961 int sgn1, exp1, sgn2, exp2; 3962 uint64_t mnt1, mnt2, result; 3963 3964 op1 = fplibNeg<uint64_t>(op1); 3965 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); 3966 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); 3967 3968 result = fp64_process_NaNs(op1, op2, mode, &flags); 3969 if (!result) { 3970 if ((exp1 == FP64_EXP_INF && !mnt2) || 3971 (exp2 == FP64_EXP_INF && !mnt1)) { 3972 result = fp64_FPTwo(0); 3973 } else if (exp1 == FP64_EXP_INF || exp2 == FP64_EXP_INF) { 3974 result = fp64_infinity(sgn1 ^ sgn2); 3975 } else { 3976 result = fp64_muladd(fp64_FPTwo(0), op1, op2, 0, mode, &flags); 3977 } 3978 } 3979 3980 set_fpscr0(fpscr, flags); 3981 3982 return result; 3983} 3984 3985template <> 3986uint16_t 3987fplibRecpX(uint16_t op, FPSCR &fpscr) 3988{ 3989 int mode = modeConv(fpscr); 3990 int flags = 0; 3991 int sgn, exp; 3992 uint16_t mnt, result; 3993 3994 fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags); 3995 3996 if (fp16_is_NaN(exp, mnt)) { 3997 result = fp16_process_NaN(op, mode, &flags); 3998 } 3999 else { 4000 if (!mnt) { // Zero and denormals 4001 result = fp16_pack(sgn, FP16_EXP_INF - 1, 0); 4002 } else { // Infinities and normals 4003 result = fp16_pack(sgn, exp ^ FP16_EXP_INF, 0); 4004 } 4005 } 4006 4007 set_fpscr0(fpscr, flags); 4008 4009 return result; 4010} 4011 4012template <> 4013uint32_t 4014fplibRecpX(uint32_t op, FPSCR &fpscr) 4015{ 4016 int mode = modeConv(fpscr); 4017 int flags = 0; 4018 int sgn, exp; 4019 uint32_t mnt, result; 4020 4021 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags); 4022 4023 if (fp32_is_NaN(exp, mnt)) { 4024 result = fp32_process_NaN(op, mode, &flags); 4025 } 4026 else { 4027 if (!mnt) { // Zero and denormals 4028 result = fp32_pack(sgn, FP32_EXP_INF - 1, 0); 4029 } else { // Infinities and normals 4030 result = fp32_pack(sgn, exp ^ FP32_EXP_INF, 0); 4031 } 4032 } 4033 4034 set_fpscr0(fpscr, flags); 4035 4036 return result; 4037} 4038 4039template <> 4040uint64_t 4041fplibRecpX(uint64_t op, FPSCR &fpscr) 4042{ 4043 int mode = modeConv(fpscr); 4044 int flags = 0; 4045 int sgn, exp; 4046 uint64_t mnt, result; 4047 4048 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags); 4049 4050 if (fp64_is_NaN(exp, mnt)) { 4051 result = fp64_process_NaN(op, mode, &flags); 4052 } 4053 else { 4054 if (!mnt) { // Zero and denormals 4055 result = fp64_pack(sgn, FP64_EXP_INF - 1, 0); 4056 } else { // Infinities and normals 4057 result = fp64_pack(sgn, exp ^ FP64_EXP_INF, 0); 4058 } 4059 } 4060 4061 set_fpscr0(fpscr, flags); 4062 4063 return result; 4064} 4065 4066template <> 4067uint16_t 4068fplibRoundInt(uint16_t op, FPRounding rounding, bool exact, FPSCR &fpscr) 4069{ 4070 int expint = FP16_EXP_BIAS + FP16_MANT_BITS; 4071 int mode = modeConv(fpscr); 4072 int flags = 0; 4073 int sgn, exp; 4074 uint16_t mnt, result; 4075 4076 // Unpack using FPCR to determine if subnormals are flushed-to-zero: 4077 fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags); 4078 4079 // Handle NaNs, infinities and zeroes: 4080 if (fp16_is_NaN(exp, mnt)) { 4081 result = fp16_process_NaN(op, mode, &flags); 4082 } else if (exp == FP16_EXP_INF) { 4083 result = fp16_infinity(sgn); 4084 } else if (!mnt) { 4085 result = fp16_zero(sgn); 4086 } else if (exp >= expint) { 4087 // There are no fractional bits 4088 result = op; 4089 } else { 4090 // Truncate towards zero: 4091 uint16_t x = expint - exp >= FP16_BITS ? 0 : mnt >> (expint - exp); 4092 int err = exp < expint - FP16_BITS ? 1 : 4093 ((mnt << 1 >> (expint - exp - 1) & 3) | 4094 ((uint16_t)(mnt << 2 << (FP16_BITS + exp - expint)) != 0)); 4095 switch (rounding) { 4096 case FPRounding_TIEEVEN: 4097 x += (err == 3 || (err == 2 && (x & 1))); 4098 break; 4099 case FPRounding_POSINF: 4100 x += err && !sgn; 4101 break; 4102 case FPRounding_NEGINF: 4103 x += err && sgn; 4104 break; 4105 case FPRounding_ZERO: 4106 break; 4107 case FPRounding_TIEAWAY: 4108 x += err >> 1; 4109 break; 4110 default: 4111 assert(0); 4112 } 4113 4114 if (x == 0) { 4115 result = fp16_zero(sgn); 4116 } else { 4117 exp = expint; 4118 mnt = fp16_normalise(x, &exp); 4119 result = fp16_pack(sgn, exp + FP16_EXP_BITS, mnt >> FP16_EXP_BITS); 4120 } 4121 4122 if (err && exact) 4123 flags |= FPLIB_IXC; 4124 } 4125 4126 set_fpscr0(fpscr, flags); 4127 4128 return result; 4129} 4130 4131template <> 4132uint32_t 4133fplibRoundInt(uint32_t op, FPRounding rounding, bool exact, FPSCR &fpscr) 4134{ 4135 int expint = FP32_EXP_BIAS + FP32_MANT_BITS; 4136 int mode = modeConv(fpscr); 4137 int flags = 0; 4138 int sgn, exp; 4139 uint32_t mnt, result; 4140 4141 // Unpack using FPCR to determine if subnormals are flushed-to-zero: 4142 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags); 4143 4144 // Handle NaNs, infinities and zeroes: 4145 if (fp32_is_NaN(exp, mnt)) { 4146 result = fp32_process_NaN(op, mode, &flags); 4147 } else if (exp == FP32_EXP_INF) { 4148 result = fp32_infinity(sgn); 4149 } else if (!mnt) { 4150 result = fp32_zero(sgn); 4151 } else if (exp >= expint) { 4152 // There are no fractional bits 4153 result = op; 4154 } else { 4155 // Truncate towards zero: 4156 uint32_t x = expint - exp >= FP32_BITS ? 0 : mnt >> (expint - exp); 4157 int err = exp < expint - FP32_BITS ? 1 : 4158 ((mnt << 1 >> (expint - exp - 1) & 3) | 4159 ((uint32_t)(mnt << 2 << (FP32_BITS + exp - expint)) != 0)); 4160 switch (rounding) { 4161 case FPRounding_TIEEVEN: 4162 x += (err == 3 || (err == 2 && (x & 1))); 4163 break; 4164 case FPRounding_POSINF: 4165 x += err && !sgn; 4166 break; 4167 case FPRounding_NEGINF: 4168 x += err && sgn; 4169 break; 4170 case FPRounding_ZERO: 4171 break; 4172 case FPRounding_TIEAWAY: 4173 x += err >> 1; 4174 break; 4175 default: 4176 assert(0); 4177 } 4178 4179 if (x == 0) { 4180 result = fp32_zero(sgn); 4181 } else { 4182 exp = expint; 4183 mnt = fp32_normalise(x, &exp); 4184 result = fp32_pack(sgn, exp + FP32_EXP_BITS, mnt >> FP32_EXP_BITS); 4185 } 4186 4187 if (err && exact) 4188 flags |= FPLIB_IXC; 4189 } 4190 4191 set_fpscr0(fpscr, flags); 4192 4193 return result; 4194} 4195 4196template <> 4197uint64_t 4198fplibRoundInt(uint64_t op, FPRounding rounding, bool exact, FPSCR &fpscr) 4199{ 4200 int expint = FP64_EXP_BIAS + FP64_MANT_BITS; 4201 int mode = modeConv(fpscr); 4202 int flags = 0; 4203 int sgn, exp; 4204 uint64_t mnt, result; 4205 4206 // Unpack using FPCR to determine if subnormals are flushed-to-zero: 4207 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags); 4208 4209 // Handle NaNs, infinities and zeroes: 4210 if (fp64_is_NaN(exp, mnt)) { 4211 result = fp64_process_NaN(op, mode, &flags); 4212 } else if (exp == FP64_EXP_INF) { 4213 result = fp64_infinity(sgn); 4214 } else if (!mnt) { 4215 result = fp64_zero(sgn); 4216 } else if (exp >= expint) { 4217 // There are no fractional bits 4218 result = op; 4219 } else { 4220 // Truncate towards zero: 4221 uint64_t x = expint - exp >= FP64_BITS ? 0 : mnt >> (expint - exp); 4222 int err = exp < expint - FP64_BITS ? 1 : 4223 ((mnt << 1 >> (expint - exp - 1) & 3) | 4224 ((uint64_t)(mnt << 2 << (FP64_BITS + exp - expint)) != 0)); 4225 switch (rounding) { 4226 case FPRounding_TIEEVEN: 4227 x += (err == 3 || (err == 2 && (x & 1))); 4228 break; 4229 case FPRounding_POSINF: 4230 x += err && !sgn; 4231 break; 4232 case FPRounding_NEGINF: 4233 x += err && sgn; 4234 break; 4235 case FPRounding_ZERO: 4236 break; 4237 case FPRounding_TIEAWAY: 4238 x += err >> 1; 4239 break; 4240 default: 4241 assert(0); 4242 } 4243 4244 if (x == 0) { 4245 result = fp64_zero(sgn); 4246 } else { 4247 exp = expint; 4248 mnt = fp64_normalise(x, &exp); 4249 result = fp64_pack(sgn, exp + FP64_EXP_BITS, mnt >> FP64_EXP_BITS); 4250 } 4251 4252 if (err && exact) 4253 flags |= FPLIB_IXC; 4254 } 4255 4256 set_fpscr0(fpscr, flags); 4257 4258 return result; 4259} 4260 4261template <> 4262uint16_t 4263fplibScale(uint16_t op1, uint16_t op2, FPSCR &fpscr) 4264{ 4265 int flags = 0; 4266 uint16_t result = fp16_scale(op1, (int16_t)op2, modeConv(fpscr), &flags); 4267 set_fpscr0(fpscr, flags); 4268 return result; 4269} 4270 4271template <> 4272uint32_t 4273fplibScale(uint32_t op1, uint32_t op2, FPSCR &fpscr) 4274{ 4275 int flags = 0; 4276 uint32_t result = fp32_scale(op1, (int32_t)op2, modeConv(fpscr), &flags); 4277 set_fpscr0(fpscr, flags); 4278 return result; 4279} 4280 4281template <> 4282uint64_t 4283fplibScale(uint64_t op1, uint64_t op2, FPSCR &fpscr) 4284{ 4285 int flags = 0; 4286 uint64_t result = fp64_scale(op1, (int64_t)op2, modeConv(fpscr), &flags); 4287 set_fpscr0(fpscr, flags); 4288 return result; 4289} 4290 4291template <> 4292uint16_t 4293fplibSqrt(uint16_t op, FPSCR &fpscr) 4294{ 4295 int flags = 0; 4296 uint16_t result = fp16_sqrt(op, modeConv(fpscr), &flags); 4297 set_fpscr0(fpscr, flags); 4298 return result; 4299} 4300 4301template <> 4302uint32_t 4303fplibSqrt(uint32_t op, FPSCR &fpscr) 4304{ 4305 int flags = 0; 4306 uint32_t result = fp32_sqrt(op, modeConv(fpscr), &flags); 4307 set_fpscr0(fpscr, flags); 4308 return result; 4309} 4310 4311template <> 4312uint64_t 4313fplibSqrt(uint64_t op, FPSCR &fpscr) 4314{ 4315 int flags = 0; 4316 uint64_t result = fp64_sqrt(op, modeConv(fpscr), &flags); 4317 set_fpscr0(fpscr, flags); 4318 return result; 4319} 4320 4321template <> 4322uint16_t 4323fplibSub(uint16_t op1, uint16_t op2, FPSCR &fpscr) 4324{ 4325 int flags = 0; 4326 uint16_t result = fp16_add(op1, op2, 1, modeConv(fpscr), &flags); 4327 set_fpscr0(fpscr, flags); 4328 return result; 4329} 4330 4331template <> 4332uint32_t 4333fplibSub(uint32_t op1, uint32_t op2, FPSCR &fpscr) 4334{ 4335 int flags = 0; 4336 uint32_t result = fp32_add(op1, op2, 1, modeConv(fpscr), &flags); 4337 set_fpscr0(fpscr, flags); 4338 return result; 4339} 4340 4341template <> 4342uint64_t 4343fplibSub(uint64_t op1, uint64_t op2, FPSCR &fpscr) 4344{ 4345 int flags = 0; 4346 uint64_t result = fp64_add(op1, op2, 1, modeConv(fpscr), &flags); 4347 set_fpscr0(fpscr, flags); 4348 return result; 4349} 4350 4351template <> 4352uint16_t 4353fplibTrigMulAdd(uint8_t coeff_index, uint16_t op1, uint16_t op2, FPSCR &fpscr) 4354{ 4355 static uint16_t coeff[2][8] = { 4356 { 4357 0x3c00, 4358 0xb155, 4359 0x2030, 4360 0x0000, 4361 0x0000, 4362 0x0000, 4363 0x0000, 4364 0x0000, 4365 }, 4366 { 4367 0x3c00, 4368 0xb800, 4369 0x293a, 4370 0x0000, 4371 0x0000, 4372 0x0000, 4373 0x0000, 4374 0x0000 4375 } 4376 }; 4377 int flags = 0; 4378 uint16_t result = 4379 fp16_muladd(coeff[op2 >> (FP16_BITS - 1)][coeff_index], op1, 4380 fplibAbs(op2), 0, modeConv(fpscr), &flags); 4381 set_fpscr0(fpscr, flags); 4382 return result; 4383} 4384 4385template <> 4386uint32_t 4387fplibTrigMulAdd(uint8_t coeff_index, uint32_t op1, uint32_t op2, FPSCR &fpscr) 4388{ 4389 static uint32_t coeff[2][8] = { 4390 { 4391 0x3f800000, 4392 0xbe2aaaab, 4393 0x3c088886, 4394 0xb95008b9, 4395 0x36369d6d, 4396 0x00000000, 4397 0x00000000, 4398 0x00000000 4399 }, 4400 { 4401 0x3f800000, 4402 0xbf000000, 4403 0x3d2aaaa6, 4404 0xbab60705, 4405 0x37cd37cc, 4406 0x00000000, 4407 0x00000000, 4408 0x00000000 4409 } 4410 }; 4411 int flags = 0; 4412 uint32_t result = 4413 fp32_muladd(coeff[op2 >> (FP32_BITS - 1)][coeff_index], op1, 4414 fplibAbs(op2), 0, modeConv(fpscr), &flags); 4415 set_fpscr0(fpscr, flags); 4416 return result; 4417} 4418 4419template <> 4420uint64_t 4421fplibTrigMulAdd(uint8_t coeff_index, uint64_t op1, uint64_t op2, FPSCR &fpscr) 4422{ 4423 static uint64_t coeff[2][8] = { 4424 { 4425 0x3ff0000000000000ULL, 4426 0xbfc5555555555543ULL, 4427 0x3f8111111110f30cULL, 4428 0xbf2a01a019b92fc6ULL, 4429 0x3ec71de351f3d22bULL, 4430 0xbe5ae5e2b60f7b91ULL, 4431 0x3de5d8408868552fULL, 4432 0x0000000000000000ULL 4433 }, 4434 { 4435 0x3ff0000000000000ULL, 4436 0xbfe0000000000000ULL, 4437 0x3fa5555555555536ULL, 4438 0xbf56c16c16c13a0bULL, 4439 0x3efa01a019b1e8d8ULL, 4440 0xbe927e4f7282f468ULL, 4441 0x3e21ee96d2641b13ULL, 4442 0xbda8f76380fbb401ULL 4443 } 4444 }; 4445 int flags = 0; 4446 uint64_t result = 4447 fp64_muladd(coeff[op2 >> (FP64_BITS - 1)][coeff_index], op1, 4448 fplibAbs(op2), 0, modeConv(fpscr), &flags); 4449 set_fpscr0(fpscr, flags); 4450 return result; 4451} 4452 4453template <> 4454uint16_t 4455fplibTrigSMul(uint16_t op1, uint16_t op2, FPSCR &fpscr) 4456{ 4457 int flags = 0; 4458 int sgn, exp; 4459 uint16_t mnt; 4460 4461 int mode = modeConv(fpscr); 4462 uint16_t result = fp16_mul(op1, op1, mode, &flags); 4463 set_fpscr0(fpscr, flags); 4464 4465 fp16_unpack(&sgn, &exp, &mnt, result, mode, &flags); 4466 if (!fp16_is_NaN(exp, mnt)) { 4467 result = (result & ~(1ULL << (FP16_BITS - 1))) | 4468 op2 << (FP16_BITS - 1); 4469 } 4470 return result; 4471} 4472 4473template <> 4474uint32_t 4475fplibTrigSMul(uint32_t op1, uint32_t op2, FPSCR &fpscr) 4476{ 4477 int flags = 0; 4478 int sgn, exp; 4479 uint32_t mnt; 4480 4481 int mode = modeConv(fpscr); 4482 uint32_t result = fp32_mul(op1, op1, mode, &flags); 4483 set_fpscr0(fpscr, flags); 4484 4485 fp32_unpack(&sgn, &exp, &mnt, result, mode, &flags); 4486 if (!fp32_is_NaN(exp, mnt)) { 4487 result = (result & ~(1ULL << (FP32_BITS - 1))) | op2 << (FP32_BITS - 1); 4488 } 4489 return result; 4490} 4491 4492template <> 4493uint64_t 4494fplibTrigSMul(uint64_t op1, uint64_t op2, FPSCR &fpscr) 4495{ 4496 int flags = 0; 4497 int sgn, exp; 4498 uint64_t mnt; 4499 4500 int mode = modeConv(fpscr); 4501 uint64_t result = fp64_mul(op1, op1, mode, &flags); 4502 set_fpscr0(fpscr, flags); 4503 4504 fp64_unpack(&sgn, &exp, &mnt, result, mode, &flags); 4505 if (!fp64_is_NaN(exp, mnt)) { 4506 result = (result & ~(1ULL << (FP64_BITS - 1))) | op2 << (FP64_BITS - 1); 4507 } 4508 return result; 4509} 4510 4511template <> 4512uint16_t 4513fplibTrigSSel(uint16_t op1, uint16_t op2, FPSCR &fpscr) 4514{ 4515 static constexpr uint16_t fpOne = 4516 (uint16_t)FP16_EXP_BIAS << FP16_MANT_BITS; // 1.0 4517 if (op2 & 1) 4518 op1 = fpOne; 4519 return op1 ^ ((op2 >> 1) << (FP16_BITS - 1)); 4520} 4521 4522template <> 4523uint32_t 4524fplibTrigSSel(uint32_t op1, uint32_t op2, FPSCR &fpscr) 4525{ 4526 static constexpr uint32_t fpOne = 4527 (uint32_t)FP32_EXP_BIAS << FP32_MANT_BITS; // 1.0 4528 if (op2 & 1) 4529 op1 = fpOne; 4530 return op1 ^ ((op2 >> 1) << (FP32_BITS - 1)); 4531} 4532 4533template <> 4534uint64_t 4535fplibTrigSSel(uint64_t op1, uint64_t op2, FPSCR &fpscr) 4536{ 4537 static constexpr uint64_t fpOne = 4538 (uint64_t)FP64_EXP_BIAS << FP64_MANT_BITS; // 1.0 4539 if (op2 & 1) 4540 op1 = fpOne; 4541 return op1 ^ ((op2 >> 1) << (FP64_BITS - 1)); 4542} 4543 4544static uint64_t 4545FPToFixed_64(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding, 4546 int *flags) 4547{ 4548 int expmax = FP64_EXP_BIAS + FP64_BITS - 1; 4549 uint64_t x; 4550 int err; 4551 4552 if (exp > expmax) { 4553 *flags = FPLIB_IOC; 4554 return ((uint64_t)!u << (FP64_BITS - 1)) - !sgn; 4555 } 4556 4557 x = lsr64(mnt << FP64_EXP_BITS, expmax - exp); 4558 err = (exp > expmax - 2 ? 0 : 4559 (lsr64(mnt << FP64_EXP_BITS, expmax - 2 - exp) & 3) | 4560 !!(mnt << FP64_EXP_BITS & (lsl64(1, expmax - 2 - exp) - 1))); 4561 4562 switch (rounding) { 4563 case FPRounding_TIEEVEN: 4564 x += (err == 3 || (err == 2 && (x & 1))); 4565 break; 4566 case FPRounding_POSINF: 4567 x += err && !sgn; 4568 break; 4569 case FPRounding_NEGINF: 4570 x += err && sgn; 4571 break; 4572 case FPRounding_ZERO: 4573 break; 4574 case FPRounding_TIEAWAY: 4575 x += err >> 1; 4576 break; 4577 default: 4578 assert(0); 4579 } 4580 4581 if (u ? sgn && x : x > (1ULL << (FP64_BITS - 1)) - !sgn) { 4582 *flags = FPLIB_IOC; 4583 return ((uint64_t)!u << (FP64_BITS - 1)) - !sgn; 4584 } 4585 4586 if (err) { 4587 *flags = FPLIB_IXC; 4588 } 4589 4590 return sgn ? -x : x; 4591} 4592 4593static uint32_t 4594FPToFixed_32(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding, 4595 int *flags) 4596{ 4597 uint64_t x = FPToFixed_64(sgn, exp, mnt, u, rounding, flags); 4598 if (u ? x >= 1ULL << FP32_BITS : 4599 !(x < 1ULL << (FP32_BITS - 1) || 4600 (uint64_t)-x <= (uint64_t)1 << (FP32_BITS - 1))) { 4601 *flags = FPLIB_IOC; 4602 x = ((uint32_t)!u << (FP32_BITS - 1)) - !sgn; 4603 } 4604 return x; 4605} 4606 4607static uint16_t 4608FPToFixed_16(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding, 4609 int *flags) 4610{ 4611 uint64_t x = FPToFixed_64(sgn, exp, mnt, u, rounding, flags); 4612 if (u ? x >= 1ULL << FP16_BITS : 4613 !(x < 1ULL << (FP16_BITS - 1) || 4614 (uint64_t)-x <= (uint64_t)1 << (FP16_BITS - 1))) { 4615 *flags = FPLIB_IOC; 4616 x = ((uint16_t)!u << (FP16_BITS - 1)) - !sgn; 4617 } 4618 return x; 4619} 4620 4621template <> 4622uint16_t 4623fplibFPToFixed(uint16_t op, int fbits, bool u, FPRounding rounding, 4624 FPSCR &fpscr) 4625{ 4626 int flags = 0; 4627 int sgn, exp; 4628 uint16_t mnt, result; 4629 4630 // Unpack using FPCR to determine if subnormals are flushed-to-zero: 4631 fp16_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags); 4632 4633 // If NaN, set cumulative flag or take exception: 4634 if (fp16_is_NaN(exp, mnt)) { 4635 flags = FPLIB_IOC; 4636 result = 0; 4637 } else { 4638 assert(fbits >= 0); 4639 // Infinity is treated as an ordinary normalised number that saturates. 4640 result = 4641 FPToFixed_16(sgn, exp + FP64_EXP_BIAS - FP16_EXP_BIAS + fbits, 4642 (uint64_t)mnt << (FP64_MANT_BITS - FP16_MANT_BITS), 4643 u, rounding, &flags); 4644 } 4645 4646 set_fpscr0(fpscr, flags); 4647 4648 return result; 4649} 4650 4651template <> 4652uint32_t 4653fplibFPToFixed(uint16_t op, int fbits, bool u, FPRounding rounding, 4654 FPSCR &fpscr) 4655{ 4656 int flags = 0; 4657 int sgn, exp; 4658 uint16_t mnt; 4659 uint32_t result; 4660 4661 // Unpack using FPCR to determine if subnormals are flushed-to-zero: 4662 fp16_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags); 4663 4664 // If NaN, set cumulative flag or take exception: 4665 if (fp16_is_NaN(exp, mnt)) { 4666 flags = FPLIB_IOC; 4667 result = 0; 4668 } else { 4669 assert(fbits >= 0); 4670 if (exp == FP16_EXP_INF) 4671 exp = 255; // infinity: make it big enough to saturate 4672 result = 4673 FPToFixed_32(sgn, exp + FP64_EXP_BIAS - FP16_EXP_BIAS + fbits, 4674 (uint64_t)mnt << (FP64_MANT_BITS - FP16_MANT_BITS), 4675 u, rounding, &flags); 4676 } 4677 4678 set_fpscr0(fpscr, flags); 4679 4680 return result; 4681} 4682 4683template <> 4684uint32_t 4685fplibFPToFixed(uint32_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr) 4686{ 4687 int flags = 0; 4688 int sgn, exp; 4689 uint32_t mnt, result; 4690 4691 // Unpack using FPCR to determine if subnormals are flushed-to-zero: 4692 fp32_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags); 4693 4694 // If NaN, set cumulative flag or take exception: 4695 if (fp32_is_NaN(exp, mnt)) { 4696 flags = FPLIB_IOC; 4697 result = 0; 4698 } else { 4699 assert(fbits >= 0); 4700 // Infinity is treated as an ordinary normalised number that saturates. 4701 result = 4702 FPToFixed_32(sgn, exp + FP64_EXP_BIAS - FP32_EXP_BIAS + fbits, 4703 (uint64_t)mnt << (FP64_MANT_BITS - FP32_MANT_BITS), 4704 u, rounding, &flags); 4705 } 4706 4707 set_fpscr0(fpscr, flags); 4708 4709 return result; 4710} 4711 4712template <> 4713uint32_t 4714fplibFPToFixed(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr) 4715{ 4716 int flags = 0; 4717 int sgn, exp; 4718 uint64_t mnt; 4719 uint32_t result; 4720 4721 // Unpack using FPCR to determine if subnormals are flushed-to-zero: 4722 fp64_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags); 4723 4724 // If NaN, set cumulative flag or take exception: 4725 if (fp64_is_NaN(exp, mnt)) { 4726 flags = FPLIB_IOC; 4727 result = 0; 4728 } else { 4729 assert(fbits >= 0); 4730 // Infinity is treated as an ordinary normalised number that saturates. 4731 result = FPToFixed_32(sgn, exp + fbits, mnt, u, rounding, &flags); 4732 } 4733 4734 set_fpscr0(fpscr, flags); 4735 4736 return result; 4737} 4738 4739template <> 4740uint64_t 4741fplibFPToFixed(uint16_t op, int fbits, bool u, FPRounding rounding, 4742 FPSCR &fpscr) 4743{ 4744 int flags = 0; 4745 int sgn, exp; 4746 uint16_t mnt; 4747 uint64_t result; 4748 4749 // Unpack using FPCR to determine if subnormals are flushed-to-zero: 4750 fp16_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags); 4751 4752 // If NaN, set cumulative flag or take exception: 4753 if (fp16_is_NaN(exp, mnt)) { 4754 flags = FPLIB_IOC; 4755 result = 0; 4756 } else { 4757 assert(fbits >= 0); 4758 if (exp == FP16_EXP_INF) 4759 exp = 255; // infinity: make it big enough to saturate 4760 result = 4761 FPToFixed_64(sgn, exp + FP64_EXP_BIAS - FP16_EXP_BIAS + fbits, 4762 (uint64_t)mnt << (FP64_MANT_BITS - FP16_MANT_BITS), 4763 u, rounding, &flags); 4764 } 4765 4766 set_fpscr0(fpscr, flags); 4767 4768 return result; 4769} 4770 4771template <> 4772uint64_t 4773fplibFPToFixed(uint32_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr) 4774{ 4775 int flags = 0; 4776 int sgn, exp; 4777 uint32_t mnt; 4778 uint64_t result; 4779 4780 // Unpack using FPCR to determine if subnormals are flushed-to-zero: 4781 fp32_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags); 4782 4783 // If NaN, set cumulative flag or take exception: 4784 if (fp32_is_NaN(exp, mnt)) { 4785 flags = FPLIB_IOC; 4786 result = 0; 4787 } else { 4788 assert(fbits >= 0); 4789 // Infinity is treated as an ordinary normalised number that saturates. 4790 result = 4791 FPToFixed_64(sgn, exp + FP64_EXP_BIAS - FP32_EXP_BIAS + fbits, 4792 (uint64_t)mnt << (FP64_MANT_BITS - FP32_MANT_BITS), 4793 u, rounding, &flags); 4794 } 4795 4796 set_fpscr0(fpscr, flags); 4797 4798 return result; 4799} 4800 4801template <> 4802uint64_t 4803fplibFPToFixed(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr) 4804{ 4805 int flags = 0; 4806 int sgn, exp; 4807 uint64_t mnt, result; 4808 4809 // Unpack using FPCR to determine if subnormals are flushed-to-zero: 4810 fp64_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags); 4811 4812 // If NaN, set cumulative flag or take exception: 4813 if (fp64_is_NaN(exp, mnt)) { 4814 flags = FPLIB_IOC; 4815 result = 0; 4816 } else { 4817 assert(fbits >= 0); 4818 // Infinity is treated as an ordinary normalised number that saturates. 4819 result = FPToFixed_64(sgn, exp + fbits, mnt, u, rounding, &flags); 4820 } 4821 4822 set_fpscr0(fpscr, flags); 4823 4824 return result; 4825} 4826 4827static uint16_t 4828fp16_cvtf(uint64_t a, int fbits, int u, int mode, int *flags) 4829{ 4830 int x_sgn = !u && a >> (FP64_BITS - 1); 4831 int x_exp = FP16_EXP_BIAS + FP64_BITS - 1 - fbits; 4832 uint64_t x_mnt = x_sgn ? -a : a; 4833 4834 // Handle zero: 4835 if (!x_mnt) { 4836 return fp16_zero(0); 4837 } 4838 4839 // Normalise into FP16_BITS bits, collapsing error into bottom bit: 4840 x_mnt = fp64_normalise(x_mnt, &x_exp); 4841 x_mnt = (x_mnt >> (FP64_BITS - FP16_BITS - 1) | 4842 !!(x_mnt & ((1ULL << (FP64_BITS - FP16_BITS - 1)) - 1))); 4843 4844 return fp16_round(x_sgn, x_exp, x_mnt, mode, flags); 4845} 4846 4847static uint32_t 4848fp32_cvtf(uint64_t a, int fbits, int u, int mode, int *flags) 4849{ 4850 int x_sgn = !u && a >> (FP64_BITS - 1); 4851 int x_exp = FP32_EXP_BIAS + FP64_BITS - 1 - fbits; 4852 uint64_t x_mnt = x_sgn ? -a : a; 4853 4854 // Handle zero: 4855 if (!x_mnt) { 4856 return fp32_zero(0); 4857 } 4858 4859 // Normalise into FP32_BITS bits, collapsing error into bottom bit: 4860 x_mnt = fp64_normalise(x_mnt, &x_exp); 4861 x_mnt = (x_mnt >> (FP64_BITS - FP32_BITS - 1) | 4862 !!(x_mnt & ((1ULL << (FP64_BITS - FP32_BITS - 1)) - 1))); 4863 4864 return fp32_round(x_sgn, x_exp, x_mnt, mode, flags); 4865} 4866 4867static uint64_t 4868fp64_cvtf(uint64_t a, int fbits, int u, int mode, int *flags) 4869{ 4870 int x_sgn = !u && a >> (FP64_BITS - 1); 4871 int x_exp = FP64_EXP_BIAS + FP64_BITS - 1 - fbits; 4872 uint64_t x_mnt = x_sgn ? -a : a; 4873 4874 // Handle zero: 4875 if (!x_mnt) { 4876 return fp64_zero(0); 4877 } 4878 4879 x_mnt = fp64_normalise(x_mnt, &x_exp); 4880 4881 return fp64_round(x_sgn, x_exp, x_mnt << 1, mode, flags); 4882} 4883 4884template <> 4885uint16_t 4886fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, 4887 FPSCR &fpscr) 4888{ 4889 int flags = 0; 4890 uint16_t res = fp16_cvtf(op, fbits, u, 4891 (int)rounding | ((uint32_t)fpscr >> 22 & 12), 4892 &flags); 4893 set_fpscr0(fpscr, flags); 4894 return res; 4895} 4896 4897template <> 4898uint32_t 4899fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr) 4900{ 4901 int flags = 0; 4902 uint32_t res = fp32_cvtf(op, fbits, u, 4903 (int)rounding | ((uint32_t)fpscr >> 22 & 12), 4904 &flags); 4905 set_fpscr0(fpscr, flags); 4906 return res; 4907} 4908 4909template <> 4910uint64_t 4911fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr) 4912{ 4913 int flags = 0; 4914 uint64_t res = fp64_cvtf(op, fbits, u, 4915 (int)rounding | ((uint32_t)fpscr >> 22 & 12), 4916 &flags); 4917 set_fpscr0(fpscr, flags); 4918 return res; 4919} 4920 4921template <> 4922uint16_t 4923fplibInfinity(int sgn) 4924{ 4925 return fp16_infinity(sgn); 4926} 4927 4928template <> 4929uint32_t 4930fplibInfinity(int sgn) 4931{ 4932 return fp32_infinity(sgn); 4933} 4934 4935template <> 4936uint64_t 4937fplibInfinity(int sgn) 4938{ 4939 return fp64_infinity(sgn); 4940} 4941 4942template <> 4943uint16_t 4944fplibDefaultNaN() 4945{ 4946 return fp16_defaultNaN(); 4947} 4948 4949template <> 4950uint32_t 4951fplibDefaultNaN() 4952{ 4953 return fp32_defaultNaN(); 4954} 4955 4956template <> 4957uint64_t 4958fplibDefaultNaN() 4959{ 4960 return fp64_defaultNaN(); 4961} 4962 4963} 4964