/*
 * Copyright (c) 2012-2013, 2017-2018 ARM Limited
 * All rights reserved
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder. You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Edmund Grimley Evans
 *          Thomas Grocutt
 */

#include <stdint.h>

#include <cassert>

#include "base/logging.hh"
#include "fplib.hh"

namespace ArmISA
{

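// The "mode" argument used throughout this file packs the FPSCR control
// state: bits 1:0 select the rounding mode (RN/RP/RM/RZ below) and the
// remaining bits are individual controls (flush-to-zero, default NaN,
// alternative half-precision, half-precision flush-to-zero). The FPLIB_*
// exception values mirror the FPSCR cumulative exception flag bits and are
// ORed into the "*flags" out-parameter that most functions here take.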
#define FPLIB_RN 0
#define FPLIB_RP 1
#define FPLIB_RM 2
#define FPLIB_RZ 3
#define FPLIB_FZ 4
#define FPLIB_DN 8
#define FPLIB_AHP 16
#define FPLIB_FZ16 32

#define FPLIB_IDC 128 // Input Denormal
#define FPLIB_IXC 16  // Inexact
#define FPLIB_UFC 8   // Underflow
#define FPLIB_OFC 4   // Overflow
#define FPLIB_DZC 2   // Division by Zero
#define FPLIB_IOC 1   // Invalid Operation

#define FP16_BITS 16
#define FP32_BITS 32
#define FP64_BITS 64

#define FP16_EXP_BITS 5
#define FP32_EXP_BITS 8
#define FP64_EXP_BITS 11

#define FP16_EXP_BIAS 15
#define FP32_EXP_BIAS 127
#define FP64_EXP_BIAS 1023

#define FP16_EXP_INF ((1ULL << FP16_EXP_BITS) - 1)
#define FP32_EXP_INF ((1ULL << FP32_EXP_BITS) - 1)
#define FP64_EXP_INF ((1ULL << FP64_EXP_BITS) - 1)

#define FP16_MANT_BITS (FP16_BITS - FP16_EXP_BITS - 1)
#define FP32_MANT_BITS (FP32_BITS - FP32_EXP_BITS - 1)
#define FP64_MANT_BITS (FP64_BITS - FP64_EXP_BITS - 1)

#define FP16_EXP(x) ((x) >> FP16_MANT_BITS & ((1ULL << FP16_EXP_BITS) - 1))
#define FP32_EXP(x) ((x) >> FP32_MANT_BITS & ((1ULL << FP32_EXP_BITS) - 1))
#define FP64_EXP(x) ((x) >> FP64_MANT_BITS & ((1ULL << FP64_EXP_BITS) - 1))

#define FP16_MANT(x) ((x) & ((1ULL << FP16_MANT_BITS) - 1))
#define FP32_MANT(x) ((x) & ((1ULL << FP32_MANT_BITS) - 1))
#define FP64_MANT(x) ((x) & ((1ULL << FP64_MANT_BITS) - 1))

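// Shift helpers that are well defined for any shift amount: a C++ shift by
// the full width (or more) of the operand is undefined behaviour, so these
// return 0 in that case instead.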
static inline uint16_t
lsl16(uint16_t x, uint32_t shift)
{
    return shift < 16 ? x << shift : 0;
}

static inline uint16_t
lsr16(uint16_t x, uint32_t shift)
{
    return shift < 16 ? x >> shift : 0;
}

static inline uint32_t
lsl32(uint32_t x, uint32_t shift)
{
    return shift < 32 ? x << shift : 0;
}

static inline uint32_t
lsr32(uint32_t x, uint32_t shift)
{
    return shift < 32 ? x >> shift : 0;
}

static inline uint64_t
lsl64(uint64_t x, uint32_t shift)
{
    return shift < 64 ? x << shift : 0;
}

static inline uint64_t
lsr64(uint64_t x, uint32_t shift)
{
    return shift < 64 ? x >> shift : 0;
}

static inline void
lsl128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift)
{
    if (shift == 0) {
        *r1 = x1;
        *r0 = x0;
    } else if (shift < 64) {
        *r1 = x1 << shift | x0 >> (64 - shift);
        *r0 = x0 << shift;
    } else if (shift < 128) {
        *r1 = x0 << (shift - 64);
        *r0 = 0;
    } else {
        *r1 = 0;
        *r0 = 0;
    }
}

static inline void
lsr128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift)
{
    if (shift == 0) {
        *r1 = x1;
        *r0 = x0;
    } else if (shift < 64) {
        *r0 = x0 >> shift | x1 << (64 - shift);
        *r1 = x1 >> shift;
    } else if (shift < 128) {
        *r0 = x1 >> (shift - 64);
        *r1 = 0;
    } else {
        *r0 = 0;
        *r1 = 0;
    }
}

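// Full multiply of two values of up to 62 bits, returning the product in
// two 64-bit halves (*x1:*x0). The operands are split into 31-bit limbs and
// combined Karatsuba-style, so only three multiplications are needed; the
// 62-bit limit keeps the partial sums from overflowing 64 bits.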
static inline void
mul62x62(uint64_t *x0, uint64_t *x1, uint64_t a, uint64_t b)
{
    uint32_t mask = ((uint32_t)1 << 31) - 1;
    uint64_t a0 = a & mask;
    uint64_t a1 = a >> 31 & mask;
    uint64_t b0 = b & mask;
    uint64_t b1 = b >> 31 & mask;
    uint64_t p0 = a0 * b0;
    uint64_t p2 = a1 * b1;
    uint64_t p1 = (a0 + a1) * (b0 + b1) - p0 - p2;
    uint64_t s0 = p0;
    uint64_t s1 = (s0 >> 31) + p1;
    uint64_t s2 = (s1 >> 31) + p2;
    *x0 = (s0 & mask) | (s1 & mask) << 31 | s2 << 62;
    *x1 = s2 >> 2;
}

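// Full 64x32-bit multiply, returning the 96-bit product in two 64-bit
// halves (*x1:*x0).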
static inline void
mul64x32(uint64_t *x0, uint64_t *x1, uint64_t a, uint32_t b)
{
    uint64_t t0 = (uint64_t)(uint32_t)a * b;
    uint64_t t1 = (t0 >> 32) + (a >> 32) * b;
    *x0 = t1 << 32 | (uint32_t)t0;
    *x1 = t1 >> 32;
}

static inline void
add128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0,
       uint64_t b1)
{
    *x0 = a0 + b0;
    *x1 = a1 + b1 + (*x0 < a0);
}

static inline void
sub128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0,
       uint64_t b1)
{
    *x0 = a0 - b0;
    *x1 = a1 - b1 - (*x0 > a0);
}

static inline int
cmp128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1)
{
    return (a1 < b1 ? -1 : a1 > b1 ? 1 : a0 < b0 ? -1 : a0 > b0 ? 1 : 0);
}

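// Normalisation helpers: shift the mantissa left until its leading 1 is in
// the top bit of the word, decrementing *exp by the shift amount. The loop
// is a binary search over the shift amount (e.g. 8, 4, 2, 1 for 16 bits),
// so it runs in a fixed number of steps rather than one per bit.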
static inline uint16_t
fp16_normalise(uint16_t mnt, int *exp)
{
    int shift;

    if (!mnt) {
        return 0;
    }

    for (shift = 8; shift; shift >>= 1) {
        if (!(mnt >> (16 - shift))) {
            mnt <<= shift;
            *exp -= shift;
        }
    }
    return mnt;
}

static inline uint32_t
fp32_normalise(uint32_t mnt, int *exp)
{
    int shift;

    if (!mnt) {
        return 0;
    }

    for (shift = 16; shift; shift >>= 1) {
        if (!(mnt >> (32 - shift))) {
            mnt <<= shift;
            *exp -= shift;
        }
    }
    return mnt;
}

static inline uint64_t
fp64_normalise(uint64_t mnt, int *exp)
{
    int shift;

    if (!mnt) {
        return 0;
    }

    for (shift = 32; shift; shift >>= 1) {
        if (!(mnt >> (64 - shift))) {
            mnt <<= shift;
            *exp -= shift;
        }
    }
    return mnt;
}

static inline void
fp128_normalise(uint64_t *mnt0, uint64_t *mnt1, int *exp)
{
    uint64_t x0 = *mnt0;
    uint64_t x1 = *mnt1;
    int shift;

    if (!x0 && !x1) {
        return;
    }

    if (!x1) {
        x1 = x0;
        x0 = 0;
        *exp -= 64;
    }

    for (shift = 32; shift; shift >>= 1) {
        if (!(x1 >> (64 - shift))) {
            x1 = x1 << shift | x0 >> (64 - shift);
            x0 <<= shift;
            *exp -= shift;
        }
    }

    *mnt0 = x0;
    *mnt1 = x1;
}

static inline uint16_t
fp16_pack(uint16_t sgn, uint16_t exp, uint16_t mnt)
{
    return sgn << (FP16_BITS - 1) | exp << FP16_MANT_BITS | FP16_MANT(mnt);
}

static inline uint32_t
fp32_pack(uint32_t sgn, uint32_t exp, uint32_t mnt)
{
    return sgn << (FP32_BITS - 1) | exp << FP32_MANT_BITS | FP32_MANT(mnt);
}

static inline uint64_t
fp64_pack(uint64_t sgn, uint64_t exp, uint64_t mnt)
{
    return sgn << (FP64_BITS - 1) | exp << FP64_MANT_BITS | FP64_MANT(mnt);
}

static inline uint16_t
fp16_zero(int sgn)
{
    return fp16_pack(sgn, 0, 0);
}

static inline uint32_t
fp32_zero(int sgn)
{
    return fp32_pack(sgn, 0, 0);
}

static inline uint64_t
fp64_zero(int sgn)
{
    return fp64_pack(sgn, 0, 0);
}

static inline uint16_t
fp16_max_normal(int sgn)
{
    return fp16_pack(sgn, FP16_EXP_INF - 1, -1);
}

static inline uint32_t
fp32_max_normal(int sgn)
{
    return fp32_pack(sgn, FP32_EXP_INF - 1, -1);
}

static inline uint64_t
fp64_max_normal(int sgn)
{
    return fp64_pack(sgn, FP64_EXP_INF - 1, -1);
}

static inline uint16_t
fp16_infinity(int sgn)
{
    return fp16_pack(sgn, FP16_EXP_INF, 0);
}

static inline uint32_t
fp32_infinity(int sgn)
{
    return fp32_pack(sgn, FP32_EXP_INF, 0);
}

static inline uint64_t
fp64_infinity(int sgn)
{
    return fp64_pack(sgn, FP64_EXP_INF, 0);
}

static inline uint16_t
fp16_defaultNaN()
{
    return fp16_pack(0, FP16_EXP_INF, 1ULL << (FP16_MANT_BITS - 1));
}

static inline uint32_t
fp32_defaultNaN()
{
    return fp32_pack(0, FP32_EXP_INF, 1ULL << (FP32_MANT_BITS - 1));
}

static inline uint64_t
fp64_defaultNaN()
{
    return fp64_pack(0, FP64_EXP_INF, 1ULL << (FP64_MANT_BITS - 1));
}

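// Unpack a raw encoding into sign, biased exponent and mantissa. For normal
// numbers the implicit leading 1 is made explicit in the mantissa; for
// subnormals the exponent is bumped to 1 so that all finite values share a
// common (exponent, mantissa) scale. Flush-to-zero modes squash a subnormal
// mantissa to zero here, before any arithmetic sees it.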
static inline void
fp16_unpack(int *sgn, int *exp, uint16_t *mnt, uint16_t x, int mode,
            int *flags)
{
    *sgn = x >> (FP16_BITS - 1);
    *exp = FP16_EXP(x);
    *mnt = FP16_MANT(x);

    // Handle subnormals:
    if (*exp) {
        *mnt |= 1ULL << FP16_MANT_BITS;
    } else {
        ++*exp;
        // IDC (Input Denormal) is not set in this case.
        if (mode & FPLIB_FZ16)
            *mnt = 0;
    }
}

static inline void
fp32_unpack(int *sgn, int *exp, uint32_t *mnt, uint32_t x, int mode,
            int *flags)
{
    *sgn = x >> (FP32_BITS - 1);
    *exp = FP32_EXP(x);
    *mnt = FP32_MANT(x);

    // Handle subnormals:
    if (*exp) {
        *mnt |= 1ULL << FP32_MANT_BITS;
    } else {
        ++*exp;
        if ((mode & FPLIB_FZ) && *mnt) {
            *flags |= FPLIB_IDC;
            *mnt = 0;
        }
    }
}

static inline void
fp64_unpack(int *sgn, int *exp, uint64_t *mnt, uint64_t x, int mode,
            int *flags)
{
    *sgn = x >> (FP64_BITS - 1);
    *exp = FP64_EXP(x);
    *mnt = FP64_MANT(x);

    // Handle subnormals:
    if (*exp) {
        *mnt |= 1ULL << FP64_MANT_BITS;
    } else {
        ++*exp;
        if ((mode & FPLIB_FZ) && *mnt) {
            *flags |= FPLIB_IDC;
            *mnt = 0;
        }
    }
}

static inline int
fp16_is_NaN(int exp, uint16_t mnt)
{
    return exp == FP16_EXP_INF && FP16_MANT(mnt);
}

static inline int
fp32_is_NaN(int exp, uint32_t mnt)
{
    return exp == FP32_EXP_INF && FP32_MANT(mnt);
}

static inline int
fp64_is_NaN(int exp, uint64_t mnt)
{
    return exp == FP64_EXP_INF && FP64_MANT(mnt);
}

static inline int
fp16_is_signalling_NaN(int exp, uint16_t mnt)
{
    return fp16_is_NaN(exp, mnt) && !(mnt >> (FP16_MANT_BITS - 1) & 1);
}

static inline int
fp32_is_signalling_NaN(int exp, uint32_t mnt)
{
    return fp32_is_NaN(exp, mnt) && !(mnt >> (FP32_MANT_BITS - 1) & 1);
}

static inline int
fp64_is_signalling_NaN(int exp, uint64_t mnt)
{
    return fp64_is_NaN(exp, mnt) && !(mnt >> (FP64_MANT_BITS - 1) & 1);
}

static inline int
fp16_is_quiet_NaN(int exp, uint16_t mnt)
{
    return exp == FP16_EXP_INF && (mnt >> (FP16_MANT_BITS - 1) & 1);
}

static inline int
fp32_is_quiet_NaN(int exp, uint32_t mnt)
{
    return exp == FP32_EXP_INF && (mnt >> (FP32_MANT_BITS - 1) & 1);
}

static inline int
fp64_is_quiet_NaN(int exp, uint64_t mnt)
{
    return exp == FP64_EXP_INF && (mnt >> (FP64_MANT_BITS - 1) & 1);
}

static inline int
fp16_is_infinity(int exp, uint16_t mnt)
{
    return exp == FP16_EXP_INF && !FP16_MANT(mnt);
}

static inline int
fp32_is_infinity(int exp, uint32_t mnt)
{
    return exp == FP32_EXP_INF && !FP32_MANT(mnt);
}

static inline int
fp64_is_infinity(int exp, uint64_t mnt)
{
    return exp == FP64_EXP_INF && !FP64_MANT(mnt);
}

static inline uint16_t
fp16_process_NaN(uint16_t a, int mode, int *flags)
{
    if (!(a >> (FP16_MANT_BITS - 1) & 1)) {
        *flags |= FPLIB_IOC;
        a |= 1ULL << (FP16_MANT_BITS - 1);
    }
    return mode & FPLIB_DN ? fp16_defaultNaN() : a;
}

static inline uint32_t
fp32_process_NaN(uint32_t a, int mode, int *flags)
{
    if (!(a >> (FP32_MANT_BITS - 1) & 1)) {
        *flags |= FPLIB_IOC;
        a |= 1ULL << (FP32_MANT_BITS - 1);
    }
    return mode & FPLIB_DN ? fp32_defaultNaN() : a;
}

static inline uint64_t
fp64_process_NaN(uint64_t a, int mode, int *flags)
{
    if (!(a >> (FP64_MANT_BITS - 1) & 1)) {
        *flags |= FPLIB_IOC;
        a |= 1ULL << (FP64_MANT_BITS - 1);
    }
    return mode & FPLIB_DN ? fp64_defaultNaN() : a;
}

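// Propagate NaNs for two- and three-operand operations with the usual Arm
// priority: signalling NaNs are handled before quiet NaNs, and earlier
// operands before later ones. Returns 0 (which is not a NaN encoding) when
// no operand is a NaN, so callers can test the result with a plain "if".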
static uint16_t
fp16_process_NaNs(uint16_t a, uint16_t b, int mode, int *flags)
{
    int a_exp = FP16_EXP(a);
    uint16_t a_mnt = FP16_MANT(a);
    int b_exp = FP16_EXP(b);
    uint16_t b_mnt = FP16_MANT(b);

    // Handle signalling NaNs:
    if (fp16_is_signalling_NaN(a_exp, a_mnt))
        return fp16_process_NaN(a, mode, flags);
    if (fp16_is_signalling_NaN(b_exp, b_mnt))
        return fp16_process_NaN(b, mode, flags);

    // Handle quiet NaNs:
    if (fp16_is_NaN(a_exp, a_mnt))
        return fp16_process_NaN(a, mode, flags);
    if (fp16_is_NaN(b_exp, b_mnt))
        return fp16_process_NaN(b, mode, flags);

    return 0;
}

static uint32_t
fp32_process_NaNs(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_exp = FP32_EXP(a);
    uint32_t a_mnt = FP32_MANT(a);
    int b_exp = FP32_EXP(b);
    uint32_t b_mnt = FP32_MANT(b);

    // Handle signalling NaNs:
    if (fp32_is_signalling_NaN(a_exp, a_mnt))
        return fp32_process_NaN(a, mode, flags);
    if (fp32_is_signalling_NaN(b_exp, b_mnt))
        return fp32_process_NaN(b, mode, flags);

    // Handle quiet NaNs:
    if (fp32_is_NaN(a_exp, a_mnt))
        return fp32_process_NaN(a, mode, flags);
    if (fp32_is_NaN(b_exp, b_mnt))
        return fp32_process_NaN(b, mode, flags);

    return 0;
}

static uint64_t
fp64_process_NaNs(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_exp = FP64_EXP(a);
    uint64_t a_mnt = FP64_MANT(a);
    int b_exp = FP64_EXP(b);
    uint64_t b_mnt = FP64_MANT(b);

    // Handle signalling NaNs:
    if (fp64_is_signalling_NaN(a_exp, a_mnt))
        return fp64_process_NaN(a, mode, flags);
    if (fp64_is_signalling_NaN(b_exp, b_mnt))
        return fp64_process_NaN(b, mode, flags);

    // Handle quiet NaNs:
    if (fp64_is_NaN(a_exp, a_mnt))
        return fp64_process_NaN(a, mode, flags);
    if (fp64_is_NaN(b_exp, b_mnt))
        return fp64_process_NaN(b, mode, flags);

    return 0;
}

static uint16_t
fp16_process_NaNs3(uint16_t a, uint16_t b, uint16_t c, int mode, int *flags)
{
    int a_exp = FP16_EXP(a);
    uint16_t a_mnt = FP16_MANT(a);
    int b_exp = FP16_EXP(b);
    uint16_t b_mnt = FP16_MANT(b);
    int c_exp = FP16_EXP(c);
    uint16_t c_mnt = FP16_MANT(c);

    // Handle signalling NaNs:
    if (fp16_is_signalling_NaN(a_exp, a_mnt))
        return fp16_process_NaN(a, mode, flags);
    if (fp16_is_signalling_NaN(b_exp, b_mnt))
        return fp16_process_NaN(b, mode, flags);
    if (fp16_is_signalling_NaN(c_exp, c_mnt))
        return fp16_process_NaN(c, mode, flags);

    // Handle quiet NaNs:
    if (fp16_is_NaN(a_exp, a_mnt))
        return fp16_process_NaN(a, mode, flags);
    if (fp16_is_NaN(b_exp, b_mnt))
        return fp16_process_NaN(b, mode, flags);
    if (fp16_is_NaN(c_exp, c_mnt))
        return fp16_process_NaN(c, mode, flags);

    return 0;
}

static uint32_t
fp32_process_NaNs3(uint32_t a, uint32_t b, uint32_t c, int mode, int *flags)
{
    int a_exp = FP32_EXP(a);
    uint32_t a_mnt = FP32_MANT(a);
    int b_exp = FP32_EXP(b);
    uint32_t b_mnt = FP32_MANT(b);
    int c_exp = FP32_EXP(c);
    uint32_t c_mnt = FP32_MANT(c);

    // Handle signalling NaNs:
    if (fp32_is_signalling_NaN(a_exp, a_mnt))
        return fp32_process_NaN(a, mode, flags);
    if (fp32_is_signalling_NaN(b_exp, b_mnt))
        return fp32_process_NaN(b, mode, flags);
    if (fp32_is_signalling_NaN(c_exp, c_mnt))
        return fp32_process_NaN(c, mode, flags);

    // Handle quiet NaNs:
    if (fp32_is_NaN(a_exp, a_mnt))
        return fp32_process_NaN(a, mode, flags);
    if (fp32_is_NaN(b_exp, b_mnt))
        return fp32_process_NaN(b, mode, flags);
    if (fp32_is_NaN(c_exp, c_mnt))
        return fp32_process_NaN(c, mode, flags);

    return 0;
}

static uint64_t
fp64_process_NaNs3(uint64_t a, uint64_t b, uint64_t c, int mode, int *flags)
{
    int a_exp = FP64_EXP(a);
    uint64_t a_mnt = FP64_MANT(a);
    int b_exp = FP64_EXP(b);
    uint64_t b_mnt = FP64_MANT(b);
    int c_exp = FP64_EXP(c);
    uint64_t c_mnt = FP64_MANT(c);

    // Handle signalling NaNs:
    if (fp64_is_signalling_NaN(a_exp, a_mnt))
        return fp64_process_NaN(a, mode, flags);
    if (fp64_is_signalling_NaN(b_exp, b_mnt))
        return fp64_process_NaN(b, mode, flags);
    if (fp64_is_signalling_NaN(c_exp, c_mnt))
        return fp64_process_NaN(c, mode, flags);

    // Handle quiet NaNs:
    if (fp64_is_NaN(a_exp, a_mnt))
        return fp64_process_NaN(a, mode, flags);
    if (fp64_is_NaN(b_exp, b_mnt))
        return fp64_process_NaN(b, mode, flags);
    if (fp64_is_NaN(c_exp, c_mnt))
        return fp64_process_NaN(c, mode, flags);

    return 0;
}

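// Round a normalised (sign, exponent, mantissa) triple and pack the result.
// On entry the mantissa carries extra low-order precision beyond the
// destination format; those bits are first collapsed into two rounding bits
// plus a sticky bit, so "error" below is 0 (exact), 1 (inexact, below half
// way), 2 (exactly half way) or 3 (above half way). The fp32/fp64 variants
// that follow are identical apart from the widths; only the half-precision
// variant has the AHP (alternative half-precision) overflow path.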
static uint16_t
fp16_round_(int sgn, int exp, uint16_t mnt, int rm, int mode, int *flags)
{
    int biased_exp; // non-negative exponent value for result
    uint16_t int_mant; // mantissa for result, less than (2 << FP16_MANT_BITS)
    int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5

    assert(rm != FPRounding_TIEAWAY);

    // Flush to zero:
    if ((mode & FPLIB_FZ16) && exp < 1) {
        *flags |= FPLIB_UFC;
        return fp16_zero(sgn);
    }

    // The bottom FP16_EXP_BITS bits of mnt are orred together:
    mnt = (4ULL << FP16_MANT_BITS | mnt >> (FP16_EXP_BITS - 1) |
           ((mnt & ((1ULL << FP16_EXP_BITS) - 1)) != 0));

    if (exp > 0) {
        biased_exp = exp;
        int_mant = mnt >> 2;
        error = mnt & 3;
    } else {
        biased_exp = 0;
        int_mant = lsr16(mnt, 3 - exp);
        error = (lsr16(mnt, 1 - exp) & 3) | !!(mnt & (lsl16(1, 1 - exp) - 1));
    }

    if (!biased_exp && error) { // xx should also check fpscr_val<11>
        *flags |= FPLIB_UFC;
    }

    // Round up:
    if ((rm == FPLIB_RN && (error == 3 ||
                            (error == 2 && (int_mant & 1)))) ||
        (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) {
        ++int_mant;
        if (int_mant == 1ULL << FP16_MANT_BITS) {
            // Rounded up from denormalized to normalized
            biased_exp = 1;
        }
        if (int_mant == 2ULL << FP16_MANT_BITS) {
            // Rounded up to next exponent
            ++biased_exp;
            int_mant >>= 1;
        }
    }

    // Handle rounding to odd aka Von Neumann rounding:
    if (error && rm == FPRounding_ODD)
        int_mant |= 1;

    // Handle overflow:
    if (!(mode & FPLIB_AHP)) {
        if (biased_exp >= (int)FP16_EXP_INF) {
            *flags |= FPLIB_OFC | FPLIB_IXC;
            if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) ||
                (rm == FPLIB_RM && sgn)) {
                return fp16_infinity(sgn);
            } else {
                return fp16_max_normal(sgn);
            }
        }
    } else {
        if (biased_exp >= (int)FP16_EXP_INF + 1) {
            *flags |= FPLIB_IOC;
            return fp16_pack(sgn, FP16_EXP_INF, -1);
        }
    }

    if (error) {
        *flags |= FPLIB_IXC;
    }

    return fp16_pack(sgn, biased_exp, int_mant);
}

static uint16_t
fp16_round(int sgn, int exp, uint16_t mnt, int mode, int *flags)
{
    return fp16_round_(sgn, exp, mnt, mode & 3, mode, flags);
}

static uint32_t
fp32_round_(int sgn, int exp, uint32_t mnt, int rm, int mode, int *flags)
{
    int biased_exp; // non-negative exponent value for result
    uint32_t int_mant; // mantissa for result, less than (2 << FP32_MANT_BITS)
    int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5

    assert(rm != FPRounding_TIEAWAY);

    // Flush to zero:
    if ((mode & FPLIB_FZ) && exp < 1) {
        *flags |= FPLIB_UFC;
        return fp32_zero(sgn);
    }

    // The bottom FP32_EXP_BITS bits of mnt are orred together:
    mnt = (4ULL << FP32_MANT_BITS | mnt >> (FP32_EXP_BITS - 1) |
           ((mnt & ((1ULL << FP32_EXP_BITS) - 1)) != 0));

    if (exp > 0) {
        biased_exp = exp;
        int_mant = mnt >> 2;
        error = mnt & 3;
    } else {
        biased_exp = 0;
        int_mant = lsr32(mnt, 3 - exp);
        error = (lsr32(mnt, 1 - exp) & 3) | !!(mnt & (lsl32(1, 1 - exp) - 1));
    }

    if (!biased_exp && error) { // xx should also check fpscr_val<11>
        *flags |= FPLIB_UFC;
    }

    // Round up:
    if ((rm == FPLIB_RN && (error == 3 ||
                            (error == 2 && (int_mant & 1)))) ||
        (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) {
        ++int_mant;
        if (int_mant == 1ULL << FP32_MANT_BITS) {
            // Rounded up from denormalized to normalized
            biased_exp = 1;
        }
        if (int_mant == 2ULL << FP32_MANT_BITS) {
            // Rounded up to next exponent
            ++biased_exp;
            int_mant >>= 1;
        }
    }

    // Handle rounding to odd aka Von Neumann rounding:
    if (error && rm == FPRounding_ODD)
        int_mant |= 1;

    // Handle overflow:
    if (biased_exp >= (int)FP32_EXP_INF) {
        *flags |= FPLIB_OFC | FPLIB_IXC;
        if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) ||
            (rm == FPLIB_RM && sgn)) {
            return fp32_infinity(sgn);
        } else {
            return fp32_max_normal(sgn);
        }
    }

    if (error) {
        *flags |= FPLIB_IXC;
    }

    return fp32_pack(sgn, biased_exp, int_mant);
}

static uint32_t
fp32_round(int sgn, int exp, uint32_t mnt, int mode, int *flags)
{
    return fp32_round_(sgn, exp, mnt, mode & 3, mode, flags);
}

static uint64_t
fp64_round_(int sgn, int exp, uint64_t mnt, int rm, int mode, int *flags)
{
    int biased_exp; // non-negative exponent value for result
    uint64_t int_mant; // mantissa for result, less than (2 << FP64_MANT_BITS)
    int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5

    assert(rm != FPRounding_TIEAWAY);

    // Flush to zero:
    if ((mode & FPLIB_FZ) && exp < 1) {
        *flags |= FPLIB_UFC;
        return fp64_zero(sgn);
    }

    // The bottom FP64_EXP_BITS bits of mnt are orred together:
    mnt = (4ULL << FP64_MANT_BITS | mnt >> (FP64_EXP_BITS - 1) |
           ((mnt & ((1ULL << FP64_EXP_BITS) - 1)) != 0));

    if (exp > 0) {
        biased_exp = exp;
        int_mant = mnt >> 2;
        error = mnt & 3;
    } else {
        biased_exp = 0;
        int_mant = lsr64(mnt, 3 - exp);
        error = (lsr64(mnt, 1 - exp) & 3) | !!(mnt & (lsl64(1, 1 - exp) - 1));
    }

    if (!biased_exp && error) { // xx should also check fpscr_val<11>
        *flags |= FPLIB_UFC;
    }

    // Round up:
    if ((rm == FPLIB_RN && (error == 3 ||
                            (error == 2 && (int_mant & 1)))) ||
        (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) {
        ++int_mant;
        if (int_mant == 1ULL << FP64_MANT_BITS) {
            // Rounded up from denormalized to normalized
            biased_exp = 1;
        }
        if (int_mant == 2ULL << FP64_MANT_BITS) {
            // Rounded up to next exponent
            ++biased_exp;
            int_mant >>= 1;
        }
    }

    // Handle rounding to odd aka Von Neumann rounding:
    if (error && rm == FPRounding_ODD)
        int_mant |= 1;

    // Handle overflow:
    if (biased_exp >= (int)FP64_EXP_INF) {
        *flags |= FPLIB_OFC | FPLIB_IXC;
        if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) ||
            (rm == FPLIB_RM && sgn)) {
            return fp64_infinity(sgn);
        } else {
            return fp64_max_normal(sgn);
        }
    }

    if (error) {
        *flags |= FPLIB_IXC;
    }

    return fp64_pack(sgn, biased_exp, int_mant);
}

static uint64_t
fp64_round(int sgn, int exp, uint64_t mnt, int mode, int *flags)
{
    return fp64_round_(sgn, exp, mnt, mode & 3, mode, flags);
}

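// Comparisons. +0 and -0 compare equal (the "!a_mnt && !b_mnt" test), and
// any NaN operand makes the ordered comparisons (ge, gt) return false.
// Signalling NaNs always raise Invalid Operation; quiet NaNs raise it only
// for the ordered comparisons, not for eq/un.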
static int
fp16_compare_eq(uint16_t a, uint16_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint16_t a_mnt, b_mnt;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp16_is_NaN(a_exp, a_mnt) ||
        fp16_is_NaN(b_exp, b_mnt)) {
        if (fp16_is_signalling_NaN(a_exp, a_mnt) ||
            fp16_is_signalling_NaN(b_exp, b_mnt))
            *flags |= FPLIB_IOC;
        return 0;
    }
    return a == b || (!a_mnt && !b_mnt);
}

static int
fp16_compare_ge(uint16_t a, uint16_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint16_t a_mnt, b_mnt;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp16_is_NaN(a_exp, a_mnt) ||
        fp16_is_NaN(b_exp, b_mnt)) {
        *flags |= FPLIB_IOC;
        return 0;
    }
    if (!a_mnt && !b_mnt)
        return 1;
    if (a_sgn != b_sgn)
        return b_sgn;
    if (a_exp != b_exp)
        return a_sgn ^ (a_exp > b_exp);
    if (a_mnt != b_mnt)
        return a_sgn ^ (a_mnt > b_mnt);
    return 1;
}

static int
fp16_compare_gt(uint16_t a, uint16_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint16_t a_mnt, b_mnt;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp16_is_NaN(a_exp, a_mnt) ||
        fp16_is_NaN(b_exp, b_mnt)) {
        *flags |= FPLIB_IOC;
        return 0;
    }
    if (!a_mnt && !b_mnt)
        return 0;
    if (a_sgn != b_sgn)
        return b_sgn;
    if (a_exp != b_exp)
        return a_sgn ^ (a_exp > b_exp);
    if (a_mnt != b_mnt)
        return a_sgn ^ (a_mnt > b_mnt);
    return 0;
}

static int
fp16_compare_un(uint16_t a, uint16_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint16_t a_mnt, b_mnt;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp16_is_NaN(a_exp, a_mnt) ||
        fp16_is_NaN(b_exp, b_mnt)) {
        if (fp16_is_signalling_NaN(a_exp, a_mnt) ||
            fp16_is_signalling_NaN(b_exp, b_mnt))
            *flags |= FPLIB_IOC;
        return 1;
    }
    return 0;
}

static int
fp32_compare_eq(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint32_t a_mnt, b_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp32_is_NaN(a_exp, a_mnt) ||
        fp32_is_NaN(b_exp, b_mnt)) {
        if (fp32_is_signalling_NaN(a_exp, a_mnt) ||
            fp32_is_signalling_NaN(b_exp, b_mnt))
            *flags |= FPLIB_IOC;
        return 0;
    }
    return a == b || (!a_mnt && !b_mnt);
}

static int
fp32_compare_ge(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint32_t a_mnt, b_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp32_is_NaN(a_exp, a_mnt) ||
        fp32_is_NaN(b_exp, b_mnt)) {
        *flags |= FPLIB_IOC;
        return 0;
    }
    if (!a_mnt && !b_mnt)
        return 1;
    if (a_sgn != b_sgn)
        return b_sgn;
    if (a_exp != b_exp)
        return a_sgn ^ (a_exp > b_exp);
    if (a_mnt != b_mnt)
        return a_sgn ^ (a_mnt > b_mnt);
    return 1;
}

static int
fp32_compare_gt(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint32_t a_mnt, b_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp32_is_NaN(a_exp, a_mnt) ||
        fp32_is_NaN(b_exp, b_mnt)) {
        *flags |= FPLIB_IOC;
        return 0;
    }
    if (!a_mnt && !b_mnt)
        return 0;
    if (a_sgn != b_sgn)
        return b_sgn;
    if (a_exp != b_exp)
        return a_sgn ^ (a_exp > b_exp);
    if (a_mnt != b_mnt)
        return a_sgn ^ (a_mnt > b_mnt);
    return 0;
}

static int
fp32_compare_un(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint32_t a_mnt, b_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp32_is_NaN(a_exp, a_mnt) ||
        fp32_is_NaN(b_exp, b_mnt)) {
        if (fp32_is_signalling_NaN(a_exp, a_mnt) ||
            fp32_is_signalling_NaN(b_exp, b_mnt))
            *flags |= FPLIB_IOC;
        return 1;
    }
    return 0;
}

static int
fp64_compare_eq(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint64_t a_mnt, b_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp64_is_NaN(a_exp, a_mnt) ||
        fp64_is_NaN(b_exp, b_mnt)) {
        if (fp64_is_signalling_NaN(a_exp, a_mnt) ||
            fp64_is_signalling_NaN(b_exp, b_mnt))
            *flags |= FPLIB_IOC;
        return 0;
    }
    return a == b || (!a_mnt && !b_mnt);
}

static int
fp64_compare_ge(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint64_t a_mnt, b_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp64_is_NaN(a_exp, a_mnt) ||
        fp64_is_NaN(b_exp, b_mnt)) {
        *flags |= FPLIB_IOC;
        return 0;
    }
    if (!a_mnt && !b_mnt)
        return 1;
    if (a_sgn != b_sgn)
        return b_sgn;
    if (a_exp != b_exp)
        return a_sgn ^ (a_exp > b_exp);
    if (a_mnt != b_mnt)
        return a_sgn ^ (a_mnt > b_mnt);
    return 1;
}

static int
fp64_compare_gt(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint64_t a_mnt, b_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp64_is_NaN(a_exp, a_mnt) ||
        fp64_is_NaN(b_exp, b_mnt)) {
        *flags |= FPLIB_IOC;
        return 0;
    }
    if (!a_mnt && !b_mnt)
        return 0;
    if (a_sgn != b_sgn)
        return b_sgn;
    if (a_exp != b_exp)
        return a_sgn ^ (a_exp > b_exp);
    if (a_mnt != b_mnt)
        return a_sgn ^ (a_mnt > b_mnt);
    return 0;
}

static int
fp64_compare_un(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint64_t a_mnt, b_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp64_is_NaN(a_exp, a_mnt) ||
        fp64_is_NaN(b_exp, b_mnt)) {
        if (fp64_is_signalling_NaN(a_exp, a_mnt) ||
            fp64_is_signalling_NaN(b_exp, b_mnt))
            *flags |= FPLIB_IOC;
        return 1;
    }
    return 0;
}

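// Addition/subtraction ("neg" flips the sign of b, giving a - b). The
// mantissas are pre-shifted left by 3 so that the smaller operand can be
// aligned with a right shift while retaining guard bits; any bits shifted
// out are ORed into the bottom bit as a sticky bit so rounding still sees
// them.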
static uint16_t
fp16_add(uint16_t a, uint16_t b, int neg, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint16_t a_mnt, b_mnt, x, x_mnt;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp16_process_NaNs(a, b, mode, flags))) {
        return x;
    }

    b_sgn ^= neg;

    // Handle infinities and zeroes:
    if (a_exp == FP16_EXP_INF && b_exp == FP16_EXP_INF && a_sgn != b_sgn) {
        *flags |= FPLIB_IOC;
        return fp16_defaultNaN();
    } else if (a_exp == FP16_EXP_INF) {
        return fp16_infinity(a_sgn);
    } else if (b_exp == FP16_EXP_INF) {
        return fp16_infinity(b_sgn);
    } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) {
        return fp16_zero(a_sgn);
    }

    a_mnt <<= 3;
    b_mnt <<= 3;
    if (a_exp >= b_exp) {
        b_mnt = (lsr16(b_mnt, a_exp - b_exp) |
                 !!(b_mnt & (lsl16(1, a_exp - b_exp) - 1)));
        b_exp = a_exp;
    } else {
        a_mnt = (lsr16(a_mnt, b_exp - a_exp) |
                 !!(a_mnt & (lsl16(1, b_exp - a_exp) - 1)));
        a_exp = b_exp;
    }
    x_sgn = a_sgn;
    x_exp = a_exp;
    if (a_sgn == b_sgn) {
        x_mnt = a_mnt + b_mnt;
    } else if (a_mnt >= b_mnt) {
        x_mnt = a_mnt - b_mnt;
    } else {
        x_sgn ^= 1;
        x_mnt = b_mnt - a_mnt;
    }

    if (!x_mnt) {
        // Sign of exact zero result depends on rounding mode
        return fp16_zero((mode & 3) == 2);
    }

    x_mnt = fp16_normalise(x_mnt, &x_exp);

    return fp16_round(x_sgn, x_exp + FP16_EXP_BITS - 3, x_mnt << 1,
                      mode, flags);
}

static uint32_t
fp32_add(uint32_t a, uint32_t b, int neg, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint32_t a_mnt, b_mnt, x, x_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp32_process_NaNs(a, b, mode, flags))) {
        return x;
    }

    b_sgn ^= neg;

    // Handle infinities and zeroes:
    if (a_exp == FP32_EXP_INF && b_exp == FP32_EXP_INF && a_sgn != b_sgn) {
        *flags |= FPLIB_IOC;
        return fp32_defaultNaN();
    } else if (a_exp == FP32_EXP_INF) {
        return fp32_infinity(a_sgn);
    } else if (b_exp == FP32_EXP_INF) {
        return fp32_infinity(b_sgn);
    } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) {
        return fp32_zero(a_sgn);
    }

    a_mnt <<= 3;
    b_mnt <<= 3;
    if (a_exp >= b_exp) {
        b_mnt = (lsr32(b_mnt, a_exp - b_exp) |
                 !!(b_mnt & (lsl32(1, a_exp - b_exp) - 1)));
        b_exp = a_exp;
    } else {
        a_mnt = (lsr32(a_mnt, b_exp - a_exp) |
                 !!(a_mnt & (lsl32(1, b_exp - a_exp) - 1)));
        a_exp = b_exp;
    }
    x_sgn = a_sgn;
    x_exp = a_exp;
    if (a_sgn == b_sgn) {
        x_mnt = a_mnt + b_mnt;
    } else if (a_mnt >= b_mnt) {
        x_mnt = a_mnt - b_mnt;
    } else {
        x_sgn ^= 1;
        x_mnt = b_mnt - a_mnt;
    }

    if (!x_mnt) {
        // Sign of exact zero result depends on rounding mode
        return fp32_zero((mode & 3) == 2);
    }

    x_mnt = fp32_normalise(x_mnt, &x_exp);

    return fp32_round(x_sgn, x_exp + FP32_EXP_BITS - 3, x_mnt << 1,
                      mode, flags);
}

static uint64_t
fp64_add(uint64_t a, uint64_t b, int neg, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint64_t a_mnt, b_mnt, x, x_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp64_process_NaNs(a, b, mode, flags))) {
        return x;
    }

    b_sgn ^= neg;

    // Handle infinities and zeroes:
    if (a_exp == FP64_EXP_INF && b_exp == FP64_EXP_INF && a_sgn != b_sgn) {
        *flags |= FPLIB_IOC;
        return fp64_defaultNaN();
    } else if (a_exp == FP64_EXP_INF) {
        return fp64_infinity(a_sgn);
    } else if (b_exp == FP64_EXP_INF) {
        return fp64_infinity(b_sgn);
    } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) {
        return fp64_zero(a_sgn);
    }

    a_mnt <<= 3;
    b_mnt <<= 3;
    if (a_exp >= b_exp) {
        b_mnt = (lsr64(b_mnt, a_exp - b_exp) |
                 !!(b_mnt & (lsl64(1, a_exp - b_exp) - 1)));
        b_exp = a_exp;
    } else {
        a_mnt = (lsr64(a_mnt, b_exp - a_exp) |
                 !!(a_mnt & (lsl64(1, b_exp - a_exp) - 1)));
        a_exp = b_exp;
    }
    x_sgn = a_sgn;
    x_exp = a_exp;
    if (a_sgn == b_sgn) {
        x_mnt = a_mnt + b_mnt;
    } else if (a_mnt >= b_mnt) {
        x_mnt = a_mnt - b_mnt;
    } else {
        x_sgn ^= 1;
        x_mnt = b_mnt - a_mnt;
    }

    if (!x_mnt) {
        // Sign of exact zero result depends on rounding mode
        return fp64_zero((mode & 3) == 2);
    }

    x_mnt = fp64_normalise(x_mnt, &x_exp);

    return fp64_round(x_sgn, x_exp + FP64_EXP_BITS - 3, x_mnt << 1,
                      mode, flags);
}

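// Multiplication. The full double-width product is formed, normalised, and
// then narrowed back to the working width with the discarded low bits
// collapsed into a sticky bit for rounding; the fp64 variant uses the
// 128-bit helpers above for the same purpose.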
static uint16_t
fp16_mul(uint16_t a, uint16_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint16_t a_mnt, b_mnt, x;
    uint32_t x_mnt;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp16_process_NaNs(a, b, mode, flags))) {
        return x;
    }

    // Handle infinities and zeroes:
    if ((a_exp == FP16_EXP_INF && !b_mnt) ||
        (b_exp == FP16_EXP_INF && !a_mnt)) {
        *flags |= FPLIB_IOC;
        return fp16_defaultNaN();
    } else if (a_exp == FP16_EXP_INF || b_exp == FP16_EXP_INF) {
        return fp16_infinity(a_sgn ^ b_sgn);
    } else if (!a_mnt || !b_mnt) {
        return fp16_zero(a_sgn ^ b_sgn);
    }

    // Multiply and normalise:
    x_sgn = a_sgn ^ b_sgn;
    x_exp = a_exp + b_exp - FP16_EXP_BIAS + 2 * FP16_EXP_BITS + 1;
    x_mnt = (uint32_t)a_mnt * b_mnt;
    x_mnt = fp32_normalise(x_mnt, &x_exp);

    // Convert to FP16_BITS bits, collapsing error into bottom bit:
    x_mnt = lsr32(x_mnt, FP16_BITS - 1) | !!lsl32(x_mnt, FP16_BITS + 1);

    return fp16_round(x_sgn, x_exp, x_mnt, mode, flags);
}

static uint32_t
fp32_mul(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint32_t a_mnt, b_mnt, x;
    uint64_t x_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp32_process_NaNs(a, b, mode, flags))) {
        return x;
    }

    // Handle infinities and zeroes:
    if ((a_exp == FP32_EXP_INF && !b_mnt) ||
        (b_exp == FP32_EXP_INF && !a_mnt)) {
        *flags |= FPLIB_IOC;
        return fp32_defaultNaN();
    } else if (a_exp == FP32_EXP_INF || b_exp == FP32_EXP_INF) {
        return fp32_infinity(a_sgn ^ b_sgn);
    } else if (!a_mnt || !b_mnt) {
        return fp32_zero(a_sgn ^ b_sgn);
    }

    // Multiply and normalise:
    x_sgn = a_sgn ^ b_sgn;
    x_exp = a_exp + b_exp - FP32_EXP_BIAS + 2 * FP32_EXP_BITS + 1;
    x_mnt = (uint64_t)a_mnt * b_mnt;
    x_mnt = fp64_normalise(x_mnt, &x_exp);

    // Convert to FP32_BITS bits, collapsing error into bottom bit:
    x_mnt = lsr64(x_mnt, FP32_BITS - 1) | !!lsl64(x_mnt, FP32_BITS + 1);

    return fp32_round(x_sgn, x_exp, x_mnt, mode, flags);
}

static uint64_t
fp64_mul(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint64_t a_mnt, b_mnt, x;
    uint64_t x0_mnt, x1_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp64_process_NaNs(a, b, mode, flags))) {
        return x;
    }

    // Handle infinities and zeroes:
    if ((a_exp == FP64_EXP_INF && !b_mnt) ||
        (b_exp == FP64_EXP_INF && !a_mnt)) {
        *flags |= FPLIB_IOC;
        return fp64_defaultNaN();
    } else if (a_exp == FP64_EXP_INF || b_exp == FP64_EXP_INF) {
        return fp64_infinity(a_sgn ^ b_sgn);
    } else if (!a_mnt || !b_mnt) {
        return fp64_zero(a_sgn ^ b_sgn);
    }

    // Multiply and normalise:
    x_sgn = a_sgn ^ b_sgn;
    x_exp = a_exp + b_exp - FP64_EXP_BIAS + 2 * FP64_EXP_BITS + 1;
    mul62x62(&x0_mnt, &x1_mnt, a_mnt, b_mnt);
    fp128_normalise(&x0_mnt, &x1_mnt, &x_exp);

    // Convert to FP64_BITS bits, collapsing error into bottom bit:
    x0_mnt = x1_mnt << 1 | !!x0_mnt;

    return fp64_round(x_sgn, x_exp, x0_mnt, mode, flags);
}

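// Fused multiply-add: computes a + b*c with a single rounding at the end.
// The "scale" argument is added to the result exponent before rounding,
// letting callers fold in a final multiplication by a power of two.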
static uint16_t
fp16_muladd(uint16_t a, uint16_t b, uint16_t c, int scale,
            int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp;
    uint16_t a_mnt, b_mnt, c_mnt, x;
    uint32_t x_mnt, y_mnt;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
    fp16_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags);

    x = fp16_process_NaNs3(a, b, c, mode, flags);

    // Quiet NaN added to product of zero and infinity:
    if (fp16_is_quiet_NaN(a_exp, a_mnt) &&
        ((!b_mnt && fp16_is_infinity(c_exp, c_mnt)) ||
         (!c_mnt && fp16_is_infinity(b_exp, b_mnt)))) {
        x = fp16_defaultNaN();
        *flags |= FPLIB_IOC;
    }

    if (x) {
        return x;
    }

    // Handle infinities and zeroes:
    if ((b_exp == FP16_EXP_INF && !c_mnt) ||
        (c_exp == FP16_EXP_INF && !b_mnt) ||
        (a_exp == FP16_EXP_INF &&
         (b_exp == FP16_EXP_INF || c_exp == FP16_EXP_INF) &&
         (a_sgn != (b_sgn ^ c_sgn)))) {
        *flags |= FPLIB_IOC;
        return fp16_defaultNaN();
    }
    if (a_exp == FP16_EXP_INF)
        return fp16_infinity(a_sgn);
    if (b_exp == FP16_EXP_INF || c_exp == FP16_EXP_INF)
        return fp16_infinity(b_sgn ^ c_sgn);
    if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn))
        return fp16_zero(a_sgn);

    x_sgn = a_sgn;
    x_exp = a_exp + 2 * FP16_EXP_BITS - 3;
    x_mnt = (uint32_t)a_mnt << (FP16_MANT_BITS + 4);

    // Multiply:
    y_sgn = b_sgn ^ c_sgn;
    y_exp = b_exp + c_exp - FP16_EXP_BIAS + 2 * FP16_EXP_BITS + 1 - 3;
    y_mnt = (uint32_t)b_mnt * c_mnt << 3;
    if (!y_mnt) {
        y_exp = x_exp;
    }

    // Add:
    if (x_exp >= y_exp) {
        y_mnt = (lsr32(y_mnt, x_exp - y_exp) |
                 !!(y_mnt & (lsl32(1, x_exp - y_exp) - 1)));
        y_exp = x_exp;
    } else {
        x_mnt = (lsr32(x_mnt, y_exp - x_exp) |
                 !!(x_mnt & (lsl32(1, y_exp - x_exp) - 1)));
        x_exp = y_exp;
    }
    if (x_sgn == y_sgn) {
        x_mnt = x_mnt + y_mnt;
    } else if (x_mnt >= y_mnt) {
        x_mnt = x_mnt - y_mnt;
    } else {
        x_sgn ^= 1;
        x_mnt = y_mnt - x_mnt;
    }

    if (!x_mnt) {
        // Sign of exact zero result depends on rounding mode
        return fp16_zero((mode & 3) == 2);
    }

    // Normalise into FP16_BITS bits, collapsing error into bottom bit:
    x_mnt = fp32_normalise(x_mnt, &x_exp);
    x_mnt = x_mnt >> (FP16_BITS - 1) | !!(uint16_t)(x_mnt << 1);

    return fp16_round(x_sgn, x_exp + scale, x_mnt, mode, flags);
}

static uint32_t
fp32_muladd(uint32_t a, uint32_t b, uint32_t c, int scale,
            int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp;
    uint32_t a_mnt, b_mnt, c_mnt, x;
    uint64_t x_mnt, y_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
    fp32_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags);

    x = fp32_process_NaNs3(a, b, c, mode, flags);

    // Quiet NaN added to product of zero and infinity:
    if (fp32_is_quiet_NaN(a_exp, a_mnt) &&
        ((!b_mnt && fp32_is_infinity(c_exp, c_mnt)) ||
         (!c_mnt && fp32_is_infinity(b_exp, b_mnt)))) {
        x = fp32_defaultNaN();
        *flags |= FPLIB_IOC;
    }

    if (x) {
        return x;
    }

    // Handle infinities and zeroes:
    if ((b_exp == FP32_EXP_INF && !c_mnt) ||
        (c_exp == FP32_EXP_INF && !b_mnt) ||
        (a_exp == FP32_EXP_INF &&
         (b_exp == FP32_EXP_INF || c_exp == FP32_EXP_INF) &&
         (a_sgn != (b_sgn ^ c_sgn)))) {
        *flags |= FPLIB_IOC;
        return fp32_defaultNaN();
    }
    if (a_exp == FP32_EXP_INF)
        return fp32_infinity(a_sgn);
    if (b_exp == FP32_EXP_INF || c_exp == FP32_EXP_INF)
        return fp32_infinity(b_sgn ^ c_sgn);
    if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn))
        return fp32_zero(a_sgn);

    x_sgn = a_sgn;
    x_exp = a_exp + 2 * FP32_EXP_BITS - 3;
    x_mnt = (uint64_t)a_mnt << (FP32_MANT_BITS + 4);

    // Multiply:
    y_sgn = b_sgn ^ c_sgn;
    y_exp = b_exp + c_exp - FP32_EXP_BIAS + 2 * FP32_EXP_BITS + 1 - 3;
    y_mnt = (uint64_t)b_mnt * c_mnt << 3;
    if (!y_mnt) {
        y_exp = x_exp;
    }

    // Add:
    if (x_exp >= y_exp) {
        y_mnt = (lsr64(y_mnt, x_exp - y_exp) |
                 !!(y_mnt & (lsl64(1, x_exp - y_exp) - 1)));
        y_exp = x_exp;
    } else {
        x_mnt = (lsr64(x_mnt, y_exp - x_exp) |
                 !!(x_mnt & (lsl64(1, y_exp - x_exp) - 1)));
        x_exp = y_exp;
    }
    if (x_sgn == y_sgn) {
        x_mnt = x_mnt + y_mnt;
    } else if (x_mnt >= y_mnt) {
        x_mnt = x_mnt - y_mnt;
    } else {
        x_sgn ^= 1;
        x_mnt = y_mnt - x_mnt;
    }

    if (!x_mnt) {
        // Sign of exact zero result depends on rounding mode
        return fp32_zero((mode & 3) == 2);
    }

    // Normalise into FP32_BITS bits, collapsing error into bottom bit:
    x_mnt = fp64_normalise(x_mnt, &x_exp);
    x_mnt = x_mnt >> (FP32_BITS - 1) | !!(uint32_t)(x_mnt << 1);

    return fp32_round(x_sgn, x_exp + scale, x_mnt, mode, flags);
}

static uint64_t
fp64_muladd(uint64_t a, uint64_t b, uint64_t c, int scale,
            int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp;
    uint64_t a_mnt, b_mnt, c_mnt, x;
    uint64_t x0_mnt, x1_mnt, y0_mnt, y1_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
    fp64_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags);

    x = fp64_process_NaNs3(a, b, c, mode, flags);

    // Quiet NaN added to product of zero and infinity:
    if (fp64_is_quiet_NaN(a_exp, a_mnt) &&
        ((!b_mnt && fp64_is_infinity(c_exp, c_mnt)) ||
         (!c_mnt && fp64_is_infinity(b_exp, b_mnt)))) {
        x = fp64_defaultNaN();
        *flags |= FPLIB_IOC;
    }

    if (x) {
        return x;
    }

    // Handle infinities and zeroes:
    if ((b_exp == FP64_EXP_INF && !c_mnt) ||
        (c_exp == FP64_EXP_INF && !b_mnt) ||
        (a_exp == FP64_EXP_INF &&
         (b_exp == FP64_EXP_INF || c_exp == FP64_EXP_INF) &&
         (a_sgn != (b_sgn ^ c_sgn)))) {
        *flags |= FPLIB_IOC;
        return fp64_defaultNaN();
    }
    if (a_exp == FP64_EXP_INF)
        return fp64_infinity(a_sgn);
    if (b_exp == FP64_EXP_INF || c_exp == FP64_EXP_INF)
        return fp64_infinity(b_sgn ^ c_sgn);
    if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn))
        return fp64_zero(a_sgn);

    x_sgn = a_sgn;
    x_exp = a_exp + FP64_EXP_BITS;
    x0_mnt = 0;
    x1_mnt = a_mnt;

    // Multiply:
    y_sgn = b_sgn ^ c_sgn;
    y_exp = b_exp + c_exp - FP64_EXP_BIAS + 2 * FP64_EXP_BITS + 1 - 3;
    mul62x62(&y0_mnt, &y1_mnt, b_mnt, c_mnt << 3);
    if (!y0_mnt && !y1_mnt) {
        y_exp = x_exp;
    }

    // Add:
    if (x_exp >= y_exp) {
        uint64_t t0, t1;
        lsl128(&t0, &t1, y0_mnt, y1_mnt,
               x_exp - y_exp < 128 ? 128 - (x_exp - y_exp) : 0);
        lsr128(&y0_mnt, &y1_mnt, y0_mnt, y1_mnt, x_exp - y_exp);
        y0_mnt |= !!(t0 | t1);
        y_exp = x_exp;
    } else {
        uint64_t t0, t1;
        lsl128(&t0, &t1, x0_mnt, x1_mnt,
               y_exp - x_exp < 128 ? 128 - (y_exp - x_exp) : 0);
        lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y_exp - x_exp);
        x0_mnt |= !!(t0 | t1);
        x_exp = y_exp;
    }
    if (x_sgn == y_sgn) {
        add128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y0_mnt, y1_mnt);
    } else if (cmp128(x0_mnt, x1_mnt, y0_mnt, y1_mnt) >= 0) {
        sub128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y0_mnt, y1_mnt);
    } else {
        x_sgn ^= 1;
        sub128(&x0_mnt, &x1_mnt, y0_mnt, y1_mnt, x0_mnt, x1_mnt);
    }

    if (!x0_mnt && !x1_mnt) {
        // Sign of exact zero result depends on rounding mode
        return fp64_zero((mode & 3) == 2);
    }

    // Normalise into FP64_BITS bits, collapsing error into bottom bit:
    fp128_normalise(&x0_mnt, &x1_mnt, &x_exp);
    x0_mnt = x1_mnt << 1 | !!x0_mnt;

    return fp64_round(x_sgn, x_exp + scale, x0_mnt, mode, flags);
}

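// Division. For half and single precision the quotient mantissa fits an
// ordinary integer division wide enough to leave guard bits; the remainder
// check afterwards sets the bottom (sticky) bit whenever the division was
// inexact. Double precision needs the Newton-Raphson scheme further below.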
static uint16_t
fp16_div(uint16_t a, uint16_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint16_t a_mnt, b_mnt, x;
    uint32_t x_mnt;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp16_process_NaNs(a, b, mode, flags)))
        return x;

    // Handle infinities and zeroes:
    if ((a_exp == FP16_EXP_INF && b_exp == FP16_EXP_INF) ||
        (!a_mnt && !b_mnt)) {
        *flags |= FPLIB_IOC;
        return fp16_defaultNaN();
    }
    if (a_exp == FP16_EXP_INF || !b_mnt) {
        if (a_exp != FP16_EXP_INF)
            *flags |= FPLIB_DZC;
        return fp16_infinity(a_sgn ^ b_sgn);
    }
    if (!a_mnt || b_exp == FP16_EXP_INF)
        return fp16_zero(a_sgn ^ b_sgn);

    // Divide, setting bottom bit if inexact:
    a_mnt = fp16_normalise(a_mnt, &a_exp);
    x_sgn = a_sgn ^ b_sgn;
    x_exp = a_exp - b_exp + (FP16_EXP_BIAS + FP16_BITS + 2 * FP16_EXP_BITS - 3);
    x_mnt = ((uint32_t)a_mnt << (FP16_MANT_BITS - FP16_EXP_BITS + 3)) / b_mnt;
    x_mnt |= (x_mnt * b_mnt !=
              (uint32_t)a_mnt << (FP16_MANT_BITS - FP16_EXP_BITS + 3));

    // Normalise into FP16_BITS bits, collapsing error into bottom bit:
    x_mnt = fp32_normalise(x_mnt, &x_exp);
    x_mnt = x_mnt >> (FP16_BITS - 1) | !!(uint16_t)(x_mnt << 1);

    return fp16_round(x_sgn, x_exp, x_mnt, mode, flags);
}

static uint32_t
fp32_div(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint32_t a_mnt, b_mnt, x;
    uint64_t x_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp32_process_NaNs(a, b, mode, flags)))
        return x;

    // Handle infinities and zeroes:
    if ((a_exp == FP32_EXP_INF && b_exp == FP32_EXP_INF) ||
        (!a_mnt && !b_mnt)) {
        *flags |= FPLIB_IOC;
        return fp32_defaultNaN();
    }
    if (a_exp == FP32_EXP_INF || !b_mnt) {
        if (a_exp != FP32_EXP_INF)
            *flags |= FPLIB_DZC;
        return fp32_infinity(a_sgn ^ b_sgn);
    }
    if (!a_mnt || b_exp == FP32_EXP_INF)
        return fp32_zero(a_sgn ^ b_sgn);

    // Divide, setting bottom bit if inexact:
    a_mnt = fp32_normalise(a_mnt, &a_exp);
    x_sgn = a_sgn ^ b_sgn;
    x_exp = a_exp - b_exp + (FP32_EXP_BIAS + FP32_BITS + 2 * FP32_EXP_BITS - 3);
    x_mnt = ((uint64_t)a_mnt << (FP32_MANT_BITS - FP32_EXP_BITS + 3)) / b_mnt;
    x_mnt |= (x_mnt * b_mnt !=
              (uint64_t)a_mnt << (FP32_MANT_BITS - FP32_EXP_BITS + 3));

    // Normalise into FP32_BITS bits, collapsing error into bottom bit:
    x_mnt = fp64_normalise(x_mnt, &x_exp);
    x_mnt = x_mnt >> (FP32_BITS - 1) | !!(uint32_t)(x_mnt << 1);

    return fp32_round(x_sgn, x_exp, x_mnt, mode, flags);
}

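// Double-precision division cannot use a single wide integer divide, so the
// reciprocal of the divisor is built up by Newton-Raphson refinement of a
// 32-bit estimate and then multiplied by the dividend; a final trial
// multiplication corrects the deliberate underestimate by at most one and
// doubles as the inexactness (sticky bit) test.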
static uint64_t
fp64_div(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp, c;
    uint64_t a_mnt, b_mnt, x, x_mnt, x0_mnt, x1_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp64_process_NaNs(a, b, mode, flags)))
        return x;

    // Handle infinities and zeroes:
    if ((a_exp == FP64_EXP_INF && b_exp == FP64_EXP_INF) ||
        (!a_mnt && !b_mnt)) {
        *flags |= FPLIB_IOC;
        return fp64_defaultNaN();
    }
    if (a_exp == FP64_EXP_INF || !b_mnt) {
        if (a_exp != FP64_EXP_INF)
            *flags |= FPLIB_DZC;
        return fp64_infinity(a_sgn ^ b_sgn);
    }
    if (!a_mnt || b_exp == FP64_EXP_INF)
        return fp64_zero(a_sgn ^ b_sgn);

    // Find reciprocal of divisor with Newton-Raphson:
    a_mnt = fp64_normalise(a_mnt, &a_exp);
    b_mnt = fp64_normalise(b_mnt, &b_exp);
    x_mnt = ~(uint64_t)0 / (b_mnt >> 31);
    mul64x32(&x0_mnt, &x1_mnt, b_mnt, x_mnt);
    sub128(&x0_mnt, &x1_mnt, 0, (uint64_t)1 << 32, x0_mnt, x1_mnt);
    lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 32);
    mul64x32(&x0_mnt, &x1_mnt, x0_mnt, x_mnt);
    lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 33);

    // Multiply by dividend:
    x_sgn = a_sgn ^ b_sgn;
    x_exp = a_exp - b_exp + FP64_EXP_BIAS + 8;
    mul62x62(&x0_mnt, &x1_mnt, x0_mnt, a_mnt >> 2);
    lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 4);
    x_mnt = x1_mnt;

    // This is an underestimate, so try adding one:
    mul62x62(&x0_mnt, &x1_mnt, b_mnt >> 2, x_mnt + 1);
    c = cmp128(x0_mnt, x1_mnt, 0, a_mnt >> 11);
    if (c <= 0) {
        ++x_mnt;
    }

    x_mnt = fp64_normalise(x_mnt, &x_exp);

    return fp64_round(x_sgn, x_exp, x_mnt << 1 | !!c, mode, flags);
}

static void
set_fpscr0(FPSCR &fpscr, int flags)
{
    if (flags & FPLIB_IDC) {
        fpscr.idc = 1;
    }
    if (flags & FPLIB_IOC) {
        fpscr.ioc = 1;
    }
    if (flags & FPLIB_DZC) {
        fpscr.dzc = 1;
    }
    if (flags & FPLIB_OFC) {
        fpscr.ofc = 1;
    }
    if (flags & FPLIB_UFC) {
        fpscr.ufc = 1;
    }
    if (flags & FPLIB_IXC) {
        fpscr.ixc = 1;
    }
}

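// Scale a by 2^b. The clamp on b (to +/-300 here, +/-3000 in the fp64
// variant) is presumably safe because those magnitudes already guarantee
// overflow or underflow for every finite input, so larger values of b
// behave identically while keeping the exponent arithmetic within int
// range.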
static uint16_t
fp16_scale(uint16_t a, int16_t b, int mode, int *flags)
{
    int a_sgn, a_exp;
    uint16_t a_mnt;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);

    // Handle NaNs:
    if (fp16_is_NaN(a_exp, a_mnt)) {
        return fp16_process_NaN(a, mode, flags);
    }

    // Handle zeroes:
    if (!a_mnt) {
        return fp16_zero(a_sgn);
    }

    // Handle infinities:
    if (a_exp == FP16_EXP_INF) {
        return fp16_infinity(a_sgn);
    }

    b = b < -300 ? -300 : b;
    b = b > 300 ? 300 : b;
    a_exp += b;
    a_mnt <<= 3;

    a_mnt = fp16_normalise(a_mnt, &a_exp);

    return fp16_round(a_sgn, a_exp + FP16_EXP_BITS - 3, a_mnt << 1,
                      mode, flags);
}

static uint32_t
fp32_scale(uint32_t a, int32_t b, int mode, int *flags)
{
    int a_sgn, a_exp;
    uint32_t a_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);

    // Handle NaNs:
    if (fp32_is_NaN(a_exp, a_mnt)) {
        return fp32_process_NaN(a, mode, flags);
    }

    // Handle zeroes:
    if (!a_mnt) {
        return fp32_zero(a_sgn);
    }

    // Handle infinities:
    if (a_exp == FP32_EXP_INF) {
        return fp32_infinity(a_sgn);
    }

    b = b < -300 ? -300 : b;
    b = b > 300 ? 300 : b;
    a_exp += b;
    a_mnt <<= 3;

    a_mnt = fp32_normalise(a_mnt, &a_exp);

    return fp32_round(a_sgn, a_exp + FP32_EXP_BITS - 3, a_mnt << 1,
                      mode, flags);
}

static uint64_t
fp64_scale(uint64_t a, int64_t b, int mode, int *flags)
{
    int a_sgn, a_exp;
    uint64_t a_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);

    // Handle NaNs:
    if (fp64_is_NaN(a_exp, a_mnt)) {
        return fp64_process_NaN(a, mode, flags);
    }

    // Handle zeroes:
    if (!a_mnt) {
        return fp64_zero(a_sgn);
    }

    // Handle infinities:
    if (a_exp == FP64_EXP_INF) {
        return fp64_infinity(a_sgn);
    }

    b = b < -3000 ? -3000 : b;
    b = b > 3000 ? 3000 : b;
    a_exp += b;
    a_mnt <<= 3;

    a_mnt = fp64_normalise(a_mnt, &a_exp);

    return fp64_round(a_sgn, a_exp + FP64_EXP_BITS - 3, a_mnt << 1,
                      mode, flags);
}

2024static uint16_t
2025fp16_sqrt(uint16_t a, int mode, int *flags)
2026{
2027 int a_sgn, a_exp, x_sgn, x_exp;
2028 uint16_t a_mnt, x_mnt;
2029 uint32_t x, t0, t1;
2030
2031 fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
2032
2033 // Handle NaNs:
2034 if (fp16_is_NaN(a_exp, a_mnt))
2035 return fp16_process_NaN(a, mode, flags);
2036
2037 // Handle infinities and zeroes:
2038 if (!a_mnt)
2039 return fp16_zero(a_sgn);
2040 if (a_exp == FP16_EXP_INF && !a_sgn)
2041 return fp16_infinity(a_sgn);
2042 if (a_sgn) {
2043 *flags |= FPLIB_IOC;
2044 return fp16_defaultNaN();
2045 }
2046
2047 a_mnt = fp16_normalise(a_mnt, &a_exp);
2048 if (a_exp & 1) {
2049 ++a_exp;
2050 a_mnt >>= 1;
2051 }
2052
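    // Fixed-point Heron (Babylonian) iterations; the comment on each
    // step gives the real-valued recurrence being approximated. Each
    // iteration roughly doubles the number of correct bits, and the
    // multiply-back below (t1 versus t0) trims the estimate to a
    // truncated mantissa plus a sticky bit for rounding.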
2053 // x = (a * 3 + 5) / 8
2054 x = ((uint32_t)a_mnt << 14) + ((uint32_t)a_mnt << 13) + ((uint32_t)5 << 28);
2055
2056 // x = (a / x + x) / 2; // 8-bit accuracy
2057 x = (((uint32_t)a_mnt << 16) / (x >> 15) + (x >> 16)) << 15;
2058
2059 // x = (a / x + x) / 2; // 16-bit accuracy
2060 x = (((uint32_t)a_mnt << 16) / (x >> 15) + (x >> 16)) << 15;
2061
2062 x_sgn = 0;
2063 x_exp = (a_exp + 27) >> 1;
2064 x_mnt = ((x - (1 << 18)) >> 19) + 1;
2065 t1 = (uint32_t)x_mnt * x_mnt;
2066 t0 = (uint32_t)a_mnt << 9;
2067 if (t1 > t0) {
2068 --x_mnt;
2069 }
2070
2071 x_mnt = fp16_normalise(x_mnt, &x_exp);
2072
2073 return fp16_round(x_sgn, x_exp, x_mnt << 1 | (t1 != t0), mode, flags);
2074}
2075
2076static uint32_t
2077fp32_sqrt(uint32_t a, int mode, int *flags)
2078{
2079 int a_sgn, a_exp, x_sgn, x_exp;
2080 uint32_t a_mnt, x, x_mnt;
2081 uint64_t t0, t1;
2082
2083 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
2084
2085 // Handle NaNs:
2086 if (fp32_is_NaN(a_exp, a_mnt))
2087 return fp32_process_NaN(a, mode, flags);
2088
2089 // Handle infinities and zeroes:
2090 if (!a_mnt)
2091 return fp32_zero(a_sgn);
2092 if (a_exp == FP32_EXP_INF && !a_sgn)
2093 return fp32_infinity(a_sgn);
2094 if (a_sgn) {
2095 *flags |= FPLIB_IOC;
2096 return fp32_defaultNaN();
2097 }
2098
2099 a_mnt = fp32_normalise(a_mnt, &a_exp);
2100 if (!(a_exp & 1)) {
2101 ++a_exp;
2102 a_mnt >>= 1;
2103 }
2104
2105 // x = (a * 3 + 5) / 8
2106 x = (a_mnt >> 2) + (a_mnt >> 3) + ((uint32_t)5 << 28);
2107
2108 // x = (a / x + x) / 2; // 8-bit accuracy
2109 x = (a_mnt / (x >> 15) + (x >> 16)) << 15;
2110
2111 // x = (a / x + x) / 2; // 16-bit accuracy
2112 x = (a_mnt / (x >> 15) + (x >> 16)) << 15;
2113
2114 // x = (a / x + x) / 2; // 32-bit accuracy
2115 x = ((((uint64_t)a_mnt << 32) / x) >> 2) + (x >> 1);
2116
2117 x_sgn = 0;
2118 x_exp = (a_exp + 147) >> 1;
2119 x_mnt = ((x - (1 << 5)) >> 6) + 1;
2120 t1 = (uint64_t)x_mnt * x_mnt;
2121 t0 = (uint64_t)a_mnt << 19;
2122 if (t1 > t0) {
2123 --x_mnt;
2124 }
2125
2126 x_mnt = fp32_normalise(x_mnt, &x_exp);
2127
2128 return fp32_round(x_sgn, x_exp, x_mnt << 1 | (t1 != t0), mode, flags);
2129}
2130
2131static uint64_t
2132fp64_sqrt(uint64_t a, int mode, int *flags)
2133{
2134 int a_sgn, a_exp, x_sgn, x_exp, c;
2135 uint64_t a_mnt, x_mnt, r, x0, x1;
2136 uint32_t x;
2137
2138 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
2139
2140 // Handle NaNs:
2141 if (fp64_is_NaN(a_exp, a_mnt))
2142 return fp64_process_NaN(a, mode, flags);
2143
2144 // Handle infinities and zeroes:
2145 if (!a_mnt)
2146 return fp64_zero(a_sgn);
2147 if (a_exp == FP64_EXP_INF && !a_sgn)
2148 return fp64_infinity(a_sgn);
2149 if (a_sgn) {
2150 *flags |= FPLIB_IOC;
2151 return fp64_defaultNaN();
2152 }
2153
2154 a_mnt = fp64_normalise(a_mnt, &a_exp);
2155 if (a_exp & 1) {
2156 ++a_exp;
2157 a_mnt >>= 1;
2158 }
2159
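    // Same scheme as fp16/fp32_sqrt, except that the final Heron step
    // is carried out with a Newton-refined reciprocal r ~= 1/x so that
    // no 128-by-64-bit division is needed; the closing multiply-back
    // comparison against a_mnt then fixes the last ulp and supplies
    // the sticky bit.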
2160 // x = (a * 3 + 5) / 8
2161 x = (a_mnt >> 34) + (a_mnt >> 35) + ((uint32_t)5 << 28);
2162
2163 // x = (a / x + x) / 2; // 8-bit accuracy
2164 x = ((a_mnt >> 32) / (x >> 15) + (x >> 16)) << 15;
2165
2166 // x = (a / x + x) / 2; // 16-bit accuracy
2167 x = ((a_mnt >> 32) / (x >> 15) + (x >> 16)) << 15;
2168
2169 // x = (a / x + x) / 2; // 32-bit accuracy
2170 x = ((a_mnt / x) >> 2) + (x >> 1);
2171
2172 // r = 1 / x; // 32-bit accuracy
2173 r = ((uint64_t)1 << 62) / x;
2174
2175 // r = r * (2 - x * r); // 64-bit accuracy
2176 mul64x32(&x0, &x1, -(uint64_t)x * r << 1, r);
2177 lsr128(&x0, &x1, x0, x1, 31);
2178
2179 // x = (x + a * r) / 2; // 64-bit accuracy
2180 mul62x62(&x0, &x1, a_mnt >> 10, x0 >> 2);
2181 lsl128(&x0, &x1, x0, x1, 5);
2182 lsr128(&x0, &x1, x0, x1, 56);
2183
2184 x0 = ((uint64_t)x << 31) + (x0 >> 1);
2185
2186 x_sgn = 0;
2187 x_exp = (a_exp + 1053) >> 1;
2188 x_mnt = x0;
2189 x_mnt = ((x_mnt - (1 << 8)) >> 9) + 1;
2190 mul62x62(&x0, &x1, x_mnt, x_mnt);
2191 lsl128(&x0, &x1, x0, x1, 19);
2192 c = cmp128(x0, x1, 0, a_mnt);
2193 if (c > 0)
2194 --x_mnt;
2195
2196 x_mnt = fp64_normalise(x_mnt, &x_exp);
2197
2198 return fp64_round(x_sgn, x_exp, x_mnt << 1 | !!c, mode, flags);
2199}
2200
2201static int
2202modeConv(FPSCR fpscr)
2203{
2204 uint32_t x = (uint32_t)fpscr;
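    // FPSCR.RMode sits at bits [23:22], FZ at bit 24 and DN at bit 25,
    // so "x >> 22 & 0xf" maps them directly onto FPLIB_RN/RP/RM/RZ,
    // FPLIB_FZ and FPLIB_DN; FZ16 (bit 19) is translated separately.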
2205 return (x >> 22 & 0xf) | (x >> 19 & 1 ? FPLIB_FZ16 : 0);
2206 // AHP bit is ignored. Only fplibConvert uses AHP.
2207}
2208
2209static void
2210set_fpscr(FPSCR &fpscr, int flags)
2211{
2212 // translate back to FPSCR
2213 bool underflow = false;
2214 if (flags & FPLIB_IDC) {
2215 fpscr.idc = 1;
2216 }
2217 if (flags & FPLIB_IOC) {
2218 fpscr.ioc = 1;
2219 }
2220 if (flags & FPLIB_DZC) {
2221 fpscr.dzc = 1;
2222 }
2223 if (flags & FPLIB_OFC) {
2224 fpscr.ofc = 1;
2225 }
2226 if (flags & FPLIB_UFC) {
2227 underflow = true; // so a flushed-to-zero underflow does not also raise inexact below
2228 fpscr.ufc = 1;
2229 }
2230 if ((flags & FPLIB_IXC) && !(underflow && fpscr.fz)) {
2231 fpscr.ixc = 1;
2232 }
2233}
2234
2235template <>
2236bool
2237fplibCompareEQ(uint16_t a, uint16_t b, FPSCR &fpscr)
2238{
2239 int flags = 0;
2240 int x = fp16_compare_eq(a, b, modeConv(fpscr), &flags);
2241 set_fpscr(fpscr, flags);
2242 return x;
2243}
2244
2245template <>
2246bool
2247fplibCompareGE(uint16_t a, uint16_t b, FPSCR &fpscr)
2248{
2249 int flags = 0;
2250 int x = fp16_compare_ge(a, b, modeConv(fpscr), &flags);
2251 set_fpscr(fpscr, flags);
2252 return x;
2253}
2254
2255template <>
2256bool
2257fplibCompareGT(uint16_t a, uint16_t b, FPSCR &fpscr)
2258{
2259 int flags = 0;
2260 int x = fp16_compare_gt(a, b, modeConv(fpscr), &flags);
2261 set_fpscr(fpscr, flags);
2262 return x;
2263}
2264
2265template <>
2266bool
2267fplibCompareUN(uint16_t a, uint16_t b, FPSCR &fpscr)
2268{
2269 int flags = 0;
2270 int x = fp16_compare_un(a, b, modeConv(fpscr), &flags);
2271 set_fpscr(fpscr, flags);
2272 return x;
2273}
2274
2275template <>
2276bool
2277fplibCompareEQ(uint32_t a, uint32_t b, FPSCR &fpscr)
2278{
2279 int flags = 0;
2280 int x = fp32_compare_eq(a, b, modeConv(fpscr), &flags);
2281 set_fpscr(fpscr, flags);
2282 return x;
2283}
2284
2285template <>
2286bool
2287fplibCompareGE(uint32_t a, uint32_t b, FPSCR &fpscr)
2288{
2289 int flags = 0;
2290 int x = fp32_compare_ge(a, b, modeConv(fpscr), &flags);
2291 set_fpscr(fpscr, flags);
2292 return x;
2293}
2294
2295template <>
2296bool
2297fplibCompareGT(uint32_t a, uint32_t b, FPSCR &fpscr)
2298{
2299 int flags = 0;
2300 int x = fp32_compare_gt(a, b, modeConv(fpscr), &flags);
2301 set_fpscr(fpscr, flags);
2302 return x;
2303}
2304
2305template <>
2306bool
2307fplibCompareUN(uint32_t a, uint32_t b, FPSCR &fpscr)
2308{
2309 int flags = 0;
2310 int x = fp32_compare_un(a, b, modeConv(fpscr), &flags);
2311 set_fpscr(fpscr, flags);
2312 return x;
2313}
2314
2315template <>
2316bool
2317fplibCompareEQ(uint64_t a, uint64_t b, FPSCR &fpscr)
2318{
2319 int flags = 0;
2320 int x = fp64_compare_eq(a, b, modeConv(fpscr), &flags);
2321 set_fpscr(fpscr, flags);
2322 return x;
2323}
2324
2325template <>
2326bool
2327fplibCompareGE(uint64_t a, uint64_t b, FPSCR &fpscr)
2328{
2329 int flags = 0;
2330 int x = fp64_compare_ge(a, b, modeConv(fpscr), &flags);
2331 set_fpscr(fpscr, flags);
2332 return x;
2333}
2334
2335template <>
2336bool
2337fplibCompareGT(uint64_t a, uint64_t b, FPSCR &fpscr)
2338{
2339 int flags = 0;
2340 int x = fp64_compare_gt(a, b, modeConv(fpscr), &flags);
2341 set_fpscr(fpscr, flags);
2342 return x;
2343}
2344
2345template <>
2346bool
2347fplibCompareUN(uint64_t a, uint64_t b, FPSCR &fpscr)
2348{
2349 int flags = 0;
2350 int x = fp64_compare_un(a, b, modeConv(fpscr), &flags);
2351 set_fpscr(fpscr, flags);
2352 return x;
2353}
2354
2355template <>
2356uint16_t
2357fplibAbs(uint16_t op)
2358{
2359 return op & ~(1ULL << (FP16_BITS - 1));
2360}
2361
2362template <>
2363uint32_t
2364fplibAbs(uint32_t op)
2365{
2366 return op & ~(1ULL << (FP32_BITS - 1));
2367}
2368
2369template <>
2370uint64_t
2371fplibAbs(uint64_t op)
2372{
2373 return op & ~(1ULL << (FP64_BITS - 1));
2374}
2375
2376template <>
2377uint16_t
2378fplibAdd(uint16_t op1, uint16_t op2, FPSCR &fpscr)
2379{
2380 int flags = 0;
2381 uint16_t result = fp16_add(op1, op2, 0, modeConv(fpscr), &flags);
2382 set_fpscr0(fpscr, flags);
2383 return result;
2384}
2385
2386template <>
2387uint32_t
2388fplibAdd(uint32_t op1, uint32_t op2, FPSCR &fpscr)
2389{
2390 int flags = 0;
2391 uint32_t result = fp32_add(op1, op2, 0, modeConv(fpscr), &flags);
2392 set_fpscr0(fpscr, flags);
2393 return result;
2394}
2395
2396template <>
2397uint64_t
2398fplibAdd(uint64_t op1, uint64_t op2, FPSCR &fpscr)
2399{
2400 int flags = 0;
2401 uint64_t result = fp64_add(op1, op2, 0, modeConv(fpscr), &flags);
2402 set_fpscr0(fpscr, flags);
2403 return result;
2404}
2405
2406template <>
2407int
2408fplibCompare(uint16_t op1, uint16_t op2, bool signal_nans, FPSCR &fpscr)
2409{
2410 int mode = modeConv(fpscr);
2411 int flags = 0;
2412 int sgn1, exp1, sgn2, exp2, result;
2413 uint16_t mnt1, mnt2;
2414
2415 fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2416 fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2417
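    // The result is a PSTATE.NZCV nibble: 8 (N) for less than, 2 (C)
    // for greater than, 6 (Z,C) for equal and 3 (C,V) for unordered.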
2418 if (fp16_is_NaN(exp1, mnt1) || fp16_is_NaN(exp2, mnt2)) {
2419 result = 3;
2420 if (fp16_is_signalling_NaN(exp1, mnt1) ||
2421 fp16_is_signalling_NaN(exp2, mnt2) || signal_nans)
2422 flags |= FPLIB_IOC;
2423 } else {
2424 if (op1 == op2 || (!mnt1 && !mnt2)) {
2425 result = 6;
2426 } else if (sgn1 != sgn2) {
2427 result = sgn1 ? 8 : 2;
2428 } else if (exp1 != exp2) {
2429 result = sgn1 ^ (exp1 < exp2) ? 8 : 2;
2430 } else {
2431 result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2;
2432 }
2433 }
2434
2435 set_fpscr0(fpscr, flags);
2436
2437 return result;
2438}
2439
2440template <>
2441int
2442fplibCompare(uint32_t op1, uint32_t op2, bool signal_nans, FPSCR &fpscr)
2443{
2444 int mode = modeConv(fpscr);
2445 int flags = 0;
2446 int sgn1, exp1, sgn2, exp2, result;
2447 uint32_t mnt1, mnt2;
2448
2449 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2450 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2451
2452 if (fp32_is_NaN(exp1, mnt1) || fp32_is_NaN(exp2, mnt2)) {
2453 result = 3;
2454 if (fp32_is_signalling_NaN(exp1, mnt1) ||
2455 fp32_is_signalling_NaN(exp2, mnt2) || signal_nans)
2456 flags |= FPLIB_IOC;
2457 } else {
2458 if (op1 == op2 || (!mnt1 && !mnt2)) {
2459 result = 6;
2460 } else if (sgn1 != sgn2) {
2461 result = sgn1 ? 8 : 2;
2462 } else if (exp1 != exp2) {
2463 result = sgn1 ^ (exp1 < exp2) ? 8 : 2;
2464 } else {
2465 result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2;
2466 }
2467 }
2468
2469 set_fpscr0(fpscr, flags);
2470
2471 return result;
2472}
2473
2474template <>
2475int
2476fplibCompare(uint64_t op1, uint64_t op2, bool signal_nans, FPSCR &fpscr)
2477{
2478 int mode = modeConv(fpscr);
2479 int flags = 0;
2480 int sgn1, exp1, sgn2, exp2, result;
2481 uint64_t mnt1, mnt2;
2482
2483 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2484 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2485
2486 if (fp64_is_NaN(exp1, mnt1) || fp64_is_NaN(exp2, mnt2)) {
2487 result = 3;
2488 if (fp64_is_signalling_NaN(exp1, mnt1) ||
2489 fp64_is_signalling_NaN(exp2, mnt2) || signal_nans)
2490 flags |= FPLIB_IOC;
2491 } else {
2492 if (op1 == op2 || (!mnt1 && !mnt2)) {
2493 result = 6;
2494 } else if (sgn1 != sgn2) {
2495 result = sgn1 ? 8 : 2;
2496 } else if (exp1 != exp2) {
2497 result = sgn1 ^ (exp1 < exp2) ? 8 : 2;
2498 } else {
2499 result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2;
2500 }
2501 }
2502
2503 set_fpscr0(fpscr, flags);
2504
2505 return result;
2506}
2507
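// The FPConvertNaN_* helpers implement the ARM ARM's FPConvertNaN():
// the sign is preserved, the quiet bit is forced, and the most
// significant payload bits are carried across into the target format's
// mantissa.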
2508static uint16_t
2509fp16_FPConvertNaN_32(uint32_t op)
2510{
2511 return fp16_pack(op >> (FP32_BITS - 1), FP16_EXP_INF,
2512 1ULL << (FP16_MANT_BITS - 1) |
2513 op >> (FP32_MANT_BITS - FP16_MANT_BITS));
2514}
2515
2516static uint16_t
2517fp16_FPConvertNaN_64(uint64_t op)
2518{
2519 return fp16_pack(op >> (FP64_BITS - 1), FP16_EXP_INF,
2520 1ULL << (FP16_MANT_BITS - 1) |
2521 op >> (FP64_MANT_BITS - FP16_MANT_BITS));
2522}
2523
2524static uint32_t
2525fp32_FPConvertNaN_16(uint16_t op)
2526{
2527 return fp32_pack(op >> (FP16_BITS - 1), FP32_EXP_INF,
2528 1ULL << (FP32_MANT_BITS - 1) |
2529 (uint32_t)op << (FP32_MANT_BITS - FP16_MANT_BITS));
2530}
2531
2532static uint32_t
2533fp32_FPConvertNaN_64(uint64_t op)
2534{
2535 return fp32_pack(op >> (FP64_BITS - 1), FP32_EXP_INF,
2536 1ULL << (FP32_MANT_BITS - 1) |
2537 op >> (FP64_MANT_BITS - FP32_MANT_BITS));
2538}
2539
2540static uint64_t
2541fp64_FPConvertNaN_16(uint16_t op)
2542{
2543 return fp64_pack(op >> (FP16_BITS - 1), FP64_EXP_INF,
2544 1ULL << (FP64_MANT_BITS - 1) |
2545 (uint64_t)op << (FP64_MANT_BITS - FP16_MANT_BITS));
2546}
2547
2548static uint64_t
2549fp64_FPConvertNaN_32(uint32_t op)
2550{
2551 return fp64_pack(op >> (FP32_BITS - 1), FP64_EXP_INF,
2552 1ULL << (FP64_MANT_BITS - 1) |
2553 (uint64_t)op << (FP64_MANT_BITS - FP32_MANT_BITS));
2554}
2555
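// Exact constants 1.5, 3.0 and 2.0 in each format, used by the
// special-case paths of the fused step operations (e.g.
// fplibRSqrtStepFused) and fplibMulX below.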
2556static uint16_t
2557fp16_FPOnePointFive(int sgn)
2558{
2559 return fp16_pack(sgn, FP16_EXP_BIAS, 1ULL << (FP16_MANT_BITS - 1));
2560}
2561
2562static uint32_t
2563fp32_FPOnePointFive(int sgn)
2564{
2565 return fp32_pack(sgn, FP32_EXP_BIAS, 1ULL << (FP32_MANT_BITS - 1));
2566}
2567
2568static uint64_t
2569fp64_FPOnePointFive(int sgn)
2570{
2571 return fp64_pack(sgn, FP64_EXP_BIAS, 1ULL << (FP64_MANT_BITS - 1));
2572}
2573
2574static uint16_t
2575fp16_FPThree(int sgn)
2576{
2577 return fp16_pack(sgn, FP16_EXP_BIAS + 1, 1ULL << (FP16_MANT_BITS - 1));
2578}
2579
2580static uint32_t
2581fp32_FPThree(int sgn)
2582{
2583 return fp32_pack(sgn, FP32_EXP_BIAS + 1, 1ULL << (FP32_MANT_BITS - 1));
2584}
2585
2586static uint64_t
2587fp64_FPThree(int sgn)
2588{
2589 return fp64_pack(sgn, FP64_EXP_BIAS + 1, 1ULL << (FP64_MANT_BITS - 1));
2590}
2591
2592static uint16_t
2593fp16_FPTwo(int sgn)
2594{
2595 return fp16_pack(sgn, FP16_EXP_BIAS + 1, 0);
2596}
2597
2598static uint32_t
2599fp32_FPTwo(int sgn)
2600{
2601 return fp32_pack(sgn, FP32_EXP_BIAS + 1, 0);
2602}
2603
2604static uint64_t
2605fp64_FPTwo(int sgn)
2606{
2607 return fp64_pack(sgn, FP64_EXP_BIAS + 1, 0);
2608}
2609
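// Conversions to half precision honour FPSCR.AHP: the alternative
// half-precision format has no infinities or NaNs, so a NaN input
// converts to zero and an infinity to the maximum magnitude, both
// raising Invalid Operation.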
2610template <>
2611uint16_t
2612fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr)
2613{
2614 int mode = modeConv(fpscr);
2615 int flags = 0;
2616 int sgn, exp;
2617 uint32_t mnt;
2618 uint16_t result;
2619
2620 // Unpack floating-point operand optionally with flush-to-zero:
2621 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2622
2623 bool alt_hp = fpscr.ahp;
2624
2625 if (fp32_is_NaN(exp, mnt)) {
2626 if (alt_hp) {
2627 result = fp16_zero(sgn);
2628 } else if (fpscr.dn) {
2629 result = fp16_defaultNaN();
2630 } else {
2631 result = fp16_FPConvertNaN_32(op);
2632 }
2633 if (!(mnt >> (FP32_MANT_BITS - 1) & 1) || alt_hp) {
2634 flags |= FPLIB_IOC;
2635 }
2636 } else if (exp == FP32_EXP_INF) {
2637 if (alt_hp) {
2638 result = ((uint16_t)sgn << (FP16_BITS - 1) |
2639 ((1ULL << (FP16_BITS - 1)) - 1));
2640 flags |= FPLIB_IOC;
2641 } else {
2642 result = fp16_infinity(sgn);
2643 }
2644 } else if (!mnt) {
2645 result = fp16_zero(sgn);
2646 } else {
2647 result =
2648 fp16_round_(sgn, exp - FP32_EXP_BIAS + FP16_EXP_BIAS,
2649 mnt >> (FP32_MANT_BITS - FP16_BITS) |
2650 !!(mnt & ((1ULL << (FP32_MANT_BITS - FP16_BITS)) - 1)),
2651 rounding, (mode & 0xf) | alt_hp << 4, &flags);
2652 }
2653
2654 set_fpscr0(fpscr, flags);
2655
2656 return result;
2657}
2658
2659template <>
2660uint16_t
2661fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr)
2662{
2663 int mode = modeConv(fpscr);
2664 int flags = 0;
2665 int sgn, exp;
2666 uint64_t mnt;
2667 uint16_t result;
2668
2669 // Unpack floating-point operand optionally with flush-to-zero:
2670 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2671
2672 bool alt_hp = fpscr.ahp;
2673
2674 if (fp64_is_NaN(exp, mnt)) {
2675 if (alt_hp) {
2676 result = fp16_zero(sgn);
2677 } else if (fpscr.dn) {
2678 result = fp16_defaultNaN();
2679 } else {
2680 result = fp16_FPConvertNaN_64(op);
2681 }
2682 if (!(mnt >> (FP64_MANT_BITS - 1) & 1) || alt_hp) {
2683 flags |= FPLIB_IOC;
2684 }
2685 } else if (exp == FP64_EXP_INF) {
2686 if (alt_hp) {
2687 result = ((uint16_t)sgn << (FP16_BITS - 1) |
2688 ((1ULL << (FP16_BITS - 1)) - 1));
2689 flags |= FPLIB_IOC;
2690 } else {
2691 result = fp16_infinity(sgn);
2692 }
2693 } else if (!mnt) {
2694 result = fp16_zero(sgn);
2695 } else {
2696 result =
2697 fp16_round_(sgn, exp - FP64_EXP_BIAS + FP16_EXP_BIAS,
2698 mnt >> (FP64_MANT_BITS - FP16_BITS) |
2699 !!(mnt & ((1ULL << (FP64_MANT_BITS - FP16_BITS)) - 1)),
2700 rounding, (mode & 0xf) | alt_hp << 4, &flags);
2701 }
2702
2703 set_fpscr0(fpscr, flags);
2704
2705 return result;
2706}
2707
2708template <>
2709uint32_t
2710fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr)
2711{
2712 int mode = modeConv(fpscr);
2713 int flags = 0;
2714 int sgn, exp;
2715 uint16_t mnt;
2716 uint32_t result;
2717
2718 // Unpack floating-point operand optionally with flush-to-zero:
2719 fp16_unpack(&sgn, &exp, &mnt, op, mode & 0xf, &flags);
2720
2721 if (fp16_is_NaN(exp, mnt) && !fpscr.ahp) {
2722 if (fpscr.dn) {
2723 result = fp32_defaultNaN();
2724 } else {
2725 result = fp32_FPConvertNaN_16(op);
2726 }
2727 if (!(mnt >> (FP16_MANT_BITS - 1) & 1)) {
2728 flags |= FPLIB_IOC;
2729 }
2730 } else if (exp == FP16_EXP_INF && !fpscr.ahp) {
2731 result = fp32_infinity(sgn);
2732 } else if (!mnt) {
2733 result = fp32_zero(sgn);
2734 } else {
2735 mnt = fp16_normalise(mnt, &exp);
2736 result = fp32_pack(sgn, (exp - FP16_EXP_BIAS +
2737 FP32_EXP_BIAS + FP16_EXP_BITS),
2738 (uint32_t)mnt << (FP32_MANT_BITS - FP16_BITS + 1));
2739 }
2740
2741 set_fpscr0(fpscr, flags);
2742
2743 return result;
2744}
2745
2746template <>
2747uint32_t
2748fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr)
2749{
2750 int mode = modeConv(fpscr);
2751 int flags = 0;
2752 int sgn, exp;
2753 uint64_t mnt;
2754 uint32_t result;
2755
2756 // Unpack floating-point operand optionally with flush-to-zero:
2757 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2758
2759 if (fp64_is_NaN(exp, mnt)) {
2760 if (fpscr.dn) {
2761 result = fp32_defaultNaN();
2762 } else {
2763 result = fp32_FPConvertNaN_64(op);
2764 }
2765 if (!(mnt >> (FP64_MANT_BITS - 1) & 1)) {
2766 flags |= FPLIB_IOC;
2767 }
2768 } else if (exp == FP64_EXP_INF) {
2769 result = fp32_infinity(sgn);
2770 } else if (!mnt) {
2771 result = fp32_zero(sgn);
2772 } else {
2773 result =
2774 fp32_round_(sgn, exp - FP64_EXP_BIAS + FP32_EXP_BIAS,
2775 mnt >> (FP64_MANT_BITS - FP32_BITS) |
2776 !!(mnt & ((1ULL << (FP64_MANT_BITS - FP32_BITS)) - 1)),
2777 rounding, mode, &flags);
2778 }
2779
2780 set_fpscr0(fpscr, flags);
2781
2782 return result;
2783}
2784
2785template <>
2786uint64_t
2787fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr)
2788{
2789 int mode = modeConv(fpscr);
2790 int flags = 0;
2791 int sgn, exp;
2792 uint16_t mnt;
2793 uint64_t result;
2794
2795 // Unpack floating-point operand optionally with flush-to-zero:
2796 fp16_unpack(&sgn, &exp, &mnt, op, mode & 0xf, &flags);
2797
2798 if (fp16_is_NaN(exp, mnt) && !fpscr.ahp) {
2799 if (fpscr.dn) {
2800 result = fp64_defaultNaN();
2801 } else {
2802 result = fp64_FPConvertNaN_16(op);
2803 }
2804 if (!(mnt >> (FP16_MANT_BITS - 1) & 1)) {
2805 flags |= FPLIB_IOC;
2806 }
2807 } else if (exp == FP16_EXP_INF && !fpscr.ahp) {
2808 result = fp64_infinity(sgn);
2809 } else if (!mnt) {
2810 result = fp64_zero(sgn);
2811 } else {
2812 mnt = fp16_normalise(mnt, &exp);
2813 result = fp64_pack(sgn, (exp - FP16_EXP_BIAS +
2814 FP64_EXP_BIAS + FP16_EXP_BITS),
2815 (uint64_t)mnt << (FP64_MANT_BITS - FP16_BITS + 1));
2816 }
2817
2818 set_fpscr0(fpscr, flags);
2819
2820 return result;
2821}
2822
2823template <>
2824uint64_t
2825fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr)
2826{
2827 int mode = modeConv(fpscr);
2828 int flags = 0;
2829 int sgn, exp;
2830 uint32_t mnt;
2831 uint64_t result;
2832
2833 // Unpack floating-point operand optionally with flush-to-zero:
2834 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2835
2836 if (fp32_is_NaN(exp, mnt)) {
2837 if (fpscr.dn) {
2838 result = fp64_defaultNaN();
2839 } else {
2840 result = fp64_FPConvertNaN_32(op);
2841 }
2842 if (!(mnt >> (FP32_MANT_BITS - 1) & 1)) {
2843 flags |= FPLIB_IOC;
2844 }
2845 } else if (exp == FP32_EXP_INF) {
2846 result = fp64_infinity(sgn);
2847 } else if (!mnt) {
2848 result = fp64_zero(sgn);
2849 } else {
2850 mnt = fp32_normalise(mnt, &exp);
2851 result = fp64_pack(sgn, (exp - FP32_EXP_BIAS +
2852 FP64_EXP_BIAS + FP32_EXP_BITS),
2853 (uint64_t)mnt << (FP64_MANT_BITS - FP32_BITS + 1));
2854 }
2855
2856 set_fpscr0(fpscr, flags);
2857
2858 return result;
2859}
2860
2861template <>
2862uint16_t
2863fplibMulAdd(uint16_t addend, uint16_t op1, uint16_t op2, FPSCR &fpscr)
2864{
2865 int flags = 0;
2866 uint16_t result = fp16_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags);
2867 set_fpscr0(fpscr, flags);
2868 return result;
2869}
2870
2871template <>
2872uint32_t
2873fplibMulAdd(uint32_t addend, uint32_t op1, uint32_t op2, FPSCR &fpscr)
2874{
2875 int flags = 0;
2876 uint32_t result = fp32_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags);
2877 set_fpscr0(fpscr, flags);
2878 return result;
2879}
2880
2881template <>
2882uint64_t
2883fplibMulAdd(uint64_t addend, uint64_t op1, uint64_t op2, FPSCR &fpscr)
2884{
2885 int flags = 0;
2886 uint64_t result = fp64_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags);
2887 set_fpscr0(fpscr, flags);
2888 return result;
2889}
2890
2891template <>
2892uint16_t
2893fplibDiv(uint16_t op1, uint16_t op2, FPSCR &fpscr)
2894{
2895 int flags = 0;
2896 uint16_t result = fp16_div(op1, op2, modeConv(fpscr), &flags);
2897 set_fpscr0(fpscr, flags);
2898 return result;
2899}
2900
2901template <>
2902uint32_t
2903fplibDiv(uint32_t op1, uint32_t op2, FPSCR &fpscr)
2904{
2905 int flags = 0;
2906 uint32_t result = fp32_div(op1, op2, modeConv(fpscr), &flags);
2907 set_fpscr0(fpscr, flags);
2908 return result;
2909}
2910
2911template <>
2912uint64_t
2913fplibDiv(uint64_t op1, uint64_t op2, FPSCR &fpscr)
2914{
2915 int flags = 0;
2916 uint64_t result = fp64_div(op1, op2, modeConv(fpscr), &flags);
2917 set_fpscr0(fpscr, flags);
2918 return result;
2919}
2920
2921template <>
2922uint16_t
2923fplibExpA(uint16_t op)
2924{
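    // FEXPA coefficient table: coeff[i] holds the fraction bits of
    // 2^(i/32), i.e. roughly (2^(i/32) - 1) * 2^FP16_MANT_BITS, and
    // the upper operand bits are placed directly in the exponent
    // field. The 32- and 64-bit tables below follow the same pattern
    // with 64 entries of 2^(i/64).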
2925 static uint16_t coeff[32] = {
2926 0x0000,
2927 0x0016,
2928 0x002d,
2929 0x0045,
2930 0x005d,
2931 0x0075,
2932 0x008e,
2933 0x00a8,
2934 0x00c2,
2935 0x00dc,
2936 0x00f8,
2937 0x0114,
2938 0x0130,
2939 0x014d,
2940 0x016b,
2941 0x0189,
2942 0x01a8,
2943 0x01c8,
2944 0x01e8,
2945 0x0209,
2946 0x022b,
2947 0x024e,
2948 0x0271,
2949 0x0295,
2950 0x02ba,
2951 0x02e0,
2952 0x0306,
2953 0x032e,
2954 0x0356,
2955 0x037f,
2956 0x03a9,
2957 0x03d4
2958 };
2959 return ((((op >> 5) & ((1 << FP16_EXP_BITS) - 1)) << FP16_MANT_BITS) |
2960 coeff[op & ((1 << 5) - 1)]);
2961}
2962
2963template <>
2964uint32_t
2965fplibExpA(uint32_t op)
2966{
2967 static uint32_t coeff[64] = {
2968 0x000000,
2969 0x0164d2,
2970 0x02cd87,
2971 0x043a29,
2972 0x05aac3,
2973 0x071f62,
2974 0x08980f,
2975 0x0a14d5,
2976 0x0b95c2,
2977 0x0d1adf,
2978 0x0ea43a,
2979 0x1031dc,
2980 0x11c3d3,
2981 0x135a2b,
2982 0x14f4f0,
2983 0x16942d,
2984 0x1837f0,
2985 0x19e046,
2986 0x1b8d3a,
2987 0x1d3eda,
2988 0x1ef532,
2989 0x20b051,
2990 0x227043,
2991 0x243516,
2992 0x25fed7,
2993 0x27cd94,
2994 0x29a15b,
2995 0x2b7a3a,
2996 0x2d583f,
2997 0x2f3b79,
2998 0x3123f6,
2999 0x3311c4,
3000 0x3504f3,
3001 0x36fd92,
3002 0x38fbaf,
3003 0x3aff5b,
3004 0x3d08a4,
3005 0x3f179a,
3006 0x412c4d,
3007 0x4346cd,
3008 0x45672a,
3009 0x478d75,
3010 0x49b9be,
3011 0x4bec15,
3012 0x4e248c,
3013 0x506334,
3014 0x52a81e,
3015 0x54f35b,
3016 0x5744fd,
3017 0x599d16,
3018 0x5bfbb8,
3019 0x5e60f5,
3020 0x60ccdf,
3021 0x633f89,
3022 0x65b907,
3023 0x68396a,
3024 0x6ac0c7,
3025 0x6d4f30,
3026 0x6fe4ba,
3027 0x728177,
3028 0x75257d,
3029 0x77d0df,
3030 0x7a83b3,
3031 0x7d3e0c
3032 };
3033 return ((((op >> 6) & ((1 << FP32_EXP_BITS) - 1)) << FP32_MANT_BITS) |
3034 coeff[op & ((1 << 6) - 1)]);
3035}
3036
3037template <>
3038uint64_t
3039fplibExpA(uint64_t op)
3040{
3041 static uint64_t coeff[64] = {
3042 0x0000000000000ULL,
3043 0x02c9a3e778061ULL,
3044 0x059b0d3158574ULL,
3045 0x0874518759bc8ULL,
3046 0x0b5586cf9890fULL,
3047 0x0e3ec32d3d1a2ULL,
3048 0x11301d0125b51ULL,
3049 0x1429aaea92de0ULL,
3050 0x172b83c7d517bULL,
3051 0x1a35beb6fcb75ULL,
3052 0x1d4873168b9aaULL,
3053 0x2063b88628cd6ULL,
3054 0x2387a6e756238ULL,
3055 0x26b4565e27cddULL,
3056 0x29e9df51fdee1ULL,
3057 0x2d285a6e4030bULL,
3058 0x306fe0a31b715ULL,
3059 0x33c08b26416ffULL,
3060 0x371a7373aa9cbULL,
3061 0x3a7db34e59ff7ULL,
3062 0x3dea64c123422ULL,
3063 0x4160a21f72e2aULL,
3064 0x44e086061892dULL,
3065 0x486a2b5c13cd0ULL,
3066 0x4bfdad5362a27ULL,
3067 0x4f9b2769d2ca7ULL,
3068 0x5342b569d4f82ULL,
3069 0x56f4736b527daULL,
3070 0x5ab07dd485429ULL,
3071 0x5e76f15ad2148ULL,
3072 0x6247eb03a5585ULL,
3073 0x6623882552225ULL,
3074 0x6a09e667f3bcdULL,
3075 0x6dfb23c651a2fULL,
3076 0x71f75e8ec5f74ULL,
3077 0x75feb564267c9ULL,
3078 0x7a11473eb0187ULL,
3079 0x7e2f336cf4e62ULL,
3080 0x82589994cce13ULL,
3081 0x868d99b4492edULL,
3082 0x8ace5422aa0dbULL,
3083 0x8f1ae99157736ULL,
3084 0x93737b0cdc5e5ULL,
3085 0x97d829fde4e50ULL,
3086 0x9c49182a3f090ULL,
3087 0xa0c667b5de565ULL,
3088 0xa5503b23e255dULL,
3089 0xa9e6b5579fdbfULL,
3090 0xae89f995ad3adULL,
3091 0xb33a2b84f15fbULL,
3092 0xb7f76f2fb5e47ULL,
3093 0xbcc1e904bc1d2ULL,
3094 0xc199bdd85529cULL,
3095 0xc67f12e57d14bULL,
3096 0xcb720dcef9069ULL,
3097 0xd072d4a07897cULL,
3098 0xd5818dcfba487ULL,
3099 0xda9e603db3285ULL,
3100 0xdfc97337b9b5fULL,
3101 0xe502ee78b3ff6ULL,
3102 0xea4afa2a490daULL,
3103 0xefa1bee615a27ULL,
3104 0xf50765b6e4540ULL,
3105 0xfa7c1819e90d8ULL
3106 };
3107 return ((((op >> 6) & ((1 << FP64_EXP_BITS) - 1)) << FP64_MANT_BITS) |
3108 coeff[op & ((1 << 6) - 1)]);
3109}
3110
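// Repack a value taken apart by fp16/fp32/fp64_unpack: if the integer
// bit of the mantissa is clear the value is subnormal and the stored
// exponent field must be zero again, undoing the "++*exp" performed
// during unpacking.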
3111static uint16_t
3112fp16_repack(int sgn, int exp, uint16_t mnt)
3113{
3114 return fp16_pack(sgn, mnt >> FP16_MANT_BITS ? exp : 0, mnt);
3115}
3116
3117static uint32_t
3118fp32_repack(int sgn, int exp, uint32_t mnt)
3119{
3120 return fp32_pack(sgn, mnt >> FP32_MANT_BITS ? exp : 0, mnt);
3121}
3122
3123static uint64_t
3124fp64_repack(int sgn, int exp, uint64_t mnt)
3125{
3126 return fp64_pack(sgn, mnt >> FP64_MANT_BITS ? exp : 0, mnt);
3127}
3128
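// IEEE 754-2008 maxNum/minNum semantics: when exactly one operand is a
// quiet NaN it is replaced by an infinity chosen to lose the comparison
// (sgn = 1 gives -Inf for max, sgn = 0 gives +Inf for min), so the
// ordinary max/min then returns the numeric operand. Signalling NaNs
// still propagate through the usual NaN processing.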
3129static void
3130fp16_minmaxnum(uint16_t *op1, uint16_t *op2, int sgn)
3131{
3132 // Treat a single quiet-NaN as +Infinity/-Infinity
3133 if (!((uint16_t)~(*op1 << 1) >> FP16_MANT_BITS) &&
3134 (uint16_t)~(*op2 << 1) >> FP16_MANT_BITS)
3135 *op1 = fp16_infinity(sgn);
3136 if (!((uint16_t)~(*op2 << 1) >> FP16_MANT_BITS) &&
3137 (uint16_t)~(*op1 << 1) >> FP16_MANT_BITS)
3138 *op2 = fp16_infinity(sgn);
3139}
3140
3141static void
3142fp32_minmaxnum(uint32_t *op1, uint32_t *op2, int sgn)
3143{
3144 // Treat a single quiet-NaN as +Infinity/-Infinity
3145 if (!((uint32_t)~(*op1 << 1) >> FP32_MANT_BITS) &&
3146 (uint32_t)~(*op2 << 1) >> FP32_MANT_BITS)
3147 *op1 = fp32_infinity(sgn);
3148 if (!((uint32_t)~(*op2 << 1) >> FP32_MANT_BITS) &&
3149 (uint32_t)~(*op1 << 1) >> FP32_MANT_BITS)
3150 *op2 = fp32_infinity(sgn);
3151}
3152
3153static void
3154fp64_minmaxnum(uint64_t *op1, uint64_t *op2, int sgn)
3155{
3156 // Treat a single quiet-NaN as +Infinity/-Infinity
3157 if (!((uint64_t)~(*op1 << 1) >> FP64_MANT_BITS) &&
3158 (uint64_t)~(*op2 << 1) >> FP64_MANT_BITS)
3159 *op1 = fp64_infinity(sgn);
3160 if (!((uint64_t)~(*op2 << 1) >> FP64_MANT_BITS) &&
3161 (uint64_t)~(*op1 << 1) >> FP64_MANT_BITS)
3162 *op2 = fp64_infinity(sgn);
3163}
3164
3165template <>
3166uint16_t
3167fplibMax(uint16_t op1, uint16_t op2, FPSCR &fpscr)
3168{
3169 int mode = modeConv(fpscr);
3170 int flags = 0;
3171 int sgn1, exp1, sgn2, exp2;
3172 uint16_t mnt1, mnt2, x, result;
3173
3174 fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3175 fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3176
3177 if ((x = fp16_process_NaNs(op1, op2, mode, &flags))) {
3178 result = x;
3179 } else {
3180 result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ?
3181 fp16_repack(sgn1, exp1, mnt1) :
3182 fp16_repack(sgn2, exp2, mnt2));
3183 }
3184 set_fpscr0(fpscr, flags);
3185 return result;
3186}
3187
3188template <>
3189uint32_t
3190fplibMax(uint32_t op1, uint32_t op2, FPSCR &fpscr)
3191{
3192 int mode = modeConv(fpscr);
3193 int flags = 0;
3194 int sgn1, exp1, sgn2, exp2;
3195 uint32_t mnt1, mnt2, x, result;
3196
3197 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3198 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3199
3200 if ((x = fp32_process_NaNs(op1, op2, mode, &flags))) {
3201 result = x;
3202 } else {
3203 result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ?
3204 fp32_repack(sgn1, exp1, mnt1) :
3205 fp32_repack(sgn2, exp2, mnt2));
3206 }
3207 set_fpscr0(fpscr, flags);
3208 return result;
3209}
3210
3211template <>
3212uint64_t
3213fplibMax(uint64_t op1, uint64_t op2, FPSCR &fpscr)
3214{
3215 int mode = modeConv(fpscr);
3216 int flags = 0;
3217 int sgn1, exp1, sgn2, exp2;
3218 uint64_t mnt1, mnt2, x, result;
3219
3220 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3221 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3222
3223 if ((x = fp64_process_NaNs(op1, op2, mode, &flags))) {
3224 result = x;
3225 } else {
3226 result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ?
3227 fp64_repack(sgn1, exp1, mnt1) :
3228 fp64_repack(sgn2, exp2, mnt2));
3229 }
3230 set_fpscr0(fpscr, flags);
3231 return result;
3232}
3233
3234template <>
3235uint16_t
3236fplibMaxNum(uint16_t op1, uint16_t op2, FPSCR &fpscr)
3237{
3238 fp16_minmaxnum(&op1, &op2, 1);
3239 return fplibMax<uint16_t>(op1, op2, fpscr);
3240}
3241
3242template <>
3243uint32_t
3244fplibMaxNum(uint32_t op1, uint32_t op2, FPSCR &fpscr)
3245{
3246 fp32_minmaxnum(&op1, &op2, 1);
3247 return fplibMax<uint32_t>(op1, op2, fpscr);
3248}
3249
3250template <>
3251uint64_t
3252fplibMaxNum(uint64_t op1, uint64_t op2, FPSCR &fpscr)
3253{
3254 fp64_minmaxnum(&op1, &op2, 1);
3255 return fplibMax<uint64_t>(op1, op2, fpscr);
3256}
3257
3258template <>
3259uint16_t
3260fplibMin(uint16_t op1, uint16_t op2, FPSCR &fpscr)
3261{
3262 int mode = modeConv(fpscr);
3263 int flags = 0;
3264 int sgn1, exp1, sgn2, exp2;
3265 uint16_t mnt1, mnt2, x, result;
3266
3267 fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3268 fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3269
3270 if ((x = fp16_process_NaNs(op1, op2, mode, &flags))) {
3271 result = x;
3272 } else {
3273 result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ?
3274 fp16_repack(sgn1, exp1, mnt1) :
3275 fp16_repack(sgn2, exp2, mnt2));
3276 }
3277 set_fpscr0(fpscr, flags);
3278 return result;
3279}
3280
3281template <>
3282uint32_t
3283fplibMin(uint32_t op1, uint32_t op2, FPSCR &fpscr)
3284{
3285 int mode = modeConv(fpscr);
3286 int flags = 0;
3287 int sgn1, exp1, sgn2, exp2;
3288 uint32_t mnt1, mnt2, x, result;
3289
3290 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3291 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3292
3293 if ((x = fp32_process_NaNs(op1, op2, mode, &flags))) {
3294 result = x;
3295 } else {
3296 result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ?
3297 fp32_repack(sgn1, exp1, mnt1) :
3298 fp32_repack(sgn2, exp2, mnt2));
3299 }
3300 set_fpscr0(fpscr, flags);
3301 return result;
3302}
3303
3304template <>
3305uint64_t
3306fplibMin(uint64_t op1, uint64_t op2, FPSCR &fpscr)
3307{
3308 int mode = modeConv(fpscr);
3309 int flags = 0;
3310 int sgn1, exp1, sgn2, exp2;
3311 uint64_t mnt1, mnt2, x, result;
3312
3313 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3314 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3315
3316 if ((x = fp64_process_NaNs(op1, op2, mode, &flags))) {
3317 result = x;
3318 } else {
3319 result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ?
3320 fp64_repack(sgn1, exp1, mnt1) :
3321 fp64_repack(sgn2, exp2, mnt2));
3322 }
3323 set_fpscr0(fpscr, flags);
3324 return result;
3325}
3326
3327template <>
3328uint16_t
3329fplibMinNum(uint16_t op1, uint16_t op2, FPSCR &fpscr)
3330{
3331 fp16_minmaxnum(&op1, &op2, 0);
3332 return fplibMin<uint16_t>(op1, op2, fpscr);
3333}
3334
3335template <>
3336uint32_t
3337fplibMinNum(uint32_t op1, uint32_t op2, FPSCR &fpscr)
3338{
3339 fp32_minmaxnum(&op1, &op2, 0);
3340 return fplibMin<uint32_t>(op1, op2, fpscr);
3341}
3342
3343template <>
3344uint64_t
3345fplibMinNum(uint64_t op1, uint64_t op2, FPSCR &fpscr)
3346{
3347 fp64_minmaxnum(&op1, &op2, 0);
3348 return fplibMin<uint64_t>(op1, op2, fpscr);
3349}
3350
3351template <>
3352uint16_t
3353fplibMul(uint16_t op1, uint16_t op2, FPSCR &fpscr)
3354{
3355 int flags = 0;
3356 uint16_t result = fp16_mul(op1, op2, modeConv(fpscr), &flags);
3357 set_fpscr0(fpscr, flags);
3358 return result;
3359}
3360
3361template <>
3362uint32_t
3363fplibMul(uint32_t op1, uint32_t op2, FPSCR &fpscr)
3364{
3365 int flags = 0;
3366 uint32_t result = fp32_mul(op1, op2, modeConv(fpscr), &flags);
3367 set_fpscr0(fpscr, flags);
3368 return result;
3369}
3370
3371template <>
3372uint64_t
3373fplibMul(uint64_t op1, uint64_t op2, FPSCR &fpscr)
3374{
3375 int flags = 0;
3376 uint64_t result = fp64_mul(op1, op2, modeConv(fpscr), &flags);
3377 set_fpscr0(fpscr, flags);
3378 return result;
3379}
3380
3381template <>
3382uint16_t
3383fplibMulX(uint16_t op1, uint16_t op2, FPSCR &fpscr)
3384{
3385 int mode = modeConv(fpscr);
3386 int flags = 0;
3387 int sgn1, exp1, sgn2, exp2;
3388 uint16_t mnt1, mnt2, result;
3389
3390 fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3391 fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3392
3393 result = fp16_process_NaNs(op1, op2, mode, &flags);
3394 if (!result) {
3395 if ((exp1 == FP16_EXP_INF && !mnt2) ||
3396 (exp2 == FP16_EXP_INF && !mnt1)) {
3397 result = fp16_FPTwo(sgn1 ^ sgn2);
3398 } else if (exp1 == FP16_EXP_INF || exp2 == FP16_EXP_INF) {
3399 result = fp16_infinity(sgn1 ^ sgn2);
3400 } else if (!mnt1 || !mnt2) {
3401 result = fp16_zero(sgn1 ^ sgn2);
3402 } else {
3403 result = fp16_mul(op1, op2, mode, &flags);
3404 }
3405 }
3406
3407 set_fpscr0(fpscr, flags);
3408
3409 return result;
3410}
3411
3412template <>
3413uint32_t
3414fplibMulX(uint32_t op1, uint32_t op2, FPSCR &fpscr)
3415{
3416 int mode = modeConv(fpscr);
3417 int flags = 0;
3418 int sgn1, exp1, sgn2, exp2;
3419 uint32_t mnt1, mnt2, result;
3420
3421 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3422 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3423
3424 result = fp32_process_NaNs(op1, op2, mode, &flags);
3425 if (!result) {
3426 if ((exp1 == FP32_EXP_INF && !mnt2) ||
3427 (exp2 == FP32_EXP_INF && !mnt1)) {
3428 result = fp32_FPTwo(sgn1 ^ sgn2);
3429 } else if (exp1 == FP32_EXP_INF || exp2 == FP32_EXP_INF) {
3430 result = fp32_infinity(sgn1 ^ sgn2);
3431 } else if (!mnt1 || !mnt2) {
3432 result = fp32_zero(sgn1 ^ sgn2);
3433 } else {
3434 result = fp32_mul(op1, op2, mode, &flags);
3435 }
3436 }
3437
3438 set_fpscr0(fpscr, flags);
3439
3440 return result;
3441}
3442
3443template <>
3444uint64_t
3445fplibMulX(uint64_t op1, uint64_t op2, FPSCR &fpscr)
3446{
3447 int mode = modeConv(fpscr);
3448 int flags = 0;
3449 int sgn1, exp1, sgn2, exp2;
3450 uint64_t mnt1, mnt2, result;
3451
3452 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3453 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3454
3455 result = fp64_process_NaNs(op1, op2, mode, &flags);
3456 if (!result) {
3457 if ((exp1 == FP64_EXP_INF && !mnt2) ||
3458 (exp2 == FP64_EXP_INF && !mnt1)) {
3459 result = fp64_FPTwo(sgn1 ^ sgn2);
3460 } else if (exp1 == FP64_EXP_INF || exp2 == FP64_EXP_INF) {
3461 result = fp64_infinity(sgn1 ^ sgn2);
3462 } else if (!mnt1 || !mnt2) {
3463 result = fp64_zero(sgn1 ^ sgn2);
3464 } else {
3465 result = fp64_mul(op1, op2, mode, &flags);
3466 }
3467 }
3468
3469 set_fpscr0(fpscr, flags);
3470
3471 return result;
3472}
3473
3474template <>
3475uint16_t
3476fplibNeg(uint16_t op)
3477{
3478 return op ^ 1ULL << (FP16_BITS - 1);
3479}
3480
3481template <>
3482uint32_t
3483fplibNeg(uint32_t op)
3484{
3485 return op ^ 1ULL << (FP32_BITS - 1);
3486}
3487
3488template <>
3489uint64_t
3490fplibNeg(uint64_t op)
3491{
3492 return op ^ 1ULL << (FP64_BITS - 1);
3493}
3494
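// 256-entry estimate table matching the ARM ARM's RecipSqrtEstimate():
// bit 7 of the index is the (inverted) exponent parity and bits [6:0]
// are the top mantissa bits, covering inputs in [0.25, 1.0); each
// entry holds the top eight fraction bits of 1/sqrt(x).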
3495static const uint8_t recip_sqrt_estimate[256] = {
3496 255, 253, 251, 249, 247, 245, 243, 242, 240, 238, 236, 234, 233, 231, 229, 228,
3497 226, 224, 223, 221, 219, 218, 216, 215, 213, 212, 210, 209, 207, 206, 204, 203,
3498 201, 200, 198, 197, 196, 194, 193, 192, 190, 189, 188, 186, 185, 184, 183, 181,
3499 180, 179, 178, 176, 175, 174, 173, 172, 170, 169, 168, 167, 166, 165, 164, 163,
3500 162, 160, 159, 158, 157, 156, 155, 154, 153, 152, 151, 150, 149, 148, 147, 146,
3501 145, 144, 143, 142, 141, 140, 140, 139, 138, 137, 136, 135, 134, 133, 132, 131,
3502 131, 130, 129, 128, 127, 126, 126, 125, 124, 123, 122, 121, 121, 120, 119, 118,
3503 118, 117, 116, 115, 114, 114, 113, 112, 111, 111, 110, 109, 109, 108, 107, 106,
3504 105, 104, 103, 101, 100, 99, 97, 96, 95, 93, 92, 91, 90, 88, 87, 86,
3505 85, 84, 82, 81, 80, 79, 78, 77, 76, 75, 74, 72, 71, 70, 69, 68,
3506 67, 66, 65, 64, 63, 62, 61, 60, 60, 59, 58, 57, 56, 55, 54, 53,
3507 52, 51, 51, 50, 49, 48, 47, 46, 46, 45, 44, 43, 42, 42, 41, 40,
3508 39, 38, 38, 37, 36, 35, 35, 34, 33, 33, 32, 31, 30, 30, 29, 28,
3509 28, 27, 26, 26, 25, 24, 24, 23, 22, 22, 21, 20, 20, 19, 19, 18,
3510 17, 17, 16, 16, 15, 14, 14, 13, 13, 12, 11, 11, 10, 10, 9, 9,
3511 8, 8, 7, 6, 6, 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 0
3512};
3513
3514template <>
3515uint16_t
3516fplibRSqrtEstimate(uint16_t op, FPSCR &fpscr)
3517{
3518 int mode = modeConv(fpscr);
3519 int flags = 0;
3520 int sgn, exp;
3521 uint16_t mnt, result;
3522
3523 fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags);
3524
3525 if (fp16_is_NaN(exp, mnt)) {
3526 result = fp16_process_NaN(op, mode, &flags);
3527 } else if (!mnt) {
3528 result = fp16_infinity(sgn);
3529 flags |= FPLIB_DZC;
3530 } else if (sgn) {
3531 result = fp16_defaultNaN();
3532 flags |= FPLIB_IOC;
3533 } else if (exp == FP16_EXP_INF) {
3534 result = fp16_zero(0);
3535 } else {
3536 exp += FP16_EXP_BITS;
3537 mnt = fp16_normalise(mnt, &exp);
3538 mnt = recip_sqrt_estimate[(~exp & 1) << 7 |
3539 (mnt >> (FP16_BITS - 8) & 127)];
3540 result = fp16_pack(0, (3 * FP16_EXP_BIAS - exp - 1) >> 1,
3541 mnt << (FP16_MANT_BITS - 8));
3542 }
3543
3544 set_fpscr0(fpscr, flags);
3545
3546 return result;
3547}
3548
3549template <>
3550uint32_t
3551fplibRSqrtEstimate(uint32_t op, FPSCR &fpscr)
3552{
3553 int mode = modeConv(fpscr);
3554 int flags = 0;
3555 int sgn, exp;
3556 uint32_t mnt, result;
3557
3558 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
3559
3560 if (fp32_is_NaN(exp, mnt)) {
3561 result = fp32_process_NaN(op, mode, &flags);
3562 } else if (!mnt) {
3563 result = fp32_infinity(sgn);
3564 flags |= FPLIB_DZC;
3565 } else if (sgn) {
3566 result = fp32_defaultNaN();
3567 flags |= FPLIB_IOC;
3568 } else if (exp == FP32_EXP_INF) {
3569 result = fp32_zero(0);
3570 } else {
3571 exp += FP32_EXP_BITS;
3572 mnt = fp32_normalise(mnt, &exp);
3573 mnt = recip_sqrt_estimate[(~exp & 1) << 7 |
3574 (mnt >> (FP32_BITS - 8) & 127)];
3575 result = fp32_pack(0, (3 * FP32_EXP_BIAS - exp - 1) >> 1,
3576 mnt << (FP32_MANT_BITS - 8));
3577 }
3578
3579 set_fpscr0(fpscr, flags);
3580
3581 return result;
3582}
3583
3584template <>
3585uint64_t
3586fplibRSqrtEstimate(uint64_t op, FPSCR &fpscr)
3587{
3588 int mode = modeConv(fpscr);
3589 int flags = 0;
3590 int sgn, exp;
3591 uint64_t mnt, result;
3592
3593 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
3594
3595 if (fp64_is_NaN(exp, mnt)) {
3596 result = fp64_process_NaN(op, mode, &flags);
3597 } else if (!mnt) {
3598 result = fp64_infinity(sgn);
3599 flags |= FPLIB_DZC;
3600 } else if (sgn) {
3601 result = fp64_defaultNaN();
3602 flags |= FPLIB_IOC;
3603 } else if (exp == FP64_EXP_INF) {
3604 result = fp64_zero(0);
3605 } else {
3606 exp += FP64_EXP_BITS;
3607 mnt = fp64_normalise(mnt, &exp);
3608 mnt = recip_sqrt_estimate[(~exp & 1) << 7 |
3609 (mnt >> (FP64_BITS - 8) & 127)];
3610 result = fp64_pack(0, (3 * FP64_EXP_BIAS - exp - 1) >> 1,
3611 mnt << (FP64_MANT_BITS - 8));
3612 }
3613
3614 set_fpscr0(fpscr, flags);
3615
3616 return result;
3617}
3618
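// FRSQRTS step: computes (3 - op1 * op2) / 2. op1 is negated up front,
// the fused multiply-add evaluates 3 + (-op1) * op2 without any
// intermediate rounding, and the -1 scale argument (0 in plain
// fplibMulAdd) supplies the final halving.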
3619template <>
3620uint16_t
3621fplibRSqrtStepFused(uint16_t op1, uint16_t op2, FPSCR &fpscr)
3622{
3623 int mode = modeConv(fpscr);
3624 int flags = 0;
3625 int sgn1, exp1, sgn2, exp2;
3626 uint16_t mnt1, mnt2, result;
3627
3628 op1 = fplibNeg<uint16_t>(op1);
3629 fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3630 fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3631
3632 result = fp16_process_NaNs(op1, op2, mode, &flags);
3633 if (!result) {
3634 if ((exp1 == FP16_EXP_INF && !mnt2) ||
3635 (exp2 == FP16_EXP_INF && !mnt1)) {
3636 result = fp16_FPOnePointFive(0);
3637 } else if (exp1 == FP16_EXP_INF || exp2 == FP16_EXP_INF) {
3638 result = fp16_infinity(sgn1 ^ sgn2);
3639 } else {
3640 result = fp16_muladd(fp16_FPThree(0), op1, op2, -1, mode, &flags);
3641 }
3642 }
3643
3644 set_fpscr0(fpscr, flags);
3645
3646 return result;
3647}
3648
3649template <>
3650uint32_t
3651fplibRSqrtStepFused(uint32_t op1, uint32_t op2, FPSCR &fpscr)
3652{
3653 int mode = modeConv(fpscr);
3654 int flags = 0;
3655 int sgn1, exp1, sgn2, exp2;
3656 uint32_t mnt1, mnt2, result;
3657
3658 op1 = fplibNeg<uint32_t>(op1);
3659 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3660 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3661
3662 result = fp32_process_NaNs(op1, op2, mode, &flags);
3663 if (!result) {
3664 if ((exp1 == FP32_EXP_INF && !mnt2) ||
3665 (exp2 == FP32_EXP_INF && !mnt1)) {
3666 result = fp32_FPOnePointFive(0);
3667 } else if (exp1 == FP32_EXP_INF || exp2 == FP32_EXP_INF) {
3668 result = fp32_infinity(sgn1 ^ sgn2);
3669 } else {
3670 result = fp32_muladd(fp32_FPThree(0), op1, op2, -1, mode, &flags);
3671 }
3672 }
3673
3674 set_fpscr0(fpscr, flags);
3675
3676 return result;
3677}
3678
3679template <>
3680uint64_t
3681fplibRSqrtStepFused(uint64_t op1, uint64_t op2, FPSCR &fpscr)
3682{
3683 int mode = modeConv(fpscr);
3684 int flags = 0;
3685 int sgn1, exp1, sgn2, exp2;
3686 uint64_t mnt1, mnt2, result;
3687
3688 op1 = fplibNeg<uint64_t>(op1);
3689 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3690 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3691
3692 result = fp64_process_NaNs(op1, op2, mode, &flags);
3693 if (!result) {
3694 if ((exp1 == FP64_EXP_INF && !mnt2) ||
3695 (exp2 == FP64_EXP_INF && !mnt1)) {
3696 result = fp64_FPOnePointFive(0);
3697 } else if (exp1 == FP64_EXP_INF || exp2 == FP64_EXP_INF) {
3698 result = fp64_infinity(sgn1 ^ sgn2);
3699 } else {
3700 result = fp64_muladd(fp64_FPThree(0), op1, op2, -1, mode, &flags);
3701 }
3702 }
3703
3704 set_fpscr0(fpscr, flags);
3705
3706 return result;
3707}
3708
3709template <>
3710uint16_t
3711fplibRecipEstimate(uint16_t op, FPSCR &fpscr)
3712{
3713 int mode = modeConv(fpscr);
3714 int flags = 0;
3715 int sgn, exp;
3716 uint16_t mnt, result;
3717
3718 fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags);
3719
3720 if (fp16_is_NaN(exp, mnt)) {
3721 result = fp16_process_NaN(op, mode, &flags);
3722 } else if (exp == FP16_EXP_INF) {
3723 result = fp16_zero(sgn);
3724 } else if (!mnt) {
3725 result = fp16_infinity(sgn);
3726 flags |= FPLIB_DZC;
3727 } else if (!((uint16_t)(op << 1) >> (FP16_MANT_BITS - 1))) {
3728 bool overflow_to_inf = false;
3729 switch (FPCRRounding(fpscr)) {
3730 case FPRounding_TIEEVEN:
3731 overflow_to_inf = true;
3732 break;
3733 case FPRounding_POSINF:
3734 overflow_to_inf = !sgn;
3735 break;
3736 case FPRounding_NEGINF:
3737 overflow_to_inf = sgn;
3738 break;
3739 case FPRounding_ZERO:
3740 overflow_to_inf = false;
3741 break;
3742 default:
46#include "fplib.hh"
47
48namespace ArmISA
49{
50
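// Mode word layout: rounding mode in bits [1:0] (RN/RP/RM/RZ), then
// independent flag bits for flush-to-zero, default-NaN, alternative
// half-precision and half-precision flush-to-zero.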
51#define FPLIB_RN 0
52#define FPLIB_RP 1
53#define FPLIB_RM 2
54#define FPLIB_RZ 3
55#define FPLIB_FZ 4
56#define FPLIB_DN 8
57#define FPLIB_AHP 16
58#define FPLIB_FZ16 32
59
60#define FPLIB_IDC 128 // Input Denormal
61#define FPLIB_IXC 16 // Inexact
62#define FPLIB_UFC 8 // Underflow
63#define FPLIB_OFC 4 // Overflow
64#define FPLIB_DZC 2 // Division by Zero
65#define FPLIB_IOC 1 // Invalid Operation
66
67#define FP16_BITS 16
68#define FP32_BITS 32
69#define FP64_BITS 64
70
71#define FP16_EXP_BITS 5
72#define FP32_EXP_BITS 8
73#define FP64_EXP_BITS 11
74
75#define FP16_EXP_BIAS 15
76#define FP32_EXP_BIAS 127
77#define FP64_EXP_BIAS 1023
78
79#define FP16_EXP_INF ((1ULL << FP16_EXP_BITS) - 1)
80#define FP32_EXP_INF ((1ULL << FP32_EXP_BITS) - 1)
81#define FP64_EXP_INF ((1ULL << FP64_EXP_BITS) - 1)
82
83#define FP16_MANT_BITS (FP16_BITS - FP16_EXP_BITS - 1)
84#define FP32_MANT_BITS (FP32_BITS - FP32_EXP_BITS - 1)
85#define FP64_MANT_BITS (FP64_BITS - FP64_EXP_BITS - 1)
86
87#define FP16_EXP(x) ((x) >> FP16_MANT_BITS & ((1ULL << FP16_EXP_BITS) - 1))
88#define FP32_EXP(x) ((x) >> FP32_MANT_BITS & ((1ULL << FP32_EXP_BITS) - 1))
89#define FP64_EXP(x) ((x) >> FP64_MANT_BITS & ((1ULL << FP64_EXP_BITS) - 1))
90
91#define FP16_MANT(x) ((x) & ((1ULL << FP16_MANT_BITS) - 1))
92#define FP32_MANT(x) ((x) & ((1ULL << FP32_MANT_BITS) - 1))
93#define FP64_MANT(x) ((x) & ((1ULL << FP64_MANT_BITS) - 1))
94
95static inline uint16_t
96lsl16(uint16_t x, uint32_t shift)
97{
98 return shift < 16 ? x << shift : 0;
99}
100
101static inline uint16_t
102lsr16(uint16_t x, uint32_t shift)
103{
104 return shift < 16 ? x >> shift : 0;
105}
106
107static inline uint32_t
108lsl32(uint32_t x, uint32_t shift)
109{
110 return shift < 32 ? x << shift : 0;
111}
112
113static inline uint32_t
114lsr32(uint32_t x, uint32_t shift)
115{
116 return shift < 32 ? x >> shift : 0;
117}
118
119static inline uint64_t
120lsl64(uint64_t x, uint32_t shift)
121{
122 return shift < 64 ? x << shift : 0;
123}
124
125static inline uint64_t
126lsr64(uint64_t x, uint32_t shift)
127{
128 return shift < 64 ? x >> shift : 0;
129}
130
131static inline void
132lsl128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift)
133{
134 if (shift == 0) {
135 *r1 = x1;
136 *r0 = x0;
137 } else if (shift < 64) {
138 *r1 = x1 << shift | x0 >> (64 - shift);
139 *r0 = x0 << shift;
140 } else if (shift < 128) {
141 *r1 = x0 << (shift - 64);
142 *r0 = 0;
143 } else {
144 *r1 = 0;
145 *r0 = 0;
146 }
147}
148
149static inline void
150lsr128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift)
151{
152 if (shift == 0) {
153 *r1 = x1;
154 *r0 = x0;
155 } else if (shift < 64) {
156 *r0 = x0 >> shift | x1 << (64 - shift);
157 *r1 = x1 >> shift;
158 } else if (shift < 128) {
159 *r0 = x1 >> (shift - 64);
160 *r1 = 0;
161 } else {
162 *r0 = 0;
163 *r1 = 0;
164 }
165}
166
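// Full 124-bit product of two operands below 2^62, built from 31-bit
// limbs; the middle term is formed Karatsuba-style as
// (a0 + a1) * (b0 + b1) - p0 - p2 so every partial product and carry
// chain fits in 64 bits.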
167static inline void
168mul62x62(uint64_t *x0, uint64_t *x1, uint64_t a, uint64_t b)
169{
170 uint32_t mask = ((uint32_t)1 << 31) - 1;
171 uint64_t a0 = a & mask;
172 uint64_t a1 = a >> 31 & mask;
173 uint64_t b0 = b & mask;
174 uint64_t b1 = b >> 31 & mask;
175 uint64_t p0 = a0 * b0;
176 uint64_t p2 = a1 * b1;
177 uint64_t p1 = (a0 + a1) * (b0 + b1) - p0 - p2;
178 uint64_t s0 = p0;
179 uint64_t s1 = (s0 >> 31) + p1;
180 uint64_t s2 = (s1 >> 31) + p2;
181 *x0 = (s0 & mask) | (s1 & mask) << 31 | s2 << 62;
182 *x1 = s2 >> 2;
183}
184
185static inline
186void mul64x32(uint64_t *x0, uint64_t *x1, uint64_t a, uint32_t b)
187{
188 uint64_t t0 = (uint64_t)(uint32_t)a * b;
189 uint64_t t1 = (t0 >> 32) + (a >> 32) * b;
190 *x0 = t1 << 32 | (uint32_t)t0;
191 *x1 = t1 >> 32;
192}
193
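// 128-bit add/subtract: the carry (respectively borrow) out of the low
// word is detected by unsigned wrap-around, *x0 < a0 (*x0 > a0).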
194static inline void
195add128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0,
196 uint64_t b1)
197{
198 *x0 = a0 + b0;
199 *x1 = a1 + b1 + (*x0 < a0);
200}
201
202static inline void
203sub128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0,
204 uint64_t b1)
205{
206 *x0 = a0 - b0;
207 *x1 = a1 - b1 - (*x0 > a0);
208}
209
210static inline int
211cmp128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1)
212{
213 return (a1 < b1 ? -1 : a1 > b1 ? 1 : a0 < b0 ? -1 : a0 > b0 ? 1 : 0);
214}
215
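// Left-justify the mantissa with a shift-by-8/4/2/1 binary search for
// the leading one, decrementing *exp by the total shift; the wider
// variants below follow the same pattern with larger first steps.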
216static inline uint16_t
217fp16_normalise(uint16_t mnt, int *exp)
218{
219 int shift;
220
221 if (!mnt) {
222 return 0;
223 }
224
225 for (shift = 8; shift; shift >>= 1) {
226 if (!(mnt >> (16 - shift))) {
227 mnt <<= shift;
228 *exp -= shift;
229 }
230 }
231 return mnt;
232}
233
234static inline uint32_t
235fp32_normalise(uint32_t mnt, int *exp)
236{
237 int shift;
238
239 if (!mnt) {
240 return 0;
241 }
242
243 for (shift = 16; shift; shift >>= 1) {
244 if (!(mnt >> (32 - shift))) {
245 mnt <<= shift;
246 *exp -= shift;
247 }
248 }
249 return mnt;
250}
251
252static inline uint64_t
253fp64_normalise(uint64_t mnt, int *exp)
254{
255 int shift;
256
257 if (!mnt) {
258 return 0;
259 }
260
261 for (shift = 32; shift; shift >>= 1) {
262 if (!(mnt >> (64 - shift))) {
263 mnt <<= shift;
264 *exp -= shift;
265 }
266 }
267 return mnt;
268}
269
270static inline void
271fp128_normalise(uint64_t *mnt0, uint64_t *mnt1, int *exp)
272{
273 uint64_t x0 = *mnt0;
274 uint64_t x1 = *mnt1;
275 int shift;
276
277 if (!x0 && !x1) {
278 return;
279 }
280
281 if (!x1) {
282 x1 = x0;
283 x0 = 0;
284 *exp -= 64;
285 }
286
287 for (shift = 32; shift; shift >>= 1) {
288 if (!(x1 >> (64 - shift))) {
289 x1 = x1 << shift | x0 >> (64 - shift);
290 x0 <<= shift;
291 *exp -= shift;
292 }
293 }
294
295 *mnt0 = x0;
296 *mnt1 = x1;
297}
298
299static inline uint16_t
300fp16_pack(uint16_t sgn, uint16_t exp, uint16_t mnt)
301{
302 return sgn << (FP16_BITS - 1) | exp << FP16_MANT_BITS | FP16_MANT(mnt);
303}
304
305static inline uint32_t
306fp32_pack(uint32_t sgn, uint32_t exp, uint32_t mnt)
307{
308 return sgn << (FP32_BITS - 1) | exp << FP32_MANT_BITS | FP32_MANT(mnt);
309}
310
311static inline uint64_t
312fp64_pack(uint64_t sgn, uint64_t exp, uint64_t mnt)
313{
314 return sgn << (FP64_BITS - 1) | exp << FP64_MANT_BITS | FP64_MANT(mnt);
315}
316
317static inline uint16_t
318fp16_zero(int sgn)
319{
320 return fp16_pack(sgn, 0, 0);
321}
322
323static inline uint32_t
324fp32_zero(int sgn)
325{
326 return fp32_pack(sgn, 0, 0);
327}
328
329static inline uint64_t
330fp64_zero(int sgn)
331{
332 return fp64_pack(sgn, 0, 0);
333}
334
335static inline uint16_t
336fp16_max_normal(int sgn)
337{
338 return fp16_pack(sgn, FP16_EXP_INF - 1, -1);
339}
340
341static inline uint32_t
342fp32_max_normal(int sgn)
343{
344 return fp32_pack(sgn, FP32_EXP_INF - 1, -1);
345}
346
347static inline uint64_t
348fp64_max_normal(int sgn)
349{
350 return fp64_pack(sgn, FP64_EXP_INF - 1, -1);
351}
352
353static inline uint16_t
354fp16_infinity(int sgn)
355{
356 return fp16_pack(sgn, FP16_EXP_INF, 0);
357}
358
359static inline uint32_t
360fp32_infinity(int sgn)
361{
362 return fp32_pack(sgn, FP32_EXP_INF, 0);
363}
364
365static inline uint64_t
366fp64_infinity(int sgn)
367{
368 return fp64_pack(sgn, FP64_EXP_INF, 0);
369}
370
371static inline uint16_t
372fp16_defaultNaN()
373{
374 return fp16_pack(0, FP16_EXP_INF, 1ULL << (FP16_MANT_BITS - 1));
375}
376
377static inline uint32_t
378fp32_defaultNaN()
379{
380 return fp32_pack(0, FP32_EXP_INF, 1ULL << (FP32_MANT_BITS - 1));
381}
382
383static inline uint64_t
384fp64_defaultNaN()
385{
386 return fp64_pack(0, FP64_EXP_INF, 1ULL << (FP64_MANT_BITS - 1));
387}
388
389static inline void
390fp16_unpack(int *sgn, int *exp, uint16_t *mnt, uint16_t x, int mode,
391 int *flags)
392{
393 *sgn = x >> (FP16_BITS - 1);
394 *exp = FP16_EXP(x);
395 *mnt = FP16_MANT(x);
396
397 // Handle subnormals:
398 if (*exp) {
399 *mnt |= 1ULL << FP16_MANT_BITS;
400 } else {
401 ++*exp;
402 // IDC (Input Denormal) is not set in this case.
403 if (mode & FPLIB_FZ16)
404 *mnt = 0;
405 }
406}
407
408static inline void
409fp32_unpack(int *sgn, int *exp, uint32_t *mnt, uint32_t x, int mode,
410 int *flags)
411{
412 *sgn = x >> (FP32_BITS - 1);
413 *exp = FP32_EXP(x);
414 *mnt = FP32_MANT(x);
415
416 // Handle subnormals:
417 if (*exp) {
418 *mnt |= 1ULL << FP32_MANT_BITS;
419 } else {
420 ++*exp;
421 if ((mode & FPLIB_FZ) && *mnt) {
422 *flags |= FPLIB_IDC;
423 *mnt = 0;
424 }
425 }
426}
427
428static inline void
429fp64_unpack(int *sgn, int *exp, uint64_t *mnt, uint64_t x, int mode,
430 int *flags)
431{
432 *sgn = x >> (FP64_BITS - 1);
433 *exp = FP64_EXP(x);
434 *mnt = FP64_MANT(x);
435
436 // Handle subnormals:
437 if (*exp) {
438 *mnt |= 1ULL << FP64_MANT_BITS;
439 } else {
440 ++*exp;
441 if ((mode & FPLIB_FZ) && *mnt) {
442 *flags |= FPLIB_IDC;
443 *mnt = 0;
444 }
445 }
446}
447
448static inline int
449fp16_is_NaN(int exp, uint16_t mnt)
450{
451 return exp == FP16_EXP_INF && FP16_MANT(mnt);
452}
453
454static inline int
455fp32_is_NaN(int exp, uint32_t mnt)
456{
457 return exp == FP32_EXP_INF && FP32_MANT(mnt);
458}
459
460static inline int
461fp64_is_NaN(int exp, uint64_t mnt)
462{
463 return exp == FP64_EXP_INF && FP64_MANT(mnt);
464}
465
466static inline int
467fp16_is_signalling_NaN(int exp, uint16_t mnt)
468{
469 return fp16_is_NaN(exp, mnt) && !(mnt >> (FP16_MANT_BITS - 1) & 1);
470}
471
472static inline int
473fp32_is_signalling_NaN(int exp, uint32_t mnt)
474{
475 return fp32_is_NaN(exp, mnt) && !(mnt >> (FP32_MANT_BITS - 1) & 1);
476}
477
478static inline int
479fp64_is_signalling_NaN(int exp, uint64_t mnt)
480{
481 return fp64_is_NaN(exp, mnt) && !(mnt >> (FP64_MANT_BITS - 1) & 1);
482}
483
484static inline int
485fp16_is_quiet_NaN(int exp, uint16_t mnt)
486{
487 return exp == FP16_EXP_INF && (mnt >> (FP16_MANT_BITS - 1) & 1);
488}
489
490static inline int
491fp32_is_quiet_NaN(int exp, uint32_t mnt)
492{
493 return exp == FP32_EXP_INF && (mnt >> (FP32_MANT_BITS - 1) & 1);
494}
495
496static inline int
497fp64_is_quiet_NaN(int exp, uint64_t mnt)
498{
499 return exp == FP64_EXP_INF && (mnt >> (FP64_MANT_BITS - 1) & 1);
500}
501
502static inline int
503fp16_is_infinity(int exp, uint16_t mnt)
504{
505 return exp == FP16_EXP_INF && !FP16_MANT(mnt);
506}
507
508static inline int
509fp32_is_infinity(int exp, uint32_t mnt)
510{
511 return exp == FP32_EXP_INF && !FP32_MANT(mnt);
512}
513
514static inline int
515fp64_is_infinity(int exp, uint64_t mnt)
516{
517 return exp == FP64_EXP_INF && !FP64_MANT(mnt);
518}
519
520static inline uint16_t
521fp16_process_NaN(uint16_t a, int mode, int *flags)
522{
523 if (!(a >> (FP16_MANT_BITS - 1) & 1)) {
524 *flags |= FPLIB_IOC;
525 a |= 1ULL << (FP16_MANT_BITS - 1);
526 }
527 return mode & FPLIB_DN ? fp16_defaultNaN() : a;
528}
529
530static inline uint32_t
531fp32_process_NaN(uint32_t a, int mode, int *flags)
532{
533 if (!(a >> (FP32_MANT_BITS - 1) & 1)) {
534 *flags |= FPLIB_IOC;
535 a |= 1ULL << (FP32_MANT_BITS - 1);
536 }
537 return mode & FPLIB_DN ? fp32_defaultNaN() : a;
538}
539
540static inline uint64_t
541fp64_process_NaN(uint64_t a, int mode, int *flags)
542{
543 if (!(a >> (FP64_MANT_BITS - 1) & 1)) {
544 *flags |= FPLIB_IOC;
545 a |= 1ULL << (FP64_MANT_BITS - 1);
546 }
547 return mode & FPLIB_DN ? fp64_defaultNaN() : a;
548}
549
550static uint16_t
551fp16_process_NaNs(uint16_t a, uint16_t b, int mode, int *flags)
552{
553 int a_exp = FP16_EXP(a);
554 uint16_t a_mnt = FP16_MANT(a);
555 int b_exp = FP16_EXP(b);
556 uint16_t b_mnt = FP16_MANT(b);
557
558 // Handle signalling NaNs:
559 if (fp16_is_signalling_NaN(a_exp, a_mnt))
560 return fp16_process_NaN(a, mode, flags);
561 if (fp16_is_signalling_NaN(b_exp, b_mnt))
562 return fp16_process_NaN(b, mode, flags);
563
564 // Handle quiet NaNs:
565 if (fp16_is_NaN(a_exp, a_mnt))
566 return fp16_process_NaN(a, mode, flags);
567 if (fp16_is_NaN(b_exp, b_mnt))
568 return fp16_process_NaN(b, mode, flags);
569
570 return 0;
571}
572
573static uint32_t
574fp32_process_NaNs(uint32_t a, uint32_t b, int mode, int *flags)
575{
576 int a_exp = FP32_EXP(a);
577 uint32_t a_mnt = FP32_MANT(a);
578 int b_exp = FP32_EXP(b);
579 uint32_t b_mnt = FP32_MANT(b);
580
581 // Handle signalling NaNs:
582 if (fp32_is_signalling_NaN(a_exp, a_mnt))
583 return fp32_process_NaN(a, mode, flags);
584 if (fp32_is_signalling_NaN(b_exp, b_mnt))
585 return fp32_process_NaN(b, mode, flags);
586
587 // Handle quiet NaNs:
588 if (fp32_is_NaN(a_exp, a_mnt))
589 return fp32_process_NaN(a, mode, flags);
590 if (fp32_is_NaN(b_exp, b_mnt))
591 return fp32_process_NaN(b, mode, flags);
592
593 return 0;
594}
595
596static uint64_t
597fp64_process_NaNs(uint64_t a, uint64_t b, int mode, int *flags)
598{
599 int a_exp = FP64_EXP(a);
600 uint64_t a_mnt = FP64_MANT(a);
601 int b_exp = FP64_EXP(b);
602 uint64_t b_mnt = FP64_MANT(b);
603
604 // Handle signalling NaNs:
605 if (fp64_is_signalling_NaN(a_exp, a_mnt))
606 return fp64_process_NaN(a, mode, flags);
607 if (fp64_is_signalling_NaN(b_exp, b_mnt))
608 return fp64_process_NaN(b, mode, flags);
609
610 // Handle quiet NaNs:
611 if (fp64_is_NaN(a_exp, a_mnt))
612 return fp64_process_NaN(a, mode, flags);
613 if (fp64_is_NaN(b_exp, b_mnt))
614 return fp64_process_NaN(b, mode, flags);
615
616 return 0;
617}
618
619static uint16_t
620fp16_process_NaNs3(uint16_t a, uint16_t b, uint16_t c, int mode, int *flags)
621{
622 int a_exp = FP16_EXP(a);
623 uint16_t a_mnt = FP16_MANT(a);
624 int b_exp = FP16_EXP(b);
625 uint16_t b_mnt = FP16_MANT(b);
626 int c_exp = FP16_EXP(c);
627 uint16_t c_mnt = FP16_MANT(c);
628
629 // Handle signalling NaNs:
630 if (fp16_is_signalling_NaN(a_exp, a_mnt))
631 return fp16_process_NaN(a, mode, flags);
632 if (fp16_is_signalling_NaN(b_exp, b_mnt))
633 return fp16_process_NaN(b, mode, flags);
634 if (fp16_is_signalling_NaN(c_exp, c_mnt))
635 return fp16_process_NaN(c, mode, flags);
636
637 // Handle quiet NaNs:
638 if (fp16_is_NaN(a_exp, a_mnt))
639 return fp16_process_NaN(a, mode, flags);
640 if (fp16_is_NaN(b_exp, b_mnt))
641 return fp16_process_NaN(b, mode, flags);
642 if (fp16_is_NaN(c_exp, c_mnt))
643 return fp16_process_NaN(c, mode, flags);
644
645 return 0;
646}
647
648static uint32_t
649fp32_process_NaNs3(uint32_t a, uint32_t b, uint32_t c, int mode, int *flags)
650{
651 int a_exp = FP32_EXP(a);
652 uint32_t a_mnt = FP32_MANT(a);
653 int b_exp = FP32_EXP(b);
654 uint32_t b_mnt = FP32_MANT(b);
655 int c_exp = FP32_EXP(c);
656 uint32_t c_mnt = FP32_MANT(c);
657
658 // Handle signalling NaNs:
659 if (fp32_is_signalling_NaN(a_exp, a_mnt))
660 return fp32_process_NaN(a, mode, flags);
661 if (fp32_is_signalling_NaN(b_exp, b_mnt))
662 return fp32_process_NaN(b, mode, flags);
663 if (fp32_is_signalling_NaN(c_exp, c_mnt))
664 return fp32_process_NaN(c, mode, flags);
665
666 // Handle quiet NaNs:
667 if (fp32_is_NaN(a_exp, a_mnt))
668 return fp32_process_NaN(a, mode, flags);
669 if (fp32_is_NaN(b_exp, b_mnt))
670 return fp32_process_NaN(b, mode, flags);
671 if (fp32_is_NaN(c_exp, c_mnt))
672 return fp32_process_NaN(c, mode, flags);
673
674 return 0;
675}
676
677static uint64_t
678fp64_process_NaNs3(uint64_t a, uint64_t b, uint64_t c, int mode, int *flags)
679{
680 int a_exp = FP64_EXP(a);
681 uint64_t a_mnt = FP64_MANT(a);
682 int b_exp = FP64_EXP(b);
683 uint64_t b_mnt = FP64_MANT(b);
684 int c_exp = FP64_EXP(c);
685 uint64_t c_mnt = FP64_MANT(c);
686
687 // Handle signalling NaNs:
688 if (fp64_is_signalling_NaN(a_exp, a_mnt))
689 return fp64_process_NaN(a, mode, flags);
690 if (fp64_is_signalling_NaN(b_exp, b_mnt))
691 return fp64_process_NaN(b, mode, flags);
692 if (fp64_is_signalling_NaN(c_exp, c_mnt))
693 return fp64_process_NaN(c, mode, flags);
694
695 // Handle quiet NaNs:
696 if (fp64_is_NaN(a_exp, a_mnt))
697 return fp64_process_NaN(a, mode, flags);
698 if (fp64_is_NaN(b_exp, b_mnt))
699 return fp64_process_NaN(b, mode, flags);
700 if (fp64_is_NaN(c_exp, c_mnt))
701 return fp64_process_NaN(c, mode, flags);
702
703 return 0;
704}
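// Propagation order in the process_NaNs helpers: a signalling NaN in the
// first operand wins, then the second, then the third, and only then
// quiet NaNs in the same order. The chosen NaN is quietened by
// fp*_process_NaN, which also raises IOC for signalling inputs. A return
// value of zero means "no NaN present", which is unambiguous because
// zero is not a NaN encoding.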
705
706static uint16_t
707fp16_round_(int sgn, int exp, uint16_t mnt, int rm, int mode, int *flags)
708{
709 int biased_exp; // non-negative exponent value for result
710 uint16_t int_mant; // mantissa for result, less than (2 << FP16_MANT_BITS)
711 int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5
712
713 assert(rm != FPRounding_TIEAWAY);
714
715 // Flush to zero:
716 if ((mode & FPLIB_FZ16) && exp < 1) {
717 *flags |= FPLIB_UFC;
718 return fp16_zero(sgn);
719 }
720
721 // The bottom FP16_EXP_BITS bits of mnt are ORed together into a sticky bit:
722 mnt = (4ULL << FP16_MANT_BITS | mnt >> (FP16_EXP_BITS - 1) |
723 ((mnt & ((1ULL << FP16_EXP_BITS) - 1)) != 0));
724
725 if (exp > 0) {
726 biased_exp = exp;
727 int_mant = mnt >> 2;
728 error = mnt & 3;
729 } else {
730 biased_exp = 0;
731 int_mant = lsr16(mnt, 3 - exp);
732 error = (lsr16(mnt, 1 - exp) & 3) | !!(mnt & (lsl16(1, 1 - exp) - 1));
733 }
734
735 if (!biased_exp && error) { // xx should also check fpscr_val<11> (UFE, the underflow trap enable)
736 *flags |= FPLIB_UFC;
737 }
738
739 // Round up:
740 if ((rm == FPLIB_RN && (error == 3 ||
741 (error == 2 && (int_mant & 1)))) ||
742 (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) {
743 ++int_mant;
744 if (int_mant == 1ULL << FP16_MANT_BITS) {
745 // Rounded up from denormalized to normalized
746 biased_exp = 1;
747 }
748 if (int_mant == 2ULL << FP16_MANT_BITS) {
749 // Rounded up to next exponent
750 ++biased_exp;
751 int_mant >>= 1;
752 }
753 }
754
755 // Handle rounding to odd aka Von Neumann rounding:
756 if (error && rm == FPRounding_ODD)
757 int_mant |= 1;
758
759 // Handle overflow:
760 if (!(mode & FPLIB_AHP)) {
761 if (biased_exp >= (int)FP16_EXP_INF) {
762 *flags |= FPLIB_OFC | FPLIB_IXC;
763 if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) ||
764 (rm == FPLIB_RM && sgn)) {
765 return fp16_infinity(sgn);
766 } else {
767 return fp16_max_normal(sgn);
768 }
769 }
770 } else {
771 if (biased_exp >= (int)FP16_EXP_INF + 1) {
772 *flags |= FPLIB_IOC;
773 return fp16_pack(sgn, FP16_EXP_INF, -1); // AHP has no infinities: saturate to max magnitude
774 }
775 }
776
777 if (error) {
778 *flags |= FPLIB_IXC;
779 }
780
781 return fp16_pack(sgn, biased_exp, int_mant);
782}
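// Rounding example, assuming the FPLIB_RN/RP/RM encodings implied by the
// code above. With sgn = 0 the two error bits act as guard and sticky:
//   error == 1 (below the halfway point) -> truncate,
//   error == 2 (exactly halfway)         -> round to even under RN,
//   error == 3 (above the halfway point) -> round up.
// So int_mant = 0x401 with error = 2 becomes 0x402 under RN (ties to
// even), while int_mant = 0x402 with error = 2 is left alone and only
// raises the inexact flag (IXC).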
783
784static uint16_t
785fp16_round(int sgn, int exp, uint16_t mnt, int mode, int *flags)
786{
787 return fp16_round_(sgn, exp, mnt, mode & 3, mode, flags);
788}
789
790static uint32_t
791fp32_round_(int sgn, int exp, uint32_t mnt, int rm, int mode, int *flags)
792{
793 int biased_exp; // non-negative exponent value for result
794 uint32_t int_mant; // mantissa for result, less than (2 << FP32_MANT_BITS)
795 int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5
796
797 assert(rm != FPRounding_TIEAWAY);
798
799 // Flush to zero:
800 if ((mode & FPLIB_FZ) && exp < 1) {
801 *flags |= FPLIB_UFC;
802 return fp32_zero(sgn);
803 }
804
805 // The bottom FP32_EXP_BITS bits of mnt are ORed together into a sticky bit:
806 mnt = (4ULL << FP32_MANT_BITS | mnt >> (FP32_EXP_BITS - 1) |
807 ((mnt & ((1ULL << FP32_EXP_BITS) - 1)) != 0));
808
809 if (exp > 0) {
810 biased_exp = exp;
811 int_mant = mnt >> 2;
812 error = mnt & 3;
813 } else {
814 biased_exp = 0;
815 int_mant = lsr32(mnt, 3 - exp);
816 error = (lsr32(mnt, 1 - exp) & 3) | !!(mnt & (lsl32(1, 1 - exp) - 1));
817 }
818
819 if (!biased_exp && error) { // xx should also check fpscr_val<11> (UFE, the underflow trap enable)
820 *flags |= FPLIB_UFC;
821 }
822
823 // Round up:
824 if ((rm == FPLIB_RN && (error == 3 ||
825 (error == 2 && (int_mant & 1)))) ||
826 (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) {
827 ++int_mant;
828 if (int_mant == 1ULL << FP32_MANT_BITS) {
829 // Rounded up from denormalized to normalized
830 biased_exp = 1;
831 }
832 if (int_mant == 2ULL << FP32_MANT_BITS) {
833 // Rounded up to next exponent
834 ++biased_exp;
835 int_mant >>= 1;
836 }
837 }
838
839 // Handle rounding to odd aka Von Neumann rounding:
840 if (error && rm == FPRounding_ODD)
841 int_mant |= 1;
842
843 // Handle overflow:
844 if (biased_exp >= (int)FP32_EXP_INF) {
845 *flags |= FPLIB_OFC | FPLIB_IXC;
846 if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) ||
847 (rm == FPLIB_RM && sgn)) {
848 return fp32_infinity(sgn);
849 } else {
850 return fp32_max_normal(sgn);
851 }
852 }
853
854 if (error) {
855 *flags |= FPLIB_IXC;
856 }
857
858 return fp32_pack(sgn, biased_exp, int_mant);
859}
860
861static uint32_t
862fp32_round(int sgn, int exp, uint32_t mnt, int mode, int *flags)
863{
864 return fp32_round_(sgn, exp, mnt, mode & 3, mode, flags);
865}
866
867static uint64_t
868fp64_round_(int sgn, int exp, uint64_t mnt, int rm, int mode, int *flags)
869{
870 int biased_exp; // non-negative exponent value for result
871 uint64_t int_mant; // mantissa for result, less than (2 << FP64_MANT_BITS)
872 int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5
873
874 assert(rm != FPRounding_TIEAWAY);
875
876 // Flush to zero:
877 if ((mode & FPLIB_FZ) && exp < 1) {
878 *flags |= FPLIB_UFC;
879 return fp64_zero(sgn);
880 }
881
882 // The bottom FP64_EXP_BITS bits of mnt are ORed together into a sticky bit:
883 mnt = (4ULL << FP64_MANT_BITS | mnt >> (FP64_EXP_BITS - 1) |
884 ((mnt & ((1ULL << FP64_EXP_BITS) - 1)) != 0));
885
886 if (exp > 0) {
887 biased_exp = exp;
888 int_mant = mnt >> 2;
889 error = mnt & 3;
890 } else {
891 biased_exp = 0;
892 int_mant = lsr64(mnt, 3 - exp);
893 error = (lsr64(mnt, 1 - exp) & 3) | !!(mnt & (lsl64(1, 1 - exp) - 1));
894 }
895
896 if (!biased_exp && error) { // xx should also check fpscr_val<11> (UFE, the underflow trap enable)
897 *flags |= FPLIB_UFC;
898 }
899
900 // Round up:
901 if ((rm == FPLIB_RN && (error == 3 ||
902 (error == 2 && (int_mant & 1)))) ||
903 (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) {
904 ++int_mant;
905 if (int_mant == 1ULL << FP64_MANT_BITS) {
906 // Rounded up from denormalized to normalized
907 biased_exp = 1;
908 }
909 if (int_mant == 2ULL << FP64_MANT_BITS) {
910 // Rounded up to next exponent
911 ++biased_exp;
912 int_mant >>= 1;
913 }
914 }
915
916 // Handle rounding to odd aka Von Neumann rounding:
917 if (error && rm == FPRounding_ODD)
918 int_mant |= 1;
919
920 // Handle overflow:
921 if (biased_exp >= (int)FP64_EXP_INF) {
922 *flags |= FPLIB_OFC | FPLIB_IXC;
923 if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) ||
924 (rm == FPLIB_RM && sgn)) {
925 return fp64_infinity(sgn);
926 } else {
927 return fp64_max_normal(sgn);
928 }
929 }
930
931 if (error) {
932 *flags |= FPLIB_IXC;
933 }
934
935 return fp64_pack(sgn, biased_exp, int_mant);
936}
937
938static uint64_t
939fp64_round(int sgn, int exp, uint64_t mnt, int mode, int *flags)
940{
941 return fp64_round_(sgn, exp, mnt, mode & 3, mode, flags);
942}
943
944static int
945fp16_compare_eq(uint16_t a, uint16_t b, int mode, int *flags)
946{
947 int a_sgn, a_exp, b_sgn, b_exp;
948 uint16_t a_mnt, b_mnt;
949
950 fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
951 fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
952
953 if (fp16_is_NaN(a_exp, a_mnt) ||
954 fp16_is_NaN(b_exp, b_mnt)) {
955 if (fp16_is_signalling_NaN(a_exp, a_mnt) ||
956 fp16_is_signalling_NaN(b_exp, b_mnt))
957 *flags |= FPLIB_IOC;
958 return 0;
959 }
960 return a == b || (!a_mnt && !b_mnt);
961}
962
963static int
964fp16_compare_ge(uint16_t a, uint16_t b, int mode, int *flags)
965{
966 int a_sgn, a_exp, b_sgn, b_exp;
967 uint16_t a_mnt, b_mnt;
968
969 fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
970 fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
971
972 if (fp16_is_NaN(a_exp, a_mnt) ||
973 fp16_is_NaN(b_exp, b_mnt)) {
974 *flags |= FPLIB_IOC;
975 return 0;
976 }
977 if (!a_mnt && !b_mnt)
978 return 1;
979 if (a_sgn != b_sgn)
980 return b_sgn;
981 if (a_exp != b_exp)
982 return a_sgn ^ (a_exp > b_exp);
983 if (a_mnt != b_mnt)
984 return a_sgn ^ (a_mnt > b_mnt);
985 return 1;
986}
987
988static int
989fp16_compare_gt(uint16_t a, uint16_t b, int mode, int *flags)
990{
991 int a_sgn, a_exp, b_sgn, b_exp;
992 uint16_t a_mnt, b_mnt;
993
994 fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
995 fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
996
997 if (fp16_is_NaN(a_exp, a_mnt) ||
998 fp16_is_NaN(b_exp, b_mnt)) {
999 *flags |= FPLIB_IOC;
1000 return 0;
1001 }
1002 if (!a_mnt && !b_mnt)
1003 return 0;
1004 if (a_sgn != b_sgn)
1005 return b_sgn;
1006 if (a_exp != b_exp)
1007 return a_sgn ^ (a_exp > b_exp);
1008 if (a_mnt != b_mnt)
1009 return a_sgn ^ (a_mnt > b_mnt);
1010 return 0;
1011}
1012
1013static int
1014fp16_compare_un(uint16_t a, uint16_t b, int mode, int *flags)
1015{
1016 int a_sgn, a_exp, b_sgn, b_exp;
1017 uint16_t a_mnt, b_mnt;
1018
1019 fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1020 fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1021
1022 if (fp16_is_NaN(a_exp, a_mnt) ||
1023 fp16_is_NaN(b_exp, b_mnt)) {
1024 if (fp16_is_signalling_NaN(a_exp, a_mnt) ||
1025 fp16_is_signalling_NaN(b_exp, b_mnt))
1026 *flags |= FPLIB_IOC;
1027 return 1;
1028 }
1029 return 0;
1030}
1031
1032static int
1033fp32_compare_eq(uint32_t a, uint32_t b, int mode, int *flags)
1034{
1035 int a_sgn, a_exp, b_sgn, b_exp;
1036 uint32_t a_mnt, b_mnt;
1037
1038 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1039 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1040
1041 if (fp32_is_NaN(a_exp, a_mnt) ||
1042 fp32_is_NaN(b_exp, b_mnt)) {
1043 if (fp32_is_signalling_NaN(a_exp, a_mnt) ||
1044 fp32_is_signalling_NaN(b_exp, b_mnt))
1045 *flags |= FPLIB_IOC;
1046 return 0;
1047 }
1048 return a == b || (!a_mnt && !b_mnt);
1049}
1050
1051static int
1052fp32_compare_ge(uint32_t a, uint32_t b, int mode, int *flags)
1053{
1054 int a_sgn, a_exp, b_sgn, b_exp;
1055 uint32_t a_mnt, b_mnt;
1056
1057 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1058 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1059
1060 if (fp32_is_NaN(a_exp, a_mnt) ||
1061 fp32_is_NaN(b_exp, b_mnt)) {
1062 *flags |= FPLIB_IOC;
1063 return 0;
1064 }
1065 if (!a_mnt && !b_mnt)
1066 return 1;
1067 if (a_sgn != b_sgn)
1068 return b_sgn;
1069 if (a_exp != b_exp)
1070 return a_sgn ^ (a_exp > b_exp);
1071 if (a_mnt != b_mnt)
1072 return a_sgn ^ (a_mnt > b_mnt);
1073 return 1;
1074}
1075
1076static int
1077fp32_compare_gt(uint32_t a, uint32_t b, int mode, int *flags)
1078{
1079 int a_sgn, a_exp, b_sgn, b_exp;
1080 uint32_t a_mnt, b_mnt;
1081
1082 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1083 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1084
1085 if (fp32_is_NaN(a_exp, a_mnt) ||
1086 fp32_is_NaN(b_exp, b_mnt)) {
1087 *flags |= FPLIB_IOC;
1088 return 0;
1089 }
1090 if (!a_mnt && !b_mnt)
1091 return 0;
1092 if (a_sgn != b_sgn)
1093 return b_sgn;
1094 if (a_exp != b_exp)
1095 return a_sgn ^ (a_exp > b_exp);
1096 if (a_mnt != b_mnt)
1097 return a_sgn ^ (a_mnt > b_mnt);
1098 return 0;
1099}
1100
1101static int
1102fp32_compare_un(uint32_t a, uint32_t b, int mode, int *flags)
1103{
1104 int a_sgn, a_exp, b_sgn, b_exp;
1105 uint32_t a_mnt, b_mnt;
1106
1107 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1108 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1109
1110 if (fp32_is_NaN(a_exp, a_mnt) ||
1111 fp32_is_NaN(b_exp, b_mnt)) {
1112 if (fp32_is_signalling_NaN(a_exp, a_mnt) ||
1113 fp32_is_signalling_NaN(b_exp, b_mnt))
1114 *flags |= FPLIB_IOC;
1115 return 1;
1116 }
1117 return 0;
1118}
1119
1120static int
1121fp64_compare_eq(uint64_t a, uint64_t b, int mode, int *flags)
1122{
1123 int a_sgn, a_exp, b_sgn, b_exp;
1124 uint64_t a_mnt, b_mnt;
1125
1126 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1127 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1128
1129 if (fp64_is_NaN(a_exp, a_mnt) ||
1130 fp64_is_NaN(b_exp, b_mnt)) {
1131 if (fp64_is_signalling_NaN(a_exp, a_mnt) ||
1132 fp64_is_signalling_NaN(b_exp, b_mnt))
1133 *flags |= FPLIB_IOC;
1134 return 0;
1135 }
1136 return a == b || (!a_mnt && !b_mnt);
1137}
1138
1139static int
1140fp64_compare_ge(uint64_t a, uint64_t b, int mode, int *flags)
1141{
1142 int a_sgn, a_exp, b_sgn, b_exp;
1143 uint64_t a_mnt, b_mnt;
1144
1145 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1146 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1147
1148 if (fp64_is_NaN(a_exp, a_mnt) ||
1149 fp64_is_NaN(b_exp, b_mnt)) {
1150 *flags |= FPLIB_IOC;
1151 return 0;
1152 }
1153 if (!a_mnt && !b_mnt)
1154 return 1;
1155 if (a_sgn != b_sgn)
1156 return b_sgn;
1157 if (a_exp != b_exp)
1158 return a_sgn ^ (a_exp > b_exp);
1159 if (a_mnt != b_mnt)
1160 return a_sgn ^ (a_mnt > b_mnt);
1161 return 1;
1162}
1163
1164static int
1165fp64_compare_gt(uint64_t a, uint64_t b, int mode, int *flags)
1166{
1167 int a_sgn, a_exp, b_sgn, b_exp;
1168 uint64_t a_mnt, b_mnt;
1169
1170 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1171 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1172
1173 if (fp64_is_NaN(a_exp, a_mnt) ||
1174 fp64_is_NaN(b_exp, b_mnt)) {
1175 *flags |= FPLIB_IOC;
1176 return 0;
1177 }
1178 if (!a_mnt && !b_mnt)
1179 return 0;
1180 if (a_sgn != b_sgn)
1181 return b_sgn;
1182 if (a_exp != b_exp)
1183 return a_sgn ^ (a_exp > b_exp);
1184 if (a_mnt != b_mnt)
1185 return a_sgn ^ (a_mnt > b_mnt);
1186 return 0;
1187}
1188
1189static int
1190fp64_compare_un(uint64_t a, uint64_t b, int mode, int *flags)
1191{
1192 int a_sgn, a_exp, b_sgn, b_exp;
1193 uint64_t a_mnt, b_mnt;
1194
1195 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1196 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1197
1198 if (fp64_is_NaN(a_exp, a_mnt) ||
1199 fp64_is_NaN(b_exp, b_mnt)) {
1200 if (fp64_is_signalling_NaN(a_exp, a_mnt) ||
1201 fp64_is_signalling_NaN(b_exp, b_mnt))
1202 *flags |= FPLIB_IOC;
1203 return 1;
1204 }
1205 return 0;
1206}
1207
1208static uint16_t
1209fp16_add(uint16_t a, uint16_t b, int neg, int mode, int *flags)
1210{
1211 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1212 uint16_t a_mnt, b_mnt, x, x_mnt;
1213
1214 fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1215 fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1216
1217 if ((x = fp16_process_NaNs(a, b, mode, flags))) {
1218 return x;
1219 }
1220
1221 b_sgn ^= neg;
1222
1223 // Handle infinities and zeroes:
1224 if (a_exp == FP16_EXP_INF && b_exp == FP16_EXP_INF && a_sgn != b_sgn) {
1225 *flags |= FPLIB_IOC;
1226 return fp16_defaultNaN();
1227 } else if (a_exp == FP16_EXP_INF) {
1228 return fp16_infinity(a_sgn);
1229 } else if (b_exp == FP16_EXP_INF) {
1230 return fp16_infinity(b_sgn);
1231 } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) {
1232 return fp16_zero(a_sgn);
1233 }
1234
1235 a_mnt <<= 3;
1236 b_mnt <<= 3;
1237 if (a_exp >= b_exp) {
1238 b_mnt = (lsr16(b_mnt, a_exp - b_exp) |
1239 !!(b_mnt & (lsl16(1, a_exp - b_exp) - 1)));
1240 b_exp = a_exp;
1241 } else {
1242 a_mnt = (lsr16(a_mnt, b_exp - a_exp) |
1243 !!(a_mnt & (lsl16(1, b_exp - a_exp) - 1)));
1244 a_exp = b_exp;
1245 }
1246 x_sgn = a_sgn;
1247 x_exp = a_exp;
1248 if (a_sgn == b_sgn) {
1249 x_mnt = a_mnt + b_mnt;
1250 } else if (a_mnt >= b_mnt) {
1251 x_mnt = a_mnt - b_mnt;
1252 } else {
1253 x_sgn ^= 1;
1254 x_mnt = b_mnt - a_mnt;
1255 }
1256
1257 if (!x_mnt) {
1258 // Sign of exact zero result depends on rounding mode
1259 return fp16_zero((mode & 3) == 2);
1260 }
1261
1262 x_mnt = fp16_normalise(x_mnt, &x_exp);
1263
1264 return fp16_round(x_sgn, x_exp + FP16_EXP_BITS - 3, x_mnt << 1,
1265 mode, flags);
1266}
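// Two details of the adder above are worth spelling out. First, the
// alignment shift ORs every discarded bit into the least significant bit
// of the shifted operand (the classic sticky bit): aligning 0b10110 by 3
// gives lsr(0b10110, 3) | !!(0b110) = 0b11, so rounding can still see
// that the sum is inexact. Second, an exactly-cancelling sum returns +0
// except when (mode & 3) == 2, round towards minus infinity, where
// IEEE 754 requires -0.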
1267
1268static uint32_t
1269fp32_add(uint32_t a, uint32_t b, int neg, int mode, int *flags)
1270{
1271 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1272 uint32_t a_mnt, b_mnt, x, x_mnt;
1273
1274 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1275 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1276
1277 if ((x = fp32_process_NaNs(a, b, mode, flags))) {
1278 return x;
1279 }
1280
1281 b_sgn ^= neg;
1282
1283 // Handle infinities and zeroes:
1284 if (a_exp == FP32_EXP_INF && b_exp == FP32_EXP_INF && a_sgn != b_sgn) {
1285 *flags |= FPLIB_IOC;
1286 return fp32_defaultNaN();
1287 } else if (a_exp == FP32_EXP_INF) {
1288 return fp32_infinity(a_sgn);
1289 } else if (b_exp == FP32_EXP_INF) {
1290 return fp32_infinity(b_sgn);
1291 } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) {
1292 return fp32_zero(a_sgn);
1293 }
1294
1295 a_mnt <<= 3;
1296 b_mnt <<= 3;
1297 if (a_exp >= b_exp) {
1298 b_mnt = (lsr32(b_mnt, a_exp - b_exp) |
1299 !!(b_mnt & (lsl32(1, a_exp - b_exp) - 1)));
1300 b_exp = a_exp;
1301 } else {
1302 a_mnt = (lsr32(a_mnt, b_exp - a_exp) |
1303 !!(a_mnt & (lsl32(1, b_exp - a_exp) - 1)));
1304 a_exp = b_exp;
1305 }
1306 x_sgn = a_sgn;
1307 x_exp = a_exp;
1308 if (a_sgn == b_sgn) {
1309 x_mnt = a_mnt + b_mnt;
1310 } else if (a_mnt >= b_mnt) {
1311 x_mnt = a_mnt - b_mnt;
1312 } else {
1313 x_sgn ^= 1;
1314 x_mnt = b_mnt - a_mnt;
1315 }
1316
1317 if (!x_mnt) {
1318 // Sign of exact zero result depends on rounding mode
1319 return fp32_zero((mode & 3) == 2);
1320 }
1321
1322 x_mnt = fp32_normalise(x_mnt, &x_exp);
1323
1324 return fp32_round(x_sgn, x_exp + FP32_EXP_BITS - 3, x_mnt << 1,
1325 mode, flags);
1326}
1327
1328static uint64_t
1329fp64_add(uint64_t a, uint64_t b, int neg, int mode, int *flags)
1330{
1331 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1332 uint64_t a_mnt, b_mnt, x, x_mnt;
1333
1334 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1335 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1336
1337 if ((x = fp64_process_NaNs(a, b, mode, flags))) {
1338 return x;
1339 }
1340
1341 b_sgn ^= neg;
1342
1343 // Handle infinities and zeroes:
1344 if (a_exp == FP64_EXP_INF && b_exp == FP64_EXP_INF && a_sgn != b_sgn) {
1345 *flags |= FPLIB_IOC;
1346 return fp64_defaultNaN();
1347 } else if (a_exp == FP64_EXP_INF) {
1348 return fp64_infinity(a_sgn);
1349 } else if (b_exp == FP64_EXP_INF) {
1350 return fp64_infinity(b_sgn);
1351 } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) {
1352 return fp64_zero(a_sgn);
1353 }
1354
1355 a_mnt <<= 3;
1356 b_mnt <<= 3;
1357 if (a_exp >= b_exp) {
1358 b_mnt = (lsr64(b_mnt, a_exp - b_exp) |
1359 !!(b_mnt & (lsl64(1, a_exp - b_exp) - 1)));
1360 b_exp = a_exp;
1361 } else {
1362 a_mnt = (lsr64(a_mnt, b_exp - a_exp) |
1363 !!(a_mnt & (lsl64(1, b_exp - a_exp) - 1)));
1364 a_exp = b_exp;
1365 }
1366 x_sgn = a_sgn;
1367 x_exp = a_exp;
1368 if (a_sgn == b_sgn) {
1369 x_mnt = a_mnt + b_mnt;
1370 } else if (a_mnt >= b_mnt) {
1371 x_mnt = a_mnt - b_mnt;
1372 } else {
1373 x_sgn ^= 1;
1374 x_mnt = b_mnt - a_mnt;
1375 }
1376
1377 if (!x_mnt) {
1378 // Sign of exact zero result depends on rounding mode
1379 return fp64_zero((mode & 3) == 2);
1380 }
1381
1382 x_mnt = fp64_normalise(x_mnt, &x_exp);
1383
1384 return fp64_round(x_sgn, x_exp + FP64_EXP_BITS - 3, x_mnt << 1,
1385 mode, flags);
1386}
1387
1388static uint16_t
1389fp16_mul(uint16_t a, uint16_t b, int mode, int *flags)
1390{
1391 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1392 uint16_t a_mnt, b_mnt, x;
1393 uint32_t x_mnt;
1394
1395 fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1396 fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1397
1398 if ((x = fp16_process_NaNs(a, b, mode, flags))) {
1399 return x;
1400 }
1401
1402 // Handle infinities and zeroes:
1403 if ((a_exp == FP16_EXP_INF && !b_mnt) ||
1404 (b_exp == FP16_EXP_INF && !a_mnt)) {
1405 *flags |= FPLIB_IOC;
1406 return fp16_defaultNaN();
1407 } else if (a_exp == FP16_EXP_INF || b_exp == FP16_EXP_INF) {
1408 return fp16_infinity(a_sgn ^ b_sgn);
1409 } else if (!a_mnt || !b_mnt) {
1410 return fp16_zero(a_sgn ^ b_sgn);
1411 }
1412
1413 // Multiply and normalise:
1414 x_sgn = a_sgn ^ b_sgn;
1415 x_exp = a_exp + b_exp - FP16_EXP_BIAS + 2 * FP16_EXP_BITS + 1;
1416 x_mnt = (uint32_t)a_mnt * b_mnt;
1417 x_mnt = fp32_normalise(x_mnt, &x_exp);
1418
1419 // Convert to FP16_BITS bits, collapsing error into bottom bit:
1420 x_mnt = lsr32(x_mnt, FP16_BITS - 1) | !!lsl32(x_mnt, FP16_BITS + 1);
1421
1422 return fp16_round(x_sgn, x_exp, x_mnt, mode, flags);
1423}
1424
1425static uint32_t
1426fp32_mul(uint32_t a, uint32_t b, int mode, int *flags)
1427{
1428 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1429 uint32_t a_mnt, b_mnt, x;
1430 uint64_t x_mnt;
1431
1432 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1433 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1434
1435 if ((x = fp32_process_NaNs(a, b, mode, flags))) {
1436 return x;
1437 }
1438
1439 // Handle infinities and zeroes:
1440 if ((a_exp == FP32_EXP_INF && !b_mnt) ||
1441 (b_exp == FP32_EXP_INF && !a_mnt)) {
1442 *flags |= FPLIB_IOC;
1443 return fp32_defaultNaN();
1444 } else if (a_exp == FP32_EXP_INF || b_exp == FP32_EXP_INF) {
1445 return fp32_infinity(a_sgn ^ b_sgn);
1446 } else if (!a_mnt || !b_mnt) {
1447 return fp32_zero(a_sgn ^ b_sgn);
1448 }
1449
1450 // Multiply and normalise:
1451 x_sgn = a_sgn ^ b_sgn;
1452 x_exp = a_exp + b_exp - FP32_EXP_BIAS + 2 * FP32_EXP_BITS + 1;
1453 x_mnt = (uint64_t)a_mnt * b_mnt;
1454 x_mnt = fp64_normalise(x_mnt, &x_exp);
1455
1456 // Convert to FP32_BITS bits, collapsing error into bottom bit:
1457 x_mnt = lsr64(x_mnt, FP32_BITS - 1) | !!lsl64(x_mnt, FP32_BITS + 1);
1458
1459 return fp32_round(x_sgn, x_exp, x_mnt, mode, flags);
1460}
1461
1462static uint64_t
1463fp64_mul(uint64_t a, uint64_t b, int mode, int *flags)
1464{
1465 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1466 uint64_t a_mnt, b_mnt, x;
1467 uint64_t x0_mnt, x1_mnt;
1468
1469 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1470 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1471
1472 if ((x = fp64_process_NaNs(a, b, mode, flags))) {
1473 return x;
1474 }
1475
1476 // Handle infinities and zeroes:
1477 if ((a_exp == FP64_EXP_INF && !b_mnt) ||
1478 (b_exp == FP64_EXP_INF && !a_mnt)) {
1479 *flags |= FPLIB_IOC;
1480 return fp64_defaultNaN();
1481 } else if (a_exp == FP64_EXP_INF || b_exp == FP64_EXP_INF) {
1482 return fp64_infinity(a_sgn ^ b_sgn);
1483 } else if (!a_mnt || !b_mnt) {
1484 return fp64_zero(a_sgn ^ b_sgn);
1485 }
1486
1487 // Multiply and normalise:
1488 x_sgn = a_sgn ^ b_sgn;
1489 x_exp = a_exp + b_exp - FP64_EXP_BIAS + 2 * FP64_EXP_BITS + 1;
1490 mul62x62(&x0_mnt, &x1_mnt, a_mnt, b_mnt);
1491 fp128_normalise(&x0_mnt, &x1_mnt, &x_exp);
1492
1493 // Convert to FP64_BITS bits, collapsing error into bottom bit:
1494 x0_mnt = x1_mnt << 1 | !!x0_mnt;
1495
1496 return fp64_round(x_sgn, x_exp, x0_mnt, mode, flags);
1497}
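// fp64_mul needs the full 106-bit product of two 53-bit significands, so
// it forms a 128-bit result with mul62x62 and then folds the low 64 bits
// into a single sticky bit (x1_mnt << 1 | !!x0_mnt) before rounding:
// correct IEEE rounding only needs to know whether anything non-zero was
// dropped, not the exact value of the dropped bits.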
1498
1499static uint16_t
1500fp16_muladd(uint16_t a, uint16_t b, uint16_t c, int scale,
1501 int mode, int *flags)
1502{
1503 int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp;
1504 uint16_t a_mnt, b_mnt, c_mnt, x;
1505 uint32_t x_mnt, y_mnt;
1506
1507 fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1508 fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1509 fp16_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags);
1510
1511 x = fp16_process_NaNs3(a, b, c, mode, flags);
1512
1513 // Quiet NaN added to product of zero and infinity:
1514 if (fp16_is_quiet_NaN(a_exp, a_mnt) &&
1515 ((!b_mnt && fp16_is_infinity(c_exp, c_mnt)) ||
1516 (!c_mnt && fp16_is_infinity(b_exp, b_mnt)))) {
1517 x = fp16_defaultNaN();
1518 *flags |= FPLIB_IOC;
1519 }
1520
1521 if (x) {
1522 return x;
1523 }
1524
1525 // Handle infinities and zeroes:
1526 if ((b_exp == FP16_EXP_INF && !c_mnt) ||
1527 (c_exp == FP16_EXP_INF && !b_mnt) ||
1528 (a_exp == FP16_EXP_INF &&
1529 (b_exp == FP16_EXP_INF || c_exp == FP16_EXP_INF) &&
1530 (a_sgn != (b_sgn ^ c_sgn)))) {
1531 *flags |= FPLIB_IOC;
1532 return fp16_defaultNaN();
1533 }
1534 if (a_exp == FP16_EXP_INF)
1535 return fp16_infinity(a_sgn);
1536 if (b_exp == FP16_EXP_INF || c_exp == FP16_EXP_INF)
1537 return fp16_infinity(b_sgn ^ c_sgn);
1538 if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn))
1539 return fp16_zero(a_sgn);
1540
1541 x_sgn = a_sgn;
1542 x_exp = a_exp + 2 * FP16_EXP_BITS - 3;
1543 x_mnt = (uint32_t)a_mnt << (FP16_MANT_BITS + 4);
1544
1545 // Multiply:
1546 y_sgn = b_sgn ^ c_sgn;
1547 y_exp = b_exp + c_exp - FP16_EXP_BIAS + 2 * FP16_EXP_BITS + 1 - 3;
1548 y_mnt = (uint32_t)b_mnt * c_mnt << 3;
1549 if (!y_mnt) {
1550 y_exp = x_exp;
1551 }
1552
1553 // Add:
1554 if (x_exp >= y_exp) {
1555 y_mnt = (lsr32(y_mnt, x_exp - y_exp) |
1556 !!(y_mnt & (lsl32(1, x_exp - y_exp) - 1)));
1557 y_exp = x_exp;
1558 } else {
1559 x_mnt = (lsr32(x_mnt, y_exp - x_exp) |
1560 !!(x_mnt & (lsl32(1, y_exp - x_exp) - 1)));
1561 x_exp = y_exp;
1562 }
1563 if (x_sgn == y_sgn) {
1564 x_mnt = x_mnt + y_mnt;
1565 } else if (x_mnt >= y_mnt) {
1566 x_mnt = x_mnt - y_mnt;
1567 } else {
1568 x_sgn ^= 1;
1569 x_mnt = y_mnt - x_mnt;
1570 }
1571
1572 if (!x_mnt) {
1573 // Sign of exact zero result depends on rounding mode
1574 return fp16_zero((mode & 3) == 2);
1575 }
1576
1577 // Normalise into FP16_BITS bits, collapsing error into bottom bit:
1578 x_mnt = fp32_normalise(x_mnt, &x_exp);
1579 x_mnt = x_mnt >> (FP16_BITS - 1) | !!(uint16_t)(x_mnt << 1);
1580
1581 return fp16_round(x_sgn, x_exp + scale, x_mnt, mode, flags);
1582}
1583
1584static uint32_t
1585fp32_muladd(uint32_t a, uint32_t b, uint32_t c, int scale,
1586 int mode, int *flags)
1587{
1588 int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp;
1589 uint32_t a_mnt, b_mnt, c_mnt, x;
1590 uint64_t x_mnt, y_mnt;
1591
1592 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1593 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1594 fp32_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags);
1595
1596 x = fp32_process_NaNs3(a, b, c, mode, flags);
1597
1598 // Quiet NaN added to product of zero and infinity:
1599 if (fp32_is_quiet_NaN(a_exp, a_mnt) &&
1600 ((!b_mnt && fp32_is_infinity(c_exp, c_mnt)) ||
1601 (!c_mnt && fp32_is_infinity(b_exp, b_mnt)))) {
1602 x = fp32_defaultNaN();
1603 *flags |= FPLIB_IOC;
1604 }
1605
1606 if (x) {
1607 return x;
1608 }
1609
1610 // Handle infinities and zeroes:
1611 if ((b_exp == FP32_EXP_INF && !c_mnt) ||
1612 (c_exp == FP32_EXP_INF && !b_mnt) ||
1613 (a_exp == FP32_EXP_INF &&
1614 (b_exp == FP32_EXP_INF || c_exp == FP32_EXP_INF) &&
1615 (a_sgn != (b_sgn ^ c_sgn)))) {
1616 *flags |= FPLIB_IOC;
1617 return fp32_defaultNaN();
1618 }
1619 if (a_exp == FP32_EXP_INF)
1620 return fp32_infinity(a_sgn);
1621 if (b_exp == FP32_EXP_INF || c_exp == FP32_EXP_INF)
1622 return fp32_infinity(b_sgn ^ c_sgn);
1623 if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn))
1624 return fp32_zero(a_sgn);
1625
1626 x_sgn = a_sgn;
1627 x_exp = a_exp + 2 * FP32_EXP_BITS - 3;
1628 x_mnt = (uint64_t)a_mnt << (FP32_MANT_BITS + 4);
1629
1630 // Multiply:
1631 y_sgn = b_sgn ^ c_sgn;
1632 y_exp = b_exp + c_exp - FP32_EXP_BIAS + 2 * FP32_EXP_BITS + 1 - 3;
1633 y_mnt = (uint64_t)b_mnt * c_mnt << 3;
1634 if (!y_mnt) {
1635 y_exp = x_exp;
1636 }
1637
1638 // Add:
1639 if (x_exp >= y_exp) {
1640 y_mnt = (lsr64(y_mnt, x_exp - y_exp) |
1641 !!(y_mnt & (lsl64(1, x_exp - y_exp) - 1)));
1642 y_exp = x_exp;
1643 } else {
1644 x_mnt = (lsr64(x_mnt, y_exp - x_exp) |
1645 !!(x_mnt & (lsl64(1, y_exp - x_exp) - 1)));
1646 x_exp = y_exp;
1647 }
1648 if (x_sgn == y_sgn) {
1649 x_mnt = x_mnt + y_mnt;
1650 } else if (x_mnt >= y_mnt) {
1651 x_mnt = x_mnt - y_mnt;
1652 } else {
1653 x_sgn ^= 1;
1654 x_mnt = y_mnt - x_mnt;
1655 }
1656
1657 if (!x_mnt) {
1658 // Sign of exact zero result depends on rounding mode
1659 return fp32_zero((mode & 3) == 2);
1660 }
1661
1662 // Normalise into FP32_BITS bits, collapsing error into bottom bit:
1663 x_mnt = fp64_normalise(x_mnt, &x_exp);
1664 x_mnt = x_mnt >> (FP32_BITS - 1) | !!(uint32_t)(x_mnt << 1);
1665
1666 return fp32_round(x_sgn, x_exp + scale, x_mnt, mode, flags);
1667}
1668
1669static uint64_t
1670fp64_muladd(uint64_t a, uint64_t b, uint64_t c, int scale,
1671 int mode, int *flags)
1672{
1673 int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp;
1674 uint64_t a_mnt, b_mnt, c_mnt, x;
1675 uint64_t x0_mnt, x1_mnt, y0_mnt, y1_mnt;
1676
1677 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1678 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1679 fp64_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags);
1680
1681 x = fp64_process_NaNs3(a, b, c, mode, flags);
1682
1683 // Quiet NaN added to product of zero and infinity:
1684 if (fp64_is_quiet_NaN(a_exp, a_mnt) &&
1685 ((!b_mnt && fp64_is_infinity(c_exp, c_mnt)) ||
1686 (!c_mnt && fp64_is_infinity(b_exp, b_mnt)))) {
1687 x = fp64_defaultNaN();
1688 *flags |= FPLIB_IOC;
1689 }
1690
1691 if (x) {
1692 return x;
1693 }
1694
1695 // Handle infinities and zeroes:
1696 if ((b_exp == FP64_EXP_INF && !c_mnt) ||
1697 (c_exp == FP64_EXP_INF && !b_mnt) ||
1698 (a_exp == FP64_EXP_INF &&
1699 (b_exp == FP64_EXP_INF || c_exp == FP64_EXP_INF) &&
1700 (a_sgn != (b_sgn ^ c_sgn)))) {
1701 *flags |= FPLIB_IOC;
1702 return fp64_defaultNaN();
1703 }
1704 if (a_exp == FP64_EXP_INF)
1705 return fp64_infinity(a_sgn);
1706 if (b_exp == FP64_EXP_INF || c_exp == FP64_EXP_INF)
1707 return fp64_infinity(b_sgn ^ c_sgn);
1708 if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn))
1709 return fp64_zero(a_sgn);
1710
1711 x_sgn = a_sgn;
1712 x_exp = a_exp + FP64_EXP_BITS;
1713 x0_mnt = 0;
1714 x1_mnt = a_mnt;
1715
1716 // Multiply:
1717 y_sgn = b_sgn ^ c_sgn;
1718 y_exp = b_exp + c_exp - FP64_EXP_BIAS + 2 * FP64_EXP_BITS + 1 - 3;
1719 mul62x62(&y0_mnt, &y1_mnt, b_mnt, c_mnt << 3);
1720 if (!y0_mnt && !y1_mnt) {
1721 y_exp = x_exp;
1722 }
1723
1724 // Add:
1725 if (x_exp >= y_exp) {
1726 uint64_t t0, t1;
1727 lsl128(&t0, &t1, y0_mnt, y1_mnt,
1728 x_exp - y_exp < 128 ? 128 - (x_exp - y_exp) : 0);
1729 lsr128(&y0_mnt, &y1_mnt, y0_mnt, y1_mnt, x_exp - y_exp);
1730 y0_mnt |= !!(t0 | t1);
1731 y_exp = x_exp;
1732 } else {
1733 uint64_t t0, t1;
1734 lsl128(&t0, &t1, x0_mnt, x1_mnt,
1735 y_exp - x_exp < 128 ? 128 - (y_exp - x_exp) : 0);
1736 lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y_exp - x_exp);
1737 x0_mnt |= !!(t0 | t1);
1738 x_exp = y_exp;
1739 }
1740 if (x_sgn == y_sgn) {
1741 add128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y0_mnt, y1_mnt);
1742 } else if (cmp128(x0_mnt, x1_mnt, y0_mnt, y1_mnt) >= 0) {
1743 sub128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y0_mnt, y1_mnt);
1744 } else {
1745 x_sgn ^= 1;
1746 sub128(&x0_mnt, &x1_mnt, y0_mnt, y1_mnt, x0_mnt, x1_mnt);
1747 }
1748
1749 if (!x0_mnt && !x1_mnt) {
1750 // Sign of exact zero result depends on rounding mode
1751 return fp64_zero((mode & 3) == 2);
1752 }
1753
1754 // Normalise into FP64_BITS bits, collapsing error into bottom bit:
1755 fp128_normalise(&x0_mnt, &x1_mnt, &x_exp);
1756 x0_mnt = x1_mnt << 1 | !!x0_mnt;
1757
1758 return fp64_round(x_sgn, x_exp + scale, x0_mnt, mode, flags);
1759}
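// Note that fp64_muladd accumulates a + b * c at full precision in a
// 128-bit significand and rounds exactly once at the end; this single
// rounding is what distinguishes a fused multiply-add from a multiply
// followed by an add, each of which would round separately.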
1760
1761static uint16_t
1762fp16_div(uint16_t a, uint16_t b, int mode, int *flags)
1763{
1764 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1765 uint16_t a_mnt, b_mnt, x;
1766 uint32_t x_mnt;
1767
1768 fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1769 fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1770
1771 if ((x = fp16_process_NaNs(a, b, mode, flags)))
1772 return x;
1773
1774 // Handle infinities and zeroes:
1775 if ((a_exp == FP16_EXP_INF && b_exp == FP16_EXP_INF) ||
1776 (!a_mnt && !b_mnt)) {
1777 *flags |= FPLIB_IOC;
1778 return fp16_defaultNaN();
1779 }
1780 if (a_exp == FP16_EXP_INF || !b_mnt) {
1781 if (a_exp != FP16_EXP_INF)
1782 *flags |= FPLIB_DZC;
1783 return fp16_infinity(a_sgn ^ b_sgn);
1784 }
1785 if (!a_mnt || b_exp == FP16_EXP_INF)
1786 return fp16_zero(a_sgn ^ b_sgn);
1787
1788 // Divide, setting bottom bit if inexact:
1789 a_mnt = fp16_normalise(a_mnt, &a_exp);
1790 x_sgn = a_sgn ^ b_sgn;
1791 x_exp = a_exp - b_exp + (FP16_EXP_BIAS + FP16_BITS + 2 * FP16_EXP_BITS - 3);
1792 x_mnt = ((uint32_t)a_mnt << (FP16_MANT_BITS - FP16_EXP_BITS + 3)) / b_mnt;
1793 x_mnt |= (x_mnt * b_mnt !=
1794 (uint32_t)a_mnt << (FP16_MANT_BITS - FP16_EXP_BITS + 3));
1795
1796 // Normalise into FP16_BITS bits, collapsing error into bottom bit:
1797 x_mnt = fp32_normalise(x_mnt, &x_exp);
1798 x_mnt = x_mnt >> (FP16_BITS - 1) | !!(uint16_t)(x_mnt << 1);
1799
1800 return fp16_round(x_sgn, x_exp, x_mnt, mode, flags);
1801}
1802
1803static uint32_t
1804fp32_div(uint32_t a, uint32_t b, int mode, int *flags)
1805{
1806 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1807 uint32_t a_mnt, b_mnt, x;
1808 uint64_t x_mnt;
1809
1810 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1811 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1812
1813 if ((x = fp32_process_NaNs(a, b, mode, flags)))
1814 return x;
1815
1816 // Handle infinities and zeroes:
1817 if ((a_exp == FP32_EXP_INF && b_exp == FP32_EXP_INF) ||
1818 (!a_mnt && !b_mnt)) {
1819 *flags |= FPLIB_IOC;
1820 return fp32_defaultNaN();
1821 }
1822 if (a_exp == FP32_EXP_INF || !b_mnt) {
1823 if (a_exp != FP32_EXP_INF)
1824 *flags |= FPLIB_DZC;
1825 return fp32_infinity(a_sgn ^ b_sgn);
1826 }
1827 if (!a_mnt || b_exp == FP32_EXP_INF)
1828 return fp32_zero(a_sgn ^ b_sgn);
1829
1830 // Divide, setting bottom bit if inexact:
1831 a_mnt = fp32_normalise(a_mnt, &a_exp);
1832 x_sgn = a_sgn ^ b_sgn;
1833 x_exp = a_exp - b_exp + (FP32_EXP_BIAS + FP32_BITS + 2 * FP32_EXP_BITS - 3);
1834 x_mnt = ((uint64_t)a_mnt << (FP32_MANT_BITS - FP32_EXP_BITS + 3)) / b_mnt;
1835 x_mnt |= (x_mnt * b_mnt !=
1836 (uint64_t)a_mnt << (FP32_MANT_BITS - FP32_EXP_BITS + 3));
1837
1838 // Normalise into FP32_BITS bits, collapsing error into bottom bit:
1839 x_mnt = fp64_normalise(x_mnt, &x_exp);
1840 x_mnt = x_mnt >> (FP32_BITS - 1) | !!(uint32_t)(x_mnt << 1);
1841
1842 return fp32_round(x_sgn, x_exp, x_mnt, mode, flags);
1843}
1844
1845static uint64_t
1846fp64_div(uint64_t a, uint64_t b, int mode, int *flags)
1847{
1848 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp, c;
1849 uint64_t a_mnt, b_mnt, x, x_mnt, x0_mnt, x1_mnt;
1850
1851 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1852 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1853
1854 if ((x = fp64_process_NaNs(a, b, mode, flags)))
1855 return x;
1856
1857 // Handle infinities and zeroes:
1858 if ((a_exp == FP64_EXP_INF && b_exp == FP64_EXP_INF) ||
1859 (!a_mnt && !b_mnt)) {
1860 *flags |= FPLIB_IOC;
1861 return fp64_defaultNaN();
1862 }
1863 if (a_exp == FP64_EXP_INF || !b_mnt) {
1864 if (a_exp != FP64_EXP_INF)
1865 *flags |= FPLIB_DZC;
1866 return fp64_infinity(a_sgn ^ b_sgn);
1867 }
1868 if (!a_mnt || b_exp == FP64_EXP_INF)
1869 return fp64_zero(a_sgn ^ b_sgn);
1870
1871 // Find reciprocal of divisor with Newton-Raphson:
1872 a_mnt = fp64_normalise(a_mnt, &a_exp);
1873 b_mnt = fp64_normalise(b_mnt, &b_exp);
1874 x_mnt = ~(uint64_t)0 / (b_mnt >> 31);
1875 mul64x32(&x0_mnt, &x1_mnt, b_mnt, x_mnt);
1876 sub128(&x0_mnt, &x1_mnt, 0, (uint64_t)1 << 32, x0_mnt, x1_mnt);
1877 lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 32);
1878 mul64x32(&x0_mnt, &x1_mnt, x0_mnt, x_mnt);
1879 lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 33);
1880
1881 // Multiply by dividend:
1882 x_sgn = a_sgn ^ b_sgn;
1883 x_exp = a_exp - b_exp + FP64_EXP_BIAS + 8;
1884 mul62x62(&x0_mnt, &x1_mnt, x0_mnt, a_mnt >> 2);
1885 lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 4);
1886 x_mnt = x1_mnt;
1887
1888 // This is an underestimate, so try adding one:
1889 mul62x62(&x0_mnt, &x1_mnt, b_mnt >> 2, x_mnt + 1);
1890 c = cmp128(x0_mnt, x1_mnt, 0, a_mnt >> 11);
1891 if (c <= 0) {
1892 ++x_mnt;
1893 }
1894
1895 x_mnt = fp64_normalise(x_mnt, &x_exp);
1896
1897 return fp64_round(x_sgn, x_exp, x_mnt << 1 | !!c, mode, flags);
1898}
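// The divider above in conventional notation: starting from an estimate
// x0 ~= 1/b produced by an integer divide, one Newton-Raphson step
//   x1 = x0 * (2 - b * x0)
// roughly doubles the number of correct bits. The quotient estimate
// q = a * x1 is then an underestimate by at most one unit, so the code
// tries q + 1 and keeps it when (q + 1) * b <= a; the comparison result
// c doubles as the sticky bit, since c != 0 means the division was
// inexact.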
1899
1900static void
1901set_fpscr0(FPSCR &fpscr, int flags)
1902{
1903 if (flags & FPLIB_IDC) {
1904 fpscr.idc = 1;
1905 }
1906 if (flags & FPLIB_IOC) {
1907 fpscr.ioc = 1;
1908 }
1909 if (flags & FPLIB_DZC) {
1910 fpscr.dzc = 1;
1911 }
1912 if (flags & FPLIB_OFC) {
1913 fpscr.ofc = 1;
1914 }
1915 if (flags & FPLIB_UFC) {
1916 fpscr.ufc = 1;
1917 }
1918 if (flags & FPLIB_IXC) {
1919 fpscr.ixc = 1;
1920 }
1921}
1922
1923static uint16_t
1924fp16_scale(uint16_t a, int16_t b, int mode, int *flags)
1925{
1926 int a_sgn, a_exp;
1927 uint16_t a_mnt;
1928
1929 fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1930
1931 // Handle NaNs:
1932 if (fp16_is_NaN(a_exp, a_mnt)) {
1933 return fp16_process_NaN(a, mode, flags);
1934 }
1935
1936 // Handle zeroes:
1937 if (!a_mnt) {
1938 return fp16_zero(a_sgn);
1939 }
1940
1941 // Handle infinities:
1942 if (a_exp == FP16_EXP_INF) {
1943 return fp16_infinity(a_sgn);
1944 }
1945
1946 b = b < -300 ? -300 : b;
1947 b = b > 300 ? 300 : b;
1948 a_exp += b;
1949 a_mnt <<= 3;
1950
1951 a_mnt = fp16_normalise(a_mnt, &a_exp);
1952
1953 return fp16_round(a_sgn, a_exp + FP16_EXP_BITS - 3, a_mnt << 1,
1954 mode, flags);
1955}
1956
1957static uint32_t
1958fp32_scale(uint32_t a, int32_t b, int mode, int *flags)
1959{
1960 int a_sgn, a_exp;
1961 uint32_t a_mnt;
1962
1963 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1964
1965 // Handle NaNs:
1966 if (fp32_is_NaN(a_exp, a_mnt)) {
1967 return fp32_process_NaN(a, mode, flags);
1968 }
1969
1970 // Handle zeroes:
1971 if (!a_mnt) {
1972 return fp32_zero(a_sgn);
1973 }
1974
1975 // Handle infinities:
1976 if (a_exp == FP32_EXP_INF) {
1977 return fp32_infinity(a_sgn);
1978 }
1979
1980 b = b < -300 ? -300 : b;
1981 b = b > 300 ? 300 : b;
1982 a_exp += b;
1983 a_mnt <<= 3;
1984
1985 a_mnt = fp32_normalise(a_mnt, &a_exp);
1986
1987 return fp32_round(a_sgn, a_exp + FP32_EXP_BITS - 3, a_mnt << 1,
1988 mode, flags);
1989}
1990
1991static uint64_t
1992fp64_scale(uint64_t a, int64_t b, int mode, int *flags)
1993{
1994 int a_sgn, a_exp;
1995 uint64_t a_mnt;
1996
1997 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1998
1999 // Handle NaNs:
2000 if (fp64_is_NaN(a_exp, a_mnt)) {
2001 return fp64_process_NaN(a, mode, flags);
2002 }
2003
2004 // Handle zeroes:
2005 if (!a_mnt) {
2006 return fp64_zero(a_sgn);
2007 }
2008
2009 // Handle infinities:
2010 if (a_exp == FP64_EXP_INF) {
2011 return fp64_infinity(a_sgn);
2012 }
2013
2014 b = b < -3000 ? -3000 : b;
2015 b = b > 3000 ? 3000 : b;
2016 a_exp += b;
2017 a_mnt <<= 3;
2018
2019 a_mnt = fp64_normalise(a_mnt, &a_exp);
2020
2021 return fp64_round(a_sgn, a_exp + FP64_EXP_BITS - 3, a_mnt << 1,
2022 mode, flags);
2023}
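// Clamping b is safe because +/-300 (or +/-3000 for fp64) already
// exceeds the exponent span of each format (about 40 values for fp16,
// 277 for fp32, 2098 for fp64), so any larger magnitude produces the
// same overflow or underflow after rounding; the clamp merely keeps
// a_exp within comfortable int range.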
2024
2025static uint16_t
2026fp16_sqrt(uint16_t a, int mode, int *flags)
2027{
2028 int a_sgn, a_exp, x_sgn, x_exp;
2029 uint16_t a_mnt, x_mnt;
2030 uint32_t x, t0, t1;
2031
2032 fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
2033
2034 // Handle NaNs:
2035 if (fp16_is_NaN(a_exp, a_mnt))
2036 return fp16_process_NaN(a, mode, flags);
2037
2038 // Handle infinities and zeroes:
2039 if (!a_mnt)
2040 return fp16_zero(a_sgn);
2041 if (a_exp == FP16_EXP_INF && !a_sgn)
2042 return fp16_infinity(a_sgn);
2043 if (a_sgn) {
2044 *flags |= FPLIB_IOC;
2045 return fp16_defaultNaN();
2046 }
2047
2048 a_mnt = fp16_normalise(a_mnt, &a_exp);
2049 if (a_exp & 1) {
2050 ++a_exp;
2051 a_mnt >>= 1;
2052 }
2053
2054 // x = (a * 3 + 5) / 8
2055 x = ((uint32_t)a_mnt << 14) + ((uint32_t)a_mnt << 13) + ((uint32_t)5 << 28);
2056
2057 // x = (a / x + x) / 2; // 8-bit accuracy
2058 x = (((uint32_t)a_mnt << 16) / (x >> 15) + (x >> 16)) << 15;
2059
2060 // x = (a / x + x) / 2; // 16-bit accuracy
2061 x = (((uint32_t)a_mnt << 16) / (x >> 15) + (x >> 16)) << 15;
2062
2063 x_sgn = 0;
2064 x_exp = (a_exp + 27) >> 1;
2065 x_mnt = ((x - (1 << 18)) >> 19) + 1;
2066 t1 = (uint32_t)x_mnt * x_mnt;
2067 t0 = (uint32_t)a_mnt << 9;
2068 if (t1 > t0) {
2069 --x_mnt;
2070 }
2071
2072 x_mnt = fp16_normalise(x_mnt, &x_exp);
2073
2074 return fp16_round(x_sgn, x_exp, x_mnt << 1 | (t1 != t0), mode, flags);
2075}
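// The square roots use Heron's (Newton's) iteration
//   x_{n+1} = (a / x_n + x_n) / 2,
// which converges quadratically: here two fixed-point steps take the
// initial estimate (3a + 5) / 8 to roughly 16 correct bits, comfortably
// more than the 11-bit fp16 significand. The final test against
// t0 = a_mnt << 9 decrements the candidate if its square overshoots, and
// t1 != t0 marks the result inexact.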
2076
2077static uint32_t
2078fp32_sqrt(uint32_t a, int mode, int *flags)
2079{
2080 int a_sgn, a_exp, x_sgn, x_exp;
2081 uint32_t a_mnt, x, x_mnt;
2082 uint64_t t0, t1;
2083
2084 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
2085
2086 // Handle NaNs:
2087 if (fp32_is_NaN(a_exp, a_mnt))
2088 return fp32_process_NaN(a, mode, flags);
2089
2090 // Handle infinities and zeroes:
2091 if (!a_mnt)
2092 return fp32_zero(a_sgn);
2093 if (a_exp == FP32_EXP_INF && !a_sgn)
2094 return fp32_infinity(a_sgn);
2095 if (a_sgn) {
2096 *flags |= FPLIB_IOC;
2097 return fp32_defaultNaN();
2098 }
2099
2100 a_mnt = fp32_normalise(a_mnt, &a_exp);
2101 if (!(a_exp & 1)) {
2102 ++a_exp;
2103 a_mnt >>= 1;
2104 }
2105
2106 // x = (a * 3 + 5) / 8
2107 x = (a_mnt >> 2) + (a_mnt >> 3) + ((uint32_t)5 << 28);
2108
2109 // x = (a / x + x) / 2; // 8-bit accuracy
2110 x = (a_mnt / (x >> 15) + (x >> 16)) << 15;
2111
2112 // x = (a / x + x) / 2; // 16-bit accuracy
2113 x = (a_mnt / (x >> 15) + (x >> 16)) << 15;
2114
2115 // x = (a / x + x) / 2; // 32-bit accuracy
2116 x = ((((uint64_t)a_mnt << 32) / x) >> 2) + (x >> 1);
2117
2118 x_sgn = 0;
2119 x_exp = (a_exp + 147) >> 1;
2120 x_mnt = ((x - (1 << 5)) >> 6) + 1;
2121 t1 = (uint64_t)x_mnt * x_mnt;
2122 t0 = (uint64_t)a_mnt << 19;
2123 if (t1 > t0) {
2124 --x_mnt;
2125 }
2126
2127 x_mnt = fp32_normalise(x_mnt, &x_exp);
2128
2129 return fp32_round(x_sgn, x_exp, x_mnt << 1 | (t1 != t0), mode, flags);
2130}
2131
2132static uint64_t
2133fp64_sqrt(uint64_t a, int mode, int *flags)
2134{
2135 int a_sgn, a_exp, x_sgn, x_exp, c;
2136 uint64_t a_mnt, x_mnt, r, x0, x1;
2137 uint32_t x;
2138
2139 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
2140
2141 // Handle NaNs:
2142 if (fp64_is_NaN(a_exp, a_mnt))
2143 return fp64_process_NaN(a, mode, flags);
2144
2145 // Handle infinities and zeroes:
2146 if (!a_mnt)
2147 return fp64_zero(a_sgn);
2148 if (a_exp == FP64_EXP_INF && !a_sgn)
2149 return fp64_infinity(a_sgn);
2150 if (a_sgn) {
2151 *flags |= FPLIB_IOC;
2152 return fp64_defaultNaN();
2153 }
2154
2155 a_mnt = fp64_normalise(a_mnt, &a_exp);
2156 if (a_exp & 1) {
2157 ++a_exp;
2158 a_mnt >>= 1;
2159 }
2160
2161 // x = (a * 3 + 5) / 8
2162 x = (a_mnt >> 34) + (a_mnt >> 35) + ((uint32_t)5 << 28);
2163
2164 // x = (a / x + x) / 2; // 8-bit accuracy
2165 x = ((a_mnt >> 32) / (x >> 15) + (x >> 16)) << 15;
2166
2167 // x = (a / x + x) / 2; // 16-bit accuracy
2168 x = ((a_mnt >> 32) / (x >> 15) + (x >> 16)) << 15;
2169
2170 // x = (a / x + x) / 2; // 32-bit accuracy
2171 x = ((a_mnt / x) >> 2) + (x >> 1);
2172
2173 // r = 1 / x; // 32-bit accuracy
2174 r = ((uint64_t)1 << 62) / x;
2175
2176 // r = r * (2 - x * r); // 64-bit accuracy
2177 mul64x32(&x0, &x1, -(uint64_t)x * r << 1, r);
2178 lsr128(&x0, &x1, x0, x1, 31);
2179
2180 // x = (x + a * r) / 2; // 64-bit accuracy
2181 mul62x62(&x0, &x1, a_mnt >> 10, x0 >> 2);
2182 lsl128(&x0, &x1, x0, x1, 5);
2183 lsr128(&x0, &x1, x0, x1, 56);
2184
2185 x0 = ((uint64_t)x << 31) + (x0 >> 1);
2186
2187 x_sgn = 0;
2188 x_exp = (a_exp + 1053) >> 1;
2189 x_mnt = x0;
2190 x_mnt = ((x_mnt - (1 << 8)) >> 9) + 1;
2191 mul62x62(&x0, &x1, x_mnt, x_mnt);
2192 lsl128(&x0, &x1, x0, x1, 19);
2193 c = cmp128(x0, x1, 0, a_mnt);
2194 if (c > 0)
2195 --x_mnt;
2196
2197 x_mnt = fp64_normalise(x_mnt, &x_exp);
2198
2199 return fp64_round(x_sgn, x_exp, x_mnt << 1 | !!c, mode, flags);
2200}
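// fp64_sqrt extends the same scheme: three iterations give a root x good
// to about 32 bits, and the last step x' = (x + a / x) / 2 would need an
// expensive 128-by-64-bit divide, so the code instead refines a
// reciprocal r ~= 1/x with r' = r * (2 - x * r) and replaces a / x by
// the multiplication a * r. Exactness is again decided by a full-width
// comparison of x_mnt^2 (via mul62x62) against the input significand.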
2201
2202static int
2203modeConv(FPSCR fpscr)
2204{
2205 uint32_t x = (uint32_t)fpscr;
2206 // AHP bit is ignored. Only fplibConvert uses AHP.
2207 return (x >> 22 & 0xf) | (x >> 19 & 1 ? FPLIB_FZ16 : 0);
2208}
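// Example: an FPSCR with RMode = 0b10 (round towards minus infinity),
// FZ = 1 and DN = 1 yields mode = 0b1110. Bits [1:0] are the rounding
// mode consumed by the *_round_ helpers, bit 2 is FPLIB_FZ, bit 3 is
// FPLIB_DN; FZ16 (FPSCR bit 19) travels as the separate FPLIB_FZ16 flag,
// and the AHP flag (FPLIB_AHP, bit 4 of the mode word) is only ever set
// locally by fplibConvert.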
2209
2210static void
2211set_fpscr(FPSCR &fpscr, int flags)
2212{
2213 // translate back to FPSCR
2214 bool underflow = false;
2215 if (flags & FPLIB_IDC) {
2216 fpscr.idc = 1;
2217 }
2218 if (flags & FPLIB_IOC) {
2219 fpscr.ioc = 1;
2220 }
2221 if (flags & FPLIB_DZC) {
2222 fpscr.dzc = 1;
2223 }
2224 if (flags & FPLIB_OFC) {
2225 fpscr.ofc = 1;
2226 }
2227 if (flags & FPLIB_UFC) {
2228 underflow = true; //xx Why is this required?
2229 fpscr.ufc = 1;
2230 }
2231 if ((flags & FPLIB_IXC) && !(underflow && fpscr.fz)) {
2232 fpscr.ixc = 1;
2233 }
2234}
2235
2236template <>
2237bool
2238fplibCompareEQ(uint16_t a, uint16_t b, FPSCR &fpscr)
2239{
2240 int flags = 0;
2241 int x = fp16_compare_eq(a, b, modeConv(fpscr), &flags);
2242 set_fpscr(fpscr, flags);
2243 return x;
2244}
2245
2246template <>
2247bool
2248fplibCompareGE(uint16_t a, uint16_t b, FPSCR &fpscr)
2249{
2250 int flags = 0;
2251 int x = fp16_compare_ge(a, b, modeConv(fpscr), &flags);
2252 set_fpscr(fpscr, flags);
2253 return x;
2254}
2255
2256template <>
2257bool
2258fplibCompareGT(uint16_t a, uint16_t b, FPSCR &fpscr)
2259{
2260 int flags = 0;
2261 int x = fp16_compare_gt(a, b, modeConv(fpscr), &flags);
2262 set_fpscr(fpscr, flags);
2263 return x;
2264}
2265
2266template <>
2267bool
2268fplibCompareUN(uint16_t a, uint16_t b, FPSCR &fpscr)
2269{
2270 int flags = 0;
2271 int x = fp16_compare_un(a, b, modeConv(fpscr), &flags);
2272 set_fpscr(fpscr, flags);
2273 return x;
2274}
2275
2276template <>
2277bool
2278fplibCompareEQ(uint32_t a, uint32_t b, FPSCR &fpscr)
2279{
2280 int flags = 0;
2281 int x = fp32_compare_eq(a, b, modeConv(fpscr), &flags);
2282 set_fpscr(fpscr, flags);
2283 return x;
2284}
2285
2286template <>
2287bool
2288fplibCompareGE(uint32_t a, uint32_t b, FPSCR &fpscr)
2289{
2290 int flags = 0;
2291 int x = fp32_compare_ge(a, b, modeConv(fpscr), &flags);
2292 set_fpscr(fpscr, flags);
2293 return x;
2294}
2295
2296template <>
2297bool
2298fplibCompareGT(uint32_t a, uint32_t b, FPSCR &fpscr)
2299{
2300 int flags = 0;
2301 int x = fp32_compare_gt(a, b, modeConv(fpscr), &flags);
2302 set_fpscr(fpscr, flags);
2303 return x;
2304}
2305
2306template <>
2307bool
2308fplibCompareUN(uint32_t a, uint32_t b, FPSCR &fpscr)
2309{
2310 int flags = 0;
2311 int x = fp32_compare_un(a, b, modeConv(fpscr), &flags);
2312 set_fpscr(fpscr, flags);
2313 return x;
2314}
2315
2316template <>
2317bool
2318fplibCompareEQ(uint64_t a, uint64_t b, FPSCR &fpscr)
2319{
2320 int flags = 0;
2321 int x = fp64_compare_eq(a, b, modeConv(fpscr), &flags);
2322 set_fpscr(fpscr, flags);
2323 return x;
2324}
2325
2326template <>
2327bool
2328fplibCompareGE(uint64_t a, uint64_t b, FPSCR &fpscr)
2329{
2330 int flags = 0;
2331 int x = fp64_compare_ge(a, b, modeConv(fpscr), &flags);
2332 set_fpscr(fpscr, flags);
2333 return x;
2334}
2335
2336template <>
2337bool
2338fplibCompareGT(uint64_t a, uint64_t b, FPSCR &fpscr)
2339{
2340 int flags = 0;
2341 int x = fp64_compare_gt(a, b, modeConv(fpscr), &flags);
2342 set_fpscr(fpscr, flags);
2343 return x;
2344}
2345
2346template <>
2347bool
2348fplibCompareUN(uint64_t a, uint64_t b, FPSCR &fpscr)
2349{
2350 int flags = 0;
2351 int x = fp64_compare_un(a, b, modeConv(fpscr), &flags);
2352 set_fpscr(fpscr, flags);
2353 return x;
2354}
2355
2356template <>
2357uint16_t
2358fplibAbs(uint16_t op)
2359{
2360 return op & ~(1ULL << (FP16_BITS - 1));
2361}
2362
2363template <>
2364uint32_t
2365fplibAbs(uint32_t op)
2366{
2367 return op & ~(1ULL << (FP32_BITS - 1));
2368}
2369
2370template <>
2371uint64_t
2372fplibAbs(uint64_t op)
2373{
2374 return op & ~(1ULL << (FP64_BITS - 1));
2375}
2376
2377template <>
2378uint16_t
2379fplibAdd(uint16_t op1, uint16_t op2, FPSCR &fpscr)
2380{
2381 int flags = 0;
2382 uint16_t result = fp16_add(op1, op2, 0, modeConv(fpscr), &flags);
2383 set_fpscr0(fpscr, flags);
2384 return result;
2385}
2386
2387template <>
2388uint32_t
2389fplibAdd(uint32_t op1, uint32_t op2, FPSCR &fpscr)
2390{
2391 int flags = 0;
2392 uint32_t result = fp32_add(op1, op2, 0, modeConv(fpscr), &flags);
2393 set_fpscr0(fpscr, flags);
2394 return result;
2395}
2396
2397template <>
2398uint64_t
2399fplibAdd(uint64_t op1, uint64_t op2, FPSCR &fpscr)
2400{
2401 int flags = 0;
2402 uint64_t result = fp64_add(op1, op2, 0, modeConv(fpscr), &flags);
2403 set_fpscr0(fpscr, flags);
2404 return result;
2405}
2406
2407template <>
2408int
2409fplibCompare(uint16_t op1, uint16_t op2, bool signal_nans, FPSCR &fpscr)
2410{
2411 int mode = modeConv(fpscr);
2412 int flags = 0;
2413 int sgn1, exp1, sgn2, exp2, result;
2414 uint16_t mnt1, mnt2;
2415
2416 fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2417 fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2418
2419 if (fp16_is_NaN(exp1, mnt1) || fp16_is_NaN(exp2, mnt2)) {
2420 result = 3;
2421 if (fp16_is_signalling_NaN(exp1, mnt1) ||
2422 fp16_is_signalling_NaN(exp2, mnt2) || signal_nans)
2423 flags |= FPLIB_IOC;
2424 } else {
2425 if (op1 == op2 || (!mnt1 && !mnt2)) {
2426 result = 6;
2427 } else if (sgn1 != sgn2) {
2428 result = sgn1 ? 8 : 2;
2429 } else if (exp1 != exp2) {
2430 result = sgn1 ^ (exp1 < exp2) ? 8 : 2;
2431 } else {
2432 result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2;
2433 }
2434 }
2435
2436 set_fpscr0(fpscr, flags);
2437
2438 return result;
2439}
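// The return value packs the Arm NZCV condition flags produced by a
// floating-point compare: 8 (0b1000, N) for less than, 6 (0b0110, ZC)
// for equal, 2 (0b0010, C) for greater than, and 3 (0b0011, CV) for
// unordered.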
2440
2441template <>
2442int
2443fplibCompare(uint32_t op1, uint32_t op2, bool signal_nans, FPSCR &fpscr)
2444{
2445 int mode = modeConv(fpscr);
2446 int flags = 0;
2447 int sgn1, exp1, sgn2, exp2, result;
2448 uint32_t mnt1, mnt2;
2449
2450 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2451 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2452
2453 if (fp32_is_NaN(exp1, mnt1) || fp32_is_NaN(exp2, mnt2)) {
2454 result = 3;
2455 if (fp32_is_signalling_NaN(exp1, mnt1) ||
2456 fp32_is_signalling_NaN(exp2, mnt2) || signal_nans)
2457 flags |= FPLIB_IOC;
2458 } else {
2459 if (op1 == op2 || (!mnt1 && !mnt2)) {
2460 result = 6;
2461 } else if (sgn1 != sgn2) {
2462 result = sgn1 ? 8 : 2;
2463 } else if (exp1 != exp2) {
2464 result = sgn1 ^ (exp1 < exp2) ? 8 : 2;
2465 } else {
2466 result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2;
2467 }
2468 }
2469
2470 set_fpscr0(fpscr, flags);
2471
2472 return result;
2473}
2474
2475template <>
2476int
2477fplibCompare(uint64_t op1, uint64_t op2, bool signal_nans, FPSCR &fpscr)
2478{
2479 int mode = modeConv(fpscr);
2480 int flags = 0;
2481 int sgn1, exp1, sgn2, exp2, result;
2482 uint64_t mnt1, mnt2;
2483
2484 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2485 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2486
2487 if (fp64_is_NaN(exp1, mnt1) || fp64_is_NaN(exp2, mnt2)) {
2488 result = 3;
2489 if (fp64_is_signalling_NaN(exp1, mnt1) ||
2490 fp64_is_signalling_NaN(exp2, mnt2) || signal_nans)
2491 flags |= FPLIB_IOC;
2492 } else {
2493 if (op1 == op2 || (!mnt1 && !mnt2)) {
2494 result = 6;
2495 } else if (sgn1 != sgn2) {
2496 result = sgn1 ? 8 : 2;
2497 } else if (exp1 != exp2) {
2498 result = sgn1 ^ (exp1 < exp2) ? 8 : 2;
2499 } else {
2500 result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2;
2501 }
2502 }
2503
2504 set_fpscr0(fpscr, flags);
2505
2506 return result;
2507}
2508
2509static uint16_t
2510fp16_FPConvertNaN_32(uint32_t op)
2511{
2512 return fp16_pack(op >> (FP32_BITS - 1), FP16_EXP_INF,
2513 1ULL << (FP16_MANT_BITS - 1) |
2514 op >> (FP32_MANT_BITS - FP16_MANT_BITS));
2515}
2516
2517static uint16_t
2518fp16_FPConvertNaN_64(uint64_t op)
2519{
2520 return fp16_pack(op >> (FP64_BITS - 1), FP16_EXP_INF,
2521 1ULL << (FP16_MANT_BITS - 1) |
2522 op >> (FP64_MANT_BITS - FP16_MANT_BITS));
2523}
2524
2525static uint32_t
2526fp32_FPConvertNaN_16(uint16_t op)
2527{
2528 return fp32_pack(op >> (FP16_BITS - 1), FP32_EXP_INF,
2529 1ULL << (FP32_MANT_BITS - 1) |
2530 (uint32_t)op << (FP32_MANT_BITS - FP16_MANT_BITS));
2531}
2532
2533static uint32_t
2534fp32_FPConvertNaN_64(uint64_t op)
2535{
2536 return fp32_pack(op >> (FP64_BITS - 1), FP32_EXP_INF,
2537 1ULL << (FP32_MANT_BITS - 1) |
2538 op >> (FP64_MANT_BITS - FP32_MANT_BITS));
2539}
2540
2541static uint64_t
2542fp64_FPConvertNaN_16(uint16_t op)
2543{
2544 return fp64_pack(op >> (FP16_BITS - 1), FP64_EXP_INF,
2545 1ULL << (FP64_MANT_BITS - 1) |
2546 (uint64_t)op << (FP64_MANT_BITS - FP16_MANT_BITS));
2547}
2548
2549static uint64_t
2550fp64_FPConvertNaN_32(uint32_t op)
2551{
2552 return fp64_pack(op >> (FP32_BITS - 1), FP64_EXP_INF,
2553 1ULL << (FP64_MANT_BITS - 1) |
2554 (uint64_t)op << (FP64_MANT_BITS - FP32_MANT_BITS));
2555}
2556
2557static uint16_t
2558fp16_FPOnePointFive(int sgn)
2559{
2560 return fp16_pack(sgn, FP16_EXP_BIAS, 1ULL << (FP16_MANT_BITS - 1));
2561}
2562
2563static uint32_t
2564fp32_FPOnePointFive(int sgn)
2565{
2566 return fp32_pack(sgn, FP32_EXP_BIAS, 1ULL << (FP32_MANT_BITS - 1));
2567}
2568
2569static uint64_t
2570fp64_FPOnePointFive(int sgn)
2571{
2572 return fp64_pack(sgn, FP64_EXP_BIAS, 1ULL << (FP64_MANT_BITS - 1));
2573}
2574
2575static uint16_t
2576fp16_FPThree(int sgn)
2577{
2578 return fp16_pack(sgn, FP16_EXP_BIAS + 1, 1ULL << (FP16_MANT_BITS - 1));
2579}
2580
2581static uint32_t
2582fp32_FPThree(int sgn)
2583{
2584 return fp32_pack(sgn, FP32_EXP_BIAS + 1, 1ULL << (FP32_MANT_BITS - 1));
2585}
2586
2587static uint64_t
2588fp64_FPThree(int sgn)
2589{
2590 return fp64_pack(sgn, FP64_EXP_BIAS + 1, 1ULL << (FP64_MANT_BITS - 1));
2591}
2592
2593static uint16_t
2594fp16_FPTwo(int sgn)
2595{
2596 return fp16_pack(sgn, FP16_EXP_BIAS + 1, 0);
2597}
2598
2599static uint32_t
2600fp32_FPTwo(int sgn)
2601{
2602 return fp32_pack(sgn, FP32_EXP_BIAS + 1, 0);
2603}
2604
2605static uint64_t
2606fp64_FPTwo(int sgn)
2607{
2608 return fp64_pack(sgn, FP64_EXP_BIAS + 1, 0);
2609}
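// Sanity check on the constant builders: fp32_FPOnePointFive(0) packs
// exponent FP32_EXP_BIAS with the top fraction bit set, i.e.
// 1.1b * 2^0 = 1.5 = 0x3fc00000; fp32_FPThree(0) is 1.1b * 2^1 = 3.0 =
// 0x40400000; and fp32_FPTwo(0) is 1.0b * 2^1 = 2.0 = 0x40000000. These
// constants are presumably consumed by the reciprocal and reciprocal
// square root step operations elsewhere in the library.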
2610
2611template <>
2612uint16_t
2613fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr)
2614{
2615 int mode = modeConv(fpscr);
2616 int flags = 0;
2617 int sgn, exp;
2618 uint32_t mnt;
2619 uint16_t result;
2620
2621 // Unpack floating-point operand optionally with flush-to-zero:
2622 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2623
2624 bool alt_hp = fpscr.ahp;
2625
2626 if (fp32_is_NaN(exp, mnt)) {
2627 if (alt_hp) {
2628 result = fp16_zero(sgn);
2629 } else if (fpscr.dn) {
2630 result = fp16_defaultNaN();
2631 } else {
2632 result = fp16_FPConvertNaN_32(op);
2633 }
2634 if (!(mnt >> (FP32_MANT_BITS - 1) & 1) || alt_hp) {
2635 flags |= FPLIB_IOC;
2636 }
2637 } else if (exp == FP32_EXP_INF) {
2638 if (alt_hp) {
2639 result = ((uint16_t)sgn << (FP16_BITS - 1) |
2640 ((1ULL << (FP16_BITS - 1)) - 1));
2641 flags |= FPLIB_IOC;
2642 } else {
2643 result = fp16_infinity(sgn);
2644 }
2645 } else if (!mnt) {
2646 result = fp16_zero(sgn);
2647 } else {
2648 result =
2649 fp16_round_(sgn, exp - FP32_EXP_BIAS + FP16_EXP_BIAS,
2650 mnt >> (FP32_MANT_BITS - FP16_BITS) |
2651 !!(mnt & ((1ULL << (FP32_MANT_BITS - FP16_BITS)) - 1)),
2652 rounding, (mode & 0xf) | alt_hp << 4, &flags);
2653 }
2654
2655 set_fpscr0(fpscr, flags);
2656
2657 return result;
2658}
2659
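// Editorial sketch of the narrowing step above: the unpacked 32-bit
// significand is reduced to FP16_BITS bits, and every discarded low bit is
// ORed into bit 0 (the "sticky" bit) so fp16_round_ still sees any
// inexactness:
//
//     uint32_t kept   = mnt >> (FP32_MANT_BITS - FP16_BITS);
//     uint32_t sticky = (mnt & ((1ULL << (FP32_MANT_BITS - FP16_BITS)) - 1)) != 0;
//     // fp16_round_ then rounds (kept | sticky) at half precision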
2660template <>
2661uint16_t
2662fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr)
2663{
2664 int mode = modeConv(fpscr);
2665 int flags = 0;
2666 int sgn, exp;
2667 uint64_t mnt;
2668 uint16_t result;
2669
2670 // Unpack floating-point operand optionally with flush-to-zero:
2671 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2672
2673 bool alt_hp = fpscr.ahp;
2674
2675 if (fp64_is_NaN(exp, mnt)) {
2676 if (alt_hp) {
2677 result = fp16_zero(sgn);
2678 } else if (fpscr.dn) {
2679 result = fp16_defaultNaN();
2680 } else {
2681 result = fp16_FPConvertNaN_64(op);
2682 }
2683 if (!(mnt >> (FP64_MANT_BITS - 1) & 1) || alt_hp) {
2684 flags |= FPLIB_IOC;
2685 }
2686 } else if (exp == FP64_EXP_INF) {
2687 if (alt_hp) {
2688 result = ((uint16_t)sgn << (FP16_BITS - 1) |
2689 ((1ULL << (FP16_BITS - 1)) - 1));
2690 flags |= FPLIB_IOC;
2691 } else {
2692 result = fp16_infinity(sgn);
2693 }
2694 } else if (!mnt) {
2695 result = fp16_zero(sgn);
2696 } else {
2697 result =
2698 fp16_round_(sgn, exp - FP64_EXP_BIAS + FP16_EXP_BIAS,
2699 mnt >> (FP64_MANT_BITS - FP16_BITS) |
2700 !!(mnt & ((1ULL << (FP64_MANT_BITS - FP16_BITS)) - 1)),
2701 rounding, (mode & 0xf) | alt_hp << 4, &flags);
2702 }
2703
2704 set_fpscr0(fpscr, flags);
2705
2706 return result;
2707}
2708
2709template <>
2710uint32_t
2711fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr)
2712{
2713 int mode = modeConv(fpscr);
2714 int flags = 0;
2715 int sgn, exp;
2716 uint16_t mnt;
2717 uint32_t result;
2718
2719 // Unpack floating-point operand optionally with flush-to-zero:
2720 fp16_unpack(&sgn, &exp, &mnt, op, mode & 0xf, &flags);
2721
2722 if (fp16_is_NaN(exp, mnt) && !fpscr.ahp) {
2723 if (fpscr.dn) {
2724 result = fp32_defaultNaN();
2725 } else {
2726 result = fp32_FPConvertNaN_16(op);
2727 }
2728 if (!(mnt >> (FP16_MANT_BITS - 1) & 1)) {
2729 flags |= FPLIB_IOC;
2730 }
2731 } else if (exp == FP16_EXP_INF && !fpscr.ahp) {
2732 result = fp32_infinity(sgn);
2733 } else if (!mnt) {
2734 result = fp32_zero(sgn);
2735 } else {
2736 mnt = fp16_normalise(mnt, &exp);
2737 result = fp32_pack(sgn, (exp - FP16_EXP_BIAS +
2738 FP32_EXP_BIAS + FP16_EXP_BITS),
2739 (uint32_t)mnt << (FP32_MANT_BITS - FP16_BITS + 1));
2740 }
2741
2742 set_fpscr0(fpscr, flags);
2743
2744 return result;
2745}
2746
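// Editorial note: widening from half to single is exact, so no rounding
// call is needed -- normalise and repack suffice. Worked example for
// op = 0x3c00 (1.0 in half precision), following the code above:
//
//     fp16_unpack    -> sgn 0, exp 15, mnt 0x0400 (hidden bit set)
//     fp16_normalise -> mnt 0x8000, exp 10 (shifted left by 5)
//     packed exp     =  10 - FP16_EXP_BIAS + FP32_EXP_BIAS + FP16_EXP_BITS
//                    =  10 - 15 + 127 + 5 = 127
//     result         =  0x3f800000 (1.0f)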
2747template <>
2748uint32_t
2749fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr)
2750{
2751 int mode = modeConv(fpscr);
2752 int flags = 0;
2753 int sgn, exp;
2754 uint64_t mnt;
2755 uint32_t result;
2756
2757 // Unpack floating-point operand optionally with flush-to-zero:
2758 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2759
2760 if (fp64_is_NaN(exp, mnt)) {
2761 if (fpscr.dn) {
2762 result = fp32_defaultNaN();
2763 } else {
2764 result = fp32_FPConvertNaN_64(op);
2765 }
2766 if (!(mnt >> (FP64_MANT_BITS - 1) & 1)) {
2767 flags |= FPLIB_IOC;
2768 }
2769 } else if (exp == FP64_EXP_INF) {
2770 result = fp32_infinity(sgn);
2771 } else if (!mnt) {
2772 result = fp32_zero(sgn);
2773 } else {
2774 result =
2775 fp32_round_(sgn, exp - FP64_EXP_BIAS + FP32_EXP_BIAS,
2776 mnt >> (FP64_MANT_BITS - FP32_BITS) |
2777 !!(mnt & ((1ULL << (FP64_MANT_BITS - FP32_BITS)) - 1)),
2778 rounding, mode, &flags);
2779 }
2780
2781 set_fpscr0(fpscr, flags);
2782
2783 return result;
2784}
2785
2786template <>
2787uint64_t
2788fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr)
2789{
2790 int mode = modeConv(fpscr);
2791 int flags = 0;
2792 int sgn, exp;
2793 uint16_t mnt;
2794 uint64_t result;
2795
2796 // Unpack floating-point operand optionally with flush-to-zero:
2797 fp16_unpack(&sgn, &exp, &mnt, op, mode & 0xf, &flags);
2798
2799 if (fp16_is_NaN(exp, mnt) && !fpscr.ahp) {
2800 if (fpscr.dn) {
2801 result = fp64_defaultNaN();
2802 } else {
2803 result = fp64_FPConvertNaN_16(op);
2804 }
2805 if (!(mnt >> (FP16_MANT_BITS - 1) & 1)) {
2806 flags |= FPLIB_IOC;
2807 }
2808 } else if (exp == FP16_EXP_INF && !fpscr.ahp) {
2809 result = fp64_infinity(sgn);
2810 } else if (!mnt) {
2811 result = fp64_zero(sgn);
2812 } else {
2813 mnt = fp16_normalise(mnt, &exp);
2814 result = fp64_pack(sgn, (exp - FP16_EXP_BIAS +
2815 FP64_EXP_BIAS + FP16_EXP_BITS),
2816 (uint64_t)mnt << (FP64_MANT_BITS - FP16_BITS + 1));
2817 }
2818
2819 set_fpscr0(fpscr, flags);
2820
2821 return result;
2822}
2823
2824template <>
2825uint64_t
2826fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr)
2827{
2828 int mode = modeConv(fpscr);
2829 int flags = 0;
2830 int sgn, exp;
2831 uint32_t mnt;
2832 uint64_t result;
2833
2834 // Unpack floating-point operand optionally with flush-to-zero:
2835 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2836
2837 if (fp32_is_NaN(exp, mnt)) {
2838 if (fpscr.dn) {
2839 result = fp64_defaultNaN();
2840 } else {
2841 result = fp64_FPConvertNaN_32(op);
2842 }
2843 if (!(mnt >> (FP32_MANT_BITS - 1) & 1)) {
2844 flags |= FPLIB_IOC;
2845 }
2846 } else if (exp == FP32_EXP_INF) {
2847 result = fp64_infinity(sgn);
2848 } else if (!mnt) {
2849 result = fp64_zero(sgn);
2850 } else {
2851 mnt = fp32_normalise(mnt, &exp);
2852 result = fp64_pack(sgn, (exp - FP32_EXP_BIAS +
2853 FP64_EXP_BIAS + FP32_EXP_BITS),
2854 (uint64_t)mnt << (FP64_MANT_BITS - FP32_BITS + 1));
2855 }
2856
2857 set_fpscr0(fpscr, flags);
2858
2859 return result;
2860}
2861
2862template <>
2863uint16_t
2864fplibMulAdd(uint16_t addend, uint16_t op1, uint16_t op2, FPSCR &fpscr)
2865{
2866 int flags = 0;
2867 uint16_t result = fp16_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags);
2868 set_fpscr0(fpscr, flags);
2869 return result;
2870}
2871
2872template <>
2873uint32_t
2874fplibMulAdd(uint32_t addend, uint32_t op1, uint32_t op2, FPSCR &fpscr)
2875{
2876 int flags = 0;
2877 uint32_t result = fp32_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags);
2878 set_fpscr0(fpscr, flags);
2879 return result;
2880}
2881
2882template <>
2883uint64_t
2884fplibMulAdd(uint64_t addend, uint64_t op1, uint64_t op2, FPSCR &fpscr)
2885{
2886 int flags = 0;
2887 uint64_t result = fp64_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags);
2888 set_fpscr0(fpscr, flags);
2889 return result;
2890}
2891
2892template <>
2893uint16_t
2894fplibDiv(uint16_t op1, uint16_t op2, FPSCR &fpscr)
2895{
2896 int flags = 0;
2897 uint16_t result = fp16_div(op1, op2, modeConv(fpscr), &flags);
2898 set_fpscr0(fpscr, flags);
2899 return result;
2900}
2901
2902template <>
2903uint32_t
2904fplibDiv(uint32_t op1, uint32_t op2, FPSCR &fpscr)
2905{
2906 int flags = 0;
2907 uint32_t result = fp32_div(op1, op2, modeConv(fpscr), &flags);
2908 set_fpscr0(fpscr, flags);
2909 return result;
2910}
2911
2912template <>
2913uint64_t
2914fplibDiv(uint64_t op1, uint64_t op2, FPSCR &fpscr)
2915{
2916 int flags = 0;
2917 uint64_t result = fp64_div(op1, op2, modeConv(fpscr), &flags);
2918 set_fpscr0(fpscr, flags);
2919 return result;
2920}
2921
2922template <>
2923uint16_t
2924fplibExpA(uint16_t op)
2925{
2926 static uint16_t coeff[32] = {
2927 0x0000,
2928 0x0016,
2929 0x002d,
2930 0x0045,
2931 0x005d,
2932 0x0075,
2933 0x008e,
2934 0x00a8,
2935 0x00c2,
2936 0x00dc,
2937 0x00f8,
2938 0x0114,
2939 0x0130,
2940 0x014d,
2941 0x016b,
2942 0x0189,
2943 0x01a8,
2944 0x01c8,
2945 0x01e8,
2946 0x0209,
2947 0x022b,
2948 0x024e,
2949 0x0271,
2950 0x0295,
2951 0x02ba,
2952 0x02e0,
2953 0x0306,
2954 0x032e,
2955 0x0356,
2956 0x037f,
2957 0x03a9,
2958 0x03d4
2959 };
2960 return ((((op >> 5) & ((1 << FP16_EXP_BITS) - 1)) << FP16_MANT_BITS) |
2961 coeff[op & ((1 << 5) - 1)]);
2962}
2963
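// Editorial note: the fplibExpA tables implement the SVE FEXPA lookup.
// Entry i holds the fraction bits of 2^(i/N), with N the table size (32
// above for half precision, 64 below for single and double); the selected
// fraction is spliced under the exponent bits taken from the upper part of
// op. For example, coeff[16] above is 0x01a8 = 424, the 10 fraction bits
// of 2^(16/32) = sqrt(2) = 1.4142... (424/1024 = 0.4141).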
2964template <>
2965uint32_t
2966fplibExpA(uint32_t op)
2967{
2968 static uint32_t coeff[64] = {
2969 0x000000,
2970 0x0164d2,
2971 0x02cd87,
2972 0x043a29,
2973 0x05aac3,
2974 0x071f62,
2975 0x08980f,
2976 0x0a14d5,
2977 0x0b95c2,
2978 0x0d1adf,
2979 0x0ea43a,
2980 0x1031dc,
2981 0x11c3d3,
2982 0x135a2b,
2983 0x14f4f0,
2984 0x16942d,
2985 0x1837f0,
2986 0x19e046,
2987 0x1b8d3a,
2988 0x1d3eda,
2989 0x1ef532,
2990 0x20b051,
2991 0x227043,
2992 0x243516,
2993 0x25fed7,
2994 0x27cd94,
2995 0x29a15b,
2996 0x2b7a3a,
2997 0x2d583f,
2998 0x2f3b79,
2999 0x3123f6,
3000 0x3311c4,
3001 0x3504f3,
3002 0x36fd92,
3003 0x38fbaf,
3004 0x3aff5b,
3005 0x3d08a4,
3006 0x3f179a,
3007 0x412c4d,
3008 0x4346cd,
3009 0x45672a,
3010 0x478d75,
3011 0x49b9be,
3012 0x4bec15,
3013 0x4e248c,
3014 0x506334,
3015 0x52a81e,
3016 0x54f35b,
3017 0x5744fd,
3018 0x599d16,
3019 0x5bfbb8,
3020 0x5e60f5,
3021 0x60ccdf,
3022 0x633f89,
3023 0x65b907,
3024 0x68396a,
3025 0x6ac0c7,
3026 0x6d4f30,
3027 0x6fe4ba,
3028 0x728177,
3029 0x75257d,
3030 0x77d0df,
3031 0x7a83b3,
3032 0x7d3e0c
3033 };
3034 return ((((op >> 6) & ((1 << FP32_EXP_BITS) - 1)) << FP32_MANT_BITS) |
3035 coeff[op & ((1 << 6) - 1)]);
3036}
3037
3038template <>
3039uint64_t
3040fplibExpA(uint64_t op)
3041{
3042 static uint64_t coeff[64] = {
3043 0x0000000000000ULL,
3044 0x02c9a3e778061ULL,
3045 0x059b0d3158574ULL,
3046 0x0874518759bc8ULL,
3047 0x0b5586cf9890fULL,
3048 0x0e3ec32d3d1a2ULL,
3049 0x11301d0125b51ULL,
3050 0x1429aaea92de0ULL,
3051 0x172b83c7d517bULL,
3052 0x1a35beb6fcb75ULL,
3053 0x1d4873168b9aaULL,
3054 0x2063b88628cd6ULL,
3055 0x2387a6e756238ULL,
3056 0x26b4565e27cddULL,
3057 0x29e9df51fdee1ULL,
3058 0x2d285a6e4030bULL,
3059 0x306fe0a31b715ULL,
3060 0x33c08b26416ffULL,
3061 0x371a7373aa9cbULL,
3062 0x3a7db34e59ff7ULL,
3063 0x3dea64c123422ULL,
3064 0x4160a21f72e2aULL,
3065 0x44e086061892dULL,
3066 0x486a2b5c13cd0ULL,
3067 0x4bfdad5362a27ULL,
3068 0x4f9b2769d2ca7ULL,
3069 0x5342b569d4f82ULL,
3070 0x56f4736b527daULL,
3071 0x5ab07dd485429ULL,
3072 0x5e76f15ad2148ULL,
3073 0x6247eb03a5585ULL,
3074 0x6623882552225ULL,
3075 0x6a09e667f3bcdULL,
3076 0x6dfb23c651a2fULL,
3077 0x71f75e8ec5f74ULL,
3078 0x75feb564267c9ULL,
3079 0x7a11473eb0187ULL,
3080 0x7e2f336cf4e62ULL,
3081 0x82589994cce13ULL,
3082 0x868d99b4492edULL,
3083 0x8ace5422aa0dbULL,
3084 0x8f1ae99157736ULL,
3085 0x93737b0cdc5e5ULL,
3086 0x97d829fde4e50ULL,
3087 0x9c49182a3f090ULL,
3088 0xa0c667b5de565ULL,
3089 0xa5503b23e255dULL,
3090 0xa9e6b5579fdbfULL,
3091 0xae89f995ad3adULL,
3092 0xb33a2b84f15fbULL,
3093 0xb7f76f2fb5e47ULL,
3094 0xbcc1e904bc1d2ULL,
3095 0xc199bdd85529cULL,
3096 0xc67f12e57d14bULL,
3097 0xcb720dcef9069ULL,
3098 0xd072d4a07897cULL,
3099 0xd5818dcfba487ULL,
3100 0xda9e603db3285ULL,
3101 0xdfc97337b9b5fULL,
3102 0xe502ee78b3ff6ULL,
3103 0xea4afa2a490daULL,
3104 0xefa1bee615a27ULL,
3105 0xf50765b6e4540ULL,
3106 0xfa7c1819e90d8ULL
3107 };
3108 return ((((op >> 6) & ((1 << FP64_EXP_BITS) - 1)) << FP64_MANT_BITS) |
3109 coeff[op & ((1 << 6) - 1)]);
3110}
3111
3112static uint16_t
3113fp16_repack(int sgn, int exp, uint16_t mnt)
3114{
3115 return fp16_pack(sgn, mnt >> FP16_MANT_BITS ? exp : 0, mnt);
3116}
3117
3118static uint32_t
3119fp32_repack(int sgn, int exp, uint32_t mnt)
3120{
3121 return fp32_pack(sgn, mnt >> FP32_MANT_BITS ? exp : 0, mnt);
3122}
3123
3124static uint64_t
3125fp64_repack(int sgn, int exp, uint64_t mnt)
3126{
3127 return fp64_pack(sgn, mnt >> FP64_MANT_BITS ? exp : 0, mnt);
3128}
3129
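// Editorial note: fp*_repack re-forms a value whose significand may have
// lost its integer bit; if bit MANT_BITS is clear the value is subnormal,
// so the exponent field must be written as zero. For example (half
// precision), fp16_repack(0, 1, 0x0200) packs exponent 0, giving the
// subnormal encoding 0x0200, whereas fp16_repack(0, 1, 0x0400) keeps
// exponent 1.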
3130static void
3131fp16_minmaxnum(uint16_t *op1, uint16_t *op2, int sgn)
3132{
3133 // Treat a single quiet-NaN as +Infinity/-Infinity
3134 if (!((uint16_t)~(*op1 << 1) >> FP16_MANT_BITS) &&
3135 (uint16_t)~(*op2 << 1) >> FP16_MANT_BITS)
3136 *op1 = fp16_infinity(sgn);
3137 if (!((uint16_t)~(*op2 << 1) >> FP16_MANT_BITS) &&
3138 (uint16_t)~(*op1 << 1) >> FP16_MANT_BITS)
3139 *op2 = fp16_infinity(sgn);
3140}
3141
3142static void
3143fp32_minmaxnum(uint32_t *op1, uint32_t *op2, int sgn)
3144{
3145 // Treat a single quiet-NaN as +Infinity/-Infinity
3146 if (!((uint32_t)~(*op1 << 1) >> FP32_MANT_BITS) &&
3147 (uint32_t)~(*op2 << 1) >> FP32_MANT_BITS)
3148 *op1 = fp32_infinity(sgn);
3149 if (!((uint32_t)~(*op2 << 1) >> FP32_MANT_BITS) &&
3150 (uint32_t)~(*op1 << 1) >> FP32_MANT_BITS)
3151 *op2 = fp32_infinity(sgn);
3152}
3153
3154static void
3155fp64_minmaxnum(uint64_t *op1, uint64_t *op2, int sgn)
3156{
3157 // Treat a single quiet-NaN as +Infinity/-Infinity
3158 if (!((uint64_t)~(*op1 << 1) >> FP64_MANT_BITS) &&
3159 (uint64_t)~(*op2 << 1) >> FP64_MANT_BITS)
3160 *op1 = fp64_infinity(sgn);
3161 if (!((uint64_t)~(*op2 << 1) >> FP64_MANT_BITS) &&
3162 (uint64_t)~(*op1 << 1) >> FP64_MANT_BITS)
3163 *op2 = fp64_infinity(sgn);
3164}
3165
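// Editorial note: the test used above, !(~(op << 1) >> MANT_BITS), is true
// exactly for a quiet NaN -- shifting out the sign leaves all ones in the
// exponent and the top fraction bit. Worked half-precision example:
//
//     op = 0x7e00 (quiet NaN):      ~(op << 1) = 0x03ff, >> 10 == 0
//     op = 0x7c01 (signalling NaN): ~(op << 1) = 0x07fd, >> 10 == 1
//
// So a lone quiet NaN is replaced by +/-infinity and loses the MaxNum /
// MinNum selection, while a signalling NaN still propagates through the
// NaN handling in Max/Min below.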
3166template <>
3167uint16_t
3168fplibMax(uint16_t op1, uint16_t op2, FPSCR &fpscr)
3169{
3170 int mode = modeConv(fpscr);
3171 int flags = 0;
3172 int sgn1, exp1, sgn2, exp2;
3173 uint16_t mnt1, mnt2, x, result;
3174
3175 fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3176 fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3177
3178 if ((x = fp16_process_NaNs(op1, op2, mode, &flags))) {
3179 result = x;
3180 } else {
3181 result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ?
3182 fp16_repack(sgn1, exp1, mnt1) :
3183 fp16_repack(sgn2, exp2, mnt2));
3184 }
3185 set_fpscr0(fpscr, flags);
3186 return result;
3187}
3188
3189template <>
3190uint32_t
3191fplibMax(uint32_t op1, uint32_t op2, FPSCR &fpscr)
3192{
3193 int mode = modeConv(fpscr);
3194 int flags = 0;
3195 int sgn1, exp1, sgn2, exp2;
3196 uint32_t mnt1, mnt2, x, result;
3197
3198 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3199 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3200
3201 if ((x = fp32_process_NaNs(op1, op2, mode, &flags))) {
3202 result = x;
3203 } else {
3204 result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ?
3205 fp32_repack(sgn1, exp1, mnt1) :
3206 fp32_repack(sgn2, exp2, mnt2));
3207 }
3208 set_fpscr0(fpscr, flags);
3209 return result;
3210}
3211
3212template <>
3213uint64_t
3214fplibMax(uint64_t op1, uint64_t op2, FPSCR &fpscr)
3215{
3216 int mode = modeConv(fpscr);
3217 int flags = 0;
3218 int sgn1, exp1, sgn2, exp2;
3219 uint64_t mnt1, mnt2, x, result;
3220
3221 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3222 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3223
3224 if ((x = fp64_process_NaNs(op1, op2, mode, &flags))) {
3225 result = x;
3226 } else {
3227 result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ?
3228 fp64_repack(sgn1, exp1, mnt1) :
3229 fp64_repack(sgn2, exp2, mnt2));
3230 }
3231 set_fpscr0(fpscr, flags);
3232 return result;
3233}
3234
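// Editorial sketch: once NaNs are excluded, IEEE values of equal sign
// order like their raw bit patterns (reversed for negatives), so the
// selection above needs only one integer compare. A hypothetical
// standalone form of the same idea:
//
//     static inline uint32_t
//     fp32_max_bits_sketch(uint32_t a, uint32_t b)
//     {
//         int sa = a >> (FP32_BITS - 1), sb = b >> (FP32_BITS - 1);
//         // signs differ: the non-negative operand wins; same sign:
//         // larger magnitude wins for positives, smaller for negatives
//         bool pick_a = sa != sb ? sb : sa ^ (a > b);
//         return pick_a ? a : b;
//     }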
3235template <>
3236uint16_t
3237fplibMaxNum(uint16_t op1, uint16_t op2, FPSCR &fpscr)
3238{
3239 fp16_minmaxnum(&op1, &op2, 1);
3240 return fplibMax<uint16_t>(op1, op2, fpscr);
3241}
3242
3243template <>
3244uint32_t
3245fplibMaxNum(uint32_t op1, uint32_t op2, FPSCR &fpscr)
3246{
3247 fp32_minmaxnum(&op1, &op2, 1);
3248 return fplibMax<uint32_t>(op1, op2, fpscr);
3249}
3250
3251template <>
3252uint64_t
3253fplibMaxNum(uint64_t op1, uint64_t op2, FPSCR &fpscr)
3254{
3255 fp64_minmaxnum(&op1, &op2, 1);
3256 return fplibMax<uint64_t>(op1, op2, fpscr);
3257}
3258
3259template <>
3260uint16_t
3261fplibMin(uint16_t op1, uint16_t op2, FPSCR &fpscr)
3262{
3263 int mode = modeConv(fpscr);
3264 int flags = 0;
3265 int sgn1, exp1, sgn2, exp2;
3266 uint16_t mnt1, mnt2, x, result;
3267
3268 fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3269 fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3270
3271 if ((x = fp16_process_NaNs(op1, op2, mode, &flags))) {
3272 result = x;
3273 } else {
3274 result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ?
3275 fp16_repack(sgn1, exp1, mnt1) :
3276 fp16_repack(sgn2, exp2, mnt2));
3277 }
3278 set_fpscr0(fpscr, flags);
3279 return result;
3280}
3281
3282template <>
3283uint32_t
3284fplibMin(uint32_t op1, uint32_t op2, FPSCR &fpscr)
3285{
3286 int mode = modeConv(fpscr);
3287 int flags = 0;
3288 int sgn1, exp1, sgn2, exp2;
3289 uint32_t mnt1, mnt2, x, result;
3290
3291 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3292 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3293
3294 if ((x = fp32_process_NaNs(op1, op2, mode, &flags))) {
3295 result = x;
3296 } else {
3297 result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ?
3298 fp32_repack(sgn1, exp1, mnt1) :
3299 fp32_repack(sgn2, exp2, mnt2));
3300 }
3301 set_fpscr0(fpscr, flags);
3302 return result;
3303}
3304
3305template <>
3306uint64_t
3307fplibMin(uint64_t op1, uint64_t op2, FPSCR &fpscr)
3308{
3309 int mode = modeConv(fpscr);
3310 int flags = 0;
3311 int sgn1, exp1, sgn2, exp2;
3312 uint64_t mnt1, mnt2, x, result;
3313
3314 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3315 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3316
3317 if ((x = fp64_process_NaNs(op1, op2, mode, &flags))) {
3318 result = x;
3319 } else {
3320 result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ?
3321 fp64_repack(sgn1, exp1, mnt1) :
3322 fp64_repack(sgn2, exp2, mnt2));
3323 }
3324 set_fpscr0(fpscr, flags);
3325 return result;
3326}
3327
3328template <>
3329uint16_t
3330fplibMinNum(uint16_t op1, uint16_t op2, FPSCR &fpscr)
3331{
3332 fp16_minmaxnum(&op1, &op2, 0);
3333 return fplibMin<uint16_t>(op1, op2, fpscr);
3334}
3335
3336template <>
3337uint32_t
3338fplibMinNum(uint32_t op1, uint32_t op2, FPSCR &fpscr)
3339{
3340 fp32_minmaxnum(&op1, &op2, 0);
3341 return fplibMin<uint32_t>(op1, op2, fpscr);
3342}
3343
3344template <>
3345uint64_t
3346fplibMinNum(uint64_t op1, uint64_t op2, FPSCR &fpscr)
3347{
3348 fp64_minmaxnum(&op1, &op2, 0);
3349 return fplibMin<uint64_t>(op1, op2, fpscr);
3350}
3351
3352template <>
3353uint16_t
3354fplibMul(uint16_t op1, uint16_t op2, FPSCR &fpscr)
3355{
3356 int flags = 0;
3357 uint16_t result = fp16_mul(op1, op2, modeConv(fpscr), &flags);
3358 set_fpscr0(fpscr, flags);
3359 return result;
3360}
3361
3362template <>
3363uint32_t
3364fplibMul(uint32_t op1, uint32_t op2, FPSCR &fpscr)
3365{
3366 int flags = 0;
3367 uint32_t result = fp32_mul(op1, op2, modeConv(fpscr), &flags);
3368 set_fpscr0(fpscr, flags);
3369 return result;
3370}
3371
3372template <>
3373uint64_t
3374fplibMul(uint64_t op1, uint64_t op2, FPSCR &fpscr)
3375{
3376 int flags = 0;
3377 uint64_t result = fp64_mul(op1, op2, modeConv(fpscr), &flags);
3378 set_fpscr0(fpscr, flags);
3379 return result;
3380}
3381
3382template <>
3383uint16_t
3384fplibMulX(uint16_t op1, uint16_t op2, FPSCR &fpscr)
3385{
3386 int mode = modeConv(fpscr);
3387 int flags = 0;
3388 int sgn1, exp1, sgn2, exp2;
3389 uint16_t mnt1, mnt2, result;
3390
3391 fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3392 fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3393
3394 result = fp16_process_NaNs(op1, op2, mode, &flags);
3395 if (!result) {
3396 if ((exp1 == FP16_EXP_INF && !mnt2) ||
3397 (exp2 == FP16_EXP_INF && !mnt1)) {
3398 result = fp16_FPTwo(sgn1 ^ sgn2);
3399 } else if (exp1 == FP16_EXP_INF || exp2 == FP16_EXP_INF) {
3400 result = fp16_infinity(sgn1 ^ sgn2);
3401 } else if (!mnt1 || !mnt2) {
3402 result = fp16_zero(sgn1 ^ sgn2);
3403 } else {
3404 result = fp16_mul(op1, op2, mode, &flags);
3405 }
3406 }
3407
3408 set_fpscr0(fpscr, flags);
3409
3410 return result;
3411}
3412
3413template <>
3414uint32_t
3415fplibMulX(uint32_t op1, uint32_t op2, FPSCR &fpscr)
3416{
3417 int mode = modeConv(fpscr);
3418 int flags = 0;
3419 int sgn1, exp1, sgn2, exp2;
3420 uint32_t mnt1, mnt2, result;
3421
3422 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3423 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3424
3425 result = fp32_process_NaNs(op1, op2, mode, &flags);
3426 if (!result) {
3427 if ((exp1 == FP32_EXP_INF && !mnt2) ||
3428 (exp2 == FP32_EXP_INF && !mnt1)) {
3429 result = fp32_FPTwo(sgn1 ^ sgn2);
3430 } else if (exp1 == FP32_EXP_INF || exp2 == FP32_EXP_INF) {
3431 result = fp32_infinity(sgn1 ^ sgn2);
3432 } else if (!mnt1 || !mnt2) {
3433 result = fp32_zero(sgn1 ^ sgn2);
3434 } else {
3435 result = fp32_mul(op1, op2, mode, &flags);
3436 }
3437 }
3438
3439 set_fpscr0(fpscr, flags);
3440
3441 return result;
3442}
3443
3444template <>
3445uint64_t
3446fplibMulX(uint64_t op1, uint64_t op2, FPSCR &fpscr)
3447{
3448 int mode = modeConv(fpscr);
3449 int flags = 0;
3450 int sgn1, exp1, sgn2, exp2;
3451 uint64_t mnt1, mnt2, result;
3452
3453 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3454 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3455
3456 result = fp64_process_NaNs(op1, op2, mode, &flags);
3457 if (!result) {
3458 if ((exp1 == FP64_EXP_INF && !mnt2) ||
3459 (exp2 == FP64_EXP_INF && !mnt1)) {
3460 result = fp64_FPTwo(sgn1 ^ sgn2);
3461 } else if (exp1 == FP64_EXP_INF || exp2 == FP64_EXP_INF) {
3462 result = fp64_infinity(sgn1 ^ sgn2);
3463 } else if (!mnt1 || !mnt2) {
3464 result = fp64_zero(sgn1 ^ sgn2);
3465 } else {
3466 result = fp64_mul(op1, op2, mode, &flags);
3467 }
3468 }
3469
3470 set_fpscr0(fpscr, flags);
3471
3472 return result;
3473}
3474
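// Editorial note: FMULX differs from an ordinary multiply only in the
// 0 * infinity case, which returns +/-2.0 instead of raising Invalid
// Operation and producing a NaN; the Neon/SVE reciprocal step sequences
// rely on this. For example (single precision):
//
//     fplibMulX<uint32_t>(0x7f800000 /* +inf */, 0x00000000 /* +0 */, fpscr)
//     // returns fp32_FPTwo(0) == 0x40000000, i.e. 2.0f, with no IOC flag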
3475template <>
3476uint16_t
3477fplibNeg(uint16_t op)
3478{
3479 return op ^ 1ULL << (FP16_BITS - 1);
3480}
3481
3482template <>
3483uint32_t
3484fplibNeg(uint32_t op)
3485{
3486 return op ^ 1ULL << (FP32_BITS - 1);
3487}
3488
3489template <>
3490uint64_t
3491fplibNeg(uint64_t op)
3492{
3493 return op ^ 1ULL << (FP64_BITS - 1);
3494}
3495
3496static const uint8_t recip_sqrt_estimate[256] = {
3497 255, 253, 251, 249, 247, 245, 243, 242, 240, 238, 236, 234, 233, 231, 229, 228,
3498 226, 224, 223, 221, 219, 218, 216, 215, 213, 212, 210, 209, 207, 206, 204, 203,
3499 201, 200, 198, 197, 196, 194, 193, 192, 190, 189, 188, 186, 185, 184, 183, 181,
3500 180, 179, 178, 176, 175, 174, 173, 172, 170, 169, 168, 167, 166, 165, 164, 163,
3501 162, 160, 159, 158, 157, 156, 155, 154, 153, 152, 151, 150, 149, 148, 147, 146,
3502 145, 144, 143, 142, 141, 140, 140, 139, 138, 137, 136, 135, 134, 133, 132, 131,
3503 131, 130, 129, 128, 127, 126, 126, 125, 124, 123, 122, 121, 121, 120, 119, 118,
3504 118, 117, 116, 115, 114, 114, 113, 112, 111, 111, 110, 109, 109, 108, 107, 106,
3505 105, 104, 103, 101, 100, 99, 97, 96, 95, 93, 92, 91, 90, 88, 87, 86,
3506 85, 84, 82, 81, 80, 79, 78, 77, 76, 75, 74, 72, 71, 70, 69, 68,
3507 67, 66, 65, 64, 63, 62, 61, 60, 60, 59, 58, 57, 56, 55, 54, 53,
3508 52, 51, 51, 50, 49, 48, 47, 46, 46, 45, 44, 43, 42, 42, 41, 40,
3509 39, 38, 38, 37, 36, 35, 35, 34, 33, 33, 32, 31, 30, 30, 29, 28,
3510 28, 27, 26, 26, 25, 24, 24, 23, 22, 22, 21, 20, 20, 19, 19, 18,
3511 17, 17, 16, 16, 15, 14, 14, 13, 13, 12, 11, 11, 10, 10, 9, 9,
3512 8, 8, 7, 6, 6, 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 0
3513};
3514
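// Editorial note: this is the 256-entry estimate table from the ARM ARM's
// FRSQRTE pseudocode. The index combines the exponent's parity (bit 7)
// with the leading seven fraction bits, and the 8-bit entry becomes the
// top of the result fraction. Worked single-precision example (see
// fplibRSqrtEstimate below):
//
//     op = 0x40800000 (4.0f): normalised exp 129, index 0, entry 255
//     result exponent = (3 * 127 - 129 - 1) >> 1 = 125
//     result = 0x3eff8000 = 0.49902344f, vs. the exact 1/sqrt(4) = 0.5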
3515template <>
3516uint16_t
3517fplibRSqrtEstimate(uint16_t op, FPSCR &fpscr)
3518{
3519 int mode = modeConv(fpscr);
3520 int flags = 0;
3521 int sgn, exp;
3522 uint16_t mnt, result;
3523
3524 fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags);
3525
3526 if (fp16_is_NaN(exp, mnt)) {
3527 result = fp16_process_NaN(op, mode, &flags);
3528 } else if (!mnt) {
3529 result = fp16_infinity(sgn);
3530 flags |= FPLIB_DZC;
3531 } else if (sgn) {
3532 result = fp16_defaultNaN();
3533 flags |= FPLIB_IOC;
3534 } else if (exp == FP16_EXP_INF) {
3535 result = fp16_zero(0);
3536 } else {
3537 exp += FP16_EXP_BITS;
3538 mnt = fp16_normalise(mnt, &exp);
3539 mnt = recip_sqrt_estimate[(~exp & 1) << 7 |
3540 (mnt >> (FP16_BITS - 8) & 127)];
3541 result = fp16_pack(0, (3 * FP16_EXP_BIAS - exp - 1) >> 1,
3542 mnt << (FP16_MANT_BITS - 8));
3543 }
3544
3545 set_fpscr0(fpscr, flags);
3546
3547 return result;
3548}
3549
3550template <>
3551uint32_t
3552fplibRSqrtEstimate(uint32_t op, FPSCR &fpscr)
3553{
3554 int mode = modeConv(fpscr);
3555 int flags = 0;
3556 int sgn, exp;
3557 uint32_t mnt, result;
3558
3559 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
3560
3561 if (fp32_is_NaN(exp, mnt)) {
3562 result = fp32_process_NaN(op, mode, &flags);
3563 } else if (!mnt) {
3564 result = fp32_infinity(sgn);
3565 flags |= FPLIB_DZC;
3566 } else if (sgn) {
3567 result = fp32_defaultNaN();
3568 flags |= FPLIB_IOC;
3569 } else if (exp == FP32_EXP_INF) {
3570 result = fp32_zero(0);
3571 } else {
3572 exp += FP32_EXP_BITS;
3573 mnt = fp32_normalise(mnt, &exp);
3574 mnt = recip_sqrt_estimate[(~exp & 1) << 7 |
3575 (mnt >> (FP32_BITS - 8) & 127)];
3576 result = fp32_pack(0, (3 * FP32_EXP_BIAS - exp - 1) >> 1,
3577 mnt << (FP32_MANT_BITS - 8));
3578 }
3579
3580 set_fpscr0(fpscr, flags);
3581
3582 return result;
3583}
3584
3585template <>
3586uint64_t
3587fplibRSqrtEstimate(uint64_t op, FPSCR &fpscr)
3588{
3589 int mode = modeConv(fpscr);
3590 int flags = 0;
3591 int sgn, exp;
3592 uint64_t mnt, result;
3593
3594 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
3595
3596 if (fp64_is_NaN(exp, mnt)) {
3597 result = fp64_process_NaN(op, mode, &flags);
3598 } else if (!mnt) {
3599 result = fp64_infinity(sgn);
3600 flags |= FPLIB_DZC;
3601 } else if (sgn) {
3602 result = fp64_defaultNaN();
3603 flags |= FPLIB_IOC;
3604 } else if (exp == FP64_EXP_INF) {
3605 result = fp64_zero(0);
3606 } else {
3607 exp += FP64_EXP_BITS;
3608 mnt = fp64_normalise(mnt, &exp);
3609 mnt = recip_sqrt_estimate[(~exp & 1) << 7 |
3610 (mnt >> (FP64_BITS - 8) & 127)];
3611 result = fp64_pack(0, (3 * FP64_EXP_BIAS - exp - 1) >> 1,
3612 mnt << (FP64_MANT_BITS - 8));
3613 }
3614
3615 set_fpscr0(fpscr, flags);
3616
3617 return result;
3618}
3619
3620template <>
3621uint16_t
3622fplibRSqrtStepFused(uint16_t op1, uint16_t op2, FPSCR &fpscr)
3623{
3624 int mode = modeConv(fpscr);
3625 int flags = 0;
3626 int sgn1, exp1, sgn2, exp2;
3627 uint16_t mnt1, mnt2, result;
3628
3629 op1 = fplibNeg<uint16_t>(op1);
3630 fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3631 fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3632
3633 result = fp16_process_NaNs(op1, op2, mode, &flags);
3634 if (!result) {
3635 if ((exp1 == FP16_EXP_INF && !mnt2) ||
3636 (exp2 == FP16_EXP_INF && !mnt1)) {
3637 result = fp16_FPOnePointFive(0);
3638 } else if (exp1 == FP16_EXP_INF || exp2 == FP16_EXP_INF) {
3639 result = fp16_infinity(sgn1 ^ sgn2);
3640 } else {
3641 result = fp16_muladd(fp16_FPThree(0), op1, op2, -1, mode, &flags);
3642 }
3643 }
3644
3645 set_fpscr0(fpscr, flags);
3646
3647 return result;
3648}
3649
3650template <>
3651uint32_t
3652fplibRSqrtStepFused(uint32_t op1, uint32_t op2, FPSCR &fpscr)
3653{
3654 int mode = modeConv(fpscr);
3655 int flags = 0;
3656 int sgn1, exp1, sgn2, exp2;
3657 uint32_t mnt1, mnt2, result;
3658
3659 op1 = fplibNeg<uint32_t>(op1);
3660 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3661 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3662
3663 result = fp32_process_NaNs(op1, op2, mode, &flags);
3664 if (!result) {
3665 if ((exp1 == FP32_EXP_INF && !mnt2) ||
3666 (exp2 == FP32_EXP_INF && !mnt1)) {
3667 result = fp32_FPOnePointFive(0);
3668 } else if (exp1 == FP32_EXP_INF || exp2 == FP32_EXP_INF) {
3669 result = fp32_infinity(sgn1 ^ sgn2);
3670 } else {
3671 result = fp32_muladd(fp32_FPThree(0), op1, op2, -1, mode, &flags);
3672 }
3673 }
3674
3675 set_fpscr0(fpscr, flags);
3676
3677 return result;
3678}
3679
3680template <>
3681uint64_t
3682fplibRSqrtStepFused(uint64_t op1, uint64_t op2, FPSCR &fpscr)
3683{
3684 int mode = modeConv(fpscr);
3685 int flags = 0;
3686 int sgn1, exp1, sgn2, exp2;
3687 uint64_t mnt1, mnt2, result;
3688
3689 op1 = fplibNeg<uint64_t>(op1);
3690 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3691 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3692
3693 result = fp64_process_NaNs(op1, op2, mode, &flags);
3694 if (!result) {
3695 if ((exp1 == FP64_EXP_INF && !mnt2) ||
3696 (exp2 == FP64_EXP_INF && !mnt1)) {
3697 result = fp64_FPOnePointFive(0);
3698 } else if (exp1 == FP64_EXP_INF || exp2 == FP64_EXP_INF) {
3699 result = fp64_infinity(sgn1 ^ sgn2);
3700 } else {
3701 result = fp64_muladd(fp64_FPThree(0), op1, op2, -1, mode, &flags);
3702 }
3703 }
3704
3705 set_fpscr0(fpscr, flags);
3706
3707 return result;
3708}
3709
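// Editorial note: FRSQRTS computes (3 - op1 * op2) / 2; op1 is negated up
// front and the trailing muladd argument of -1 halves the result. With an
// estimate y for 1/sqrt(x), one Newton-Raphson refinement is (sketch only,
// composing this library's own entry points):
//
//     // y' = y * (3 - (x * y) * y) / 2
//     uint32_t xy = fplibMulX<uint32_t>(x, y, fpscr);
//     uint32_t s  = fplibRSqrtStepFused<uint32_t>(xy, y, fpscr);
//     y = fplibMulX<uint32_t>(y, s, fpscr);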
3710template <>
3711uint16_t
3712fplibRecipEstimate(uint16_t op, FPSCR &fpscr)
3713{
3714 int mode = modeConv(fpscr);
3715 int flags = 0;
3716 int sgn, exp;
3717 uint16_t mnt, result;
3718
3719 fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags);
3720
3721 if (fp16_is_NaN(exp, mnt)) {
3722 result = fp16_process_NaN(op, mode, &flags);
3723 } else if (exp == FP16_EXP_INF) {
3724 result = fp16_zero(sgn);
3725 } else if (!mnt) {
3726 result = fp16_infinity(sgn);
3727 flags |= FPLIB_DZC;
3728 } else if (!((uint16_t)(op << 1) >> (FP16_MANT_BITS - 1))) {
3729 bool overflow_to_inf = false;
3730 switch (FPCRRounding(fpscr)) {
3731 case FPRounding_TIEEVEN:
3732 overflow_to_inf = true;
3733 break;
3734 case FPRounding_POSINF:
3735 overflow_to_inf = !sgn;
3736 break;
3737 case FPRounding_NEGINF:
3738 overflow_to_inf = sgn;
3739 break;
3740 case FPRounding_ZERO:
3741 overflow_to_inf = false;
3742 break;
3743 default:
3744 panic("Unrecognized FP rounding mode");
3744 }
3745 result = overflow_to_inf ? fp16_infinity(sgn) : fp16_max_normal(sgn);
3746 flags |= FPLIB_OFC | FPLIB_IXC;
3747 } else if (fpscr.fz16 && exp >= 2 * FP16_EXP_BIAS - 1) {
3748 result = fp16_zero(sgn);
3749 flags |= FPLIB_UFC;
3750 } else {
3751 exp += FP16_EXP_BITS;
3752 mnt = fp16_normalise(mnt, &exp);
3753 int result_exp = 2 * FP16_EXP_BIAS - 1 - exp;
3754 uint16_t fraction = (((uint32_t)1 << 19) /
3755 (mnt >> (FP16_BITS - 10) | 1) + 1) >> 1;
3756 fraction <<= FP16_MANT_BITS - 8;
3757 if (result_exp == 0) {
3758 fraction >>= 1;
3759 } else if (result_exp == -1) {
3760 fraction >>= 2;
3761 result_exp = 0;
3762 }
3763 result = fp16_pack(sgn, result_exp, fraction);
3764 }
3765
3766 set_fpscr0(fpscr, flags);
3767
3768 return result;
3769}
3770
3771template <>
3772uint32_t
3773fplibRecipEstimate(uint32_t op, FPSCR &fpscr)
3774{
3775 int mode = modeConv(fpscr);
3776 int flags = 0;
3777 int sgn, exp;
3778 uint32_t mnt, result;
3779
3780 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
3781
3782 if (fp32_is_NaN(exp, mnt)) {
3783 result = fp32_process_NaN(op, mode, &flags);
3784 } else if (exp == FP32_EXP_INF) {
3785 result = fp32_zero(sgn);
3786 } else if (!mnt) {
3787 result = fp32_infinity(sgn);
3788 flags |= FPLIB_DZC;
3789 } else if (!((uint32_t)(op << 1) >> (FP32_MANT_BITS - 1))) {
3790 bool overflow_to_inf = false;
3791 switch (FPCRRounding(fpscr)) {
3792 case FPRounding_TIEEVEN:
3793 overflow_to_inf = true;
3794 break;
3795 case FPRounding_POSINF:
3796 overflow_to_inf = !sgn;
3797 break;
3798 case FPRounding_NEGINF:
3799 overflow_to_inf = sgn;
3800 break;
3801 case FPRounding_ZERO:
3802 overflow_to_inf = false;
3803 break;
3804 default:
3805 panic("Unrecognized FP rounding mode");
3806 }
3807 result = overflow_to_inf ? fp32_infinity(sgn) : fp32_max_normal(sgn);
3808 flags |= FPLIB_OFC | FPLIB_IXC;
3809 } else if (fpscr.fz && exp >= 2 * FP32_EXP_BIAS - 1) {
3810 result = fp32_zero(sgn);
3811 flags |= FPLIB_UFC;
3812 } else {
3813 exp += FP32_EXP_BITS;
3814 mnt = fp32_normalise(mnt, &exp);
3815 int result_exp = 2 * FP32_EXP_BIAS - 1 - exp;
3816 uint32_t fraction = (((uint32_t)1 << 19) /
3817 (mnt >> (FP32_BITS - 10) | 1) + 1) >> 1;
3818 fraction <<= FP32_MANT_BITS - 8;
3819 if (result_exp == 0) {
3820 fraction >>= 1;
3821 } else if (result_exp == -1) {
3822 fraction >>= 2;
3823 result_exp = 0;
3824 }
3825 result = fp32_pack(sgn, result_exp, fraction);
3826 }
3827
3828 set_fpscr0(fpscr, flags);
3829
3830 return result;
3831}
3832
3833template <>
3834uint64_t
3835fplibRecipEstimate(uint64_t op, FPSCR &fpscr)
3836{
3837 int mode = modeConv(fpscr);
3838 int flags = 0;
3839 int sgn, exp;
3840 uint64_t mnt, result;
3841
3842 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
3843
3844 if (fp64_is_NaN(exp, mnt)) {
3845 result = fp64_process_NaN(op, mode, &flags);
3846 } else if (exp == FP64_EXP_INF) {
3847 result = fp64_zero(sgn);
3848 } else if (!mnt) {
3849 result = fp64_infinity(sgn);
3850 flags |= FPLIB_DZC;
3851 } else if (!((uint64_t)(op << 1) >> (FP64_MANT_BITS - 1))) {
3852 bool overflow_to_inf = false;
3853 switch (FPCRRounding(fpscr)) {
3854 case FPRounding_TIEEVEN:
3855 overflow_to_inf = true;
3856 break;
3857 case FPRounding_POSINF:
3858 overflow_to_inf = !sgn;
3859 break;
3860 case FPRounding_NEGINF:
3861 overflow_to_inf = sgn;
3862 break;
3863 case FPRounding_ZERO:
3864 overflow_to_inf = false;
3865 break;
3866 default:
3867 panic("Unrecognized FP rounding mode");
3868 }
3869 result = overflow_to_inf ? fp64_infinity(sgn) : fp64_max_normal(sgn);
3870 flags |= FPLIB_OFC | FPLIB_IXC;
3871 } else if (fpscr.fz && exp >= 2 * FP64_EXP_BIAS - 1) {
3872 result = fp64_zero(sgn);
3873 flags |= FPLIB_UFC;
3874 } else {
3875 exp += FP64_EXP_BITS;
3876 mnt = fp64_normalise(mnt, &exp);
3877 int result_exp = 2 * FP64_EXP_BIAS - 1 - exp;
3878 uint64_t fraction = (((uint32_t)1 << 19) /
3879 (mnt >> (FP64_BITS - 10) | 1) + 1) >> 1;
3880 fraction <<= FP64_MANT_BITS - 8;
3881 if (result_exp == 0) {
3882 fraction >>= 1;
3883 } else if (result_exp == -1) {
3884 fraction >>= 2;
3885 result_exp = 0;
3886 }
3887 result = fp64_pack(sgn, result_exp, fraction);
3888 }
3889
3890 set_fpscr0(fpscr, flags);
3891
3892 return result;
3893}
3894
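// Editorial note: unlike the square-root path, FRECPE derives its 9-bit
// estimate with an actual integer division of 2^19 by the (odd-forced) top
// ten significand bits, rounded to nearest; result_exp values of 0 and -1
// fall through to subnormal outputs. Worked single-precision example:
//
//     op = 0x40000000 (2.0f): normalised exp 128, divisor 512 | 1 = 513
//     fraction = ((1 << 19) / 513 + 1) >> 1 = 511
//     result exponent = 2 * 127 - 1 - 128 = 125
//     result = 0x3eff8000 = 0.49902344f, vs. the exact 1/2 = 0.5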
3895template <>
3896uint16_t
3897fplibRecipStepFused(uint16_t op1, uint16_t op2, FPSCR &fpscr)
3898{
3899 int mode = modeConv(fpscr);
3900 int flags = 0;
3901 int sgn1, exp1, sgn2, exp2;
3902 uint16_t mnt1, mnt2, result;
3903
3904 op1 = fplibNeg<uint16_t>(op1);
3905 fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3906 fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3907
3908 result = fp16_process_NaNs(op1, op2, mode, &flags);
3909 if (!result) {
3910 if ((exp1 == FP16_EXP_INF && !mnt2) ||
3911 (exp2 == FP16_EXP_INF && !mnt1)) {
3912 result = fp16_FPTwo(0);
3913 } else if (exp1 == FP16_EXP_INF || exp2 == FP16_EXP_INF) {
3914 result = fp16_infinity(sgn1 ^ sgn2);
3915 } else {
3916 result = fp16_muladd(fp16_FPTwo(0), op1, op2, 0, mode, &flags);
3917 }
3918 }
3919
3920 set_fpscr0(fpscr, flags);
3921
3922 return result;
3923}
3924
3925template <>
3926uint32_t
3927fplibRecipStepFused(uint32_t op1, uint32_t op2, FPSCR &fpscr)
3928{
3929 int mode = modeConv(fpscr);
3930 int flags = 0;
3931 int sgn1, exp1, sgn2, exp2;
3932 uint32_t mnt1, mnt2, result;
3933
3934 op1 = fplibNeg<uint32_t>(op1);
3935 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3936 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3937
3938 result = fp32_process_NaNs(op1, op2, mode, &flags);
3939 if (!result) {
3940 if ((exp1 == FP32_EXP_INF && !mnt2) ||
3941 (exp2 == FP32_EXP_INF && !mnt1)) {
3942 result = fp32_FPTwo(0);
3943 } else if (exp1 == FP32_EXP_INF || exp2 == FP32_EXP_INF) {
3944 result = fp32_infinity(sgn1 ^ sgn2);
3945 } else {
3946 result = fp32_muladd(fp32_FPTwo(0), op1, op2, 0, mode, &flags);
3947 }
3948 }
3949
3950 set_fpscr0(fpscr, flags);
3951
3952 return result;
3953}
3954
3955template <>
3956uint64_t
3957fplibRecipStepFused(uint64_t op1, uint64_t op2, FPSCR &fpscr)
3958{
3959 int mode = modeConv(fpscr);
3960 int flags = 0;
3961 int sgn1, exp1, sgn2, exp2;
3962 uint64_t mnt1, mnt2, result;
3963
3964 op1 = fplibNeg<uint64_t>(op1);
3965 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3966 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3967
3968 result = fp64_process_NaNs(op1, op2, mode, &flags);
3969 if (!result) {
3970 if ((exp1 == FP64_EXP_INF && !mnt2) ||
3971 (exp2 == FP64_EXP_INF && !mnt1)) {
3972 result = fp64_FPTwo(0);
3973 } else if (exp1 == FP64_EXP_INF || exp2 == FP64_EXP_INF) {
3974 result = fp64_infinity(sgn1 ^ sgn2);
3975 } else {
3976 result = fp64_muladd(fp64_FPTwo(0), op1, op2, 0, mode, &flags);
3977 }
3978 }
3979
3980 set_fpscr0(fpscr, flags);
3981
3982 return result;
3983}
3984
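// Editorial note: FRECPS computes 2 - op1 * op2 (op1 is negated, then
// fused into FPTwo). With an estimate y for 1/x, one Newton-Raphson
// refinement is (sketch only, composing this library's own entry points):
//
//     // y' = y * (2 - x * y)
//     uint32_t s = fplibRecipStepFused<uint32_t>(x, y, fpscr);
//     y = fplibMulX<uint32_t>(y, s, fpscr);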
3985template <>
3986uint16_t
3987fplibRecpX(uint16_t op, FPSCR &fpscr)
3988{
3989 int mode = modeConv(fpscr);
3990 int flags = 0;
3991 int sgn, exp;
3992 uint16_t mnt, result;
3993
3994 fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags);
3995
3996 if (fp16_is_NaN(exp, mnt)) {
3997 result = fp16_process_NaN(op, mode, &flags);
3998 }
3999 else {
4000 if (!mnt) { // Zero and denormals
4001 result = fp16_pack(sgn, FP16_EXP_INF - 1, 0);
4002 } else { // Infinities and normals
4003 result = fp16_pack(sgn, exp ^ FP16_EXP_INF, 0);
4004 }
4005 }
4006
4007 set_fpscr0(fpscr, flags);
4008
4009 return result;
4010}
4011
4012template <>
4013uint32_t
4014fplibRecpX(uint32_t op, FPSCR &fpscr)
4015{
4016 int mode = modeConv(fpscr);
4017 int flags = 0;
4018 int sgn, exp;
4019 uint32_t mnt, result;
4020
4021 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
4022
4023 if (fp32_is_NaN(exp, mnt)) {
4024 result = fp32_process_NaN(op, mode, &flags);
4025 }
4026 else {
4027 if (!mnt) { // Zero and denormals
4028 result = fp32_pack(sgn, FP32_EXP_INF - 1, 0);
4029 } else { // Infinities and normals
4030 result = fp32_pack(sgn, exp ^ FP32_EXP_INF, 0);
4031 }
4032 }
4033
4034 set_fpscr0(fpscr, flags);
4035
4036 return result;
4037}
4038
4039template <>
4040uint64_t
4041fplibRecpX(uint64_t op, FPSCR &fpscr)
4042{
4043 int mode = modeConv(fpscr);
4044 int flags = 0;
4045 int sgn, exp;
4046 uint64_t mnt, result;
4047
4048 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
4049
4050 if (fp64_is_NaN(exp, mnt)) {
4051 result = fp64_process_NaN(op, mode, &flags);
4052 }
4053 else {
4054 if (!mnt) { // Zero and denormals
4055 result = fp64_pack(sgn, FP64_EXP_INF - 1, 0);
4056 } else { // Infinities and normals
4057 result = fp64_pack(sgn, exp ^ FP64_EXP_INF, 0);
4058 }
4059 }
4060
4061 set_fpscr0(fpscr, flags);
4062
4063 return result;
4064}
4065
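// Editorial note: FRECPX only needs to land within a factor of two of the
// reciprocal's magnitude, so it simply inverts the exponent bits
// (exp ^ EXP_INF, roughly 2 * bias - exp) and clears the fraction; it is
// used to pre-scale operands so a following divide or reciprocal cannot
// overflow. Worked half-precision example:
//
//     fplibRecpX<uint16_t>(0x4400 /* 4.0 */, fpscr)
//     // exp 17 ^ 0x1f = 14 -> 0x3800 = 0.5, within 2x of 1/4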
4066template <>
4067uint16_t
4068fplibRoundInt(uint16_t op, FPRounding rounding, bool exact, FPSCR &fpscr)
4069{
4070 int expint = FP16_EXP_BIAS + FP16_MANT_BITS;
4071 int mode = modeConv(fpscr);
4072 int flags = 0;
4073 int sgn, exp;
4074 uint16_t mnt, result;
4075
4076 // Unpack using FPCR to determine if subnormals are flushed-to-zero:
4077 fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags);
4078
4079 // Handle NaNs, infinities and zeroes:
4080 if (fp16_is_NaN(exp, mnt)) {
4081 result = fp16_process_NaN(op, mode, &flags);
4082 } else if (exp == FP16_EXP_INF) {
4083 result = fp16_infinity(sgn);
4084 } else if (!mnt) {
4085 result = fp16_zero(sgn);
4086 } else if (exp >= expint) {
4087 // There are no fractional bits
4088 result = op;
4089 } else {
4090 // Truncate towards zero:
4091 uint16_t x = expint - exp >= FP16_BITS ? 0 : mnt >> (expint - exp);
4092 int err = exp < expint - FP16_BITS ? 1 :
4093 ((mnt << 1 >> (expint - exp - 1) & 3) |
4094 ((uint16_t)(mnt << 2 << (FP16_BITS + exp - expint)) != 0));
4095 switch (rounding) {
4096 case FPRounding_TIEEVEN:
4097 x += (err == 3 || (err == 2 && (x & 1)));
4098 break;
4099 case FPRounding_POSINF:
4100 x += err && !sgn;
4101 break;
4102 case FPRounding_NEGINF:
4103 x += err && sgn;
4104 break;
4105 case FPRounding_ZERO:
4106 break;
4107 case FPRounding_TIEAWAY:
4108 x += err >> 1;
4109 break;
4110 default:
4111 panic("Unrecognized FP rounding mode");
4112 }
4113
4114 if (x == 0) {
4115 result = fp16_zero(sgn);
4116 } else {
4117 exp = expint;
4118 mnt = fp16_normalise(x, &exp);
4119 result = fp16_pack(sgn, exp + FP16_EXP_BITS, mnt >> FP16_EXP_BITS);
4120 }
4121
4122 if (err && exact)
4123 flags |= FPLIB_IXC;
4124 }
4125
4126 set_fpscr0(fpscr, flags);
4127
4128 return result;
4129}
4130
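// Editorial note: in fplibRoundInt the 2-bit err code classifies the
// discarded fraction -- the high bit is the first bit below the integer
// part, the low bit a sticky OR of everything further down:
//
//     err == 0 : fraction == 0 (exact)     err == 2 : fraction == 1/2 (tie)
//     err == 1 : 0 < fraction < 1/2        err == 3 : fraction > 1/2
//
// So TIEEVEN bumps x on err == 3, or on err == 2 when x is odd, and
// TIEAWAY bumps by err >> 1 (fraction >= 1/2). For example, rounding 2.5f
// (0x40200000) to nearest-even gives x = 2, err = 2, x stays even:
// result 2.0f.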
4131template <>
4132uint32_t
4133fplibRoundInt(uint32_t op, FPRounding rounding, bool exact, FPSCR &fpscr)
4134{
4135 int expint = FP32_EXP_BIAS + FP32_MANT_BITS;
4136 int mode = modeConv(fpscr);
4137 int flags = 0;
4138 int sgn, exp;
4139 uint32_t mnt, result;
4140
4141 // Unpack using FPCR to determine if subnormals are flushed-to-zero:
4142 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
4143
4144 // Handle NaNs, infinities and zeroes:
4145 if (fp32_is_NaN(exp, mnt)) {
4146 result = fp32_process_NaN(op, mode, &flags);
4147 } else if (exp == FP32_EXP_INF) {
4148 result = fp32_infinity(sgn);
4149 } else if (!mnt) {
4150 result = fp32_zero(sgn);
4151 } else if (exp >= expint) {
4152 // There are no fractional bits
4153 result = op;
4154 } else {
4155 // Truncate towards zero:
4156 uint32_t x = expint - exp >= FP32_BITS ? 0 : mnt >> (expint - exp);
4157 int err = exp < expint - FP32_BITS ? 1 :
4158 ((mnt << 1 >> (expint - exp - 1) & 3) |
4159 ((uint32_t)(mnt << 2 << (FP32_BITS + exp - expint)) != 0));
4160 switch (rounding) {
4161 case FPRounding_TIEEVEN:
4162 x += (err == 3 || (err == 2 && (x & 1)));
4163 break;
4164 case FPRounding_POSINF:
4165 x += err && !sgn;
4166 break;
4167 case FPRounding_NEGINF:
4168 x += err && sgn;
4169 break;
4170 case FPRounding_ZERO:
4171 break;
4172 case FPRounding_TIEAWAY:
4173 x += err >> 1;
4174 break;
4175 default:
4176 panic("Unrecognized FP rounding mode");
4177 }
4178
4179 if (x == 0) {
4180 result = fp32_zero(sgn);
4181 } else {
4182 exp = expint;
4183 mnt = fp32_normalise(x, &exp);
4184 result = fp32_pack(sgn, exp + FP32_EXP_BITS, mnt >> FP32_EXP_BITS);
4185 }
4186
4187 if (err && exact)
4188 flags |= FPLIB_IXC;
4189 }
4190
4191 set_fpscr0(fpscr, flags);
4192
4193 return result;
4194}
4195
4196template <>
4197uint64_t
4198fplibRoundInt(uint64_t op, FPRounding rounding, bool exact, FPSCR &fpscr)
4199{
4200 int expint = FP64_EXP_BIAS + FP64_MANT_BITS;
4201 int mode = modeConv(fpscr);
4202 int flags = 0;
4203 int sgn, exp;
4204 uint64_t mnt, result;
4205
4206 // Unpack using FPCR to determine if subnormals are flushed-to-zero:
4207 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
4208
4209 // Handle NaNs, infinities and zeroes:
4210 if (fp64_is_NaN(exp, mnt)) {
4211 result = fp64_process_NaN(op, mode, &flags);
4212 } else if (exp == FP64_EXP_INF) {
4213 result = fp64_infinity(sgn);
4214 } else if (!mnt) {
4215 result = fp64_zero(sgn);
4216 } else if (exp >= expint) {
4217 // There are no fractional bits
4218 result = op;
4219 } else {
4220 // Truncate towards zero:
4221 uint64_t x = expint - exp >= FP64_BITS ? 0 : mnt >> (expint - exp);
4222 int err = exp < expint - FP64_BITS ? 1 :
4223 ((mnt << 1 >> (expint - exp - 1) & 3) |
4224 ((uint64_t)(mnt << 2 << (FP64_BITS + exp - expint)) != 0));
4225 switch (rounding) {
4226 case FPRounding_TIEEVEN:
4227 x += (err == 3 || (err == 2 && (x & 1)));
4228 break;
4229 case FPRounding_POSINF:
4230 x += err && !sgn;
4231 break;
4232 case FPRounding_NEGINF:
4233 x += err && sgn;
4234 break;
4235 case FPRounding_ZERO:
4236 break;
4237 case FPRounding_TIEAWAY:
4238 x += err >> 1;
4239 break;
4240 default:
4241 panic("Unrecognized FP rounding mode");
4242 }
4243
4244 if (x == 0) {
4245 result = fp64_zero(sgn);
4246 } else {
4247 exp = expint;
4248 mnt = fp64_normalise(x, &exp);
4249 result = fp64_pack(sgn, exp + FP64_EXP_BITS, mnt >> FP64_EXP_BITS);
4250 }
4251
4252 if (err && exact)
4253 flags |= FPLIB_IXC;
4254 }
4255
4256 set_fpscr0(fpscr, flags);
4257
4258 return result;
4259}
4260
4261template <>
4262uint16_t
4263fplibScale(uint16_t op1, uint16_t op2, FPSCR &fpscr)
4264{
4265 int flags = 0;
4266 uint16_t result = fp16_scale(op1, (int16_t)op2, modeConv(fpscr), &flags);
4267 set_fpscr0(fpscr, flags);
4268 return result;
4269}
4270
4271template <>
4272uint32_t
4273fplibScale(uint32_t op1, uint32_t op2, FPSCR &fpscr)
4274{
4275 int flags = 0;
4276 uint32_t result = fp32_scale(op1, (int32_t)op2, modeConv(fpscr), &flags);
4277 set_fpscr0(fpscr, flags);
4278 return result;
4279}
4280
4281template <>
4282uint64_t
4283fplibScale(uint64_t op1, uint64_t op2, FPSCR &fpscr)
4284{
4285 int flags = 0;
4286 uint64_t result = fp64_scale(op1, (int64_t)op2, modeConv(fpscr), &flags);
4287 set_fpscr0(fpscr, flags);
4288 return result;
4289}
4290
4291template <>
4292uint16_t
4293fplibSqrt(uint16_t op, FPSCR &fpscr)
4294{
4295 int flags = 0;
4296 uint16_t result = fp16_sqrt(op, modeConv(fpscr), &flags);
4297 set_fpscr0(fpscr, flags);
4298 return result;
4299}
4300
4301template <>
4302uint32_t
4303fplibSqrt(uint32_t op, FPSCR &fpscr)
4304{
4305 int flags = 0;
4306 uint32_t result = fp32_sqrt(op, modeConv(fpscr), &flags);
4307 set_fpscr0(fpscr, flags);
4308 return result;
4309}
4310
4311template <>
4312uint64_t
4313fplibSqrt(uint64_t op, FPSCR &fpscr)
4314{
4315 int flags = 0;
4316 uint64_t result = fp64_sqrt(op, modeConv(fpscr), &flags);
4317 set_fpscr0(fpscr, flags);
4318 return result;
4319}
4320
4321template <>
4322uint16_t
4323fplibSub(uint16_t op1, uint16_t op2, FPSCR &fpscr)
4324{
4325 int flags = 0;
4326 uint16_t result = fp16_add(op1, op2, 1, modeConv(fpscr), &flags);
4327 set_fpscr0(fpscr, flags);
4328 return result;
4329}
4330
4331template <>
4332uint32_t
4333fplibSub(uint32_t op1, uint32_t op2, FPSCR &fpscr)
4334{
4335 int flags = 0;
4336 uint32_t result = fp32_add(op1, op2, 1, modeConv(fpscr), &flags);
4337 set_fpscr0(fpscr, flags);
4338 return result;
4339}
4340
4341template <>
4342uint64_t
4343fplibSub(uint64_t op1, uint64_t op2, FPSCR &fpscr)
4344{
4345 int flags = 0;
4346 uint64_t result = fp64_add(op1, op2, 1, modeConv(fpscr), &flags);
4347 set_fpscr0(fpscr, flags);
4348 return result;
4349}
4350
4351template <>
4352uint16_t
4353fplibTrigMulAdd(uint8_t coeff_index, uint16_t op1, uint16_t op2, FPSCR &fpscr)
4354{
4355 static uint16_t coeff[2][8] = {
4356 {
4357 0x3c00,
4358 0xb155,
4359 0x2030,
4360 0x0000,
4361 0x0000,
4362 0x0000,
4363 0x0000,
4364 0x0000,
4365 },
4366 {
4367 0x3c00,
4368 0xb800,
4369 0x293a,
4370 0x0000,
4371 0x0000,
4372 0x0000,
4373 0x0000,
4374 0x0000
4375 }
4376 };
4377 int flags = 0;
4378 uint16_t result =
4379 fp16_muladd(coeff[op2 >> (FP16_BITS - 1)][coeff_index], op1,
4380 fplibAbs(op2), 0, modeConv(fpscr), &flags);
4381 set_fpscr0(fpscr, flags);
4382 return result;
4383}
4384
4385template <>
4386uint32_t
4387fplibTrigMulAdd(uint8_t coeff_index, uint32_t op1, uint32_t op2, FPSCR &fpscr)
4388{
4389 static uint32_t coeff[2][8] = {
4390 {
4391 0x3f800000,
4392 0xbe2aaaab,
4393 0x3c088886,
4394 0xb95008b9,
4395 0x36369d6d,
4396 0x00000000,
4397 0x00000000,
4398 0x00000000
4399 },
4400 {
4401 0x3f800000,
4402 0xbf000000,
4403 0x3d2aaaa6,
4404 0xbab60705,
4405 0x37cd37cc,
4406 0x00000000,
4407 0x00000000,
4408 0x00000000
4409 }
4410 };
4411 int flags = 0;
4412 uint32_t result =
4413 fp32_muladd(coeff[op2 >> (FP32_BITS - 1)][coeff_index], op1,
4414 fplibAbs(op2), 0, modeConv(fpscr), &flags);
4415 set_fpscr0(fpscr, flags);
4416 return result;
4417}
4418
4419template <>
4420uint64_t
4421fplibTrigMulAdd(uint8_t coeff_index, uint64_t op1, uint64_t op2, FPSCR &fpscr)
4422{
4423 static uint64_t coeff[2][8] = {
4424 {
4425 0x3ff0000000000000ULL,
4426 0xbfc5555555555543ULL,
4427 0x3f8111111110f30cULL,
4428 0xbf2a01a019b92fc6ULL,
4429 0x3ec71de351f3d22bULL,
4430 0xbe5ae5e2b60f7b91ULL,
4431 0x3de5d8408868552fULL,
4432 0x0000000000000000ULL
4433 },
4434 {
4435 0x3ff0000000000000ULL,
4436 0xbfe0000000000000ULL,
4437 0x3fa5555555555536ULL,
4438 0xbf56c16c16c13a0bULL,
4439 0x3efa01a019b1e8d8ULL,
4440 0xbe927e4f7282f468ULL,
4441 0x3e21ee96d2641b13ULL,
4442 0xbda8f76380fbb401ULL
4443 }
4444 };
4445 int flags = 0;
4446 uint64_t result =
4447 fp64_muladd(coeff[op2 >> (FP64_BITS - 1)][coeff_index], op1,
4448 fplibAbs(op2), 0, modeConv(fpscr), &flags);
4449 set_fpscr0(fpscr, flags);
4450 return result;
4451}
4452
4453template <>
4454uint16_t
4455fplibTrigSMul(uint16_t op1, uint16_t op2, FPSCR &fpscr)
4456{
4457 int flags = 0;
4458 int sgn, exp;
4459 uint16_t mnt;
4460
4461 int mode = modeConv(fpscr);
4462 uint16_t result = fp16_mul(op1, op1, mode, &flags);
4463 set_fpscr0(fpscr, flags);
4464
4465 fp16_unpack(&sgn, &exp, &mnt, result, mode, &flags);
4466 if (!fp16_is_NaN(exp, mnt)) {
4467 result = (result & ~(1ULL << (FP16_BITS - 1))) |
4468 op2 << (FP16_BITS - 1);
4469 }
4470 return result;
4471}
4472
4473template <>
4474uint32_t
4475fplibTrigSMul(uint32_t op1, uint32_t op2, FPSCR &fpscr)
4476{
4477 int flags = 0;
4478 int sgn, exp;
4479 uint32_t mnt;
4480
4481 int mode = modeConv(fpscr);
4482 uint32_t result = fp32_mul(op1, op1, mode, &flags);
4483 set_fpscr0(fpscr, flags);
4484
4485 fp32_unpack(&sgn, &exp, &mnt, result, mode, &flags);
4486 if (!fp32_is_NaN(exp, mnt)) {
4487 result = (result & ~(1ULL << (FP32_BITS - 1))) | op2 << (FP32_BITS - 1);
4488 }
4489 return result;
4490}
4491
4492template <>
4493uint64_t
4494fplibTrigSMul(uint64_t op1, uint64_t op2, FPSCR &fpscr)
4495{
4496 int flags = 0;
4497 int sgn, exp;
4498 uint64_t mnt;
4499
4500 int mode = modeConv(fpscr);
4501 uint64_t result = fp64_mul(op1, op1, mode, &flags);
4502 set_fpscr0(fpscr, flags);
4503
4504 fp64_unpack(&sgn, &exp, &mnt, result, mode, &flags);
4505 if (!fp64_is_NaN(exp, mnt)) {
4506 result = (result & ~(1ULL << (FP64_BITS - 1))) | op2 << (FP64_BITS - 1);
4507 }
4508 return result;
4509}
4510
4511template <>
4512uint16_t
4513fplibTrigSSel(uint16_t op1, uint16_t op2, FPSCR &fpscr)
4514{
4515 static constexpr uint16_t fpOne =
4516 (uint16_t)FP16_EXP_BIAS << FP16_MANT_BITS; // 1.0
4517 if (op2 & 1)
4518 op1 = fpOne;
4519 return op1 ^ ((op2 >> 1) << (FP16_BITS - 1));
4520}
4521
4522template <>
4523uint32_t
4524fplibTrigSSel(uint32_t op1, uint32_t op2, FPSCR &fpscr)
4525{
4526 static constexpr uint32_t fpOne =
4527 (uint32_t)FP32_EXP_BIAS << FP32_MANT_BITS; // 1.0
4528 if (op2 & 1)
4529 op1 = fpOne;
4530 return op1 ^ ((op2 >> 1) << (FP32_BITS - 1));
4531}
4532
4533template <>
4534uint64_t
4535fplibTrigSSel(uint64_t op1, uint64_t op2, FPSCR &fpscr)
4536{
4537 static constexpr uint64_t fpOne =
4538 (uint64_t)FP64_EXP_BIAS << FP64_MANT_BITS; // 1.0
4539 if (op2 & 1)
4540 op1 = fpOne;
4541 return op1 ^ ((op2 >> 1) << (FP64_BITS - 1));
4542}
4543
4544static uint64_t
4545FPToFixed_64(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding,
4546 int *flags)
4547{
4548 int expmax = FP64_EXP_BIAS + FP64_BITS - 1;
4549 uint64_t x;
4550 int err;
4551
4552 if (exp > expmax) {
4553 *flags = FPLIB_IOC;
4554 return ((uint64_t)!u << (FP64_BITS - 1)) - !sgn;
4555 }
4556
4557 x = lsr64(mnt << FP64_EXP_BITS, expmax - exp);
4558 err = (exp > expmax - 2 ? 0 :
4559 (lsr64(mnt << FP64_EXP_BITS, expmax - 2 - exp) & 3) |
4560 !!(mnt << FP64_EXP_BITS & (lsl64(1, expmax - 2 - exp) - 1)));
4561
4562 switch (rounding) {
4563 case FPRounding_TIEEVEN:
4564 x += (err == 3 || (err == 2 && (x & 1)));
4565 break;
4566 case FPRounding_POSINF:
4567 x += err && !sgn;
4568 break;
4569 case FPRounding_NEGINF:
4570 x += err && sgn;
4571 break;
4572 case FPRounding_ZERO:
4573 break;
4574 case FPRounding_TIEAWAY:
4575 x += err >> 1;
4576 break;
4577 default:
4243 }
4244
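// The rounded value x is now an exact integer; renormalise and
// repack it (a zero result keeps the operand's sign):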
4245 if (x == 0) {
4246 result = fp64_zero(sgn);
4247 } else {
4248 exp = expint;
4249 mnt = fp64_normalise(x, &exp);
4250 result = fp64_pack(sgn, exp + FP64_EXP_BITS, mnt >> FP64_EXP_BITS);
4251 }
4252
4253 if (err && exact)
4254 flags |= FPLIB_IXC;
4255 }
4256
4257 set_fpscr0(fpscr, flags);
4258
4259 return result;
4260}
4261
4262template <>
4263uint16_t
4264fplibScale(uint16_t op1, uint16_t op2, FPSCR &fpscr)
4265{
4266 int flags = 0;
4267 uint16_t result = fp16_scale(op1, (int16_t)op2, modeConv(fpscr), &flags);
4268 set_fpscr0(fpscr, flags);
4269 return result;
4270}
4271
4272template <>
4273uint32_t
4274fplibScale(uint32_t op1, uint32_t op2, FPSCR &fpscr)
4275{
4276 int flags = 0;
4277 uint32_t result = fp32_scale(op1, (int32_t)op2, modeConv(fpscr), &flags);
4278 set_fpscr0(fpscr, flags);
4279 return result;
4280}
4281
4282template <>
4283uint64_t
4284fplibScale(uint64_t op1, uint64_t op2, FPSCR &fpscr)
4285{
4286 int flags = 0;
4287 uint64_t result = fp64_scale(op1, (int64_t)op2, modeConv(fpscr), &flags);
4288 set_fpscr0(fpscr, flags);
4289 return result;
4290}
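// The fplibScale family computes op1 * 2^op2: the raw bits of op2 are
// reinterpreted as a signed two's-complement integer (hence the casts
// above) and applied by fp*_scale as an exponent adjustment, giving
// the usual FSCALE-style y = x * 2^n behaviour.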
4291
4292template <>
4293uint16_t
4294fplibSqrt(uint16_t op, FPSCR &fpscr)
4295{
4296 int flags = 0;
4297 uint16_t result = fp16_sqrt(op, modeConv(fpscr), &flags);
4298 set_fpscr0(fpscr, flags);
4299 return result;
4300}
4301
4302template <>
4303uint32_t
4304fplibSqrt(uint32_t op, FPSCR &fpscr)
4305{
4306 int flags = 0;
4307 uint32_t result = fp32_sqrt(op, modeConv(fpscr), &flags);
4308 set_fpscr0(fpscr, flags);
4309 return result;
4310}
4311
4312template <>
4313uint64_t
4314fplibSqrt(uint64_t op, FPSCR &fpscr)
4315{
4316 int flags = 0;
4317 uint64_t result = fp64_sqrt(op, modeConv(fpscr), &flags);
4318 set_fpscr0(fpscr, flags);
4319 return result;
4320}
4321
4322template <>
4323uint16_t
4324fplibSub(uint16_t op1, uint16_t op2, FPSCR &fpscr)
4325{
4326 int flags = 0;
4327 uint16_t result = fp16_add(op1, op2, 1, modeConv(fpscr), &flags);
4328 set_fpscr0(fpscr, flags);
4329 return result;
4330}
4331
4332template <>
4333uint32_t
4334fplibSub(uint32_t op1, uint32_t op2, FPSCR &fpscr)
4335{
4336 int flags = 0;
4337 uint32_t result = fp32_add(op1, op2, 1, modeConv(fpscr), &flags);
4338 set_fpscr0(fpscr, flags);
4339 return result;
4340}
4341
4342template <>
4343uint64_t
4344fplibSub(uint64_t op1, uint64_t op2, FPSCR &fpscr)
4345{
4346 int flags = 0;
4347 uint64_t result = fp64_add(op1, op2, 1, modeConv(fpscr), &flags);
4348 set_fpscr0(fpscr, flags);
4349 return result;
4350}
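// Subtraction is implemented on the shared add path: the third
// argument to fp*_add requests negation of op2, after which the usual
// alignment, addition and rounding steps apply.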
4351
4352template <>
4353uint16_t
4354fplibTrigMulAdd(uint8_t coeff_index, uint16_t op1, uint16_t op2, FPSCR &fpscr)
4355{
4356 static uint16_t coeff[2][8] = {
4357 {
4358 0x3c00,
4359 0xb155,
4360 0x2030,
4361 0x0000,
4362 0x0000,
4363 0x0000,
4364 0x0000,
4365 0x0000,
4366 },
4367 {
4368 0x3c00,
4369 0xb800,
4370 0x293a,
4371 0x0000,
4372 0x0000,
4373 0x0000,
4374 0x0000,
4375 0x0000
4376 }
4377 };
4378 int flags = 0;
4379 uint16_t result =
4380 fp16_muladd(coeff[op2 >> (FP16_BITS - 1)][coeff_index], op1,
4381 fplibAbs(op2), 0, modeConv(fpscr), &flags);
4382 set_fpscr0(fpscr, flags);
4383 return result;
4384}
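// In the fplibTrigMulAdd family the coefficient row is chosen by the
// sign bit of op2: row 0 holds the sine-series coefficients
// (1, -1/3!, 1/5!, ...) and row 1 the cosine series
// (1, -1/2!, 1/4!, ...). Each call performs one Horner step,
// coeff[coeff_index] + op1 * |op2|, matching the semantics of Arm's
// FTMAD.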
4385
4386template <>
4387uint32_t
4388fplibTrigMulAdd(uint8_t coeff_index, uint32_t op1, uint32_t op2, FPSCR &fpscr)
4389{
4390 static uint32_t coeff[2][8] = {
4391 {
4392 0x3f800000,
4393 0xbe2aaaab,
4394 0x3c088886,
4395 0xb95008b9,
4396 0x36369d6d,
4397 0x00000000,
4398 0x00000000,
4399 0x00000000
4400 },
4401 {
4402 0x3f800000,
4403 0xbf000000,
4404 0x3d2aaaa6,
4405 0xbab60705,
4406 0x37cd37cc,
4407 0x00000000,
4408 0x00000000,
4409 0x00000000
4410 }
4411 };
4412 int flags = 0;
4413 uint32_t result =
4414 fp32_muladd(coeff[op2 >> (FP32_BITS - 1)][coeff_index], op1,
4415 fplibAbs(op2), 0, modeConv(fpscr), &flags);
4416 set_fpscr0(fpscr, flags);
4417 return result;
4418}
4419
4420template <>
4421uint64_t
4422fplibTrigMulAdd(uint8_t coeff_index, uint64_t op1, uint64_t op2, FPSCR &fpscr)
4423{
4424 static uint64_t coeff[2][8] = {
4425 {
4426 0x3ff0000000000000ULL,
4427 0xbfc5555555555543ULL,
4428 0x3f8111111110f30cULL,
4429 0xbf2a01a019b92fc6ULL,
4430 0x3ec71de351f3d22bULL,
4431 0xbe5ae5e2b60f7b91ULL,
4432 0x3de5d8408868552fULL,
4433 0x0000000000000000ULL
4434 },
4435 {
4436 0x3ff0000000000000ULL,
4437 0xbfe0000000000000ULL,
4438 0x3fa5555555555536ULL,
4439 0xbf56c16c16c13a0bULL,
4440 0x3efa01a019b1e8d8ULL,
4441 0xbe927e4f7282f468ULL,
4442 0x3e21ee96d2641b13ULL,
4443 0xbda8f76380fbb401ULL
4444 }
4445 };
4446 int flags = 0;
4447 uint64_t result =
4448 fp64_muladd(coeff[op2 >> (FP64_BITS - 1)][coeff_index], op1,
4449 fplibAbs(op2), 0, modeConv(fpscr), &flags);
4450 set_fpscr0(fpscr, flags);
4451 return result;
4452}
4453
4454template <>
4455uint16_t
4456fplibTrigSMul(uint16_t op1, uint16_t op2, FPSCR &fpscr)
4457{
4458 int flags = 0;
4459 int sgn, exp;
4460 uint16_t mnt;
4461
4462 int mode = modeConv(fpscr);
4463 uint16_t result = fp16_mul(op1, op1, mode, &flags);
4464 set_fpscr0(fpscr, flags);
4465
4466 fp16_unpack(&sgn, &exp, &mnt, result, mode, &flags);
4467 if (!fp16_is_NaN(exp, mnt)) {
4468 result = (result & ~(1ULL << (FP16_BITS - 1))) |
4469 op2 << (FP16_BITS - 1);
4470 }
4471 return result;
4472}
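// The fplibTrigSMul family squares op1 and then, unless the square is
// a NaN, replaces the sign bit of the product with bit 0 of op2: the
// sign-injection step of Arm's FTSMUL.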
4473
4474template <>
4475uint32_t
4476fplibTrigSMul(uint32_t op1, uint32_t op2, FPSCR &fpscr)
4477{
4478 int flags = 0;
4479 int sgn, exp;
4480 uint32_t mnt;
4481
4482 int mode = modeConv(fpscr);
4483 uint32_t result = fp32_mul(op1, op1, mode, &flags);
4484 set_fpscr0(fpscr, flags);
4485
4486 fp32_unpack(&sgn, &exp, &mnt, result, mode, &flags);
4487 if (!fp32_is_NaN(exp, mnt)) {
4488 result = (result & ~(1ULL << (FP32_BITS - 1))) | op2 << (FP32_BITS - 1);
4489 }
4490 return result;
4491}
4492
4493template <>
4494uint64_t
4495fplibTrigSMul(uint64_t op1, uint64_t op2, FPSCR &fpscr)
4496{
4497 int flags = 0;
4498 int sgn, exp;
4499 uint64_t mnt;
4500
4501 int mode = modeConv(fpscr);
4502 uint64_t result = fp64_mul(op1, op1, mode, &flags);
4503 set_fpscr0(fpscr, flags);
4504
4505 fp64_unpack(&sgn, &exp, &mnt, result, mode, &flags);
4506 if (!fp64_is_NaN(exp, mnt)) {
4507 result = (result & ~(1ULL << (FP64_BITS - 1))) | op2 << (FP64_BITS - 1);
4508 }
4509 return result;
4510}
4511
4512template <>
4513uint16_t
4514fplibTrigSSel(uint16_t op1, uint16_t op2, FPSCR &fpscr)
4515{
4516 static constexpr uint16_t fpOne =
4517 (uint16_t)FP16_EXP_BIAS << FP16_MANT_BITS; // 1.0
4518 if (op2 & 1)
4519 op1 = fpOne;
4520 return op1 ^ ((op2 >> 1) << (FP16_BITS - 1));
4521}
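// fplibTrigSSel: bit 0 of op2 selects between op1 and +1.0 (fpOne is
// simply the exponent bias packed into place), and bit 1 of op2 then
// conditionally flips the sign of the selection via the XOR, as in
// Arm's FTSSEL.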
4522
4523template <>
4524uint32_t
4525fplibTrigSSel(uint32_t op1, uint32_t op2, FPSCR &fpscr)
4526{
4527 static constexpr uint32_t fpOne =
4528 (uint32_t)FP32_EXP_BIAS << FP32_MANT_BITS; // 1.0
4529 if (op2 & 1)
4530 op1 = fpOne;
4531 return op1 ^ ((op2 >> 1) << (FP32_BITS - 1));
4532}
4533
4534template <>
4535uint64_t
4536fplibTrigSSel(uint64_t op1, uint64_t op2, FPSCR &fpscr)
4537{
4538 static constexpr uint64_t fpOne =
4539 (uint64_t)FP64_EXP_BIAS << FP64_MANT_BITS; // 1.0
4540 if (op2 & 1)
4541 op1 = fpOne;
4542 return op1 ^ ((op2 >> 1) << (FP64_BITS - 1));
4543}
4544
4545static uint64_t
4546FPToFixed_64(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding,
4547 int *flags)
4548{
4549 int expmax = FP64_EXP_BIAS + FP64_BITS - 1;
4550 uint64_t x;
4551 int err;
4552
4553 if (exp > expmax) {
4554 *flags = FPLIB_IOC;
4555 return ((uint64_t)!u << (FP64_BITS - 1)) - !sgn;
4556 }
4557
4558 x = lsr64(mnt << FP64_EXP_BITS, expmax - exp);
4559 err = (exp > expmax - 2 ? 0 :
4560 (lsr64(mnt << FP64_EXP_BITS, expmax - 2 - exp) & 3) |
4561 !!(mnt << FP64_EXP_BITS & (lsl64(1, expmax - 2 - exp) - 1)));
4562
4563 switch (rounding) {
4564 case FPRounding_TIEEVEN:
4565 x += (err == 3 || (err == 2 && (x & 1)));
4566 break;
4567 case FPRounding_POSINF:
4568 x += err && !sgn;
4569 break;
4570 case FPRounding_NEGINF:
4571 x += err && sgn;
4572 break;
4573 case FPRounding_ZERO:
4574 break;
4575 case FPRounding_TIEAWAY:
4576 x += err >> 1;
4577 break;
4578 default:
4579 panic("Unrecognized FP rounding mode");
4579 }
4580
4581 if (u ? sgn && x : x > (1ULL << (FP64_BITS - 1)) - !sgn) {
4582 *flags = FPLIB_IOC;
4583 return ((uint64_t)!u << (FP64_BITS - 1)) - !sgn;
4584 }
4585
4586 if (err) {
4587 *flags = FPLIB_IXC;
4588 }
4589
4590 return sgn ? -x : x;
4591}
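// Note on the saturation value ((uint64_t)!u << (FP64_BITS - 1)) - !sgn
// used above: unsigned conversions saturate to UINT64_MAX on positive
// overflow and to 0 for negative inputs; signed conversions saturate
// to INT64_MAX, or to INT64_MIN when sgn is set. The narrower
// converters below reuse the same expression at their own widths.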
4592
4593static uint32_t
4594FPToFixed_32(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding,
4595 int *flags)
4596{
4597 uint64_t x = FPToFixed_64(sgn, exp, mnt, u, rounding, flags);
4598 if (u ? x >= 1ULL << FP32_BITS :
4599 !(x < 1ULL << (FP32_BITS - 1) ||
4600 (uint64_t)-x <= (uint64_t)1 << (FP32_BITS - 1))) {
4601 *flags = FPLIB_IOC;
4602 x = ((uint32_t)!u << (FP32_BITS - 1)) - !sgn;
4603 }
4604 return x;
4605}
4606
4607static uint16_t
4608FPToFixed_16(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding,
4609 int *flags)
4610{
4611 uint64_t x = FPToFixed_64(sgn, exp, mnt, u, rounding, flags);
4612 if (u ? x >= 1ULL << FP16_BITS :
4613 !(x < 1ULL << (FP16_BITS - 1) ||
4614 (uint64_t)-x <= (uint64_t)1 << (FP16_BITS - 1))) {
4615 *flags = FPLIB_IOC;
4616 x = ((uint16_t)!u << (FP16_BITS - 1)) - !sgn;
4617 }
4618 return x;
4619}
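// FPToFixed_32 and FPToFixed_16 reuse the 64-bit conversion and only
// re-check the range: a signed result fits in N bits iff x < 2^(N-1)
// or -x <= 2^(N-1), i.e. x lies in [-2^(N-1), 2^(N-1) - 1] when read
// as a two's-complement value.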
4620
4621template <>
4622uint16_t
4623fplibFPToFixed(uint16_t op, int fbits, bool u, FPRounding rounding,
4624 FPSCR &fpscr)
4625{
4626 int flags = 0;
4627 int sgn, exp;
4628 uint16_t mnt, result;
4629
4630 // Unpack using FPCR to determine if subnormals are flushed-to-zero:
4631 fp16_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);
4632
4633 // If NaN, set cumulative flag or take exception:
4634 if (fp16_is_NaN(exp, mnt)) {
4635 flags = FPLIB_IOC;
4636 result = 0;
4637 } else {
4638 assert(fbits >= 0);
4639 // Infinity is treated as an ordinary normalised number that saturates.
4640 result =
4641 FPToFixed_16(sgn, exp + FP64_EXP_BIAS - FP16_EXP_BIAS + fbits,
4642 (uint64_t)mnt << (FP64_MANT_BITS - FP16_MANT_BITS),
4643 u, rounding, &flags);
4644 }
4645
4646 set_fpscr0(fpscr, flags);
4647
4648 return result;
4649}
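// The fplibFPToFixed specialisations widen narrow operands to the
// double-precision layout (rebias the exponent by
// FP64_EXP_BIAS - FP16/32_EXP_BIAS and shift the mantissa up by the
// matching number of bits) so that the FPToFixed_* cores only ever see
// one format; fbits is added to the exponent to move the binary point
// for fixed-point results.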
4650
4651template <>
4652uint32_t
4653fplibFPToFixed(uint16_t op, int fbits, bool u, FPRounding rounding,
4654 FPSCR &fpscr)
4655{
4656 int flags = 0;
4657 int sgn, exp;
4658 uint16_t mnt;
4659 uint32_t result;
4660
4661 // Unpack using FPCR to determine if subnormals are flushed-to-zero:
4662 fp16_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);
4663
4664 // If NaN, set cumulative flag or take exception:
4665 if (fp16_is_NaN(exp, mnt)) {
4666 flags = FPLIB_IOC;
4667 result = 0;
4668 } else {
4669 assert(fbits >= 0);
4670 if (exp == FP16_EXP_INF)
4671 exp = 255; // infinity: make it big enough to saturate
4672 result =
4673 FPToFixed_32(sgn, exp + FP64_EXP_BIAS - FP16_EXP_BIAS + fbits,
4674 (uint64_t)mnt << (FP64_MANT_BITS - FP16_MANT_BITS),
4675 u, rounding, &flags);
4676 }
4677
4678 set_fpscr0(fpscr, flags);
4679
4680 return result;
4681}
4682
4683template <>
4684uint32_t
4685fplibFPToFixed(uint32_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
4686{
4687 int flags = 0;
4688 int sgn, exp;
4689 uint32_t mnt, result;
4690
4691 // Unpack using FPCR to determine if subnormals are flushed-to-zero:
4692 fp32_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);
4693
4694 // If NaN, set cumulative flag or take exception:
4695 if (fp32_is_NaN(exp, mnt)) {
4696 flags = FPLIB_IOC;
4697 result = 0;
4698 } else {
4699 assert(fbits >= 0);
4700 // Infinity is treated as an ordinary normalised number that saturates.
4701 result =
4702 FPToFixed_32(sgn, exp + FP64_EXP_BIAS - FP32_EXP_BIAS + fbits,
4703 (uint64_t)mnt << (FP64_MANT_BITS - FP32_MANT_BITS),
4704 u, rounding, &flags);
4705 }
4706
4707 set_fpscr0(fpscr, flags);
4708
4709 return result;
4710}
4711
4712template <>
4713uint32_t
4714fplibFPToFixed(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
4715{
4716 int flags = 0;
4717 int sgn, exp;
4718 uint64_t mnt;
4719 uint32_t result;
4720
4721 // Unpack using FPCR to determine if subnormals are flushed-to-zero:
4722 fp64_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);
4723
4724 // If NaN, set cumulative flag or take exception:
4725 if (fp64_is_NaN(exp, mnt)) {
4726 flags = FPLIB_IOC;
4727 result = 0;
4728 } else {
4729 assert(fbits >= 0);
4730 // Infinity is treated as an ordinary normalised number that saturates.
4731 result = FPToFixed_32(sgn, exp + fbits, mnt, u, rounding, &flags);
4732 }
4733
4734 set_fpscr0(fpscr, flags);
4735
4736 return result;
4737}
4738
4739template <>
4740uint64_t
4741fplibFPToFixed(uint16_t op, int fbits, bool u, FPRounding rounding,
4742 FPSCR &fpscr)
4743{
4744 int flags = 0;
4745 int sgn, exp;
4746 uint16_t mnt;
4747 uint64_t result;
4748
4749 // Unpack using FPCR to determine if subnormals are flushed-to-zero:
4750 fp16_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);
4751
4752 // If NaN, set cumulative flag or take exception:
4753 if (fp16_is_NaN(exp, mnt)) {
4754 flags = FPLIB_IOC;
4755 result = 0;
4756 } else {
4757 assert(fbits >= 0);
4758 if (exp == FP16_EXP_INF)
4759 exp = 255; // infinity: make it big enough to saturate
4760 result =
4761 FPToFixed_64(sgn, exp + FP64_EXP_BIAS - FP16_EXP_BIAS + fbits,
4762 (uint64_t)mnt << (FP64_MANT_BITS - FP16_MANT_BITS),
4763 u, rounding, &flags);
4764 }
4765
4766 set_fpscr0(fpscr, flags);
4767
4768 return result;
4769}
4770
4771template <>
4772uint64_t
4773fplibFPToFixed(uint32_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
4774{
4775 int flags = 0;
4776 int sgn, exp;
4777 uint32_t mnt;
4778 uint64_t result;
4779
4780 // Unpack using FPCR to determine if subnormals are flushed-to-zero:
4781 fp32_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);
4782
4783 // If NaN, set cumulative flag or take exception:
4784 if (fp32_is_NaN(exp, mnt)) {
4785 flags = FPLIB_IOC;
4786 result = 0;
4787 } else {
4788 assert(fbits >= 0);
4789 // Infinity is treated as an ordinary normalised number that saturates.
4790 result =
4791 FPToFixed_64(sgn, exp + FP64_EXP_BIAS - FP32_EXP_BIAS + fbits,
4792 (uint64_t)mnt << (FP64_MANT_BITS - FP32_MANT_BITS),
4793 u, rounding, &flags);
4794 }
4795
4796 set_fpscr0(fpscr, flags);
4797
4798 return result;
4799}
4800
4801template <>
4802uint64_t
4803fplibFPToFixed(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
4804{
4805 int flags = 0;
4806 int sgn, exp;
4807 uint64_t mnt, result;
4808
4809 // Unpack using FPCR to determine if subnormals are flushed-to-zero:
4810 fp64_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);
4811
4812 // If NaN, set cumulative flag or take exception:
4813 if (fp64_is_NaN(exp, mnt)) {
4814 flags = FPLIB_IOC;
4815 result = 0;
4816 } else {
4817 assert(fbits >= 0);
4818 // Infinity is treated as an ordinary normalised number that saturates.
4819 result = FPToFixed_64(sgn, exp + fbits, mnt, u, rounding, &flags);
4820 }
4821
4822 set_fpscr0(fpscr, flags);
4823
4824 return result;
4825}
4826
4827static uint16_t
4828fp16_cvtf(uint64_t a, int fbits, int u, int mode, int *flags)
4829{
4830 int x_sgn = !u && a >> (FP64_BITS - 1);
4831 int x_exp = FP16_EXP_BIAS + FP64_BITS - 1 - fbits;
4832 uint64_t x_mnt = x_sgn ? -a : a;
4833
4834 // Handle zero:
4835 if (!x_mnt) {
4836 return fp16_zero(0);
4837 }
4838
4839 // Normalise into FP16_BITS bits, collapsing error into bottom bit:
4840 x_mnt = fp64_normalise(x_mnt, &x_exp);
4841 x_mnt = (x_mnt >> (FP64_BITS - FP16_BITS - 1) |
4842 !!(x_mnt & ((1ULL << (FP64_BITS - FP16_BITS - 1)) - 1)));
4843
4844 return fp16_round(x_sgn, x_exp, x_mnt, mode, flags);
4845}
4846
4847static uint32_t
4848fp32_cvtf(uint64_t a, int fbits, int u, int mode, int *flags)
4849{
4850 int x_sgn = !u && a >> (FP64_BITS - 1);
4851 int x_exp = FP32_EXP_BIAS + FP64_BITS - 1 - fbits;
4852 uint64_t x_mnt = x_sgn ? -a : a;
4853
4854 // Handle zero:
4855 if (!x_mnt) {
4856 return fp32_zero(0);
4857 }
4858
4859 // Normalise into FP32_BITS bits, collapsing error into bottom bit:
4860 x_mnt = fp64_normalise(x_mnt, &x_exp);
4861 x_mnt = (x_mnt >> (FP64_BITS - FP32_BITS - 1) |
4862 !!(x_mnt & ((1ULL << (FP64_BITS - FP32_BITS - 1)) - 1)));
4863
4864 return fp32_round(x_sgn, x_exp, x_mnt, mode, flags);
4865}
4866
4867static uint64_t
4868fp64_cvtf(uint64_t a, int fbits, int u, int mode, int *flags)
4869{
4870 int x_sgn = !u && a >> (FP64_BITS - 1);
4871 int x_exp = FP64_EXP_BIAS + FP64_BITS - 1 - fbits;
4872 uint64_t x_mnt = x_sgn ? -a : a;
4873
4874 // Handle zero:
4875 if (!x_mnt) {
4876 return fp64_zero(0);
4877 }
4878
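// Normalise into the top of the 64-bit word before rounding: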
4879 x_mnt = fp64_normalise(x_mnt, &x_exp);
4880
4881 return fp64_round(x_sgn, x_exp, x_mnt << 1, mode, flags);
4882}
4883
4884template <>
4885uint16_t
4886fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding,
4887 FPSCR &fpscr)
4888{
4889 int flags = 0;
4890 uint16_t res = fp16_cvtf(op, fbits, u,
4891 (int)rounding | ((uint32_t)fpscr >> 22 & 12),
4892 &flags);
4893 set_fpscr0(fpscr, flags);
4894 return res;
4895}
4896
4897template <>
4898uint32_t
4899fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
4900{
4901 int flags = 0;
4902 uint32_t res = fp32_cvtf(op, fbits, u,
4903 (int)rounding | ((uint32_t)fpscr >> 22 & 12),
4904 &flags);
4905 set_fpscr0(fpscr, flags);
4906 return res;
4907}
4908
4909template <>
4910uint64_t
4911fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
4912{
4913 int flags = 0;
4914 uint64_t res = fp64_cvtf(op, fbits, u,
4915 (int)rounding | ((uint32_t)fpscr >> 22 & 12),
4916 &flags);
4917 set_fpscr0(fpscr, flags);
4918 return res;
4919}
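// The mode word passed to fp*_cvtf combines the caller's explicit
// rounding mode (low two bits) with FPSCR.FZ (bit 24) and FPSCR.DN
// (bit 25): (fpscr >> 22) & 12 moves those two flags into bit
// positions 2 and 3 alongside the rounding mode.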
4920
4921template <>
4922uint16_t
4923fplibInfinity(int sgn)
4924{
4925 return fp16_infinity(sgn);
4926}
4927
4928template <>
4929uint32_t
4930fplibInfinity(int sgn)
4931{
4932 return fp32_infinity(sgn);
4933}
4934
4935template <>
4936uint64_t
4937fplibInfinity(int sgn)
4938{
4939 return fp64_infinity(sgn);
4940}
4941
4942template <>
4943uint16_t
4944fplibDefaultNaN()
4945{
4946 return fp16_defaultNaN();
4947}
4948
4949template <>
4950uint32_t
4951fplibDefaultNaN()
4952{
4953 return fp32_defaultNaN();
4954}
4955
4956template <>
4957uint64_t
4958fplibDefaultNaN()
4959{
4960 return fp64_defaultNaN();
4961}
4962
4963}