fplib.cc (10104:ff709c429b7b) | fplib.cc (10272:336c7d36ac00) |
---|---|
1/* 2* Copyright (c) 2012-2013 ARM Limited 3* All rights reserved 4* 5* The license below extends only to copyright in the software and shall 6* not be construed as granting a license to any other intellectual 7* property including but not limited to intellectual property relating 8* to a hardware implementation of the functionality of the software 9* licensed hereunder. You may use the software subject to the license 10* terms below provided that you ensure that this notice is replicated 11* unmodified and in its entirety in all distributions of the software, 12* modified or unmodified, in source code or in binary form. 13* 14* Redistribution and use in source and binary forms, with or without 15* modification, are permitted provided that the following conditions are 16* met: redistributions of source code must retain the above copyright 17* notice, this list of conditions and the following disclaimer; 18* redistributions in binary form must reproduce the above copyright 19* notice, this list of conditions and the following disclaimer in the 20* documentation and/or other materials provided with the distribution; 21* neither the name of the copyright holders nor the names of its 22* contributors may be used to endorse or promote products derived from 23* this software without specific prior written permission. 24* 25* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 26* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 27* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 28* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 29* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 30* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 31* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 32* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 33* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 34* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 35* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 36* 37* Authors: Edmund Grimley Evans 38* Thomas Grocutt 39*/ 40 41#include <stdint.h> 42 43#include <cassert> 44 45#include "fplib.hh" 46 47namespace ArmISA 48{ 49 50#define FPLIB_RN 0 51#define FPLIB_RP 1 52#define FPLIB_RM 2 53#define FPLIB_RZ 3 54#define FPLIB_FZ 4 55#define FPLIB_DN 8 56#define FPLIB_AHP 16 57 58#define FPLIB_IDC 128 // Input Denormal 59#define FPLIB_IXC 16 // Inexact 60#define FPLIB_UFC 8 // Underflow 61#define FPLIB_OFC 4 // Overflow 62#define FPLIB_DZC 2 // Division by Zero 63#define FPLIB_IOC 1 // Invalid Operation 64 65static inline uint16_t 66lsl16(uint16_t x, uint32_t shift) 67{ 68 return shift < 16 ? x << shift : 0; 69} 70 71static inline uint16_t 72lsr16(uint16_t x, uint32_t shift) 73{ 74 return shift < 16 ? x >> shift : 0; 75} 76 77static inline uint32_t 78lsl32(uint32_t x, uint32_t shift) 79{ 80 return shift < 32 ? x << shift : 0; 81} 82 83static inline uint32_t 84lsr32(uint32_t x, uint32_t shift) 85{ 86 return shift < 32 ? x >> shift : 0; 87} 88 89static inline uint64_t 90lsl64(uint64_t x, uint32_t shift) 91{ 92 return shift < 64 ? x << shift : 0; 93} 94 95static inline uint64_t 96lsr64(uint64_t x, uint32_t shift) 97{ 98 return shift < 64 ? x >> shift : 0; 99} 100 101static inline void 102lsl128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift) 103{ 104 if (shift < 64) { 105 *r1 = x1 << shift | x0 >> (64 - shift); 106 *r0 = x0 << shift; 107 } else if (shift < 128) { 108 *r1 = x0 << (shift - 64); 109 *r0 = 0; 110 } else { 111 *r1 = 0; 112 *r0 = 0; 113 } 114} 115 116static inline void 117lsr128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift) 118{ 119 if (shift < 64) { 120 *r0 = x0 >> shift | x1 << (64 - shift); 121 *r1 = x1 >> shift; 122 } else if (shift < 128) { 123 *r0 = x1 >> (shift - 64); 124 *r1 = 0; 125 } else { 126 *r0 = 0; 127 *r1 = 0; 128 } 129} 130 131static inline void 132mul62x62(uint64_t *x0, uint64_t *x1, uint64_t a, uint64_t b) 133{ 134 uint32_t mask = ((uint32_t)1 << 31) - 1; 135 uint64_t a0 = a & mask; 136 uint64_t a1 = a >> 31 & mask; 137 uint64_t b0 = b & mask; 138 uint64_t b1 = b >> 31 & mask; 139 uint64_t p0 = a0 * b0; 140 uint64_t p2 = a1 * b1; 141 uint64_t p1 = (a0 + a1) * (b0 + b1) - p0 - p2; 142 uint64_t s0 = p0; 143 uint64_t s1 = (s0 >> 31) + p1; 144 uint64_t s2 = (s1 >> 31) + p2; 145 *x0 = (s0 & mask) | (s1 & mask) << 31 | s2 << 62; 146 *x1 = s2 >> 2; 147} 148 149static inline 150void mul64x32(uint64_t *x0, uint64_t *x1, uint64_t a, uint32_t b) 151{ 152 uint64_t t0 = (uint64_t)(uint32_t)a * b; 153 uint64_t t1 = (t0 >> 32) + (a >> 32) * b; 154 *x0 = t1 << 32 | (uint32_t)t0; 155 *x1 = t1 >> 32; 156} 157 158static inline void | 1/* 2* Copyright (c) 2012-2013 ARM Limited 3* All rights reserved 4* 5* The license below extends only to copyright in the software and shall 6* not be construed as granting a license to any other intellectual 7* property including but not limited to intellectual property relating 8* to a hardware implementation of the functionality of the software 9* licensed hereunder. You may use the software subject to the license 10* terms below provided that you ensure that this notice is replicated 11* unmodified and in its entirety in all distributions of the software, 12* modified or unmodified, in source code or in binary form. 13* 14* Redistribution and use in source and binary forms, with or without 15* modification, are permitted provided that the following conditions are 16* met: redistributions of source code must retain the above copyright 17* notice, this list of conditions and the following disclaimer; 18* redistributions in binary form must reproduce the above copyright 19* notice, this list of conditions and the following disclaimer in the 20* documentation and/or other materials provided with the distribution; 21* neither the name of the copyright holders nor the names of its 22* contributors may be used to endorse or promote products derived from 23* this software without specific prior written permission. 24* 25* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 26* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 27* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 28* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 29* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 30* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 31* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 32* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 33* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 34* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 35* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 36* 37* Authors: Edmund Grimley Evans 38* Thomas Grocutt 39*/ 40 41#include <stdint.h> 42 43#include <cassert> 44 45#include "fplib.hh" 46 47namespace ArmISA 48{ 49 50#define FPLIB_RN 0 51#define FPLIB_RP 1 52#define FPLIB_RM 2 53#define FPLIB_RZ 3 54#define FPLIB_FZ 4 55#define FPLIB_DN 8 56#define FPLIB_AHP 16 57 58#define FPLIB_IDC 128 // Input Denormal 59#define FPLIB_IXC 16 // Inexact 60#define FPLIB_UFC 8 // Underflow 61#define FPLIB_OFC 4 // Overflow 62#define FPLIB_DZC 2 // Division by Zero 63#define FPLIB_IOC 1 // Invalid Operation 64 65static inline uint16_t 66lsl16(uint16_t x, uint32_t shift) 67{ 68 return shift < 16 ? x << shift : 0; 69} 70 71static inline uint16_t 72lsr16(uint16_t x, uint32_t shift) 73{ 74 return shift < 16 ? x >> shift : 0; 75} 76 77static inline uint32_t 78lsl32(uint32_t x, uint32_t shift) 79{ 80 return shift < 32 ? x << shift : 0; 81} 82 83static inline uint32_t 84lsr32(uint32_t x, uint32_t shift) 85{ 86 return shift < 32 ? x >> shift : 0; 87} 88 89static inline uint64_t 90lsl64(uint64_t x, uint32_t shift) 91{ 92 return shift < 64 ? x << shift : 0; 93} 94 95static inline uint64_t 96lsr64(uint64_t x, uint32_t shift) 97{ 98 return shift < 64 ? x >> shift : 0; 99} 100 101static inline void 102lsl128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift) 103{ 104 if (shift < 64) { 105 *r1 = x1 << shift | x0 >> (64 - shift); 106 *r0 = x0 << shift; 107 } else if (shift < 128) { 108 *r1 = x0 << (shift - 64); 109 *r0 = 0; 110 } else { 111 *r1 = 0; 112 *r0 = 0; 113 } 114} 115 116static inline void 117lsr128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift) 118{ 119 if (shift < 64) { 120 *r0 = x0 >> shift | x1 << (64 - shift); 121 *r1 = x1 >> shift; 122 } else if (shift < 128) { 123 *r0 = x1 >> (shift - 64); 124 *r1 = 0; 125 } else { 126 *r0 = 0; 127 *r1 = 0; 128 } 129} 130 131static inline void 132mul62x62(uint64_t *x0, uint64_t *x1, uint64_t a, uint64_t b) 133{ 134 uint32_t mask = ((uint32_t)1 << 31) - 1; 135 uint64_t a0 = a & mask; 136 uint64_t a1 = a >> 31 & mask; 137 uint64_t b0 = b & mask; 138 uint64_t b1 = b >> 31 & mask; 139 uint64_t p0 = a0 * b0; 140 uint64_t p2 = a1 * b1; 141 uint64_t p1 = (a0 + a1) * (b0 + b1) - p0 - p2; 142 uint64_t s0 = p0; 143 uint64_t s1 = (s0 >> 31) + p1; 144 uint64_t s2 = (s1 >> 31) + p2; 145 *x0 = (s0 & mask) | (s1 & mask) << 31 | s2 << 62; 146 *x1 = s2 >> 2; 147} 148 149static inline 150void mul64x32(uint64_t *x0, uint64_t *x1, uint64_t a, uint32_t b) 151{ 152 uint64_t t0 = (uint64_t)(uint32_t)a * b; 153 uint64_t t1 = (t0 >> 32) + (a >> 32) * b; 154 *x0 = t1 << 32 | (uint32_t)t0; 155 *x1 = t1 >> 32; 156} 157 158static inline void |
159mul64x64(uint64_t *x0, uint64_t *x1, uint64_t a, uint64_t b) 160{ 161 uint64_t a0 = (uint32_t)a; 162 uint64_t a1 = a >> 32; 163 uint64_t b0 = (uint32_t)b; 164 uint64_t b1 = b >> 32; 165 uint64_t t1 = (a0 * b0 >> 32) + a1 * b0; 166 uint64_t t2 = a0 * b1; 167 uint64_t x = ((uint64_t)(uint32_t)t1 + (uint32_t)t2) >> 32; 168 x += t1 >> 32; 169 x += t2 >> 32; 170 x += a1 * b1; 171 *x0 = a * b; 172 *x1 = x; 173} 174 175static inline void | |
176add128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0, 177 uint64_t b1) 178{ 179 *x0 = a0 + b0; 180 *x1 = a1 + b1 + (*x0 < a0); 181} 182 183static inline void 184sub128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0, 185 uint64_t b1) 186{ 187 *x0 = a0 - b0; 188 *x1 = a1 - b1 - (*x0 > a0); 189} 190 191static inline int 192cmp128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1) 193{ 194 return (a1 < b1 ? -1 : a1 > b1 ? 1 : a0 < b0 ? -1 : a0 > b0 ? 1 : 0); 195} 196 197static inline uint16_t 198fp16_normalise(uint16_t mnt, int *exp) 199{ 200 int shift; 201 202 if (!mnt) { 203 return 0; 204 } 205 206 for (shift = 8; shift; shift >>= 1) { 207 if (!(mnt >> (16 - shift))) { 208 mnt <<= shift; 209 *exp -= shift; 210 } 211 } 212 return mnt; 213} 214 215static inline uint32_t 216fp32_normalise(uint32_t mnt, int *exp) 217{ 218 int shift; 219 220 if (!mnt) { 221 return 0; 222 } 223 224 for (shift = 16; shift; shift >>= 1) { 225 if (!(mnt >> (32 - shift))) { 226 mnt <<= shift; 227 *exp -= shift; 228 } 229 } 230 return mnt; 231} 232 233static inline uint64_t 234fp64_normalise(uint64_t mnt, int *exp) 235{ 236 int shift; 237 238 if (!mnt) { 239 return 0; 240 } 241 242 for (shift = 32; shift; shift >>= 1) { 243 if (!(mnt >> (64 - shift))) { 244 mnt <<= shift; 245 *exp -= shift; 246 } 247 } 248 return mnt; 249} 250 251static inline void 252fp128_normalise(uint64_t *mnt0, uint64_t *mnt1, int *exp) 253{ 254 uint64_t x0 = *mnt0; 255 uint64_t x1 = *mnt1; 256 int shift; 257 258 if (!x0 && !x1) { 259 return; 260 } 261 262 if (!x1) { 263 x1 = x0; 264 x0 = 0; 265 *exp -= 64; 266 } 267 268 for (shift = 32; shift; shift >>= 1) { 269 if (!(x1 >> (64 - shift))) { 270 x1 = x1 << shift | x0 >> (64 - shift); 271 x0 <<= shift; 272 *exp -= shift; 273 } 274 } 275 276 *mnt0 = x0; 277 *mnt1 = x1; 278} 279 280static inline uint16_t 281fp16_pack(uint16_t sgn, uint16_t exp, uint16_t mnt) 282{ 283 return sgn << 15 | exp << 10 | (mnt & (((uint16_t)1 << 10) - 1)); 284} 285 286static inline uint32_t 287fp32_pack(uint32_t sgn, uint32_t exp, uint32_t mnt) 288{ 289 return sgn << 31 | exp << 23 | (mnt & (((uint32_t)1 << 23) - 1)); 290} 291 292static inline uint64_t 293fp64_pack(uint64_t sgn, uint64_t exp, uint64_t mnt) 294{ 295 return (uint64_t)sgn << 63 | exp << 52 | (mnt & (((uint64_t)1 << 52) - 1)); 296} 297 298static inline uint16_t 299fp16_zero(int sgn) 300{ 301 return fp16_pack(sgn, 0, 0); 302} 303 304static inline uint32_t 305fp32_zero(int sgn) 306{ 307 return fp32_pack(sgn, 0, 0); 308} 309 310static inline uint64_t 311fp64_zero(int sgn) 312{ 313 return fp64_pack(sgn, 0, 0); 314} 315 316static inline uint16_t 317fp16_max_normal(int sgn) 318{ 319 return fp16_pack(sgn, 30, -1); 320} 321 322static inline uint32_t 323fp32_max_normal(int sgn) 324{ 325 return fp32_pack(sgn, 254, -1); 326} 327 328static inline uint64_t 329fp64_max_normal(int sgn) 330{ 331 return fp64_pack(sgn, 2046, -1); 332} 333 334static inline uint16_t 335fp16_infinity(int sgn) 336{ 337 return fp16_pack(sgn, 31, 0); 338} 339 340static inline uint32_t 341fp32_infinity(int sgn) 342{ 343 return fp32_pack(sgn, 255, 0); 344} 345 346static inline uint64_t 347fp64_infinity(int sgn) 348{ 349 return fp64_pack(sgn, 2047, 0); 350} 351 352static inline uint16_t 353fp16_defaultNaN() 354{ 355 return fp16_pack(0, 31, (uint16_t)1 << 9); 356} 357 358static inline uint32_t 359fp32_defaultNaN() 360{ 361 return fp32_pack(0, 255, (uint32_t)1 << 22); 362} 363 364static inline uint64_t 365fp64_defaultNaN() 366{ 367 return fp64_pack(0, 2047, (uint64_t)1 << 51); 368} 369 370static inline void 371fp16_unpack(int *sgn, int *exp, uint16_t *mnt, uint16_t x, int mode, 372 int *flags) 373{ 374 *sgn = x >> 15; 375 *exp = x >> 10 & 31; 376 *mnt = x & (((uint16_t)1 << 10) - 1); 377 378 // Handle subnormals: 379 if (*exp) { 380 *mnt |= (uint16_t)1 << 10; 381 } else { 382 ++*exp; 383 // There is no flush to zero in this case! 384 } 385} 386 387static inline void 388fp32_unpack(int *sgn, int *exp, uint32_t *mnt, uint32_t x, int mode, 389 int *flags) 390{ 391 *sgn = x >> 31; 392 *exp = x >> 23 & 255; 393 *mnt = x & (((uint32_t)1 << 23) - 1); 394 395 // Handle subnormals: 396 if (*exp) { 397 *mnt |= (uint32_t)1 << 23; 398 } else { 399 ++*exp; 400 if ((mode & FPLIB_FZ) && *mnt) { 401 *flags |= FPLIB_IDC; 402 *mnt = 0; 403 } 404 } 405} 406 407static inline void 408fp64_unpack(int *sgn, int *exp, uint64_t *mnt, uint64_t x, int mode, 409 int *flags) 410{ 411 *sgn = x >> 63; 412 *exp = x >> 52 & 2047; 413 *mnt = x & (((uint64_t)1 << 52) - 1); 414 415 // Handle subnormals: 416 if (*exp) { 417 *mnt |= (uint64_t)1 << 52; 418 } else { 419 ++*exp; 420 if ((mode & FPLIB_FZ) && *mnt) { 421 *flags |= FPLIB_IDC; 422 *mnt = 0; 423 } 424 } 425} 426 427static inline uint32_t 428fp32_process_NaN(uint32_t a, int mode, int *flags) 429{ 430 if (!(a >> 22 & 1)) { 431 *flags |= FPLIB_IOC; 432 a |= (uint32_t)1 << 22; 433 } 434 return mode & FPLIB_DN ? fp32_defaultNaN() : a; 435} 436 437static inline uint64_t 438fp64_process_NaN(uint64_t a, int mode, int *flags) 439{ 440 if (!(a >> 51 & 1)) { 441 *flags |= FPLIB_IOC; 442 a |= (uint64_t)1 << 51; 443 } 444 return mode & FPLIB_DN ? fp64_defaultNaN() : a; 445} 446 447static uint32_t 448fp32_process_NaNs(uint32_t a, uint32_t b, int mode, int *flags) 449{ 450 int a_exp = a >> 23 & 255; 451 uint32_t a_mnt = a & (((uint32_t)1 << 23) - 1); 452 int b_exp = b >> 23 & 255; 453 uint32_t b_mnt = b & (((uint32_t)1 << 23) - 1); 454 455 // Handle signalling NaNs: 456 if (a_exp == 255 && a_mnt && !(a_mnt >> 22 & 1)) 457 return fp32_process_NaN(a, mode, flags); 458 if (b_exp == 255 && b_mnt && !(b_mnt >> 22 & 1)) 459 return fp32_process_NaN(b, mode, flags); 460 461 // Handle quiet NaNs: 462 if (a_exp == 255 && a_mnt) 463 return fp32_process_NaN(a, mode, flags); 464 if (b_exp == 255 && b_mnt) 465 return fp32_process_NaN(b, mode, flags); 466 467 return 0; 468} 469 470static uint64_t 471fp64_process_NaNs(uint64_t a, uint64_t b, int mode, int *flags) 472{ 473 int a_exp = a >> 52 & 2047; 474 uint64_t a_mnt = a & (((uint64_t)1 << 52) - 1); 475 int b_exp = b >> 52 & 2047; 476 uint64_t b_mnt = b & (((uint64_t)1 << 52) - 1); 477 478 // Handle signalling NaNs: 479 if (a_exp == 2047 && a_mnt && !(a_mnt >> 51 & 1)) 480 return fp64_process_NaN(a, mode, flags); 481 if (b_exp == 2047 && b_mnt && !(b_mnt >> 51 & 1)) 482 return fp64_process_NaN(b, mode, flags); 483 484 // Handle quiet NaNs: 485 if (a_exp == 2047 && a_mnt) 486 return fp64_process_NaN(a, mode, flags); 487 if (b_exp == 2047 && b_mnt) 488 return fp64_process_NaN(b, mode, flags); 489 490 return 0; 491} 492 493static uint32_t 494fp32_process_NaNs3(uint32_t a, uint32_t b, uint32_t c, int mode, int *flags) 495{ 496 int a_exp = a >> 23 & 255; 497 uint32_t a_mnt = a & (((uint32_t)1 << 23) - 1); 498 int b_exp = b >> 23 & 255; 499 uint32_t b_mnt = b & (((uint32_t)1 << 23) - 1); 500 int c_exp = c >> 23 & 255; 501 uint32_t c_mnt = c & (((uint32_t)1 << 23) - 1); 502 503 // Handle signalling NaNs: 504 if (a_exp == 255 && a_mnt && !(a_mnt >> 22 & 1)) 505 return fp32_process_NaN(a, mode, flags); 506 if (b_exp == 255 && b_mnt && !(b_mnt >> 22 & 1)) 507 return fp32_process_NaN(b, mode, flags); 508 if (c_exp == 255 && c_mnt && !(c_mnt >> 22 & 1)) 509 return fp32_process_NaN(c, mode, flags); 510 511 // Handle quiet NaNs: 512 if (a_exp == 255 && a_mnt) 513 return fp32_process_NaN(a, mode, flags); 514 if (b_exp == 255 && b_mnt) 515 return fp32_process_NaN(b, mode, flags); 516 if (c_exp == 255 && c_mnt) 517 return fp32_process_NaN(c, mode, flags); 518 519 return 0; 520} 521 522static uint64_t 523fp64_process_NaNs3(uint64_t a, uint64_t b, uint64_t c, int mode, int *flags) 524{ 525 int a_exp = a >> 52 & 2047; 526 uint64_t a_mnt = a & (((uint64_t)1 << 52) - 1); 527 int b_exp = b >> 52 & 2047; 528 uint64_t b_mnt = b & (((uint64_t)1 << 52) - 1); 529 int c_exp = c >> 52 & 2047; 530 uint64_t c_mnt = c & (((uint64_t)1 << 52) - 1); 531 532 // Handle signalling NaNs: 533 if (a_exp == 2047 && a_mnt && !(a_mnt >> 51 & 1)) 534 return fp64_process_NaN(a, mode, flags); 535 if (b_exp == 2047 && b_mnt && !(b_mnt >> 51 & 1)) 536 return fp64_process_NaN(b, mode, flags); 537 if (c_exp == 2047 && c_mnt && !(c_mnt >> 51 & 1)) 538 return fp64_process_NaN(c, mode, flags); 539 540 // Handle quiet NaNs: 541 if (a_exp == 2047 && a_mnt) 542 return fp64_process_NaN(a, mode, flags); 543 if (b_exp == 2047 && b_mnt) 544 return fp64_process_NaN(b, mode, flags); 545 if (c_exp == 2047 && c_mnt) 546 return fp64_process_NaN(c, mode, flags); 547 548 return 0; 549} 550 551static uint16_t 552fp16_round_(int sgn, int exp, uint16_t mnt, int rm, int mode, int *flags) 553{ 554 int biased_exp; // non-negative exponent value for result 555 uint16_t int_mant; // mantissa for result, less than (1 << 11) 556 int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5 557 558 assert(rm != FPRounding_TIEAWAY); 559 560 // There is no flush to zero in this case! 561 562 // The bottom 5 bits of mnt are orred together: 563 mnt = (uint16_t)1 << 12 | mnt >> 4 | ((mnt & 31) != 0); 564 565 if (exp > 0) { 566 biased_exp = exp; 567 int_mant = mnt >> 2; 568 error = mnt & 3; 569 } else { 570 biased_exp = 0; 571 int_mant = lsr16(mnt, 3 - exp); 572 error = (lsr16(mnt, 1 - exp) & 3) | !!(mnt & (lsl16(1, 1 - exp) - 1)); 573 } 574 575 if (!biased_exp && error) { // xx should also check fpscr_val<11> 576 *flags |= FPLIB_UFC; 577 } 578 579 // Round up: 580 if ((rm == FPLIB_RN && (error == 3 || 581 (error == 2 && (int_mant & 1)))) || 582 (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) { 583 ++int_mant; 584 if (int_mant == (uint32_t)1 << 10) { 585 // Rounded up from denormalized to normalized 586 biased_exp = 1; 587 } 588 if (int_mant == (uint32_t)1 << 11) { 589 // Rounded up to next exponent 590 ++biased_exp; 591 int_mant >>= 1; 592 } 593 } 594 595 // Handle rounding to odd aka Von Neumann rounding: 596 if (error && rm == FPRounding_ODD) 597 int_mant |= 1; 598 599 // Handle overflow: 600 if (!(mode & FPLIB_AHP)) { 601 if (biased_exp >= 31) { 602 *flags |= FPLIB_OFC | FPLIB_IXC; 603 if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) || 604 (rm == FPLIB_RM && sgn)) { 605 return fp16_infinity(sgn); 606 } else { 607 return fp16_max_normal(sgn); 608 } 609 } 610 } else { 611 if (biased_exp >= 32) { 612 *flags |= FPLIB_IOC; 613 return fp16_pack(sgn, 31, -1); 614 } 615 } 616 617 if (error) { 618 *flags |= FPLIB_IXC; 619 } 620 621 return fp16_pack(sgn, biased_exp, int_mant); 622} 623 624static uint32_t 625fp32_round_(int sgn, int exp, uint32_t mnt, int rm, int mode, int *flags) 626{ 627 int biased_exp; // non-negative exponent value for result 628 uint32_t int_mant; // mantissa for result, less than (1 << 24) 629 int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5 630 631 assert(rm != FPRounding_TIEAWAY); 632 633 // Flush to zero: 634 if ((mode & FPLIB_FZ) && exp < 1) { 635 *flags |= FPLIB_UFC; 636 return fp32_zero(sgn); 637 } 638 639 // The bottom 8 bits of mnt are orred together: 640 mnt = (uint32_t)1 << 25 | mnt >> 7 | ((mnt & 255) != 0); 641 642 if (exp > 0) { 643 biased_exp = exp; 644 int_mant = mnt >> 2; 645 error = mnt & 3; 646 } else { 647 biased_exp = 0; 648 int_mant = lsr32(mnt, 3 - exp); 649 error = (lsr32(mnt, 1 - exp) & 3) | !!(mnt & (lsl32(1, 1 - exp) - 1)); 650 } 651 652 if (!biased_exp && error) { // xx should also check fpscr_val<11> 653 *flags |= FPLIB_UFC; 654 } 655 656 // Round up: 657 if ((rm == FPLIB_RN && (error == 3 || 658 (error == 2 && (int_mant & 1)))) || 659 (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) { 660 ++int_mant; 661 if (int_mant == (uint32_t)1 << 23) { 662 // Rounded up from denormalized to normalized 663 biased_exp = 1; 664 } 665 if (int_mant == (uint32_t)1 << 24) { 666 // Rounded up to next exponent 667 ++biased_exp; 668 int_mant >>= 1; 669 } 670 } 671 672 // Handle rounding to odd aka Von Neumann rounding: 673 if (error && rm == FPRounding_ODD) 674 int_mant |= 1; 675 676 // Handle overflow: 677 if (biased_exp >= 255) { 678 *flags |= FPLIB_OFC | FPLIB_IXC; 679 if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) || 680 (rm == FPLIB_RM && sgn)) { 681 return fp32_infinity(sgn); 682 } else { 683 return fp32_max_normal(sgn); 684 } 685 } 686 687 if (error) { 688 *flags |= FPLIB_IXC; 689 } 690 691 return fp32_pack(sgn, biased_exp, int_mant); 692} 693 694static uint32_t 695fp32_round(int sgn, int exp, uint32_t mnt, int mode, int *flags) 696{ 697 return fp32_round_(sgn, exp, mnt, mode & 3, mode, flags); 698} 699 700static uint64_t 701fp64_round_(int sgn, int exp, uint64_t mnt, int rm, int mode, int *flags) 702{ 703 int biased_exp; // non-negative exponent value for result 704 uint64_t int_mant; // mantissa for result, less than (1 << 52) 705 int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5 706 707 assert(rm != FPRounding_TIEAWAY); 708 709 // Flush to zero: 710 if ((mode & FPLIB_FZ) && exp < 1) { 711 *flags |= FPLIB_UFC; 712 return fp64_zero(sgn); 713 } 714 715 // The bottom 11 bits of mnt are orred together: 716 mnt = (uint64_t)1 << 54 | mnt >> 10 | ((mnt & 0x3ff) != 0); 717 718 if (exp > 0) { 719 biased_exp = exp; 720 int_mant = mnt >> 2; 721 error = mnt & 3; 722 } else { 723 biased_exp = 0; 724 int_mant = lsr64(mnt, 3 - exp); 725 error = (lsr64(mnt, 1 - exp) & 3) | !!(mnt & (lsl64(1, 1 - exp) - 1)); 726 } 727 728 if (!biased_exp && error) { // xx should also check fpscr_val<11> 729 *flags |= FPLIB_UFC; 730 } 731 732 // Round up: 733 if ((rm == FPLIB_RN && (error == 3 || 734 (error == 2 && (int_mant & 1)))) || 735 (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) { 736 ++int_mant; 737 if (int_mant == (uint64_t)1 << 52) { 738 // Rounded up from denormalized to normalized 739 biased_exp = 1; 740 } 741 if (int_mant == (uint64_t)1 << 53) { 742 // Rounded up to next exponent 743 ++biased_exp; 744 int_mant >>= 1; 745 } 746 } 747 748 // Handle rounding to odd aka Von Neumann rounding: 749 if (error && rm == FPRounding_ODD) 750 int_mant |= 1; 751 752 // Handle overflow: 753 if (biased_exp >= 2047) { 754 *flags |= FPLIB_OFC | FPLIB_IXC; 755 if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) || 756 (rm == FPLIB_RM && sgn)) { 757 return fp64_infinity(sgn); 758 } else { 759 return fp64_max_normal(sgn); 760 } 761 } 762 763 if (error) { 764 *flags |= FPLIB_IXC; 765 } 766 767 return fp64_pack(sgn, biased_exp, int_mant); 768} 769 770static uint64_t 771fp64_round(int sgn, int exp, uint64_t mnt, int mode, int *flags) 772{ 773 return fp64_round_(sgn, exp, mnt, mode & 3, mode, flags); 774} 775 776static int 777fp32_compare_eq(uint32_t a, uint32_t b, int mode, int *flags) 778{ 779 int a_sgn, a_exp, b_sgn, b_exp; 780 uint32_t a_mnt, b_mnt; 781 782 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 783 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 784 785 if ((a_exp == 255 && (uint32_t)(a_mnt << 9)) || 786 (b_exp == 255 && (uint32_t)(b_mnt << 9))) { 787 if ((a_exp == 255 && (uint32_t)(a_mnt << 9) && !(a >> 22 & 1)) || 788 (b_exp == 255 && (uint32_t)(b_mnt << 9) && !(b >> 22 & 1))) 789 *flags |= FPLIB_IOC; 790 return 0; 791 } 792 return a == b || (!a_mnt && !b_mnt); 793} 794 795static int 796fp32_compare_ge(uint32_t a, uint32_t b, int mode, int *flags) 797{ 798 int a_sgn, a_exp, b_sgn, b_exp; 799 uint32_t a_mnt, b_mnt; 800 801 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 802 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 803 804 if ((a_exp == 255 && (uint32_t)(a_mnt << 9)) || 805 (b_exp == 255 && (uint32_t)(b_mnt << 9))) { 806 *flags |= FPLIB_IOC; 807 return 0; 808 } 809 if (!a_mnt && !b_mnt) 810 return 1; 811 if (a_sgn != b_sgn) 812 return b_sgn; 813 if (a_exp != b_exp) 814 return a_sgn ^ (a_exp > b_exp); 815 if (a_mnt != b_mnt) 816 return a_sgn ^ (a_mnt > b_mnt); 817 return 1; 818} 819 820static int 821fp32_compare_gt(uint32_t a, uint32_t b, int mode, int *flags) 822{ 823 int a_sgn, a_exp, b_sgn, b_exp; 824 uint32_t a_mnt, b_mnt; 825 826 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 827 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 828 829 if ((a_exp == 255 && (uint32_t)(a_mnt << 9)) || 830 (b_exp == 255 && (uint32_t)(b_mnt << 9))) { 831 *flags |= FPLIB_IOC; 832 return 0; 833 } 834 if (!a_mnt && !b_mnt) 835 return 0; 836 if (a_sgn != b_sgn) 837 return b_sgn; 838 if (a_exp != b_exp) 839 return a_sgn ^ (a_exp > b_exp); 840 if (a_mnt != b_mnt) 841 return a_sgn ^ (a_mnt > b_mnt); 842 return 0; 843} 844 845static int 846fp64_compare_eq(uint64_t a, uint64_t b, int mode, int *flags) 847{ 848 int a_sgn, a_exp, b_sgn, b_exp; 849 uint64_t a_mnt, b_mnt; 850 851 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 852 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 853 854 if ((a_exp == 2047 && (uint64_t)(a_mnt << 12)) || 855 (b_exp == 2047 && (uint64_t)(b_mnt << 12))) { 856 if ((a_exp == 2047 && (uint64_t)(a_mnt << 12) && !(a >> 51 & 1)) || 857 (b_exp == 2047 && (uint64_t)(b_mnt << 12) && !(b >> 51 & 1))) 858 *flags |= FPLIB_IOC; 859 return 0; 860 } 861 return a == b || (!a_mnt && !b_mnt); 862} 863 864static int 865fp64_compare_ge(uint64_t a, uint64_t b, int mode, int *flags) 866{ 867 int a_sgn, a_exp, b_sgn, b_exp; 868 uint64_t a_mnt, b_mnt; 869 870 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 871 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 872 873 if ((a_exp == 2047 && (uint64_t)(a_mnt << 12)) || 874 (b_exp == 2047 && (uint64_t)(b_mnt << 12))) { 875 *flags |= FPLIB_IOC; 876 return 0; 877 } 878 if (!a_mnt && !b_mnt) 879 return 1; 880 if (a_sgn != b_sgn) 881 return b_sgn; 882 if (a_exp != b_exp) 883 return a_sgn ^ (a_exp > b_exp); 884 if (a_mnt != b_mnt) 885 return a_sgn ^ (a_mnt > b_mnt); 886 return 1; 887} 888 889static int 890fp64_compare_gt(uint64_t a, uint64_t b, int mode, int *flags) 891{ 892 int a_sgn, a_exp, b_sgn, b_exp; 893 uint64_t a_mnt, b_mnt; 894 895 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 896 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 897 898 if ((a_exp == 2047 && (uint64_t)(a_mnt << 12)) || 899 (b_exp == 2047 && (uint64_t)(b_mnt << 12))) { 900 *flags |= FPLIB_IOC; 901 return 0; 902 } 903 if (!a_mnt && !b_mnt) 904 return 0; 905 if (a_sgn != b_sgn) 906 return b_sgn; 907 if (a_exp != b_exp) 908 return a_sgn ^ (a_exp > b_exp); 909 if (a_mnt != b_mnt) 910 return a_sgn ^ (a_mnt > b_mnt); 911 return 0; 912} 913 914static uint32_t 915fp32_add(uint32_t a, uint32_t b, int neg, int mode, int *flags) 916{ 917 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp; 918 uint32_t a_mnt, b_mnt, x, x_mnt; 919 920 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 921 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 922 923 if ((x = fp32_process_NaNs(a, b, mode, flags))) { 924 return x; 925 } 926 927 b_sgn ^= neg; 928 929 // Handle infinities and zeroes: 930 if (a_exp == 255 && b_exp == 255 && a_sgn != b_sgn) { 931 *flags |= FPLIB_IOC; 932 return fp32_defaultNaN(); 933 } else if (a_exp == 255) { 934 return fp32_infinity(a_sgn); 935 } else if (b_exp == 255) { 936 return fp32_infinity(b_sgn); 937 } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) { 938 return fp32_zero(a_sgn); 939 } 940 941 a_mnt <<= 3; 942 b_mnt <<= 3; 943 if (a_exp >= b_exp) { 944 b_mnt = (lsr32(b_mnt, a_exp - b_exp) | 945 !!(b_mnt & (lsl32(1, a_exp - b_exp) - 1))); 946 b_exp = a_exp; 947 } else { 948 a_mnt = (lsr32(a_mnt, b_exp - a_exp) | 949 !!(a_mnt & (lsl32(1, b_exp - a_exp) - 1))); 950 a_exp = b_exp; 951 } 952 x_sgn = a_sgn; 953 x_exp = a_exp; 954 if (a_sgn == b_sgn) { 955 x_mnt = a_mnt + b_mnt; 956 } else if (a_mnt >= b_mnt) { 957 x_mnt = a_mnt - b_mnt; 958 } else { 959 x_sgn ^= 1; 960 x_mnt = b_mnt - a_mnt; 961 } 962 963 if (!x_mnt) { 964 // Sign of exact zero result depends on rounding mode 965 return fp32_zero((mode & 3) == 2); 966 } 967 968 x_mnt = fp32_normalise(x_mnt, &x_exp); 969 970 return fp32_round(x_sgn, x_exp + 5, x_mnt << 1, mode, flags); 971} 972 973static uint64_t 974fp64_add(uint64_t a, uint64_t b, int neg, int mode, int *flags) 975{ 976 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp; 977 uint64_t a_mnt, b_mnt, x, x_mnt; 978 979 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 980 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 981 982 if ((x = fp64_process_NaNs(a, b, mode, flags))) { 983 return x; 984 } 985 986 b_sgn ^= neg; 987 988 // Handle infinities and zeroes: 989 if (a_exp == 2047 && b_exp == 2047 && a_sgn != b_sgn) { 990 *flags |= FPLIB_IOC; 991 return fp64_defaultNaN(); 992 } else if (a_exp == 2047) { 993 return fp64_infinity(a_sgn); 994 } else if (b_exp == 2047) { 995 return fp64_infinity(b_sgn); 996 } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) { 997 return fp64_zero(a_sgn); 998 } 999 1000 a_mnt <<= 3; 1001 b_mnt <<= 3; 1002 if (a_exp >= b_exp) { 1003 b_mnt = (lsr64(b_mnt, a_exp - b_exp) | 1004 !!(b_mnt & (lsl64(1, a_exp - b_exp) - 1))); 1005 b_exp = a_exp; 1006 } else { 1007 a_mnt = (lsr64(a_mnt, b_exp - a_exp) | 1008 !!(a_mnt & (lsl64(1, b_exp - a_exp) - 1))); 1009 a_exp = b_exp; 1010 } 1011 x_sgn = a_sgn; 1012 x_exp = a_exp; 1013 if (a_sgn == b_sgn) { 1014 x_mnt = a_mnt + b_mnt; 1015 } else if (a_mnt >= b_mnt) { 1016 x_mnt = a_mnt - b_mnt; 1017 } else { 1018 x_sgn ^= 1; 1019 x_mnt = b_mnt - a_mnt; 1020 } 1021 1022 if (!x_mnt) { 1023 // Sign of exact zero result depends on rounding mode 1024 return fp64_zero((mode & 3) == 2); 1025 } 1026 1027 x_mnt = fp64_normalise(x_mnt, &x_exp); 1028 1029 return fp64_round(x_sgn, x_exp + 8, x_mnt << 1, mode, flags); 1030} 1031 1032static uint32_t 1033fp32_mul(uint32_t a, uint32_t b, int mode, int *flags) 1034{ 1035 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp; 1036 uint32_t a_mnt, b_mnt, x; 1037 uint64_t x_mnt; 1038 1039 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 1040 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 1041 1042 if ((x = fp32_process_NaNs(a, b, mode, flags))) { 1043 return x; 1044 } 1045 1046 // Handle infinities and zeroes: 1047 if ((a_exp == 255 && !b_mnt) || (b_exp == 255 && !a_mnt)) { 1048 *flags |= FPLIB_IOC; 1049 return fp32_defaultNaN(); 1050 } else if (a_exp == 255 || b_exp == 255) { 1051 return fp32_infinity(a_sgn ^ b_sgn); 1052 } else if (!a_mnt || !b_mnt) { 1053 return fp32_zero(a_sgn ^ b_sgn); 1054 } 1055 1056 // Multiply and normalise: 1057 x_sgn = a_sgn ^ b_sgn; 1058 x_exp = a_exp + b_exp - 110; 1059 x_mnt = (uint64_t)a_mnt * b_mnt; 1060 x_mnt = fp64_normalise(x_mnt, &x_exp); 1061 1062 // Convert to 32 bits, collapsing error into bottom bit: 1063 x_mnt = lsr64(x_mnt, 31) | !!lsl64(x_mnt, 33); 1064 1065 return fp32_round(x_sgn, x_exp, x_mnt, mode, flags); 1066} 1067 1068static uint64_t 1069fp64_mul(uint64_t a, uint64_t b, int mode, int *flags) 1070{ 1071 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp; 1072 uint64_t a_mnt, b_mnt, x; 1073 uint64_t x0_mnt, x1_mnt; 1074 1075 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 1076 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 1077 1078 if ((x = fp64_process_NaNs(a, b, mode, flags))) { 1079 return x; 1080 } 1081 1082 // Handle infinities and zeroes: 1083 if ((a_exp == 2047 && !b_mnt) || (b_exp == 2047 && !a_mnt)) { 1084 *flags |= FPLIB_IOC; 1085 return fp64_defaultNaN(); 1086 } else if (a_exp == 2047 || b_exp == 2047) { 1087 return fp64_infinity(a_sgn ^ b_sgn); 1088 } else if (!a_mnt || !b_mnt) { 1089 return fp64_zero(a_sgn ^ b_sgn); 1090 } 1091 1092 // Multiply and normalise: 1093 x_sgn = a_sgn ^ b_sgn; 1094 x_exp = a_exp + b_exp - 1000; 1095 mul62x62(&x0_mnt, &x1_mnt, a_mnt, b_mnt); 1096 fp128_normalise(&x0_mnt, &x1_mnt, &x_exp); 1097 1098 // Convert to 64 bits, collapsing error into bottom bit: 1099 x0_mnt = x1_mnt << 1 | !!x0_mnt; 1100 1101 return fp64_round(x_sgn, x_exp, x0_mnt, mode, flags); 1102} 1103 1104static uint32_t 1105fp32_muladd(uint32_t a, uint32_t b, uint32_t c, int scale, 1106 int mode, int *flags) 1107{ 1108 int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp; 1109 uint32_t a_mnt, b_mnt, c_mnt, x; 1110 uint64_t x_mnt, y_mnt; 1111 1112 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 1113 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 1114 fp32_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags); 1115 1116 x = fp32_process_NaNs3(a, b, c, mode, flags); 1117 1118 // Quiet NaN added to product of zero and infinity: 1119 if (a_exp == 255 && (a_mnt >> 22 & 1) && 1120 ((!b_mnt && c_exp == 255 && !(uint32_t)(c_mnt << 9)) || 1121 (!c_mnt && b_exp == 255 && !(uint32_t)(b_mnt << 9)))) { 1122 x = fp32_defaultNaN(); 1123 *flags |= FPLIB_IOC; 1124 } 1125 1126 if (x) { 1127 return x; 1128 } 1129 1130 // Handle infinities and zeroes: 1131 if ((b_exp == 255 && !c_mnt) || 1132 (c_exp == 255 && !b_mnt) || 1133 (a_exp == 255 && (b_exp == 255 || c_exp == 255) && 1134 (a_sgn != (b_sgn ^ c_sgn)))) { 1135 *flags |= FPLIB_IOC; 1136 return fp32_defaultNaN(); 1137 } 1138 if (a_exp == 255) 1139 return fp32_infinity(a_sgn); 1140 if (b_exp == 255 || c_exp == 255) 1141 return fp32_infinity(b_sgn ^ c_sgn); 1142 if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn)) 1143 return fp32_zero(a_sgn); 1144 1145 x_sgn = a_sgn; 1146 x_exp = a_exp + 13; 1147 x_mnt = (uint64_t)a_mnt << 27; 1148 1149 // Multiply: 1150 y_sgn = b_sgn ^ c_sgn; 1151 y_exp = b_exp + c_exp - 113; 1152 y_mnt = (uint64_t)b_mnt * c_mnt << 3; 1153 if (!y_mnt) { 1154 y_exp = x_exp; 1155 } 1156 1157 // Add: 1158 if (x_exp >= y_exp) { 1159 y_mnt = (lsr64(y_mnt, x_exp - y_exp) | 1160 !!(y_mnt & (lsl64(1, x_exp - y_exp) - 1))); 1161 y_exp = x_exp; 1162 } else { 1163 x_mnt = (lsr64(x_mnt, y_exp - x_exp) | 1164 !!(x_mnt & (lsl64(1, y_exp - x_exp) - 1))); 1165 x_exp = y_exp; 1166 } 1167 if (x_sgn == y_sgn) { 1168 x_mnt = x_mnt + y_mnt; 1169 } else if (x_mnt >= y_mnt) { 1170 x_mnt = x_mnt - y_mnt; 1171 } else { 1172 x_sgn ^= 1; 1173 x_mnt = y_mnt - x_mnt; 1174 } 1175 1176 if (!x_mnt) { 1177 // Sign of exact zero result depends on rounding mode 1178 return fp32_zero((mode & 3) == 2); 1179 } 1180 1181 // Normalise and convert to 32 bits, collapsing error into bottom bit: 1182 x_mnt = fp64_normalise(x_mnt, &x_exp); 1183 x_mnt = x_mnt >> 31 | !!(uint32_t)(x_mnt << 1); 1184 1185 return fp32_round(x_sgn, x_exp + scale, x_mnt, mode, flags); 1186} 1187 1188static uint64_t 1189fp64_muladd(uint64_t a, uint64_t b, uint64_t c, int scale, 1190 int mode, int *flags) 1191{ 1192 int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp; 1193 uint64_t a_mnt, b_mnt, c_mnt, x; 1194 uint64_t x0_mnt, x1_mnt, y0_mnt, y1_mnt; 1195 1196 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 1197 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 1198 fp64_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags); 1199 1200 x = fp64_process_NaNs3(a, b, c, mode, flags); 1201 1202 // Quiet NaN added to product of zero and infinity: 1203 if (a_exp == 2047 && (a_mnt >> 51 & 1) && 1204 ((!b_mnt && c_exp == 2047 && !(uint64_t)(c_mnt << 12)) || 1205 (!c_mnt && b_exp == 2047 && !(uint64_t)(b_mnt << 12)))) { 1206 x = fp64_defaultNaN(); 1207 *flags |= FPLIB_IOC; 1208 } 1209 1210 if (x) { 1211 return x; 1212 } 1213 1214 // Handle infinities and zeroes: 1215 if ((b_exp == 2047 && !c_mnt) || 1216 (c_exp == 2047 && !b_mnt) || 1217 (a_exp == 2047 && (b_exp == 2047 || c_exp == 2047) && 1218 (a_sgn != (b_sgn ^ c_sgn)))) { 1219 *flags |= FPLIB_IOC; 1220 return fp64_defaultNaN(); 1221 } 1222 if (a_exp == 2047) 1223 return fp64_infinity(a_sgn); 1224 if (b_exp == 2047 || c_exp == 2047) 1225 return fp64_infinity(b_sgn ^ c_sgn); 1226 if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn)) 1227 return fp64_zero(a_sgn); 1228 1229 x_sgn = a_sgn; 1230 x_exp = a_exp + 11; 1231 x0_mnt = 0; 1232 x1_mnt = a_mnt; 1233 1234 // Multiply: 1235 y_sgn = b_sgn ^ c_sgn; 1236 y_exp = b_exp + c_exp - 1003; 1237 mul62x62(&y0_mnt, &y1_mnt, b_mnt, c_mnt << 3); 1238 if (!y0_mnt && !y1_mnt) { 1239 y_exp = x_exp; 1240 } 1241 1242 // Add: 1243 if (x_exp >= y_exp) { 1244 uint64_t t0, t1; 1245 lsl128(&t0, &t1, y0_mnt, y1_mnt, 1246 x_exp - y_exp < 128 ? 128 - (x_exp - y_exp) : 0); 1247 lsr128(&y0_mnt, &y1_mnt, y0_mnt, y1_mnt, x_exp - y_exp); 1248 y0_mnt |= !!(t0 | t1); 1249 y_exp = x_exp; 1250 } else { 1251 uint64_t t0, t1; 1252 lsl128(&t0, &t1, x0_mnt, x1_mnt, 1253 y_exp - x_exp < 128 ? 128 - (y_exp - x_exp) : 0); 1254 lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y_exp - x_exp); 1255 x0_mnt |= !!(t0 | t1); 1256 x_exp = y_exp; 1257 } 1258 if (x_sgn == y_sgn) { 1259 add128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y0_mnt, y1_mnt); 1260 } else if (cmp128(x0_mnt, x1_mnt, y0_mnt, y1_mnt) >= 0) { 1261 sub128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y0_mnt, y1_mnt); 1262 } else { 1263 x_sgn ^= 1; 1264 sub128(&x0_mnt, &x1_mnt, y0_mnt, y1_mnt, x0_mnt, x1_mnt); 1265 } 1266 1267 if (!x0_mnt && !x1_mnt) { 1268 // Sign of exact zero result depends on rounding mode 1269 return fp64_zero((mode & 3) == 2); 1270 } 1271 1272 // Normalise and convert to 64 bits, collapsing error into bottom bit: 1273 fp128_normalise(&x0_mnt, &x1_mnt, &x_exp); 1274 x0_mnt = x1_mnt << 1 | !!x0_mnt; 1275 1276 return fp64_round(x_sgn, x_exp + scale, x0_mnt, mode, flags); 1277} 1278 1279static uint32_t 1280fp32_div(uint32_t a, uint32_t b, int mode, int *flags) 1281{ 1282 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp; 1283 uint32_t a_mnt, b_mnt, x; 1284 uint64_t x_mnt; 1285 1286 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 1287 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 1288 1289 if ((x = fp32_process_NaNs(a, b, mode, flags))) 1290 return x; 1291 1292 // Handle infinities and zeroes: 1293 if ((a_exp == 255 && b_exp == 255) || (!a_mnt && !b_mnt)) { 1294 *flags |= FPLIB_IOC; 1295 return fp32_defaultNaN(); 1296 } 1297 if (a_exp == 255 || !b_mnt) { 1298 if (a_exp != 255) 1299 *flags |= FPLIB_DZC; 1300 return fp32_infinity(a_sgn ^ b_sgn); 1301 } 1302 if (!a_mnt || b_exp == 255) 1303 return fp32_zero(a_sgn ^ b_sgn); 1304 1305 // Divide, setting bottom bit if inexact: 1306 a_mnt = fp32_normalise(a_mnt, &a_exp); 1307 x_sgn = a_sgn ^ b_sgn; 1308 x_exp = a_exp - b_exp + 172; 1309 x_mnt = ((uint64_t)a_mnt << 18) / b_mnt; 1310 x_mnt |= (x_mnt * b_mnt != (uint64_t)a_mnt << 18); 1311 1312 // Normalise and convert to 32 bits, collapsing error into bottom bit: 1313 x_mnt = fp64_normalise(x_mnt, &x_exp); 1314 x_mnt = x_mnt >> 31 | !!(uint32_t)(x_mnt << 1); 1315 1316 return fp32_round(x_sgn, x_exp, x_mnt, mode, flags); 1317} 1318 1319static uint64_t 1320fp64_div(uint64_t a, uint64_t b, int mode, int *flags) 1321{ 1322 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp, c; 1323 uint64_t a_mnt, b_mnt, x, x_mnt, x0_mnt, x1_mnt; 1324 1325 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 1326 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 1327 1328 if ((x = fp64_process_NaNs(a, b, mode, flags))) 1329 return x; 1330 1331 // Handle infinities and zeroes: 1332 if ((a_exp == 2047 && b_exp == 2047) || (!a_mnt && !b_mnt)) { 1333 *flags |= FPLIB_IOC; 1334 return fp64_defaultNaN(); 1335 } 1336 if (a_exp == 2047 || !b_mnt) { 1337 if (a_exp != 2047) 1338 *flags |= FPLIB_DZC; 1339 return fp64_infinity(a_sgn ^ b_sgn); 1340 } 1341 if (!a_mnt || b_exp == 2047) 1342 return fp64_zero(a_sgn ^ b_sgn); 1343 1344 // Find reciprocal of divisor with Newton-Raphson: 1345 a_mnt = fp64_normalise(a_mnt, &a_exp); 1346 b_mnt = fp64_normalise(b_mnt, &b_exp); 1347 x_mnt = ~(uint64_t)0 / (b_mnt >> 31); 1348 mul64x32(&x0_mnt, &x1_mnt, b_mnt, x_mnt); 1349 sub128(&x0_mnt, &x1_mnt, 0, (uint64_t)1 << 32, x0_mnt, x1_mnt); 1350 lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 32); 1351 mul64x32(&x0_mnt, &x1_mnt, x0_mnt, x_mnt); 1352 lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 33); 1353 1354 // Multiply by dividend: 1355 x_sgn = a_sgn ^ b_sgn; 1356 x_exp = a_exp - b_exp + 1031; 1357 mul62x62(&x0_mnt, &x1_mnt, x0_mnt, a_mnt >> 2); // xx 62x62 is enough 1358 lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 4); 1359 x_mnt = x1_mnt; 1360 1361 // This is an underestimate, so try adding one: 1362 mul62x62(&x0_mnt, &x1_mnt, b_mnt >> 2, x_mnt + 1); // xx 62x62 is enough 1363 c = cmp128(x0_mnt, x1_mnt, 0, a_mnt >> 11); 1364 if (c <= 0) { 1365 ++x_mnt; 1366 } 1367 1368 x_mnt = fp64_normalise(x_mnt, &x_exp); 1369 1370 return fp64_round(x_sgn, x_exp, x_mnt << 1 | !!c, mode, flags); 1371} 1372 1373static void 1374set_fpscr0(FPSCR &fpscr, int flags) 1375{ 1376 if (flags & FPLIB_IDC) { 1377 fpscr.idc = 1; 1378 } 1379 if (flags & FPLIB_IOC) { 1380 fpscr.ioc = 1; 1381 } 1382 if (flags & FPLIB_DZC) { 1383 fpscr.dzc = 1; 1384 } 1385 if (flags & FPLIB_OFC) { 1386 fpscr.ofc = 1; 1387 } 1388 if (flags & FPLIB_UFC) { 1389 fpscr.ufc = 1; 1390 } 1391 if (flags & FPLIB_IXC) { 1392 fpscr.ixc = 1; 1393 } 1394} 1395 1396static uint32_t 1397fp32_sqrt(uint32_t a, int mode, int *flags) 1398{ 1399 int a_sgn, a_exp, x_sgn, x_exp; 1400 uint32_t a_mnt, x, x_mnt; 1401 uint64_t t0, t1; 1402 1403 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 1404 1405 // Handle NaNs: 1406 if (a_exp == 255 && (uint32_t)(a_mnt << 9)) 1407 return fp32_process_NaN(a, mode, flags); 1408 1409 // Handle infinities and zeroes: 1410 if (!a_mnt) { 1411 return fp32_zero(a_sgn); 1412 } 1413 if (a_exp == 255 && !a_sgn) { 1414 return fp32_infinity(a_sgn); 1415 } 1416 if (a_sgn) { 1417 *flags |= FPLIB_IOC; 1418 return fp32_defaultNaN(); 1419 } 1420 1421 a_mnt = fp32_normalise(a_mnt, &a_exp); 1422 if (!(a_exp & 1)) { 1423 ++a_exp; 1424 a_mnt >>= 1; 1425 } 1426 1427 // x = (a * 3 + 5) / 8 1428 x = (a_mnt >> 2) + (a_mnt >> 3) + (5 << 28); 1429 1430 // x = (a / x + x) / 2; // 16-bit accuracy 1431 x = (a_mnt / (x >> 15) + (x >> 16)) << 15; 1432 1433 // x = (a / x + x) / 2; // 16-bit accuracy 1434 x = (a_mnt / (x >> 15) + (x >> 16)) << 15; 1435 1436 // x = (a / x + x) / 2; // 32-bit accuracy 1437 x = ((((uint64_t)a_mnt << 32) / x) >> 2) + (x >> 1); 1438 1439 x_sgn = 0; 1440 x_exp = (a_exp + 147) >> 1; 1441 x_mnt = ((x - (1 << 5)) >> 6) + 1; 1442 t1 = (uint64_t)x_mnt * x_mnt; 1443 t0 = (uint64_t)a_mnt << 19; 1444 if (t1 > t0) { 1445 --x_mnt; 1446 } 1447 1448 x_mnt = fp32_normalise(x_mnt, &x_exp); 1449 1450 return fp32_round(x_sgn, x_exp, x_mnt << 1 | (t1 != t0), mode, flags); 1451} 1452 1453static uint64_t 1454fp64_sqrt(uint64_t a, int mode, int *flags) 1455{ 1456 int a_sgn, a_exp, x_sgn, x_exp, c; 1457 uint64_t a_mnt, x_mnt, r, x0, x1; 1458 uint32_t x; 1459 1460 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 1461 1462 // Handle NaNs: 1463 if (a_exp == 2047 && (uint64_t)(a_mnt << 12)) { 1464 return fp64_process_NaN(a, mode, flags); 1465 } 1466 1467 // Handle infinities and zeroes: 1468 if (!a_mnt) 1469 return fp64_zero(a_sgn); 1470 if (a_exp == 2047 && !a_sgn) 1471 return fp64_infinity(a_sgn); 1472 if (a_sgn) { 1473 *flags |= FPLIB_IOC; 1474 return fp64_defaultNaN(); 1475 } 1476 1477 a_mnt = fp64_normalise(a_mnt, &a_exp); 1478 if (a_exp & 1) { 1479 ++a_exp; 1480 a_mnt >>= 1; 1481 } 1482 1483 // x = (a * 3 + 5) / 8 1484 x = (a_mnt >> 34) + (a_mnt >> 35) + (5 << 28); 1485 1486 // x = (a / x + x) / 2; // 16-bit accuracy 1487 x = ((a_mnt >> 32) / (x >> 15) + (x >> 16)) << 15; 1488 1489 // x = (a / x + x) / 2; // 16-bit accuracy 1490 x = ((a_mnt >> 32) / (x >> 15) + (x >> 16)) << 15; 1491 1492 // x = (a / x + x) / 2; // 32-bit accuracy 1493 x = ((a_mnt / x) >> 2) + (x >> 1); 1494 1495 // r = 1 / x; // 32-bit accuracy 1496 r = ((uint64_t)1 << 62) / x; 1497 1498 // r = r * (2 - x * r); // 64-bit accuracy 1499 mul64x32(&x0, &x1, -(uint64_t)x * r << 1, r); 1500 lsr128(&x0, &x1, x0, x1, 31); 1501 1502 // x = (x + a * r) / 2; // 64-bit accuracy 1503 mul62x62(&x0, &x1, a_mnt >> 10, x0 >> 2); 1504 lsl128(&x0, &x1, x0, x1, 5); 1505 lsr128(&x0, &x1, x0, x1, 56); 1506 1507 x0 = ((uint64_t)x << 31) + (x0 >> 1); 1508 1509 x_sgn = 0; 1510 x_exp = (a_exp + 1053) >> 1; 1511 x_mnt = x0; 1512 x_mnt = ((x_mnt - (1 << 8)) >> 9) + 1; 1513 mul62x62(&x0, &x1, x_mnt, x_mnt); 1514 lsl128(&x0, &x1, x0, x1, 19); 1515 c = cmp128(x0, x1, 0, a_mnt); 1516 if (c > 0) 1517 --x_mnt; 1518 1519 x_mnt = fp64_normalise(x_mnt, &x_exp); 1520 1521 return fp64_round(x_sgn, x_exp, x_mnt << 1 | !!c, mode, flags); 1522} 1523 1524static int 1525modeConv(FPSCR fpscr) 1526{ 1527 return (((int) fpscr) >> 22) & 0xF; 1528} 1529 1530static void 1531set_fpscr(FPSCR &fpscr, int flags) 1532{ 1533 // translate back to FPSCR 1534 bool underflow = false; 1535 if (flags & FPLIB_IDC) { 1536 fpscr.idc = 1; 1537 } 1538 if (flags & FPLIB_IOC) { 1539 fpscr.ioc = 1; 1540 } 1541 if (flags & FPLIB_DZC) { 1542 fpscr.dzc = 1; 1543 } 1544 if (flags & FPLIB_OFC) { 1545 fpscr.ofc = 1; 1546 } 1547 if (flags & FPLIB_UFC) { 1548 underflow = true; //xx Why is this required? 1549 fpscr.ufc = 1; 1550 } 1551 if ((flags & FPLIB_IXC) && !(underflow && fpscr.fz)) { 1552 fpscr.ixc = 1; 1553 } 1554} 1555 1556template <> 1557bool 1558fplibCompareEQ(uint32_t a, uint32_t b, FPSCR &fpscr) 1559{ 1560 int flags = 0; 1561 int x = fp32_compare_eq(a, b, modeConv(fpscr), &flags); 1562 set_fpscr(fpscr, flags); 1563 return x; 1564} 1565 1566template <> 1567bool 1568fplibCompareGE(uint32_t a, uint32_t b, FPSCR &fpscr) 1569{ 1570 int flags = 0; 1571 int x = fp32_compare_ge(a, b, modeConv(fpscr), &flags); 1572 set_fpscr(fpscr, flags); 1573 return x; 1574} 1575 1576template <> 1577bool 1578fplibCompareGT(uint32_t a, uint32_t b, FPSCR &fpscr) 1579{ 1580 int flags = 0; 1581 int x = fp32_compare_gt(a, b, modeConv(fpscr), &flags); 1582 set_fpscr(fpscr, flags); 1583 return x; 1584} 1585 1586template <> 1587bool 1588fplibCompareEQ(uint64_t a, uint64_t b, FPSCR &fpscr) 1589{ 1590 int flags = 0; 1591 int x = fp64_compare_eq(a, b, modeConv(fpscr), &flags); 1592 set_fpscr(fpscr, flags); 1593 return x; 1594} 1595 1596template <> 1597bool 1598fplibCompareGE(uint64_t a, uint64_t b, FPSCR &fpscr) 1599{ 1600 int flags = 0; 1601 int x = fp64_compare_ge(a, b, modeConv(fpscr), &flags); 1602 set_fpscr(fpscr, flags); 1603 return x; 1604} 1605 1606template <> 1607bool 1608fplibCompareGT(uint64_t a, uint64_t b, FPSCR &fpscr) 1609{ 1610 int flags = 0; 1611 int x = fp64_compare_gt(a, b, modeConv(fpscr), &flags); 1612 set_fpscr(fpscr, flags); 1613 return x; 1614} 1615 1616template <> 1617uint32_t 1618fplibAbs(uint32_t op) 1619{ 1620 return op & ~((uint32_t)1 << 31); 1621} 1622 1623template <> 1624uint64_t 1625fplibAbs(uint64_t op) 1626{ 1627 return op & ~((uint64_t)1 << 63); 1628} 1629 1630template <> 1631uint32_t 1632fplibAdd(uint32_t op1, uint32_t op2, FPSCR &fpscr) 1633{ 1634 int flags = 0; 1635 uint32_t result = fp32_add(op1, op2, 0, modeConv(fpscr), &flags); 1636 set_fpscr0(fpscr, flags); 1637 return result; 1638} 1639 1640template <> 1641uint64_t 1642fplibAdd(uint64_t op1, uint64_t op2, FPSCR &fpscr) 1643{ 1644 int flags = 0; 1645 uint64_t result = fp64_add(op1, op2, 0, modeConv(fpscr), &flags); 1646 set_fpscr0(fpscr, flags); 1647 return result; 1648} 1649 1650template <> 1651int 1652fplibCompare(uint32_t op1, uint32_t op2, bool signal_nans, FPSCR &fpscr) 1653{ 1654 int mode = modeConv(fpscr); 1655 int flags = 0; 1656 int sgn1, exp1, sgn2, exp2, result; 1657 uint32_t mnt1, mnt2; 1658 1659 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); 1660 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); 1661 1662 if ((exp1 == 255 && (uint32_t)(mnt1 << 9)) || 1663 (exp2 == 255 && (uint32_t)(mnt2 << 9))) { 1664 result = 3; 1665 if ((exp1 == 255 && (uint32_t)(mnt1 << 9) && !(mnt1 >> 22 & 1)) || 1666 (exp2 == 255 && (uint32_t)(mnt2 << 9) && !(mnt2 >> 22 & 1)) || 1667 signal_nans) 1668 flags |= FPLIB_IOC; 1669 } else { 1670 if (op1 == op2 || (!mnt1 && !mnt2)) { 1671 result = 6; 1672 } else if (sgn1 != sgn2) { 1673 result = sgn1 ? 8 : 2; 1674 } else if (exp1 != exp2) { 1675 result = sgn1 ^ (exp1 < exp2) ? 8 : 2; 1676 } else { 1677 result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2; 1678 } 1679 } 1680 1681 set_fpscr0(fpscr, flags); 1682 1683 return result; 1684} 1685 1686template <> 1687int 1688fplibCompare(uint64_t op1, uint64_t op2, bool signal_nans, FPSCR &fpscr) 1689{ 1690 int mode = modeConv(fpscr); 1691 int flags = 0; 1692 int sgn1, exp1, sgn2, exp2, result; 1693 uint64_t mnt1, mnt2; 1694 1695 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); 1696 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); 1697 1698 if ((exp1 == 2047 && (uint64_t)(mnt1 << 12)) || 1699 (exp2 == 2047 && (uint64_t)(mnt2 << 12))) { 1700 result = 3; 1701 if ((exp1 == 2047 && (uint64_t)(mnt1 << 12) && !(mnt1 >> 51 & 1)) || 1702 (exp2 == 2047 && (uint64_t)(mnt2 << 12) && !(mnt2 >> 51 & 1)) || 1703 signal_nans) 1704 flags |= FPLIB_IOC; 1705 } else { 1706 if (op1 == op2 || (!mnt1 && !mnt2)) { 1707 result = 6; 1708 } else if (sgn1 != sgn2) { 1709 result = sgn1 ? 8 : 2; 1710 } else if (exp1 != exp2) { 1711 result = sgn1 ^ (exp1 < exp2) ? 8 : 2; 1712 } else { 1713 result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2; 1714 } 1715 } 1716 1717 set_fpscr0(fpscr, flags); 1718 1719 return result; 1720} 1721 1722static uint16_t 1723fp16_FPConvertNaN_32(uint32_t op) 1724{ 1725 return fp16_pack(op >> 31, 31, (uint16_t)1 << 9 | op >> 13); 1726} 1727 1728static uint16_t 1729fp16_FPConvertNaN_64(uint64_t op) 1730{ 1731 return fp16_pack(op >> 63, 31, (uint16_t)1 << 9 | op >> 42); 1732} 1733 1734static uint32_t 1735fp32_FPConvertNaN_16(uint16_t op) 1736{ 1737 return fp32_pack(op >> 15, 255, (uint32_t)1 << 22 | (uint32_t)op << 13); 1738} 1739 1740static uint32_t 1741fp32_FPConvertNaN_64(uint64_t op) 1742{ 1743 return fp32_pack(op >> 63, 255, (uint32_t)1 << 22 | op >> 29); 1744} 1745 1746static uint64_t 1747fp64_FPConvertNaN_16(uint16_t op) 1748{ 1749 return fp64_pack(op >> 15, 2047, (uint64_t)1 << 51 | (uint64_t)op << 42); 1750} 1751 1752static uint64_t 1753fp64_FPConvertNaN_32(uint32_t op) 1754{ 1755 return fp64_pack(op >> 31, 2047, (uint64_t)1 << 51 | (uint64_t)op << 29); 1756} 1757 1758static uint32_t 1759fp32_FPOnePointFive(int sgn) 1760{ 1761 return fp32_pack(sgn, 127, (uint64_t)1 << 22); 1762} 1763 1764static uint64_t 1765fp64_FPOnePointFive(int sgn) 1766{ 1767 return fp64_pack(sgn, 1023, (uint64_t)1 << 51); 1768} 1769 1770static uint32_t 1771fp32_FPThree(int sgn) 1772{ 1773 return fp32_pack(sgn, 128, (uint64_t)1 << 22); 1774} 1775 1776static uint64_t 1777fp64_FPThree(int sgn) 1778{ 1779 return fp64_pack(sgn, 1024, (uint64_t)1 << 51); 1780} 1781 1782static uint32_t 1783fp32_FPTwo(int sgn) 1784{ 1785 return fp32_pack(sgn, 128, 0); 1786} 1787 1788static uint64_t 1789fp64_FPTwo(int sgn) 1790{ 1791 return fp64_pack(sgn, 1024, 0); 1792} 1793 1794template <> 1795uint16_t 1796fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr) 1797{ 1798 int mode = modeConv(fpscr); 1799 int flags = 0; 1800 int sgn, exp; 1801 uint32_t mnt; 1802 uint16_t result; 1803 1804 // Unpack floating-point operand optionally with flush-to-zero: 1805 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags); 1806 1807 bool alt_hp = fpscr.ahp; 1808 1809 if (exp == 255 && (uint32_t)(mnt << 9)) { 1810 if (alt_hp) { 1811 result = fp16_zero(sgn); 1812 } else if (fpscr.dn) { 1813 result = fp16_defaultNaN(); 1814 } else { 1815 result = fp16_FPConvertNaN_32(op); 1816 } 1817 if (!(mnt >> 22 & 1) || alt_hp) { 1818 flags |= FPLIB_IOC; 1819 } 1820 } else if (exp == 255) { 1821 if (alt_hp) { 1822 result = sgn << 15 | (uint16_t)0x7fff; 1823 flags |= FPLIB_IOC; 1824 } else { 1825 result = fp16_infinity(sgn); 1826 } 1827 } else if (!mnt) { 1828 result = fp16_zero(sgn); 1829 } else { 1830 result = fp16_round_(sgn, exp - 127 + 15, 1831 mnt >> 7 | !!(uint32_t)(mnt << 25), 1832 rounding, mode | alt_hp << 4, &flags); 1833 } 1834 1835 set_fpscr0(fpscr, flags); 1836 1837 return result; 1838} 1839 1840template <> 1841uint16_t 1842fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr) 1843{ 1844 int mode = modeConv(fpscr); 1845 int flags = 0; 1846 int sgn, exp; 1847 uint64_t mnt; 1848 uint16_t result; 1849 1850 // Unpack floating-point operand optionally with flush-to-zero: 1851 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags); 1852 1853 bool alt_hp = fpscr.ahp; 1854 1855 if (exp == 2047 && (uint64_t)(mnt << 12)) { 1856 if (alt_hp) { 1857 result = fp16_zero(sgn); 1858 } else if (fpscr.dn) { 1859 result = fp16_defaultNaN(); 1860 } else { 1861 result = fp16_FPConvertNaN_64(op); 1862 } 1863 if (!(mnt >> 51 & 1) || alt_hp) { 1864 flags |= FPLIB_IOC; 1865 } 1866 } else if (exp == 2047) { 1867 if (alt_hp) { 1868 result = sgn << 15 | (uint16_t)0x7fff; 1869 flags |= FPLIB_IOC; 1870 } else { 1871 result = fp16_infinity(sgn); 1872 } 1873 } else if (!mnt) { 1874 result = fp16_zero(sgn); 1875 } else { 1876 result = fp16_round_(sgn, exp - 1023 + 15, 1877 mnt >> 36 | !!(uint64_t)(mnt << 28), 1878 rounding, mode | alt_hp << 4, &flags); 1879 } 1880 1881 set_fpscr0(fpscr, flags); 1882 1883 return result; 1884} 1885 1886template <> 1887uint32_t 1888fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr) 1889{ 1890 int mode = modeConv(fpscr); 1891 int flags = 0; 1892 int sgn, exp; 1893 uint16_t mnt; 1894 uint32_t result; 1895 1896 // Unpack floating-point operand optionally with flush-to-zero: 1897 fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags); 1898 1899 if (exp == 31 && !fpscr.ahp && (uint16_t)(mnt << 6)) { 1900 if (fpscr.dn) { 1901 result = fp32_defaultNaN(); 1902 } else { 1903 result = fp32_FPConvertNaN_16(op); 1904 } 1905 if (!(mnt >> 9 & 1)) { 1906 flags |= FPLIB_IOC; 1907 } 1908 } else if (exp == 31 && !fpscr.ahp) { 1909 result = fp32_infinity(sgn); 1910 } else if (!mnt) { 1911 result = fp32_zero(sgn); 1912 } else { 1913 mnt = fp16_normalise(mnt, &exp); 1914 result = fp32_pack(sgn, exp - 15 + 127 + 5, (uint32_t)mnt << 8); 1915 } 1916 1917 set_fpscr0(fpscr, flags); 1918 1919 return result; 1920} 1921 1922template <> 1923uint32_t 1924fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr) 1925{ 1926 int mode = modeConv(fpscr); 1927 int flags = 0; 1928 int sgn, exp; 1929 uint64_t mnt; 1930 uint32_t result; 1931 1932 // Unpack floating-point operand optionally with flush-to-zero: 1933 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags); 1934 1935 if (exp == 2047 && (uint64_t)(mnt << 12)) { 1936 if (fpscr.dn) { 1937 result = fp32_defaultNaN(); 1938 } else { 1939 result = fp32_FPConvertNaN_64(op); 1940 } 1941 if (!(mnt >> 51 & 1)) { 1942 flags |= FPLIB_IOC; 1943 } 1944 } else if (exp == 2047) { 1945 result = fp32_infinity(sgn); 1946 } else if (!mnt) { 1947 result = fp32_zero(sgn); 1948 } else { 1949 result = fp32_round_(sgn, exp - 1023 + 127, 1950 mnt >> 20 | !!(uint64_t)(mnt << 44), 1951 rounding, mode, &flags); 1952 } 1953 1954 set_fpscr0(fpscr, flags); 1955 1956 return result; 1957} 1958 1959template <> 1960uint64_t 1961fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr) 1962{ 1963 int mode = modeConv(fpscr); 1964 int flags = 0; 1965 int sgn, exp; 1966 uint16_t mnt; 1967 uint64_t result; 1968 1969 // Unpack floating-point operand optionally with flush-to-zero: 1970 fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags); 1971 1972 if (exp == 31 && !fpscr.ahp && (uint16_t)(mnt << 6)) { 1973 if (fpscr.dn) { 1974 result = fp64_defaultNaN(); 1975 } else { 1976 result = fp64_FPConvertNaN_16(op); 1977 } 1978 if (!(mnt >> 9 & 1)) { 1979 flags |= FPLIB_IOC; 1980 } 1981 } else if (exp == 31 && !fpscr.ahp) { 1982 result = fp64_infinity(sgn); 1983 } else if (!mnt) { 1984 result = fp64_zero(sgn); 1985 } else { 1986 mnt = fp16_normalise(mnt, &exp); 1987 result = fp64_pack(sgn, exp - 15 + 1023 + 5, (uint64_t)mnt << 37); 1988 } 1989 1990 set_fpscr0(fpscr, flags); 1991 1992 return result; 1993} 1994 1995template <> 1996uint64_t 1997fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr) 1998{ 1999 int mode = modeConv(fpscr); 2000 int flags = 0; 2001 int sgn, exp; 2002 uint32_t mnt; 2003 uint64_t result; 2004 2005 // Unpack floating-point operand optionally with flush-to-zero: 2006 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags); 2007 2008 if (exp == 255 && (uint32_t)(mnt << 9)) { 2009 if (fpscr.dn) { 2010 result = fp64_defaultNaN(); 2011 } else { 2012 result = fp64_FPConvertNaN_32(op); 2013 } 2014 if (!(mnt >> 22 & 1)) { 2015 flags |= FPLIB_IOC; 2016 } 2017 } else if (exp == 255) { 2018 result = fp64_infinity(sgn); 2019 } else if (!mnt) { 2020 result = fp64_zero(sgn); 2021 } else { 2022 mnt = fp32_normalise(mnt, &exp); 2023 result = fp64_pack(sgn, exp - 127 + 1023 + 8, (uint64_t)mnt << 21); 2024 } 2025 2026 set_fpscr0(fpscr, flags); 2027 2028 return result; 2029} 2030 2031template <> 2032uint32_t 2033fplibMulAdd(uint32_t addend, uint32_t op1, uint32_t op2, FPSCR &fpscr) 2034{ 2035 int flags = 0; 2036 uint32_t result = fp32_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags); 2037 set_fpscr0(fpscr, flags); 2038 return result; 2039} 2040 2041template <> 2042uint64_t 2043fplibMulAdd(uint64_t addend, uint64_t op1, uint64_t op2, FPSCR &fpscr) 2044{ 2045 int flags = 0; 2046 uint64_t result = fp64_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags); 2047 set_fpscr0(fpscr, flags); 2048 return result; 2049} 2050 2051template <> 2052uint32_t 2053fplibDiv(uint32_t op1, uint32_t op2, FPSCR &fpscr) 2054{ 2055 int flags = 0; 2056 uint32_t result = fp32_div(op1, op2, modeConv(fpscr), &flags); 2057 set_fpscr0(fpscr, flags); 2058 return result; 2059} 2060 2061template <> 2062uint64_t 2063fplibDiv(uint64_t op1, uint64_t op2, FPSCR &fpscr) 2064{ 2065 int flags = 0; 2066 uint64_t result = fp64_div(op1, op2, modeConv(fpscr), &flags); 2067 set_fpscr0(fpscr, flags); 2068 return result; 2069} 2070 2071static uint32_t 2072fp32_repack(int sgn, int exp, uint32_t mnt) 2073{ 2074 return fp32_pack(sgn, mnt >> 23 ? exp : 0, mnt); 2075} 2076 2077static uint64_t 2078fp64_repack(int sgn, int exp, uint64_t mnt) 2079{ 2080 return fp64_pack(sgn, mnt >> 52 ? exp : 0, mnt); 2081} 2082 2083static void 2084fp32_minmaxnum(uint32_t *op1, uint32_t *op2, int sgn) 2085{ 2086 // Treat a single quiet-NaN as +Infinity/-Infinity 2087 if (!((uint32_t)~(*op1 << 1) >> 23) && (uint32_t)~(*op2 << 1) >> 23) 2088 *op1 = fp32_infinity(sgn); 2089 if (!((uint32_t)~(*op2 << 1) >> 23) && (uint32_t)~(*op1 << 1) >> 23) 2090 *op2 = fp32_infinity(sgn); 2091} 2092 2093static void 2094fp64_minmaxnum(uint64_t *op1, uint64_t *op2, int sgn) 2095{ 2096 // Treat a single quiet-NaN as +Infinity/-Infinity 2097 if (!((uint64_t)~(*op1 << 1) >> 52) && (uint64_t)~(*op2 << 1) >> 52) 2098 *op1 = fp64_infinity(sgn); 2099 if (!((uint64_t)~(*op2 << 1) >> 52) && (uint64_t)~(*op1 << 1) >> 52) 2100 *op2 = fp64_infinity(sgn); 2101} 2102 2103template <> 2104uint32_t 2105fplibMax(uint32_t op1, uint32_t op2, FPSCR &fpscr) 2106{ 2107 int mode = modeConv(fpscr); 2108 int flags = 0; 2109 int sgn1, exp1, sgn2, exp2; 2110 uint32_t mnt1, mnt2, x, result; 2111 2112 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); 2113 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); 2114 2115 if ((x = fp32_process_NaNs(op1, op2, mode, &flags))) { 2116 result = x; 2117 } else { 2118 result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ? 2119 fp32_repack(sgn1, exp1, mnt1) : 2120 fp32_repack(sgn2, exp2, mnt2)); 2121 } 2122 set_fpscr0(fpscr, flags); 2123 return result; 2124} 2125 2126template <> 2127uint64_t 2128fplibMax(uint64_t op1, uint64_t op2, FPSCR &fpscr) 2129{ 2130 int mode = modeConv(fpscr); 2131 int flags = 0; 2132 int sgn1, exp1, sgn2, exp2; 2133 uint64_t mnt1, mnt2, x, result; 2134 2135 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); 2136 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); 2137 2138 if ((x = fp64_process_NaNs(op1, op2, mode, &flags))) { 2139 result = x; 2140 } else { 2141 result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ? 2142 fp64_repack(sgn1, exp1, mnt1) : 2143 fp64_repack(sgn2, exp2, mnt2)); 2144 } 2145 set_fpscr0(fpscr, flags); 2146 return result; 2147} 2148 2149template <> 2150uint32_t 2151fplibMaxNum(uint32_t op1, uint32_t op2, FPSCR &fpscr) 2152{ 2153 fp32_minmaxnum(&op1, &op2, 1); 2154 return fplibMax<uint32_t>(op1, op2, fpscr); 2155} 2156 2157template <> 2158uint64_t 2159fplibMaxNum(uint64_t op1, uint64_t op2, FPSCR &fpscr) 2160{ 2161 fp64_minmaxnum(&op1, &op2, 1); 2162 return fplibMax<uint64_t>(op1, op2, fpscr); 2163} 2164 2165template <> 2166uint32_t 2167fplibMin(uint32_t op1, uint32_t op2, FPSCR &fpscr) 2168{ 2169 int mode = modeConv(fpscr); 2170 int flags = 0; 2171 int sgn1, exp1, sgn2, exp2; 2172 uint32_t mnt1, mnt2, x, result; 2173 2174 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); 2175 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); 2176 2177 if ((x = fp32_process_NaNs(op1, op2, mode, &flags))) { 2178 result = x; 2179 } else { 2180 result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ? 2181 fp32_repack(sgn1, exp1, mnt1) : 2182 fp32_repack(sgn2, exp2, mnt2)); 2183 } 2184 set_fpscr0(fpscr, flags); 2185 return result; 2186} 2187 2188template <> 2189uint64_t 2190fplibMin(uint64_t op1, uint64_t op2, FPSCR &fpscr) 2191{ 2192 int mode = modeConv(fpscr); 2193 int flags = 0; 2194 int sgn1, exp1, sgn2, exp2; 2195 uint64_t mnt1, mnt2, x, result; 2196 2197 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); 2198 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); 2199 2200 if ((x = fp64_process_NaNs(op1, op2, mode, &flags))) { 2201 result = x; 2202 } else { 2203 result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ? 2204 fp64_repack(sgn1, exp1, mnt1) : 2205 fp64_repack(sgn2, exp2, mnt2)); 2206 } 2207 set_fpscr0(fpscr, flags); 2208 return result; 2209} 2210 2211template <> 2212uint32_t 2213fplibMinNum(uint32_t op1, uint32_t op2, FPSCR &fpscr) 2214{ 2215 fp32_minmaxnum(&op1, &op2, 0); 2216 return fplibMin<uint32_t>(op1, op2, fpscr); 2217} 2218 2219template <> 2220uint64_t 2221fplibMinNum(uint64_t op1, uint64_t op2, FPSCR &fpscr) 2222{ 2223 fp64_minmaxnum(&op1, &op2, 0); 2224 return fplibMin<uint64_t>(op1, op2, fpscr); 2225} 2226 2227template <> 2228uint32_t 2229fplibMul(uint32_t op1, uint32_t op2, FPSCR &fpscr) 2230{ 2231 int flags = 0; 2232 uint32_t result = fp32_mul(op1, op2, modeConv(fpscr), &flags); 2233 set_fpscr0(fpscr, flags); 2234 return result; 2235} 2236 2237template <> 2238uint64_t 2239fplibMul(uint64_t op1, uint64_t op2, FPSCR &fpscr) 2240{ 2241 int flags = 0; 2242 uint64_t result = fp64_mul(op1, op2, modeConv(fpscr), &flags); 2243 set_fpscr0(fpscr, flags); 2244 return result; 2245} 2246 2247template <> 2248uint32_t 2249fplibMulX(uint32_t op1, uint32_t op2, FPSCR &fpscr) 2250{ 2251 int mode = modeConv(fpscr); 2252 int flags = 0; 2253 int sgn1, exp1, sgn2, exp2; 2254 uint32_t mnt1, mnt2, result; 2255 2256 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); 2257 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); 2258 2259 result = fp32_process_NaNs(op1, op2, mode, &flags); 2260 if (!result) { 2261 if ((exp1 == 255 && !mnt2) || (exp2 == 255 && !mnt1)) { 2262 result = fp32_FPTwo(sgn1 ^ sgn2); 2263 } else if (exp1 == 255 || exp2 == 255) { 2264 result = fp32_infinity(sgn1 ^ sgn2); 2265 } else if (!mnt1 || !mnt2) { 2266 result = fp32_zero(sgn1 ^ sgn2); 2267 } else { 2268 result = fp32_mul(op1, op2, mode, &flags); 2269 } 2270 } 2271 2272 set_fpscr0(fpscr, flags); 2273 2274 return result; 2275} 2276 2277template <> 2278uint64_t 2279fplibMulX(uint64_t op1, uint64_t op2, FPSCR &fpscr) 2280{ 2281 int mode = modeConv(fpscr); 2282 int flags = 0; 2283 int sgn1, exp1, sgn2, exp2; 2284 uint64_t mnt1, mnt2, result; 2285 2286 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); 2287 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); 2288 2289 result = fp64_process_NaNs(op1, op2, mode, &flags); 2290 if (!result) { 2291 if ((exp1 == 2047 && !mnt2) || (exp2 == 2047 && !mnt1)) { 2292 result = fp64_FPTwo(sgn1 ^ sgn2); 2293 } else if (exp1 == 2047 || exp2 == 2047) { 2294 result = fp64_infinity(sgn1 ^ sgn2); 2295 } else if (!mnt1 || !mnt2) { 2296 result = fp64_zero(sgn1 ^ sgn2); 2297 } else { 2298 result = fp64_mul(op1, op2, mode, &flags); 2299 } 2300 } 2301 2302 set_fpscr0(fpscr, flags); 2303 2304 return result; 2305} 2306 2307template <> 2308uint32_t 2309fplibNeg(uint32_t op) 2310{ 2311 return op ^ (uint32_t)1 << 31; 2312} 2313 2314template <> 2315uint64_t 2316fplibNeg(uint64_t op) 2317{ 2318 return op ^ (uint64_t)1 << 63; 2319} 2320 2321static const uint8_t recip_sqrt_estimate[256] = { 2322 255, 253, 251, 249, 247, 245, 243, 242, 240, 238, 236, 234, 233, 231, 229, 228, 2323 226, 224, 223, 221, 219, 218, 216, 215, 213, 212, 210, 209, 207, 206, 204, 203, 2324 201, 200, 198, 197, 196, 194, 193, 192, 190, 189, 188, 186, 185, 184, 183, 181, 2325 180, 179, 178, 176, 175, 174, 173, 172, 170, 169, 168, 167, 166, 165, 164, 163, 2326 162, 160, 159, 158, 157, 156, 155, 154, 153, 152, 151, 150, 149, 148, 147, 146, 2327 145, 144, 143, 142, 141, 140, 140, 139, 138, 137, 136, 135, 134, 133, 132, 131, 2328 131, 130, 129, 128, 127, 126, 126, 125, 124, 123, 122, 121, 121, 120, 119, 118, 2329 118, 117, 116, 115, 114, 114, 113, 112, 111, 111, 110, 109, 109, 108, 107, 106, 2330 105, 104, 103, 101, 100, 99, 97, 96, 95, 93, 92, 91, 90, 88, 87, 86, 2331 85, 84, 82, 81, 80, 79, 78, 77, 76, 75, 74, 72, 71, 70, 69, 68, 2332 67, 66, 65, 64, 63, 62, 61, 60, 60, 59, 58, 57, 56, 55, 54, 53, 2333 52, 51, 51, 50, 49, 48, 47, 46, 46, 45, 44, 43, 42, 42, 41, 40, 2334 39, 38, 38, 37, 36, 35, 35, 34, 33, 33, 32, 31, 30, 30, 29, 28, 2335 28, 27, 26, 26, 25, 24, 24, 23, 22, 22, 21, 20, 20, 19, 19, 18, 2336 17, 17, 16, 16, 15, 14, 14, 13, 13, 12, 11, 11, 10, 10, 9, 9, 2337 8, 8, 7, 6, 6, 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 0 2338}; 2339 2340template <> 2341uint32_t 2342fplibRSqrtEstimate(uint32_t op, FPSCR &fpscr) 2343{ 2344 int mode = modeConv(fpscr); 2345 int flags = 0; 2346 int sgn, exp; 2347 uint32_t mnt, result; 2348 2349 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags); 2350 2351 if (exp == 255 && (uint32_t)(mnt << 9)) { 2352 result = fp32_process_NaN(op, mode, &flags); 2353 } else if (!mnt) { 2354 result = fp32_infinity(sgn); 2355 flags |= FPLIB_DZC; 2356 } else if (sgn) { 2357 result = fp32_defaultNaN(); 2358 flags |= FPLIB_IOC; 2359 } else if (exp == 255) { 2360 result = fp32_zero(0); 2361 } else { 2362 exp += 8; 2363 mnt = fp32_normalise(mnt, &exp); 2364 mnt = recip_sqrt_estimate[(~exp & 1) << 7 | (mnt >> 24 & 127)]; 2365 result = fp32_pack(0, (380 - exp) >> 1, mnt << 15); 2366 } 2367 2368 set_fpscr0(fpscr, flags); 2369 2370 return result; 2371} 2372 2373template <> 2374uint64_t 2375fplibRSqrtEstimate(uint64_t op, FPSCR &fpscr) 2376{ 2377 int mode = modeConv(fpscr); 2378 int flags = 0; 2379 int sgn, exp; 2380 uint64_t mnt, result; 2381 2382 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags); 2383 2384 if (exp == 2047 && (uint64_t)(mnt << 12)) { 2385 result = fp64_process_NaN(op, mode, &flags); 2386 } else if (!mnt) { 2387 result = fp64_infinity(sgn); 2388 flags |= FPLIB_DZC; 2389 } else if (sgn) { 2390 result = fp64_defaultNaN(); 2391 flags |= FPLIB_IOC; 2392 } else if (exp == 2047) { 2393 result = fp32_zero(0); 2394 } else { 2395 exp += 11; 2396 mnt = fp64_normalise(mnt, &exp); 2397 mnt = recip_sqrt_estimate[(~exp & 1) << 7 | (mnt >> 56 & 127)]; 2398 result = fp64_pack(0, (3068 - exp) >> 1, mnt << 44); 2399 } 2400 2401 set_fpscr0(fpscr, flags); 2402 2403 return result; 2404} 2405 2406template <> 2407uint32_t 2408fplibRSqrtStepFused(uint32_t op1, uint32_t op2, FPSCR &fpscr) 2409{ 2410 int mode = modeConv(fpscr); 2411 int flags = 0; 2412 int sgn1, exp1, sgn2, exp2; 2413 uint32_t mnt1, mnt2, result; 2414 2415 op1 = fplibNeg<uint32_t>(op1); 2416 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); 2417 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); 2418 2419 result = fp32_process_NaNs(op1, op2, mode, &flags); 2420 if (!result) { 2421 if ((exp1 == 255 && !mnt2) || (exp2 == 255 && !mnt1)) { 2422 result = fp32_FPOnePointFive(0); 2423 } else if (exp1 == 255 || exp2 == 255) { 2424 result = fp32_infinity(sgn1 ^ sgn2); 2425 } else { 2426 result = fp32_muladd(fp32_FPThree(0), op1, op2, -1, mode, &flags); 2427 } 2428 } 2429 2430 set_fpscr0(fpscr, flags); 2431 2432 return result; 2433} 2434 2435template <> 2436uint64_t 2437fplibRSqrtStepFused(uint64_t op1, uint64_t op2, FPSCR &fpscr) 2438{ 2439 int mode = modeConv(fpscr); 2440 int flags = 0; 2441 int sgn1, exp1, sgn2, exp2; 2442 uint64_t mnt1, mnt2, result; 2443 2444 op1 = fplibNeg<uint64_t>(op1); 2445 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); 2446 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); 2447 2448 result = fp64_process_NaNs(op1, op2, mode, &flags); 2449 if (!result) { 2450 if ((exp1 == 2047 && !mnt2) || (exp2 == 2047 && !mnt1)) { 2451 result = fp64_FPOnePointFive(0); 2452 } else if (exp1 == 2047 || exp2 == 2047) { 2453 result = fp64_infinity(sgn1 ^ sgn2); 2454 } else { 2455 result = fp64_muladd(fp64_FPThree(0), op1, op2, -1, mode, &flags); 2456 } 2457 } 2458 2459 set_fpscr0(fpscr, flags); 2460 2461 return result; 2462} 2463 2464template <> 2465uint32_t 2466fplibRecipStepFused(uint32_t op1, uint32_t op2, FPSCR &fpscr) 2467{ 2468 int mode = modeConv(fpscr); 2469 int flags = 0; 2470 int sgn1, exp1, sgn2, exp2; 2471 uint32_t mnt1, mnt2, result; 2472 2473 op1 = fplibNeg<uint32_t>(op1); 2474 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); 2475 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); 2476 2477 result = fp32_process_NaNs(op1, op2, mode, &flags); 2478 if (!result) { 2479 if ((exp1 == 255 && !mnt2) || (exp2 == 255 && !mnt1)) { 2480 result = fp32_FPTwo(0); 2481 } else if (exp1 == 255 || exp2 == 255) { 2482 result = fp32_infinity(sgn1 ^ sgn2); 2483 } else { 2484 result = fp32_muladd(fp32_FPTwo(0), op1, op2, 0, mode, &flags); 2485 } 2486 } 2487 2488 set_fpscr0(fpscr, flags); 2489 2490 return result; 2491} 2492 2493template <> 2494uint32_t 2495fplibRecipEstimate(uint32_t op, FPSCR &fpscr) 2496{ 2497 int mode = modeConv(fpscr); 2498 int flags = 0; 2499 int sgn, exp; 2500 uint32_t mnt, result; 2501 2502 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags); 2503 2504 if (exp == 255 && (uint32_t)(mnt << 9)) { 2505 result = fp32_process_NaN(op, mode, &flags); 2506 } else if (exp == 255) { 2507 result = fp32_zero(sgn); 2508 } else if (!mnt) { 2509 result = fp32_infinity(sgn); 2510 flags |= FPLIB_DZC; 2511 } else if (!((uint32_t)(op << 1) >> 22)) { 2512 bool overflow_to_inf = false; 2513 switch (FPCRRounding(fpscr)) { 2514 case FPRounding_TIEEVEN: 2515 overflow_to_inf = true; 2516 break; 2517 case FPRounding_POSINF: 2518 overflow_to_inf = !sgn; 2519 break; 2520 case FPRounding_NEGINF: 2521 overflow_to_inf = sgn; 2522 break; 2523 case FPRounding_ZERO: 2524 overflow_to_inf = false; 2525 break; 2526 default: 2527 assert(0); 2528 } 2529 result = overflow_to_inf ? fp32_infinity(sgn) : fp32_max_normal(sgn); 2530 flags |= FPLIB_OFC | FPLIB_IXC; 2531 } else if (fpscr.fz && exp >= 253) { 2532 result = fp32_zero(sgn); 2533 flags |= FPLIB_UFC; 2534 } else { 2535 exp += 8; 2536 mnt = fp32_normalise(mnt, &exp); 2537 int result_exp = 253 - exp; 2538 uint32_t fraction = (((uint32_t)1 << 19) / (mnt >> 22 | 1) + 1) >> 1; 2539 fraction <<= 15; 2540 if (result_exp == 0) { 2541 fraction >>= 1; 2542 } else if (result_exp == -1) { 2543 fraction >>= 2; 2544 result_exp = 0; 2545 } 2546 result = fp32_pack(sgn, result_exp, fraction); 2547 } 2548 2549 set_fpscr0(fpscr, flags); 2550 2551 return result; 2552} 2553 2554template <> 2555uint64_t 2556fplibRecipEstimate(uint64_t op, FPSCR &fpscr) 2557{ 2558 int mode = modeConv(fpscr); 2559 int flags = 0; 2560 int sgn, exp; 2561 uint64_t mnt, result; 2562 2563 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags); 2564 2565 if (exp == 2047 && (uint64_t)(mnt << 12)) { 2566 result = fp64_process_NaN(op, mode, &flags); 2567 } else if (exp == 2047) { 2568 result = fp64_zero(sgn); 2569 } else if (!mnt) { 2570 result = fp64_infinity(sgn); 2571 flags |= FPLIB_DZC; 2572 } else if (!((uint64_t)(op << 1) >> 51)) { 2573 bool overflow_to_inf = false; 2574 switch (FPCRRounding(fpscr)) { 2575 case FPRounding_TIEEVEN: 2576 overflow_to_inf = true; 2577 break; 2578 case FPRounding_POSINF: 2579 overflow_to_inf = !sgn; 2580 break; 2581 case FPRounding_NEGINF: 2582 overflow_to_inf = sgn; 2583 break; 2584 case FPRounding_ZERO: 2585 overflow_to_inf = false; 2586 break; 2587 default: 2588 assert(0); 2589 } 2590 result = overflow_to_inf ? fp64_infinity(sgn) : fp64_max_normal(sgn); 2591 flags |= FPLIB_OFC | FPLIB_IXC; 2592 } else if (fpscr.fz && exp >= 2045) { 2593 result = fp64_zero(sgn); 2594 flags |= FPLIB_UFC; 2595 } else { 2596 exp += 11; 2597 mnt = fp64_normalise(mnt, &exp); 2598 int result_exp = 2045 - exp; 2599 uint64_t fraction = (((uint32_t)1 << 19) / (mnt >> 54 | 1) + 1) >> 1; 2600 fraction <<= 44; 2601 if (result_exp == 0) { 2602 fraction >>= 1; 2603 } else if (result_exp == -1) { 2604 fraction >>= 2; 2605 result_exp = 0; 2606 } 2607 result = fp64_pack(sgn, result_exp, fraction); 2608 } 2609 2610 set_fpscr0(fpscr, flags); 2611 2612 return result; 2613} 2614 2615template <> 2616uint64_t 2617fplibRecipStepFused(uint64_t op1, uint64_t op2, FPSCR &fpscr) 2618{ 2619 int mode = modeConv(fpscr); 2620 int flags = 0; 2621 int sgn1, exp1, sgn2, exp2; 2622 uint64_t mnt1, mnt2, result; 2623 2624 op1 = fplibNeg<uint64_t>(op1); 2625 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); 2626 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); 2627 2628 result = fp64_process_NaNs(op1, op2, mode, &flags); 2629 if (!result) { 2630 if ((exp1 == 2047 && !mnt2) || (exp2 == 2047 && !mnt1)) { 2631 result = fp64_FPTwo(0); 2632 } else if (exp1 == 2047 || exp2 == 2047) { 2633 result = fp64_infinity(sgn1 ^ sgn2); 2634 } else { 2635 result = fp64_muladd(fp64_FPTwo(0), op1, op2, 0, mode, &flags); 2636 } 2637 } 2638 2639 set_fpscr0(fpscr, flags); 2640 2641 return result; 2642} 2643 2644template <> 2645uint32_t 2646fplibRecpX(uint32_t op, FPSCR &fpscr) 2647{ 2648 int mode = modeConv(fpscr); 2649 int flags = 0; 2650 int sgn, exp; 2651 uint32_t mnt, result; 2652 2653 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags); 2654 2655 if (exp == 255 && (uint32_t)(mnt << 9)) { 2656 result = fp32_process_NaN(op, mode, &flags); 2657 } 2658 else { 2659 if (!mnt) { // Zero and denormals 2660 result = fp32_pack(sgn, 254, 0); 2661 } else { // Infinities and normals 2662 result = fp32_pack(sgn, exp ^ 255, 0); 2663 } 2664 } 2665 2666 set_fpscr0(fpscr, flags); 2667 2668 return result; 2669} 2670 2671template <> 2672uint64_t 2673fplibRecpX(uint64_t op, FPSCR &fpscr) 2674{ 2675 int mode = modeConv(fpscr); 2676 int flags = 0; 2677 int sgn, exp; 2678 uint64_t mnt, result; 2679 2680 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags); 2681 2682 if (exp == 2047 && (uint64_t)(mnt << 12)) { 2683 result = fp64_process_NaN(op, mode, &flags); 2684 } 2685 else { 2686 if (!mnt) { // Zero and denormals 2687 result = fp64_pack(sgn, 2046, 0); 2688 } else { // Infinities and normals 2689 result = fp64_pack(sgn, exp ^ 2047, 0); 2690 } 2691 } 2692 2693 set_fpscr0(fpscr, flags); 2694 2695 return result; 2696} 2697 2698template <> 2699uint32_t 2700fplibRoundInt(uint32_t op, FPRounding rounding, bool exact, FPSCR &fpscr) 2701{ 2702 int mode = modeConv(fpscr); 2703 int flags = 0; 2704 int sgn, exp; 2705 uint32_t mnt, result; 2706 2707 // Unpack using FPCR to determine if subnormals are flushed-to-zero: 2708 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags); 2709 2710 // Handle NaNs, infinities and zeroes: 2711 if (exp == 255 && (uint32_t)(mnt << 9)) { 2712 result = fp32_process_NaN(op, mode, &flags); 2713 } else if (exp == 255) { 2714 result = fp32_infinity(sgn); 2715 } else if (!mnt) { 2716 result = fp32_zero(sgn); 2717 } else if (exp >= 150) { 2718 // There are no fractional bits 2719 result = op; 2720 } else { 2721 // Truncate towards zero: 2722 uint32_t x = 150 - exp >= 32 ? 0 : mnt >> (150 - exp); 2723 int err = exp < 118 ? 1 : 2724 (mnt << 1 >> (149 - exp) & 3) | (mnt << 2 << (exp - 118) != 0); 2725 switch (rounding) { 2726 case FPRounding_TIEEVEN: 2727 x += (err == 3 || (err == 2 && (x & 1))); 2728 break; 2729 case FPRounding_POSINF: 2730 x += err && !sgn; 2731 break; 2732 case FPRounding_NEGINF: 2733 x += err && sgn; 2734 break; 2735 case FPRounding_ZERO: 2736 break; 2737 case FPRounding_TIEAWAY: 2738 x += err >> 1; 2739 break; 2740 default: 2741 assert(0); 2742 } 2743 2744 if (x == 0) { 2745 result = fp32_zero(sgn); 2746 } else { 2747 exp = 150; 2748 mnt = fp32_normalise(x, &exp); 2749 result = fp32_pack(sgn, exp + 8, mnt >> 8); 2750 } 2751 2752 if (err && exact) 2753 flags |= FPLIB_IXC; 2754 } 2755 2756 set_fpscr0(fpscr, flags); 2757 2758 return result; 2759} 2760 2761template <> 2762uint64_t 2763fplibRoundInt(uint64_t op, FPRounding rounding, bool exact, FPSCR &fpscr) 2764{ 2765 int mode = modeConv(fpscr); 2766 int flags = 0; 2767 int sgn, exp; 2768 uint64_t mnt, result; 2769 2770 // Unpack using FPCR to determine if subnormals are flushed-to-zero: 2771 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags); 2772 2773 // Handle NaNs, infinities and zeroes: 2774 if (exp == 2047 && (uint64_t)(mnt << 12)) { 2775 result = fp64_process_NaN(op, mode, &flags); 2776 } else if (exp == 2047) { 2777 result = fp64_infinity(sgn); 2778 } else if (!mnt) { 2779 result = fp64_zero(sgn); 2780 } else if (exp >= 1075) { 2781 // There are no fractional bits 2782 result = op; 2783 } else { 2784 // Truncate towards zero: 2785 uint64_t x = 1075 - exp >= 64 ? 0 : mnt >> (1075 - exp); 2786 int err = exp < 1011 ? 1 : 2787 (mnt << 1 >> (1074 - exp) & 3) | (mnt << 2 << (exp - 1011) != 0); 2788 switch (rounding) { 2789 case FPRounding_TIEEVEN: 2790 x += (err == 3 || (err == 2 && (x & 1))); 2791 break; 2792 case FPRounding_POSINF: 2793 x += err && !sgn; 2794 break; 2795 case FPRounding_NEGINF: 2796 x += err && sgn; 2797 break; 2798 case FPRounding_ZERO: 2799 break; 2800 case FPRounding_TIEAWAY: 2801 x += err >> 1; 2802 break; 2803 default: 2804 assert(0); 2805 } 2806 2807 if (x == 0) { 2808 result = fp64_zero(sgn); 2809 } else { 2810 exp = 1075; 2811 mnt = fp64_normalise(x, &exp); 2812 result = fp64_pack(sgn, exp + 11, mnt >> 11); 2813 } 2814 2815 if (err && exact) 2816 flags |= FPLIB_IXC; 2817 } 2818 2819 set_fpscr0(fpscr, flags); 2820 2821 return result; 2822} 2823 2824template <> 2825uint32_t 2826fplibSqrt(uint32_t op, FPSCR &fpscr) 2827{ 2828 int flags = 0; 2829 uint32_t result = fp32_sqrt(op, modeConv(fpscr), &flags); 2830 set_fpscr0(fpscr, flags); 2831 return result; 2832} 2833 2834template <> 2835uint64_t 2836fplibSqrt(uint64_t op, FPSCR &fpscr) 2837{ 2838 int flags = 0; 2839 uint64_t result = fp64_sqrt(op, modeConv(fpscr), &flags); 2840 set_fpscr0(fpscr, flags); 2841 return result; 2842} 2843 2844template <> 2845uint32_t 2846fplibSub(uint32_t op1, uint32_t op2, FPSCR &fpscr) 2847{ 2848 int flags = 0; 2849 uint32_t result = fp32_add(op1, op2, 1, modeConv(fpscr), &flags); 2850 set_fpscr0(fpscr, flags); 2851 return result; 2852} 2853 2854template <> 2855uint64_t 2856fplibSub(uint64_t op1, uint64_t op2, FPSCR &fpscr) 2857{ 2858 int flags = 0; 2859 uint64_t result = fp64_add(op1, op2, 1, modeConv(fpscr), &flags); 2860 set_fpscr0(fpscr, flags); 2861 return result; 2862} 2863 2864static uint64_t 2865FPToFixed_64(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding, 2866 int *flags) 2867{ 2868 uint64_t x; 2869 int err; 2870 2871 if (exp > 1023 + 63) { 2872 *flags = FPLIB_IOC; 2873 return ((uint64_t)!u << 63) - !sgn; 2874 } 2875 2876 x = lsr64(mnt << 11, 1023 + 63 - exp); 2877 err = (exp > 1023 + 63 - 2 ? 0 : 2878 (lsr64(mnt << 11, 1023 + 63 - 2 - exp) & 3) | 2879 !!(mnt << 11 & (lsl64(1, 1023 + 63 - 2 - exp) - 1))); 2880 2881 switch (rounding) { 2882 case FPRounding_TIEEVEN: 2883 x += (err == 3 || (err == 2 && (x & 1))); 2884 break; 2885 case FPRounding_POSINF: 2886 x += err && !sgn; 2887 break; 2888 case FPRounding_NEGINF: 2889 x += err && sgn; 2890 break; 2891 case FPRounding_ZERO: 2892 break; 2893 case FPRounding_TIEAWAY: 2894 x += err >> 1; 2895 break; 2896 default: 2897 assert(0); 2898 } 2899 2900 if (u ? sgn && x : x > ((uint64_t)1 << 63) - !sgn) { 2901 *flags = FPLIB_IOC; 2902 return ((uint64_t)!u << 63) - !sgn; 2903 } 2904 2905 if (err) { 2906 *flags = FPLIB_IXC; 2907 } 2908 2909 return sgn ? -x : x; 2910} 2911 2912static uint32_t 2913FPToFixed_32(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding, 2914 int *flags) 2915{ 2916 uint64_t x = FPToFixed_64(sgn, exp, mnt, u, rounding, flags); 2917 if (u ? x >= (uint64_t)1 << 32 : 2918 !(x < (uint64_t)1 << 31 || 2919 (uint64_t)-x <= (uint64_t)1 << 31)) { 2920 *flags = FPLIB_IOC; 2921 x = ((uint32_t)!u << 31) - !sgn; 2922 } 2923 return x; 2924} 2925 2926template <> 2927uint32_t 2928fplibFPToFixed(uint32_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr) 2929{ 2930 int flags = 0; 2931 int sgn, exp; 2932 uint32_t mnt, result; 2933 2934 // Unpack using FPCR to determine if subnormals are flushed-to-zero: 2935 fp32_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags); 2936 2937 // If NaN, set cumulative flag or take exception: 2938 if (exp == 255 && (uint32_t)(mnt << 9)) { 2939 flags = FPLIB_IOC; 2940 result = 0; 2941 } else { 2942 result = FPToFixed_32(sgn, exp + 1023 - 127 + fbits, 2943 (uint64_t)mnt << (52 - 23), u, rounding, &flags); 2944 } 2945 2946 set_fpscr0(fpscr, flags); 2947 2948 return result; 2949} 2950 2951template <> 2952uint32_t 2953fplibFPToFixed(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr) 2954{ 2955 int flags = 0; 2956 int sgn, exp; 2957 uint64_t mnt; 2958 uint32_t result; 2959 2960 // Unpack using FPCR to determine if subnormals are flushed-to-zero: 2961 fp64_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags); 2962 2963 // If NaN, set cumulative flag or take exception: 2964 if (exp == 2047 && (uint64_t)(mnt << 12)) { 2965 flags = FPLIB_IOC; 2966 result = 0; 2967 } else { 2968 result = FPToFixed_32(sgn, exp + fbits, mnt, u, rounding, &flags); 2969 } 2970 2971 set_fpscr0(fpscr, flags); 2972 2973 return result; 2974} 2975 2976template <> 2977uint64_t 2978fplibFPToFixed(uint32_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr) 2979{ 2980 int flags = 0; 2981 int sgn, exp; 2982 uint32_t mnt; 2983 uint64_t result; 2984 2985 // Unpack using FPCR to determine if subnormals are flushed-to-zero: 2986 fp32_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags); 2987 2988 // If NaN, set cumulative flag or take exception: 2989 if (exp == 255 && (uint32_t)(mnt << 9)) { 2990 flags = FPLIB_IOC; 2991 result = 0; 2992 } else { 2993 result = FPToFixed_64(sgn, exp + 1023 - 127 + fbits, 2994 (uint64_t)mnt << (52 - 23), u, rounding, &flags); 2995 } 2996 2997 set_fpscr0(fpscr, flags); 2998 2999 return result; 3000} 3001 3002template <> 3003uint64_t 3004fplibFPToFixed(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr) 3005{ 3006 int flags = 0; 3007 int sgn, exp; 3008 uint64_t mnt, result; 3009 3010 // Unpack using FPCR to determine if subnormals are flushed-to-zero: 3011 fp64_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags); 3012 3013 // If NaN, set cumulative flag or take exception: 3014 if (exp == 2047 && (uint64_t)(mnt << 12)) { 3015 flags = FPLIB_IOC; 3016 result = 0; 3017 } else { 3018 result = FPToFixed_64(sgn, exp + fbits, mnt, u, rounding, &flags); 3019 } 3020 3021 set_fpscr0(fpscr, flags); 3022 3023 return result; 3024} 3025 3026static uint32_t 3027fp32_cvtf(uint64_t a, int fbits, int u, int mode, int *flags) 3028{ 3029 int x_sgn = !u && a >> 63; 3030 int x_exp = 190 - fbits; 3031 uint64_t x_mnt = x_sgn ? -a : a; 3032 3033 // Handle zero: 3034 if (!x_mnt) { 3035 return fp32_zero(0); 3036 } 3037 3038 // Normalise and convert to 32 bits, collapsing error into bottom bit: 3039 x_mnt = fp64_normalise(x_mnt, &x_exp); 3040 x_mnt = x_mnt >> 31 | !!(uint32_t)(x_mnt << 1); 3041 3042 return fp32_round(x_sgn, x_exp, x_mnt, mode, flags); 3043} 3044 3045static uint64_t 3046fp64_cvtf(uint64_t a, int fbits, int u, int mode, int *flags) 3047{ 3048 int x_sgn = !u && a >> 63; 3049 int x_exp = 1024 + 62 - fbits; 3050 uint64_t x_mnt = x_sgn ? -a : a; 3051 3052 // Handle zero: 3053 if (!x_mnt) { 3054 return fp64_zero(0); 3055 } 3056 3057 x_mnt = fp64_normalise(x_mnt, &x_exp); 3058 3059 return fp64_round(x_sgn, x_exp, x_mnt << 1, mode, flags); 3060} 3061 3062template <> 3063uint32_t 3064fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr) 3065{ 3066 int flags = 0; 3067 uint32_t res = fp32_cvtf(op, fbits, u, 3068 (int)rounding | ((uint32_t)fpscr >> 22 & 12), 3069 &flags); 3070 set_fpscr0(fpscr, flags); 3071 return res; 3072} 3073 3074template <> 3075uint64_t 3076fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr) 3077{ 3078 int flags = 0; 3079 uint64_t res = fp64_cvtf(op, fbits, u, 3080 (int)rounding | ((uint32_t)fpscr >> 22 & 12), 3081 &flags); 3082 set_fpscr0(fpscr, flags); 3083 return res; 3084} 3085 3086} | 159add128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0, 160 uint64_t b1) 161{ 162 *x0 = a0 + b0; 163 *x1 = a1 + b1 + (*x0 < a0); 164} 165 166static inline void 167sub128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0, 168 uint64_t b1) 169{ 170 *x0 = a0 - b0; 171 *x1 = a1 - b1 - (*x0 > a0); 172} 173 174static inline int 175cmp128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1) 176{ 177 return (a1 < b1 ? -1 : a1 > b1 ? 1 : a0 < b0 ? -1 : a0 > b0 ? 1 : 0); 178} 179 180static inline uint16_t 181fp16_normalise(uint16_t mnt, int *exp) 182{ 183 int shift; 184 185 if (!mnt) { 186 return 0; 187 } 188 189 for (shift = 8; shift; shift >>= 1) { 190 if (!(mnt >> (16 - shift))) { 191 mnt <<= shift; 192 *exp -= shift; 193 } 194 } 195 return mnt; 196} 197 198static inline uint32_t 199fp32_normalise(uint32_t mnt, int *exp) 200{ 201 int shift; 202 203 if (!mnt) { 204 return 0; 205 } 206 207 for (shift = 16; shift; shift >>= 1) { 208 if (!(mnt >> (32 - shift))) { 209 mnt <<= shift; 210 *exp -= shift; 211 } 212 } 213 return mnt; 214} 215 216static inline uint64_t 217fp64_normalise(uint64_t mnt, int *exp) 218{ 219 int shift; 220 221 if (!mnt) { 222 return 0; 223 } 224 225 for (shift = 32; shift; shift >>= 1) { 226 if (!(mnt >> (64 - shift))) { 227 mnt <<= shift; 228 *exp -= shift; 229 } 230 } 231 return mnt; 232} 233 234static inline void 235fp128_normalise(uint64_t *mnt0, uint64_t *mnt1, int *exp) 236{ 237 uint64_t x0 = *mnt0; 238 uint64_t x1 = *mnt1; 239 int shift; 240 241 if (!x0 && !x1) { 242 return; 243 } 244 245 if (!x1) { 246 x1 = x0; 247 x0 = 0; 248 *exp -= 64; 249 } 250 251 for (shift = 32; shift; shift >>= 1) { 252 if (!(x1 >> (64 - shift))) { 253 x1 = x1 << shift | x0 >> (64 - shift); 254 x0 <<= shift; 255 *exp -= shift; 256 } 257 } 258 259 *mnt0 = x0; 260 *mnt1 = x1; 261} 262 263static inline uint16_t 264fp16_pack(uint16_t sgn, uint16_t exp, uint16_t mnt) 265{ 266 return sgn << 15 | exp << 10 | (mnt & (((uint16_t)1 << 10) - 1)); 267} 268 269static inline uint32_t 270fp32_pack(uint32_t sgn, uint32_t exp, uint32_t mnt) 271{ 272 return sgn << 31 | exp << 23 | (mnt & (((uint32_t)1 << 23) - 1)); 273} 274 275static inline uint64_t 276fp64_pack(uint64_t sgn, uint64_t exp, uint64_t mnt) 277{ 278 return (uint64_t)sgn << 63 | exp << 52 | (mnt & (((uint64_t)1 << 52) - 1)); 279} 280 281static inline uint16_t 282fp16_zero(int sgn) 283{ 284 return fp16_pack(sgn, 0, 0); 285} 286 287static inline uint32_t 288fp32_zero(int sgn) 289{ 290 return fp32_pack(sgn, 0, 0); 291} 292 293static inline uint64_t 294fp64_zero(int sgn) 295{ 296 return fp64_pack(sgn, 0, 0); 297} 298 299static inline uint16_t 300fp16_max_normal(int sgn) 301{ 302 return fp16_pack(sgn, 30, -1); 303} 304 305static inline uint32_t 306fp32_max_normal(int sgn) 307{ 308 return fp32_pack(sgn, 254, -1); 309} 310 311static inline uint64_t 312fp64_max_normal(int sgn) 313{ 314 return fp64_pack(sgn, 2046, -1); 315} 316 317static inline uint16_t 318fp16_infinity(int sgn) 319{ 320 return fp16_pack(sgn, 31, 0); 321} 322 323static inline uint32_t 324fp32_infinity(int sgn) 325{ 326 return fp32_pack(sgn, 255, 0); 327} 328 329static inline uint64_t 330fp64_infinity(int sgn) 331{ 332 return fp64_pack(sgn, 2047, 0); 333} 334 335static inline uint16_t 336fp16_defaultNaN() 337{ 338 return fp16_pack(0, 31, (uint16_t)1 << 9); 339} 340 341static inline uint32_t 342fp32_defaultNaN() 343{ 344 return fp32_pack(0, 255, (uint32_t)1 << 22); 345} 346 347static inline uint64_t 348fp64_defaultNaN() 349{ 350 return fp64_pack(0, 2047, (uint64_t)1 << 51); 351} 352 353static inline void 354fp16_unpack(int *sgn, int *exp, uint16_t *mnt, uint16_t x, int mode, 355 int *flags) 356{ 357 *sgn = x >> 15; 358 *exp = x >> 10 & 31; 359 *mnt = x & (((uint16_t)1 << 10) - 1); 360 361 // Handle subnormals: 362 if (*exp) { 363 *mnt |= (uint16_t)1 << 10; 364 } else { 365 ++*exp; 366 // There is no flush to zero in this case! 367 } 368} 369 370static inline void 371fp32_unpack(int *sgn, int *exp, uint32_t *mnt, uint32_t x, int mode, 372 int *flags) 373{ 374 *sgn = x >> 31; 375 *exp = x >> 23 & 255; 376 *mnt = x & (((uint32_t)1 << 23) - 1); 377 378 // Handle subnormals: 379 if (*exp) { 380 *mnt |= (uint32_t)1 << 23; 381 } else { 382 ++*exp; 383 if ((mode & FPLIB_FZ) && *mnt) { 384 *flags |= FPLIB_IDC; 385 *mnt = 0; 386 } 387 } 388} 389 390static inline void 391fp64_unpack(int *sgn, int *exp, uint64_t *mnt, uint64_t x, int mode, 392 int *flags) 393{ 394 *sgn = x >> 63; 395 *exp = x >> 52 & 2047; 396 *mnt = x & (((uint64_t)1 << 52) - 1); 397 398 // Handle subnormals: 399 if (*exp) { 400 *mnt |= (uint64_t)1 << 52; 401 } else { 402 ++*exp; 403 if ((mode & FPLIB_FZ) && *mnt) { 404 *flags |= FPLIB_IDC; 405 *mnt = 0; 406 } 407 } 408} 409 410static inline uint32_t 411fp32_process_NaN(uint32_t a, int mode, int *flags) 412{ 413 if (!(a >> 22 & 1)) { 414 *flags |= FPLIB_IOC; 415 a |= (uint32_t)1 << 22; 416 } 417 return mode & FPLIB_DN ? fp32_defaultNaN() : a; 418} 419 420static inline uint64_t 421fp64_process_NaN(uint64_t a, int mode, int *flags) 422{ 423 if (!(a >> 51 & 1)) { 424 *flags |= FPLIB_IOC; 425 a |= (uint64_t)1 << 51; 426 } 427 return mode & FPLIB_DN ? fp64_defaultNaN() : a; 428} 429 430static uint32_t 431fp32_process_NaNs(uint32_t a, uint32_t b, int mode, int *flags) 432{ 433 int a_exp = a >> 23 & 255; 434 uint32_t a_mnt = a & (((uint32_t)1 << 23) - 1); 435 int b_exp = b >> 23 & 255; 436 uint32_t b_mnt = b & (((uint32_t)1 << 23) - 1); 437 438 // Handle signalling NaNs: 439 if (a_exp == 255 && a_mnt && !(a_mnt >> 22 & 1)) 440 return fp32_process_NaN(a, mode, flags); 441 if (b_exp == 255 && b_mnt && !(b_mnt >> 22 & 1)) 442 return fp32_process_NaN(b, mode, flags); 443 444 // Handle quiet NaNs: 445 if (a_exp == 255 && a_mnt) 446 return fp32_process_NaN(a, mode, flags); 447 if (b_exp == 255 && b_mnt) 448 return fp32_process_NaN(b, mode, flags); 449 450 return 0; 451} 452 453static uint64_t 454fp64_process_NaNs(uint64_t a, uint64_t b, int mode, int *flags) 455{ 456 int a_exp = a >> 52 & 2047; 457 uint64_t a_mnt = a & (((uint64_t)1 << 52) - 1); 458 int b_exp = b >> 52 & 2047; 459 uint64_t b_mnt = b & (((uint64_t)1 << 52) - 1); 460 461 // Handle signalling NaNs: 462 if (a_exp == 2047 && a_mnt && !(a_mnt >> 51 & 1)) 463 return fp64_process_NaN(a, mode, flags); 464 if (b_exp == 2047 && b_mnt && !(b_mnt >> 51 & 1)) 465 return fp64_process_NaN(b, mode, flags); 466 467 // Handle quiet NaNs: 468 if (a_exp == 2047 && a_mnt) 469 return fp64_process_NaN(a, mode, flags); 470 if (b_exp == 2047 && b_mnt) 471 return fp64_process_NaN(b, mode, flags); 472 473 return 0; 474} 475 476static uint32_t 477fp32_process_NaNs3(uint32_t a, uint32_t b, uint32_t c, int mode, int *flags) 478{ 479 int a_exp = a >> 23 & 255; 480 uint32_t a_mnt = a & (((uint32_t)1 << 23) - 1); 481 int b_exp = b >> 23 & 255; 482 uint32_t b_mnt = b & (((uint32_t)1 << 23) - 1); 483 int c_exp = c >> 23 & 255; 484 uint32_t c_mnt = c & (((uint32_t)1 << 23) - 1); 485 486 // Handle signalling NaNs: 487 if (a_exp == 255 && a_mnt && !(a_mnt >> 22 & 1)) 488 return fp32_process_NaN(a, mode, flags); 489 if (b_exp == 255 && b_mnt && !(b_mnt >> 22 & 1)) 490 return fp32_process_NaN(b, mode, flags); 491 if (c_exp == 255 && c_mnt && !(c_mnt >> 22 & 1)) 492 return fp32_process_NaN(c, mode, flags); 493 494 // Handle quiet NaNs: 495 if (a_exp == 255 && a_mnt) 496 return fp32_process_NaN(a, mode, flags); 497 if (b_exp == 255 && b_mnt) 498 return fp32_process_NaN(b, mode, flags); 499 if (c_exp == 255 && c_mnt) 500 return fp32_process_NaN(c, mode, flags); 501 502 return 0; 503} 504 505static uint64_t 506fp64_process_NaNs3(uint64_t a, uint64_t b, uint64_t c, int mode, int *flags) 507{ 508 int a_exp = a >> 52 & 2047; 509 uint64_t a_mnt = a & (((uint64_t)1 << 52) - 1); 510 int b_exp = b >> 52 & 2047; 511 uint64_t b_mnt = b & (((uint64_t)1 << 52) - 1); 512 int c_exp = c >> 52 & 2047; 513 uint64_t c_mnt = c & (((uint64_t)1 << 52) - 1); 514 515 // Handle signalling NaNs: 516 if (a_exp == 2047 && a_mnt && !(a_mnt >> 51 & 1)) 517 return fp64_process_NaN(a, mode, flags); 518 if (b_exp == 2047 && b_mnt && !(b_mnt >> 51 & 1)) 519 return fp64_process_NaN(b, mode, flags); 520 if (c_exp == 2047 && c_mnt && !(c_mnt >> 51 & 1)) 521 return fp64_process_NaN(c, mode, flags); 522 523 // Handle quiet NaNs: 524 if (a_exp == 2047 && a_mnt) 525 return fp64_process_NaN(a, mode, flags); 526 if (b_exp == 2047 && b_mnt) 527 return fp64_process_NaN(b, mode, flags); 528 if (c_exp == 2047 && c_mnt) 529 return fp64_process_NaN(c, mode, flags); 530 531 return 0; 532} 533 534static uint16_t 535fp16_round_(int sgn, int exp, uint16_t mnt, int rm, int mode, int *flags) 536{ 537 int biased_exp; // non-negative exponent value for result 538 uint16_t int_mant; // mantissa for result, less than (1 << 11) 539 int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5 540 541 assert(rm != FPRounding_TIEAWAY); 542 543 // There is no flush to zero in this case! 544 545 // The bottom 5 bits of mnt are orred together: 546 mnt = (uint16_t)1 << 12 | mnt >> 4 | ((mnt & 31) != 0); 547 548 if (exp > 0) { 549 biased_exp = exp; 550 int_mant = mnt >> 2; 551 error = mnt & 3; 552 } else { 553 biased_exp = 0; 554 int_mant = lsr16(mnt, 3 - exp); 555 error = (lsr16(mnt, 1 - exp) & 3) | !!(mnt & (lsl16(1, 1 - exp) - 1)); 556 } 557 558 if (!biased_exp && error) { // xx should also check fpscr_val<11> 559 *flags |= FPLIB_UFC; 560 } 561 562 // Round up: 563 if ((rm == FPLIB_RN && (error == 3 || 564 (error == 2 && (int_mant & 1)))) || 565 (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) { 566 ++int_mant; 567 if (int_mant == (uint32_t)1 << 10) { 568 // Rounded up from denormalized to normalized 569 biased_exp = 1; 570 } 571 if (int_mant == (uint32_t)1 << 11) { 572 // Rounded up to next exponent 573 ++biased_exp; 574 int_mant >>= 1; 575 } 576 } 577 578 // Handle rounding to odd aka Von Neumann rounding: 579 if (error && rm == FPRounding_ODD) 580 int_mant |= 1; 581 582 // Handle overflow: 583 if (!(mode & FPLIB_AHP)) { 584 if (biased_exp >= 31) { 585 *flags |= FPLIB_OFC | FPLIB_IXC; 586 if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) || 587 (rm == FPLIB_RM && sgn)) { 588 return fp16_infinity(sgn); 589 } else { 590 return fp16_max_normal(sgn); 591 } 592 } 593 } else { 594 if (biased_exp >= 32) { 595 *flags |= FPLIB_IOC; 596 return fp16_pack(sgn, 31, -1); 597 } 598 } 599 600 if (error) { 601 *flags |= FPLIB_IXC; 602 } 603 604 return fp16_pack(sgn, biased_exp, int_mant); 605} 606 607static uint32_t 608fp32_round_(int sgn, int exp, uint32_t mnt, int rm, int mode, int *flags) 609{ 610 int biased_exp; // non-negative exponent value for result 611 uint32_t int_mant; // mantissa for result, less than (1 << 24) 612 int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5 613 614 assert(rm != FPRounding_TIEAWAY); 615 616 // Flush to zero: 617 if ((mode & FPLIB_FZ) && exp < 1) { 618 *flags |= FPLIB_UFC; 619 return fp32_zero(sgn); 620 } 621 622 // The bottom 8 bits of mnt are orred together: 623 mnt = (uint32_t)1 << 25 | mnt >> 7 | ((mnt & 255) != 0); 624 625 if (exp > 0) { 626 biased_exp = exp; 627 int_mant = mnt >> 2; 628 error = mnt & 3; 629 } else { 630 biased_exp = 0; 631 int_mant = lsr32(mnt, 3 - exp); 632 error = (lsr32(mnt, 1 - exp) & 3) | !!(mnt & (lsl32(1, 1 - exp) - 1)); 633 } 634 635 if (!biased_exp && error) { // xx should also check fpscr_val<11> 636 *flags |= FPLIB_UFC; 637 } 638 639 // Round up: 640 if ((rm == FPLIB_RN && (error == 3 || 641 (error == 2 && (int_mant & 1)))) || 642 (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) { 643 ++int_mant; 644 if (int_mant == (uint32_t)1 << 23) { 645 // Rounded up from denormalized to normalized 646 biased_exp = 1; 647 } 648 if (int_mant == (uint32_t)1 << 24) { 649 // Rounded up to next exponent 650 ++biased_exp; 651 int_mant >>= 1; 652 } 653 } 654 655 // Handle rounding to odd aka Von Neumann rounding: 656 if (error && rm == FPRounding_ODD) 657 int_mant |= 1; 658 659 // Handle overflow: 660 if (biased_exp >= 255) { 661 *flags |= FPLIB_OFC | FPLIB_IXC; 662 if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) || 663 (rm == FPLIB_RM && sgn)) { 664 return fp32_infinity(sgn); 665 } else { 666 return fp32_max_normal(sgn); 667 } 668 } 669 670 if (error) { 671 *flags |= FPLIB_IXC; 672 } 673 674 return fp32_pack(sgn, biased_exp, int_mant); 675} 676 677static uint32_t 678fp32_round(int sgn, int exp, uint32_t mnt, int mode, int *flags) 679{ 680 return fp32_round_(sgn, exp, mnt, mode & 3, mode, flags); 681} 682 683static uint64_t 684fp64_round_(int sgn, int exp, uint64_t mnt, int rm, int mode, int *flags) 685{ 686 int biased_exp; // non-negative exponent value for result 687 uint64_t int_mant; // mantissa for result, less than (1 << 52) 688 int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5 689 690 assert(rm != FPRounding_TIEAWAY); 691 692 // Flush to zero: 693 if ((mode & FPLIB_FZ) && exp < 1) { 694 *flags |= FPLIB_UFC; 695 return fp64_zero(sgn); 696 } 697 698 // The bottom 11 bits of mnt are orred together: 699 mnt = (uint64_t)1 << 54 | mnt >> 10 | ((mnt & 0x3ff) != 0); 700 701 if (exp > 0) { 702 biased_exp = exp; 703 int_mant = mnt >> 2; 704 error = mnt & 3; 705 } else { 706 biased_exp = 0; 707 int_mant = lsr64(mnt, 3 - exp); 708 error = (lsr64(mnt, 1 - exp) & 3) | !!(mnt & (lsl64(1, 1 - exp) - 1)); 709 } 710 711 if (!biased_exp && error) { // xx should also check fpscr_val<11> 712 *flags |= FPLIB_UFC; 713 } 714 715 // Round up: 716 if ((rm == FPLIB_RN && (error == 3 || 717 (error == 2 && (int_mant & 1)))) || 718 (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) { 719 ++int_mant; 720 if (int_mant == (uint64_t)1 << 52) { 721 // Rounded up from denormalized to normalized 722 biased_exp = 1; 723 } 724 if (int_mant == (uint64_t)1 << 53) { 725 // Rounded up to next exponent 726 ++biased_exp; 727 int_mant >>= 1; 728 } 729 } 730 731 // Handle rounding to odd aka Von Neumann rounding: 732 if (error && rm == FPRounding_ODD) 733 int_mant |= 1; 734 735 // Handle overflow: 736 if (biased_exp >= 2047) { 737 *flags |= FPLIB_OFC | FPLIB_IXC; 738 if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) || 739 (rm == FPLIB_RM && sgn)) { 740 return fp64_infinity(sgn); 741 } else { 742 return fp64_max_normal(sgn); 743 } 744 } 745 746 if (error) { 747 *flags |= FPLIB_IXC; 748 } 749 750 return fp64_pack(sgn, biased_exp, int_mant); 751} 752 753static uint64_t 754fp64_round(int sgn, int exp, uint64_t mnt, int mode, int *flags) 755{ 756 return fp64_round_(sgn, exp, mnt, mode & 3, mode, flags); 757} 758 759static int 760fp32_compare_eq(uint32_t a, uint32_t b, int mode, int *flags) 761{ 762 int a_sgn, a_exp, b_sgn, b_exp; 763 uint32_t a_mnt, b_mnt; 764 765 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 766 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 767 768 if ((a_exp == 255 && (uint32_t)(a_mnt << 9)) || 769 (b_exp == 255 && (uint32_t)(b_mnt << 9))) { 770 if ((a_exp == 255 && (uint32_t)(a_mnt << 9) && !(a >> 22 & 1)) || 771 (b_exp == 255 && (uint32_t)(b_mnt << 9) && !(b >> 22 & 1))) 772 *flags |= FPLIB_IOC; 773 return 0; 774 } 775 return a == b || (!a_mnt && !b_mnt); 776} 777 778static int 779fp32_compare_ge(uint32_t a, uint32_t b, int mode, int *flags) 780{ 781 int a_sgn, a_exp, b_sgn, b_exp; 782 uint32_t a_mnt, b_mnt; 783 784 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 785 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 786 787 if ((a_exp == 255 && (uint32_t)(a_mnt << 9)) || 788 (b_exp == 255 && (uint32_t)(b_mnt << 9))) { 789 *flags |= FPLIB_IOC; 790 return 0; 791 } 792 if (!a_mnt && !b_mnt) 793 return 1; 794 if (a_sgn != b_sgn) 795 return b_sgn; 796 if (a_exp != b_exp) 797 return a_sgn ^ (a_exp > b_exp); 798 if (a_mnt != b_mnt) 799 return a_sgn ^ (a_mnt > b_mnt); 800 return 1; 801} 802 803static int 804fp32_compare_gt(uint32_t a, uint32_t b, int mode, int *flags) 805{ 806 int a_sgn, a_exp, b_sgn, b_exp; 807 uint32_t a_mnt, b_mnt; 808 809 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 810 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 811 812 if ((a_exp == 255 && (uint32_t)(a_mnt << 9)) || 813 (b_exp == 255 && (uint32_t)(b_mnt << 9))) { 814 *flags |= FPLIB_IOC; 815 return 0; 816 } 817 if (!a_mnt && !b_mnt) 818 return 0; 819 if (a_sgn != b_sgn) 820 return b_sgn; 821 if (a_exp != b_exp) 822 return a_sgn ^ (a_exp > b_exp); 823 if (a_mnt != b_mnt) 824 return a_sgn ^ (a_mnt > b_mnt); 825 return 0; 826} 827 828static int 829fp64_compare_eq(uint64_t a, uint64_t b, int mode, int *flags) 830{ 831 int a_sgn, a_exp, b_sgn, b_exp; 832 uint64_t a_mnt, b_mnt; 833 834 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 835 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 836 837 if ((a_exp == 2047 && (uint64_t)(a_mnt << 12)) || 838 (b_exp == 2047 && (uint64_t)(b_mnt << 12))) { 839 if ((a_exp == 2047 && (uint64_t)(a_mnt << 12) && !(a >> 51 & 1)) || 840 (b_exp == 2047 && (uint64_t)(b_mnt << 12) && !(b >> 51 & 1))) 841 *flags |= FPLIB_IOC; 842 return 0; 843 } 844 return a == b || (!a_mnt && !b_mnt); 845} 846 847static int 848fp64_compare_ge(uint64_t a, uint64_t b, int mode, int *flags) 849{ 850 int a_sgn, a_exp, b_sgn, b_exp; 851 uint64_t a_mnt, b_mnt; 852 853 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 854 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 855 856 if ((a_exp == 2047 && (uint64_t)(a_mnt << 12)) || 857 (b_exp == 2047 && (uint64_t)(b_mnt << 12))) { 858 *flags |= FPLIB_IOC; 859 return 0; 860 } 861 if (!a_mnt && !b_mnt) 862 return 1; 863 if (a_sgn != b_sgn) 864 return b_sgn; 865 if (a_exp != b_exp) 866 return a_sgn ^ (a_exp > b_exp); 867 if (a_mnt != b_mnt) 868 return a_sgn ^ (a_mnt > b_mnt); 869 return 1; 870} 871 872static int 873fp64_compare_gt(uint64_t a, uint64_t b, int mode, int *flags) 874{ 875 int a_sgn, a_exp, b_sgn, b_exp; 876 uint64_t a_mnt, b_mnt; 877 878 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 879 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 880 881 if ((a_exp == 2047 && (uint64_t)(a_mnt << 12)) || 882 (b_exp == 2047 && (uint64_t)(b_mnt << 12))) { 883 *flags |= FPLIB_IOC; 884 return 0; 885 } 886 if (!a_mnt && !b_mnt) 887 return 0; 888 if (a_sgn != b_sgn) 889 return b_sgn; 890 if (a_exp != b_exp) 891 return a_sgn ^ (a_exp > b_exp); 892 if (a_mnt != b_mnt) 893 return a_sgn ^ (a_mnt > b_mnt); 894 return 0; 895} 896 897static uint32_t 898fp32_add(uint32_t a, uint32_t b, int neg, int mode, int *flags) 899{ 900 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp; 901 uint32_t a_mnt, b_mnt, x, x_mnt; 902 903 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 904 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 905 906 if ((x = fp32_process_NaNs(a, b, mode, flags))) { 907 return x; 908 } 909 910 b_sgn ^= neg; 911 912 // Handle infinities and zeroes: 913 if (a_exp == 255 && b_exp == 255 && a_sgn != b_sgn) { 914 *flags |= FPLIB_IOC; 915 return fp32_defaultNaN(); 916 } else if (a_exp == 255) { 917 return fp32_infinity(a_sgn); 918 } else if (b_exp == 255) { 919 return fp32_infinity(b_sgn); 920 } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) { 921 return fp32_zero(a_sgn); 922 } 923 924 a_mnt <<= 3; 925 b_mnt <<= 3; 926 if (a_exp >= b_exp) { 927 b_mnt = (lsr32(b_mnt, a_exp - b_exp) | 928 !!(b_mnt & (lsl32(1, a_exp - b_exp) - 1))); 929 b_exp = a_exp; 930 } else { 931 a_mnt = (lsr32(a_mnt, b_exp - a_exp) | 932 !!(a_mnt & (lsl32(1, b_exp - a_exp) - 1))); 933 a_exp = b_exp; 934 } 935 x_sgn = a_sgn; 936 x_exp = a_exp; 937 if (a_sgn == b_sgn) { 938 x_mnt = a_mnt + b_mnt; 939 } else if (a_mnt >= b_mnt) { 940 x_mnt = a_mnt - b_mnt; 941 } else { 942 x_sgn ^= 1; 943 x_mnt = b_mnt - a_mnt; 944 } 945 946 if (!x_mnt) { 947 // Sign of exact zero result depends on rounding mode 948 return fp32_zero((mode & 3) == 2); 949 } 950 951 x_mnt = fp32_normalise(x_mnt, &x_exp); 952 953 return fp32_round(x_sgn, x_exp + 5, x_mnt << 1, mode, flags); 954} 955 956static uint64_t 957fp64_add(uint64_t a, uint64_t b, int neg, int mode, int *flags) 958{ 959 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp; 960 uint64_t a_mnt, b_mnt, x, x_mnt; 961 962 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 963 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 964 965 if ((x = fp64_process_NaNs(a, b, mode, flags))) { 966 return x; 967 } 968 969 b_sgn ^= neg; 970 971 // Handle infinities and zeroes: 972 if (a_exp == 2047 && b_exp == 2047 && a_sgn != b_sgn) { 973 *flags |= FPLIB_IOC; 974 return fp64_defaultNaN(); 975 } else if (a_exp == 2047) { 976 return fp64_infinity(a_sgn); 977 } else if (b_exp == 2047) { 978 return fp64_infinity(b_sgn); 979 } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) { 980 return fp64_zero(a_sgn); 981 } 982 983 a_mnt <<= 3; 984 b_mnt <<= 3; 985 if (a_exp >= b_exp) { 986 b_mnt = (lsr64(b_mnt, a_exp - b_exp) | 987 !!(b_mnt & (lsl64(1, a_exp - b_exp) - 1))); 988 b_exp = a_exp; 989 } else { 990 a_mnt = (lsr64(a_mnt, b_exp - a_exp) | 991 !!(a_mnt & (lsl64(1, b_exp - a_exp) - 1))); 992 a_exp = b_exp; 993 } 994 x_sgn = a_sgn; 995 x_exp = a_exp; 996 if (a_sgn == b_sgn) { 997 x_mnt = a_mnt + b_mnt; 998 } else if (a_mnt >= b_mnt) { 999 x_mnt = a_mnt - b_mnt; 1000 } else { 1001 x_sgn ^= 1; 1002 x_mnt = b_mnt - a_mnt; 1003 } 1004 1005 if (!x_mnt) { 1006 // Sign of exact zero result depends on rounding mode 1007 return fp64_zero((mode & 3) == 2); 1008 } 1009 1010 x_mnt = fp64_normalise(x_mnt, &x_exp); 1011 1012 return fp64_round(x_sgn, x_exp + 8, x_mnt << 1, mode, flags); 1013} 1014 1015static uint32_t 1016fp32_mul(uint32_t a, uint32_t b, int mode, int *flags) 1017{ 1018 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp; 1019 uint32_t a_mnt, b_mnt, x; 1020 uint64_t x_mnt; 1021 1022 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 1023 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 1024 1025 if ((x = fp32_process_NaNs(a, b, mode, flags))) { 1026 return x; 1027 } 1028 1029 // Handle infinities and zeroes: 1030 if ((a_exp == 255 && !b_mnt) || (b_exp == 255 && !a_mnt)) { 1031 *flags |= FPLIB_IOC; 1032 return fp32_defaultNaN(); 1033 } else if (a_exp == 255 || b_exp == 255) { 1034 return fp32_infinity(a_sgn ^ b_sgn); 1035 } else if (!a_mnt || !b_mnt) { 1036 return fp32_zero(a_sgn ^ b_sgn); 1037 } 1038 1039 // Multiply and normalise: 1040 x_sgn = a_sgn ^ b_sgn; 1041 x_exp = a_exp + b_exp - 110; 1042 x_mnt = (uint64_t)a_mnt * b_mnt; 1043 x_mnt = fp64_normalise(x_mnt, &x_exp); 1044 1045 // Convert to 32 bits, collapsing error into bottom bit: 1046 x_mnt = lsr64(x_mnt, 31) | !!lsl64(x_mnt, 33); 1047 1048 return fp32_round(x_sgn, x_exp, x_mnt, mode, flags); 1049} 1050 1051static uint64_t 1052fp64_mul(uint64_t a, uint64_t b, int mode, int *flags) 1053{ 1054 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp; 1055 uint64_t a_mnt, b_mnt, x; 1056 uint64_t x0_mnt, x1_mnt; 1057 1058 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 1059 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 1060 1061 if ((x = fp64_process_NaNs(a, b, mode, flags))) { 1062 return x; 1063 } 1064 1065 // Handle infinities and zeroes: 1066 if ((a_exp == 2047 && !b_mnt) || (b_exp == 2047 && !a_mnt)) { 1067 *flags |= FPLIB_IOC; 1068 return fp64_defaultNaN(); 1069 } else if (a_exp == 2047 || b_exp == 2047) { 1070 return fp64_infinity(a_sgn ^ b_sgn); 1071 } else if (!a_mnt || !b_mnt) { 1072 return fp64_zero(a_sgn ^ b_sgn); 1073 } 1074 1075 // Multiply and normalise: 1076 x_sgn = a_sgn ^ b_sgn; 1077 x_exp = a_exp + b_exp - 1000; 1078 mul62x62(&x0_mnt, &x1_mnt, a_mnt, b_mnt); 1079 fp128_normalise(&x0_mnt, &x1_mnt, &x_exp); 1080 1081 // Convert to 64 bits, collapsing error into bottom bit: 1082 x0_mnt = x1_mnt << 1 | !!x0_mnt; 1083 1084 return fp64_round(x_sgn, x_exp, x0_mnt, mode, flags); 1085} 1086 1087static uint32_t 1088fp32_muladd(uint32_t a, uint32_t b, uint32_t c, int scale, 1089 int mode, int *flags) 1090{ 1091 int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp; 1092 uint32_t a_mnt, b_mnt, c_mnt, x; 1093 uint64_t x_mnt, y_mnt; 1094 1095 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 1096 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 1097 fp32_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags); 1098 1099 x = fp32_process_NaNs3(a, b, c, mode, flags); 1100 1101 // Quiet NaN added to product of zero and infinity: 1102 if (a_exp == 255 && (a_mnt >> 22 & 1) && 1103 ((!b_mnt && c_exp == 255 && !(uint32_t)(c_mnt << 9)) || 1104 (!c_mnt && b_exp == 255 && !(uint32_t)(b_mnt << 9)))) { 1105 x = fp32_defaultNaN(); 1106 *flags |= FPLIB_IOC; 1107 } 1108 1109 if (x) { 1110 return x; 1111 } 1112 1113 // Handle infinities and zeroes: 1114 if ((b_exp == 255 && !c_mnt) || 1115 (c_exp == 255 && !b_mnt) || 1116 (a_exp == 255 && (b_exp == 255 || c_exp == 255) && 1117 (a_sgn != (b_sgn ^ c_sgn)))) { 1118 *flags |= FPLIB_IOC; 1119 return fp32_defaultNaN(); 1120 } 1121 if (a_exp == 255) 1122 return fp32_infinity(a_sgn); 1123 if (b_exp == 255 || c_exp == 255) 1124 return fp32_infinity(b_sgn ^ c_sgn); 1125 if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn)) 1126 return fp32_zero(a_sgn); 1127 1128 x_sgn = a_sgn; 1129 x_exp = a_exp + 13; 1130 x_mnt = (uint64_t)a_mnt << 27; 1131 1132 // Multiply: 1133 y_sgn = b_sgn ^ c_sgn; 1134 y_exp = b_exp + c_exp - 113; 1135 y_mnt = (uint64_t)b_mnt * c_mnt << 3; 1136 if (!y_mnt) { 1137 y_exp = x_exp; 1138 } 1139 1140 // Add: 1141 if (x_exp >= y_exp) { 1142 y_mnt = (lsr64(y_mnt, x_exp - y_exp) | 1143 !!(y_mnt & (lsl64(1, x_exp - y_exp) - 1))); 1144 y_exp = x_exp; 1145 } else { 1146 x_mnt = (lsr64(x_mnt, y_exp - x_exp) | 1147 !!(x_mnt & (lsl64(1, y_exp - x_exp) - 1))); 1148 x_exp = y_exp; 1149 } 1150 if (x_sgn == y_sgn) { 1151 x_mnt = x_mnt + y_mnt; 1152 } else if (x_mnt >= y_mnt) { 1153 x_mnt = x_mnt - y_mnt; 1154 } else { 1155 x_sgn ^= 1; 1156 x_mnt = y_mnt - x_mnt; 1157 } 1158 1159 if (!x_mnt) { 1160 // Sign of exact zero result depends on rounding mode 1161 return fp32_zero((mode & 3) == 2); 1162 } 1163 1164 // Normalise and convert to 32 bits, collapsing error into bottom bit: 1165 x_mnt = fp64_normalise(x_mnt, &x_exp); 1166 x_mnt = x_mnt >> 31 | !!(uint32_t)(x_mnt << 1); 1167 1168 return fp32_round(x_sgn, x_exp + scale, x_mnt, mode, flags); 1169} 1170 1171static uint64_t 1172fp64_muladd(uint64_t a, uint64_t b, uint64_t c, int scale, 1173 int mode, int *flags) 1174{ 1175 int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp; 1176 uint64_t a_mnt, b_mnt, c_mnt, x; 1177 uint64_t x0_mnt, x1_mnt, y0_mnt, y1_mnt; 1178 1179 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 1180 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 1181 fp64_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags); 1182 1183 x = fp64_process_NaNs3(a, b, c, mode, flags); 1184 1185 // Quiet NaN added to product of zero and infinity: 1186 if (a_exp == 2047 && (a_mnt >> 51 & 1) && 1187 ((!b_mnt && c_exp == 2047 && !(uint64_t)(c_mnt << 12)) || 1188 (!c_mnt && b_exp == 2047 && !(uint64_t)(b_mnt << 12)))) { 1189 x = fp64_defaultNaN(); 1190 *flags |= FPLIB_IOC; 1191 } 1192 1193 if (x) { 1194 return x; 1195 } 1196 1197 // Handle infinities and zeroes: 1198 if ((b_exp == 2047 && !c_mnt) || 1199 (c_exp == 2047 && !b_mnt) || 1200 (a_exp == 2047 && (b_exp == 2047 || c_exp == 2047) && 1201 (a_sgn != (b_sgn ^ c_sgn)))) { 1202 *flags |= FPLIB_IOC; 1203 return fp64_defaultNaN(); 1204 } 1205 if (a_exp == 2047) 1206 return fp64_infinity(a_sgn); 1207 if (b_exp == 2047 || c_exp == 2047) 1208 return fp64_infinity(b_sgn ^ c_sgn); 1209 if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn)) 1210 return fp64_zero(a_sgn); 1211 1212 x_sgn = a_sgn; 1213 x_exp = a_exp + 11; 1214 x0_mnt = 0; 1215 x1_mnt = a_mnt; 1216 1217 // Multiply: 1218 y_sgn = b_sgn ^ c_sgn; 1219 y_exp = b_exp + c_exp - 1003; 1220 mul62x62(&y0_mnt, &y1_mnt, b_mnt, c_mnt << 3); 1221 if (!y0_mnt && !y1_mnt) { 1222 y_exp = x_exp; 1223 } 1224 1225 // Add: 1226 if (x_exp >= y_exp) { 1227 uint64_t t0, t1; 1228 lsl128(&t0, &t1, y0_mnt, y1_mnt, 1229 x_exp - y_exp < 128 ? 128 - (x_exp - y_exp) : 0); 1230 lsr128(&y0_mnt, &y1_mnt, y0_mnt, y1_mnt, x_exp - y_exp); 1231 y0_mnt |= !!(t0 | t1); 1232 y_exp = x_exp; 1233 } else { 1234 uint64_t t0, t1; 1235 lsl128(&t0, &t1, x0_mnt, x1_mnt, 1236 y_exp - x_exp < 128 ? 128 - (y_exp - x_exp) : 0); 1237 lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y_exp - x_exp); 1238 x0_mnt |= !!(t0 | t1); 1239 x_exp = y_exp; 1240 } 1241 if (x_sgn == y_sgn) { 1242 add128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y0_mnt, y1_mnt); 1243 } else if (cmp128(x0_mnt, x1_mnt, y0_mnt, y1_mnt) >= 0) { 1244 sub128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y0_mnt, y1_mnt); 1245 } else { 1246 x_sgn ^= 1; 1247 sub128(&x0_mnt, &x1_mnt, y0_mnt, y1_mnt, x0_mnt, x1_mnt); 1248 } 1249 1250 if (!x0_mnt && !x1_mnt) { 1251 // Sign of exact zero result depends on rounding mode 1252 return fp64_zero((mode & 3) == 2); 1253 } 1254 1255 // Normalise and convert to 64 bits, collapsing error into bottom bit: 1256 fp128_normalise(&x0_mnt, &x1_mnt, &x_exp); 1257 x0_mnt = x1_mnt << 1 | !!x0_mnt; 1258 1259 return fp64_round(x_sgn, x_exp + scale, x0_mnt, mode, flags); 1260} 1261 1262static uint32_t 1263fp32_div(uint32_t a, uint32_t b, int mode, int *flags) 1264{ 1265 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp; 1266 uint32_t a_mnt, b_mnt, x; 1267 uint64_t x_mnt; 1268 1269 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 1270 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 1271 1272 if ((x = fp32_process_NaNs(a, b, mode, flags))) 1273 return x; 1274 1275 // Handle infinities and zeroes: 1276 if ((a_exp == 255 && b_exp == 255) || (!a_mnt && !b_mnt)) { 1277 *flags |= FPLIB_IOC; 1278 return fp32_defaultNaN(); 1279 } 1280 if (a_exp == 255 || !b_mnt) { 1281 if (a_exp != 255) 1282 *flags |= FPLIB_DZC; 1283 return fp32_infinity(a_sgn ^ b_sgn); 1284 } 1285 if (!a_mnt || b_exp == 255) 1286 return fp32_zero(a_sgn ^ b_sgn); 1287 1288 // Divide, setting bottom bit if inexact: 1289 a_mnt = fp32_normalise(a_mnt, &a_exp); 1290 x_sgn = a_sgn ^ b_sgn; 1291 x_exp = a_exp - b_exp + 172; 1292 x_mnt = ((uint64_t)a_mnt << 18) / b_mnt; 1293 x_mnt |= (x_mnt * b_mnt != (uint64_t)a_mnt << 18); 1294 1295 // Normalise and convert to 32 bits, collapsing error into bottom bit: 1296 x_mnt = fp64_normalise(x_mnt, &x_exp); 1297 x_mnt = x_mnt >> 31 | !!(uint32_t)(x_mnt << 1); 1298 1299 return fp32_round(x_sgn, x_exp, x_mnt, mode, flags); 1300} 1301 1302static uint64_t 1303fp64_div(uint64_t a, uint64_t b, int mode, int *flags) 1304{ 1305 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp, c; 1306 uint64_t a_mnt, b_mnt, x, x_mnt, x0_mnt, x1_mnt; 1307 1308 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 1309 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); 1310 1311 if ((x = fp64_process_NaNs(a, b, mode, flags))) 1312 return x; 1313 1314 // Handle infinities and zeroes: 1315 if ((a_exp == 2047 && b_exp == 2047) || (!a_mnt && !b_mnt)) { 1316 *flags |= FPLIB_IOC; 1317 return fp64_defaultNaN(); 1318 } 1319 if (a_exp == 2047 || !b_mnt) { 1320 if (a_exp != 2047) 1321 *flags |= FPLIB_DZC; 1322 return fp64_infinity(a_sgn ^ b_sgn); 1323 } 1324 if (!a_mnt || b_exp == 2047) 1325 return fp64_zero(a_sgn ^ b_sgn); 1326 1327 // Find reciprocal of divisor with Newton-Raphson: 1328 a_mnt = fp64_normalise(a_mnt, &a_exp); 1329 b_mnt = fp64_normalise(b_mnt, &b_exp); 1330 x_mnt = ~(uint64_t)0 / (b_mnt >> 31); 1331 mul64x32(&x0_mnt, &x1_mnt, b_mnt, x_mnt); 1332 sub128(&x0_mnt, &x1_mnt, 0, (uint64_t)1 << 32, x0_mnt, x1_mnt); 1333 lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 32); 1334 mul64x32(&x0_mnt, &x1_mnt, x0_mnt, x_mnt); 1335 lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 33); 1336 1337 // Multiply by dividend: 1338 x_sgn = a_sgn ^ b_sgn; 1339 x_exp = a_exp - b_exp + 1031; 1340 mul62x62(&x0_mnt, &x1_mnt, x0_mnt, a_mnt >> 2); // xx 62x62 is enough 1341 lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 4); 1342 x_mnt = x1_mnt; 1343 1344 // This is an underestimate, so try adding one: 1345 mul62x62(&x0_mnt, &x1_mnt, b_mnt >> 2, x_mnt + 1); // xx 62x62 is enough 1346 c = cmp128(x0_mnt, x1_mnt, 0, a_mnt >> 11); 1347 if (c <= 0) { 1348 ++x_mnt; 1349 } 1350 1351 x_mnt = fp64_normalise(x_mnt, &x_exp); 1352 1353 return fp64_round(x_sgn, x_exp, x_mnt << 1 | !!c, mode, flags); 1354} 1355 1356static void 1357set_fpscr0(FPSCR &fpscr, int flags) 1358{ 1359 if (flags & FPLIB_IDC) { 1360 fpscr.idc = 1; 1361 } 1362 if (flags & FPLIB_IOC) { 1363 fpscr.ioc = 1; 1364 } 1365 if (flags & FPLIB_DZC) { 1366 fpscr.dzc = 1; 1367 } 1368 if (flags & FPLIB_OFC) { 1369 fpscr.ofc = 1; 1370 } 1371 if (flags & FPLIB_UFC) { 1372 fpscr.ufc = 1; 1373 } 1374 if (flags & FPLIB_IXC) { 1375 fpscr.ixc = 1; 1376 } 1377} 1378 1379static uint32_t 1380fp32_sqrt(uint32_t a, int mode, int *flags) 1381{ 1382 int a_sgn, a_exp, x_sgn, x_exp; 1383 uint32_t a_mnt, x, x_mnt; 1384 uint64_t t0, t1; 1385 1386 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 1387 1388 // Handle NaNs: 1389 if (a_exp == 255 && (uint32_t)(a_mnt << 9)) 1390 return fp32_process_NaN(a, mode, flags); 1391 1392 // Handle infinities and zeroes: 1393 if (!a_mnt) { 1394 return fp32_zero(a_sgn); 1395 } 1396 if (a_exp == 255 && !a_sgn) { 1397 return fp32_infinity(a_sgn); 1398 } 1399 if (a_sgn) { 1400 *flags |= FPLIB_IOC; 1401 return fp32_defaultNaN(); 1402 } 1403 1404 a_mnt = fp32_normalise(a_mnt, &a_exp); 1405 if (!(a_exp & 1)) { 1406 ++a_exp; 1407 a_mnt >>= 1; 1408 } 1409 1410 // x = (a * 3 + 5) / 8 1411 x = (a_mnt >> 2) + (a_mnt >> 3) + (5 << 28); 1412 1413 // x = (a / x + x) / 2; // 16-bit accuracy 1414 x = (a_mnt / (x >> 15) + (x >> 16)) << 15; 1415 1416 // x = (a / x + x) / 2; // 16-bit accuracy 1417 x = (a_mnt / (x >> 15) + (x >> 16)) << 15; 1418 1419 // x = (a / x + x) / 2; // 32-bit accuracy 1420 x = ((((uint64_t)a_mnt << 32) / x) >> 2) + (x >> 1); 1421 1422 x_sgn = 0; 1423 x_exp = (a_exp + 147) >> 1; 1424 x_mnt = ((x - (1 << 5)) >> 6) + 1; 1425 t1 = (uint64_t)x_mnt * x_mnt; 1426 t0 = (uint64_t)a_mnt << 19; 1427 if (t1 > t0) { 1428 --x_mnt; 1429 } 1430 1431 x_mnt = fp32_normalise(x_mnt, &x_exp); 1432 1433 return fp32_round(x_sgn, x_exp, x_mnt << 1 | (t1 != t0), mode, flags); 1434} 1435 1436static uint64_t 1437fp64_sqrt(uint64_t a, int mode, int *flags) 1438{ 1439 int a_sgn, a_exp, x_sgn, x_exp, c; 1440 uint64_t a_mnt, x_mnt, r, x0, x1; 1441 uint32_t x; 1442 1443 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); 1444 1445 // Handle NaNs: 1446 if (a_exp == 2047 && (uint64_t)(a_mnt << 12)) { 1447 return fp64_process_NaN(a, mode, flags); 1448 } 1449 1450 // Handle infinities and zeroes: 1451 if (!a_mnt) 1452 return fp64_zero(a_sgn); 1453 if (a_exp == 2047 && !a_sgn) 1454 return fp64_infinity(a_sgn); 1455 if (a_sgn) { 1456 *flags |= FPLIB_IOC; 1457 return fp64_defaultNaN(); 1458 } 1459 1460 a_mnt = fp64_normalise(a_mnt, &a_exp); 1461 if (a_exp & 1) { 1462 ++a_exp; 1463 a_mnt >>= 1; 1464 } 1465 1466 // x = (a * 3 + 5) / 8 1467 x = (a_mnt >> 34) + (a_mnt >> 35) + (5 << 28); 1468 1469 // x = (a / x + x) / 2; // 16-bit accuracy 1470 x = ((a_mnt >> 32) / (x >> 15) + (x >> 16)) << 15; 1471 1472 // x = (a / x + x) / 2; // 16-bit accuracy 1473 x = ((a_mnt >> 32) / (x >> 15) + (x >> 16)) << 15; 1474 1475 // x = (a / x + x) / 2; // 32-bit accuracy 1476 x = ((a_mnt / x) >> 2) + (x >> 1); 1477 1478 // r = 1 / x; // 32-bit accuracy 1479 r = ((uint64_t)1 << 62) / x; 1480 1481 // r = r * (2 - x * r); // 64-bit accuracy 1482 mul64x32(&x0, &x1, -(uint64_t)x * r << 1, r); 1483 lsr128(&x0, &x1, x0, x1, 31); 1484 1485 // x = (x + a * r) / 2; // 64-bit accuracy 1486 mul62x62(&x0, &x1, a_mnt >> 10, x0 >> 2); 1487 lsl128(&x0, &x1, x0, x1, 5); 1488 lsr128(&x0, &x1, x0, x1, 56); 1489 1490 x0 = ((uint64_t)x << 31) + (x0 >> 1); 1491 1492 x_sgn = 0; 1493 x_exp = (a_exp + 1053) >> 1; 1494 x_mnt = x0; 1495 x_mnt = ((x_mnt - (1 << 8)) >> 9) + 1; 1496 mul62x62(&x0, &x1, x_mnt, x_mnt); 1497 lsl128(&x0, &x1, x0, x1, 19); 1498 c = cmp128(x0, x1, 0, a_mnt); 1499 if (c > 0) 1500 --x_mnt; 1501 1502 x_mnt = fp64_normalise(x_mnt, &x_exp); 1503 1504 return fp64_round(x_sgn, x_exp, x_mnt << 1 | !!c, mode, flags); 1505} 1506 1507static int 1508modeConv(FPSCR fpscr) 1509{ 1510 return (((int) fpscr) >> 22) & 0xF; 1511} 1512 1513static void 1514set_fpscr(FPSCR &fpscr, int flags) 1515{ 1516 // translate back to FPSCR 1517 bool underflow = false; 1518 if (flags & FPLIB_IDC) { 1519 fpscr.idc = 1; 1520 } 1521 if (flags & FPLIB_IOC) { 1522 fpscr.ioc = 1; 1523 } 1524 if (flags & FPLIB_DZC) { 1525 fpscr.dzc = 1; 1526 } 1527 if (flags & FPLIB_OFC) { 1528 fpscr.ofc = 1; 1529 } 1530 if (flags & FPLIB_UFC) { 1531 underflow = true; //xx Why is this required? 1532 fpscr.ufc = 1; 1533 } 1534 if ((flags & FPLIB_IXC) && !(underflow && fpscr.fz)) { 1535 fpscr.ixc = 1; 1536 } 1537} 1538 1539template <> 1540bool 1541fplibCompareEQ(uint32_t a, uint32_t b, FPSCR &fpscr) 1542{ 1543 int flags = 0; 1544 int x = fp32_compare_eq(a, b, modeConv(fpscr), &flags); 1545 set_fpscr(fpscr, flags); 1546 return x; 1547} 1548 1549template <> 1550bool 1551fplibCompareGE(uint32_t a, uint32_t b, FPSCR &fpscr) 1552{ 1553 int flags = 0; 1554 int x = fp32_compare_ge(a, b, modeConv(fpscr), &flags); 1555 set_fpscr(fpscr, flags); 1556 return x; 1557} 1558 1559template <> 1560bool 1561fplibCompareGT(uint32_t a, uint32_t b, FPSCR &fpscr) 1562{ 1563 int flags = 0; 1564 int x = fp32_compare_gt(a, b, modeConv(fpscr), &flags); 1565 set_fpscr(fpscr, flags); 1566 return x; 1567} 1568 1569template <> 1570bool 1571fplibCompareEQ(uint64_t a, uint64_t b, FPSCR &fpscr) 1572{ 1573 int flags = 0; 1574 int x = fp64_compare_eq(a, b, modeConv(fpscr), &flags); 1575 set_fpscr(fpscr, flags); 1576 return x; 1577} 1578 1579template <> 1580bool 1581fplibCompareGE(uint64_t a, uint64_t b, FPSCR &fpscr) 1582{ 1583 int flags = 0; 1584 int x = fp64_compare_ge(a, b, modeConv(fpscr), &flags); 1585 set_fpscr(fpscr, flags); 1586 return x; 1587} 1588 1589template <> 1590bool 1591fplibCompareGT(uint64_t a, uint64_t b, FPSCR &fpscr) 1592{ 1593 int flags = 0; 1594 int x = fp64_compare_gt(a, b, modeConv(fpscr), &flags); 1595 set_fpscr(fpscr, flags); 1596 return x; 1597} 1598 1599template <> 1600uint32_t 1601fplibAbs(uint32_t op) 1602{ 1603 return op & ~((uint32_t)1 << 31); 1604} 1605 1606template <> 1607uint64_t 1608fplibAbs(uint64_t op) 1609{ 1610 return op & ~((uint64_t)1 << 63); 1611} 1612 1613template <> 1614uint32_t 1615fplibAdd(uint32_t op1, uint32_t op2, FPSCR &fpscr) 1616{ 1617 int flags = 0; 1618 uint32_t result = fp32_add(op1, op2, 0, modeConv(fpscr), &flags); 1619 set_fpscr0(fpscr, flags); 1620 return result; 1621} 1622 1623template <> 1624uint64_t 1625fplibAdd(uint64_t op1, uint64_t op2, FPSCR &fpscr) 1626{ 1627 int flags = 0; 1628 uint64_t result = fp64_add(op1, op2, 0, modeConv(fpscr), &flags); 1629 set_fpscr0(fpscr, flags); 1630 return result; 1631} 1632 1633template <> 1634int 1635fplibCompare(uint32_t op1, uint32_t op2, bool signal_nans, FPSCR &fpscr) 1636{ 1637 int mode = modeConv(fpscr); 1638 int flags = 0; 1639 int sgn1, exp1, sgn2, exp2, result; 1640 uint32_t mnt1, mnt2; 1641 1642 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); 1643 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); 1644 1645 if ((exp1 == 255 && (uint32_t)(mnt1 << 9)) || 1646 (exp2 == 255 && (uint32_t)(mnt2 << 9))) { 1647 result = 3; 1648 if ((exp1 == 255 && (uint32_t)(mnt1 << 9) && !(mnt1 >> 22 & 1)) || 1649 (exp2 == 255 && (uint32_t)(mnt2 << 9) && !(mnt2 >> 22 & 1)) || 1650 signal_nans) 1651 flags |= FPLIB_IOC; 1652 } else { 1653 if (op1 == op2 || (!mnt1 && !mnt2)) { 1654 result = 6; 1655 } else if (sgn1 != sgn2) { 1656 result = sgn1 ? 8 : 2; 1657 } else if (exp1 != exp2) { 1658 result = sgn1 ^ (exp1 < exp2) ? 8 : 2; 1659 } else { 1660 result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2; 1661 } 1662 } 1663 1664 set_fpscr0(fpscr, flags); 1665 1666 return result; 1667} 1668 1669template <> 1670int 1671fplibCompare(uint64_t op1, uint64_t op2, bool signal_nans, FPSCR &fpscr) 1672{ 1673 int mode = modeConv(fpscr); 1674 int flags = 0; 1675 int sgn1, exp1, sgn2, exp2, result; 1676 uint64_t mnt1, mnt2; 1677 1678 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); 1679 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); 1680 1681 if ((exp1 == 2047 && (uint64_t)(mnt1 << 12)) || 1682 (exp2 == 2047 && (uint64_t)(mnt2 << 12))) { 1683 result = 3; 1684 if ((exp1 == 2047 && (uint64_t)(mnt1 << 12) && !(mnt1 >> 51 & 1)) || 1685 (exp2 == 2047 && (uint64_t)(mnt2 << 12) && !(mnt2 >> 51 & 1)) || 1686 signal_nans) 1687 flags |= FPLIB_IOC; 1688 } else { 1689 if (op1 == op2 || (!mnt1 && !mnt2)) { 1690 result = 6; 1691 } else if (sgn1 != sgn2) { 1692 result = sgn1 ? 8 : 2; 1693 } else if (exp1 != exp2) { 1694 result = sgn1 ^ (exp1 < exp2) ? 8 : 2; 1695 } else { 1696 result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2; 1697 } 1698 } 1699 1700 set_fpscr0(fpscr, flags); 1701 1702 return result; 1703} 1704 1705static uint16_t 1706fp16_FPConvertNaN_32(uint32_t op) 1707{ 1708 return fp16_pack(op >> 31, 31, (uint16_t)1 << 9 | op >> 13); 1709} 1710 1711static uint16_t 1712fp16_FPConvertNaN_64(uint64_t op) 1713{ 1714 return fp16_pack(op >> 63, 31, (uint16_t)1 << 9 | op >> 42); 1715} 1716 1717static uint32_t 1718fp32_FPConvertNaN_16(uint16_t op) 1719{ 1720 return fp32_pack(op >> 15, 255, (uint32_t)1 << 22 | (uint32_t)op << 13); 1721} 1722 1723static uint32_t 1724fp32_FPConvertNaN_64(uint64_t op) 1725{ 1726 return fp32_pack(op >> 63, 255, (uint32_t)1 << 22 | op >> 29); 1727} 1728 1729static uint64_t 1730fp64_FPConvertNaN_16(uint16_t op) 1731{ 1732 return fp64_pack(op >> 15, 2047, (uint64_t)1 << 51 | (uint64_t)op << 42); 1733} 1734 1735static uint64_t 1736fp64_FPConvertNaN_32(uint32_t op) 1737{ 1738 return fp64_pack(op >> 31, 2047, (uint64_t)1 << 51 | (uint64_t)op << 29); 1739} 1740 1741static uint32_t 1742fp32_FPOnePointFive(int sgn) 1743{ 1744 return fp32_pack(sgn, 127, (uint64_t)1 << 22); 1745} 1746 1747static uint64_t 1748fp64_FPOnePointFive(int sgn) 1749{ 1750 return fp64_pack(sgn, 1023, (uint64_t)1 << 51); 1751} 1752 1753static uint32_t 1754fp32_FPThree(int sgn) 1755{ 1756 return fp32_pack(sgn, 128, (uint64_t)1 << 22); 1757} 1758 1759static uint64_t 1760fp64_FPThree(int sgn) 1761{ 1762 return fp64_pack(sgn, 1024, (uint64_t)1 << 51); 1763} 1764 1765static uint32_t 1766fp32_FPTwo(int sgn) 1767{ 1768 return fp32_pack(sgn, 128, 0); 1769} 1770 1771static uint64_t 1772fp64_FPTwo(int sgn) 1773{ 1774 return fp64_pack(sgn, 1024, 0); 1775} 1776 1777template <> 1778uint16_t 1779fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr) 1780{ 1781 int mode = modeConv(fpscr); 1782 int flags = 0; 1783 int sgn, exp; 1784 uint32_t mnt; 1785 uint16_t result; 1786 1787 // Unpack floating-point operand optionally with flush-to-zero: 1788 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags); 1789 1790 bool alt_hp = fpscr.ahp; 1791 1792 if (exp == 255 && (uint32_t)(mnt << 9)) { 1793 if (alt_hp) { 1794 result = fp16_zero(sgn); 1795 } else if (fpscr.dn) { 1796 result = fp16_defaultNaN(); 1797 } else { 1798 result = fp16_FPConvertNaN_32(op); 1799 } 1800 if (!(mnt >> 22 & 1) || alt_hp) { 1801 flags |= FPLIB_IOC; 1802 } 1803 } else if (exp == 255) { 1804 if (alt_hp) { 1805 result = sgn << 15 | (uint16_t)0x7fff; 1806 flags |= FPLIB_IOC; 1807 } else { 1808 result = fp16_infinity(sgn); 1809 } 1810 } else if (!mnt) { 1811 result = fp16_zero(sgn); 1812 } else { 1813 result = fp16_round_(sgn, exp - 127 + 15, 1814 mnt >> 7 | !!(uint32_t)(mnt << 25), 1815 rounding, mode | alt_hp << 4, &flags); 1816 } 1817 1818 set_fpscr0(fpscr, flags); 1819 1820 return result; 1821} 1822 1823template <> 1824uint16_t 1825fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr) 1826{ 1827 int mode = modeConv(fpscr); 1828 int flags = 0; 1829 int sgn, exp; 1830 uint64_t mnt; 1831 uint16_t result; 1832 1833 // Unpack floating-point operand optionally with flush-to-zero: 1834 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags); 1835 1836 bool alt_hp = fpscr.ahp; 1837 1838 if (exp == 2047 && (uint64_t)(mnt << 12)) { 1839 if (alt_hp) { 1840 result = fp16_zero(sgn); 1841 } else if (fpscr.dn) { 1842 result = fp16_defaultNaN(); 1843 } else { 1844 result = fp16_FPConvertNaN_64(op); 1845 } 1846 if (!(mnt >> 51 & 1) || alt_hp) { 1847 flags |= FPLIB_IOC; 1848 } 1849 } else if (exp == 2047) { 1850 if (alt_hp) { 1851 result = sgn << 15 | (uint16_t)0x7fff; 1852 flags |= FPLIB_IOC; 1853 } else { 1854 result = fp16_infinity(sgn); 1855 } 1856 } else if (!mnt) { 1857 result = fp16_zero(sgn); 1858 } else { 1859 result = fp16_round_(sgn, exp - 1023 + 15, 1860 mnt >> 36 | !!(uint64_t)(mnt << 28), 1861 rounding, mode | alt_hp << 4, &flags); 1862 } 1863 1864 set_fpscr0(fpscr, flags); 1865 1866 return result; 1867} 1868 1869template <> 1870uint32_t 1871fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr) 1872{ 1873 int mode = modeConv(fpscr); 1874 int flags = 0; 1875 int sgn, exp; 1876 uint16_t mnt; 1877 uint32_t result; 1878 1879 // Unpack floating-point operand optionally with flush-to-zero: 1880 fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags); 1881 1882 if (exp == 31 && !fpscr.ahp && (uint16_t)(mnt << 6)) { 1883 if (fpscr.dn) { 1884 result = fp32_defaultNaN(); 1885 } else { 1886 result = fp32_FPConvertNaN_16(op); 1887 } 1888 if (!(mnt >> 9 & 1)) { 1889 flags |= FPLIB_IOC; 1890 } 1891 } else if (exp == 31 && !fpscr.ahp) { 1892 result = fp32_infinity(sgn); 1893 } else if (!mnt) { 1894 result = fp32_zero(sgn); 1895 } else { 1896 mnt = fp16_normalise(mnt, &exp); 1897 result = fp32_pack(sgn, exp - 15 + 127 + 5, (uint32_t)mnt << 8); 1898 } 1899 1900 set_fpscr0(fpscr, flags); 1901 1902 return result; 1903} 1904 1905template <> 1906uint32_t 1907fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr) 1908{ 1909 int mode = modeConv(fpscr); 1910 int flags = 0; 1911 int sgn, exp; 1912 uint64_t mnt; 1913 uint32_t result; 1914 1915 // Unpack floating-point operand optionally with flush-to-zero: 1916 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags); 1917 1918 if (exp == 2047 && (uint64_t)(mnt << 12)) { 1919 if (fpscr.dn) { 1920 result = fp32_defaultNaN(); 1921 } else { 1922 result = fp32_FPConvertNaN_64(op); 1923 } 1924 if (!(mnt >> 51 & 1)) { 1925 flags |= FPLIB_IOC; 1926 } 1927 } else if (exp == 2047) { 1928 result = fp32_infinity(sgn); 1929 } else if (!mnt) { 1930 result = fp32_zero(sgn); 1931 } else { 1932 result = fp32_round_(sgn, exp - 1023 + 127, 1933 mnt >> 20 | !!(uint64_t)(mnt << 44), 1934 rounding, mode, &flags); 1935 } 1936 1937 set_fpscr0(fpscr, flags); 1938 1939 return result; 1940} 1941 1942template <> 1943uint64_t 1944fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr) 1945{ 1946 int mode = modeConv(fpscr); 1947 int flags = 0; 1948 int sgn, exp; 1949 uint16_t mnt; 1950 uint64_t result; 1951 1952 // Unpack floating-point operand optionally with flush-to-zero: 1953 fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags); 1954 1955 if (exp == 31 && !fpscr.ahp && (uint16_t)(mnt << 6)) { 1956 if (fpscr.dn) { 1957 result = fp64_defaultNaN(); 1958 } else { 1959 result = fp64_FPConvertNaN_16(op); 1960 } 1961 if (!(mnt >> 9 & 1)) { 1962 flags |= FPLIB_IOC; 1963 } 1964 } else if (exp == 31 && !fpscr.ahp) { 1965 result = fp64_infinity(sgn); 1966 } else if (!mnt) { 1967 result = fp64_zero(sgn); 1968 } else { 1969 mnt = fp16_normalise(mnt, &exp); 1970 result = fp64_pack(sgn, exp - 15 + 1023 + 5, (uint64_t)mnt << 37); 1971 } 1972 1973 set_fpscr0(fpscr, flags); 1974 1975 return result; 1976} 1977 1978template <> 1979uint64_t 1980fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr) 1981{ 1982 int mode = modeConv(fpscr); 1983 int flags = 0; 1984 int sgn, exp; 1985 uint32_t mnt; 1986 uint64_t result; 1987 1988 // Unpack floating-point operand optionally with flush-to-zero: 1989 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags); 1990 1991 if (exp == 255 && (uint32_t)(mnt << 9)) { 1992 if (fpscr.dn) { 1993 result = fp64_defaultNaN(); 1994 } else { 1995 result = fp64_FPConvertNaN_32(op); 1996 } 1997 if (!(mnt >> 22 & 1)) { 1998 flags |= FPLIB_IOC; 1999 } 2000 } else if (exp == 255) { 2001 result = fp64_infinity(sgn); 2002 } else if (!mnt) { 2003 result = fp64_zero(sgn); 2004 } else { 2005 mnt = fp32_normalise(mnt, &exp); 2006 result = fp64_pack(sgn, exp - 127 + 1023 + 8, (uint64_t)mnt << 21); 2007 } 2008 2009 set_fpscr0(fpscr, flags); 2010 2011 return result; 2012} 2013 2014template <> 2015uint32_t 2016fplibMulAdd(uint32_t addend, uint32_t op1, uint32_t op2, FPSCR &fpscr) 2017{ 2018 int flags = 0; 2019 uint32_t result = fp32_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags); 2020 set_fpscr0(fpscr, flags); 2021 return result; 2022} 2023 2024template <> 2025uint64_t 2026fplibMulAdd(uint64_t addend, uint64_t op1, uint64_t op2, FPSCR &fpscr) 2027{ 2028 int flags = 0; 2029 uint64_t result = fp64_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags); 2030 set_fpscr0(fpscr, flags); 2031 return result; 2032} 2033 2034template <> 2035uint32_t 2036fplibDiv(uint32_t op1, uint32_t op2, FPSCR &fpscr) 2037{ 2038 int flags = 0; 2039 uint32_t result = fp32_div(op1, op2, modeConv(fpscr), &flags); 2040 set_fpscr0(fpscr, flags); 2041 return result; 2042} 2043 2044template <> 2045uint64_t 2046fplibDiv(uint64_t op1, uint64_t op2, FPSCR &fpscr) 2047{ 2048 int flags = 0; 2049 uint64_t result = fp64_div(op1, op2, modeConv(fpscr), &flags); 2050 set_fpscr0(fpscr, flags); 2051 return result; 2052} 2053 2054static uint32_t 2055fp32_repack(int sgn, int exp, uint32_t mnt) 2056{ 2057 return fp32_pack(sgn, mnt >> 23 ? exp : 0, mnt); 2058} 2059 2060static uint64_t 2061fp64_repack(int sgn, int exp, uint64_t mnt) 2062{ 2063 return fp64_pack(sgn, mnt >> 52 ? exp : 0, mnt); 2064} 2065 2066static void 2067fp32_minmaxnum(uint32_t *op1, uint32_t *op2, int sgn) 2068{ 2069 // Treat a single quiet-NaN as +Infinity/-Infinity 2070 if (!((uint32_t)~(*op1 << 1) >> 23) && (uint32_t)~(*op2 << 1) >> 23) 2071 *op1 = fp32_infinity(sgn); 2072 if (!((uint32_t)~(*op2 << 1) >> 23) && (uint32_t)~(*op1 << 1) >> 23) 2073 *op2 = fp32_infinity(sgn); 2074} 2075 2076static void 2077fp64_minmaxnum(uint64_t *op1, uint64_t *op2, int sgn) 2078{ 2079 // Treat a single quiet-NaN as +Infinity/-Infinity 2080 if (!((uint64_t)~(*op1 << 1) >> 52) && (uint64_t)~(*op2 << 1) >> 52) 2081 *op1 = fp64_infinity(sgn); 2082 if (!((uint64_t)~(*op2 << 1) >> 52) && (uint64_t)~(*op1 << 1) >> 52) 2083 *op2 = fp64_infinity(sgn); 2084} 2085 2086template <> 2087uint32_t 2088fplibMax(uint32_t op1, uint32_t op2, FPSCR &fpscr) 2089{ 2090 int mode = modeConv(fpscr); 2091 int flags = 0; 2092 int sgn1, exp1, sgn2, exp2; 2093 uint32_t mnt1, mnt2, x, result; 2094 2095 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); 2096 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); 2097 2098 if ((x = fp32_process_NaNs(op1, op2, mode, &flags))) { 2099 result = x; 2100 } else { 2101 result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ? 2102 fp32_repack(sgn1, exp1, mnt1) : 2103 fp32_repack(sgn2, exp2, mnt2)); 2104 } 2105 set_fpscr0(fpscr, flags); 2106 return result; 2107} 2108 2109template <> 2110uint64_t 2111fplibMax(uint64_t op1, uint64_t op2, FPSCR &fpscr) 2112{ 2113 int mode = modeConv(fpscr); 2114 int flags = 0; 2115 int sgn1, exp1, sgn2, exp2; 2116 uint64_t mnt1, mnt2, x, result; 2117 2118 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); 2119 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); 2120 2121 if ((x = fp64_process_NaNs(op1, op2, mode, &flags))) { 2122 result = x; 2123 } else { 2124 result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ? 2125 fp64_repack(sgn1, exp1, mnt1) : 2126 fp64_repack(sgn2, exp2, mnt2)); 2127 } 2128 set_fpscr0(fpscr, flags); 2129 return result; 2130} 2131 2132template <> 2133uint32_t 2134fplibMaxNum(uint32_t op1, uint32_t op2, FPSCR &fpscr) 2135{ 2136 fp32_minmaxnum(&op1, &op2, 1); 2137 return fplibMax<uint32_t>(op1, op2, fpscr); 2138} 2139 2140template <> 2141uint64_t 2142fplibMaxNum(uint64_t op1, uint64_t op2, FPSCR &fpscr) 2143{ 2144 fp64_minmaxnum(&op1, &op2, 1); 2145 return fplibMax<uint64_t>(op1, op2, fpscr); 2146} 2147 2148template <> 2149uint32_t 2150fplibMin(uint32_t op1, uint32_t op2, FPSCR &fpscr) 2151{ 2152 int mode = modeConv(fpscr); 2153 int flags = 0; 2154 int sgn1, exp1, sgn2, exp2; 2155 uint32_t mnt1, mnt2, x, result; 2156 2157 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); 2158 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); 2159 2160 if ((x = fp32_process_NaNs(op1, op2, mode, &flags))) { 2161 result = x; 2162 } else { 2163 result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ? 2164 fp32_repack(sgn1, exp1, mnt1) : 2165 fp32_repack(sgn2, exp2, mnt2)); 2166 } 2167 set_fpscr0(fpscr, flags); 2168 return result; 2169} 2170 2171template <> 2172uint64_t 2173fplibMin(uint64_t op1, uint64_t op2, FPSCR &fpscr) 2174{ 2175 int mode = modeConv(fpscr); 2176 int flags = 0; 2177 int sgn1, exp1, sgn2, exp2; 2178 uint64_t mnt1, mnt2, x, result; 2179 2180 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); 2181 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); 2182 2183 if ((x = fp64_process_NaNs(op1, op2, mode, &flags))) { 2184 result = x; 2185 } else { 2186 result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ? 2187 fp64_repack(sgn1, exp1, mnt1) : 2188 fp64_repack(sgn2, exp2, mnt2)); 2189 } 2190 set_fpscr0(fpscr, flags); 2191 return result; 2192} 2193 2194template <> 2195uint32_t 2196fplibMinNum(uint32_t op1, uint32_t op2, FPSCR &fpscr) 2197{ 2198 fp32_minmaxnum(&op1, &op2, 0); 2199 return fplibMin<uint32_t>(op1, op2, fpscr); 2200} 2201 2202template <> 2203uint64_t 2204fplibMinNum(uint64_t op1, uint64_t op2, FPSCR &fpscr) 2205{ 2206 fp64_minmaxnum(&op1, &op2, 0); 2207 return fplibMin<uint64_t>(op1, op2, fpscr); 2208} 2209 2210template <> 2211uint32_t 2212fplibMul(uint32_t op1, uint32_t op2, FPSCR &fpscr) 2213{ 2214 int flags = 0; 2215 uint32_t result = fp32_mul(op1, op2, modeConv(fpscr), &flags); 2216 set_fpscr0(fpscr, flags); 2217 return result; 2218} 2219 2220template <> 2221uint64_t 2222fplibMul(uint64_t op1, uint64_t op2, FPSCR &fpscr) 2223{ 2224 int flags = 0; 2225 uint64_t result = fp64_mul(op1, op2, modeConv(fpscr), &flags); 2226 set_fpscr0(fpscr, flags); 2227 return result; 2228} 2229 2230template <> 2231uint32_t 2232fplibMulX(uint32_t op1, uint32_t op2, FPSCR &fpscr) 2233{ 2234 int mode = modeConv(fpscr); 2235 int flags = 0; 2236 int sgn1, exp1, sgn2, exp2; 2237 uint32_t mnt1, mnt2, result; 2238 2239 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); 2240 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); 2241 2242 result = fp32_process_NaNs(op1, op2, mode, &flags); 2243 if (!result) { 2244 if ((exp1 == 255 && !mnt2) || (exp2 == 255 && !mnt1)) { 2245 result = fp32_FPTwo(sgn1 ^ sgn2); 2246 } else if (exp1 == 255 || exp2 == 255) { 2247 result = fp32_infinity(sgn1 ^ sgn2); 2248 } else if (!mnt1 || !mnt2) { 2249 result = fp32_zero(sgn1 ^ sgn2); 2250 } else { 2251 result = fp32_mul(op1, op2, mode, &flags); 2252 } 2253 } 2254 2255 set_fpscr0(fpscr, flags); 2256 2257 return result; 2258} 2259 2260template <> 2261uint64_t 2262fplibMulX(uint64_t op1, uint64_t op2, FPSCR &fpscr) 2263{ 2264 int mode = modeConv(fpscr); 2265 int flags = 0; 2266 int sgn1, exp1, sgn2, exp2; 2267 uint64_t mnt1, mnt2, result; 2268 2269 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); 2270 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); 2271 2272 result = fp64_process_NaNs(op1, op2, mode, &flags); 2273 if (!result) { 2274 if ((exp1 == 2047 && !mnt2) || (exp2 == 2047 && !mnt1)) { 2275 result = fp64_FPTwo(sgn1 ^ sgn2); 2276 } else if (exp1 == 2047 || exp2 == 2047) { 2277 result = fp64_infinity(sgn1 ^ sgn2); 2278 } else if (!mnt1 || !mnt2) { 2279 result = fp64_zero(sgn1 ^ sgn2); 2280 } else { 2281 result = fp64_mul(op1, op2, mode, &flags); 2282 } 2283 } 2284 2285 set_fpscr0(fpscr, flags); 2286 2287 return result; 2288} 2289 2290template <> 2291uint32_t 2292fplibNeg(uint32_t op) 2293{ 2294 return op ^ (uint32_t)1 << 31; 2295} 2296 2297template <> 2298uint64_t 2299fplibNeg(uint64_t op) 2300{ 2301 return op ^ (uint64_t)1 << 63; 2302} 2303 2304static const uint8_t recip_sqrt_estimate[256] = { 2305 255, 253, 251, 249, 247, 245, 243, 242, 240, 238, 236, 234, 233, 231, 229, 228, 2306 226, 224, 223, 221, 219, 218, 216, 215, 213, 212, 210, 209, 207, 206, 204, 203, 2307 201, 200, 198, 197, 196, 194, 193, 192, 190, 189, 188, 186, 185, 184, 183, 181, 2308 180, 179, 178, 176, 175, 174, 173, 172, 170, 169, 168, 167, 166, 165, 164, 163, 2309 162, 160, 159, 158, 157, 156, 155, 154, 153, 152, 151, 150, 149, 148, 147, 146, 2310 145, 144, 143, 142, 141, 140, 140, 139, 138, 137, 136, 135, 134, 133, 132, 131, 2311 131, 130, 129, 128, 127, 126, 126, 125, 124, 123, 122, 121, 121, 120, 119, 118, 2312 118, 117, 116, 115, 114, 114, 113, 112, 111, 111, 110, 109, 109, 108, 107, 106, 2313 105, 104, 103, 101, 100, 99, 97, 96, 95, 93, 92, 91, 90, 88, 87, 86, 2314 85, 84, 82, 81, 80, 79, 78, 77, 76, 75, 74, 72, 71, 70, 69, 68, 2315 67, 66, 65, 64, 63, 62, 61, 60, 60, 59, 58, 57, 56, 55, 54, 53, 2316 52, 51, 51, 50, 49, 48, 47, 46, 46, 45, 44, 43, 42, 42, 41, 40, 2317 39, 38, 38, 37, 36, 35, 35, 34, 33, 33, 32, 31, 30, 30, 29, 28, 2318 28, 27, 26, 26, 25, 24, 24, 23, 22, 22, 21, 20, 20, 19, 19, 18, 2319 17, 17, 16, 16, 15, 14, 14, 13, 13, 12, 11, 11, 10, 10, 9, 9, 2320 8, 8, 7, 6, 6, 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 0 2321}; 2322 2323template <> 2324uint32_t 2325fplibRSqrtEstimate(uint32_t op, FPSCR &fpscr) 2326{ 2327 int mode = modeConv(fpscr); 2328 int flags = 0; 2329 int sgn, exp; 2330 uint32_t mnt, result; 2331 2332 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags); 2333 2334 if (exp == 255 && (uint32_t)(mnt << 9)) { 2335 result = fp32_process_NaN(op, mode, &flags); 2336 } else if (!mnt) { 2337 result = fp32_infinity(sgn); 2338 flags |= FPLIB_DZC; 2339 } else if (sgn) { 2340 result = fp32_defaultNaN(); 2341 flags |= FPLIB_IOC; 2342 } else if (exp == 255) { 2343 result = fp32_zero(0); 2344 } else { 2345 exp += 8; 2346 mnt = fp32_normalise(mnt, &exp); 2347 mnt = recip_sqrt_estimate[(~exp & 1) << 7 | (mnt >> 24 & 127)]; 2348 result = fp32_pack(0, (380 - exp) >> 1, mnt << 15); 2349 } 2350 2351 set_fpscr0(fpscr, flags); 2352 2353 return result; 2354} 2355 2356template <> 2357uint64_t 2358fplibRSqrtEstimate(uint64_t op, FPSCR &fpscr) 2359{ 2360 int mode = modeConv(fpscr); 2361 int flags = 0; 2362 int sgn, exp; 2363 uint64_t mnt, result; 2364 2365 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags); 2366 2367 if (exp == 2047 && (uint64_t)(mnt << 12)) { 2368 result = fp64_process_NaN(op, mode, &flags); 2369 } else if (!mnt) { 2370 result = fp64_infinity(sgn); 2371 flags |= FPLIB_DZC; 2372 } else if (sgn) { 2373 result = fp64_defaultNaN(); 2374 flags |= FPLIB_IOC; 2375 } else if (exp == 2047) { 2376 result = fp32_zero(0); 2377 } else { 2378 exp += 11; 2379 mnt = fp64_normalise(mnt, &exp); 2380 mnt = recip_sqrt_estimate[(~exp & 1) << 7 | (mnt >> 56 & 127)]; 2381 result = fp64_pack(0, (3068 - exp) >> 1, mnt << 44); 2382 } 2383 2384 set_fpscr0(fpscr, flags); 2385 2386 return result; 2387} 2388 2389template <> 2390uint32_t 2391fplibRSqrtStepFused(uint32_t op1, uint32_t op2, FPSCR &fpscr) 2392{ 2393 int mode = modeConv(fpscr); 2394 int flags = 0; 2395 int sgn1, exp1, sgn2, exp2; 2396 uint32_t mnt1, mnt2, result; 2397 2398 op1 = fplibNeg<uint32_t>(op1); 2399 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); 2400 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); 2401 2402 result = fp32_process_NaNs(op1, op2, mode, &flags); 2403 if (!result) { 2404 if ((exp1 == 255 && !mnt2) || (exp2 == 255 && !mnt1)) { 2405 result = fp32_FPOnePointFive(0); 2406 } else if (exp1 == 255 || exp2 == 255) { 2407 result = fp32_infinity(sgn1 ^ sgn2); 2408 } else { 2409 result = fp32_muladd(fp32_FPThree(0), op1, op2, -1, mode, &flags); 2410 } 2411 } 2412 2413 set_fpscr0(fpscr, flags); 2414 2415 return result; 2416} 2417 2418template <> 2419uint64_t 2420fplibRSqrtStepFused(uint64_t op1, uint64_t op2, FPSCR &fpscr) 2421{ 2422 int mode = modeConv(fpscr); 2423 int flags = 0; 2424 int sgn1, exp1, sgn2, exp2; 2425 uint64_t mnt1, mnt2, result; 2426 2427 op1 = fplibNeg<uint64_t>(op1); 2428 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); 2429 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); 2430 2431 result = fp64_process_NaNs(op1, op2, mode, &flags); 2432 if (!result) { 2433 if ((exp1 == 2047 && !mnt2) || (exp2 == 2047 && !mnt1)) { 2434 result = fp64_FPOnePointFive(0); 2435 } else if (exp1 == 2047 || exp2 == 2047) { 2436 result = fp64_infinity(sgn1 ^ sgn2); 2437 } else { 2438 result = fp64_muladd(fp64_FPThree(0), op1, op2, -1, mode, &flags); 2439 } 2440 } 2441 2442 set_fpscr0(fpscr, flags); 2443 2444 return result; 2445} 2446 2447template <> 2448uint32_t 2449fplibRecipStepFused(uint32_t op1, uint32_t op2, FPSCR &fpscr) 2450{ 2451 int mode = modeConv(fpscr); 2452 int flags = 0; 2453 int sgn1, exp1, sgn2, exp2; 2454 uint32_t mnt1, mnt2, result; 2455 2456 op1 = fplibNeg<uint32_t>(op1); 2457 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); 2458 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); 2459 2460 result = fp32_process_NaNs(op1, op2, mode, &flags); 2461 if (!result) { 2462 if ((exp1 == 255 && !mnt2) || (exp2 == 255 && !mnt1)) { 2463 result = fp32_FPTwo(0); 2464 } else if (exp1 == 255 || exp2 == 255) { 2465 result = fp32_infinity(sgn1 ^ sgn2); 2466 } else { 2467 result = fp32_muladd(fp32_FPTwo(0), op1, op2, 0, mode, &flags); 2468 } 2469 } 2470 2471 set_fpscr0(fpscr, flags); 2472 2473 return result; 2474} 2475 2476template <> 2477uint32_t 2478fplibRecipEstimate(uint32_t op, FPSCR &fpscr) 2479{ 2480 int mode = modeConv(fpscr); 2481 int flags = 0; 2482 int sgn, exp; 2483 uint32_t mnt, result; 2484 2485 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags); 2486 2487 if (exp == 255 && (uint32_t)(mnt << 9)) { 2488 result = fp32_process_NaN(op, mode, &flags); 2489 } else if (exp == 255) { 2490 result = fp32_zero(sgn); 2491 } else if (!mnt) { 2492 result = fp32_infinity(sgn); 2493 flags |= FPLIB_DZC; 2494 } else if (!((uint32_t)(op << 1) >> 22)) { 2495 bool overflow_to_inf = false; 2496 switch (FPCRRounding(fpscr)) { 2497 case FPRounding_TIEEVEN: 2498 overflow_to_inf = true; 2499 break; 2500 case FPRounding_POSINF: 2501 overflow_to_inf = !sgn; 2502 break; 2503 case FPRounding_NEGINF: 2504 overflow_to_inf = sgn; 2505 break; 2506 case FPRounding_ZERO: 2507 overflow_to_inf = false; 2508 break; 2509 default: 2510 assert(0); 2511 } 2512 result = overflow_to_inf ? fp32_infinity(sgn) : fp32_max_normal(sgn); 2513 flags |= FPLIB_OFC | FPLIB_IXC; 2514 } else if (fpscr.fz && exp >= 253) { 2515 result = fp32_zero(sgn); 2516 flags |= FPLIB_UFC; 2517 } else { 2518 exp += 8; 2519 mnt = fp32_normalise(mnt, &exp); 2520 int result_exp = 253 - exp; 2521 uint32_t fraction = (((uint32_t)1 << 19) / (mnt >> 22 | 1) + 1) >> 1; 2522 fraction <<= 15; 2523 if (result_exp == 0) { 2524 fraction >>= 1; 2525 } else if (result_exp == -1) { 2526 fraction >>= 2; 2527 result_exp = 0; 2528 } 2529 result = fp32_pack(sgn, result_exp, fraction); 2530 } 2531 2532 set_fpscr0(fpscr, flags); 2533 2534 return result; 2535} 2536 2537template <> 2538uint64_t 2539fplibRecipEstimate(uint64_t op, FPSCR &fpscr) 2540{ 2541 int mode = modeConv(fpscr); 2542 int flags = 0; 2543 int sgn, exp; 2544 uint64_t mnt, result; 2545 2546 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags); 2547 2548 if (exp == 2047 && (uint64_t)(mnt << 12)) { 2549 result = fp64_process_NaN(op, mode, &flags); 2550 } else if (exp == 2047) { 2551 result = fp64_zero(sgn); 2552 } else if (!mnt) { 2553 result = fp64_infinity(sgn); 2554 flags |= FPLIB_DZC; 2555 } else if (!((uint64_t)(op << 1) >> 51)) { 2556 bool overflow_to_inf = false; 2557 switch (FPCRRounding(fpscr)) { 2558 case FPRounding_TIEEVEN: 2559 overflow_to_inf = true; 2560 break; 2561 case FPRounding_POSINF: 2562 overflow_to_inf = !sgn; 2563 break; 2564 case FPRounding_NEGINF: 2565 overflow_to_inf = sgn; 2566 break; 2567 case FPRounding_ZERO: 2568 overflow_to_inf = false; 2569 break; 2570 default: 2571 assert(0); 2572 } 2573 result = overflow_to_inf ? fp64_infinity(sgn) : fp64_max_normal(sgn); 2574 flags |= FPLIB_OFC | FPLIB_IXC; 2575 } else if (fpscr.fz && exp >= 2045) { 2576 result = fp64_zero(sgn); 2577 flags |= FPLIB_UFC; 2578 } else { 2579 exp += 11; 2580 mnt = fp64_normalise(mnt, &exp); 2581 int result_exp = 2045 - exp; 2582 uint64_t fraction = (((uint32_t)1 << 19) / (mnt >> 54 | 1) + 1) >> 1; 2583 fraction <<= 44; 2584 if (result_exp == 0) { 2585 fraction >>= 1; 2586 } else if (result_exp == -1) { 2587 fraction >>= 2; 2588 result_exp = 0; 2589 } 2590 result = fp64_pack(sgn, result_exp, fraction); 2591 } 2592 2593 set_fpscr0(fpscr, flags); 2594 2595 return result; 2596} 2597 2598template <> 2599uint64_t 2600fplibRecipStepFused(uint64_t op1, uint64_t op2, FPSCR &fpscr) 2601{ 2602 int mode = modeConv(fpscr); 2603 int flags = 0; 2604 int sgn1, exp1, sgn2, exp2; 2605 uint64_t mnt1, mnt2, result; 2606 2607 op1 = fplibNeg<uint64_t>(op1); 2608 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); 2609 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); 2610 2611 result = fp64_process_NaNs(op1, op2, mode, &flags); 2612 if (!result) { 2613 if ((exp1 == 2047 && !mnt2) || (exp2 == 2047 && !mnt1)) { 2614 result = fp64_FPTwo(0); 2615 } else if (exp1 == 2047 || exp2 == 2047) { 2616 result = fp64_infinity(sgn1 ^ sgn2); 2617 } else { 2618 result = fp64_muladd(fp64_FPTwo(0), op1, op2, 0, mode, &flags); 2619 } 2620 } 2621 2622 set_fpscr0(fpscr, flags); 2623 2624 return result; 2625} 2626 2627template <> 2628uint32_t 2629fplibRecpX(uint32_t op, FPSCR &fpscr) 2630{ 2631 int mode = modeConv(fpscr); 2632 int flags = 0; 2633 int sgn, exp; 2634 uint32_t mnt, result; 2635 2636 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags); 2637 2638 if (exp == 255 && (uint32_t)(mnt << 9)) { 2639 result = fp32_process_NaN(op, mode, &flags); 2640 } 2641 else { 2642 if (!mnt) { // Zero and denormals 2643 result = fp32_pack(sgn, 254, 0); 2644 } else { // Infinities and normals 2645 result = fp32_pack(sgn, exp ^ 255, 0); 2646 } 2647 } 2648 2649 set_fpscr0(fpscr, flags); 2650 2651 return result; 2652} 2653 2654template <> 2655uint64_t 2656fplibRecpX(uint64_t op, FPSCR &fpscr) 2657{ 2658 int mode = modeConv(fpscr); 2659 int flags = 0; 2660 int sgn, exp; 2661 uint64_t mnt, result; 2662 2663 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags); 2664 2665 if (exp == 2047 && (uint64_t)(mnt << 12)) { 2666 result = fp64_process_NaN(op, mode, &flags); 2667 } 2668 else { 2669 if (!mnt) { // Zero and denormals 2670 result = fp64_pack(sgn, 2046, 0); 2671 } else { // Infinities and normals 2672 result = fp64_pack(sgn, exp ^ 2047, 0); 2673 } 2674 } 2675 2676 set_fpscr0(fpscr, flags); 2677 2678 return result; 2679} 2680 2681template <> 2682uint32_t 2683fplibRoundInt(uint32_t op, FPRounding rounding, bool exact, FPSCR &fpscr) 2684{ 2685 int mode = modeConv(fpscr); 2686 int flags = 0; 2687 int sgn, exp; 2688 uint32_t mnt, result; 2689 2690 // Unpack using FPCR to determine if subnormals are flushed-to-zero: 2691 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags); 2692 2693 // Handle NaNs, infinities and zeroes: 2694 if (exp == 255 && (uint32_t)(mnt << 9)) { 2695 result = fp32_process_NaN(op, mode, &flags); 2696 } else if (exp == 255) { 2697 result = fp32_infinity(sgn); 2698 } else if (!mnt) { 2699 result = fp32_zero(sgn); 2700 } else if (exp >= 150) { 2701 // There are no fractional bits 2702 result = op; 2703 } else { 2704 // Truncate towards zero: 2705 uint32_t x = 150 - exp >= 32 ? 0 : mnt >> (150 - exp); 2706 int err = exp < 118 ? 1 : 2707 (mnt << 1 >> (149 - exp) & 3) | (mnt << 2 << (exp - 118) != 0); 2708 switch (rounding) { 2709 case FPRounding_TIEEVEN: 2710 x += (err == 3 || (err == 2 && (x & 1))); 2711 break; 2712 case FPRounding_POSINF: 2713 x += err && !sgn; 2714 break; 2715 case FPRounding_NEGINF: 2716 x += err && sgn; 2717 break; 2718 case FPRounding_ZERO: 2719 break; 2720 case FPRounding_TIEAWAY: 2721 x += err >> 1; 2722 break; 2723 default: 2724 assert(0); 2725 } 2726 2727 if (x == 0) { 2728 result = fp32_zero(sgn); 2729 } else { 2730 exp = 150; 2731 mnt = fp32_normalise(x, &exp); 2732 result = fp32_pack(sgn, exp + 8, mnt >> 8); 2733 } 2734 2735 if (err && exact) 2736 flags |= FPLIB_IXC; 2737 } 2738 2739 set_fpscr0(fpscr, flags); 2740 2741 return result; 2742} 2743 2744template <> 2745uint64_t 2746fplibRoundInt(uint64_t op, FPRounding rounding, bool exact, FPSCR &fpscr) 2747{ 2748 int mode = modeConv(fpscr); 2749 int flags = 0; 2750 int sgn, exp; 2751 uint64_t mnt, result; 2752 2753 // Unpack using FPCR to determine if subnormals are flushed-to-zero: 2754 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags); 2755 2756 // Handle NaNs, infinities and zeroes: 2757 if (exp == 2047 && (uint64_t)(mnt << 12)) { 2758 result = fp64_process_NaN(op, mode, &flags); 2759 } else if (exp == 2047) { 2760 result = fp64_infinity(sgn); 2761 } else if (!mnt) { 2762 result = fp64_zero(sgn); 2763 } else if (exp >= 1075) { 2764 // There are no fractional bits 2765 result = op; 2766 } else { 2767 // Truncate towards zero: 2768 uint64_t x = 1075 - exp >= 64 ? 0 : mnt >> (1075 - exp); 2769 int err = exp < 1011 ? 1 : 2770 (mnt << 1 >> (1074 - exp) & 3) | (mnt << 2 << (exp - 1011) != 0); 2771 switch (rounding) { 2772 case FPRounding_TIEEVEN: 2773 x += (err == 3 || (err == 2 && (x & 1))); 2774 break; 2775 case FPRounding_POSINF: 2776 x += err && !sgn; 2777 break; 2778 case FPRounding_NEGINF: 2779 x += err && sgn; 2780 break; 2781 case FPRounding_ZERO: 2782 break; 2783 case FPRounding_TIEAWAY: 2784 x += err >> 1; 2785 break; 2786 default: 2787 assert(0); 2788 } 2789 2790 if (x == 0) { 2791 result = fp64_zero(sgn); 2792 } else { 2793 exp = 1075; 2794 mnt = fp64_normalise(x, &exp); 2795 result = fp64_pack(sgn, exp + 11, mnt >> 11); 2796 } 2797 2798 if (err && exact) 2799 flags |= FPLIB_IXC; 2800 } 2801 2802 set_fpscr0(fpscr, flags); 2803 2804 return result; 2805} 2806 2807template <> 2808uint32_t 2809fplibSqrt(uint32_t op, FPSCR &fpscr) 2810{ 2811 int flags = 0; 2812 uint32_t result = fp32_sqrt(op, modeConv(fpscr), &flags); 2813 set_fpscr0(fpscr, flags); 2814 return result; 2815} 2816 2817template <> 2818uint64_t 2819fplibSqrt(uint64_t op, FPSCR &fpscr) 2820{ 2821 int flags = 0; 2822 uint64_t result = fp64_sqrt(op, modeConv(fpscr), &flags); 2823 set_fpscr0(fpscr, flags); 2824 return result; 2825} 2826 2827template <> 2828uint32_t 2829fplibSub(uint32_t op1, uint32_t op2, FPSCR &fpscr) 2830{ 2831 int flags = 0; 2832 uint32_t result = fp32_add(op1, op2, 1, modeConv(fpscr), &flags); 2833 set_fpscr0(fpscr, flags); 2834 return result; 2835} 2836 2837template <> 2838uint64_t 2839fplibSub(uint64_t op1, uint64_t op2, FPSCR &fpscr) 2840{ 2841 int flags = 0; 2842 uint64_t result = fp64_add(op1, op2, 1, modeConv(fpscr), &flags); 2843 set_fpscr0(fpscr, flags); 2844 return result; 2845} 2846 2847static uint64_t 2848FPToFixed_64(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding, 2849 int *flags) 2850{ 2851 uint64_t x; 2852 int err; 2853 2854 if (exp > 1023 + 63) { 2855 *flags = FPLIB_IOC; 2856 return ((uint64_t)!u << 63) - !sgn; 2857 } 2858 2859 x = lsr64(mnt << 11, 1023 + 63 - exp); 2860 err = (exp > 1023 + 63 - 2 ? 0 : 2861 (lsr64(mnt << 11, 1023 + 63 - 2 - exp) & 3) | 2862 !!(mnt << 11 & (lsl64(1, 1023 + 63 - 2 - exp) - 1))); 2863 2864 switch (rounding) { 2865 case FPRounding_TIEEVEN: 2866 x += (err == 3 || (err == 2 && (x & 1))); 2867 break; 2868 case FPRounding_POSINF: 2869 x += err && !sgn; 2870 break; 2871 case FPRounding_NEGINF: 2872 x += err && sgn; 2873 break; 2874 case FPRounding_ZERO: 2875 break; 2876 case FPRounding_TIEAWAY: 2877 x += err >> 1; 2878 break; 2879 default: 2880 assert(0); 2881 } 2882 2883 if (u ? sgn && x : x > ((uint64_t)1 << 63) - !sgn) { 2884 *flags = FPLIB_IOC; 2885 return ((uint64_t)!u << 63) - !sgn; 2886 } 2887 2888 if (err) { 2889 *flags = FPLIB_IXC; 2890 } 2891 2892 return sgn ? -x : x; 2893} 2894 2895static uint32_t 2896FPToFixed_32(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding, 2897 int *flags) 2898{ 2899 uint64_t x = FPToFixed_64(sgn, exp, mnt, u, rounding, flags); 2900 if (u ? x >= (uint64_t)1 << 32 : 2901 !(x < (uint64_t)1 << 31 || 2902 (uint64_t)-x <= (uint64_t)1 << 31)) { 2903 *flags = FPLIB_IOC; 2904 x = ((uint32_t)!u << 31) - !sgn; 2905 } 2906 return x; 2907} 2908 2909template <> 2910uint32_t 2911fplibFPToFixed(uint32_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr) 2912{ 2913 int flags = 0; 2914 int sgn, exp; 2915 uint32_t mnt, result; 2916 2917 // Unpack using FPCR to determine if subnormals are flushed-to-zero: 2918 fp32_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags); 2919 2920 // If NaN, set cumulative flag or take exception: 2921 if (exp == 255 && (uint32_t)(mnt << 9)) { 2922 flags = FPLIB_IOC; 2923 result = 0; 2924 } else { 2925 result = FPToFixed_32(sgn, exp + 1023 - 127 + fbits, 2926 (uint64_t)mnt << (52 - 23), u, rounding, &flags); 2927 } 2928 2929 set_fpscr0(fpscr, flags); 2930 2931 return result; 2932} 2933 2934template <> 2935uint32_t 2936fplibFPToFixed(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr) 2937{ 2938 int flags = 0; 2939 int sgn, exp; 2940 uint64_t mnt; 2941 uint32_t result; 2942 2943 // Unpack using FPCR to determine if subnormals are flushed-to-zero: 2944 fp64_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags); 2945 2946 // If NaN, set cumulative flag or take exception: 2947 if (exp == 2047 && (uint64_t)(mnt << 12)) { 2948 flags = FPLIB_IOC; 2949 result = 0; 2950 } else { 2951 result = FPToFixed_32(sgn, exp + fbits, mnt, u, rounding, &flags); 2952 } 2953 2954 set_fpscr0(fpscr, flags); 2955 2956 return result; 2957} 2958 2959template <> 2960uint64_t 2961fplibFPToFixed(uint32_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr) 2962{ 2963 int flags = 0; 2964 int sgn, exp; 2965 uint32_t mnt; 2966 uint64_t result; 2967 2968 // Unpack using FPCR to determine if subnormals are flushed-to-zero: 2969 fp32_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags); 2970 2971 // If NaN, set cumulative flag or take exception: 2972 if (exp == 255 && (uint32_t)(mnt << 9)) { 2973 flags = FPLIB_IOC; 2974 result = 0; 2975 } else { 2976 result = FPToFixed_64(sgn, exp + 1023 - 127 + fbits, 2977 (uint64_t)mnt << (52 - 23), u, rounding, &flags); 2978 } 2979 2980 set_fpscr0(fpscr, flags); 2981 2982 return result; 2983} 2984 2985template <> 2986uint64_t 2987fplibFPToFixed(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr) 2988{ 2989 int flags = 0; 2990 int sgn, exp; 2991 uint64_t mnt, result; 2992 2993 // Unpack using FPCR to determine if subnormals are flushed-to-zero: 2994 fp64_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags); 2995 2996 // If NaN, set cumulative flag or take exception: 2997 if (exp == 2047 && (uint64_t)(mnt << 12)) { 2998 flags = FPLIB_IOC; 2999 result = 0; 3000 } else { 3001 result = FPToFixed_64(sgn, exp + fbits, mnt, u, rounding, &flags); 3002 } 3003 3004 set_fpscr0(fpscr, flags); 3005 3006 return result; 3007} 3008 3009static uint32_t 3010fp32_cvtf(uint64_t a, int fbits, int u, int mode, int *flags) 3011{ 3012 int x_sgn = !u && a >> 63; 3013 int x_exp = 190 - fbits; 3014 uint64_t x_mnt = x_sgn ? -a : a; 3015 3016 // Handle zero: 3017 if (!x_mnt) { 3018 return fp32_zero(0); 3019 } 3020 3021 // Normalise and convert to 32 bits, collapsing error into bottom bit: 3022 x_mnt = fp64_normalise(x_mnt, &x_exp); 3023 x_mnt = x_mnt >> 31 | !!(uint32_t)(x_mnt << 1); 3024 3025 return fp32_round(x_sgn, x_exp, x_mnt, mode, flags); 3026} 3027 3028static uint64_t 3029fp64_cvtf(uint64_t a, int fbits, int u, int mode, int *flags) 3030{ 3031 int x_sgn = !u && a >> 63; 3032 int x_exp = 1024 + 62 - fbits; 3033 uint64_t x_mnt = x_sgn ? -a : a; 3034 3035 // Handle zero: 3036 if (!x_mnt) { 3037 return fp64_zero(0); 3038 } 3039 3040 x_mnt = fp64_normalise(x_mnt, &x_exp); 3041 3042 return fp64_round(x_sgn, x_exp, x_mnt << 1, mode, flags); 3043} 3044 3045template <> 3046uint32_t 3047fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr) 3048{ 3049 int flags = 0; 3050 uint32_t res = fp32_cvtf(op, fbits, u, 3051 (int)rounding | ((uint32_t)fpscr >> 22 & 12), 3052 &flags); 3053 set_fpscr0(fpscr, flags); 3054 return res; 3055} 3056 3057template <> 3058uint64_t 3059fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr) 3060{ 3061 int flags = 0; 3062 uint64_t res = fp64_cvtf(op, fbits, u, 3063 (int)rounding | ((uint32_t)fpscr >> 22 & 12), 3064 &flags); 3065 set_fpscr0(fpscr, flags); 3066 return res; 3067} 3068 3069} |