1/*
2* Copyright (c) 2012-2013 ARM Limited
3* All rights reserved
4*
5* The license below extends only to copyright in the software and shall
6* not be construed as granting a license to any other intellectual
7* property including but not limited to intellectual property relating
8* to a hardware implementation of the functionality of the software
9* licensed hereunder. You may use the software subject to the license
10* terms below provided that you ensure that this notice is replicated
11* unmodified and in its entirety in all distributions of the software,
12* modified or unmodified, in source code or in binary form.
13*
14* Redistribution and use in source and binary forms, with or without
15* modification, are permitted provided that the following conditions are
16* met: redistributions of source code must retain the above copyright
17* notice, this list of conditions and the following disclaimer;
18* redistributions in binary form must reproduce the above copyright
19* notice, this list of conditions and the following disclaimer in the
20* documentation and/or other materials provided with the distribution;
21* neither the name of the copyright holders nor the names of its
22* contributors may be used to endorse or promote products derived from
23* this software without specific prior written permission.
24*
25* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36*
37* Authors: Edmund Grimley Evans
38* Thomas Grocutt
39*/
40
41#include <stdint.h>
42
43#include <cassert>
44
45#include "fplib.hh"
46
47namespace ArmISA
48{
49
50#define FPLIB_RN 0
51#define FPLIB_RP 1
52#define FPLIB_RM 2
53#define FPLIB_RZ 3
54#define FPLIB_FZ 4
55#define FPLIB_DN 8
56#define FPLIB_AHP 16
57
58#define FPLIB_IDC 128 // Input Denormal
59#define FPLIB_IXC 16 // Inexact
60#define FPLIB_UFC 8 // Underflow
61#define FPLIB_OFC 4 // Overflow
62#define FPLIB_DZC 2 // Division by Zero
63#define FPLIB_IOC 1 // Invalid Operation
64
65static inline uint16_t
66lsl16(uint16_t x, uint32_t shift)
67{
68 return shift < 16 ? x << shift : 0;
69}
70
71static inline uint16_t
72lsr16(uint16_t x, uint32_t shift)
73{
74 return shift < 16 ? x >> shift : 0;
75}
76
77static inline uint32_t
78lsl32(uint32_t x, uint32_t shift)
79{
80 return shift < 32 ? x << shift : 0;
81}
82
83static inline uint32_t
84lsr32(uint32_t x, uint32_t shift)
85{
86 return shift < 32 ? x >> shift : 0;
87}
88
89static inline uint64_t
90lsl64(uint64_t x, uint32_t shift)
91{
92 return shift < 64 ? x << shift : 0;
93}
94
95static inline uint64_t
96lsr64(uint64_t x, uint32_t shift)
97{
98 return shift < 64 ? x >> shift : 0;
99}
100
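// 128-bit shift helpers operating on an (x1:x0) pair, where x0 holds the
// low 64 bits and x1 the high 64 bits of the value.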
101static inline void
102lsl128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift)
103{
104    if (shift == 0) {
        // A zero shift cannot take the general path below: "x0 >> (64 - shift)"
        // would shift a 64-bit value by 64, which is undefined behaviour.
        *r1 = x1;
        *r0 = x0;
    } else if (shift < 64) {
105 *r1 = x1 << shift | x0 >> (64 - shift);
106 *r0 = x0 << shift;
107 } else if (shift < 128) {
108 *r1 = x0 << (shift - 64);
109 *r0 = 0;
110 } else {
111 *r1 = 0;
112 *r0 = 0;
113 }
114}
115
116static inline void
117lsr128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift)
118{
119    if (shift == 0) {
        // A zero shift cannot take the general path below: "x1 << (64 - shift)"
        // would shift a 64-bit value by 64, which is undefined behaviour.
        *r0 = x0;
        *r1 = x1;
    } else if (shift < 64) {
120 *r0 = x0 >> shift | x1 << (64 - shift);
121 *r1 = x1 >> shift;
122 } else if (shift < 128) {
123 *r0 = x1 >> (shift - 64);
124 *r1 = 0;
125 } else {
126 *r0 = 0;
127 *r1 = 0;
128 }
129}
130
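// Full product of two operands of at most 62 bits each, built from 31-bit
// limbs using the Karatsuba identity a0*b1 + a1*b0 = (a0+a1)*(b0+b1) - p0 - p2.
// The low 64 bits of the product are returned in *x0, the high bits in *x1.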
131static inline void
132mul62x62(uint64_t *x0, uint64_t *x1, uint64_t a, uint64_t b)
133{
134 uint32_t mask = ((uint32_t)1 << 31) - 1;
135 uint64_t a0 = a & mask;
136 uint64_t a1 = a >> 31 & mask;
137 uint64_t b0 = b & mask;
138 uint64_t b1 = b >> 31 & mask;
139 uint64_t p0 = a0 * b0;
140 uint64_t p2 = a1 * b1;
141 uint64_t p1 = (a0 + a1) * (b0 + b1) - p0 - p2;
142 uint64_t s0 = p0;
143 uint64_t s1 = (s0 >> 31) + p1;
144 uint64_t s2 = (s1 >> 31) + p2;
145 *x0 = (s0 & mask) | (s1 & mask) << 31 | s2 << 62;
146 *x1 = s2 >> 2;
147}
148
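// Full product of a 64-bit and a 32-bit operand; the low 64 bits are
// returned in *x0, the high bits in *x1.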
149static inline void
150mul64x32(uint64_t *x0, uint64_t *x1, uint64_t a, uint32_t b)
151{
152 uint64_t t0 = (uint64_t)(uint32_t)a * b;
153 uint64_t t1 = (t0 >> 32) + (a >> 32) * b;
154 *x0 = t1 << 32 | (uint32_t)t0;
155 *x1 = t1 >> 32;
156}
157
158static inline void
159add128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0,
160 uint64_t b1)
161{
162 *x0 = a0 + b0;
163 *x1 = a1 + b1 + (*x0 < a0);
164}
165
166static inline void
167sub128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0,
168 uint64_t b1)
169{
170 *x0 = a0 - b0;
171 *x1 = a1 - b1 - (*x0 > a0);
172}
173
174static inline int
175cmp128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1)
176{
177 return (a1 < b1 ? -1 : a1 > b1 ? 1 : a0 < b0 ? -1 : a0 > b0 ? 1 : 0);
178}
179
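// Normalise a non-zero mantissa so that its most significant bit is set,
// reducing *exp by the size of the shift applied (binary search over
// power-of-two shift amounts).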
180static inline uint16_t
181fp16_normalise(uint16_t mnt, int *exp)
182{
183 int shift;
184
185 if (!mnt) {
186 return 0;
187 }
188
189 for (shift = 8; shift; shift >>= 1) {
190 if (!(mnt >> (16 - shift))) {
191 mnt <<= shift;
192 *exp -= shift;
193 }
194 }
195 return mnt;
196}
197
198static inline uint32_t
199fp32_normalise(uint32_t mnt, int *exp)
200{
201 int shift;
202
203 if (!mnt) {
204 return 0;
205 }
206
207 for (shift = 16; shift; shift >>= 1) {
208 if (!(mnt >> (32 - shift))) {
209 mnt <<= shift;
210 *exp -= shift;
211 }
212 }
213 return mnt;
214}
215
216static inline uint64_t
217fp64_normalise(uint64_t mnt, int *exp)
218{
219 int shift;
220
221 if (!mnt) {
222 return 0;
223 }
224
225 for (shift = 32; shift; shift >>= 1) {
226 if (!(mnt >> (64 - shift))) {
227 mnt <<= shift;
228 *exp -= shift;
229 }
230 }
231 return mnt;
232}
233
234static inline void
235fp128_normalise(uint64_t *mnt0, uint64_t *mnt1, int *exp)
236{
237 uint64_t x0 = *mnt0;
238 uint64_t x1 = *mnt1;
239 int shift;
240
241 if (!x0 && !x1) {
242 return;
243 }
244
245 if (!x1) {
246 x1 = x0;
247 x0 = 0;
248 *exp -= 64;
249 }
250
251 for (shift = 32; shift; shift >>= 1) {
252 if (!(x1 >> (64 - shift))) {
253 x1 = x1 << shift | x0 >> (64 - shift);
254 x0 <<= shift;
255 *exp -= shift;
256 }
257 }
258
259 *mnt0 = x0;
260 *mnt1 = x1;
261}
262
263static inline uint16_t
264fp16_pack(uint16_t sgn, uint16_t exp, uint16_t mnt)
265{
266 return sgn << 15 | exp << 10 | (mnt & (((uint16_t)1 << 10) - 1));
267}
268
269static inline uint32_t
270fp32_pack(uint32_t sgn, uint32_t exp, uint32_t mnt)
271{
272 return sgn << 31 | exp << 23 | (mnt & (((uint32_t)1 << 23) - 1));
273}
274
275static inline uint64_t
276fp64_pack(uint64_t sgn, uint64_t exp, uint64_t mnt)
277{
278 return (uint64_t)sgn << 63 | exp << 52 | (mnt & (((uint64_t)1 << 52) - 1));
279}
280
281static inline uint16_t
282fp16_zero(int sgn)
283{
284 return fp16_pack(sgn, 0, 0);
285}
286
287static inline uint32_t
288fp32_zero(int sgn)
289{
290 return fp32_pack(sgn, 0, 0);
291}
292
293static inline uint64_t
294fp64_zero(int sgn)
295{
296 return fp64_pack(sgn, 0, 0);
297}
298
299static inline uint16_t
300fp16_max_normal(int sgn)
301{
302 return fp16_pack(sgn, 30, -1);
303}
304
305static inline uint32_t
306fp32_max_normal(int sgn)
307{
308 return fp32_pack(sgn, 254, -1);
309}
310
311static inline uint64_t
312fp64_max_normal(int sgn)
313{
314 return fp64_pack(sgn, 2046, -1);
315}
316
317static inline uint16_t
318fp16_infinity(int sgn)
319{
320 return fp16_pack(sgn, 31, 0);
321}
322
323static inline uint32_t
324fp32_infinity(int sgn)
325{
326 return fp32_pack(sgn, 255, 0);
327}
328
329static inline uint64_t
330fp64_infinity(int sgn)
331{
332 return fp64_pack(sgn, 2047, 0);
333}
334
335static inline uint16_t
336fp16_defaultNaN()
337{
338 return fp16_pack(0, 31, (uint16_t)1 << 9);
339}
340
341static inline uint32_t
342fp32_defaultNaN()
343{
344 return fp32_pack(0, 255, (uint32_t)1 << 22);
345}
346
347static inline uint64_t
348fp64_defaultNaN()
349{
350 return fp64_pack(0, 2047, (uint64_t)1 << 51);
351}
352
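// Unpack sign, biased exponent and mantissa, setting the implicit leading
// bit for normal numbers. For single and double precision, subnormal inputs
// are flushed to zero when FPLIB_FZ is set, raising Input Denormal.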
353static inline void
354fp16_unpack(int *sgn, int *exp, uint16_t *mnt, uint16_t x, int mode,
355 int *flags)
356{
357 *sgn = x >> 15;
358 *exp = x >> 10 & 31;
359 *mnt = x & (((uint16_t)1 << 10) - 1);
360
361 // Handle subnormals:
362 if (*exp) {
363 *mnt |= (uint16_t)1 << 10;
364 } else {
365 ++*exp;
366 // There is no flush to zero in this case!
367 }
368}
369
370static inline void
371fp32_unpack(int *sgn, int *exp, uint32_t *mnt, uint32_t x, int mode,
372 int *flags)
373{
374 *sgn = x >> 31;
375 *exp = x >> 23 & 255;
376 *mnt = x & (((uint32_t)1 << 23) - 1);
377
378 // Handle subnormals:
379 if (*exp) {
380 *mnt |= (uint32_t)1 << 23;
381 } else {
382 ++*exp;
383 if ((mode & FPLIB_FZ) && *mnt) {
384 *flags |= FPLIB_IDC;
385 *mnt = 0;
386 }
387 }
388}
389
390static inline void
391fp64_unpack(int *sgn, int *exp, uint64_t *mnt, uint64_t x, int mode,
392 int *flags)
393{
394 *sgn = x >> 63;
395 *exp = x >> 52 & 2047;
396 *mnt = x & (((uint64_t)1 << 52) - 1);
397
398 // Handle subnormals:
399 if (*exp) {
400 *mnt |= (uint64_t)1 << 52;
401 } else {
402 ++*exp;
403 if ((mode & FPLIB_FZ) && *mnt) {
404 *flags |= FPLIB_IDC;
405 *mnt = 0;
406 }
407 }
408}
409
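// Turn a signalling NaN into a quiet NaN, setting the top mantissa bit and
// raising Invalid Operation, and substitute the default NaN when FPLIB_DN
// is selected.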
410static inline uint32_t
411fp32_process_NaN(uint32_t a, int mode, int *flags)
412{
413 if (!(a >> 22 & 1)) {
414 *flags |= FPLIB_IOC;
415 a |= (uint32_t)1 << 22;
416 }
417 return mode & FPLIB_DN ? fp32_defaultNaN() : a;
418}
419
420static inline uint64_t
421fp64_process_NaN(uint64_t a, int mode, int *flags)
422{
423 if (!(a >> 51 & 1)) {
424 *flags |= FPLIB_IOC;
425 a |= (uint64_t)1 << 51;
426 }
427 return mode & FPLIB_DN ? fp64_defaultNaN() : a;
428}
429
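// NaN propagation for two-operand (and, further below, three-operand)
// functions: signalling NaNs take priority over quiet NaNs, and earlier
// operands over later ones. Returns 0 when no operand is a NaN.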
430static uint32_t
431fp32_process_NaNs(uint32_t a, uint32_t b, int mode, int *flags)
432{
433 int a_exp = a >> 23 & 255;
434 uint32_t a_mnt = a & (((uint32_t)1 << 23) - 1);
435 int b_exp = b >> 23 & 255;
436 uint32_t b_mnt = b & (((uint32_t)1 << 23) - 1);
437
438 // Handle signalling NaNs:
439 if (a_exp == 255 && a_mnt && !(a_mnt >> 22 & 1))
440 return fp32_process_NaN(a, mode, flags);
441 if (b_exp == 255 && b_mnt && !(b_mnt >> 22 & 1))
442 return fp32_process_NaN(b, mode, flags);
443
444 // Handle quiet NaNs:
445 if (a_exp == 255 && a_mnt)
446 return fp32_process_NaN(a, mode, flags);
447 if (b_exp == 255 && b_mnt)
448 return fp32_process_NaN(b, mode, flags);
449
450 return 0;
451}
452
453static uint64_t
454fp64_process_NaNs(uint64_t a, uint64_t b, int mode, int *flags)
455{
456 int a_exp = a >> 52 & 2047;
457 uint64_t a_mnt = a & (((uint64_t)1 << 52) - 1);
458 int b_exp = b >> 52 & 2047;
459 uint64_t b_mnt = b & (((uint64_t)1 << 52) - 1);
460
461 // Handle signalling NaNs:
462 if (a_exp == 2047 && a_mnt && !(a_mnt >> 51 & 1))
463 return fp64_process_NaN(a, mode, flags);
464 if (b_exp == 2047 && b_mnt && !(b_mnt >> 51 & 1))
465 return fp64_process_NaN(b, mode, flags);
466
467 // Handle quiet NaNs:
468 if (a_exp == 2047 && a_mnt)
469 return fp64_process_NaN(a, mode, flags);
470 if (b_exp == 2047 && b_mnt)
471 return fp64_process_NaN(b, mode, flags);
472
473 return 0;
474}
475
476static uint32_t
477fp32_process_NaNs3(uint32_t a, uint32_t b, uint32_t c, int mode, int *flags)
478{
479 int a_exp = a >> 23 & 255;
480 uint32_t a_mnt = a & (((uint32_t)1 << 23) - 1);
481 int b_exp = b >> 23 & 255;
482 uint32_t b_mnt = b & (((uint32_t)1 << 23) - 1);
483 int c_exp = c >> 23 & 255;
484 uint32_t c_mnt = c & (((uint32_t)1 << 23) - 1);
485
486 // Handle signalling NaNs:
487 if (a_exp == 255 && a_mnt && !(a_mnt >> 22 & 1))
488 return fp32_process_NaN(a, mode, flags);
489 if (b_exp == 255 && b_mnt && !(b_mnt >> 22 & 1))
490 return fp32_process_NaN(b, mode, flags);
491 if (c_exp == 255 && c_mnt && !(c_mnt >> 22 & 1))
492 return fp32_process_NaN(c, mode, flags);
493
494 // Handle quiet NaNs:
495 if (a_exp == 255 && a_mnt)
496 return fp32_process_NaN(a, mode, flags);
497 if (b_exp == 255 && b_mnt)
498 return fp32_process_NaN(b, mode, flags);
499 if (c_exp == 255 && c_mnt)
500 return fp32_process_NaN(c, mode, flags);
501
502 return 0;
503}
504
505static uint64_t
506fp64_process_NaNs3(uint64_t a, uint64_t b, uint64_t c, int mode, int *flags)
507{
508 int a_exp = a >> 52 & 2047;
509 uint64_t a_mnt = a & (((uint64_t)1 << 52) - 1);
510 int b_exp = b >> 52 & 2047;
511 uint64_t b_mnt = b & (((uint64_t)1 << 52) - 1);
512 int c_exp = c >> 52 & 2047;
513 uint64_t c_mnt = c & (((uint64_t)1 << 52) - 1);
514
515 // Handle signalling NaNs:
516 if (a_exp == 2047 && a_mnt && !(a_mnt >> 51 & 1))
517 return fp64_process_NaN(a, mode, flags);
518 if (b_exp == 2047 && b_mnt && !(b_mnt >> 51 & 1))
519 return fp64_process_NaN(b, mode, flags);
520 if (c_exp == 2047 && c_mnt && !(c_mnt >> 51 & 1))
521 return fp64_process_NaN(c, mode, flags);
522
523 // Handle quiet NaNs:
524 if (a_exp == 2047 && a_mnt)
525 return fp64_process_NaN(a, mode, flags);
526 if (b_exp == 2047 && b_mnt)
527 return fp64_process_NaN(b, mode, flags);
528 if (c_exp == 2047 && c_mnt)
529 return fp64_process_NaN(c, mode, flags);
530
531 return 0;
532}
533
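// Common rounding helpers. The incoming mantissa carries extra low-order
// bits beyond the destination precision; these are collapsed into a two-bit
// "error" value (zero, below, exactly or above half a unit in the last
// place) which drives round-to-nearest-even, towards plus/minus infinity,
// towards zero and to-odd rounding, and sets the cumulative exception flags.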
534static uint16_t
535fp16_round_(int sgn, int exp, uint16_t mnt, int rm, int mode, int *flags)
536{
537 int biased_exp; // non-negative exponent value for result
538 uint16_t int_mant; // mantissa for result, less than (1 << 11)
539 int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5
540
541 assert(rm != FPRounding_TIEAWAY);
542
543 // There is no flush to zero in this case!
544
545 // The bottom 5 bits of mnt are orred together:
546 mnt = (uint16_t)1 << 12 | mnt >> 4 | ((mnt & 31) != 0);
547
548 if (exp > 0) {
549 biased_exp = exp;
550 int_mant = mnt >> 2;
551 error = mnt & 3;
552 } else {
553 biased_exp = 0;
554 int_mant = lsr16(mnt, 3 - exp);
555 error = (lsr16(mnt, 1 - exp) & 3) | !!(mnt & (lsl16(1, 1 - exp) - 1));
556 }
557
558 if (!biased_exp && error) { // xx should also check fpscr_val<11>
559 *flags |= FPLIB_UFC;
560 }
561
562 // Round up:
563 if ((rm == FPLIB_RN && (error == 3 ||
564 (error == 2 && (int_mant & 1)))) ||
565 (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) {
566 ++int_mant;
567 if (int_mant == (uint32_t)1 << 10) {
568 // Rounded up from denormalized to normalized
569 biased_exp = 1;
570 }
571 if (int_mant == (uint32_t)1 << 11) {
572 // Rounded up to next exponent
573 ++biased_exp;
574 int_mant >>= 1;
575 }
576 }
577
578 // Handle rounding to odd aka Von Neumann rounding:
579 if (error && rm == FPRounding_ODD)
580 int_mant |= 1;
581
582 // Handle overflow:
583 if (!(mode & FPLIB_AHP)) {
584 if (biased_exp >= 31) {
585 *flags |= FPLIB_OFC | FPLIB_IXC;
586 if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) ||
587 (rm == FPLIB_RM && sgn)) {
588 return fp16_infinity(sgn);
589 } else {
590 return fp16_max_normal(sgn);
591 }
592 }
593 } else {
594 if (biased_exp >= 32) {
595 *flags |= FPLIB_IOC;
596 return fp16_pack(sgn, 31, -1);
597 }
598 }
599
600 if (error) {
601 *flags |= FPLIB_IXC;
602 }
603
604 return fp16_pack(sgn, biased_exp, int_mant);
605}
606
607static uint32_t
608fp32_round_(int sgn, int exp, uint32_t mnt, int rm, int mode, int *flags)
609{
610 int biased_exp; // non-negative exponent value for result
611 uint32_t int_mant; // mantissa for result, less than (1 << 24)
612 int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5
613
614 assert(rm != FPRounding_TIEAWAY);
615
616 // Flush to zero:
617 if ((mode & FPLIB_FZ) && exp < 1) {
618 *flags |= FPLIB_UFC;
619 return fp32_zero(sgn);
620 }
621
622 // The bottom 8 bits of mnt are orred together:
623 mnt = (uint32_t)1 << 25 | mnt >> 7 | ((mnt & 255) != 0);
624
625 if (exp > 0) {
626 biased_exp = exp;
627 int_mant = mnt >> 2;
628 error = mnt & 3;
629 } else {
630 biased_exp = 0;
631 int_mant = lsr32(mnt, 3 - exp);
632 error = (lsr32(mnt, 1 - exp) & 3) | !!(mnt & (lsl32(1, 1 - exp) - 1));
633 }
634
635 if (!biased_exp && error) { // xx should also check fpscr_val<11>
636 *flags |= FPLIB_UFC;
637 }
638
639 // Round up:
640 if ((rm == FPLIB_RN && (error == 3 ||
641 (error == 2 && (int_mant & 1)))) ||
642 (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) {
643 ++int_mant;
644 if (int_mant == (uint32_t)1 << 23) {
645 // Rounded up from denormalized to normalized
646 biased_exp = 1;
647 }
648 if (int_mant == (uint32_t)1 << 24) {
649 // Rounded up to next exponent
650 ++biased_exp;
651 int_mant >>= 1;
652 }
653 }
654
655 // Handle rounding to odd aka Von Neumann rounding:
656 if (error && rm == FPRounding_ODD)
657 int_mant |= 1;
658
659 // Handle overflow:
660 if (biased_exp >= 255) {
661 *flags |= FPLIB_OFC | FPLIB_IXC;
662 if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) ||
663 (rm == FPLIB_RM && sgn)) {
664 return fp32_infinity(sgn);
665 } else {
666 return fp32_max_normal(sgn);
667 }
668 }
669
670 if (error) {
671 *flags |= FPLIB_IXC;
672 }
673
674 return fp32_pack(sgn, biased_exp, int_mant);
675}
676
677static uint32_t
678fp32_round(int sgn, int exp, uint32_t mnt, int mode, int *flags)
679{
680 return fp32_round_(sgn, exp, mnt, mode & 3, mode, flags);
681}
682
683static uint64_t
684fp64_round_(int sgn, int exp, uint64_t mnt, int rm, int mode, int *flags)
685{
686 int biased_exp; // non-negative exponent value for result
687 uint64_t int_mant; // mantissa for result, less than (1 << 52)
688 int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5
689
690 assert(rm != FPRounding_TIEAWAY);
691
692 // Flush to zero:
693 if ((mode & FPLIB_FZ) && exp < 1) {
694 *flags |= FPLIB_UFC;
695 return fp64_zero(sgn);
696 }
697
698 // The bottom 11 bits of mnt are orred together:
699    mnt = (uint64_t)1 << 54 | mnt >> 10 | ((mnt & 0x7ff) != 0);
700
701 if (exp > 0) {
702 biased_exp = exp;
703 int_mant = mnt >> 2;
704 error = mnt & 3;
705 } else {
706 biased_exp = 0;
707 int_mant = lsr64(mnt, 3 - exp);
708 error = (lsr64(mnt, 1 - exp) & 3) | !!(mnt & (lsl64(1, 1 - exp) - 1));
709 }
710
711 if (!biased_exp && error) { // xx should also check fpscr_val<11>
712 *flags |= FPLIB_UFC;
713 }
714
715 // Round up:
716 if ((rm == FPLIB_RN && (error == 3 ||
717 (error == 2 && (int_mant & 1)))) ||
718 (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) {
719 ++int_mant;
720 if (int_mant == (uint64_t)1 << 52) {
721 // Rounded up from denormalized to normalized
722 biased_exp = 1;
723 }
724 if (int_mant == (uint64_t)1 << 53) {
725 // Rounded up to next exponent
726 ++biased_exp;
727 int_mant >>= 1;
728 }
729 }
730
731 // Handle rounding to odd aka Von Neumann rounding:
732 if (error && rm == FPRounding_ODD)
733 int_mant |= 1;
734
735 // Handle overflow:
736 if (biased_exp >= 2047) {
737 *flags |= FPLIB_OFC | FPLIB_IXC;
738 if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) ||
739 (rm == FPLIB_RM && sgn)) {
740 return fp64_infinity(sgn);
741 } else {
742 return fp64_max_normal(sgn);
743 }
744 }
745
746 if (error) {
747 *flags |= FPLIB_IXC;
748 }
749
750 return fp64_pack(sgn, biased_exp, int_mant);
751}
752
753static uint64_t
754fp64_round(int sgn, int exp, uint64_t mnt, int mode, int *flags)
755{
756 return fp64_round_(sgn, exp, mnt, mode & 3, mode, flags);
757}
758
759static int
760fp32_compare_eq(uint32_t a, uint32_t b, int mode, int *flags)
761{
762 int a_sgn, a_exp, b_sgn, b_exp;
763 uint32_t a_mnt, b_mnt;
764
765 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
766 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
767
768 if ((a_exp == 255 && (uint32_t)(a_mnt << 9)) ||
769 (b_exp == 255 && (uint32_t)(b_mnt << 9))) {
770 if ((a_exp == 255 && (uint32_t)(a_mnt << 9) && !(a >> 22 & 1)) ||
771 (b_exp == 255 && (uint32_t)(b_mnt << 9) && !(b >> 22 & 1)))
772 *flags |= FPLIB_IOC;
773 return 0;
774 }
775 return a == b || (!a_mnt && !b_mnt);
776}
777
778static int
779fp32_compare_ge(uint32_t a, uint32_t b, int mode, int *flags)
780{
781 int a_sgn, a_exp, b_sgn, b_exp;
782 uint32_t a_mnt, b_mnt;
783
784 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
785 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
786
787 if ((a_exp == 255 && (uint32_t)(a_mnt << 9)) ||
788 (b_exp == 255 && (uint32_t)(b_mnt << 9))) {
789 *flags |= FPLIB_IOC;
790 return 0;
791 }
792 if (!a_mnt && !b_mnt)
793 return 1;
794 if (a_sgn != b_sgn)
795 return b_sgn;
796 if (a_exp != b_exp)
797 return a_sgn ^ (a_exp > b_exp);
798 if (a_mnt != b_mnt)
799 return a_sgn ^ (a_mnt > b_mnt);
800 return 1;
801}
802
803static int
804fp32_compare_gt(uint32_t a, uint32_t b, int mode, int *flags)
805{
806 int a_sgn, a_exp, b_sgn, b_exp;
807 uint32_t a_mnt, b_mnt;
808
809 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
810 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
811
812 if ((a_exp == 255 && (uint32_t)(a_mnt << 9)) ||
813 (b_exp == 255 && (uint32_t)(b_mnt << 9))) {
814 *flags |= FPLIB_IOC;
815 return 0;
816 }
817 if (!a_mnt && !b_mnt)
818 return 0;
819 if (a_sgn != b_sgn)
820 return b_sgn;
821 if (a_exp != b_exp)
822 return a_sgn ^ (a_exp > b_exp);
823 if (a_mnt != b_mnt)
824 return a_sgn ^ (a_mnt > b_mnt);
825 return 0;
826}
827
828static int
829fp64_compare_eq(uint64_t a, uint64_t b, int mode, int *flags)
830{
831 int a_sgn, a_exp, b_sgn, b_exp;
832 uint64_t a_mnt, b_mnt;
833
834 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
835 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
836
837 if ((a_exp == 2047 && (uint64_t)(a_mnt << 12)) ||
838 (b_exp == 2047 && (uint64_t)(b_mnt << 12))) {
839 if ((a_exp == 2047 && (uint64_t)(a_mnt << 12) && !(a >> 51 & 1)) ||
840 (b_exp == 2047 && (uint64_t)(b_mnt << 12) && !(b >> 51 & 1)))
841 *flags |= FPLIB_IOC;
842 return 0;
843 }
844 return a == b || (!a_mnt && !b_mnt);
845}
846
847static int
848fp64_compare_ge(uint64_t a, uint64_t b, int mode, int *flags)
849{
850 int a_sgn, a_exp, b_sgn, b_exp;
851 uint64_t a_mnt, b_mnt;
852
853 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
854 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
855
856 if ((a_exp == 2047 && (uint64_t)(a_mnt << 12)) ||
857 (b_exp == 2047 && (uint64_t)(b_mnt << 12))) {
858 *flags |= FPLIB_IOC;
859 return 0;
860 }
861 if (!a_mnt && !b_mnt)
862 return 1;
863 if (a_sgn != b_sgn)
864 return b_sgn;
865 if (a_exp != b_exp)
866 return a_sgn ^ (a_exp > b_exp);
867 if (a_mnt != b_mnt)
868 return a_sgn ^ (a_mnt > b_mnt);
869 return 1;
870}
871
872static int
873fp64_compare_gt(uint64_t a, uint64_t b, int mode, int *flags)
874{
875 int a_sgn, a_exp, b_sgn, b_exp;
876 uint64_t a_mnt, b_mnt;
877
878 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
879 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
880
881 if ((a_exp == 2047 && (uint64_t)(a_mnt << 12)) ||
882 (b_exp == 2047 && (uint64_t)(b_mnt << 12))) {
883 *flags |= FPLIB_IOC;
884 return 0;
885 }
886 if (!a_mnt && !b_mnt)
887 return 0;
888 if (a_sgn != b_sgn)
889 return b_sgn;
890 if (a_exp != b_exp)
891 return a_sgn ^ (a_exp > b_exp);
892 if (a_mnt != b_mnt)
893 return a_sgn ^ (a_mnt > b_mnt);
894 return 0;
895}
896
897static uint32_t
898fp32_add(uint32_t a, uint32_t b, int neg, int mode, int *flags)
899{
900 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
901 uint32_t a_mnt, b_mnt, x, x_mnt;
902
903 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
904 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
905
906 if ((x = fp32_process_NaNs(a, b, mode, flags))) {
907 return x;
908 }
909
910 b_sgn ^= neg;
911
912 // Handle infinities and zeroes:
913 if (a_exp == 255 && b_exp == 255 && a_sgn != b_sgn) {
914 *flags |= FPLIB_IOC;
915 return fp32_defaultNaN();
916 } else if (a_exp == 255) {
917 return fp32_infinity(a_sgn);
918 } else if (b_exp == 255) {
919 return fp32_infinity(b_sgn);
920 } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) {
921 return fp32_zero(a_sgn);
922 }
923
924 a_mnt <<= 3;
925 b_mnt <<= 3;
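    // Align the operand with the smaller exponent, collapsing any
    // shifted-out bits into a sticky bit: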
926 if (a_exp >= b_exp) {
927 b_mnt = (lsr32(b_mnt, a_exp - b_exp) |
928 !!(b_mnt & (lsl32(1, a_exp - b_exp) - 1)));
929 b_exp = a_exp;
930 } else {
931 a_mnt = (lsr32(a_mnt, b_exp - a_exp) |
932 !!(a_mnt & (lsl32(1, b_exp - a_exp) - 1)));
933 a_exp = b_exp;
934 }
935 x_sgn = a_sgn;
936 x_exp = a_exp;
937 if (a_sgn == b_sgn) {
938 x_mnt = a_mnt + b_mnt;
939 } else if (a_mnt >= b_mnt) {
940 x_mnt = a_mnt - b_mnt;
941 } else {
942 x_sgn ^= 1;
943 x_mnt = b_mnt - a_mnt;
944 }
945
946 if (!x_mnt) {
947 // Sign of exact zero result depends on rounding mode
948 return fp32_zero((mode & 3) == 2);
949 }
950
951 x_mnt = fp32_normalise(x_mnt, &x_exp);
952
953 return fp32_round(x_sgn, x_exp + 5, x_mnt << 1, mode, flags);
954}
955
956static uint64_t
957fp64_add(uint64_t a, uint64_t b, int neg, int mode, int *flags)
958{
959 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
960 uint64_t a_mnt, b_mnt, x, x_mnt;
961
962 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
963 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
964
965 if ((x = fp64_process_NaNs(a, b, mode, flags))) {
966 return x;
967 }
968
969 b_sgn ^= neg;
970
971 // Handle infinities and zeroes:
972 if (a_exp == 2047 && b_exp == 2047 && a_sgn != b_sgn) {
973 *flags |= FPLIB_IOC;
974 return fp64_defaultNaN();
975 } else if (a_exp == 2047) {
976 return fp64_infinity(a_sgn);
977 } else if (b_exp == 2047) {
978 return fp64_infinity(b_sgn);
979 } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) {
980 return fp64_zero(a_sgn);
981 }
982
983 a_mnt <<= 3;
984 b_mnt <<= 3;
985 if (a_exp >= b_exp) {
986 b_mnt = (lsr64(b_mnt, a_exp - b_exp) |
987 !!(b_mnt & (lsl64(1, a_exp - b_exp) - 1)));
988 b_exp = a_exp;
989 } else {
990 a_mnt = (lsr64(a_mnt, b_exp - a_exp) |
991 !!(a_mnt & (lsl64(1, b_exp - a_exp) - 1)));
992 a_exp = b_exp;
993 }
994 x_sgn = a_sgn;
995 x_exp = a_exp;
996 if (a_sgn == b_sgn) {
997 x_mnt = a_mnt + b_mnt;
998 } else if (a_mnt >= b_mnt) {
999 x_mnt = a_mnt - b_mnt;
1000 } else {
1001 x_sgn ^= 1;
1002 x_mnt = b_mnt - a_mnt;
1003 }
1004
1005 if (!x_mnt) {
1006 // Sign of exact zero result depends on rounding mode
1007 return fp64_zero((mode & 3) == 2);
1008 }
1009
1010 x_mnt = fp64_normalise(x_mnt, &x_exp);
1011
1012 return fp64_round(x_sgn, x_exp + 8, x_mnt << 1, mode, flags);
1013}
1014
1015static uint32_t
1016fp32_mul(uint32_t a, uint32_t b, int mode, int *flags)
1017{
1018 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1019 uint32_t a_mnt, b_mnt, x;
1020 uint64_t x_mnt;
1021
1022 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1023 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1024
1025 if ((x = fp32_process_NaNs(a, b, mode, flags))) {
1026 return x;
1027 }
1028
1029 // Handle infinities and zeroes:
1030 if ((a_exp == 255 && !b_mnt) || (b_exp == 255 && !a_mnt)) {
1031 *flags |= FPLIB_IOC;
1032 return fp32_defaultNaN();
1033 } else if (a_exp == 255 || b_exp == 255) {
1034 return fp32_infinity(a_sgn ^ b_sgn);
1035 } else if (!a_mnt || !b_mnt) {
1036 return fp32_zero(a_sgn ^ b_sgn);
1037 }
1038
1039 // Multiply and normalise:
1040 x_sgn = a_sgn ^ b_sgn;
1041 x_exp = a_exp + b_exp - 110;
1042 x_mnt = (uint64_t)a_mnt * b_mnt;
1043 x_mnt = fp64_normalise(x_mnt, &x_exp);
1044
1045 // Convert to 32 bits, collapsing error into bottom bit:
1046 x_mnt = lsr64(x_mnt, 31) | !!lsl64(x_mnt, 33);
1047
1048 return fp32_round(x_sgn, x_exp, x_mnt, mode, flags);
1049}
1050
1051static uint64_t
1052fp64_mul(uint64_t a, uint64_t b, int mode, int *flags)
1053{
1054 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1055 uint64_t a_mnt, b_mnt, x;
1056 uint64_t x0_mnt, x1_mnt;
1057
1058 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1059 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1060
1061 if ((x = fp64_process_NaNs(a, b, mode, flags))) {
1062 return x;
1063 }
1064
1065 // Handle infinities and zeroes:
1066 if ((a_exp == 2047 && !b_mnt) || (b_exp == 2047 && !a_mnt)) {
1067 *flags |= FPLIB_IOC;
1068 return fp64_defaultNaN();
1069 } else if (a_exp == 2047 || b_exp == 2047) {
1070 return fp64_infinity(a_sgn ^ b_sgn);
1071 } else if (!a_mnt || !b_mnt) {
1072 return fp64_zero(a_sgn ^ b_sgn);
1073 }
1074
1075 // Multiply and normalise:
1076 x_sgn = a_sgn ^ b_sgn;
1077 x_exp = a_exp + b_exp - 1000;
1078 mul62x62(&x0_mnt, &x1_mnt, a_mnt, b_mnt);
1079 fp128_normalise(&x0_mnt, &x1_mnt, &x_exp);
1080
1081 // Convert to 64 bits, collapsing error into bottom bit:
1082 x0_mnt = x1_mnt << 1 | !!x0_mnt;
1083
1084 return fp64_round(x_sgn, x_exp, x0_mnt, mode, flags);
1085}
1086
1087static uint32_t
1088fp32_muladd(uint32_t a, uint32_t b, uint32_t c, int scale,
1089 int mode, int *flags)
1090{
1091 int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp;
1092 uint32_t a_mnt, b_mnt, c_mnt, x;
1093 uint64_t x_mnt, y_mnt;
1094
1095 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1096 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1097 fp32_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags);
1098
1099 x = fp32_process_NaNs3(a, b, c, mode, flags);
1100
1101 // Quiet NaN added to product of zero and infinity:
1102 if (a_exp == 255 && (a_mnt >> 22 & 1) &&
1103 ((!b_mnt && c_exp == 255 && !(uint32_t)(c_mnt << 9)) ||
1104 (!c_mnt && b_exp == 255 && !(uint32_t)(b_mnt << 9)))) {
1105 x = fp32_defaultNaN();
1106 *flags |= FPLIB_IOC;
1107 }
1108
1109 if (x) {
1110 return x;
1111 }
1112
1113 // Handle infinities and zeroes:
1114 if ((b_exp == 255 && !c_mnt) ||
1115 (c_exp == 255 && !b_mnt) ||
1116 (a_exp == 255 && (b_exp == 255 || c_exp == 255) &&
1117 (a_sgn != (b_sgn ^ c_sgn)))) {
1118 *flags |= FPLIB_IOC;
1119 return fp32_defaultNaN();
1120 }
1121 if (a_exp == 255)
1122 return fp32_infinity(a_sgn);
1123 if (b_exp == 255 || c_exp == 255)
1124 return fp32_infinity(b_sgn ^ c_sgn);
1125 if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn))
1126 return fp32_zero(a_sgn);
1127
1128 x_sgn = a_sgn;
1129 x_exp = a_exp + 13;
1130 x_mnt = (uint64_t)a_mnt << 27;
1131
1132 // Multiply:
1133 y_sgn = b_sgn ^ c_sgn;
1134 y_exp = b_exp + c_exp - 113;
1135 y_mnt = (uint64_t)b_mnt * c_mnt << 3;
1136 if (!y_mnt) {
1137 y_exp = x_exp;
1138 }
1139
1140 // Add:
1141 if (x_exp >= y_exp) {
1142 y_mnt = (lsr64(y_mnt, x_exp - y_exp) |
1143 !!(y_mnt & (lsl64(1, x_exp - y_exp) - 1)));
1144 y_exp = x_exp;
1145 } else {
1146 x_mnt = (lsr64(x_mnt, y_exp - x_exp) |
1147 !!(x_mnt & (lsl64(1, y_exp - x_exp) - 1)));
1148 x_exp = y_exp;
1149 }
1150 if (x_sgn == y_sgn) {
1151 x_mnt = x_mnt + y_mnt;
1152 } else if (x_mnt >= y_mnt) {
1153 x_mnt = x_mnt - y_mnt;
1154 } else {
1155 x_sgn ^= 1;
1156 x_mnt = y_mnt - x_mnt;
1157 }
1158
1159 if (!x_mnt) {
1160 // Sign of exact zero result depends on rounding mode
1161 return fp32_zero((mode & 3) == 2);
1162 }
1163
1164 // Normalise and convert to 32 bits, collapsing error into bottom bit:
1165 x_mnt = fp64_normalise(x_mnt, &x_exp);
1166 x_mnt = x_mnt >> 31 | !!(uint32_t)(x_mnt << 1);
1167
1168 return fp32_round(x_sgn, x_exp + scale, x_mnt, mode, flags);
1169}
1170
1171static uint64_t
1172fp64_muladd(uint64_t a, uint64_t b, uint64_t c, int scale,
1173 int mode, int *flags)
1174{
1175 int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp;
1176 uint64_t a_mnt, b_mnt, c_mnt, x;
1177 uint64_t x0_mnt, x1_mnt, y0_mnt, y1_mnt;
1178
1179 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1180 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1181 fp64_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags);
1182
1183 x = fp64_process_NaNs3(a, b, c, mode, flags);
1184
1185 // Quiet NaN added to product of zero and infinity:
1186 if (a_exp == 2047 && (a_mnt >> 51 & 1) &&
1187 ((!b_mnt && c_exp == 2047 && !(uint64_t)(c_mnt << 12)) ||
1188 (!c_mnt && b_exp == 2047 && !(uint64_t)(b_mnt << 12)))) {
1189 x = fp64_defaultNaN();
1190 *flags |= FPLIB_IOC;
1191 }
1192
1193 if (x) {
1194 return x;
1195 }
1196
1197 // Handle infinities and zeroes:
1198 if ((b_exp == 2047 && !c_mnt) ||
1199 (c_exp == 2047 && !b_mnt) ||
1200 (a_exp == 2047 && (b_exp == 2047 || c_exp == 2047) &&
1201 (a_sgn != (b_sgn ^ c_sgn)))) {
1202 *flags |= FPLIB_IOC;
1203 return fp64_defaultNaN();
1204 }
1205 if (a_exp == 2047)
1206 return fp64_infinity(a_sgn);
1207 if (b_exp == 2047 || c_exp == 2047)
1208 return fp64_infinity(b_sgn ^ c_sgn);
1209 if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn))
1210 return fp64_zero(a_sgn);
1211
1212 x_sgn = a_sgn;
1213 x_exp = a_exp + 11;
1214 x0_mnt = 0;
1215 x1_mnt = a_mnt;
1216
1217 // Multiply:
1218 y_sgn = b_sgn ^ c_sgn;
1219 y_exp = b_exp + c_exp - 1003;
1220 mul62x62(&y0_mnt, &y1_mnt, b_mnt, c_mnt << 3);
1221 if (!y0_mnt && !y1_mnt) {
1222 y_exp = x_exp;
1223 }
1224
1225 // Add:
1226 if (x_exp >= y_exp) {
1227 uint64_t t0, t1;
1228 lsl128(&t0, &t1, y0_mnt, y1_mnt,
1229 x_exp - y_exp < 128 ? 128 - (x_exp - y_exp) : 0);
1230 lsr128(&y0_mnt, &y1_mnt, y0_mnt, y1_mnt, x_exp - y_exp);
1231 y0_mnt |= !!(t0 | t1);
1232 y_exp = x_exp;
1233 } else {
1234 uint64_t t0, t1;
1235 lsl128(&t0, &t1, x0_mnt, x1_mnt,
1236 y_exp - x_exp < 128 ? 128 - (y_exp - x_exp) : 0);
1237 lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y_exp - x_exp);
1238 x0_mnt |= !!(t0 | t1);
1239 x_exp = y_exp;
1240 }
1241 if (x_sgn == y_sgn) {
1242 add128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y0_mnt, y1_mnt);
1243 } else if (cmp128(x0_mnt, x1_mnt, y0_mnt, y1_mnt) >= 0) {
1244 sub128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y0_mnt, y1_mnt);
1245 } else {
1246 x_sgn ^= 1;
1247 sub128(&x0_mnt, &x1_mnt, y0_mnt, y1_mnt, x0_mnt, x1_mnt);
1248 }
1249
1250 if (!x0_mnt && !x1_mnt) {
1251 // Sign of exact zero result depends on rounding mode
1252 return fp64_zero((mode & 3) == 2);
1253 }
1254
1255 // Normalise and convert to 64 bits, collapsing error into bottom bit:
1256 fp128_normalise(&x0_mnt, &x1_mnt, &x_exp);
1257 x0_mnt = x1_mnt << 1 | !!x0_mnt;
1258
1259 return fp64_round(x_sgn, x_exp + scale, x0_mnt, mode, flags);
1260}
1261
1262static uint32_t
1263fp32_div(uint32_t a, uint32_t b, int mode, int *flags)
1264{
1265 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1266 uint32_t a_mnt, b_mnt, x;
1267 uint64_t x_mnt;
1268
1269 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1270 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1271
1272 if ((x = fp32_process_NaNs(a, b, mode, flags)))
1273 return x;
1274
1275 // Handle infinities and zeroes:
1276 if ((a_exp == 255 && b_exp == 255) || (!a_mnt && !b_mnt)) {
1277 *flags |= FPLIB_IOC;
1278 return fp32_defaultNaN();
1279 }
1280 if (a_exp == 255 || !b_mnt) {
1281 if (a_exp != 255)
1282 *flags |= FPLIB_DZC;
1283 return fp32_infinity(a_sgn ^ b_sgn);
1284 }
1285 if (!a_mnt || b_exp == 255)
1286 return fp32_zero(a_sgn ^ b_sgn);
1287
1288 // Divide, setting bottom bit if inexact:
1289 a_mnt = fp32_normalise(a_mnt, &a_exp);
1290 x_sgn = a_sgn ^ b_sgn;
1291 x_exp = a_exp - b_exp + 172;
1292 x_mnt = ((uint64_t)a_mnt << 18) / b_mnt;
1293 x_mnt |= (x_mnt * b_mnt != (uint64_t)a_mnt << 18);
1294
1295 // Normalise and convert to 32 bits, collapsing error into bottom bit:
1296 x_mnt = fp64_normalise(x_mnt, &x_exp);
1297 x_mnt = x_mnt >> 31 | !!(uint32_t)(x_mnt << 1);
1298
1299 return fp32_round(x_sgn, x_exp, x_mnt, mode, flags);
1300}
1301
1302static uint64_t
1303fp64_div(uint64_t a, uint64_t b, int mode, int *flags)
1304{
1305 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp, c;
1306 uint64_t a_mnt, b_mnt, x, x_mnt, x0_mnt, x1_mnt;
1307
1308 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1309 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1310
1311 if ((x = fp64_process_NaNs(a, b, mode, flags)))
1312 return x;
1313
1314 // Handle infinities and zeroes:
1315 if ((a_exp == 2047 && b_exp == 2047) || (!a_mnt && !b_mnt)) {
1316 *flags |= FPLIB_IOC;
1317 return fp64_defaultNaN();
1318 }
1319 if (a_exp == 2047 || !b_mnt) {
1320 if (a_exp != 2047)
1321 *flags |= FPLIB_DZC;
1322 return fp64_infinity(a_sgn ^ b_sgn);
1323 }
1324 if (!a_mnt || b_exp == 2047)
1325 return fp64_zero(a_sgn ^ b_sgn);
1326
1327 // Find reciprocal of divisor with Newton-Raphson:
1328 a_mnt = fp64_normalise(a_mnt, &a_exp);
1329 b_mnt = fp64_normalise(b_mnt, &b_exp);
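    // Initial reciprocal estimate accurate to about 32 bits, followed by one
    // Newton-Raphson refinement step, r = r * (2 - b * r):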
1330 x_mnt = ~(uint64_t)0 / (b_mnt >> 31);
1331 mul64x32(&x0_mnt, &x1_mnt, b_mnt, x_mnt);
1332 sub128(&x0_mnt, &x1_mnt, 0, (uint64_t)1 << 32, x0_mnt, x1_mnt);
1333 lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 32);
1334 mul64x32(&x0_mnt, &x1_mnt, x0_mnt, x_mnt);
1335 lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 33);
1336
1337 // Multiply by dividend:
1338 x_sgn = a_sgn ^ b_sgn;
1339 x_exp = a_exp - b_exp + 1031;
1340 mul62x62(&x0_mnt, &x1_mnt, x0_mnt, a_mnt >> 2); // xx 62x62 is enough
1341 lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 4);
1342 x_mnt = x1_mnt;
1343
1344 // This is an underestimate, so try adding one:
1345 mul62x62(&x0_mnt, &x1_mnt, b_mnt >> 2, x_mnt + 1); // xx 62x62 is enough
1346 c = cmp128(x0_mnt, x1_mnt, 0, a_mnt >> 11);
1347 if (c <= 0) {
1348 ++x_mnt;
1349 }
1350
1351 x_mnt = fp64_normalise(x_mnt, &x_exp);
1352
1353 return fp64_round(x_sgn, x_exp, x_mnt << 1 | !!c, mode, flags);
1354}
1355
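// Copy the accumulated FPLIB_* exception flags into the cumulative status
// bits of the FPSCR.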
1356static void
1357set_fpscr0(FPSCR &fpscr, int flags)
1358{
1359 if (flags & FPLIB_IDC) {
1360 fpscr.idc = 1;
1361 }
1362 if (flags & FPLIB_IOC) {
1363 fpscr.ioc = 1;
1364 }
1365 if (flags & FPLIB_DZC) {
1366 fpscr.dzc = 1;
1367 }
1368 if (flags & FPLIB_OFC) {
1369 fpscr.ofc = 1;
1370 }
1371 if (flags & FPLIB_UFC) {
1372 fpscr.ufc = 1;
1373 }
1374 if (flags & FPLIB_IXC) {
1375 fpscr.ixc = 1;
1376 }
1377}
1378
1379static uint32_t
1380fp32_sqrt(uint32_t a, int mode, int *flags)
1381{
1382 int a_sgn, a_exp, x_sgn, x_exp;
1383 uint32_t a_mnt, x, x_mnt;
1384 uint64_t t0, t1;
1385
1386 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1387
1388 // Handle NaNs:
1389 if (a_exp == 255 && (uint32_t)(a_mnt << 9))
1390 return fp32_process_NaN(a, mode, flags);
1391
1392 // Handle infinities and zeroes:
1393 if (!a_mnt) {
1394 return fp32_zero(a_sgn);
1395 }
1396 if (a_exp == 255 && !a_sgn) {
1397 return fp32_infinity(a_sgn);
1398 }
1399 if (a_sgn) {
1400 *flags |= FPLIB_IOC;
1401 return fp32_defaultNaN();
1402 }
1403
1404 a_mnt = fp32_normalise(a_mnt, &a_exp);
1405 if (!(a_exp & 1)) {
1406 ++a_exp;
1407 a_mnt >>= 1;
1408 }
1409
1410 // x = (a * 3 + 5) / 8
1411 x = (a_mnt >> 2) + (a_mnt >> 3) + (5 << 28);
1412
1413 // x = (a / x + x) / 2; // 16-bit accuracy
1414 x = (a_mnt / (x >> 15) + (x >> 16)) << 15;
1415
1416 // x = (a / x + x) / 2; // 16-bit accuracy
1417 x = (a_mnt / (x >> 15) + (x >> 16)) << 15;
1418
1419 // x = (a / x + x) / 2; // 32-bit accuracy
1420 x = ((((uint64_t)a_mnt << 32) / x) >> 2) + (x >> 1);
1421
1422 x_sgn = 0;
1423 x_exp = (a_exp + 147) >> 1;
1424 x_mnt = ((x - (1 << 5)) >> 6) + 1;
1425 t1 = (uint64_t)x_mnt * x_mnt;
1426 t0 = (uint64_t)a_mnt << 19;
1427 if (t1 > t0) {
1428 --x_mnt;
1429 }
1430
1431 x_mnt = fp32_normalise(x_mnt, &x_exp);
1432
1433 return fp32_round(x_sgn, x_exp, x_mnt << 1 | (t1 != t0), mode, flags);
1434}
1435
1436static uint64_t
1437fp64_sqrt(uint64_t a, int mode, int *flags)
1438{
1439 int a_sgn, a_exp, x_sgn, x_exp, c;
1440 uint64_t a_mnt, x_mnt, r, x0, x1;
1441 uint32_t x;
1442
1443 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1444
1445 // Handle NaNs:
1446 if (a_exp == 2047 && (uint64_t)(a_mnt << 12)) {
1447 return fp64_process_NaN(a, mode, flags);
1448 }
1449
1450 // Handle infinities and zeroes:
1451 if (!a_mnt)
1452 return fp64_zero(a_sgn);
1453 if (a_exp == 2047 && !a_sgn)
1454 return fp64_infinity(a_sgn);
1455 if (a_sgn) {
1456 *flags |= FPLIB_IOC;
1457 return fp64_defaultNaN();
1458 }
1459
1460 a_mnt = fp64_normalise(a_mnt, &a_exp);
1461 if (a_exp & 1) {
1462 ++a_exp;
1463 a_mnt >>= 1;
1464 }
1465
1466 // x = (a * 3 + 5) / 8
1467 x = (a_mnt >> 34) + (a_mnt >> 35) + (5 << 28);
1468
1469 // x = (a / x + x) / 2; // 16-bit accuracy
1470 x = ((a_mnt >> 32) / (x >> 15) + (x >> 16)) << 15;
1471
1472 // x = (a / x + x) / 2; // 16-bit accuracy
1473 x = ((a_mnt >> 32) / (x >> 15) + (x >> 16)) << 15;
1474
1475 // x = (a / x + x) / 2; // 32-bit accuracy
1476 x = ((a_mnt / x) >> 2) + (x >> 1);
1477
1478 // r = 1 / x; // 32-bit accuracy
1479 r = ((uint64_t)1 << 62) / x;
1480
1481 // r = r * (2 - x * r); // 64-bit accuracy
1482 mul64x32(&x0, &x1, -(uint64_t)x * r << 1, r);
1483 lsr128(&x0, &x1, x0, x1, 31);
1484
1485 // x = (x + a * r) / 2; // 64-bit accuracy
1486 mul62x62(&x0, &x1, a_mnt >> 10, x0 >> 2);
1487 lsl128(&x0, &x1, x0, x1, 5);
1488 lsr128(&x0, &x1, x0, x1, 56);
1489
1490 x0 = ((uint64_t)x << 31) + (x0 >> 1);
1491
1492 x_sgn = 0;
1493 x_exp = (a_exp + 1053) >> 1;
1494 x_mnt = x0;
1495 x_mnt = ((x_mnt - (1 << 8)) >> 9) + 1;
1496 mul62x62(&x0, &x1, x_mnt, x_mnt);
1497 lsl128(&x0, &x1, x0, x1, 19);
1498 c = cmp128(x0, x1, 0, a_mnt);
1499 if (c > 0)
1500 --x_mnt;
1501
1502 x_mnt = fp64_normalise(x_mnt, &x_exp);
1503
1504 return fp64_round(x_sgn, x_exp, x_mnt << 1 | !!c, mode, flags);
1505}
1506
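// Extract the FPSCR control bits consumed by the fp* helpers as their mode
// argument: rounding mode (bits 23:22), flush-to-zero (bit 24) and default
// NaN (bit 25), laid out to match the FPLIB_* mode constants.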
1507static int
1508modeConv(FPSCR fpscr)
1509{
1510 return (((int) fpscr) >> 22) & 0xF;
1511}
1512
1513static void
1514set_fpscr(FPSCR &fpscr, int flags)
1515{
1516 // translate back to FPSCR
1517 bool underflow = false;
1518 if (flags & FPLIB_IDC) {
1519 fpscr.idc = 1;
1520 }
1521 if (flags & FPLIB_IOC) {
1522 fpscr.ioc = 1;
1523 }
1524 if (flags & FPLIB_DZC) {
1525 fpscr.dzc = 1;
1526 }
1527 if (flags & FPLIB_OFC) {
1528 fpscr.ofc = 1;
1529 }
1530 if (flags & FPLIB_UFC) {
1531 underflow = true; //xx Why is this required?
1532 fpscr.ufc = 1;
1533 }
1534 if ((flags & FPLIB_IXC) && !(underflow && fpscr.fz)) {
1535 fpscr.ixc = 1;
1536 }
1537}
1538
1539template <>
1540bool
1541fplibCompareEQ(uint32_t a, uint32_t b, FPSCR &fpscr)
1542{
1543 int flags = 0;
1544 int x = fp32_compare_eq(a, b, modeConv(fpscr), &flags);
1545 set_fpscr(fpscr, flags);
1546 return x;
1547}
1548
1549template <>
1550bool
1551fplibCompareGE(uint32_t a, uint32_t b, FPSCR &fpscr)
1552{
1553 int flags = 0;
1554 int x = fp32_compare_ge(a, b, modeConv(fpscr), &flags);
1555 set_fpscr(fpscr, flags);
1556 return x;
1557}
1558
1559template <>
1560bool
1561fplibCompareGT(uint32_t a, uint32_t b, FPSCR &fpscr)
1562{
1563 int flags = 0;
1564 int x = fp32_compare_gt(a, b, modeConv(fpscr), &flags);
1565 set_fpscr(fpscr, flags);
1566 return x;
1567}
1568
1569template <>
1570bool
1571fplibCompareEQ(uint64_t a, uint64_t b, FPSCR &fpscr)
1572{
1573 int flags = 0;
1574 int x = fp64_compare_eq(a, b, modeConv(fpscr), &flags);
1575 set_fpscr(fpscr, flags);
1576 return x;
1577}
1578
1579template <>
1580bool
1581fplibCompareGE(uint64_t a, uint64_t b, FPSCR &fpscr)
1582{
1583 int flags = 0;
1584 int x = fp64_compare_ge(a, b, modeConv(fpscr), &flags);
1585 set_fpscr(fpscr, flags);
1586 return x;
1587}
1588
1589template <>
1590bool
1591fplibCompareGT(uint64_t a, uint64_t b, FPSCR &fpscr)
1592{
1593 int flags = 0;
1594 int x = fp64_compare_gt(a, b, modeConv(fpscr), &flags);
1595 set_fpscr(fpscr, flags);
1596 return x;
1597}
1598
1599template <>
1600uint32_t
1601fplibAbs(uint32_t op)
1602{
1603 return op & ~((uint32_t)1 << 31);
1604}
1605
1606template <>
1607uint64_t
1608fplibAbs(uint64_t op)
1609{
1610 return op & ~((uint64_t)1 << 63);
1611}
1612
1613template <>
1614uint32_t
1615fplibAdd(uint32_t op1, uint32_t op2, FPSCR &fpscr)
1616{
1617 int flags = 0;
1618 uint32_t result = fp32_add(op1, op2, 0, modeConv(fpscr), &flags);
1619 set_fpscr0(fpscr, flags);
1620 return result;
1621}
1622
1623template <>
1624uint64_t
1625fplibAdd(uint64_t op1, uint64_t op2, FPSCR &fpscr)
1626{
1627 int flags = 0;
1628 uint64_t result = fp64_add(op1, op2, 0, modeConv(fpscr), &flags);
1629 set_fpscr0(fpscr, flags);
1630 return result;
1631}
1632
1633template <>
1634int
1635fplibCompare(uint32_t op1, uint32_t op2, bool signal_nans, FPSCR &fpscr)
1636{
1637 int mode = modeConv(fpscr);
1638 int flags = 0;
1639 int sgn1, exp1, sgn2, exp2, result;
1640 uint32_t mnt1, mnt2;
1641
1642 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
1643 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
1644
1645 if ((exp1 == 255 && (uint32_t)(mnt1 << 9)) ||
1646 (exp2 == 255 && (uint32_t)(mnt2 << 9))) {
1647 result = 3;
1648 if ((exp1 == 255 && (uint32_t)(mnt1 << 9) && !(mnt1 >> 22 & 1)) ||
1649 (exp2 == 255 && (uint32_t)(mnt2 << 9) && !(mnt2 >> 22 & 1)) ||
1650 signal_nans)
1651 flags |= FPLIB_IOC;
1652 } else {
1653 if (op1 == op2 || (!mnt1 && !mnt2)) {
1654 result = 6;
1655 } else if (sgn1 != sgn2) {
1656 result = sgn1 ? 8 : 2;
1657 } else if (exp1 != exp2) {
1658 result = sgn1 ^ (exp1 < exp2) ? 8 : 2;
1659 } else {
1660 result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2;
1661 }
1662 }
1663
1664 set_fpscr0(fpscr, flags);
1665
1666 return result;
1667}
1668
1669template <>
1670int
1671fplibCompare(uint64_t op1, uint64_t op2, bool signal_nans, FPSCR &fpscr)
1672{
1673 int mode = modeConv(fpscr);
1674 int flags = 0;
1675 int sgn1, exp1, sgn2, exp2, result;
1676 uint64_t mnt1, mnt2;
1677
1678 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
1679 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
1680
1681 if ((exp1 == 2047 && (uint64_t)(mnt1 << 12)) ||
1682 (exp2 == 2047 && (uint64_t)(mnt2 << 12))) {
1683 result = 3;
1684 if ((exp1 == 2047 && (uint64_t)(mnt1 << 12) && !(mnt1 >> 51 & 1)) ||
1685 (exp2 == 2047 && (uint64_t)(mnt2 << 12) && !(mnt2 >> 51 & 1)) ||
1686 signal_nans)
1687 flags |= FPLIB_IOC;
1688 } else {
1689 if (op1 == op2 || (!mnt1 && !mnt2)) {
1690 result = 6;
1691 } else if (sgn1 != sgn2) {
1692 result = sgn1 ? 8 : 2;
1693 } else if (exp1 != exp2) {
1694 result = sgn1 ^ (exp1 < exp2) ? 8 : 2;
1695 } else {
1696 result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2;
1697 }
1698 }
1699
1700 set_fpscr0(fpscr, flags);
1701
1702 return result;
1703}
1704
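// Conversions of a NaN between formats preserve the sign and the most
// significant payload bits that fit, and force the quiet bit.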
1705static uint16_t
1706fp16_FPConvertNaN_32(uint32_t op)
1707{
1708 return fp16_pack(op >> 31, 31, (uint16_t)1 << 9 | op >> 13);
1709}
1710
1711static uint16_t
1712fp16_FPConvertNaN_64(uint64_t op)
1713{
1714 return fp16_pack(op >> 63, 31, (uint16_t)1 << 9 | op >> 42);
1715}
1716
1717static uint32_t
1718fp32_FPConvertNaN_16(uint16_t op)
1719{
1720 return fp32_pack(op >> 15, 255, (uint32_t)1 << 22 | (uint32_t)op << 13);
1721}
1722
1723static uint32_t
1724fp32_FPConvertNaN_64(uint64_t op)
1725{
1726 return fp32_pack(op >> 63, 255, (uint32_t)1 << 22 | op >> 29);
1727}
1728
1729static uint64_t
1730fp64_FPConvertNaN_16(uint16_t op)
1731{
1732 return fp64_pack(op >> 15, 2047, (uint64_t)1 << 51 | (uint64_t)op << 42);
1733}
1734
1735static uint64_t
1736fp64_FPConvertNaN_32(uint32_t op)
1737{
1738 return fp64_pack(op >> 31, 2047, (uint64_t)1 << 51 | (uint64_t)op << 29);
1739}
1740
1741static uint32_t
1742fp32_FPOnePointFive(int sgn)
1743{
1744 return fp32_pack(sgn, 127, (uint64_t)1 << 22);
1745}
1746
1747static uint64_t
1748fp64_FPOnePointFive(int sgn)
1749{
1750 return fp64_pack(sgn, 1023, (uint64_t)1 << 51);
1751}
1752
1753static uint32_t
1754fp32_FPThree(int sgn)
1755{
1756 return fp32_pack(sgn, 128, (uint64_t)1 << 22);
1757}
1758
1759static uint64_t
1760fp64_FPThree(int sgn)
1761{
1762 return fp64_pack(sgn, 1024, (uint64_t)1 << 51);
1763}
1764
1765static uint32_t
1766fp32_FPTwo(int sgn)
1767{
1768 return fp32_pack(sgn, 128, 0);
1769}
1770
1771static uint64_t
1772fp64_FPTwo(int sgn)
1773{
1774 return fp64_pack(sgn, 1024, 0);
1775}
1776
1777template <>
1778uint16_t
1779fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr)
1780{
1781 int mode = modeConv(fpscr);
1782 int flags = 0;
1783 int sgn, exp;
1784 uint32_t mnt;
1785 uint16_t result;
1786
1787 // Unpack floating-point operand optionally with flush-to-zero:
1788 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
1789
1790 bool alt_hp = fpscr.ahp;
1791
1792 if (exp == 255 && (uint32_t)(mnt << 9)) {
1793 if (alt_hp) {
1794 result = fp16_zero(sgn);
1795 } else if (fpscr.dn) {
1796 result = fp16_defaultNaN();
1797 } else {
1798 result = fp16_FPConvertNaN_32(op);
1799 }
1800 if (!(mnt >> 22 & 1) || alt_hp) {
1801 flags |= FPLIB_IOC;
1802 }
1803 } else if (exp == 255) {
1804 if (alt_hp) {
1805 result = sgn << 15 | (uint16_t)0x7fff;
1806 flags |= FPLIB_IOC;
1807 } else {
1808 result = fp16_infinity(sgn);
1809 }
1810 } else if (!mnt) {
1811 result = fp16_zero(sgn);
1812 } else {
1813 result = fp16_round_(sgn, exp - 127 + 15,
1814 mnt >> 7 | !!(uint32_t)(mnt << 25),
1815 rounding, mode | alt_hp << 4, &flags);
1816 }
1817
1818 set_fpscr0(fpscr, flags);
1819
1820 return result;
1821}
1822
1823template <>
1824uint16_t
1825fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr)
1826{
1827 int mode = modeConv(fpscr);
1828 int flags = 0;
1829 int sgn, exp;
1830 uint64_t mnt;
1831 uint16_t result;
1832
1833 // Unpack floating-point operand optionally with flush-to-zero:
1834 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
1835
1836 bool alt_hp = fpscr.ahp;
1837
1838 if (exp == 2047 && (uint64_t)(mnt << 12)) {
1839 if (alt_hp) {
1840 result = fp16_zero(sgn);
1841 } else if (fpscr.dn) {
1842 result = fp16_defaultNaN();
1843 } else {
1844 result = fp16_FPConvertNaN_64(op);
1845 }
1846 if (!(mnt >> 51 & 1) || alt_hp) {
1847 flags |= FPLIB_IOC;
1848 }
1849 } else if (exp == 2047) {
1850 if (alt_hp) {
1851 result = sgn << 15 | (uint16_t)0x7fff;
1852 flags |= FPLIB_IOC;
1853 } else {
1854 result = fp16_infinity(sgn);
1855 }
1856 } else if (!mnt) {
1857 result = fp16_zero(sgn);
1858 } else {
1859 result = fp16_round_(sgn, exp - 1023 + 15,
1860 mnt >> 36 | !!(uint64_t)(mnt << 28),
1861 rounding, mode | alt_hp << 4, &flags);
1862 }
1863
1864 set_fpscr0(fpscr, flags);
1865
1866 return result;
1867}
1868
1869template <>
1870uint32_t
1871fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr)
1872{
1873 int mode = modeConv(fpscr);
1874 int flags = 0;
1875 int sgn, exp;
1876 uint16_t mnt;
1877 uint32_t result;
1878
1879 // Unpack floating-point operand optionally with flush-to-zero:
1880 fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags);
1881
1882 if (exp == 31 && !fpscr.ahp && (uint16_t)(mnt << 6)) {
1883 if (fpscr.dn) {
1884 result = fp32_defaultNaN();
1885 } else {
1886 result = fp32_FPConvertNaN_16(op);
1887 }
1888 if (!(mnt >> 9 & 1)) {
1889 flags |= FPLIB_IOC;
1890 }
1891 } else if (exp == 31 && !fpscr.ahp) {
1892 result = fp32_infinity(sgn);
1893 } else if (!mnt) {
1894 result = fp32_zero(sgn);
1895 } else {
1896 mnt = fp16_normalise(mnt, &exp);
1897 result = fp32_pack(sgn, exp - 15 + 127 + 5, (uint32_t)mnt << 8);
1898 }
1899
1900 set_fpscr0(fpscr, flags);
1901
1902 return result;
1903}
1904
1905template <>
1906uint32_t
1907fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr)
1908{
1909 int mode = modeConv(fpscr);
1910 int flags = 0;
1911 int sgn, exp;
1912 uint64_t mnt;
1913 uint32_t result;
1914
1915 // Unpack floating-point operand optionally with flush-to-zero:
1916 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
1917
1918 if (exp == 2047 && (uint64_t)(mnt << 12)) {
1919 if (fpscr.dn) {
1920 result = fp32_defaultNaN();
1921 } else {
1922 result = fp32_FPConvertNaN_64(op);
1923 }
1924 if (!(mnt >> 51 & 1)) {
1925 flags |= FPLIB_IOC;
1926 }
1927 } else if (exp == 2047) {
1928 result = fp32_infinity(sgn);
1929 } else if (!mnt) {
1930 result = fp32_zero(sgn);
1931 } else {
1932 result = fp32_round_(sgn, exp - 1023 + 127,
1933 mnt >> 20 | !!(uint64_t)(mnt << 44),
1934 rounding, mode, &flags);
1935 }
1936
1937 set_fpscr0(fpscr, flags);
1938
1939 return result;
1940}
1941
1942template <>
1943uint64_t
1944fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr)
1945{
1946 int mode = modeConv(fpscr);
1947 int flags = 0;
1948 int sgn, exp;
1949 uint16_t mnt;
1950 uint64_t result;
1951
1952 // Unpack floating-point operand optionally with flush-to-zero:
1953 fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags);
1954
1955 if (exp == 31 && !fpscr.ahp && (uint16_t)(mnt << 6)) {
1956 if (fpscr.dn) {
1957 result = fp64_defaultNaN();
1958 } else {
1959 result = fp64_FPConvertNaN_16(op);
1960 }
1961 if (!(mnt >> 9 & 1)) {
1962 flags |= FPLIB_IOC;
1963 }
1964 } else if (exp == 31 && !fpscr.ahp) {
1965 result = fp64_infinity(sgn);
1966 } else if (!mnt) {
1967 result = fp64_zero(sgn);
1968 } else {
1969 mnt = fp16_normalise(mnt, &exp);
1970 result = fp64_pack(sgn, exp - 15 + 1023 + 5, (uint64_t)mnt << 37);
1971 }
1972
1973 set_fpscr0(fpscr, flags);
1974
1975 return result;
1976}
1977
1978template <>
1979uint64_t
1980fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr)
1981{
1982 int mode = modeConv(fpscr);
1983 int flags = 0;
1984 int sgn, exp;
1985 uint32_t mnt;
1986 uint64_t result;
1987
1988 // Unpack floating-point operand optionally with flush-to-zero:
1989 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
1990
1991 if (exp == 255 && (uint32_t)(mnt << 9)) {
1992 if (fpscr.dn) {
1993 result = fp64_defaultNaN();
1994 } else {
1995 result = fp64_FPConvertNaN_32(op);
1996 }
1997 if (!(mnt >> 22 & 1)) {
1998 flags |= FPLIB_IOC;
1999 }
2000 } else if (exp == 255) {
2001 result = fp64_infinity(sgn);
2002 } else if (!mnt) {
2003 result = fp64_zero(sgn);
2004 } else {
2005 mnt = fp32_normalise(mnt, &exp);
2006 result = fp64_pack(sgn, exp - 127 + 1023 + 8, (uint64_t)mnt << 21);
2007 }
2008
2009 set_fpscr0(fpscr, flags);
2010
2011 return result;
2012}
2013
2014template <>
2015uint32_t
2016fplibMulAdd(uint32_t addend, uint32_t op1, uint32_t op2, FPSCR &fpscr)
2017{
2018 int flags = 0;
2019 uint32_t result = fp32_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags);
2020 set_fpscr0(fpscr, flags);
2021 return result;
2022}
2023
2024template <>
2025uint64_t
2026fplibMulAdd(uint64_t addend, uint64_t op1, uint64_t op2, FPSCR &fpscr)
2027{
2028 int flags = 0;
2029 uint64_t result = fp64_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags);
2030 set_fpscr0(fpscr, flags);
2031 return result;
2032}
2033
2034template <>
2035uint32_t
2036fplibDiv(uint32_t op1, uint32_t op2, FPSCR &fpscr)
2037{
2038 int flags = 0;
2039 uint32_t result = fp32_div(op1, op2, modeConv(fpscr), &flags);
2040 set_fpscr0(fpscr, flags);
2041 return result;
2042}
2043
2044template <>
2045uint64_t
2046fplibDiv(uint64_t op1, uint64_t op2, FPSCR &fpscr)
2047{
2048 int flags = 0;
2049 uint64_t result = fp64_div(op1, op2, modeConv(fpscr), &flags);
2050 set_fpscr0(fpscr, flags);
2051 return result;
2052}
2053
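// Repack an unpacked value, restoring an exponent field of zero when the
// implicit leading bit of the mantissa is clear (zero or subnormal).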
2054static uint32_t
2055fp32_repack(int sgn, int exp, uint32_t mnt)
2056{
2057 return fp32_pack(sgn, mnt >> 23 ? exp : 0, mnt);
2058}
2059
2060static uint64_t
2061fp64_repack(int sgn, int exp, uint64_t mnt)
2062{
2063 return fp64_pack(sgn, mnt >> 52 ? exp : 0, mnt);
2064}
2065
2066static void
2067fp32_minmaxnum(uint32_t *op1, uint32_t *op2, int sgn)
2068{
2069 // Treat a single quiet-NaN as +Infinity/-Infinity
2070 if (!((uint32_t)~(*op1 << 1) >> 23) && (uint32_t)~(*op2 << 1) >> 23)
2071 *op1 = fp32_infinity(sgn);
2072 if (!((uint32_t)~(*op2 << 1) >> 23) && (uint32_t)~(*op1 << 1) >> 23)
2073 *op2 = fp32_infinity(sgn);
2074}
2075
2076static void
2077fp64_minmaxnum(uint64_t *op1, uint64_t *op2, int sgn)
2078{
2079 // Treat a single quiet-NaN as +Infinity/-Infinity
2080 if (!((uint64_t)~(*op1 << 1) >> 52) && (uint64_t)~(*op2 << 1) >> 52)
2081 *op1 = fp64_infinity(sgn);
2082 if (!((uint64_t)~(*op2 << 1) >> 52) && (uint64_t)~(*op1 << 1) >> 52)
2083 *op2 = fp64_infinity(sgn);
2084}
2085
template <>
uint32_t
fplibMax(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint32_t mnt1, mnt2, x, result;

    fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    if ((x = fp32_process_NaNs(op1, op2, mode, &flags))) {
        result = x;
    } else {
        result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ?
                  fp32_repack(sgn1, exp1, mnt1) :
                  fp32_repack(sgn2, exp2, mnt2));
    }
    set_fpscr0(fpscr, flags);
    return result;
}

template <>
uint64_t
fplibMax(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint64_t mnt1, mnt2, x, result;

    fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    if ((x = fp64_process_NaNs(op1, op2, mode, &flags))) {
        result = x;
    } else {
        result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ?
                  fp64_repack(sgn1, exp1, mnt1) :
                  fp64_repack(sgn2, exp2, mnt2));
    }
    set_fpscr0(fpscr, flags);
    return result;
}

template <>
uint32_t
fplibMaxNum(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    fp32_minmaxnum(&op1, &op2, 1);
    return fplibMax<uint32_t>(op1, op2, fpscr);
}

template <>
uint64_t
fplibMaxNum(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    fp64_minmaxnum(&op1, &op2, 1);
    return fplibMax<uint64_t>(op1, op2, fpscr);
}

template <>
uint32_t
fplibMin(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint32_t mnt1, mnt2, x, result;

    fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    if ((x = fp32_process_NaNs(op1, op2, mode, &flags))) {
        result = x;
    } else {
        result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ?
                  fp32_repack(sgn1, exp1, mnt1) :
                  fp32_repack(sgn2, exp2, mnt2));
    }
    set_fpscr0(fpscr, flags);
    return result;
}

template <>
uint64_t
fplibMin(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint64_t mnt1, mnt2, x, result;

    fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    if ((x = fp64_process_NaNs(op1, op2, mode, &flags))) {
        result = x;
    } else {
        result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ?
                  fp64_repack(sgn1, exp1, mnt1) :
                  fp64_repack(sgn2, exp2, mnt2));
    }
    set_fpscr0(fpscr, flags);
    return result;
}

template <>
uint32_t
fplibMinNum(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    fp32_minmaxnum(&op1, &op2, 0);
    return fplibMin<uint32_t>(op1, op2, fpscr);
}

template <>
uint64_t
fplibMinNum(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    fp64_minmaxnum(&op1, &op2, 0);
    return fplibMin<uint64_t>(op1, op2, fpscr);
}

template <>
uint32_t
fplibMul(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    int flags = 0;
    uint32_t result = fp32_mul(op1, op2, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}

template <>
uint64_t
fplibMul(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    int flags = 0;
    uint64_t result = fp64_mul(op1, op2, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}

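// FMulX: as an ordinary multiply, except that infinity times zero returns
// 2.0 with the sign of the product instead of the default NaN: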
template <>
uint32_t
fplibMulX(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint32_t mnt1, mnt2, result;

    fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    result = fp32_process_NaNs(op1, op2, mode, &flags);
    if (!result) {
        if ((exp1 == 255 && !mnt2) || (exp2 == 255 && !mnt1)) {
            result = fp32_FPTwo(sgn1 ^ sgn2);
        } else if (exp1 == 255 || exp2 == 255) {
            result = fp32_infinity(sgn1 ^ sgn2);
        } else if (!mnt1 || !mnt2) {
            result = fp32_zero(sgn1 ^ sgn2);
        } else {
            result = fp32_mul(op1, op2, mode, &flags);
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint64_t
fplibMulX(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint64_t mnt1, mnt2, result;

    fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    result = fp64_process_NaNs(op1, op2, mode, &flags);
    if (!result) {
        if ((exp1 == 2047 && !mnt2) || (exp2 == 2047 && !mnt1)) {
            result = fp64_FPTwo(sgn1 ^ sgn2);
        } else if (exp1 == 2047 || exp2 == 2047) {
            result = fp64_infinity(sgn1 ^ sgn2);
        } else if (!mnt1 || !mnt2) {
            result = fp64_zero(sgn1 ^ sgn2);
        } else {
            result = fp64_mul(op1, op2, mode, &flags);
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint32_t
fplibNeg(uint32_t op)
{
    return op ^ (uint32_t)1 << 31;
}

template <>
uint64_t
fplibNeg(uint64_t op)
{
    return op ^ (uint64_t)1 << 63;
}

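// Reciprocal square root estimate table, indexed by the parity of the
// (biased) exponent and the top seven fraction bits of the normalised
// significand: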
static const uint8_t recip_sqrt_estimate[256] = {
    255, 253, 251, 249, 247, 245, 243, 242, 240, 238, 236, 234, 233, 231, 229, 228,
    226, 224, 223, 221, 219, 218, 216, 215, 213, 212, 210, 209, 207, 206, 204, 203,
    201, 200, 198, 197, 196, 194, 193, 192, 190, 189, 188, 186, 185, 184, 183, 181,
    180, 179, 178, 176, 175, 174, 173, 172, 170, 169, 168, 167, 166, 165, 164, 163,
    162, 160, 159, 158, 157, 156, 155, 154, 153, 152, 151, 150, 149, 148, 147, 146,
    145, 144, 143, 142, 141, 140, 140, 139, 138, 137, 136, 135, 134, 133, 132, 131,
    131, 130, 129, 128, 127, 126, 126, 125, 124, 123, 122, 121, 121, 120, 119, 118,
    118, 117, 116, 115, 114, 114, 113, 112, 111, 111, 110, 109, 109, 108, 107, 106,
    105, 104, 103, 101, 100, 99, 97, 96, 95, 93, 92, 91, 90, 88, 87, 86,
    85, 84, 82, 81, 80, 79, 78, 77, 76, 75, 74, 72, 71, 70, 69, 68,
    67, 66, 65, 64, 63, 62, 61, 60, 60, 59, 58, 57, 56, 55, 54, 53,
    52, 51, 51, 50, 49, 48, 47, 46, 46, 45, 44, 43, 42, 42, 41, 40,
    39, 38, 38, 37, 36, 35, 35, 34, 33, 33, 32, 31, 30, 30, 29, 28,
    28, 27, 26, 26, 25, 24, 24, 23, 22, 22, 21, 20, 20, 19, 19, 18,
    17, 17, 16, 16, 15, 14, 14, 13, 13, 12, 11, 11, 10, 10, 9, 9,
    8, 8, 7, 6, 6, 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 0
};

template <>
uint32_t
fplibRSqrtEstimate(uint32_t op, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint32_t mnt, result;

    fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    if (exp == 255 && (uint32_t)(mnt << 9)) {
        result = fp32_process_NaN(op, mode, &flags);
    } else if (!mnt) {
        result = fp32_infinity(sgn);
        flags |= FPLIB_DZC;
    } else if (sgn) {
        result = fp32_defaultNaN();
        flags |= FPLIB_IOC;
    } else if (exp == 255) {
        result = fp32_zero(0);
    } else {
        exp += 8;
        mnt = fp32_normalise(mnt, &exp);
        mnt = recip_sqrt_estimate[(~exp & 1) << 7 | (mnt >> 24 & 127)];
        result = fp32_pack(0, (380 - exp) >> 1, mnt << 15);
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint64_t
fplibRSqrtEstimate(uint64_t op, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint64_t mnt, result;

    fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    if (exp == 2047 && (uint64_t)(mnt << 12)) {
        result = fp64_process_NaN(op, mode, &flags);
    } else if (!mnt) {
        result = fp64_infinity(sgn);
        flags |= FPLIB_DZC;
    } else if (sgn) {
        result = fp64_defaultNaN();
        flags |= FPLIB_IOC;
    } else if (exp == 2047) {
        result = fp64_zero(0);
    } else {
        exp += 11;
        mnt = fp64_normalise(mnt, &exp);
        mnt = recip_sqrt_estimate[(~exp & 1) << 7 | (mnt >> 56 & 127)];
        result = fp64_pack(0, (3068 - exp) >> 1, mnt << 44);
    }

    set_fpscr0(fpscr, flags);

    return result;
}

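// FRSQRTS: computes (3 - op1 * op2) / 2, with zero times infinity (in either
// order) giving 1.5 rather than an Invalid Operation: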
template <>
uint32_t
fplibRSqrtStepFused(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint32_t mnt1, mnt2, result;

    op1 = fplibNeg<uint32_t>(op1);
    fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    result = fp32_process_NaNs(op1, op2, mode, &flags);
    if (!result) {
        if ((exp1 == 255 && !mnt2) || (exp2 == 255 && !mnt1)) {
            result = fp32_FPOnePointFive(0);
        } else if (exp1 == 255 || exp2 == 255) {
            result = fp32_infinity(sgn1 ^ sgn2);
        } else {
            result = fp32_muladd(fp32_FPThree(0), op1, op2, -1, mode, &flags);
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint64_t
fplibRSqrtStepFused(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint64_t mnt1, mnt2, result;

    op1 = fplibNeg<uint64_t>(op1);
    fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    result = fp64_process_NaNs(op1, op2, mode, &flags);
    if (!result) {
        if ((exp1 == 2047 && !mnt2) || (exp2 == 2047 && !mnt1)) {
            result = fp64_FPOnePointFive(0);
        } else if (exp1 == 2047 || exp2 == 2047) {
            result = fp64_infinity(sgn1 ^ sgn2);
        } else {
            result = fp64_muladd(fp64_FPThree(0), op1, op2, -1, mode, &flags);
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}

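// FRECPS: computes 2 - op1 * op2, with zero times infinity (in either order)
// giving 2.0 rather than an Invalid Operation: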
template <>
uint32_t
fplibRecipStepFused(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint32_t mnt1, mnt2, result;

    op1 = fplibNeg<uint32_t>(op1);
    fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    result = fp32_process_NaNs(op1, op2, mode, &flags);
    if (!result) {
        if ((exp1 == 255 && !mnt2) || (exp2 == 255 && !mnt1)) {
            result = fp32_FPTwo(0);
        } else if (exp1 == 255 || exp2 == 255) {
            result = fp32_infinity(sgn1 ^ sgn2);
        } else {
            result = fp32_muladd(fp32_FPTwo(0), op1, op2, 0, mode, &flags);
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}

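// FRECPE: reciprocal estimate. Infinities return zero, zeros return infinity
// and set Divide-by-Zero, operands too small to have a normal reciprocal
// overflow according to the rounding mode, and very large operands flush to
// zero when FZ is set; otherwise the estimate is formed by a short integer
// division of the normalised significand: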
template <>
uint32_t
fplibRecipEstimate(uint32_t op, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint32_t mnt, result;

    fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    if (exp == 255 && (uint32_t)(mnt << 9)) {
        result = fp32_process_NaN(op, mode, &flags);
    } else if (exp == 255) {
        result = fp32_zero(sgn);
    } else if (!mnt) {
        result = fp32_infinity(sgn);
        flags |= FPLIB_DZC;
    } else if (!((uint32_t)(op << 1) >> 22)) {
        bool overflow_to_inf = false;
        switch (FPCRRounding(fpscr)) {
          case FPRounding_TIEEVEN:
            overflow_to_inf = true;
            break;
          case FPRounding_POSINF:
            overflow_to_inf = !sgn;
            break;
          case FPRounding_NEGINF:
            overflow_to_inf = sgn;
            break;
          case FPRounding_ZERO:
            overflow_to_inf = false;
            break;
          default:
            assert(0);
        }
        result = overflow_to_inf ? fp32_infinity(sgn) : fp32_max_normal(sgn);
        flags |= FPLIB_OFC | FPLIB_IXC;
    } else if (fpscr.fz && exp >= 253) {
        result = fp32_zero(sgn);
        flags |= FPLIB_UFC;
    } else {
        exp += 8;
        mnt = fp32_normalise(mnt, &exp);
        int result_exp = 253 - exp;
        uint32_t fraction = (((uint32_t)1 << 19) / (mnt >> 22 | 1) + 1) >> 1;
        fraction <<= 15;
        if (result_exp == 0) {
            fraction >>= 1;
        } else if (result_exp == -1) {
            fraction >>= 2;
            result_exp = 0;
        }
        result = fp32_pack(sgn, result_exp, fraction);
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint64_t
fplibRecipEstimate(uint64_t op, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint64_t mnt, result;

    fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    if (exp == 2047 && (uint64_t)(mnt << 12)) {
        result = fp64_process_NaN(op, mode, &flags);
    } else if (exp == 2047) {
        result = fp64_zero(sgn);
    } else if (!mnt) {
        result = fp64_infinity(sgn);
        flags |= FPLIB_DZC;
    } else if (!((uint64_t)(op << 1) >> 51)) {
        bool overflow_to_inf = false;
        switch (FPCRRounding(fpscr)) {
          case FPRounding_TIEEVEN:
            overflow_to_inf = true;
            break;
          case FPRounding_POSINF:
            overflow_to_inf = !sgn;
            break;
          case FPRounding_NEGINF:
            overflow_to_inf = sgn;
            break;
          case FPRounding_ZERO:
            overflow_to_inf = false;
            break;
          default:
            assert(0);
        }
        result = overflow_to_inf ? fp64_infinity(sgn) : fp64_max_normal(sgn);
        flags |= FPLIB_OFC | FPLIB_IXC;
    } else if (fpscr.fz && exp >= 2045) {
        result = fp64_zero(sgn);
        flags |= FPLIB_UFC;
    } else {
        exp += 11;
        mnt = fp64_normalise(mnt, &exp);
        int result_exp = 2045 - exp;
        uint64_t fraction = (((uint32_t)1 << 19) / (mnt >> 54 | 1) + 1) >> 1;
        fraction <<= 44;
        if (result_exp == 0) {
            fraction >>= 1;
        } else if (result_exp == -1) {
            fraction >>= 2;
            result_exp = 0;
        }
        result = fp64_pack(sgn, result_exp, fraction);
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint64_t
fplibRecipStepFused(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint64_t mnt1, mnt2, result;

    op1 = fplibNeg<uint64_t>(op1);
    fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    result = fp64_process_NaNs(op1, op2, mode, &flags);
    if (!result) {
        if ((exp1 == 2047 && !mnt2) || (exp2 == 2047 && !mnt1)) {
            result = fp64_FPTwo(0);
        } else if (exp1 == 2047 || exp2 == 2047) {
            result = fp64_infinity(sgn1 ^ sgn2);
        } else {
            result = fp64_muladd(fp64_FPTwo(0), op1, op2, 0, mode, &flags);
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}

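// FRECPX: exponent-only reciprocal; the result has a zero fraction and an
// exponent that is the bitwise complement of the operand's, with zeros and
// denormals mapping to the maximum normal exponent: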
template <>
uint32_t
fplibRecpX(uint32_t op, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint32_t mnt, result;

    fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    if (exp == 255 && (uint32_t)(mnt << 9)) {
        result = fp32_process_NaN(op, mode, &flags);
    } else {
        if (!mnt) { // Zero and denormals
            result = fp32_pack(sgn, 254, 0);
        } else { // Infinities and normals
            result = fp32_pack(sgn, exp ^ 255, 0);
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint64_t
fplibRecpX(uint64_t op, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint64_t mnt, result;

    fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    if (exp == 2047 && (uint64_t)(mnt << 12)) {
        result = fp64_process_NaN(op, mode, &flags);
    } else {
        if (!mnt) { // Zero and denormals
            result = fp64_pack(sgn, 2046, 0);
        } else { // Infinities and normals
            result = fp64_pack(sgn, exp ^ 2047, 0);
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}

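// FRINT*: round to an integral floating-point value using the given rounding
// mode; 'exact' requests the Inexact flag when the result differs from the
// operand: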
template <>
uint32_t
fplibRoundInt(uint32_t op, FPRounding rounding, bool exact, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint32_t mnt, result;

    // Unpack using FPCR to determine if subnormals are flushed-to-zero:
    fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    // Handle NaNs, infinities and zeroes:
    if (exp == 255 && (uint32_t)(mnt << 9)) {
        result = fp32_process_NaN(op, mode, &flags);
    } else if (exp == 255) {
        result = fp32_infinity(sgn);
    } else if (!mnt) {
        result = fp32_zero(sgn);
    } else if (exp >= 150) {
        // There are no fractional bits
        result = op;
    } else {
        // Truncate towards zero:
        uint32_t x = 150 - exp >= 32 ? 0 : mnt >> (150 - exp);
        int err = exp < 118 ? 1 :
            (mnt << 1 >> (149 - exp) & 3) | (mnt << 2 << (exp - 118) != 0);
        switch (rounding) {
          case FPRounding_TIEEVEN:
            x += (err == 3 || (err == 2 && (x & 1)));
            break;
          case FPRounding_POSINF:
            x += err && !sgn;
            break;
          case FPRounding_NEGINF:
            x += err && sgn;
            break;
          case FPRounding_ZERO:
            break;
          case FPRounding_TIEAWAY:
            x += err >> 1;
            break;
          default:
            assert(0);
        }

        if (x == 0) {
            result = fp32_zero(sgn);
        } else {
            exp = 150;
            mnt = fp32_normalise(x, &exp);
            result = fp32_pack(sgn, exp + 8, mnt >> 8);
        }

        if (err && exact)
            flags |= FPLIB_IXC;
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint64_t
fplibRoundInt(uint64_t op, FPRounding rounding, bool exact, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint64_t mnt, result;

    // Unpack using FPCR to determine if subnormals are flushed-to-zero:
    fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    // Handle NaNs, infinities and zeroes:
    if (exp == 2047 && (uint64_t)(mnt << 12)) {
        result = fp64_process_NaN(op, mode, &flags);
    } else if (exp == 2047) {
        result = fp64_infinity(sgn);
    } else if (!mnt) {
        result = fp64_zero(sgn);
    } else if (exp >= 1075) {
        // There are no fractional bits
        result = op;
    } else {
        // Truncate towards zero:
        uint64_t x = 1075 - exp >= 64 ? 0 : mnt >> (1075 - exp);
        int err = exp < 1011 ? 1 :
            (mnt << 1 >> (1074 - exp) & 3) | (mnt << 2 << (exp - 1011) != 0);
        switch (rounding) {
          case FPRounding_TIEEVEN:
            x += (err == 3 || (err == 2 && (x & 1)));
            break;
          case FPRounding_POSINF:
            x += err && !sgn;
            break;
          case FPRounding_NEGINF:
            x += err && sgn;
            break;
          case FPRounding_ZERO:
            break;
          case FPRounding_TIEAWAY:
            x += err >> 1;
            break;
          default:
            assert(0);
        }

        if (x == 0) {
            result = fp64_zero(sgn);
        } else {
            exp = 1075;
            mnt = fp64_normalise(x, &exp);
            result = fp64_pack(sgn, exp + 11, mnt >> 11);
        }

        if (err && exact)
            flags |= FPLIB_IXC;
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint32_t
fplibSqrt(uint32_t op, FPSCR &fpscr)
{
    int flags = 0;
    uint32_t result = fp32_sqrt(op, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}

template <>
uint64_t
fplibSqrt(uint64_t op, FPSCR &fpscr)
{
    int flags = 0;
    uint64_t result = fp64_sqrt(op, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}

template <>
uint32_t
fplibSub(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    int flags = 0;
    uint32_t result = fp32_add(op1, op2, 1, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}

template <>
uint64_t
fplibSub(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    int flags = 0;
    uint64_t result = fp64_add(op1, op2, 1, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}

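// Convert an unpacked value (exponent using the double-precision bias) to a
// 64-bit fixed-point integer, rounding as requested and saturating with
// Invalid Operation on overflow; 'u' selects an unsigned destination: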
static uint64_t
FPToFixed_64(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding,
             int *flags)
{
    uint64_t x;
    int err;

    if (exp > 1023 + 63) {
        *flags = FPLIB_IOC;
        return ((uint64_t)!u << 63) - !sgn;
    }

    x = lsr64(mnt << 11, 1023 + 63 - exp);
    err = (exp > 1023 + 63 - 2 ? 0 :
           (lsr64(mnt << 11, 1023 + 63 - 2 - exp) & 3) |
           !!(mnt << 11 & (lsl64(1, 1023 + 63 - 2 - exp) - 1)));

    switch (rounding) {
      case FPRounding_TIEEVEN:
        x += (err == 3 || (err == 2 && (x & 1)));
        break;
      case FPRounding_POSINF:
        x += err && !sgn;
        break;
      case FPRounding_NEGINF:
        x += err && sgn;
        break;
      case FPRounding_ZERO:
        break;
      case FPRounding_TIEAWAY:
        x += err >> 1;
        break;
      default:
        assert(0);
    }

    if (u ? sgn && x : x > ((uint64_t)1 << 63) - !sgn) {
        *flags = FPLIB_IOC;
        return ((uint64_t)!u << 63) - !sgn;
    }

    if (err) {
        *flags = FPLIB_IXC;
    }

    return sgn ? -x : x;
}

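// 32-bit variant: converts via FPToFixed_64 and saturates the result to the
// signed or unsigned 32-bit range: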
static uint32_t
FPToFixed_32(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding,
             int *flags)
{
    uint64_t x = FPToFixed_64(sgn, exp, mnt, u, rounding, flags);
    if (u ? x >= (uint64_t)1 << 32 :
        !(x < (uint64_t)1 << 31 ||
          (uint64_t)-x <= (uint64_t)1 << 31)) {
        *flags = FPLIB_IOC;
        x = ((uint32_t)!u << 31) - !sgn;
    }
    return x;
}

template <>
uint32_t
fplibFPToFixed(uint32_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
{
    int flags = 0;
    int sgn, exp;
    uint32_t mnt, result;

    // Unpack using FPCR to determine if subnormals are flushed-to-zero:
    fp32_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);

    // If NaN, set cumulative flag or take exception:
    if (exp == 255 && (uint32_t)(mnt << 9)) {
        flags = FPLIB_IOC;
        result = 0;
    } else {
        result = FPToFixed_32(sgn, exp + 1023 - 127 + fbits,
                              (uint64_t)mnt << (52 - 23), u, rounding, &flags);
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint32_t
fplibFPToFixed(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
{
    int flags = 0;
    int sgn, exp;
    uint64_t mnt;
    uint32_t result;

    // Unpack using FPCR to determine if subnormals are flushed-to-zero:
    fp64_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);

    // If NaN, set cumulative flag or take exception:
    if (exp == 2047 && (uint64_t)(mnt << 12)) {
        flags = FPLIB_IOC;
        result = 0;
    } else {
        result = FPToFixed_32(sgn, exp + fbits, mnt, u, rounding, &flags);
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint64_t
fplibFPToFixed(uint32_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
{
    int flags = 0;
    int sgn, exp;
    uint32_t mnt;
    uint64_t result;

    // Unpack using FPCR to determine if subnormals are flushed-to-zero:
    fp32_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);

    // If NaN, set cumulative flag or take exception:
    if (exp == 255 && (uint32_t)(mnt << 9)) {
        flags = FPLIB_IOC;
        result = 0;
    } else {
        result = FPToFixed_64(sgn, exp + 1023 - 127 + fbits,
                              (uint64_t)mnt << (52 - 23), u, rounding, &flags);
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint64_t
fplibFPToFixed(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
{
    int flags = 0;
    int sgn, exp;
    uint64_t mnt, result;

    // Unpack using FPCR to determine if subnormals are flushed-to-zero:
    fp64_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);

    // If NaN, set cumulative flag or take exception:
    if (exp == 2047 && (uint64_t)(mnt << 12)) {
        flags = FPLIB_IOC;
        result = 0;
    } else {
        result = FPToFixed_64(sgn, exp + fbits, mnt, u, rounding, &flags);
    }

    set_fpscr0(fpscr, flags);

    return result;
}

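// Convert a 64-bit fixed-point value with 'fbits' fraction bits (signed
// unless 'u' is set) to single precision, rounding according to 'mode':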
static uint32_t
fp32_cvtf(uint64_t a, int fbits, int u, int mode, int *flags)
{
    int x_sgn = !u && a >> 63;
    int x_exp = 190 - fbits;
    uint64_t x_mnt = x_sgn ? -a : a;

    // Handle zero:
    if (!x_mnt) {
        return fp32_zero(0);
    }

    // Normalise and convert to 32 bits, collapsing error into bottom bit:
    x_mnt = fp64_normalise(x_mnt, &x_exp);
    x_mnt = x_mnt >> 31 | !!(uint32_t)(x_mnt << 1);

    return fp32_round(x_sgn, x_exp, x_mnt, mode, flags);
}

static uint64_t
fp64_cvtf(uint64_t a, int fbits, int u, int mode, int *flags)
{
    int x_sgn = !u && a >> 63;
    int x_exp = 1024 + 62 - fbits;
    uint64_t x_mnt = x_sgn ? -a : a;

    // Handle zero:
    if (!x_mnt) {
        return fp64_zero(0);
    }

    x_mnt = fp64_normalise(x_mnt, &x_exp);

    return fp64_round(x_sgn, x_exp, x_mnt << 1, mode, flags);
}

template <>
uint32_t
fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
{
    int flags = 0;
    uint32_t res = fp32_cvtf(op, fbits, u,
                             (int)rounding | ((uint32_t)fpscr >> 22 & 12),
                             &flags);
    set_fpscr0(fpscr, flags);
    return res;
}

template <>
uint64_t
fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
{
    int flags = 0;
    uint64_t res = fp64_cvtf(op, fbits, u,
                             (int)rounding | ((uint32_t)fpscr >> 22 & 12),
                             &flags);
    set_fpscr0(fpscr, flags);
    return res;
}

}