/*
 * Copyright (c) 2012-2013, 2017-2018 ARM Limited
 * All rights reserved
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder. You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Edmund Grimley Evans
 *          Thomas Grocutt
 */

#include <stdint.h>

#include <cassert>

#include "base/logging.hh"
#include "fplib.hh"

namespace ArmISA
{

#define FPLIB_RN 0
#define FPLIB_RP 1
#define FPLIB_RM 2
#define FPLIB_RZ 3
#define FPLIB_FZ 4
#define FPLIB_DN 8
#define FPLIB_AHP 16
#define FPLIB_FZ16 32

#define FPLIB_IDC 128 // Input Denormal
#define FPLIB_IXC 16  // Inexact
#define FPLIB_UFC 8   // Underflow
#define FPLIB_OFC 4   // Overflow
#define FPLIB_DZC 2   // Division by Zero
#define FPLIB_IOC 1   // Invalid Operation

#define FP16_BITS 16
#define FP32_BITS 32
#define FP64_BITS 64

#define FP16_EXP_BITS 5
#define FP32_EXP_BITS 8
#define FP64_EXP_BITS 11

#define FP16_EXP_BIAS 15
#define FP32_EXP_BIAS 127
#define FP64_EXP_BIAS 1023

#define FP16_EXP_INF ((1ULL << FP16_EXP_BITS) - 1)
#define FP32_EXP_INF ((1ULL << FP32_EXP_BITS) - 1)
#define FP64_EXP_INF ((1ULL << FP64_EXP_BITS) - 1)

#define FP16_MANT_BITS (FP16_BITS - FP16_EXP_BITS - 1)
#define FP32_MANT_BITS (FP32_BITS - FP32_EXP_BITS - 1)
#define FP64_MANT_BITS (FP64_BITS - FP64_EXP_BITS - 1)

#define FP16_EXP(x) ((x) >> FP16_MANT_BITS & ((1ULL << FP16_EXP_BITS) - 1))
#define FP32_EXP(x) ((x) >> FP32_MANT_BITS & ((1ULL << FP32_EXP_BITS) - 1))
#define FP64_EXP(x) ((x) >> FP64_MANT_BITS & ((1ULL << FP64_EXP_BITS) - 1))

#define FP16_MANT(x) ((x) & ((1ULL << FP16_MANT_BITS) - 1))
#define FP32_MANT(x) ((x) & ((1ULL << FP32_MANT_BITS) - 1))
#define FP64_MANT(x) ((x) & ((1ULL << FP64_MANT_BITS) - 1))

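// A quick illustration of the field macros above (hand-worked values, not
// part of the original source): the single-precision encoding of pi is
// 0x40490fdb, and the macros pick it apart as
//
//     FP32_EXP(0x40490fdb)  == 0x80     // biased exponent 128, i.e. 2^1
//     FP32_MANT(0x40490fdb) == 0x490fdb // the 23 stored fraction bits
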
static inline uint16_t
lsl16(uint16_t x, uint32_t shift)
{
    return shift < 16 ? x << shift : 0;
}

static inline uint16_t
lsr16(uint16_t x, uint32_t shift)
{
    return shift < 16 ? x >> shift : 0;
}

static inline uint32_t
lsl32(uint32_t x, uint32_t shift)
{
    return shift < 32 ? x << shift : 0;
}

static inline uint32_t
lsr32(uint32_t x, uint32_t shift)
{
    return shift < 32 ? x >> shift : 0;
}

static inline uint64_t
lsl64(uint64_t x, uint32_t shift)
{
    return shift < 64 ? x << shift : 0;
}

static inline uint64_t
lsr64(uint64_t x, uint32_t shift)
{
    return shift < 64 ? x >> shift : 0;
}

static inline void
lsl128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift)
{
    if (shift == 0) {
        *r1 = x1;
        *r0 = x0;
    } else if (shift < 64) {
        *r1 = x1 << shift | x0 >> (64 - shift);
        *r0 = x0 << shift;
    } else if (shift < 128) {
        *r1 = x0 << (shift - 64);
        *r0 = 0;
    } else {
        *r1 = 0;
        *r0 = 0;
    }
}

static inline void
lsr128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift)
{
    if (shift == 0) {
        *r1 = x1;
        *r0 = x0;
    } else if (shift < 64) {
        *r0 = x0 >> shift | x1 << (64 - shift);
        *r1 = x1 >> shift;
    } else if (shift < 128) {
        *r0 = x1 >> (shift - 64);
        *r1 = 0;
    } else {
        *r0 = 0;
        *r1 = 0;
    }
}
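
// The guarded shifts above exist because a C++ shift by an amount greater
// than or equal to the operand width is undefined behaviour; fplib instead
// wants such shifts to yield 0, since it routinely shifts by data-dependent
// amounts. Illustrative hand-checked cases (not from the original source):
//
//     lsr32(0xffffffff, 32) == 0   // plain "x >> 32" would be undefined
//     lsl64(1, 64)          == 0
//     lsl128(&r0, &r1, x0, x1, 64) // moves the low word into the high word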

static inline void
mul62x62(uint64_t *x0, uint64_t *x1, uint64_t a, uint64_t b)
{
    uint32_t mask = ((uint32_t)1 << 31) - 1;
    uint64_t a0 = a & mask;
    uint64_t a1 = a >> 31 & mask;
    uint64_t b0 = b & mask;
    uint64_t b1 = b >> 31 & mask;
    uint64_t p0 = a0 * b0;
    uint64_t p2 = a1 * b1;
    uint64_t p1 = (a0 + a1) * (b0 + b1) - p0 - p2;
    uint64_t s0 = p0;
    uint64_t s1 = (s0 >> 31) + p1;
    uint64_t s2 = (s1 >> 31) + p2;
    *x0 = (s0 & mask) | (s1 & mask) << 31 | s2 << 62;
    *x1 = s2 >> 2;
}

static inline
void mul64x32(uint64_t *x0, uint64_t *x1, uint64_t a, uint32_t b)
{
    uint64_t t0 = (uint64_t)(uint32_t)a * b;
    uint64_t t1 = (t0 >> 32) + (a >> 32) * b;
    *x0 = t1 << 32 | (uint32_t)t0;
    *x1 = t1 >> 32;
}

static inline void
add128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0,
       uint64_t b1)
{
    *x0 = a0 + b0;
    *x1 = a1 + b1 + (*x0 < a0);
}

static inline void
sub128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0,
       uint64_t b1)
{
    *x0 = a0 - b0;
    *x1 = a1 - b1 - (*x0 > a0);
}

static inline int
cmp128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1)
{
    return (a1 < b1 ? -1 : a1 > b1 ? 1 : a0 < b0 ? -1 : a0 > b0 ? 1 : 0);
}
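
// mul62x62() is a Karatsuba-style multiply: each operand (< 2^62) is split
// into two 31-bit halves and the full product is assembled from three
// partial products, keeping every intermediate within 64 bits. A trivial
// hand-worked sketch of these 128-bit helpers (illustrative only):
//
//     uint64_t lo, hi;
//     mul62x62(&lo, &hi, 3, 5);          // lo == 15, hi == 0
//     add128(&lo, &hi, ~0ULL, 0, 1, 0);  // lo == 0,  hi == 1 (carry out)
//     cmp128(0, 1, ~0ULL, 0);            // 1: {0,1} > {~0,0}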

static inline uint16_t
fp16_normalise(uint16_t mnt, int *exp)
{
    int shift;

    if (!mnt) {
        return 0;
    }

    for (shift = 8; shift; shift >>= 1) {
        if (!(mnt >> (16 - shift))) {
            mnt <<= shift;
            *exp -= shift;
        }
    }
    return mnt;
}

static inline uint32_t
fp32_normalise(uint32_t mnt, int *exp)
{
    int shift;

    if (!mnt) {
        return 0;
    }

    for (shift = 16; shift; shift >>= 1) {
        if (!(mnt >> (32 - shift))) {
            mnt <<= shift;
            *exp -= shift;
        }
    }
    return mnt;
}

static inline uint64_t
fp64_normalise(uint64_t mnt, int *exp)
{
    int shift;

    if (!mnt) {
        return 0;
    }

    for (shift = 32; shift; shift >>= 1) {
        if (!(mnt >> (64 - shift))) {
            mnt <<= shift;
            *exp -= shift;
        }
    }
    return mnt;
}

static inline void
fp128_normalise(uint64_t *mnt0, uint64_t *mnt1, int *exp)
{
    uint64_t x0 = *mnt0;
    uint64_t x1 = *mnt1;
    int shift;

    if (!x0 && !x1) {
        return;
    }

    if (!x1) {
        x1 = x0;
        x0 = 0;
        *exp -= 64;
    }

    for (shift = 32; shift; shift >>= 1) {
        if (!(x1 >> (64 - shift))) {
            x1 = x1 << shift | x0 >> (64 - shift);
            x0 <<= shift;
            *exp -= shift;
        }
    }

    *mnt0 = x0;
    *mnt1 = x1;
}
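
// The normalise helpers shift the mantissa left until its top bit is set,
// via a binary search over shift amounts (16, 8, 4, 2, 1 for 32 bits),
// adjusting the exponent to compensate. Hand-worked example (illustrative,
// not from the original source):
//
//     int exp = 0;
//     fp32_normalise(1, &exp);  // returns 0x80000000 with exp == -31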

static inline uint16_t
fp16_pack(uint16_t sgn, uint16_t exp, uint16_t mnt)
{
    return sgn << (FP16_BITS - 1) | exp << FP16_MANT_BITS | FP16_MANT(mnt);
}

static inline uint32_t
fp32_pack(uint32_t sgn, uint32_t exp, uint32_t mnt)
{
    return sgn << (FP32_BITS - 1) | exp << FP32_MANT_BITS | FP32_MANT(mnt);
}

static inline uint64_t
fp64_pack(uint64_t sgn, uint64_t exp, uint64_t mnt)
{
    return sgn << (FP64_BITS - 1) | exp << FP64_MANT_BITS | FP64_MANT(mnt);
}

static inline uint16_t
fp16_zero(int sgn)
{
    return fp16_pack(sgn, 0, 0);
}

static inline uint32_t
fp32_zero(int sgn)
{
    return fp32_pack(sgn, 0, 0);
}

static inline uint64_t
fp64_zero(int sgn)
{
    return fp64_pack(sgn, 0, 0);
}

static inline uint16_t
fp16_max_normal(int sgn)
{
    return fp16_pack(sgn, FP16_EXP_INF - 1, -1);
}

static inline uint32_t
fp32_max_normal(int sgn)
{
    return fp32_pack(sgn, FP32_EXP_INF - 1, -1);
}

static inline uint64_t
fp64_max_normal(int sgn)
{
    return fp64_pack(sgn, FP64_EXP_INF - 1, -1);
}

static inline uint16_t
fp16_infinity(int sgn)
{
    return fp16_pack(sgn, FP16_EXP_INF, 0);
}

static inline uint32_t
fp32_infinity(int sgn)
{
    return fp32_pack(sgn, FP32_EXP_INF, 0);
}

static inline uint64_t
fp64_infinity(int sgn)
{
    return fp64_pack(sgn, FP64_EXP_INF, 0);
}

static inline uint16_t
fp16_defaultNaN()
{
    return fp16_pack(0, FP16_EXP_INF, 1ULL << (FP16_MANT_BITS - 1));
}

static inline uint32_t
fp32_defaultNaN()
{
    return fp32_pack(0, FP32_EXP_INF, 1ULL << (FP32_MANT_BITS - 1));
}

static inline uint64_t
fp64_defaultNaN()
{
    return fp64_pack(0, FP64_EXP_INF, 1ULL << (FP64_MANT_BITS - 1));
}
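
// For reference, the helpers above reproduce the usual IEEE 754 bit
// patterns (hand-checked, illustrative):
//
//     fp32_infinity(0)  == 0x7f800000
//     fp32_infinity(1)  == 0xff800000
//     fp16_defaultNaN() == 0x7e00
//     fp32_defaultNaN() == 0x7fc00000          // quiet bit set, payload 0
//     fp64_defaultNaN() == 0x7ff8000000000000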

static inline void
fp16_unpack(int *sgn, int *exp, uint16_t *mnt, uint16_t x, int mode,
            int *flags)
{
    *sgn = x >> (FP16_BITS - 1);
    *exp = FP16_EXP(x);
    *mnt = FP16_MANT(x);

    // Handle subnormals:
    if (*exp) {
        *mnt |= 1ULL << FP16_MANT_BITS;
    } else {
        ++*exp;
        // IDC (Input Denormal) is not set in this case.
        if (mode & FPLIB_FZ16)
            *mnt = 0;
    }
}

static inline void
fp32_unpack(int *sgn, int *exp, uint32_t *mnt, uint32_t x, int mode,
            int *flags)
{
    *sgn = x >> (FP32_BITS - 1);
    *exp = FP32_EXP(x);
    *mnt = FP32_MANT(x);

    // Handle subnormals:
    if (*exp) {
        *mnt |= 1ULL << FP32_MANT_BITS;
    } else {
        ++*exp;
        if ((mode & FPLIB_FZ) && *mnt) {
            *flags |= FPLIB_IDC;
            *mnt = 0;
        }
    }
}

static inline void
fp64_unpack(int *sgn, int *exp, uint64_t *mnt, uint64_t x, int mode,
            int *flags)
{
    *sgn = x >> (FP64_BITS - 1);
    *exp = FP64_EXP(x);
    *mnt = FP64_MANT(x);

    // Handle subnormals:
    if (*exp) {
        *mnt |= 1ULL << FP64_MANT_BITS;
    } else {
        ++*exp;
        if ((mode & FPLIB_FZ) && *mnt) {
            *flags |= FPLIB_IDC;
            *mnt = 0;
        }
    }
}
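
// The unpack helpers deliver the significand with the implicit leading 1
// made explicit for normal numbers. In flush-to-zero mode a non-zero
// subnormal input is squashed to zero and IDC (Input Denormal) is raised;
// as noted above, FZ16 flushes without setting IDC. Illustrative sketch
// (hand-evaluated, not from the original source):
//
//     int sgn, exp, flags = 0;
//     uint32_t mnt;
//     fp32_unpack(&sgn, &exp, &mnt, 0x00000001, FPLIB_FZ, &flags);
//     // mnt == 0 and (flags & FPLIB_IDC) != 0: the subnormal was flushed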

static inline int
fp16_is_NaN(int exp, uint16_t mnt)
{
    return exp == FP16_EXP_INF && FP16_MANT(mnt);
}

static inline int
fp32_is_NaN(int exp, uint32_t mnt)
{
    return exp == FP32_EXP_INF && FP32_MANT(mnt);
}

static inline int
fp64_is_NaN(int exp, uint64_t mnt)
{
    return exp == FP64_EXP_INF && FP64_MANT(mnt);
}

static inline int
fp16_is_signalling_NaN(int exp, uint16_t mnt)
{
    return fp16_is_NaN(exp, mnt) && !(mnt >> (FP16_MANT_BITS - 1) & 1);
}

static inline int
fp32_is_signalling_NaN(int exp, uint32_t mnt)
{
    return fp32_is_NaN(exp, mnt) && !(mnt >> (FP32_MANT_BITS - 1) & 1);
}

static inline int
fp64_is_signalling_NaN(int exp, uint64_t mnt)
{
    return fp64_is_NaN(exp, mnt) && !(mnt >> (FP64_MANT_BITS - 1) & 1);
}

static inline int
fp16_is_quiet_NaN(int exp, uint16_t mnt)
{
    return exp == FP16_EXP_INF && (mnt >> (FP16_MANT_BITS - 1) & 1);
}

static inline int
fp32_is_quiet_NaN(int exp, uint32_t mnt)
{
    return exp == FP32_EXP_INF && (mnt >> (FP32_MANT_BITS - 1) & 1);
}

static inline int
fp64_is_quiet_NaN(int exp, uint64_t mnt)
{
    return exp == FP64_EXP_INF && (mnt >> (FP64_MANT_BITS - 1) & 1);
}

static inline int
fp16_is_infinity(int exp, uint16_t mnt)
{
    return exp == FP16_EXP_INF && !FP16_MANT(mnt);
}

static inline int
fp32_is_infinity(int exp, uint32_t mnt)
{
    return exp == FP32_EXP_INF && !FP32_MANT(mnt);
}

static inline int
fp64_is_infinity(int exp, uint64_t mnt)
{
    return exp == FP64_EXP_INF && !FP64_MANT(mnt);
}

static inline uint16_t
fp16_process_NaN(uint16_t a, int mode, int *flags)
{
    if (!(a >> (FP16_MANT_BITS - 1) & 1)) {
        *flags |= FPLIB_IOC;
        a |= 1ULL << (FP16_MANT_BITS - 1);
    }
    return mode & FPLIB_DN ? fp16_defaultNaN() : a;
}

static inline uint32_t
fp32_process_NaN(uint32_t a, int mode, int *flags)
{
    if (!(a >> (FP32_MANT_BITS - 1) & 1)) {
        *flags |= FPLIB_IOC;
        a |= 1ULL << (FP32_MANT_BITS - 1);
    }
    return mode & FPLIB_DN ? fp32_defaultNaN() : a;
}

static inline uint64_t
fp64_process_NaN(uint64_t a, int mode, int *flags)
{
    if (!(a >> (FP64_MANT_BITS - 1) & 1)) {
        *flags |= FPLIB_IOC;
        a |= 1ULL << (FP64_MANT_BITS - 1);
    }
    return mode & FPLIB_DN ? fp64_defaultNaN() : a;
}

static uint16_t
fp16_process_NaNs(uint16_t a, uint16_t b, int mode, int *flags)
{
    int a_exp = FP16_EXP(a);
    uint16_t a_mnt = FP16_MANT(a);
    int b_exp = FP16_EXP(b);
    uint16_t b_mnt = FP16_MANT(b);

    // Handle signalling NaNs:
    if (fp16_is_signalling_NaN(a_exp, a_mnt))
        return fp16_process_NaN(a, mode, flags);
    if (fp16_is_signalling_NaN(b_exp, b_mnt))
        return fp16_process_NaN(b, mode, flags);

    // Handle quiet NaNs:
    if (fp16_is_NaN(a_exp, a_mnt))
        return fp16_process_NaN(a, mode, flags);
    if (fp16_is_NaN(b_exp, b_mnt))
        return fp16_process_NaN(b, mode, flags);

    return 0;
}

static uint32_t
fp32_process_NaNs(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_exp = FP32_EXP(a);
    uint32_t a_mnt = FP32_MANT(a);
    int b_exp = FP32_EXP(b);
    uint32_t b_mnt = FP32_MANT(b);

    // Handle signalling NaNs:
    if (fp32_is_signalling_NaN(a_exp, a_mnt))
        return fp32_process_NaN(a, mode, flags);
    if (fp32_is_signalling_NaN(b_exp, b_mnt))
        return fp32_process_NaN(b, mode, flags);

    // Handle quiet NaNs:
    if (fp32_is_NaN(a_exp, a_mnt))
        return fp32_process_NaN(a, mode, flags);
    if (fp32_is_NaN(b_exp, b_mnt))
        return fp32_process_NaN(b, mode, flags);

    return 0;
}

static uint64_t
fp64_process_NaNs(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_exp = FP64_EXP(a);
    uint64_t a_mnt = FP64_MANT(a);
    int b_exp = FP64_EXP(b);
    uint64_t b_mnt = FP64_MANT(b);

    // Handle signalling NaNs:
    if (fp64_is_signalling_NaN(a_exp, a_mnt))
        return fp64_process_NaN(a, mode, flags);
    if (fp64_is_signalling_NaN(b_exp, b_mnt))
        return fp64_process_NaN(b, mode, flags);

    // Handle quiet NaNs:
    if (fp64_is_NaN(a_exp, a_mnt))
        return fp64_process_NaN(a, mode, flags);
    if (fp64_is_NaN(b_exp, b_mnt))
        return fp64_process_NaN(b, mode, flags);

    return 0;
}

static uint16_t
fp16_process_NaNs3(uint16_t a, uint16_t b, uint16_t c, int mode, int *flags)
{
    int a_exp = FP16_EXP(a);
    uint16_t a_mnt = FP16_MANT(a);
    int b_exp = FP16_EXP(b);
    uint16_t b_mnt = FP16_MANT(b);
    int c_exp = FP16_EXP(c);
    uint16_t c_mnt = FP16_MANT(c);

    // Handle signalling NaNs:
    if (fp16_is_signalling_NaN(a_exp, a_mnt))
        return fp16_process_NaN(a, mode, flags);
    if (fp16_is_signalling_NaN(b_exp, b_mnt))
        return fp16_process_NaN(b, mode, flags);
    if (fp16_is_signalling_NaN(c_exp, c_mnt))
        return fp16_process_NaN(c, mode, flags);

    // Handle quiet NaNs:
    if (fp16_is_NaN(a_exp, a_mnt))
        return fp16_process_NaN(a, mode, flags);
    if (fp16_is_NaN(b_exp, b_mnt))
        return fp16_process_NaN(b, mode, flags);
    if (fp16_is_NaN(c_exp, c_mnt))
        return fp16_process_NaN(c, mode, flags);

    return 0;
}

static uint32_t
fp32_process_NaNs3(uint32_t a, uint32_t b, uint32_t c, int mode, int *flags)
{
    int a_exp = FP32_EXP(a);
    uint32_t a_mnt = FP32_MANT(a);
    int b_exp = FP32_EXP(b);
    uint32_t b_mnt = FP32_MANT(b);
    int c_exp = FP32_EXP(c);
    uint32_t c_mnt = FP32_MANT(c);

    // Handle signalling NaNs:
    if (fp32_is_signalling_NaN(a_exp, a_mnt))
        return fp32_process_NaN(a, mode, flags);
    if (fp32_is_signalling_NaN(b_exp, b_mnt))
        return fp32_process_NaN(b, mode, flags);
    if (fp32_is_signalling_NaN(c_exp, c_mnt))
        return fp32_process_NaN(c, mode, flags);

    // Handle quiet NaNs:
    if (fp32_is_NaN(a_exp, a_mnt))
        return fp32_process_NaN(a, mode, flags);
    if (fp32_is_NaN(b_exp, b_mnt))
        return fp32_process_NaN(b, mode, flags);
    if (fp32_is_NaN(c_exp, c_mnt))
        return fp32_process_NaN(c, mode, flags);

    return 0;
}

static uint64_t
fp64_process_NaNs3(uint64_t a, uint64_t b, uint64_t c, int mode, int *flags)
{
    int a_exp = FP64_EXP(a);
    uint64_t a_mnt = FP64_MANT(a);
    int b_exp = FP64_EXP(b);
    uint64_t b_mnt = FP64_MANT(b);
    int c_exp = FP64_EXP(c);
    uint64_t c_mnt = FP64_MANT(c);

    // Handle signalling NaNs:
    if (fp64_is_signalling_NaN(a_exp, a_mnt))
        return fp64_process_NaN(a, mode, flags);
    if (fp64_is_signalling_NaN(b_exp, b_mnt))
        return fp64_process_NaN(b, mode, flags);
    if (fp64_is_signalling_NaN(c_exp, c_mnt))
        return fp64_process_NaN(c, mode, flags);

    // Handle quiet NaNs:
    if (fp64_is_NaN(a_exp, a_mnt))
        return fp64_process_NaN(a, mode, flags);
    if (fp64_is_NaN(b_exp, b_mnt))
        return fp64_process_NaN(b, mode, flags);
    if (fp64_is_NaN(c_exp, c_mnt))
        return fp64_process_NaN(c, mode, flags);

    return 0;
}
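
// NaN propagation above follows the Arm priority rule: signalling NaNs win
// over quiet NaNs, and operands are considered in order (a, then b, then c).
// A signalling NaN is quietened by setting the top mantissa bit and raises
// IOC; with FPLIB_DN the default NaN is returned instead. Hand-worked
// sketch (illustrative, not from the original source):
//
//     int flags = 0;
//     fp32_process_NaNs(0x7fc00002 /* quiet */, 0x7f800001 /* signalling */,
//                       0, &flags);
//     // returns 0x7fc00001 (b, quietened) and sets FPLIB_IOC, even though
//     // a is also a NaN: the signalling operand takes priority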

static uint16_t
fp16_round_(int sgn, int exp, uint16_t mnt, int rm, int mode, int *flags)
{
    int biased_exp; // non-negative exponent value for result
    uint16_t int_mant; // mantissa for result, less than (2 << FP16_MANT_BITS)
    int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5

    assert(rm != FPRounding_TIEAWAY);

    // Flush to zero:
    if ((mode & FPLIB_FZ16) && exp < 1) {
        *flags |= FPLIB_UFC;
        return fp16_zero(sgn);
    }

    // The bottom FP16_EXP_BITS bits of mnt are ORed together:
    mnt = (4ULL << FP16_MANT_BITS | mnt >> (FP16_EXP_BITS - 1) |
           ((mnt & ((1ULL << FP16_EXP_BITS) - 1)) != 0));

    if (exp > 0) {
        biased_exp = exp;
        int_mant = mnt >> 2;
        error = mnt & 3;
    } else {
        biased_exp = 0;
        int_mant = lsr16(mnt, 3 - exp);
        error = (lsr16(mnt, 1 - exp) & 3) | !!(mnt & (lsl16(1, 1 - exp) - 1));
    }

    if (!biased_exp && error) { // xx should also check fpscr_val<11>
        *flags |= FPLIB_UFC;
    }

    // Round up:
    if ((rm == FPLIB_RN && (error == 3 ||
                            (error == 2 && (int_mant & 1)))) ||
        (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) {
        ++int_mant;
        if (int_mant == 1ULL << FP16_MANT_BITS) {
            // Rounded up from denormalized to normalized
            biased_exp = 1;
        }
        if (int_mant == 2ULL << FP16_MANT_BITS) {
            // Rounded up to next exponent
            ++biased_exp;
            int_mant >>= 1;
        }
    }

    // Handle rounding to odd aka Von Neumann rounding:
    if (error && rm == FPRounding_ODD)
        int_mant |= 1;

    // Handle overflow:
    if (!(mode & FPLIB_AHP)) {
        if (biased_exp >= (int)FP16_EXP_INF) {
            *flags |= FPLIB_OFC | FPLIB_IXC;
            if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) ||
                (rm == FPLIB_RM && sgn)) {
                return fp16_infinity(sgn);
            } else {
                return fp16_max_normal(sgn);
            }
        }
    } else {
        if (biased_exp >= (int)FP16_EXP_INF + 1) {
            *flags |= FPLIB_IOC;
            return fp16_pack(sgn, FP16_EXP_INF, -1);
        }
    }

    if (error) {
        *flags |= FPLIB_IXC;
    }

    return fp16_pack(sgn, biased_exp, int_mant);
}

static uint16_t
fp16_round(int sgn, int exp, uint16_t mnt, int mode, int *flags)
{
    return fp16_round_(sgn, exp, mnt, mode & 3, mode, flags);
}

static uint32_t
fp32_round_(int sgn, int exp, uint32_t mnt, int rm, int mode, int *flags)
{
    int biased_exp; // non-negative exponent value for result
    uint32_t int_mant; // mantissa for result, less than (2 << FP32_MANT_BITS)
    int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5

    assert(rm != FPRounding_TIEAWAY);

    // Flush to zero:
    if ((mode & FPLIB_FZ) && exp < 1) {
        *flags |= FPLIB_UFC;
        return fp32_zero(sgn);
    }

    // The bottom FP32_EXP_BITS bits of mnt are ORed together:
    mnt = (4ULL << FP32_MANT_BITS | mnt >> (FP32_EXP_BITS - 1) |
           ((mnt & ((1ULL << FP32_EXP_BITS) - 1)) != 0));

    if (exp > 0) {
        biased_exp = exp;
        int_mant = mnt >> 2;
        error = mnt & 3;
    } else {
        biased_exp = 0;
        int_mant = lsr32(mnt, 3 - exp);
        error = (lsr32(mnt, 1 - exp) & 3) | !!(mnt & (lsl32(1, 1 - exp) - 1));
    }

    if (!biased_exp && error) { // xx should also check fpscr_val<11>
        *flags |= FPLIB_UFC;
    }

    // Round up:
    if ((rm == FPLIB_RN && (error == 3 ||
                            (error == 2 && (int_mant & 1)))) ||
        (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) {
        ++int_mant;
        if (int_mant == 1ULL << FP32_MANT_BITS) {
            // Rounded up from denormalized to normalized
            biased_exp = 1;
        }
        if (int_mant == 2ULL << FP32_MANT_BITS) {
            // Rounded up to next exponent
            ++biased_exp;
            int_mant >>= 1;
        }
    }

    // Handle rounding to odd aka Von Neumann rounding:
    if (error && rm == FPRounding_ODD)
        int_mant |= 1;

    // Handle overflow:
    if (biased_exp >= (int)FP32_EXP_INF) {
        *flags |= FPLIB_OFC | FPLIB_IXC;
        if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) ||
            (rm == FPLIB_RM && sgn)) {
            return fp32_infinity(sgn);
        } else {
            return fp32_max_normal(sgn);
        }
    }

    if (error) {
        *flags |= FPLIB_IXC;
    }

    return fp32_pack(sgn, biased_exp, int_mant);
}

static uint32_t
fp32_round(int sgn, int exp, uint32_t mnt, int mode, int *flags)
{
    return fp32_round_(sgn, exp, mnt, mode & 3, mode, flags);
}

static uint64_t
fp64_round_(int sgn, int exp, uint64_t mnt, int rm, int mode, int *flags)
{
    int biased_exp; // non-negative exponent value for result
    uint64_t int_mant; // mantissa for result, less than (2 << FP64_MANT_BITS)
    int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5

    assert(rm != FPRounding_TIEAWAY);

    // Flush to zero:
    if ((mode & FPLIB_FZ) && exp < 1) {
        *flags |= FPLIB_UFC;
        return fp64_zero(sgn);
    }

    // The bottom FP64_EXP_BITS bits of mnt are ORed together:
    mnt = (4ULL << FP64_MANT_BITS | mnt >> (FP64_EXP_BITS - 1) |
           ((mnt & ((1ULL << FP64_EXP_BITS) - 1)) != 0));

    if (exp > 0) {
        biased_exp = exp;
        int_mant = mnt >> 2;
        error = mnt & 3;
    } else {
        biased_exp = 0;
        int_mant = lsr64(mnt, 3 - exp);
        error = (lsr64(mnt, 1 - exp) & 3) | !!(mnt & (lsl64(1, 1 - exp) - 1));
    }

    if (!biased_exp && error) { // xx should also check fpscr_val<11>
        *flags |= FPLIB_UFC;
    }

    // Round up:
    if ((rm == FPLIB_RN && (error == 3 ||
                            (error == 2 && (int_mant & 1)))) ||
        (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) {
        ++int_mant;
        if (int_mant == 1ULL << FP64_MANT_BITS) {
            // Rounded up from denormalized to normalized
            biased_exp = 1;
        }
        if (int_mant == 2ULL << FP64_MANT_BITS) {
            // Rounded up to next exponent
            ++biased_exp;
            int_mant >>= 1;
        }
    }

    // Handle rounding to odd aka Von Neumann rounding:
    if (error && rm == FPRounding_ODD)
        int_mant |= 1;

    // Handle overflow:
    if (biased_exp >= (int)FP64_EXP_INF) {
        *flags |= FPLIB_OFC | FPLIB_IXC;
        if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) ||
            (rm == FPLIB_RM && sgn)) {
            return fp64_infinity(sgn);
        } else {
            return fp64_max_normal(sgn);
        }
    }

    if (error) {
        *flags |= FPLIB_IXC;
    }

    return fp64_pack(sgn, biased_exp, int_mant);
}

static uint64_t
fp64_round(int sgn, int exp, uint64_t mnt, int mode, int *flags)
{
    return fp64_round_(sgn, exp, mnt, mode & 3, mode, flags);
}
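
// The fp*_round_() helpers receive the significand in a fixed-point form;
// after the re-packing step at the top it carries the result mantissa with
// two extra low bits, and "error" encodes the discarded remainder (0 exact,
// 1 below half, 2 exactly half, 3 above half). Under FPLIB_RN a tie
// (error == 2) rounds to even, i.e. up only when the kept mantissa is odd,
// the usual IEEE behaviour:
//
//     1.5 rounds to 2.0   // tie, odd mantissa:  round up
//     2.5 rounds to 2.0   // tie, even mantissa: round down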

static int
fp16_compare_eq(uint16_t a, uint16_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint16_t a_mnt, b_mnt;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp16_is_NaN(a_exp, a_mnt) ||
        fp16_is_NaN(b_exp, b_mnt)) {
        if (fp16_is_signalling_NaN(a_exp, a_mnt) ||
            fp16_is_signalling_NaN(b_exp, b_mnt))
            *flags |= FPLIB_IOC;
        return 0;
    }
    return a == b || (!a_mnt && !b_mnt);
}

static int
fp16_compare_ge(uint16_t a, uint16_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint16_t a_mnt, b_mnt;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp16_is_NaN(a_exp, a_mnt) ||
        fp16_is_NaN(b_exp, b_mnt)) {
        *flags |= FPLIB_IOC;
        return 0;
    }
    if (!a_mnt && !b_mnt)
        return 1;
    if (a_sgn != b_sgn)
        return b_sgn;
    if (a_exp != b_exp)
        return a_sgn ^ (a_exp > b_exp);
    if (a_mnt != b_mnt)
        return a_sgn ^ (a_mnt > b_mnt);
    return 1;
}

static int
fp16_compare_gt(uint16_t a, uint16_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint16_t a_mnt, b_mnt;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp16_is_NaN(a_exp, a_mnt) ||
        fp16_is_NaN(b_exp, b_mnt)) {
        *flags |= FPLIB_IOC;
        return 0;
    }
    if (!a_mnt && !b_mnt)
        return 0;
    if (a_sgn != b_sgn)
        return b_sgn;
    if (a_exp != b_exp)
        return a_sgn ^ (a_exp > b_exp);
    if (a_mnt != b_mnt)
        return a_sgn ^ (a_mnt > b_mnt);
    return 0;
}

static int
fp16_compare_un(uint16_t a, uint16_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint16_t a_mnt, b_mnt;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp16_is_NaN(a_exp, a_mnt) ||
        fp16_is_NaN(b_exp, b_mnt)) {
        if (fp16_is_signalling_NaN(a_exp, a_mnt) ||
            fp16_is_signalling_NaN(b_exp, b_mnt))
            *flags |= FPLIB_IOC;
        return 1;
    }
    return 0;
}

static int
fp32_compare_eq(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint32_t a_mnt, b_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp32_is_NaN(a_exp, a_mnt) ||
        fp32_is_NaN(b_exp, b_mnt)) {
        if (fp32_is_signalling_NaN(a_exp, a_mnt) ||
            fp32_is_signalling_NaN(b_exp, b_mnt))
            *flags |= FPLIB_IOC;
        return 0;
    }
    return a == b || (!a_mnt && !b_mnt);
}

static int
fp32_compare_ge(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint32_t a_mnt, b_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp32_is_NaN(a_exp, a_mnt) ||
        fp32_is_NaN(b_exp, b_mnt)) {
        *flags |= FPLIB_IOC;
        return 0;
    }
    if (!a_mnt && !b_mnt)
        return 1;
    if (a_sgn != b_sgn)
        return b_sgn;
    if (a_exp != b_exp)
        return a_sgn ^ (a_exp > b_exp);
    if (a_mnt != b_mnt)
        return a_sgn ^ (a_mnt > b_mnt);
    return 1;
}

static int
fp32_compare_gt(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint32_t a_mnt, b_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp32_is_NaN(a_exp, a_mnt) ||
        fp32_is_NaN(b_exp, b_mnt)) {
        *flags |= FPLIB_IOC;
        return 0;
    }
    if (!a_mnt && !b_mnt)
        return 0;
    if (a_sgn != b_sgn)
        return b_sgn;
    if (a_exp != b_exp)
        return a_sgn ^ (a_exp > b_exp);
    if (a_mnt != b_mnt)
        return a_sgn ^ (a_mnt > b_mnt);
    return 0;
}

static int
fp32_compare_un(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint32_t a_mnt, b_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp32_is_NaN(a_exp, a_mnt) ||
        fp32_is_NaN(b_exp, b_mnt)) {
        if (fp32_is_signalling_NaN(a_exp, a_mnt) ||
            fp32_is_signalling_NaN(b_exp, b_mnt))
            *flags |= FPLIB_IOC;
        return 1;
    }
    return 0;
}

static int
fp64_compare_eq(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint64_t a_mnt, b_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp64_is_NaN(a_exp, a_mnt) ||
        fp64_is_NaN(b_exp, b_mnt)) {
        if (fp64_is_signalling_NaN(a_exp, a_mnt) ||
            fp64_is_signalling_NaN(b_exp, b_mnt))
            *flags |= FPLIB_IOC;
        return 0;
    }
    return a == b || (!a_mnt && !b_mnt);
}

static int
fp64_compare_ge(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint64_t a_mnt, b_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp64_is_NaN(a_exp, a_mnt) ||
        fp64_is_NaN(b_exp, b_mnt)) {
        *flags |= FPLIB_IOC;
        return 0;
    }
    if (!a_mnt && !b_mnt)
        return 1;
    if (a_sgn != b_sgn)
        return b_sgn;
    if (a_exp != b_exp)
        return a_sgn ^ (a_exp > b_exp);
    if (a_mnt != b_mnt)
        return a_sgn ^ (a_mnt > b_mnt);
    return 1;
}

static int
fp64_compare_gt(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint64_t a_mnt, b_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp64_is_NaN(a_exp, a_mnt) ||
        fp64_is_NaN(b_exp, b_mnt)) {
        *flags |= FPLIB_IOC;
        return 0;
    }
    if (!a_mnt && !b_mnt)
        return 0;
    if (a_sgn != b_sgn)
        return b_sgn;
    if (a_exp != b_exp)
        return a_sgn ^ (a_exp > b_exp);
    if (a_mnt != b_mnt)
        return a_sgn ^ (a_mnt > b_mnt);
    return 0;
}

static int
fp64_compare_un(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint64_t a_mnt, b_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp64_is_NaN(a_exp, a_mnt) ||
        fp64_is_NaN(b_exp, b_mnt)) {
        if (fp64_is_signalling_NaN(a_exp, a_mnt) ||
            fp64_is_signalling_NaN(b_exp, b_mnt))
            *flags |= FPLIB_IOC;
        return 1;
    }
    return 0;
}
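
// Once NaNs are excluded, the comparisons reduce to sign, then exponent,
// then mantissa, with the outcome XORed with the sign for negative
// operands; the "!a_mnt && !b_mnt" checks make +0 and -0 compare equal.
// Note that _eq and _un raise IOC only for signalling NaNs, while _ge and
// _gt raise it for any NaN, matching the IEEE 754 quiet vs. signalling
// comparison predicates. Hand-checked sketch (illustrative):
//
//     int flags = 0;
//     fp32_compare_eq(0x7fc00000, 0x7fc00000, 0, &flags);  // 0: NaN != NaN
//     fp32_compare_eq(0x00000000, 0x80000000, 0, &flags);  // 1: +0 == -0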

static uint16_t
fp16_add(uint16_t a, uint16_t b, int neg, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint16_t a_mnt, b_mnt, x, x_mnt;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp16_process_NaNs(a, b, mode, flags))) {
        return x;
    }

    b_sgn ^= neg;

    // Handle infinities and zeroes:
    if (a_exp == FP16_EXP_INF && b_exp == FP16_EXP_INF && a_sgn != b_sgn) {
        *flags |= FPLIB_IOC;
        return fp16_defaultNaN();
    } else if (a_exp == FP16_EXP_INF) {
        return fp16_infinity(a_sgn);
    } else if (b_exp == FP16_EXP_INF) {
        return fp16_infinity(b_sgn);
    } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) {
        return fp16_zero(a_sgn);
    }

    a_mnt <<= 3;
    b_mnt <<= 3;
    if (a_exp >= b_exp) {
        b_mnt = (lsr16(b_mnt, a_exp - b_exp) |
                 !!(b_mnt & (lsl16(1, a_exp - b_exp) - 1)));
        b_exp = a_exp;
    } else {
        a_mnt = (lsr16(a_mnt, b_exp - a_exp) |
                 !!(a_mnt & (lsl16(1, b_exp - a_exp) - 1)));
        a_exp = b_exp;
    }
    x_sgn = a_sgn;
    x_exp = a_exp;
    if (a_sgn == b_sgn) {
        x_mnt = a_mnt + b_mnt;
    } else if (a_mnt >= b_mnt) {
        x_mnt = a_mnt - b_mnt;
    } else {
        x_sgn ^= 1;
        x_mnt = b_mnt - a_mnt;
    }

    if (!x_mnt) {
        // Sign of exact zero result depends on rounding mode
        return fp16_zero((mode & 3) == 2);
    }

    x_mnt = fp16_normalise(x_mnt, &x_exp);

    return fp16_round(x_sgn, x_exp + FP16_EXP_BITS - 3, x_mnt << 1,
                      mode, flags);
}

static uint32_t
fp32_add(uint32_t a, uint32_t b, int neg, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint32_t a_mnt, b_mnt, x, x_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp32_process_NaNs(a, b, mode, flags))) {
        return x;
    }

    b_sgn ^= neg;

    // Handle infinities and zeroes:
    if (a_exp == FP32_EXP_INF && b_exp == FP32_EXP_INF && a_sgn != b_sgn) {
        *flags |= FPLIB_IOC;
        return fp32_defaultNaN();
    } else if (a_exp == FP32_EXP_INF) {
        return fp32_infinity(a_sgn);
    } else if (b_exp == FP32_EXP_INF) {
        return fp32_infinity(b_sgn);
    } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) {
        return fp32_zero(a_sgn);
    }

    a_mnt <<= 3;
    b_mnt <<= 3;
    if (a_exp >= b_exp) {
        b_mnt = (lsr32(b_mnt, a_exp - b_exp) |
                 !!(b_mnt & (lsl32(1, a_exp - b_exp) - 1)));
        b_exp = a_exp;
    } else {
        a_mnt = (lsr32(a_mnt, b_exp - a_exp) |
                 !!(a_mnt & (lsl32(1, b_exp - a_exp) - 1)));
        a_exp = b_exp;
    }
    x_sgn = a_sgn;
    x_exp = a_exp;
    if (a_sgn == b_sgn) {
        x_mnt = a_mnt + b_mnt;
    } else if (a_mnt >= b_mnt) {
        x_mnt = a_mnt - b_mnt;
    } else {
        x_sgn ^= 1;
        x_mnt = b_mnt - a_mnt;
    }

    if (!x_mnt) {
        // Sign of exact zero result depends on rounding mode
        return fp32_zero((mode & 3) == 2);
    }

    x_mnt = fp32_normalise(x_mnt, &x_exp);

    return fp32_round(x_sgn, x_exp + FP32_EXP_BITS - 3, x_mnt << 1,
                      mode, flags);
}

static uint64_t
fp64_add(uint64_t a, uint64_t b, int neg, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint64_t a_mnt, b_mnt, x, x_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp64_process_NaNs(a, b, mode, flags))) {
        return x;
    }

    b_sgn ^= neg;

    // Handle infinities and zeroes:
    if (a_exp == FP64_EXP_INF && b_exp == FP64_EXP_INF && a_sgn != b_sgn) {
        *flags |= FPLIB_IOC;
        return fp64_defaultNaN();
    } else if (a_exp == FP64_EXP_INF) {
        return fp64_infinity(a_sgn);
    } else if (b_exp == FP64_EXP_INF) {
        return fp64_infinity(b_sgn);
    } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) {
        return fp64_zero(a_sgn);
    }

    a_mnt <<= 3;
    b_mnt <<= 3;
    if (a_exp >= b_exp) {
        b_mnt = (lsr64(b_mnt, a_exp - b_exp) |
                 !!(b_mnt & (lsl64(1, a_exp - b_exp) - 1)));
        b_exp = a_exp;
    } else {
        a_mnt = (lsr64(a_mnt, b_exp - a_exp) |
                 !!(a_mnt & (lsl64(1, b_exp - a_exp) - 1)));
        a_exp = b_exp;
    }
    x_sgn = a_sgn;
    x_exp = a_exp;
    if (a_sgn == b_sgn) {
        x_mnt = a_mnt + b_mnt;
    } else if (a_mnt >= b_mnt) {
        x_mnt = a_mnt - b_mnt;
    } else {
        x_sgn ^= 1;
        x_mnt = b_mnt - a_mnt;
    }

    if (!x_mnt) {
        // Sign of exact zero result depends on rounding mode
        return fp64_zero((mode & 3) == 2);
    }

    x_mnt = fp64_normalise(x_mnt, &x_exp);

    return fp64_round(x_sgn, x_exp + FP64_EXP_BITS - 3, x_mnt << 1,
                      mode, flags);
}
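
// Addition aligns the smaller operand by right-shifting it, folding every
// bit shifted out into a sticky LSB so rounding still sees whether the
// discarded part was non-zero; the initial "<<= 3" provides three extra
// low-order bits for this. An exact zero produced by cancellation takes its
// sign from the rounding mode: (mode & 3) == 2 (round towards minus
// infinity) gives -0, every other mode gives +0. Illustrative sketch:
//
//     int flags = 0;
//     fp32_add(0x3f800000, 0x3f800000, 1 /* negate b */, FPLIB_RM, &flags);
//     // 1.0f - 1.0f under round-to-minus-infinity: returns 0x80000000 (-0)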

static uint16_t
fp16_mul(uint16_t a, uint16_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint16_t a_mnt, b_mnt, x;
    uint32_t x_mnt;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp16_process_NaNs(a, b, mode, flags))) {
        return x;
    }

    // Handle infinities and zeroes:
    if ((a_exp == FP16_EXP_INF && !b_mnt) ||
        (b_exp == FP16_EXP_INF && !a_mnt)) {
        *flags |= FPLIB_IOC;
        return fp16_defaultNaN();
    } else if (a_exp == FP16_EXP_INF || b_exp == FP16_EXP_INF) {
        return fp16_infinity(a_sgn ^ b_sgn);
    } else if (!a_mnt || !b_mnt) {
        return fp16_zero(a_sgn ^ b_sgn);
    }

    // Multiply and normalise:
    x_sgn = a_sgn ^ b_sgn;
    x_exp = a_exp + b_exp - FP16_EXP_BIAS + 2 * FP16_EXP_BITS + 1;
    x_mnt = (uint32_t)a_mnt * b_mnt;
    x_mnt = fp32_normalise(x_mnt, &x_exp);

    // Convert to FP16_BITS bits, collapsing error into bottom bit:
    x_mnt = lsr32(x_mnt, FP16_BITS - 1) | !!lsl32(x_mnt, FP16_BITS + 1);

    return fp16_round(x_sgn, x_exp, x_mnt, mode, flags);
}

static uint32_t
fp32_mul(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint32_t a_mnt, b_mnt, x;
    uint64_t x_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp32_process_NaNs(a, b, mode, flags))) {
        return x;
    }

    // Handle infinities and zeroes:
    if ((a_exp == FP32_EXP_INF && !b_mnt) ||
        (b_exp == FP32_EXP_INF && !a_mnt)) {
        *flags |= FPLIB_IOC;
        return fp32_defaultNaN();
    } else if (a_exp == FP32_EXP_INF || b_exp == FP32_EXP_INF) {
        return fp32_infinity(a_sgn ^ b_sgn);
    } else if (!a_mnt || !b_mnt) {
        return fp32_zero(a_sgn ^ b_sgn);
    }

    // Multiply and normalise:
    x_sgn = a_sgn ^ b_sgn;
    x_exp = a_exp + b_exp - FP32_EXP_BIAS + 2 * FP32_EXP_BITS + 1;
    x_mnt = (uint64_t)a_mnt * b_mnt;
    x_mnt = fp64_normalise(x_mnt, &x_exp);

    // Convert to FP32_BITS bits, collapsing error into bottom bit:
    x_mnt = lsr64(x_mnt, FP32_BITS - 1) | !!lsl64(x_mnt, FP32_BITS + 1);

    return fp32_round(x_sgn, x_exp, x_mnt, mode, flags);
}

static uint64_t
fp64_mul(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint64_t a_mnt, b_mnt, x;
    uint64_t x0_mnt, x1_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp64_process_NaNs(a, b, mode, flags))) {
        return x;
    }

    // Handle infinities and zeroes:
    if ((a_exp == FP64_EXP_INF && !b_mnt) ||
        (b_exp == FP64_EXP_INF && !a_mnt)) {
        *flags |= FPLIB_IOC;
        return fp64_defaultNaN();
    } else if (a_exp == FP64_EXP_INF || b_exp == FP64_EXP_INF) {
        return fp64_infinity(a_sgn ^ b_sgn);
    } else if (!a_mnt || !b_mnt) {
        return fp64_zero(a_sgn ^ b_sgn);
    }

    // Multiply and normalise:
    x_sgn = a_sgn ^ b_sgn;
    x_exp = a_exp + b_exp - FP64_EXP_BIAS + 2 * FP64_EXP_BITS + 1;
    mul62x62(&x0_mnt, &x1_mnt, a_mnt, b_mnt);
    fp128_normalise(&x0_mnt, &x1_mnt, &x_exp);

    // Convert to FP64_BITS bits, collapsing error into bottom bit:
    x0_mnt = x1_mnt << 1 | !!x0_mnt;

    return fp64_round(x_sgn, x_exp, x0_mnt, mode, flags);
}

static uint16_t
fp16_muladd(uint16_t a, uint16_t b, uint16_t c, int scale,
            int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp;
    uint16_t a_mnt, b_mnt, c_mnt, x;
    uint32_t x_mnt, y_mnt;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
    fp16_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags);

    x = fp16_process_NaNs3(a, b, c, mode, flags);

    // Quiet NaN added to product of zero and infinity:
    if (fp16_is_quiet_NaN(a_exp, a_mnt) &&
        ((!b_mnt && fp16_is_infinity(c_exp, c_mnt)) ||
         (!c_mnt && fp16_is_infinity(b_exp, b_mnt)))) {
        x = fp16_defaultNaN();
        *flags |= FPLIB_IOC;
    }

    if (x) {
        return x;
    }

    // Handle infinities and zeroes:
    if ((b_exp == FP16_EXP_INF && !c_mnt) ||
        (c_exp == FP16_EXP_INF && !b_mnt) ||
        (a_exp == FP16_EXP_INF &&
         (b_exp == FP16_EXP_INF || c_exp == FP16_EXP_INF) &&
         (a_sgn != (b_sgn ^ c_sgn)))) {
        *flags |= FPLIB_IOC;
        return fp16_defaultNaN();
    }
    if (a_exp == FP16_EXP_INF)
        return fp16_infinity(a_sgn);
    if (b_exp == FP16_EXP_INF || c_exp == FP16_EXP_INF)
        return fp16_infinity(b_sgn ^ c_sgn);
    if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn))
        return fp16_zero(a_sgn);

    x_sgn = a_sgn;
    x_exp = a_exp + 2 * FP16_EXP_BITS - 3;
    x_mnt = (uint32_t)a_mnt << (FP16_MANT_BITS + 4);

    // Multiply:
    y_sgn = b_sgn ^ c_sgn;
    y_exp = b_exp + c_exp - FP16_EXP_BIAS + 2 * FP16_EXP_BITS + 1 - 3;
    y_mnt = (uint32_t)b_mnt * c_mnt << 3;
    if (!y_mnt) {
        y_exp = x_exp;
    }

    // Add:
    if (x_exp >= y_exp) {
        y_mnt = (lsr32(y_mnt, x_exp - y_exp) |
                 !!(y_mnt & (lsl32(1, x_exp - y_exp) - 1)));
        y_exp = x_exp;
    } else {
        x_mnt = (lsr32(x_mnt, y_exp - x_exp) |
                 !!(x_mnt & (lsl32(1, y_exp - x_exp) - 1)));
        x_exp = y_exp;
    }
    if (x_sgn == y_sgn) {
        x_mnt = x_mnt + y_mnt;
    } else if (x_mnt >= y_mnt) {
        x_mnt = x_mnt - y_mnt;
    } else {
        x_sgn ^= 1;
        x_mnt = y_mnt - x_mnt;
    }

    if (!x_mnt) {
        // Sign of exact zero result depends on rounding mode
        return fp16_zero((mode & 3) == 2);
    }

    // Normalise into FP16_BITS bits, collapsing error into bottom bit:
    x_mnt = fp32_normalise(x_mnt, &x_exp);
    x_mnt = x_mnt >> (FP16_BITS - 1) | !!(uint16_t)(x_mnt << 1);

    return fp16_round(x_sgn, x_exp + scale, x_mnt, mode, flags);
}

static uint32_t
fp32_muladd(uint32_t a, uint32_t b, uint32_t c, int scale,
            int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp;
    uint32_t a_mnt, b_mnt, c_mnt, x;
    uint64_t x_mnt, y_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
    fp32_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags);

    x = fp32_process_NaNs3(a, b, c, mode, flags);

    // Quiet NaN added to product of zero and infinity:
    if (fp32_is_quiet_NaN(a_exp, a_mnt) &&
        ((!b_mnt && fp32_is_infinity(c_exp, c_mnt)) ||
         (!c_mnt && fp32_is_infinity(b_exp, b_mnt)))) {
        x = fp32_defaultNaN();
        *flags |= FPLIB_IOC;
    }

    if (x) {
        return x;
    }

    // Handle infinities and zeroes:
    if ((b_exp == FP32_EXP_INF && !c_mnt) ||
        (c_exp == FP32_EXP_INF && !b_mnt) ||
        (a_exp == FP32_EXP_INF &&
         (b_exp == FP32_EXP_INF || c_exp == FP32_EXP_INF) &&
         (a_sgn != (b_sgn ^ c_sgn)))) {
        *flags |= FPLIB_IOC;
        return fp32_defaultNaN();
    }
    if (a_exp == FP32_EXP_INF)
        return fp32_infinity(a_sgn);
    if (b_exp == FP32_EXP_INF || c_exp == FP32_EXP_INF)
        return fp32_infinity(b_sgn ^ c_sgn);
    if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn))
        return fp32_zero(a_sgn);

    x_sgn = a_sgn;
    x_exp = a_exp + 2 * FP32_EXP_BITS - 3;
    x_mnt = (uint64_t)a_mnt << (FP32_MANT_BITS + 4);

    // Multiply:
    y_sgn = b_sgn ^ c_sgn;
    y_exp = b_exp + c_exp - FP32_EXP_BIAS + 2 * FP32_EXP_BITS + 1 - 3;
    y_mnt = (uint64_t)b_mnt * c_mnt << 3;
    if (!y_mnt) {
        y_exp = x_exp;
    }

    // Add:
    if (x_exp >= y_exp) {
        y_mnt = (lsr64(y_mnt, x_exp - y_exp) |
                 !!(y_mnt & (lsl64(1, x_exp - y_exp) - 1)));
        y_exp = x_exp;
    } else {
        x_mnt = (lsr64(x_mnt, y_exp - x_exp) |
                 !!(x_mnt & (lsl64(1, y_exp - x_exp) - 1)));
        x_exp = y_exp;
    }
    if (x_sgn == y_sgn) {
        x_mnt = x_mnt + y_mnt;
    } else if (x_mnt >= y_mnt) {
        x_mnt = x_mnt - y_mnt;
    } else {
        x_sgn ^= 1;
        x_mnt = y_mnt - x_mnt;
    }

    if (!x_mnt) {
        // Sign of exact zero result depends on rounding mode
        return fp32_zero((mode & 3) == 2);
    }

    // Normalise into FP32_BITS bits, collapsing error into bottom bit:
    x_mnt = fp64_normalise(x_mnt, &x_exp);
    x_mnt = x_mnt >> (FP32_BITS - 1) | !!(uint32_t)(x_mnt << 1);

    return fp32_round(x_sgn, x_exp + scale, x_mnt, mode, flags);
}

static uint64_t
fp64_muladd(uint64_t a, uint64_t b, uint64_t c, int scale,
            int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp;
    uint64_t a_mnt, b_mnt, c_mnt, x;
    uint64_t x0_mnt, x1_mnt, y0_mnt, y1_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
    fp64_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags);

    x = fp64_process_NaNs3(a, b, c, mode, flags);

    // Quiet NaN added to product of zero and infinity:
    if (fp64_is_quiet_NaN(a_exp, a_mnt) &&
        ((!b_mnt && fp64_is_infinity(c_exp, c_mnt)) ||
         (!c_mnt && fp64_is_infinity(b_exp, b_mnt)))) {
        x = fp64_defaultNaN();
        *flags |= FPLIB_IOC;
    }

    if (x) {
        return x;
    }

    // Handle infinities and zeroes:
    if ((b_exp == FP64_EXP_INF && !c_mnt) ||
        (c_exp == FP64_EXP_INF && !b_mnt) ||
        (a_exp == FP64_EXP_INF &&
         (b_exp == FP64_EXP_INF || c_exp == FP64_EXP_INF) &&
         (a_sgn != (b_sgn ^ c_sgn)))) {
        *flags |= FPLIB_IOC;
        return fp64_defaultNaN();
    }
    if (a_exp == FP64_EXP_INF)
        return fp64_infinity(a_sgn);
    if (b_exp == FP64_EXP_INF || c_exp == FP64_EXP_INF)
        return fp64_infinity(b_sgn ^ c_sgn);
    if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn))
        return fp64_zero(a_sgn);

    x_sgn = a_sgn;
    x_exp = a_exp + FP64_EXP_BITS;
    x0_mnt = 0;
    x1_mnt = a_mnt;

    // Multiply:
    y_sgn = b_sgn ^ c_sgn;
    y_exp = b_exp + c_exp - FP64_EXP_BIAS + 2 * FP64_EXP_BITS + 1 - 3;
    mul62x62(&y0_mnt, &y1_mnt, b_mnt, c_mnt << 3);
    if (!y0_mnt && !y1_mnt) {
        y_exp = x_exp;
    }

    // Add:
    if (x_exp >= y_exp) {
        uint64_t t0, t1;
        lsl128(&t0, &t1, y0_mnt, y1_mnt,
               x_exp - y_exp < 128 ? 128 - (x_exp - y_exp) : 0);
        lsr128(&y0_mnt, &y1_mnt, y0_mnt, y1_mnt, x_exp - y_exp);
        y0_mnt |= !!(t0 | t1);
        y_exp = x_exp;
    } else {
        uint64_t t0, t1;
        lsl128(&t0, &t1, x0_mnt, x1_mnt,
               y_exp - x_exp < 128 ? 128 - (y_exp - x_exp) : 0);
        lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y_exp - x_exp);
        x0_mnt |= !!(t0 | t1);
        x_exp = y_exp;
    }
    if (x_sgn == y_sgn) {
        add128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y0_mnt, y1_mnt);
    } else if (cmp128(x0_mnt, x1_mnt, y0_mnt, y1_mnt) >= 0) {
        sub128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y0_mnt, y1_mnt);
    } else {
        x_sgn ^= 1;
        sub128(&x0_mnt, &x1_mnt, y0_mnt, y1_mnt, x0_mnt, x1_mnt);
    }

    if (!x0_mnt && !x1_mnt) {
        // Sign of exact zero result depends on rounding mode
        return fp64_zero((mode & 3) == 2);
    }

    // Normalise into FP64_BITS bits, collapsing error into bottom bit:
    fp128_normalise(&x0_mnt, &x1_mnt, &x_exp);
    x0_mnt = x1_mnt << 1 | !!x0_mnt;

    return fp64_round(x_sgn, x_exp + scale, x0_mnt, mode, flags);
}
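
// fp*_muladd() computes a + b*c with a single rounding at the end, i.e. a
// true fused multiply-add: the full-width product (up to 124 bits for fp64,
// via mul62x62) is aligned against the widened addend before any precision
// is lost. The scale parameter applies a final power-of-two scaling before
// rounding; a caller can, for instance, pass scale = -1 to halve the result
// exactly. Hand-worked sketch (illustrative, not from the original source):
//
//     int flags = 0;
//     fp32_muladd(0x40000000, 0x40400000, 0x40800000, 0, 0, &flags);
//     // 2.0f + 3.0f * 4.0f == 14.0f: returns 0x41600000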

static uint16_t
fp16_div(uint16_t a, uint16_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint16_t a_mnt, b_mnt, x;
    uint32_t x_mnt;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp16_process_NaNs(a, b, mode, flags)))
        return x;

    // Handle infinities and zeroes:
    if ((a_exp == FP16_EXP_INF && b_exp == FP16_EXP_INF) ||
        (!a_mnt && !b_mnt)) {
        *flags |= FPLIB_IOC;
        return fp16_defaultNaN();
    }
    if (a_exp == FP16_EXP_INF || !b_mnt) {
        if (a_exp != FP16_EXP_INF)
            *flags |= FPLIB_DZC;
        return fp16_infinity(a_sgn ^ b_sgn);
    }
    if (!a_mnt || b_exp == FP16_EXP_INF)
        return fp16_zero(a_sgn ^ b_sgn);

    // Divide, setting bottom bit if inexact:
    a_mnt = fp16_normalise(a_mnt, &a_exp);
    x_sgn = a_sgn ^ b_sgn;
    x_exp = a_exp - b_exp + (FP16_EXP_BIAS + FP16_BITS + 2 * FP16_EXP_BITS - 3);
    x_mnt = ((uint32_t)a_mnt << (FP16_MANT_BITS - FP16_EXP_BITS + 3)) / b_mnt;
    x_mnt |= (x_mnt * b_mnt !=
              (uint32_t)a_mnt << (FP16_MANT_BITS - FP16_EXP_BITS + 3));

    // Normalise into FP16_BITS bits, collapsing error into bottom bit:
    x_mnt = fp32_normalise(x_mnt, &x_exp);
    x_mnt = x_mnt >> (FP16_BITS - 1) | !!(uint16_t)(x_mnt << 1);

    return fp16_round(x_sgn, x_exp, x_mnt, mode, flags);
}

static uint32_t
fp32_div(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint32_t a_mnt, b_mnt, x;
    uint64_t x_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp32_process_NaNs(a, b, mode, flags)))
        return x;

    // Handle infinities and zeroes:
    if ((a_exp == FP32_EXP_INF && b_exp == FP32_EXP_INF) ||
        (!a_mnt && !b_mnt)) {
        *flags |= FPLIB_IOC;
        return fp32_defaultNaN();
    }
    if (a_exp == FP32_EXP_INF || !b_mnt) {
        if (a_exp != FP32_EXP_INF)
            *flags |= FPLIB_DZC;
        return fp32_infinity(a_sgn ^ b_sgn);
    }
    if (!a_mnt || b_exp == FP32_EXP_INF)
        return fp32_zero(a_sgn ^ b_sgn);

    // Divide, setting bottom bit if inexact:
    a_mnt = fp32_normalise(a_mnt, &a_exp);
    x_sgn = a_sgn ^ b_sgn;
    x_exp = a_exp - b_exp + (FP32_EXP_BIAS + FP32_BITS + 2 * FP32_EXP_BITS - 3);
    x_mnt = ((uint64_t)a_mnt << (FP32_MANT_BITS - FP32_EXP_BITS + 3)) / b_mnt;
    x_mnt |= (x_mnt * b_mnt !=
              (uint64_t)a_mnt << (FP32_MANT_BITS - FP32_EXP_BITS + 3));

    // Normalise into FP32_BITS bits, collapsing error into bottom bit:
    x_mnt = fp64_normalise(x_mnt, &x_exp);
    x_mnt = x_mnt >> (FP32_BITS - 1) | !!(uint32_t)(x_mnt << 1);

    return fp32_round(x_sgn, x_exp, x_mnt, mode, flags);
}

static uint64_t
fp64_div(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp, c;
    uint64_t a_mnt, b_mnt, x, x_mnt, x0_mnt, x1_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp64_process_NaNs(a, b, mode, flags)))
        return x;

    // Handle infinities and zeroes:
    if ((a_exp == FP64_EXP_INF && b_exp == FP64_EXP_INF) ||
        (!a_mnt && !b_mnt)) {
        *flags |= FPLIB_IOC;
        return fp64_defaultNaN();
    }
    if (a_exp == FP64_EXP_INF || !b_mnt) {
        if (a_exp != FP64_EXP_INF)
            *flags |= FPLIB_DZC;
        return fp64_infinity(a_sgn ^ b_sgn);
    }
    if (!a_mnt || b_exp == FP64_EXP_INF)
        return fp64_zero(a_sgn ^ b_sgn);

    // Find reciprocal of divisor with Newton-Raphson:
    a_mnt = fp64_normalise(a_mnt, &a_exp);
    b_mnt = fp64_normalise(b_mnt, &b_exp);
    x_mnt = ~(uint64_t)0 / (b_mnt >> 31);
    mul64x32(&x0_mnt, &x1_mnt, b_mnt, x_mnt);
    sub128(&x0_mnt, &x1_mnt, 0, (uint64_t)1 << 32, x0_mnt, x1_mnt);
    lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 32);
    mul64x32(&x0_mnt, &x1_mnt, x0_mnt, x_mnt);
    lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 33);

    // Multiply by dividend:
    x_sgn = a_sgn ^ b_sgn;
    x_exp = a_exp - b_exp + FP64_EXP_BIAS + 8;
    mul62x62(&x0_mnt, &x1_mnt, x0_mnt, a_mnt >> 2);
    lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 4);
    x_mnt = x1_mnt;

    // This is an underestimate, so try adding one:
    mul62x62(&x0_mnt, &x1_mnt, b_mnt >> 2, x_mnt + 1);
    c = cmp128(x0_mnt, x1_mnt, 0, a_mnt >> 11);
    if (c <= 0) {
        ++x_mnt;
    }

    x_mnt = fp64_normalise(x_mnt, &x_exp);

    return fp64_round(x_sgn, x_exp, x_mnt << 1 | !!c, mode, flags);
}
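
// fp64_div() has no wide-enough hardware divide for the 52-bit mantissa, so
// it builds a reciprocal of the divisor by Newton-Raphson iteration, where
// each step r' = r * (2 - b*r) roughly doubles the number of correct bits.
// In ordinary arithmetic the scheme is (illustrative sketch):
//
//     r0 ~= 1/b               // initial estimate, ~32 correct bits
//     r1  = r0 * (2 - b*r0)   // one refinement, ~64 correct bits
//     q  ~= a * r1            // underestimate of the quotient
//     if ((q + 1) * b <= a)   // final one-ulp correction (the cmp128)
//         ++q;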

static void
set_fpscr0(FPSCR &fpscr, int flags)
{
    if (flags & FPLIB_IDC) {
        fpscr.idc = 1;
    }
    if (flags & FPLIB_IOC) {
        fpscr.ioc = 1;
    }
    if (flags & FPLIB_DZC) {
        fpscr.dzc = 1;
    }
    if (flags & FPLIB_OFC) {
        fpscr.ofc = 1;
    }
    if (flags & FPLIB_UFC) {
        fpscr.ufc = 1;
    }
    if (flags & FPLIB_IXC) {
        fpscr.ixc = 1;
    }
}

static uint16_t
fp16_scale(uint16_t a, int16_t b, int mode, int *flags)
{
    int a_sgn, a_exp;
    uint16_t a_mnt;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);

    // Handle NaNs:
    if (fp16_is_NaN(a_exp, a_mnt)) {
        return fp16_process_NaN(a, mode, flags);
    }

    // Handle zeroes:
    if (!a_mnt) {
        return fp16_zero(a_sgn);
    }

    // Handle infinities:
    if (a_exp == FP16_EXP_INF) {
        return fp16_infinity(a_sgn);
    }

    b = b < -300 ? -300 : b;
    b = b > 300 ? 300 : b;
    a_exp += b;
    a_mnt <<= 3;

    a_mnt = fp16_normalise(a_mnt, &a_exp);

    return fp16_round(a_sgn, a_exp + FP16_EXP_BITS - 3, a_mnt << 1,
                      mode, flags);
}

static uint32_t
fp32_scale(uint32_t a, int32_t b, int mode, int *flags)
{
    int a_sgn, a_exp;
    uint32_t a_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);

    // Handle NaNs:
    if (fp32_is_NaN(a_exp, a_mnt)) {
        return fp32_process_NaN(a, mode, flags);
    }

    // Handle zeroes:
    if (!a_mnt) {
        return fp32_zero(a_sgn);
    }

    // Handle infinities:
    if (a_exp == FP32_EXP_INF) {
        return fp32_infinity(a_sgn);
    }

    b = b < -300 ? -300 : b;
    b = b > 300 ? 300 : b;
    a_exp += b;
    a_mnt <<= 3;

    a_mnt = fp32_normalise(a_mnt, &a_exp);

    return fp32_round(a_sgn, a_exp + FP32_EXP_BITS - 3, a_mnt << 1,
                      mode, flags);
}

static uint64_t
fp64_scale(uint64_t a, int64_t b, int mode, int *flags)
{
    int a_sgn, a_exp;
    uint64_t a_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);

    // Handle NaNs:
    if (fp64_is_NaN(a_exp, a_mnt)) {
        return fp64_process_NaN(a, mode, flags);
    }

    // Handle zeroes:
    if (!a_mnt) {
        return fp64_zero(a_sgn);
    }

    // Handle infinities:
    if (a_exp == FP64_EXP_INF) {
        return fp64_infinity(a_sgn);
    }

    b = b < -3000 ? -3000 : b;
    b = b > 3000 ? 3000 : b;
    a_exp += b;
    a_mnt <<= 3;

    a_mnt = fp64_normalise(a_mnt, &a_exp);

    return fp64_round(a_sgn, a_exp + FP64_EXP_BITS - 3, a_mnt << 1,
                      mode, flags);
}
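
// fp*_scale() multiplies by 2^b by adjusting the exponent before an
// ordinary round. Clamping b (to +/-300 for the 16- and 32-bit variants,
// +/-3000 for fp64) loses nothing: both bounds already lie far outside the
// representable exponent range, so a clamped value saturates to the same
// overflow or underflow result as the original. Hand-checked sketch
// (illustrative):
//
//     int flags = 0;
//     fp32_scale(0x3f800000, 1, 0, &flags);  // 1.0f * 2^1 == 0x40000000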
2024
static uint16_t
fp16_sqrt(uint16_t a, int mode, int *flags)
{
    int a_sgn, a_exp, x_sgn, x_exp;
    uint16_t a_mnt, x_mnt;
    uint32_t x, t0, t1;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);

    // Handle NaNs:
    if (fp16_is_NaN(a_exp, a_mnt))
        return fp16_process_NaN(a, mode, flags);

    // Handle infinities and zeroes:
    if (!a_mnt)
        return fp16_zero(a_sgn);
    if (a_exp == FP16_EXP_INF && !a_sgn)
        return fp16_infinity(a_sgn);
    if (a_sgn) {
        *flags |= FPLIB_IOC;
        return fp16_defaultNaN();
    }

    a_mnt = fp16_normalise(a_mnt, &a_exp);
    if (a_exp & 1) {
        ++a_exp;
        a_mnt >>= 1;
    }

    // x = (a * 3 + 5) / 8
    x = ((uint32_t)a_mnt << 14) + ((uint32_t)a_mnt << 13) + ((uint32_t)5 << 28);

    // x = (a / x + x) / 2; // 8-bit accuracy
    x = (((uint32_t)a_mnt << 16) / (x >> 15) + (x >> 16)) << 15;

    // x = (a / x + x) / 2; // 16-bit accuracy
    x = (((uint32_t)a_mnt << 16) / (x >> 15) + (x >> 16)) << 15;

    x_sgn = 0;
    x_exp = (a_exp + 27) >> 1;
    x_mnt = ((x - (1 << 18)) >> 19) + 1;
    t1 = (uint32_t)x_mnt * x_mnt;
    t0 = (uint32_t)a_mnt << 9;
    if (t1 > t0) {
        --x_mnt;
    }

    x_mnt = fp16_normalise(x_mnt, &x_exp);

    return fp16_round(x_sgn, x_exp, x_mnt << 1 | (t1 != t0), mode, flags);
}

static uint32_t
fp32_sqrt(uint32_t a, int mode, int *flags)
{
    int a_sgn, a_exp, x_sgn, x_exp;
    uint32_t a_mnt, x, x_mnt;
    uint64_t t0, t1;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);

    // Handle NaNs:
    if (fp32_is_NaN(a_exp, a_mnt))
        return fp32_process_NaN(a, mode, flags);

    // Handle infinities and zeroes:
    if (!a_mnt)
        return fp32_zero(a_sgn);
    if (a_exp == FP32_EXP_INF && !a_sgn)
        return fp32_infinity(a_sgn);
    if (a_sgn) {
        *flags |= FPLIB_IOC;
        return fp32_defaultNaN();
    }

    a_mnt = fp32_normalise(a_mnt, &a_exp);
    if (!(a_exp & 1)) {
        ++a_exp;
        a_mnt >>= 1;
    }

    // x = (a * 3 + 5) / 8
    x = (a_mnt >> 2) + (a_mnt >> 3) + ((uint32_t)5 << 28);

    // x = (a / x + x) / 2; // 8-bit accuracy
    x = (a_mnt / (x >> 15) + (x >> 16)) << 15;

    // x = (a / x + x) / 2; // 16-bit accuracy
    x = (a_mnt / (x >> 15) + (x >> 16)) << 15;

    // x = (a / x + x) / 2; // 32-bit accuracy
    x = ((((uint64_t)a_mnt << 32) / x) >> 2) + (x >> 1);

    x_sgn = 0;
    x_exp = (a_exp + 147) >> 1;
    x_mnt = ((x - (1 << 5)) >> 6) + 1;
    t1 = (uint64_t)x_mnt * x_mnt;
    t0 = (uint64_t)a_mnt << 19;
    if (t1 > t0) {
        --x_mnt;
    }

    x_mnt = fp32_normalise(x_mnt, &x_exp);

    return fp32_round(x_sgn, x_exp, x_mnt << 1 | (t1 != t0), mode, flags);
}

static uint64_t
fp64_sqrt(uint64_t a, int mode, int *flags)
{
    int a_sgn, a_exp, x_sgn, x_exp, c;
    uint64_t a_mnt, x_mnt, r, x0, x1;
    uint32_t x;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);

    // Handle NaNs:
    if (fp64_is_NaN(a_exp, a_mnt))
        return fp64_process_NaN(a, mode, flags);

    // Handle infinities and zeroes:
    if (!a_mnt)
        return fp64_zero(a_sgn);
    if (a_exp == FP64_EXP_INF && !a_sgn)
        return fp64_infinity(a_sgn);
    if (a_sgn) {
        *flags |= FPLIB_IOC;
        return fp64_defaultNaN();
    }

    a_mnt = fp64_normalise(a_mnt, &a_exp);
    if (a_exp & 1) {
        ++a_exp;
        a_mnt >>= 1;
    }

    // x = (a * 3 + 5) / 8
    x = (a_mnt >> 34) + (a_mnt >> 35) + ((uint32_t)5 << 28);

    // x = (a / x + x) / 2; // 8-bit accuracy
    x = ((a_mnt >> 32) / (x >> 15) + (x >> 16)) << 15;

    // x = (a / x + x) / 2; // 16-bit accuracy
    x = ((a_mnt >> 32) / (x >> 15) + (x >> 16)) << 15;

    // x = (a / x + x) / 2; // 32-bit accuracy
    x = ((a_mnt / x) >> 2) + (x >> 1);

    // r = 1 / x; // 32-bit accuracy
    r = ((uint64_t)1 << 62) / x;

    // r = r * (2 - x * r); // 64-bit accuracy
    mul64x32(&x0, &x1, -(uint64_t)x * r << 1, r);
    lsr128(&x0, &x1, x0, x1, 31);

    // x = (x + a * r) / 2; // 64-bit accuracy
    mul62x62(&x0, &x1, a_mnt >> 10, x0 >> 2);
    lsl128(&x0, &x1, x0, x1, 5);
    lsr128(&x0, &x1, x0, x1, 56);

    x0 = ((uint64_t)x << 31) + (x0 >> 1);

    x_sgn = 0;
    x_exp = (a_exp + 1053) >> 1;
    x_mnt = x0;
    x_mnt = ((x_mnt - (1 << 8)) >> 9) + 1;
    mul62x62(&x0, &x1, x_mnt, x_mnt);
    lsl128(&x0, &x1, x0, x1, 19);
    c = cmp128(x0, x1, 0, a_mnt);
    if (c > 0)
        --x_mnt;

    x_mnt = fp64_normalise(x_mnt, &x_exp);

    return fp64_round(x_sgn, x_exp, x_mnt << 1 | !!c, mode, flags);
}

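// Convert the FPSCR/FPCR control bits into the packed fplib mode word:
// bits <23:22> give the rounding mode (FPLIB_RN/RP/RM/RZ), bit <24> is
// flush-to-zero (FPLIB_FZ), bit <25> is default-NaN (FPLIB_DN), and
// bit <19> is half-precision flush-to-zero (FPLIB_FZ16).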
static int
modeConv(FPSCR fpscr)
{
    uint32_t x = (uint32_t)fpscr;
    return (x >> 22 & 0xf) | (x >> 19 & 1 ? FPLIB_FZ16 : 0);
    // AHP bit is ignored. Only fplibConvert uses AHP.
}

static void
set_fpscr(FPSCR &fpscr, int flags)
{
    // translate back to FPSCR
    bool underflow = false;
    if (flags & FPLIB_IDC) {
        fpscr.idc = 1;
    }
    if (flags & FPLIB_IOC) {
        fpscr.ioc = 1;
    }
    if (flags & FPLIB_DZC) {
        fpscr.dzc = 1;
    }
    if (flags & FPLIB_OFC) {
        fpscr.ofc = 1;
    }
    if (flags & FPLIB_UFC) {
        // Remember the underflow so that, with flush-to-zero enabled,
        // the accompanying inexact flag can be suppressed below.
        underflow = true;
        fpscr.ufc = 1;
    }
    if ((flags & FPLIB_IXC) && !(underflow && fpscr.fz)) {
        fpscr.ixc = 1;
    }
}

template <>
bool
fplibCompareEQ(uint16_t a, uint16_t b, FPSCR &fpscr)
{
    int flags = 0;
    int x = fp16_compare_eq(a, b, modeConv(fpscr), &flags);
    set_fpscr(fpscr, flags);
    return x;
}

template <>
bool
fplibCompareGE(uint16_t a, uint16_t b, FPSCR &fpscr)
{
    int flags = 0;
    int x = fp16_compare_ge(a, b, modeConv(fpscr), &flags);
    set_fpscr(fpscr, flags);
    return x;
}

template <>
bool
fplibCompareGT(uint16_t a, uint16_t b, FPSCR &fpscr)
{
    int flags = 0;
    int x = fp16_compare_gt(a, b, modeConv(fpscr), &flags);
    set_fpscr(fpscr, flags);
    return x;
}

template <>
bool
fplibCompareUN(uint16_t a, uint16_t b, FPSCR &fpscr)
{
    int flags = 0;
    int x = fp16_compare_un(a, b, modeConv(fpscr), &flags);
    set_fpscr(fpscr, flags);
    return x;
}

template <>
bool
fplibCompareEQ(uint32_t a, uint32_t b, FPSCR &fpscr)
{
    int flags = 0;
    int x = fp32_compare_eq(a, b, modeConv(fpscr), &flags);
    set_fpscr(fpscr, flags);
    return x;
}

template <>
bool
fplibCompareGE(uint32_t a, uint32_t b, FPSCR &fpscr)
{
    int flags = 0;
    int x = fp32_compare_ge(a, b, modeConv(fpscr), &flags);
    set_fpscr(fpscr, flags);
    return x;
}

template <>
bool
fplibCompareGT(uint32_t a, uint32_t b, FPSCR &fpscr)
{
    int flags = 0;
    int x = fp32_compare_gt(a, b, modeConv(fpscr), &flags);
    set_fpscr(fpscr, flags);
    return x;
}

template <>
bool
fplibCompareUN(uint32_t a, uint32_t b, FPSCR &fpscr)
{
    int flags = 0;
    int x = fp32_compare_un(a, b, modeConv(fpscr), &flags);
    set_fpscr(fpscr, flags);
    return x;
}

template <>
bool
fplibCompareEQ(uint64_t a, uint64_t b, FPSCR &fpscr)
{
    int flags = 0;
    int x = fp64_compare_eq(a, b, modeConv(fpscr), &flags);
    set_fpscr(fpscr, flags);
    return x;
}

template <>
bool
fplibCompareGE(uint64_t a, uint64_t b, FPSCR &fpscr)
{
    int flags = 0;
    int x = fp64_compare_ge(a, b, modeConv(fpscr), &flags);
    set_fpscr(fpscr, flags);
    return x;
}

template <>
bool
fplibCompareGT(uint64_t a, uint64_t b, FPSCR &fpscr)
{
    int flags = 0;
    int x = fp64_compare_gt(a, b, modeConv(fpscr), &flags);
    set_fpscr(fpscr, flags);
    return x;
}

template <>
bool
fplibCompareUN(uint64_t a, uint64_t b, FPSCR &fpscr)
{
    int flags = 0;
    int x = fp64_compare_un(a, b, modeConv(fpscr), &flags);
    set_fpscr(fpscr, flags);
    return x;
}

template <>
uint16_t
fplibAbs(uint16_t op)
{
    return op & ~(1ULL << (FP16_BITS - 1));
}

template <>
uint32_t
fplibAbs(uint32_t op)
{
    return op & ~(1ULL << (FP32_BITS - 1));
}

template <>
uint64_t
fplibAbs(uint64_t op)
{
    return op & ~(1ULL << (FP64_BITS - 1));
}

template <>
uint16_t
fplibAdd(uint16_t op1, uint16_t op2, FPSCR &fpscr)
{
    int flags = 0;
    uint16_t result = fp16_add(op1, op2, 0, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}

template <>
uint32_t
fplibAdd(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    int flags = 0;
    uint32_t result = fp32_add(op1, op2, 0, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}

template <>
uint64_t
fplibAdd(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    int flags = 0;
    uint64_t result = fp64_add(op1, op2, 0, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}

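// fplibCompare returns the NZCV condition flags packed into a nibble, as
// written to the status register by FCMP/FCMPE: 0x8 (N) for less than,
// 0x2 (C) for greater than, 0x6 (ZC) for equal and 0x3 (CV) for
// unordered.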
template <>
int
fplibCompare(uint16_t op1, uint16_t op2, bool signal_nans, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2, result;
    uint16_t mnt1, mnt2;

    fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    if (fp16_is_NaN(exp1, mnt1) || fp16_is_NaN(exp2, mnt2)) {
        result = 3;
        if (fp16_is_signalling_NaN(exp1, mnt1) ||
            fp16_is_signalling_NaN(exp2, mnt2) || signal_nans)
            flags |= FPLIB_IOC;
    } else {
        if (op1 == op2 || (!mnt1 && !mnt2)) {
            result = 6;
        } else if (sgn1 != sgn2) {
            result = sgn1 ? 8 : 2;
        } else if (exp1 != exp2) {
            result = sgn1 ^ (exp1 < exp2) ? 8 : 2;
        } else {
            result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2;
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
int
fplibCompare(uint32_t op1, uint32_t op2, bool signal_nans, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2, result;
    uint32_t mnt1, mnt2;

    fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    if (fp32_is_NaN(exp1, mnt1) || fp32_is_NaN(exp2, mnt2)) {
        result = 3;
        if (fp32_is_signalling_NaN(exp1, mnt1) ||
            fp32_is_signalling_NaN(exp2, mnt2) || signal_nans)
            flags |= FPLIB_IOC;
    } else {
        if (op1 == op2 || (!mnt1 && !mnt2)) {
            result = 6;
        } else if (sgn1 != sgn2) {
            result = sgn1 ? 8 : 2;
        } else if (exp1 != exp2) {
            result = sgn1 ^ (exp1 < exp2) ? 8 : 2;
        } else {
            result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2;
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
int
fplibCompare(uint64_t op1, uint64_t op2, bool signal_nans, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2, result;
    uint64_t mnt1, mnt2;

    fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    if (fp64_is_NaN(exp1, mnt1) || fp64_is_NaN(exp2, mnt2)) {
        result = 3;
        if (fp64_is_signalling_NaN(exp1, mnt1) ||
            fp64_is_signalling_NaN(exp2, mnt2) || signal_nans)
            flags |= FPLIB_IOC;
    } else {
        if (op1 == op2 || (!mnt1 && !mnt2)) {
            result = 6;
        } else if (sgn1 != sgn2) {
            result = sgn1 ? 8 : 2;
        } else if (exp1 != exp2) {
            result = sgn1 ^ (exp1 < exp2) ? 8 : 2;
        } else {
            result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2;
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}

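// The fp*_FPConvertNaN_* helpers convert a NaN between formats: the sign
// bit is kept, the exponent is set to all-ones, the quiet bit is forced,
// and as many payload bits as fit in the target mantissa are shifted
// across (widening shifts left, narrowing discards low-order payload).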
static uint16_t
fp16_FPConvertNaN_32(uint32_t op)
{
    return fp16_pack(op >> (FP32_BITS - 1), FP16_EXP_INF,
                     1ULL << (FP16_MANT_BITS - 1) |
                     op >> (FP32_MANT_BITS - FP16_MANT_BITS));
}

static uint16_t
fp16_FPConvertNaN_64(uint64_t op)
{
    return fp16_pack(op >> (FP64_BITS - 1), FP16_EXP_INF,
                     1ULL << (FP16_MANT_BITS - 1) |
                     op >> (FP64_MANT_BITS - FP16_MANT_BITS));
}

static uint32_t
fp32_FPConvertNaN_16(uint16_t op)
{
    return fp32_pack(op >> (FP16_BITS - 1), FP32_EXP_INF,
                     1ULL << (FP32_MANT_BITS - 1) |
                     (uint32_t)op << (FP32_MANT_BITS - FP16_MANT_BITS));
}

static uint32_t
fp32_FPConvertNaN_64(uint64_t op)
{
    return fp32_pack(op >> (FP64_BITS - 1), FP32_EXP_INF,
                     1ULL << (FP32_MANT_BITS - 1) |
                     op >> (FP64_MANT_BITS - FP32_MANT_BITS));
}

static uint64_t
fp64_FPConvertNaN_16(uint16_t op)
{
    return fp64_pack(op >> (FP16_BITS - 1), FP64_EXP_INF,
                     1ULL << (FP64_MANT_BITS - 1) |
                     (uint64_t)op << (FP64_MANT_BITS - FP16_MANT_BITS));
}

static uint64_t
fp64_FPConvertNaN_32(uint32_t op)
{
    return fp64_pack(op >> (FP32_BITS - 1), FP64_EXP_INF,
                     1ULL << (FP64_MANT_BITS - 1) |
                     (uint64_t)op << (FP64_MANT_BITS - FP32_MANT_BITS));
}

static uint16_t
fp16_FPOnePointFive(int sgn)
{
    return fp16_pack(sgn, FP16_EXP_BIAS, 1ULL << (FP16_MANT_BITS - 1));
}

static uint32_t
fp32_FPOnePointFive(int sgn)
{
    return fp32_pack(sgn, FP32_EXP_BIAS, 1ULL << (FP32_MANT_BITS - 1));
}

static uint64_t
fp64_FPOnePointFive(int sgn)
{
    return fp64_pack(sgn, FP64_EXP_BIAS, 1ULL << (FP64_MANT_BITS - 1));
}

static uint16_t
fp16_FPThree(int sgn)
{
    return fp16_pack(sgn, FP16_EXP_BIAS + 1, 1ULL << (FP16_MANT_BITS - 1));
}

static uint32_t
fp32_FPThree(int sgn)
{
    return fp32_pack(sgn, FP32_EXP_BIAS + 1, 1ULL << (FP32_MANT_BITS - 1));
}

static uint64_t
fp64_FPThree(int sgn)
{
    return fp64_pack(sgn, FP64_EXP_BIAS + 1, 1ULL << (FP64_MANT_BITS - 1));
}

static uint16_t
fp16_FPTwo(int sgn)
{
    return fp16_pack(sgn, FP16_EXP_BIAS + 1, 0);
}

static uint32_t
fp32_FPTwo(int sgn)
{
    return fp32_pack(sgn, FP32_EXP_BIAS + 1, 0);
}

static uint64_t
fp64_FPTwo(int sgn)
{
    return fp64_pack(sgn, FP64_EXP_BIAS + 1, 0);
}

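// Conversions to half precision honour FPSCR.AHP: the alternative
// half-precision format has no infinities or NaNs, so with AHP set a NaN
// input converts to zero and an infinity saturates to the maximum
// magnitude, both raising Invalid Operation. Conversions from half
// precision with AHP set treat all-ones exponents as ordinary values.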
template <>
uint16_t
fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint32_t mnt;
    uint16_t result;

    // Unpack floating-point operand optionally with flush-to-zero:
    fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    bool alt_hp = fpscr.ahp;

    if (fp32_is_NaN(exp, mnt)) {
        if (alt_hp) {
            result = fp16_zero(sgn);
        } else if (fpscr.dn) {
            result = fp16_defaultNaN();
        } else {
            result = fp16_FPConvertNaN_32(op);
        }
        if (!(mnt >> (FP32_MANT_BITS - 1) & 1) || alt_hp) {
            flags |= FPLIB_IOC;
        }
    } else if (exp == FP32_EXP_INF) {
        if (alt_hp) {
            result = ((uint16_t)sgn << (FP16_BITS - 1) |
                      ((1ULL << (FP16_BITS - 1)) - 1));
            flags |= FPLIB_IOC;
        } else {
            result = fp16_infinity(sgn);
        }
    } else if (!mnt) {
        result = fp16_zero(sgn);
    } else {
        result =
            fp16_round_(sgn, exp - FP32_EXP_BIAS + FP16_EXP_BIAS,
                        mnt >> (FP32_MANT_BITS - FP16_BITS) |
                        !!(mnt & ((1ULL << (FP32_MANT_BITS - FP16_BITS)) - 1)),
                        rounding, (mode & 0xf) | alt_hp << 4, &flags);
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint16_t
fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint64_t mnt;
    uint16_t result;

    // Unpack floating-point operand optionally with flush-to-zero:
    fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    bool alt_hp = fpscr.ahp;

    if (fp64_is_NaN(exp, mnt)) {
        if (alt_hp) {
            result = fp16_zero(sgn);
        } else if (fpscr.dn) {
            result = fp16_defaultNaN();
        } else {
            result = fp16_FPConvertNaN_64(op);
        }
        if (!(mnt >> (FP64_MANT_BITS - 1) & 1) || alt_hp) {
            flags |= FPLIB_IOC;
        }
    } else if (exp == FP64_EXP_INF) {
        if (alt_hp) {
            result = ((uint16_t)sgn << (FP16_BITS - 1) |
                      ((1ULL << (FP16_BITS - 1)) - 1));
            flags |= FPLIB_IOC;
        } else {
            result = fp16_infinity(sgn);
        }
    } else if (!mnt) {
        result = fp16_zero(sgn);
    } else {
        result =
            fp16_round_(sgn, exp - FP64_EXP_BIAS + FP16_EXP_BIAS,
                        mnt >> (FP64_MANT_BITS - FP16_BITS) |
                        !!(mnt & ((1ULL << (FP64_MANT_BITS - FP16_BITS)) - 1)),
                        rounding, (mode & 0xf) | alt_hp << 4, &flags);
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint32_t
fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint16_t mnt;
    uint32_t result;

    // Unpack floating-point operand optionally with flush-to-zero:
    fp16_unpack(&sgn, &exp, &mnt, op, mode & 0xf, &flags);

    if (fp16_is_NaN(exp, mnt) && !fpscr.ahp) {
        if (fpscr.dn) {
            result = fp32_defaultNaN();
        } else {
            result = fp32_FPConvertNaN_16(op);
        }
        if (!(mnt >> (FP16_MANT_BITS - 1) & 1)) {
            flags |= FPLIB_IOC;
        }
    } else if (exp == FP16_EXP_INF && !fpscr.ahp) {
        result = fp32_infinity(sgn);
    } else if (!mnt) {
        result = fp32_zero(sgn);
    } else {
        mnt = fp16_normalise(mnt, &exp);
        result = fp32_pack(sgn, (exp - FP16_EXP_BIAS +
                                 FP32_EXP_BIAS + FP16_EXP_BITS),
                           (uint32_t)mnt << (FP32_MANT_BITS - FP16_BITS + 1));
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint32_t
fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint64_t mnt;
    uint32_t result;

    // Unpack floating-point operand optionally with flush-to-zero:
    fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    if (fp64_is_NaN(exp, mnt)) {
        if (fpscr.dn) {
            result = fp32_defaultNaN();
        } else {
            result = fp32_FPConvertNaN_64(op);
        }
        if (!(mnt >> (FP64_MANT_BITS - 1) & 1)) {
            flags |= FPLIB_IOC;
        }
    } else if (exp == FP64_EXP_INF) {
        result = fp32_infinity(sgn);
    } else if (!mnt) {
        result = fp32_zero(sgn);
    } else {
        result =
            fp32_round_(sgn, exp - FP64_EXP_BIAS + FP32_EXP_BIAS,
                        mnt >> (FP64_MANT_BITS - FP32_BITS) |
                        !!(mnt & ((1ULL << (FP64_MANT_BITS - FP32_BITS)) - 1)),
                        rounding, mode, &flags);
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint64_t
fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint16_t mnt;
    uint64_t result;

    // Unpack floating-point operand optionally with flush-to-zero:
    fp16_unpack(&sgn, &exp, &mnt, op, mode & 0xf, &flags);

    if (fp16_is_NaN(exp, mnt) && !fpscr.ahp) {
        if (fpscr.dn) {
            result = fp64_defaultNaN();
        } else {
            result = fp64_FPConvertNaN_16(op);
        }
        if (!(mnt >> (FP16_MANT_BITS - 1) & 1)) {
            flags |= FPLIB_IOC;
        }
    } else if (exp == FP16_EXP_INF && !fpscr.ahp) {
        result = fp64_infinity(sgn);
    } else if (!mnt) {
        result = fp64_zero(sgn);
    } else {
        mnt = fp16_normalise(mnt, &exp);
        result = fp64_pack(sgn, (exp - FP16_EXP_BIAS +
                                 FP64_EXP_BIAS + FP16_EXP_BITS),
                           (uint64_t)mnt << (FP64_MANT_BITS - FP16_BITS + 1));
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint64_t
fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint32_t mnt;
    uint64_t result;

    // Unpack floating-point operand optionally with flush-to-zero:
    fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    if (fp32_is_NaN(exp, mnt)) {
        if (fpscr.dn) {
            result = fp64_defaultNaN();
        } else {
            result = fp64_FPConvertNaN_32(op);
        }
        if (!(mnt >> (FP32_MANT_BITS - 1) & 1)) {
            flags |= FPLIB_IOC;
        }
    } else if (exp == FP32_EXP_INF) {
        result = fp64_infinity(sgn);
    } else if (!mnt) {
        result = fp64_zero(sgn);
    } else {
        mnt = fp32_normalise(mnt, &exp);
        result = fp64_pack(sgn, (exp - FP32_EXP_BIAS +
                                 FP64_EXP_BIAS + FP32_EXP_BITS),
                           (uint64_t)mnt << (FP64_MANT_BITS - FP32_BITS + 1));
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint16_t
fplibMulAdd(uint16_t addend, uint16_t op1, uint16_t op2, FPSCR &fpscr)
{
    int flags = 0;
    uint16_t result = fp16_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}

template <>
uint32_t
fplibMulAdd(uint32_t addend, uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    int flags = 0;
    uint32_t result = fp32_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}

template <>
uint64_t
fplibMulAdd(uint64_t addend, uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    int flags = 0;
    uint64_t result = fp64_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}

template <>
uint16_t
fplibDiv(uint16_t op1, uint16_t op2, FPSCR &fpscr)
{
    int flags = 0;
    uint16_t result = fp16_div(op1, op2, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}

template <>
uint32_t
fplibDiv(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    int flags = 0;
    uint32_t result = fp32_div(op1, op2, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}

template <>
uint64_t
fplibDiv(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    int flags = 0;
    uint64_t result = fp64_div(op1, op2, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}

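// fplibExpA implements the FEXPA coefficient lookup used to seed
// exponential evaluation: the low bits of op index a table holding the
// mantissa of 2^(i/32) (FP16, 5-bit index) or 2^(i/64) (FP32/FP64, 6-bit
// index), and the next bits of op are placed directly in the result's
// exponent field.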
template <>
uint16_t
fplibExpA(uint16_t op)
{
    static uint16_t coeff[32] = {
        0x0000,
        0x0016,
        0x002d,
        0x0045,
        0x005d,
        0x0075,
        0x008e,
        0x00a8,
        0x00c2,
        0x00dc,
        0x00f8,
        0x0114,
        0x0130,
        0x014d,
        0x016b,
        0x0189,
        0x01a8,
        0x01c8,
        0x01e8,
        0x0209,
        0x022b,
        0x024e,
        0x0271,
        0x0295,
        0x02ba,
        0x02e0,
        0x0306,
        0x032e,
        0x0356,
        0x037f,
        0x03a9,
        0x03d4
    };
    return ((((op >> 5) & ((1 << FP16_EXP_BITS) - 1)) << FP16_MANT_BITS) |
            coeff[op & ((1 << 5) - 1)]);
}

template <>
uint32_t
fplibExpA(uint32_t op)
{
    static uint32_t coeff[64] = {
        0x000000,
        0x0164d2,
        0x02cd87,
        0x043a29,
        0x05aac3,
        0x071f62,
        0x08980f,
        0x0a14d5,
        0x0b95c2,
        0x0d1adf,
        0x0ea43a,
        0x1031dc,
        0x11c3d3,
        0x135a2b,
        0x14f4f0,
        0x16942d,
        0x1837f0,
        0x19e046,
        0x1b8d3a,
        0x1d3eda,
        0x1ef532,
        0x20b051,
        0x227043,
        0x243516,
        0x25fed7,
        0x27cd94,
        0x29a15b,
        0x2b7a3a,
        0x2d583f,
        0x2f3b79,
        0x3123f6,
        0x3311c4,
        0x3504f3,
        0x36fd92,
        0x38fbaf,
        0x3aff5b,
        0x3d08a4,
        0x3f179a,
        0x412c4d,
        0x4346cd,
        0x45672a,
        0x478d75,
        0x49b9be,
        0x4bec15,
        0x4e248c,
        0x506334,
        0x52a81e,
        0x54f35b,
        0x5744fd,
        0x599d16,
        0x5bfbb8,
        0x5e60f5,
        0x60ccdf,
        0x633f89,
        0x65b907,
        0x68396a,
        0x6ac0c7,
        0x6d4f30,
        0x6fe4ba,
        0x728177,
        0x75257d,
        0x77d0df,
        0x7a83b3,
        0x7d3e0c
    };
    return ((((op >> 6) & ((1 << FP32_EXP_BITS) - 1)) << FP32_MANT_BITS) |
            coeff[op & ((1 << 6) - 1)]);
}

template <>
uint64_t
fplibExpA(uint64_t op)
{
    static uint64_t coeff[64] = {
        0x0000000000000ULL,
        0x02c9a3e778061ULL,
        0x059b0d3158574ULL,
        0x0874518759bc8ULL,
        0x0b5586cf9890fULL,
        0x0e3ec32d3d1a2ULL,
        0x11301d0125b51ULL,
        0x1429aaea92de0ULL,
        0x172b83c7d517bULL,
        0x1a35beb6fcb75ULL,
        0x1d4873168b9aaULL,
        0x2063b88628cd6ULL,
        0x2387a6e756238ULL,
        0x26b4565e27cddULL,
        0x29e9df51fdee1ULL,
        0x2d285a6e4030bULL,
        0x306fe0a31b715ULL,
        0x33c08b26416ffULL,
        0x371a7373aa9cbULL,
        0x3a7db34e59ff7ULL,
        0x3dea64c123422ULL,
        0x4160a21f72e2aULL,
        0x44e086061892dULL,
        0x486a2b5c13cd0ULL,
        0x4bfdad5362a27ULL,
        0x4f9b2769d2ca7ULL,
        0x5342b569d4f82ULL,
        0x56f4736b527daULL,
        0x5ab07dd485429ULL,
        0x5e76f15ad2148ULL,
        0x6247eb03a5585ULL,
        0x6623882552225ULL,
        0x6a09e667f3bcdULL,
        0x6dfb23c651a2fULL,
        0x71f75e8ec5f74ULL,
        0x75feb564267c9ULL,
        0x7a11473eb0187ULL,
        0x7e2f336cf4e62ULL,
        0x82589994cce13ULL,
        0x868d99b4492edULL,
        0x8ace5422aa0dbULL,
        0x8f1ae99157736ULL,
        0x93737b0cdc5e5ULL,
        0x97d829fde4e50ULL,
        0x9c49182a3f090ULL,
        0xa0c667b5de565ULL,
        0xa5503b23e255dULL,
        0xa9e6b5579fdbfULL,
        0xae89f995ad3adULL,
        0xb33a2b84f15fbULL,
        0xb7f76f2fb5e47ULL,
        0xbcc1e904bc1d2ULL,
        0xc199bdd85529cULL,
        0xc67f12e57d14bULL,
        0xcb720dcef9069ULL,
        0xd072d4a07897cULL,
        0xd5818dcfba487ULL,
        0xda9e603db3285ULL,
        0xdfc97337b9b5fULL,
        0xe502ee78b3ff6ULL,
        0xea4afa2a490daULL,
        0xefa1bee615a27ULL,
        0xf50765b6e4540ULL,
        0xfa7c1819e90d8ULL
    };
    return ((((op >> 6) & ((1 << FP64_EXP_BITS) - 1)) << FP64_MANT_BITS) |
            coeff[op & ((1 << 6) - 1)]);
}

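// fp*_repack rebuilds a value from its unpacked fields, mapping mantissas
// without the implicit leading one back to a biased exponent of zero
// (zeroes and denormals). fp*_minmaxnum implements the FMAXNM/FMINNM
// quiet-NaN rule: "~(op << 1) >> MANT_BITS" is zero exactly when op is a
// quiet NaN, and a single quiet-NaN operand is replaced by +/-infinity so
// that the numerical operand wins the comparison.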
static uint16_t
fp16_repack(int sgn, int exp, uint16_t mnt)
{
    return fp16_pack(sgn, mnt >> FP16_MANT_BITS ? exp : 0, mnt);
}

static uint32_t
fp32_repack(int sgn, int exp, uint32_t mnt)
{
    return fp32_pack(sgn, mnt >> FP32_MANT_BITS ? exp : 0, mnt);
}

static uint64_t
fp64_repack(int sgn, int exp, uint64_t mnt)
{
    return fp64_pack(sgn, mnt >> FP64_MANT_BITS ? exp : 0, mnt);
}

static void
fp16_minmaxnum(uint16_t *op1, uint16_t *op2, int sgn)
{
    // Treat a single quiet-NaN as +Infinity/-Infinity
    if (!((uint16_t)~(*op1 << 1) >> FP16_MANT_BITS) &&
        (uint16_t)~(*op2 << 1) >> FP16_MANT_BITS)
        *op1 = fp16_infinity(sgn);
    if (!((uint16_t)~(*op2 << 1) >> FP16_MANT_BITS) &&
        (uint16_t)~(*op1 << 1) >> FP16_MANT_BITS)
        *op2 = fp16_infinity(sgn);
}

static void
fp32_minmaxnum(uint32_t *op1, uint32_t *op2, int sgn)
{
    // Treat a single quiet-NaN as +Infinity/-Infinity
    if (!((uint32_t)~(*op1 << 1) >> FP32_MANT_BITS) &&
        (uint32_t)~(*op2 << 1) >> FP32_MANT_BITS)
        *op1 = fp32_infinity(sgn);
    if (!((uint32_t)~(*op2 << 1) >> FP32_MANT_BITS) &&
        (uint32_t)~(*op1 << 1) >> FP32_MANT_BITS)
        *op2 = fp32_infinity(sgn);
}

static void
fp64_minmaxnum(uint64_t *op1, uint64_t *op2, int sgn)
{
    // Treat a single quiet-NaN as +Infinity/-Infinity
    if (!((uint64_t)~(*op1 << 1) >> FP64_MANT_BITS) &&
        (uint64_t)~(*op2 << 1) >> FP64_MANT_BITS)
        *op1 = fp64_infinity(sgn);
    if (!((uint64_t)~(*op2 << 1) >> FP64_MANT_BITS) &&
        (uint64_t)~(*op1 << 1) >> FP64_MANT_BITS)
        *op2 = fp64_infinity(sgn);
}

template <>
uint16_t
fplibMax(uint16_t op1, uint16_t op2, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint16_t mnt1, mnt2, x, result;

    fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    if ((x = fp16_process_NaNs(op1, op2, mode, &flags))) {
        result = x;
    } else {
        result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ?
                  fp16_repack(sgn1, exp1, mnt1) :
                  fp16_repack(sgn2, exp2, mnt2));
    }
    set_fpscr0(fpscr, flags);
    return result;
}

template <>
uint32_t
fplibMax(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint32_t mnt1, mnt2, x, result;

    fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    if ((x = fp32_process_NaNs(op1, op2, mode, &flags))) {
        result = x;
    } else {
        result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ?
                  fp32_repack(sgn1, exp1, mnt1) :
                  fp32_repack(sgn2, exp2, mnt2));
    }
    set_fpscr0(fpscr, flags);
    return result;
}

template <>
uint64_t
fplibMax(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint64_t mnt1, mnt2, x, result;

    fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    if ((x = fp64_process_NaNs(op1, op2, mode, &flags))) {
        result = x;
    } else {
        result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ?
                  fp64_repack(sgn1, exp1, mnt1) :
                  fp64_repack(sgn2, exp2, mnt2));
    }
    set_fpscr0(fpscr, flags);
    return result;
}

template <>
uint16_t
fplibMaxNum(uint16_t op1, uint16_t op2, FPSCR &fpscr)
{
    fp16_minmaxnum(&op1, &op2, 1);
    return fplibMax<uint16_t>(op1, op2, fpscr);
}

template <>
uint32_t
fplibMaxNum(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    fp32_minmaxnum(&op1, &op2, 1);
    return fplibMax<uint32_t>(op1, op2, fpscr);
}

template <>
uint64_t
fplibMaxNum(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    fp64_minmaxnum(&op1, &op2, 1);
    return fplibMax<uint64_t>(op1, op2, fpscr);
}

template <>
uint16_t
fplibMin(uint16_t op1, uint16_t op2, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint16_t mnt1, mnt2, x, result;

    fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    if ((x = fp16_process_NaNs(op1, op2, mode, &flags))) {
        result = x;
    } else {
        result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ?
                  fp16_repack(sgn1, exp1, mnt1) :
                  fp16_repack(sgn2, exp2, mnt2));
    }
    set_fpscr0(fpscr, flags);
    return result;
}

template <>
uint32_t
fplibMin(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint32_t mnt1, mnt2, x, result;

    fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    if ((x = fp32_process_NaNs(op1, op2, mode, &flags))) {
        result = x;
    } else {
        result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ?
                  fp32_repack(sgn1, exp1, mnt1) :
                  fp32_repack(sgn2, exp2, mnt2));
    }
    set_fpscr0(fpscr, flags);
    return result;
}

template <>
uint64_t
fplibMin(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint64_t mnt1, mnt2, x, result;

    fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    if ((x = fp64_process_NaNs(op1, op2, mode, &flags))) {
        result = x;
    } else {
        result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ?
                  fp64_repack(sgn1, exp1, mnt1) :
                  fp64_repack(sgn2, exp2, mnt2));
    }
    set_fpscr0(fpscr, flags);
    return result;
}

template <>
uint16_t
fplibMinNum(uint16_t op1, uint16_t op2, FPSCR &fpscr)
{
    fp16_minmaxnum(&op1, &op2, 0);
    return fplibMin<uint16_t>(op1, op2, fpscr);
}

template <>
uint32_t
fplibMinNum(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    fp32_minmaxnum(&op1, &op2, 0);
    return fplibMin<uint32_t>(op1, op2, fpscr);
}

template <>
uint64_t
fplibMinNum(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    fp64_minmaxnum(&op1, &op2, 0);
    return fplibMin<uint64_t>(op1, op2, fpscr);
}

template <>
uint16_t
fplibMul(uint16_t op1, uint16_t op2, FPSCR &fpscr)
{
    int flags = 0;
    uint16_t result = fp16_mul(op1, op2, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}

template <>
uint32_t
fplibMul(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    int flags = 0;
    uint32_t result = fp32_mul(op1, op2, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}

template <>
uint64_t
fplibMul(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    int flags = 0;
    uint64_t result = fp64_mul(op1, op2, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}

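// fplibMulX implements FMULX, which differs from an ordinary multiply
// only in that infinity * zero returns +/-2.0 (with the usual sign rule)
// instead of raising Invalid Operation and producing a NaN.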
template <>
uint16_t
fplibMulX(uint16_t op1, uint16_t op2, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint16_t mnt1, mnt2, result;

    fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    result = fp16_process_NaNs(op1, op2, mode, &flags);
    if (!result) {
        if ((exp1 == FP16_EXP_INF && !mnt2) ||
            (exp2 == FP16_EXP_INF && !mnt1)) {
            result = fp16_FPTwo(sgn1 ^ sgn2);
        } else if (exp1 == FP16_EXP_INF || exp2 == FP16_EXP_INF) {
            result = fp16_infinity(sgn1 ^ sgn2);
        } else if (!mnt1 || !mnt2) {
            result = fp16_zero(sgn1 ^ sgn2);
        } else {
            result = fp16_mul(op1, op2, mode, &flags);
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint32_t
fplibMulX(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint32_t mnt1, mnt2, result;

    fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    result = fp32_process_NaNs(op1, op2, mode, &flags);
    if (!result) {
        if ((exp1 == FP32_EXP_INF && !mnt2) ||
            (exp2 == FP32_EXP_INF && !mnt1)) {
            result = fp32_FPTwo(sgn1 ^ sgn2);
        } else if (exp1 == FP32_EXP_INF || exp2 == FP32_EXP_INF) {
            result = fp32_infinity(sgn1 ^ sgn2);
        } else if (!mnt1 || !mnt2) {
            result = fp32_zero(sgn1 ^ sgn2);
        } else {
            result = fp32_mul(op1, op2, mode, &flags);
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint64_t
fplibMulX(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint64_t mnt1, mnt2, result;

    fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    result = fp64_process_NaNs(op1, op2, mode, &flags);
    if (!result) {
        if ((exp1 == FP64_EXP_INF && !mnt2) ||
            (exp2 == FP64_EXP_INF && !mnt1)) {
            result = fp64_FPTwo(sgn1 ^ sgn2);
        } else if (exp1 == FP64_EXP_INF || exp2 == FP64_EXP_INF) {
            result = fp64_infinity(sgn1 ^ sgn2);
        } else if (!mnt1 || !mnt2) {
            result = fp64_zero(sgn1 ^ sgn2);
        } else {
            result = fp64_mul(op1, op2, mode, &flags);
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint16_t
fplibNeg(uint16_t op)
{
    return op ^ 1ULL << (FP16_BITS - 1);
}

template <>
uint32_t
fplibNeg(uint32_t op)
{
    return op ^ 1ULL << (FP32_BITS - 1);
}

template <>
uint64_t
fplibNeg(uint64_t op)
{
    return op ^ 1ULL << (FP64_BITS - 1);
}

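// 256-entry reciprocal square root estimate table from the Arm ARM
// pseudocode. It is indexed by the complemented low exponent bit followed
// by the top seven fraction bits of the normalised mantissa, and yields
// the top eight fraction bits of the estimate.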
static const uint8_t recip_sqrt_estimate[256] = {
    255, 253, 251, 249, 247, 245, 243, 242, 240, 238, 236, 234, 233, 231, 229, 228,
    226, 224, 223, 221, 219, 218, 216, 215, 213, 212, 210, 209, 207, 206, 204, 203,
    201, 200, 198, 197, 196, 194, 193, 192, 190, 189, 188, 186, 185, 184, 183, 181,
    180, 179, 178, 176, 175, 174, 173, 172, 170, 169, 168, 167, 166, 165, 164, 163,
    162, 160, 159, 158, 157, 156, 155, 154, 153, 152, 151, 150, 149, 148, 147, 146,
    145, 144, 143, 142, 141, 140, 140, 139, 138, 137, 136, 135, 134, 133, 132, 131,
    131, 130, 129, 128, 127, 126, 126, 125, 124, 123, 122, 121, 121, 120, 119, 118,
    118, 117, 116, 115, 114, 114, 113, 112, 111, 111, 110, 109, 109, 108, 107, 106,
    105, 104, 103, 101, 100,  99,  97,  96,  95,  93,  92,  91,  90,  88,  87,  86,
     85,  84,  82,  81,  80,  79,  78,  77,  76,  75,  74,  72,  71,  70,  69,  68,
     67,  66,  65,  64,  63,  62,  61,  60,  60,  59,  58,  57,  56,  55,  54,  53,
     52,  51,  51,  50,  49,  48,  47,  46,  46,  45,  44,  43,  42,  42,  41,  40,
     39,  38,  38,  37,  36,  35,  35,  34,  33,  33,  32,  31,  30,  30,  29,  28,
     28,  27,  26,  26,  25,  24,  24,  23,  22,  22,  21,  20,  20,  19,  19,  18,
     17,  17,  16,  16,  15,  14,  14,  13,  13,  12,  11,  11,  10,  10,   9,   9,
      8,   8,   7,   6,   6,   5,   5,   4,   4,   3,   3,   2,   2,   1,   1,   0
};

template <>
uint16_t
fplibRSqrtEstimate(uint16_t op, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint16_t mnt, result;

    fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    if (fp16_is_NaN(exp, mnt)) {
        result = fp16_process_NaN(op, mode, &flags);
    } else if (!mnt) {
        result = fp16_infinity(sgn);
        flags |= FPLIB_DZC;
    } else if (sgn) {
        result = fp16_defaultNaN();
        flags |= FPLIB_IOC;
    } else if (exp == FP16_EXP_INF) {
        result = fp16_zero(0);
    } else {
        exp += FP16_EXP_BITS;
        mnt = fp16_normalise(mnt, &exp);
        mnt = recip_sqrt_estimate[(~exp & 1) << 7 |
                                  (mnt >> (FP16_BITS - 8) & 127)];
        result = fp16_pack(0, (3 * FP16_EXP_BIAS - exp - 1) >> 1,
                           mnt << (FP16_MANT_BITS - 8));
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint32_t
fplibRSqrtEstimate(uint32_t op, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint32_t mnt, result;

    fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    if (fp32_is_NaN(exp, mnt)) {
        result = fp32_process_NaN(op, mode, &flags);
    } else if (!mnt) {
        result = fp32_infinity(sgn);
        flags |= FPLIB_DZC;
    } else if (sgn) {
        result = fp32_defaultNaN();
        flags |= FPLIB_IOC;
    } else if (exp == FP32_EXP_INF) {
        result = fp32_zero(0);
    } else {
        exp += FP32_EXP_BITS;
        mnt = fp32_normalise(mnt, &exp);
        mnt = recip_sqrt_estimate[(~exp & 1) << 7 |
                                  (mnt >> (FP32_BITS - 8) & 127)];
        result = fp32_pack(0, (3 * FP32_EXP_BIAS - exp - 1) >> 1,
                           mnt << (FP32_MANT_BITS - 8));
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint64_t
fplibRSqrtEstimate(uint64_t op, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint64_t mnt, result;

    fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    if (fp64_is_NaN(exp, mnt)) {
        result = fp64_process_NaN(op, mode, &flags);
    } else if (!mnt) {
        result = fp64_infinity(sgn);
        flags |= FPLIB_DZC;
    } else if (sgn) {
        result = fp64_defaultNaN();
        flags |= FPLIB_IOC;
    } else if (exp == FP64_EXP_INF) {
        result = fp64_zero(0);
    } else {
        exp += FP64_EXP_BITS;
        mnt = fp64_normalise(mnt, &exp);
        mnt = recip_sqrt_estimate[(~exp & 1) << 7 |
                                  (mnt >> (FP64_BITS - 8) & 127)];
        result = fp64_pack(0, (3 * FP64_EXP_BIAS - exp - 1) >> 1,
                           mnt << (FP64_MANT_BITS - 8));
    }

    set_fpscr0(fpscr, flags);

    return result;
}

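// fplibRSqrtStepFused implements FRSQRTS: it returns (3 - op1 * op2) / 2
// as a fused operation, by negating op1 and evaluating 3 + (-op1 * op2)
// with a final halving applied through the muladd's scale argument.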
template <>
uint16_t
fplibRSqrtStepFused(uint16_t op1, uint16_t op2, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint16_t mnt1, mnt2, result;

    op1 = fplibNeg<uint16_t>(op1);
    fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    result = fp16_process_NaNs(op1, op2, mode, &flags);
    if (!result) {
        if ((exp1 == FP16_EXP_INF && !mnt2) ||
            (exp2 == FP16_EXP_INF && !mnt1)) {
            result = fp16_FPOnePointFive(0);
        } else if (exp1 == FP16_EXP_INF || exp2 == FP16_EXP_INF) {
            result = fp16_infinity(sgn1 ^ sgn2);
        } else {
            result = fp16_muladd(fp16_FPThree(0), op1, op2, -1, mode, &flags);
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint32_t
fplibRSqrtStepFused(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint32_t mnt1, mnt2, result;

    op1 = fplibNeg<uint32_t>(op1);
    fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    result = fp32_process_NaNs(op1, op2, mode, &flags);
    if (!result) {
        if ((exp1 == FP32_EXP_INF && !mnt2) ||
            (exp2 == FP32_EXP_INF && !mnt1)) {
            result = fp32_FPOnePointFive(0);
        } else if (exp1 == FP32_EXP_INF || exp2 == FP32_EXP_INF) {
            result = fp32_infinity(sgn1 ^ sgn2);
        } else {
            result = fp32_muladd(fp32_FPThree(0), op1, op2, -1, mode, &flags);
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint64_t
fplibRSqrtStepFused(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint64_t mnt1, mnt2, result;

    op1 = fplibNeg<uint64_t>(op1);
    fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    result = fp64_process_NaNs(op1, op2, mode, &flags);
    if (!result) {
        if ((exp1 == FP64_EXP_INF && !mnt2) ||
            (exp2 == FP64_EXP_INF && !mnt1)) {
            result = fp64_FPOnePointFive(0);
        } else if (exp1 == FP64_EXP_INF || exp2 == FP64_EXP_INF) {
            result = fp64_infinity(sgn1 ^ sgn2);
        } else {
            result = fp64_muladd(fp64_FPThree(0), op1, op2, -1, mode, &flags);
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}

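// fplibRecipEstimate follows the Arm ARM RecipEstimate pseudocode: after
// the special cases, the normalised mantissa's top ten bits (low bit
// forced to one) form a divisor d, the estimate fraction is
// (2^19 / d + 1) / 2, and the result exponent is reflected about the
// bias; results that would be subnormal are produced by shifting the
// fraction right instead.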
template <>
uint16_t
fplibRecipEstimate(uint16_t op, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint16_t mnt, result;

    fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    if (fp16_is_NaN(exp, mnt)) {
        result = fp16_process_NaN(op, mode, &flags);
    } else if (exp == FP16_EXP_INF) {
        result = fp16_zero(sgn);
    } else if (!mnt) {
        result = fp16_infinity(sgn);
        flags |= FPLIB_DZC;
    } else if (!((uint16_t)(op << 1) >> (FP16_MANT_BITS - 1))) {
        bool overflow_to_inf = false;
        switch (FPCRRounding(fpscr)) {
          case FPRounding_TIEEVEN:
            overflow_to_inf = true;
            break;
          case FPRounding_POSINF:
            overflow_to_inf = !sgn;
            break;
          case FPRounding_NEGINF:
            overflow_to_inf = sgn;
            break;
          case FPRounding_ZERO:
            overflow_to_inf = false;
            break;
          default:
            panic("Unrecognized FP rounding mode");
        }
        result = overflow_to_inf ? fp16_infinity(sgn) : fp16_max_normal(sgn);
        flags |= FPLIB_OFC | FPLIB_IXC;
    } else if (fpscr.fz16 && exp >= 2 * FP16_EXP_BIAS - 1) {
        result = fp16_zero(sgn);
        flags |= FPLIB_UFC;
    } else {
        exp += FP16_EXP_BITS;
        mnt = fp16_normalise(mnt, &exp);
        int result_exp = 2 * FP16_EXP_BIAS - 1 - exp;
        uint16_t fraction = (((uint32_t)1 << 19) /
                             (mnt >> (FP16_BITS - 10) | 1) + 1) >> 1;
        fraction <<= FP16_MANT_BITS - 8;
        if (result_exp == 0) {
            fraction >>= 1;
        } else if (result_exp == -1) {
            fraction >>= 2;
            result_exp = 0;
        }
        result = fp16_pack(sgn, result_exp, fraction);
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint32_t
fplibRecipEstimate(uint32_t op, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint32_t mnt, result;

    fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    if (fp32_is_NaN(exp, mnt)) {
        result = fp32_process_NaN(op, mode, &flags);
    } else if (exp == FP32_EXP_INF) {
        result = fp32_zero(sgn);
    } else if (!mnt) {
        result = fp32_infinity(sgn);
        flags |= FPLIB_DZC;
    } else if (!((uint32_t)(op << 1) >> (FP32_MANT_BITS - 1))) {
        bool overflow_to_inf = false;
        switch (FPCRRounding(fpscr)) {
          case FPRounding_TIEEVEN:
            overflow_to_inf = true;
            break;
          case FPRounding_POSINF:
            overflow_to_inf = !sgn;
            break;
          case FPRounding_NEGINF:
            overflow_to_inf = sgn;
            break;
          case FPRounding_ZERO:
            overflow_to_inf = false;
            break;
          default:
            panic("Unrecognized FP rounding mode");
        }
        result = overflow_to_inf ? fp32_infinity(sgn) : fp32_max_normal(sgn);
        flags |= FPLIB_OFC | FPLIB_IXC;
    } else if (fpscr.fz && exp >= 2 * FP32_EXP_BIAS - 1) {
        result = fp32_zero(sgn);
        flags |= FPLIB_UFC;
    } else {
        exp += FP32_EXP_BITS;
        mnt = fp32_normalise(mnt, &exp);
        int result_exp = 2 * FP32_EXP_BIAS - 1 - exp;
        uint32_t fraction = (((uint32_t)1 << 19) /
                             (mnt >> (FP32_BITS - 10) | 1) + 1) >> 1;
        fraction <<= FP32_MANT_BITS - 8;
        if (result_exp == 0) {
            fraction >>= 1;
        } else if (result_exp == -1) {
            fraction >>= 2;
            result_exp = 0;
        }
        result = fp32_pack(sgn, result_exp, fraction);
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint64_t
fplibRecipEstimate(uint64_t op, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint64_t mnt, result;

    fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    if (fp64_is_NaN(exp, mnt)) {
        result = fp64_process_NaN(op, mode, &flags);
    } else if (exp == FP64_EXP_INF) {
        result = fp64_zero(sgn);
    } else if (!mnt) {
        result = fp64_infinity(sgn);
        flags |= FPLIB_DZC;
    } else if (!((uint64_t)(op << 1) >> (FP64_MANT_BITS - 1))) {
        bool overflow_to_inf = false;
        switch (FPCRRounding(fpscr)) {
          case FPRounding_TIEEVEN:
            overflow_to_inf = true;
            break;
          case FPRounding_POSINF:
            overflow_to_inf = !sgn;
            break;
          case FPRounding_NEGINF:
            overflow_to_inf = sgn;
            break;
          case FPRounding_ZERO:
            overflow_to_inf = false;
            break;
          default:
            panic("Unrecognized FP rounding mode");
        }
        result = overflow_to_inf ? fp64_infinity(sgn) : fp64_max_normal(sgn);
        flags |= FPLIB_OFC | FPLIB_IXC;
    } else if (fpscr.fz && exp >= 2 * FP64_EXP_BIAS - 1) {
        result = fp64_zero(sgn);
        flags |= FPLIB_UFC;
    } else {
        exp += FP64_EXP_BITS;
        mnt = fp64_normalise(mnt, &exp);
        int result_exp = 2 * FP64_EXP_BIAS - 1 - exp;
        uint64_t fraction = (((uint32_t)1 << 19) /
                             (mnt >> (FP64_BITS - 10) | 1) + 1) >> 1;
        fraction <<= FP64_MANT_BITS - 8;
        if (result_exp == 0) {
            fraction >>= 1;
        } else if (result_exp == -1) {
            fraction >>= 2;
            result_exp = 0;
        }
        result = fp64_pack(sgn, result_exp, fraction);
    }

    set_fpscr0(fpscr, flags);

    return result;
}

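// fplibRecipStepFused implements FRECPS: it returns 2 - op1 * op2 as a
// fused operation, again by negating op1 before the multiply-add.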
template <>
uint16_t
fplibRecipStepFused(uint16_t op1, uint16_t op2, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint16_t mnt1, mnt2, result;

    op1 = fplibNeg<uint16_t>(op1);
    fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    result = fp16_process_NaNs(op1, op2, mode, &flags);
    if (!result) {
        if ((exp1 == FP16_EXP_INF && !mnt2) ||
            (exp2 == FP16_EXP_INF && !mnt1)) {
            result = fp16_FPTwo(0);
        } else if (exp1 == FP16_EXP_INF || exp2 == FP16_EXP_INF) {
            result = fp16_infinity(sgn1 ^ sgn2);
        } else {
            result = fp16_muladd(fp16_FPTwo(0), op1, op2, 0, mode, &flags);
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint32_t
fplibRecipStepFused(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint32_t mnt1, mnt2, result;

    op1 = fplibNeg<uint32_t>(op1);
    fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    result = fp32_process_NaNs(op1, op2, mode, &flags);
    if (!result) {
        if ((exp1 == FP32_EXP_INF && !mnt2) ||
            (exp2 == FP32_EXP_INF && !mnt1)) {
            result = fp32_FPTwo(0);
        } else if (exp1 == FP32_EXP_INF || exp2 == FP32_EXP_INF) {
            result = fp32_infinity(sgn1 ^ sgn2);
        } else {
            result = fp32_muladd(fp32_FPTwo(0), op1, op2, 0, mode, &flags);
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint64_t
fplibRecipStepFused(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint64_t mnt1, mnt2, result;

    op1 = fplibNeg<uint64_t>(op1);
    fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    result = fp64_process_NaNs(op1, op2, mode, &flags);
    if (!result) {
        if ((exp1 == FP64_EXP_INF && !mnt2) ||
            (exp2 == FP64_EXP_INF && !mnt1)) {
            result = fp64_FPTwo(0);
        } else if (exp1 == FP64_EXP_INF || exp2 == FP64_EXP_INF) {
            result = fp64_infinity(sgn1 ^ sgn2);
        } else {
            result = fp64_muladd(fp64_FPTwo(0), op1, op2, 0, mode, &flags);
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}

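// fplibRecpX implements FRECPX, a cheap exponent-only reciprocal used to
// rescale Newton-Raphson iterations: the mantissa is zeroed and the
// exponent field is bitwise complemented, with zero and denormal inputs
// mapped to the maximum finite exponent.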
template <>
uint16_t
fplibRecpX(uint16_t op, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint16_t mnt, result;

    fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    if (fp16_is_NaN(exp, mnt)) {
        result = fp16_process_NaN(op, mode, &flags);
    } else {
        if (!mnt) { // Zero and denormals
            result = fp16_pack(sgn, FP16_EXP_INF - 1, 0);
        } else { // Infinities and normals
            result = fp16_pack(sgn, exp ^ FP16_EXP_INF, 0);
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint32_t
fplibRecpX(uint32_t op, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint32_t mnt, result;

    fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    if (fp32_is_NaN(exp, mnt)) {
        result = fp32_process_NaN(op, mode, &flags);
    } else {
        if (!mnt) { // Zero and denormals
            result = fp32_pack(sgn, FP32_EXP_INF - 1, 0);
        } else { // Infinities and normals
            result = fp32_pack(sgn, exp ^ FP32_EXP_INF, 0);
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint64_t
fplibRecpX(uint64_t op, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint64_t mnt, result;

    fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    if (fp64_is_NaN(exp, mnt)) {
        result = fp64_process_NaN(op, mode, &flags);
    } else {
        if (!mnt) { // Zero and denormals
            result = fp64_pack(sgn, FP64_EXP_INF - 1, 0);
        } else { // Infinities and normals
            result = fp64_pack(sgn, exp ^ FP64_EXP_INF, 0);
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}

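// In fplibRoundInt the two-bit "err" value encodes the discarded
// fraction: bit 1 is the round bit (the first bit below the integer part)
// and bit 0 is the sticky OR of all lower bits, so err == 2 means an
// exact tie and any nonzero err marks the result inexact.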
template <>
uint16_t
fplibRoundInt(uint16_t op, FPRounding rounding, bool exact, FPSCR &fpscr)
{
    int expint = FP16_EXP_BIAS + FP16_MANT_BITS;
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint16_t mnt, result;

    // Unpack using FPCR to determine if subnormals are flushed-to-zero:
    fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    // Handle NaNs, infinities and zeroes:
    if (fp16_is_NaN(exp, mnt)) {
        result = fp16_process_NaN(op, mode, &flags);
    } else if (exp == FP16_EXP_INF) {
        result = fp16_infinity(sgn);
    } else if (!mnt) {
        result = fp16_zero(sgn);
    } else if (exp >= expint) {
        // There are no fractional bits
        result = op;
    } else {
        // Truncate towards zero:
        uint16_t x = expint - exp >= FP16_BITS ? 0 : mnt >> (expint - exp);
        int err = exp < expint - FP16_BITS ? 1 :
            ((mnt << 1 >> (expint - exp - 1) & 3) |
             ((uint16_t)(mnt << 2 << (FP16_BITS + exp - expint)) != 0));
        switch (rounding) {
          case FPRounding_TIEEVEN:
            x += (err == 3 || (err == 2 && (x & 1)));
            break;
          case FPRounding_POSINF:
            x += err && !sgn;
            break;
          case FPRounding_NEGINF:
            x += err && sgn;
            break;
          case FPRounding_ZERO:
            break;
          case FPRounding_TIEAWAY:
            x += err >> 1;
            break;
          default:
            panic("Unrecognized FP rounding mode");
        }

        if (x == 0) {
            result = fp16_zero(sgn);
        } else {
            exp = expint;
            mnt = fp16_normalise(x, &exp);
            result = fp16_pack(sgn, exp + FP16_EXP_BITS, mnt >> FP16_EXP_BITS);
        }

        if (err && exact)
            flags |= FPLIB_IXC;
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint32_t
fplibRoundInt(uint32_t op, FPRounding rounding, bool exact, FPSCR &fpscr)
{
    int expint = FP32_EXP_BIAS + FP32_MANT_BITS;
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint32_t mnt, result;

    // Unpack using FPCR to determine if subnormals are flushed-to-zero:
    fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    // Handle NaNs, infinities and zeroes:
    if (fp32_is_NaN(exp, mnt)) {
        result = fp32_process_NaN(op, mode, &flags);
    } else if (exp == FP32_EXP_INF) {
        result = fp32_infinity(sgn);
    } else if (!mnt) {
        result = fp32_zero(sgn);
    } else if (exp >= expint) {
        // There are no fractional bits
        result = op;
    } else {
        // Truncate towards zero:
        uint32_t x = expint - exp >= FP32_BITS ? 0 : mnt >> (expint - exp);
        int err = exp < expint - FP32_BITS ? 1 :
            ((mnt << 1 >> (expint - exp - 1) & 3) |
             ((uint32_t)(mnt << 2 << (FP32_BITS + exp - expint)) != 0));
        switch (rounding) {
          case FPRounding_TIEEVEN:
            x += (err == 3 || (err == 2 && (x & 1)));
            break;
          case FPRounding_POSINF:
            x += err && !sgn;
            break;
          case FPRounding_NEGINF:
            x += err && sgn;
            break;
          case FPRounding_ZERO:
            break;
          case FPRounding_TIEAWAY:
            x += err >> 1;
            break;
          default:
            panic("Unrecognized FP rounding mode");
        }

        if (x == 0) {
            result = fp32_zero(sgn);
        } else {
            exp = expint;
            mnt = fp32_normalise(x, &exp);
            result = fp32_pack(sgn, exp + FP32_EXP_BITS, mnt >> FP32_EXP_BITS);
        }

        if (err && exact)
            flags |= FPLIB_IXC;
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint64_t
fplibRoundInt(uint64_t op, FPRounding rounding, bool exact, FPSCR &fpscr)
{
    int expint = FP64_EXP_BIAS + FP64_MANT_BITS;
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint64_t mnt, result;

    // Unpack using FPCR to determine if subnormals are flushed-to-zero:
    fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    // Handle NaNs, infinities and zeroes:
    if (fp64_is_NaN(exp, mnt)) {
        result = fp64_process_NaN(op, mode, &flags);
    } else if (exp == FP64_EXP_INF) {
        result = fp64_infinity(sgn);
    } else if (!mnt) {
        result = fp64_zero(sgn);
    } else if (exp >= expint) {
        // There are no fractional bits
        result = op;
    } else {
        // Truncate towards zero:
        uint64_t x = expint - exp >= FP64_BITS ? 0 : mnt >> (expint - exp);
        int err = exp < expint - FP64_BITS ? 1 :
            ((mnt << 1 >> (expint - exp - 1) & 3) |
             ((uint64_t)(mnt << 2 << (FP64_BITS + exp - expint)) != 0));
        switch (rounding) {
          case FPRounding_TIEEVEN:
            x += (err == 3 || (err == 2 && (x & 1)));
            break;
          case FPRounding_POSINF:
            x += err && !sgn;
            break;
          case FPRounding_NEGINF:
            x += err && sgn;
            break;
          case FPRounding_ZERO:
            break;
          case FPRounding_TIEAWAY:
            x += err >> 1;
            break;
          default:
            panic("Unrecognized FP rounding mode");
        }

        if (x == 0) {
            result = fp64_zero(sgn);
        } else {
            exp = expint;
            mnt = fp64_normalise(x, &exp);
            result = fp64_pack(sgn, exp + FP64_EXP_BITS, mnt >> FP64_EXP_BITS);
        }

        if (err && exact)
            flags |= FPLIB_IXC;
    }

    set_fpscr0(fpscr, flags);

    return result;
}

4262template <>
4263uint16_t
4264fplibScale(uint16_t op1, uint16_t op2, FPSCR &fpscr)
4265{
4266 int flags = 0;
4267 uint16_t result = fp16_scale(op1, (int16_t)op2, modeConv(fpscr), &flags);
4268 set_fpscr0(fpscr, flags);
4269 return result;
4270}
4271
4272template <>
4273uint32_t
4274fplibScale(uint32_t op1, uint32_t op2, FPSCR &fpscr)
4275{
4276 int flags = 0;
4277 uint32_t result = fp32_scale(op1, (int32_t)op2, modeConv(fpscr), &flags);
4278 set_fpscr0(fpscr, flags);
4279 return result;
4280}
4281
4282template <>
4283uint64_t
4284fplibScale(uint64_t op1, uint64_t op2, FPSCR &fpscr)
4285{
4286 int flags = 0;
4287 uint64_t result = fp64_scale(op1, (int64_t)op2, modeConv(fpscr), &flags);
4288 set_fpscr0(fpscr, flags);
4289 return result;
4290}
4291
4292template <>
4293uint16_t
4294fplibSqrt(uint16_t op, FPSCR &fpscr)
4295{
4296 int flags = 0;
4297 uint16_t result = fp16_sqrt(op, modeConv(fpscr), &flags);
4298 set_fpscr0(fpscr, flags);
4299 return result;
4300}
4301
4302template <>
4303uint32_t
4304fplibSqrt(uint32_t op, FPSCR &fpscr)
4305{
4306 int flags = 0;
4307 uint32_t result = fp32_sqrt(op, modeConv(fpscr), &flags);
4308 set_fpscr0(fpscr, flags);
4309 return result;
4310}
4311
4312template <>
4313uint64_t
4314fplibSqrt(uint64_t op, FPSCR &fpscr)
4315{
4316 int flags = 0;
4317 uint64_t result = fp64_sqrt(op, modeConv(fpscr), &flags);
4318 set_fpscr0(fpscr, flags);
4319 return result;
4320}
4321
4322template <>
4323uint16_t
4324fplibSub(uint16_t op1, uint16_t op2, FPSCR &fpscr)
4325{
4326 int flags = 0;
4327 uint16_t result = fp16_add(op1, op2, 1, modeConv(fpscr), &flags);
4328 set_fpscr0(fpscr, flags);
4329 return result;
4330}
4331
4332template <>
4333uint32_t
4334fplibSub(uint32_t op1, uint32_t op2, FPSCR &fpscr)
4335{
4336 int flags = 0;
4337 uint32_t result = fp32_add(op1, op2, 1, modeConv(fpscr), &flags);
4338 set_fpscr0(fpscr, flags);
4339 return result;
4340}
4341
4342template <>
4343uint64_t
4344fplibSub(uint64_t op1, uint64_t op2, FPSCR &fpscr)
4345{
4346 int flags = 0;
4347 uint64_t result = fp64_add(op1, op2, 1, modeConv(fpscr), &flags);
4348 set_fpscr0(fpscr, flags);
4349 return result;
4350}
4351
4352template <>
4353uint16_t
4354fplibTrigMulAdd(uint8_t coeff_index, uint16_t op1, uint16_t op2, FPSCR &fpscr)
4355{
    static uint16_t coeff[2][8] = {
        {
            0x3c00,
            0xb155,
            0x2030,
            0x0000,
            0x0000,
            0x0000,
            0x0000,
            0x0000,
        },
        {
            0x3c00,
            0xb800,
            0x293a,
            0x0000,
            0x0000,
            0x0000,
            0x0000,
            0x0000
        }
    };
    int flags = 0;
    uint16_t result =
        fp16_muladd(coeff[op2 >> (FP16_BITS - 1)][coeff_index], op1,
                    fplibAbs(op2), 0, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}

template <>
uint32_t
fplibTrigMulAdd(uint8_t coeff_index, uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    static uint32_t coeff[2][8] = {
        {
            0x3f800000,
            0xbe2aaaab,
            0x3c088886,
            0xb95008b9,
            0x36369d6d,
            0x00000000,
            0x00000000,
            0x00000000
        },
        {
            0x3f800000,
            0xbf000000,
            0x3d2aaaa6,
            0xbab60705,
            0x37cd37cc,
            0x00000000,
            0x00000000,
            0x00000000
        }
    };
    int flags = 0;
    uint32_t result =
        fp32_muladd(coeff[op2 >> (FP32_BITS - 1)][coeff_index], op1,
                    fplibAbs(op2), 0, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}

template <>
uint64_t
fplibTrigMulAdd(uint8_t coeff_index, uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    static uint64_t coeff[2][8] = {
        {
            0x3ff0000000000000ULL,
            0xbfc5555555555543ULL,
            0x3f8111111110f30cULL,
            0xbf2a01a019b92fc6ULL,
            0x3ec71de351f3d22bULL,
            0xbe5ae5e2b60f7b91ULL,
            0x3de5d8408868552fULL,
            0x0000000000000000ULL
        },
        {
            0x3ff0000000000000ULL,
            0xbfe0000000000000ULL,
            0x3fa5555555555536ULL,
            0xbf56c16c16c13a0bULL,
            0x3efa01a019b1e8d8ULL,
            0xbe927e4f7282f468ULL,
            0x3e21ee96d2641b13ULL,
            0xbda8f76380fbb401ULL
        }
    };
    int flags = 0;
    uint64_t result =
        fp64_muladd(coeff[op2 >> (FP64_BITS - 1)][coeff_index], op1,
                    fplibAbs(op2), 0, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}

template <>
uint16_t
fplibTrigSMul(uint16_t op1, uint16_t op2, FPSCR &fpscr)
{
    int flags = 0;
    int sgn, exp;
    uint16_t mnt;

    int mode = modeConv(fpscr);
    uint16_t result = fp16_mul(op1, op1, mode, &flags);
    set_fpscr0(fpscr, flags);
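    // Square op1; unless the product is a NaN, its sign bit is then
    // replaced with bit 0 of op2 (the FTSMUL-style sign injection).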

    fp16_unpack(&sgn, &exp, &mnt, result, mode, &flags);
    if (!fp16_is_NaN(exp, mnt)) {
        result = (result & ~(1ULL << (FP16_BITS - 1))) |
            op2 << (FP16_BITS - 1);
    }
    return result;
}

template <>
uint32_t
fplibTrigSMul(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    int flags = 0;
    int sgn, exp;
    uint32_t mnt;

    int mode = modeConv(fpscr);
    uint32_t result = fp32_mul(op1, op1, mode, &flags);
    set_fpscr0(fpscr, flags);

    fp32_unpack(&sgn, &exp, &mnt, result, mode, &flags);
    if (!fp32_is_NaN(exp, mnt)) {
        result = (result & ~(1ULL << (FP32_BITS - 1))) | op2 << (FP32_BITS - 1);
    }
    return result;
}

template <>
uint64_t
fplibTrigSMul(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    int flags = 0;
    int sgn, exp;
    uint64_t mnt;

    int mode = modeConv(fpscr);
    uint64_t result = fp64_mul(op1, op1, mode, &flags);
    set_fpscr0(fpscr, flags);

    fp64_unpack(&sgn, &exp, &mnt, result, mode, &flags);
    if (!fp64_is_NaN(exp, mnt)) {
        result = (result & ~(1ULL << (FP64_BITS - 1))) | op2 << (FP64_BITS - 1);
    }
    return result;
}

template <>
uint16_t
fplibTrigSSel(uint16_t op1, uint16_t op2, FPSCR &fpscr)
{
    static constexpr uint16_t fpOne =
        (uint16_t)FP16_EXP_BIAS << FP16_MANT_BITS; // 1.0
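    // Bit 0 of op2 selects between op1 and +1.0; bit 1 of op2 is XORed
    // into the sign bit (the FTSSEL coefficient-select behaviour).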
    if (op2 & 1)
        op1 = fpOne;
    return op1 ^ ((op2 >> 1) << (FP16_BITS - 1));
}

template <>
uint32_t
fplibTrigSSel(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    static constexpr uint32_t fpOne =
        (uint32_t)FP32_EXP_BIAS << FP32_MANT_BITS; // 1.0
    if (op2 & 1)
        op1 = fpOne;
    return op1 ^ ((op2 >> 1) << (FP32_BITS - 1));
}

template <>
uint64_t
fplibTrigSSel(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    static constexpr uint64_t fpOne =
        (uint64_t)FP64_EXP_BIAS << FP64_MANT_BITS; // 1.0
    if (op2 & 1)
        op1 = fpOne;
    return op1 ^ ((op2 >> 1) << (FP64_BITS - 1));
}

static uint64_t
FPToFixed_64(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding,
             int *flags)
{
    int expmax = FP64_EXP_BIAS + FP64_BITS - 1;
    uint64_t x;
    int err;

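    // Saturation value: ((uint64_t)!u << 63) - !sgn yields INT64_MAX or
    // INT64_MIN for a signed conversion, and UINT64_MAX or 0 for an
    // unsigned one, depending on the sign of the input.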
    if (exp > expmax) {
        *flags = FPLIB_IOC;
        return ((uint64_t)!u << (FP64_BITS - 1)) - !sgn;
    }

    x = lsr64(mnt << FP64_EXP_BITS, expmax - exp);
    err = (exp > expmax - 2 ? 0 :
           (lsr64(mnt << FP64_EXP_BITS, expmax - 2 - exp) & 3) |
           !!(mnt << FP64_EXP_BITS & (lsl64(1, expmax - 2 - exp) - 1)));
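    // err uses the same two-bit fraction encoding as in fplibRoundInt:
    // the first discarded bit in bit 1, the OR of the rest in bit 0.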

    switch (rounding) {
      case FPRounding_TIEEVEN:
        x += (err == 3 || (err == 2 && (x & 1)));
        break;
      case FPRounding_POSINF:
        x += err && !sgn;
        break;
      case FPRounding_NEGINF:
        x += err && sgn;
        break;
      case FPRounding_ZERO:
        break;
      case FPRounding_TIEAWAY:
        x += err >> 1;
        break;
      default:
        panic("Unrecognized FP rounding mode");
    }

    if (u ? sgn && x : x > (1ULL << (FP64_BITS - 1)) - !sgn) {
        *flags = FPLIB_IOC;
        return ((uint64_t)!u << (FP64_BITS - 1)) - !sgn;
    }

    if (err) {
        *flags = FPLIB_IXC;
    }

    return sgn ? -x : x;
}

static uint32_t
FPToFixed_32(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding,
             int *flags)
{
    uint64_t x = FPToFixed_64(sgn, exp, mnt, u, rounding, flags);
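    // Convert at 64 bits, then saturate to the 32-bit range: an unsigned
    // result must fit in 32 bits, a signed one in [INT32_MIN, INT32_MAX].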
    if (u ? x >= 1ULL << FP32_BITS :
        !(x < 1ULL << (FP32_BITS - 1) ||
          (uint64_t)-x <= (uint64_t)1 << (FP32_BITS - 1))) {
        *flags = FPLIB_IOC;
        x = ((uint32_t)!u << (FP32_BITS - 1)) - !sgn;
    }
    return x;
}

static uint16_t
FPToFixed_16(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding,
             int *flags)
{
    uint64_t x = FPToFixed_64(sgn, exp, mnt, u, rounding, flags);
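    // Same narrowing check as FPToFixed_32, against the 16-bit range.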
    if (u ? x >= 1ULL << FP16_BITS :
        !(x < 1ULL << (FP16_BITS - 1) ||
          (uint64_t)-x <= (uint64_t)1 << (FP16_BITS - 1))) {
        *flags = FPLIB_IOC;
        x = ((uint16_t)!u << (FP16_BITS - 1)) - !sgn;
    }
    return x;
}

template <>
uint16_t
fplibFPToFixed(uint16_t op, int fbits, bool u, FPRounding rounding,
               FPSCR &fpscr)
{
    int flags = 0;
    int sgn, exp;
    uint16_t mnt, result;

    // Unpack using FPCR to determine if subnormals are flushed-to-zero:
    fp16_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);

    // If NaN, set cumulative flag or take exception:
    if (fp16_is_NaN(exp, mnt)) {
        flags = FPLIB_IOC;
        result = 0;
    } else {
        assert(fbits >= 0);
        // Infinity is treated as an ordinary normalised number that saturates.
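        // Re-bias the fp16 exponent into fp64 terms and widen the mantissa
        // so that the shared 64-bit helper can do the conversion; fbits
        // shifts the exponent to account for the fixed-point scale.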
        result =
            FPToFixed_16(sgn, exp + FP64_EXP_BIAS - FP16_EXP_BIAS + fbits,
                         (uint64_t)mnt << (FP64_MANT_BITS - FP16_MANT_BITS),
                         u, rounding, &flags);
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint32_t
fplibFPToFixed(uint16_t op, int fbits, bool u, FPRounding rounding,
               FPSCR &fpscr)
{
    int flags = 0;
    int sgn, exp;
    uint16_t mnt;
    uint32_t result;

    // Unpack using FPCR to determine if subnormals are flushed-to-zero:
    fp16_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);

    // If NaN, set cumulative flag or take exception:
    if (fp16_is_NaN(exp, mnt)) {
        flags = FPLIB_IOC;
        result = 0;
    } else {
        assert(fbits >= 0);
        if (exp == FP16_EXP_INF)
            exp = 255; // infinity: make it big enough to saturate
        result =
            FPToFixed_32(sgn, exp + FP64_EXP_BIAS - FP16_EXP_BIAS + fbits,
                         (uint64_t)mnt << (FP64_MANT_BITS - FP16_MANT_BITS),
                         u, rounding, &flags);
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint32_t
fplibFPToFixed(uint32_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
{
    int flags = 0;
    int sgn, exp;
    uint32_t mnt, result;

    // Unpack using FPCR to determine if subnormals are flushed-to-zero:
    fp32_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);

    // If NaN, set cumulative flag or take exception:
    if (fp32_is_NaN(exp, mnt)) {
        flags = FPLIB_IOC;
        result = 0;
    } else {
        assert(fbits >= 0);
        // Infinity is treated as an ordinary normalised number that saturates.
        result =
            FPToFixed_32(sgn, exp + FP64_EXP_BIAS - FP32_EXP_BIAS + fbits,
                         (uint64_t)mnt << (FP64_MANT_BITS - FP32_MANT_BITS),
                         u, rounding, &flags);
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint32_t
fplibFPToFixed(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
{
    int flags = 0;
    int sgn, exp;
    uint64_t mnt;
    uint32_t result;

    // Unpack using FPCR to determine if subnormals are flushed-to-zero:
    fp64_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);

    // If NaN, set cumulative flag or take exception:
    if (fp64_is_NaN(exp, mnt)) {
        flags = FPLIB_IOC;
        result = 0;
    } else {
        assert(fbits >= 0);
        // Infinity is treated as an ordinary normalised number that saturates.
        result = FPToFixed_32(sgn, exp + fbits, mnt, u, rounding, &flags);
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint64_t
fplibFPToFixed(uint16_t op, int fbits, bool u, FPRounding rounding,
               FPSCR &fpscr)
{
    int flags = 0;
    int sgn, exp;
    uint16_t mnt;
    uint64_t result;

    // Unpack using FPCR to determine if subnormals are flushed-to-zero:
    fp16_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);

    // If NaN, set cumulative flag or take exception:
    if (fp16_is_NaN(exp, mnt)) {
        flags = FPLIB_IOC;
        result = 0;
    } else {
        assert(fbits >= 0);
        if (exp == FP16_EXP_INF)
            exp = 255; // infinity: make it big enough to saturate
        result =
            FPToFixed_64(sgn, exp + FP64_EXP_BIAS - FP16_EXP_BIAS + fbits,
                         (uint64_t)mnt << (FP64_MANT_BITS - FP16_MANT_BITS),
                         u, rounding, &flags);
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint64_t
fplibFPToFixed(uint32_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
{
    int flags = 0;
    int sgn, exp;
    uint32_t mnt;
    uint64_t result;

    // Unpack using FPCR to determine if subnormals are flushed-to-zero:
    fp32_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);

    // If NaN, set cumulative flag or take exception:
    if (fp32_is_NaN(exp, mnt)) {
        flags = FPLIB_IOC;
        result = 0;
    } else {
        assert(fbits >= 0);
        // Infinity is treated as an ordinary normalised number that saturates.
        result =
            FPToFixed_64(sgn, exp + FP64_EXP_BIAS - FP32_EXP_BIAS + fbits,
                         (uint64_t)mnt << (FP64_MANT_BITS - FP32_MANT_BITS),
                         u, rounding, &flags);
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint64_t
fplibFPToFixed(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
{
    int flags = 0;
    int sgn, exp;
    uint64_t mnt, result;

    // Unpack using FPCR to determine if subnormals are flushed-to-zero:
    fp64_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);

    // If NaN, set cumulative flag or take exception:
    if (fp64_is_NaN(exp, mnt)) {
        flags = FPLIB_IOC;
        result = 0;
    } else {
        assert(fbits >= 0);
        // Infinity is treated as an ordinary normalised number that saturates.
        result = FPToFixed_64(sgn, exp + fbits, mnt, u, rounding, &flags);
    }

    set_fpscr0(fpscr, flags);

    return result;
}

static uint16_t
fp16_cvtf(uint64_t a, int fbits, int u, int mode, int *flags)
{
    int x_sgn = !u && a >> (FP64_BITS - 1);
    int x_exp = FP16_EXP_BIAS + FP64_BITS - 1 - fbits;
    uint64_t x_mnt = x_sgn ? -a : a;

    // Handle zero:
    if (!x_mnt) {
        return fp16_zero(0);
    }
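
    // Worked example (a sketch): with a = 1 and fbits = 0, x_exp starts at
    // 15 + 63; the normalise below shifts the set bit up to bit 63 and
    // subtracts 63 from x_exp, leaving the biased fp16 exponent 15, i.e.
    // the value 1.0.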

    // Normalise into FP16_BITS bits, collapsing error into bottom bit:
    x_mnt = fp64_normalise(x_mnt, &x_exp);
    x_mnt = (x_mnt >> (FP64_BITS - FP16_BITS - 1) |
             !!(x_mnt & ((1ULL << (FP64_BITS - FP16_BITS - 1)) - 1)));

    return fp16_round(x_sgn, x_exp, x_mnt, mode, flags);
}

static uint32_t
fp32_cvtf(uint64_t a, int fbits, int u, int mode, int *flags)
{
    int x_sgn = !u && a >> (FP64_BITS - 1);
    int x_exp = FP32_EXP_BIAS + FP64_BITS - 1 - fbits;
    uint64_t x_mnt = x_sgn ? -a : a;

    // Handle zero:
    if (!x_mnt) {
        return fp32_zero(0);
    }

    // Normalise into FP32_BITS bits, collapsing error into bottom bit:
    x_mnt = fp64_normalise(x_mnt, &x_exp);
    x_mnt = (x_mnt >> (FP64_BITS - FP32_BITS - 1) |
             !!(x_mnt & ((1ULL << (FP64_BITS - FP32_BITS - 1)) - 1)));

    return fp32_round(x_sgn, x_exp, x_mnt, mode, flags);
}

static uint64_t
fp64_cvtf(uint64_t a, int fbits, int u, int mode, int *flags)
{
    int x_sgn = !u && a >> (FP64_BITS - 1);
    int x_exp = FP64_EXP_BIAS + FP64_BITS - 1 - fbits;
    uint64_t x_mnt = x_sgn ? -a : a;

    // Handle zero:
    if (!x_mnt) {
        return fp64_zero(0);
    }

    x_mnt = fp64_normalise(x_mnt, &x_exp);

    return fp64_round(x_sgn, x_exp, x_mnt << 1, mode, flags);
}

template <>
uint16_t
fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding,
               FPSCR &fpscr)
{
    int flags = 0;
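    // Build the fplib mode word directly: take the rounding mode from the
    // argument, and pull FZ (FPSCR bit 24) and DN (bit 25) down into the
    // FPLIB_FZ/FPLIB_DN positions via the >> 22 & 12 shift-and-mask.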
    uint16_t res = fp16_cvtf(op, fbits, u,
                             (int)rounding | ((uint32_t)fpscr >> 22 & 12),
                             &flags);
    set_fpscr0(fpscr, flags);
    return res;
}

template <>
uint32_t
fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
{
    int flags = 0;
    uint32_t res = fp32_cvtf(op, fbits, u,
                             (int)rounding | ((uint32_t)fpscr >> 22 & 12),
                             &flags);
    set_fpscr0(fpscr, flags);
    return res;
}

template <>
uint64_t
fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
{
    int flags = 0;
    uint64_t res = fp64_cvtf(op, fbits, u,
                             (int)rounding | ((uint32_t)fpscr >> 22 & 12),
                             &flags);
    set_fpscr0(fpscr, flags);
    return res;
}

template <>
uint16_t
fplibInfinity(int sgn)
{
    return fp16_infinity(sgn);
}

template <>
uint32_t
fplibInfinity(int sgn)
{
    return fp32_infinity(sgn);
}

template <>
uint64_t
fplibInfinity(int sgn)
{
    return fp64_infinity(sgn);
}

template <>
uint16_t
fplibDefaultNaN()
{
    return fp16_defaultNaN();
}

template <>
uint32_t
fplibDefaultNaN()
{
    return fp32_defaultNaN();
}

template <>
uint64_t
fplibDefaultNaN()
{
    return fp64_defaultNaN();
}

}