fplib.cc revision 13449:2f7efa89c58b
/*
 * Copyright (c) 2012-2013, 2017-2018 ARM Limited
 * All rights reserved
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder.  You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Edmund Grimley Evans
 *          Thomas Grocutt
 */

#include <stdint.h>

#include <cassert>

#include "base/logging.hh"
#include "fplib.hh"

namespace ArmISA
{

#define FPLIB_RN 0
#define FPLIB_RP 1
#define FPLIB_RM 2
#define FPLIB_RZ 3
#define FPLIB_FZ 4
#define FPLIB_DN 8
#define FPLIB_AHP 16
#define FPLIB_FZ16 32

#define FPLIB_IDC 128 // Input Denormal
#define FPLIB_IXC 16  // Inexact
#define FPLIB_UFC 8   // Underflow
#define FPLIB_OFC 4   // Overflow
#define FPLIB_DZC 2   // Division by Zero
#define FPLIB_IOC 1   // Invalid Operation

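// Compile-time illustration (added as a sketch, not used by the library):
// a mode word combines exactly one rounding mode in its low two bits with
// optional control bits, so "mode & 3" recovers the rounding mode; the
// exception constants mirror the cumulative FPSCR status flag bits.
static_assert(((FPLIB_RM | FPLIB_FZ | FPLIB_DN) & 3) == FPLIB_RM,
              "low two bits of a mode word select the rounding mode");
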
#define FP16_BITS 16
#define FP32_BITS 32
#define FP64_BITS 64

#define FP16_EXP_BITS 5
#define FP32_EXP_BITS 8
#define FP64_EXP_BITS 11

#define FP16_EXP_BIAS 15
#define FP32_EXP_BIAS 127
#define FP64_EXP_BIAS 1023

#define FP16_EXP_INF ((1ULL << FP16_EXP_BITS) - 1)
#define FP32_EXP_INF ((1ULL << FP32_EXP_BITS) - 1)
#define FP64_EXP_INF ((1ULL << FP64_EXP_BITS) - 1)

#define FP16_MANT_BITS (FP16_BITS - FP16_EXP_BITS - 1)
#define FP32_MANT_BITS (FP32_BITS - FP32_EXP_BITS - 1)
#define FP64_MANT_BITS (FP64_BITS - FP64_EXP_BITS - 1)

#define FP16_EXP(x) ((x) >> FP16_MANT_BITS & ((1ULL << FP16_EXP_BITS) - 1))
#define FP32_EXP(x) ((x) >> FP32_MANT_BITS & ((1ULL << FP32_EXP_BITS) - 1))
#define FP64_EXP(x) ((x) >> FP64_MANT_BITS & ((1ULL << FP64_EXP_BITS) - 1))

#define FP16_MANT(x) ((x) & ((1ULL << FP16_MANT_BITS) - 1))
#define FP32_MANT(x) ((x) & ((1ULL << FP32_MANT_BITS) - 1))
#define FP64_MANT(x) ((x) & ((1ULL << FP64_MANT_BITS) - 1))

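// Compile-time illustration (added as a sketch): the field extractors
// agree with the IEEE 754 layouts; 0x3C00 is 1.0 in half precision and
// 0x40490FDB is approximately pi in single precision.
static_assert(FP16_MANT_BITS == 10 && FP32_MANT_BITS == 23 &&
              FP64_MANT_BITS == 52, "derived mantissa widths");
static_assert(FP16_EXP(0x3C00U) == FP16_EXP_BIAS && FP16_MANT(0x3C00U) == 0,
              "fp16 1.0 has biased exponent 15 and an empty mantissa");
static_assert(FP32_EXP(0x40490FDBU) == 128 &&
              FP32_MANT(0x40490FDBU) == 0x490FDB,
              "fp32 pi has biased exponent 128");
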
static inline uint16_t
lsl16(uint16_t x, uint32_t shift)
{
    return shift < 16 ? x << shift : 0;
}

static inline uint16_t
lsr16(uint16_t x, uint32_t shift)
{
    return shift < 16 ? x >> shift : 0;
}

static inline uint32_t
lsl32(uint32_t x, uint32_t shift)
{
    return shift < 32 ? x << shift : 0;
}

static inline uint32_t
lsr32(uint32_t x, uint32_t shift)
{
    return shift < 32 ? x >> shift : 0;
}

static inline uint64_t
lsl64(uint64_t x, uint32_t shift)
{
    return shift < 64 ? x << shift : 0;
}

static inline uint64_t
lsr64(uint64_t x, uint32_t shift)
{
    return shift < 64 ? x >> shift : 0;
}

static inline void
lsl128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift)
{
    if (shift == 0) {
        *r1 = x1;
        *r0 = x0;
    } else if (shift < 64) {
        *r1 = x1 << shift | x0 >> (64 - shift);
        *r0 = x0 << shift;
    } else if (shift < 128) {
        *r1 = x0 << (shift - 64);
        *r0 = 0;
    } else {
        *r1 = 0;
        *r0 = 0;
    }
}

static inline void
lsr128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift)
{
    if (shift == 0) {
        *r1 = x1;
        *r0 = x0;
    } else if (shift < 64) {
        *r0 = x0 >> shift | x1 << (64 - shift);
        *r1 = x1 >> shift;
    } else if (shift < 128) {
        *r0 = x1 >> (shift - 64);
        *r1 = 0;
    } else {
        *r0 = 0;
        *r1 = 0;
    }
}

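// Note (added): guarding the shift amount keeps these helpers total where
// a raw C shift by at least the operand width would be undefined
// behaviour; for example, lsl64(1, 64) returns 0, and
// lsr128(&r0, &r1, x0, x1, 64) yields r0 == x1 and r1 == 0.
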
static inline void
mul62x62(uint64_t *x0, uint64_t *x1, uint64_t a, uint64_t b)
{
    uint32_t mask = ((uint32_t)1 << 31) - 1;
    uint64_t a0 = a & mask;
    uint64_t a1 = a >> 31 & mask;
    uint64_t b0 = b & mask;
    uint64_t b1 = b >> 31 & mask;
    uint64_t p0 = a0 * b0;
    uint64_t p2 = a1 * b1;
    uint64_t p1 = (a0 + a1) * (b0 + b1) - p0 - p2;
    uint64_t s0 = p0;
    uint64_t s1 = (s0 >> 31) + p1;
    uint64_t s2 = (s1 >> 31) + p2;
    *x0 = (s0 & mask) | (s1 & mask) << 31 | s2 << 62;
    *x1 = s2 >> 2;
}

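// Worked expansion (added for clarity): with a = a1*2^31 + a0 and
// b = b1*2^31 + b0, the three products above implement Karatsuba's
// identity
//     a*b = p2*2^62 + (a0*b1 + a1*b0)*2^31 + p0,
// where p1 = (a0 + a1)*(b0 + b1) - p0 - p2 = a0*b1 + a1*b0. Each partial
// product of 31-bit halves fits in 64 bits, so the 124-bit product of two
// 62-bit operands is assembled exactly in *x1:*x0.
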
static inline void
mul64x32(uint64_t *x0, uint64_t *x1, uint64_t a, uint32_t b)
{
    uint64_t t0 = (uint64_t)(uint32_t)a * b;
    uint64_t t1 = (t0 >> 32) + (a >> 32) * b;
    *x0 = t1 << 32 | (uint32_t)t0;
    *x1 = t1 >> 32;
}

static inline void
add128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0,
       uint64_t b1)
{
    *x0 = a0 + b0;
    *x1 = a1 + b1 + (*x0 < a0);
}

static inline void
sub128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0,
       uint64_t b1)
{
    *x0 = a0 - b0;
    *x1 = a1 - b1 - (*x0 > a0);
}

static inline int
cmp128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1)
{
    return (a1 < b1 ? -1 : a1 > b1 ? 1 : a0 < b0 ? -1 : a0 > b0 ? 1 : 0);
}

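// Note (added): the 128-bit add and subtract recover the carry and borrow
// from unsigned wraparound: after "*x0 = a0 + b0", *x0 < a0 holds exactly
// when the low halves overflowed. For instance, adding
// (a0, a1) = (2^64 - 1, 0) and (b0, b1) = (1, 0) produces (0, 1).
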
static inline uint16_t
fp16_normalise(uint16_t mnt, int *exp)
{
    int shift;

    if (!mnt) {
        return 0;
    }

    for (shift = 8; shift; shift >>= 1) {
        if (!(mnt >> (16 - shift))) {
            mnt <<= shift;
            *exp -= shift;
        }
    }
    return mnt;
}

static inline uint32_t
fp32_normalise(uint32_t mnt, int *exp)
{
    int shift;

    if (!mnt) {
        return 0;
    }

    for (shift = 16; shift; shift >>= 1) {
        if (!(mnt >> (32 - shift))) {
            mnt <<= shift;
            *exp -= shift;
        }
    }
    return mnt;
}

static inline uint64_t
fp64_normalise(uint64_t mnt, int *exp)
{
    int shift;

    if (!mnt) {
        return 0;
    }

    for (shift = 32; shift; shift >>= 1) {
        if (!(mnt >> (64 - shift))) {
            mnt <<= shift;
            *exp -= shift;
        }
    }
    return mnt;
}

static inline void
fp128_normalise(uint64_t *mnt0, uint64_t *mnt1, int *exp)
{
    uint64_t x0 = *mnt0;
    uint64_t x1 = *mnt1;
    int shift;

    if (!x0 && !x1) {
        return;
    }

    if (!x1) {
        x1 = x0;
        x0 = 0;
        *exp -= 64;
    }

    for (shift = 32; shift; shift >>= 1) {
        if (!(x1 >> (64 - shift))) {
            x1 = x1 << shift | x0 >> (64 - shift);
            x0 <<= shift;
            *exp -= shift;
        }
    }

    *mnt0 = x0;
    *mnt1 = x1;
}

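// Note (added): each normalise routine is a binary search over shift
// amounts that moves the most significant set bit to the top of the word
// while debiting the exponent; e.g. fp32_normalise(1, &exp) returns
// 0x80000000 and subtracts 31 from exp.
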
static inline uint16_t
fp16_pack(uint16_t sgn, uint16_t exp, uint16_t mnt)
{
    return sgn << (FP16_BITS - 1) | exp << FP16_MANT_BITS | FP16_MANT(mnt);
}

static inline uint32_t
fp32_pack(uint32_t sgn, uint32_t exp, uint32_t mnt)
{
    return sgn << (FP32_BITS - 1) | exp << FP32_MANT_BITS | FP32_MANT(mnt);
}

static inline uint64_t
fp64_pack(uint64_t sgn, uint64_t exp, uint64_t mnt)
{
    return sgn << (FP64_BITS - 1) | exp << FP64_MANT_BITS | FP64_MANT(mnt);
}

static inline uint16_t
fp16_zero(int sgn)
{
    return fp16_pack(sgn, 0, 0);
}

static inline uint32_t
fp32_zero(int sgn)
{
    return fp32_pack(sgn, 0, 0);
}

static inline uint64_t
fp64_zero(int sgn)
{
    return fp64_pack(sgn, 0, 0);
}

static inline uint16_t
fp16_max_normal(int sgn)
{
    return fp16_pack(sgn, FP16_EXP_INF - 1, -1);
}

static inline uint32_t
fp32_max_normal(int sgn)
{
    return fp32_pack(sgn, FP32_EXP_INF - 1, -1);
}

static inline uint64_t
fp64_max_normal(int sgn)
{
    return fp64_pack(sgn, FP64_EXP_INF - 1, -1);
}

static inline uint16_t
fp16_infinity(int sgn)
{
    return fp16_pack(sgn, FP16_EXP_INF, 0);
}

static inline uint32_t
fp32_infinity(int sgn)
{
    return fp32_pack(sgn, FP32_EXP_INF, 0);
}

static inline uint64_t
fp64_infinity(int sgn)
{
    return fp64_pack(sgn, FP64_EXP_INF, 0);
}

static inline uint16_t
fp16_defaultNaN()
{
    return fp16_pack(0, FP16_EXP_INF, 1ULL << (FP16_MANT_BITS - 1));
}

static inline uint32_t
fp32_defaultNaN()
{
    return fp32_pack(0, FP32_EXP_INF, 1ULL << (FP32_MANT_BITS - 1));
}

static inline uint64_t
fp64_defaultNaN()
{
    return fp64_pack(0, FP64_EXP_INF, 1ULL << (FP64_MANT_BITS - 1));
}

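// Reference values (added): for the positive sign these constructors
// yield the usual IEEE 754 bit patterns, e.g. fp16_infinity(0) == 0x7C00,
// fp16_defaultNaN() == 0x7E00, fp16_max_normal(0) == 0x7BFF,
// fp32_infinity(0) == 0x7F800000, fp32_defaultNaN() == 0x7FC00000,
// fp32_max_normal(0) == 0x7F7FFFFF, and
// fp64_defaultNaN() == 0x7FF8000000000000.
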
static inline void
fp16_unpack(int *sgn, int *exp, uint16_t *mnt, uint16_t x, int mode,
            int *flags)
{
    *sgn = x >> (FP16_BITS - 1);
    *exp = FP16_EXP(x);
    *mnt = FP16_MANT(x);

    // Handle subnormals:
    if (*exp) {
        *mnt |= 1ULL << FP16_MANT_BITS;
    } else {
        ++*exp;
        // IDC (Input Denormal) is not set in this case.
        if (mode & FPLIB_FZ16)
            *mnt = 0;
    }
}

static inline void
fp32_unpack(int *sgn, int *exp, uint32_t *mnt, uint32_t x, int mode,
            int *flags)
{
    *sgn = x >> (FP32_BITS - 1);
    *exp = FP32_EXP(x);
    *mnt = FP32_MANT(x);

    // Handle subnormals:
    if (*exp) {
        *mnt |= 1ULL << FP32_MANT_BITS;
    } else {
        ++*exp;
        if ((mode & FPLIB_FZ) && *mnt) {
            *flags |= FPLIB_IDC;
            *mnt = 0;
        }
    }
}

static inline void
fp64_unpack(int *sgn, int *exp, uint64_t *mnt, uint64_t x, int mode,
            int *flags)
{
    *sgn = x >> (FP64_BITS - 1);
    *exp = FP64_EXP(x);
    *mnt = FP64_MANT(x);

    // Handle subnormals:
    if (*exp) {
        *mnt |= 1ULL << FP64_MANT_BITS;
    } else {
        ++*exp;
        if ((mode & FPLIB_FZ) && *mnt) {
            *flags |= FPLIB_IDC;
            *mnt = 0;
        }
    }
}

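// Note (added): unpacking makes the implicit leading 1 explicit, so fp32
// 1.0f (0x3F800000) unpacks to sgn = 0, exp = 127, mnt = 0x800000. In the
// subnormal case the exponent is bumped to 1 so the value is still
// mnt * 2^(exp - bias - MANT_BITS), and flush-to-zero modes clear the
// mantissa (raising IDC, except in the FZ16 case noted above).
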
static inline int
fp16_is_NaN(int exp, uint16_t mnt)
{
    return exp == FP16_EXP_INF && FP16_MANT(mnt);
}

static inline int
fp32_is_NaN(int exp, uint32_t mnt)
{
    return exp == FP32_EXP_INF && FP32_MANT(mnt);
}

static inline int
fp64_is_NaN(int exp, uint64_t mnt)
{
    return exp == FP64_EXP_INF && FP64_MANT(mnt);
}

static inline int
fp16_is_signalling_NaN(int exp, uint16_t mnt)
{
    return fp16_is_NaN(exp, mnt) && !(mnt >> (FP16_MANT_BITS - 1) & 1);
}

static inline int
fp32_is_signalling_NaN(int exp, uint32_t mnt)
{
    return fp32_is_NaN(exp, mnt) && !(mnt >> (FP32_MANT_BITS - 1) & 1);
}

static inline int
fp64_is_signalling_NaN(int exp, uint64_t mnt)
{
    return fp64_is_NaN(exp, mnt) && !(mnt >> (FP64_MANT_BITS - 1) & 1);
}

static inline int
fp16_is_quiet_NaN(int exp, uint16_t mnt)
{
    return exp == FP16_EXP_INF && (mnt >> (FP16_MANT_BITS - 1) & 1);
}

static inline int
fp32_is_quiet_NaN(int exp, uint32_t mnt)
{
    return exp == FP32_EXP_INF && (mnt >> (FP32_MANT_BITS - 1) & 1);
}

static inline int
fp64_is_quiet_NaN(int exp, uint64_t mnt)
{
    return exp == FP64_EXP_INF && (mnt >> (FP64_MANT_BITS - 1) & 1);
}

static inline int
fp16_is_infinity(int exp, uint16_t mnt)
{
    return exp == FP16_EXP_INF && !FP16_MANT(mnt);
}

static inline int
fp32_is_infinity(int exp, uint32_t mnt)
{
    return exp == FP32_EXP_INF && !FP32_MANT(mnt);
}

static inline int
fp64_is_infinity(int exp, uint64_t mnt)
{
    return exp == FP64_EXP_INF && !FP64_MANT(mnt);
}

static inline uint16_t
fp16_process_NaN(uint16_t a, int mode, int *flags)
{
    if (!(a >> (FP16_MANT_BITS - 1) & 1)) {
        *flags |= FPLIB_IOC;
        a |= 1ULL << (FP16_MANT_BITS - 1);
    }
    return mode & FPLIB_DN ? fp16_defaultNaN() : a;
}

static inline uint32_t
fp32_process_NaN(uint32_t a, int mode, int *flags)
{
    if (!(a >> (FP32_MANT_BITS - 1) & 1)) {
        *flags |= FPLIB_IOC;
        a |= 1ULL << (FP32_MANT_BITS - 1);
    }
    return mode & FPLIB_DN ? fp32_defaultNaN() : a;
}

static inline uint64_t
fp64_process_NaN(uint64_t a, int mode, int *flags)
{
    if (!(a >> (FP64_MANT_BITS - 1) & 1)) {
        *flags |= FPLIB_IOC;
        a |= 1ULL << (FP64_MANT_BITS - 1);
    }
    return mode & FPLIB_DN ? fp64_defaultNaN() : a;
}

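// Example (added): quieting a signalling NaN sets the top mantissa bit and
// raises Invalid Operation, so the fp32 sNaN 0x7F800001 becomes 0x7FC00001
// with IOC set, or the default NaN 0x7FC00000 when FPLIB_DN is selected.
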
static uint16_t
fp16_process_NaNs(uint16_t a, uint16_t b, int mode, int *flags)
{
    int a_exp = FP16_EXP(a);
    uint16_t a_mnt = FP16_MANT(a);
    int b_exp = FP16_EXP(b);
    uint16_t b_mnt = FP16_MANT(b);

    // Handle signalling NaNs:
    if (fp16_is_signalling_NaN(a_exp, a_mnt))
        return fp16_process_NaN(a, mode, flags);
    if (fp16_is_signalling_NaN(b_exp, b_mnt))
        return fp16_process_NaN(b, mode, flags);

    // Handle quiet NaNs:
    if (fp16_is_NaN(a_exp, a_mnt))
        return fp16_process_NaN(a, mode, flags);
    if (fp16_is_NaN(b_exp, b_mnt))
        return fp16_process_NaN(b, mode, flags);

    return 0;
}

static uint32_t
fp32_process_NaNs(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_exp = FP32_EXP(a);
    uint32_t a_mnt = FP32_MANT(a);
    int b_exp = FP32_EXP(b);
    uint32_t b_mnt = FP32_MANT(b);

    // Handle signalling NaNs:
    if (fp32_is_signalling_NaN(a_exp, a_mnt))
        return fp32_process_NaN(a, mode, flags);
    if (fp32_is_signalling_NaN(b_exp, b_mnt))
        return fp32_process_NaN(b, mode, flags);

    // Handle quiet NaNs:
    if (fp32_is_NaN(a_exp, a_mnt))
        return fp32_process_NaN(a, mode, flags);
    if (fp32_is_NaN(b_exp, b_mnt))
        return fp32_process_NaN(b, mode, flags);

    return 0;
}

static uint64_t
fp64_process_NaNs(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_exp = FP64_EXP(a);
    uint64_t a_mnt = FP64_MANT(a);
    int b_exp = FP64_EXP(b);
    uint64_t b_mnt = FP64_MANT(b);

    // Handle signalling NaNs:
    if (fp64_is_signalling_NaN(a_exp, a_mnt))
        return fp64_process_NaN(a, mode, flags);
    if (fp64_is_signalling_NaN(b_exp, b_mnt))
        return fp64_process_NaN(b, mode, flags);

    // Handle quiet NaNs:
    if (fp64_is_NaN(a_exp, a_mnt))
        return fp64_process_NaN(a, mode, flags);
    if (fp64_is_NaN(b_exp, b_mnt))
        return fp64_process_NaN(b, mode, flags);

    return 0;
}

static uint16_t
fp16_process_NaNs3(uint16_t a, uint16_t b, uint16_t c, int mode, int *flags)
{
    int a_exp = FP16_EXP(a);
    uint16_t a_mnt = FP16_MANT(a);
    int b_exp = FP16_EXP(b);
    uint16_t b_mnt = FP16_MANT(b);
    int c_exp = FP16_EXP(c);
    uint16_t c_mnt = FP16_MANT(c);

    // Handle signalling NaNs:
    if (fp16_is_signalling_NaN(a_exp, a_mnt))
        return fp16_process_NaN(a, mode, flags);
    if (fp16_is_signalling_NaN(b_exp, b_mnt))
        return fp16_process_NaN(b, mode, flags);
    if (fp16_is_signalling_NaN(c_exp, c_mnt))
        return fp16_process_NaN(c, mode, flags);

    // Handle quiet NaNs:
    if (fp16_is_NaN(a_exp, a_mnt))
        return fp16_process_NaN(a, mode, flags);
    if (fp16_is_NaN(b_exp, b_mnt))
        return fp16_process_NaN(b, mode, flags);
    if (fp16_is_NaN(c_exp, c_mnt))
        return fp16_process_NaN(c, mode, flags);

    return 0;
}

static uint32_t
fp32_process_NaNs3(uint32_t a, uint32_t b, uint32_t c, int mode, int *flags)
{
    int a_exp = FP32_EXP(a);
    uint32_t a_mnt = FP32_MANT(a);
    int b_exp = FP32_EXP(b);
    uint32_t b_mnt = FP32_MANT(b);
    int c_exp = FP32_EXP(c);
    uint32_t c_mnt = FP32_MANT(c);

    // Handle signalling NaNs:
    if (fp32_is_signalling_NaN(a_exp, a_mnt))
        return fp32_process_NaN(a, mode, flags);
    if (fp32_is_signalling_NaN(b_exp, b_mnt))
        return fp32_process_NaN(b, mode, flags);
    if (fp32_is_signalling_NaN(c_exp, c_mnt))
        return fp32_process_NaN(c, mode, flags);

    // Handle quiet NaNs:
    if (fp32_is_NaN(a_exp, a_mnt))
        return fp32_process_NaN(a, mode, flags);
    if (fp32_is_NaN(b_exp, b_mnt))
        return fp32_process_NaN(b, mode, flags);
    if (fp32_is_NaN(c_exp, c_mnt))
        return fp32_process_NaN(c, mode, flags);

    return 0;
}

static uint64_t
fp64_process_NaNs3(uint64_t a, uint64_t b, uint64_t c, int mode, int *flags)
{
    int a_exp = FP64_EXP(a);
    uint64_t a_mnt = FP64_MANT(a);
    int b_exp = FP64_EXP(b);
    uint64_t b_mnt = FP64_MANT(b);
    int c_exp = FP64_EXP(c);
    uint64_t c_mnt = FP64_MANT(c);

    // Handle signalling NaNs:
    if (fp64_is_signalling_NaN(a_exp, a_mnt))
        return fp64_process_NaN(a, mode, flags);
    if (fp64_is_signalling_NaN(b_exp, b_mnt))
        return fp64_process_NaN(b, mode, flags);
    if (fp64_is_signalling_NaN(c_exp, c_mnt))
        return fp64_process_NaN(c, mode, flags);

    // Handle quiet NaNs:
    if (fp64_is_NaN(a_exp, a_mnt))
        return fp64_process_NaN(a, mode, flags);
    if (fp64_is_NaN(b_exp, b_mnt))
        return fp64_process_NaN(b, mode, flags);
    if (fp64_is_NaN(c_exp, c_mnt))
        return fp64_process_NaN(c, mode, flags);

    return 0;
}

static uint16_t
fp16_round_(int sgn, int exp, uint16_t mnt, int rm, int mode, int *flags)
{
    int biased_exp; // non-negative exponent value for result
    uint16_t int_mant; // mantissa for result, less than (2 << FP16_MANT_BITS)
    int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5

    assert(rm != FPRounding_TIEAWAY);

    // Flush to zero:
    if ((mode & FPLIB_FZ16) && exp < 1) {
        *flags |= FPLIB_UFC;
        return fp16_zero(sgn);
    }

    // The bottom FP16_EXP_BITS bits of mnt are ORed together:
    mnt = (4ULL << FP16_MANT_BITS | mnt >> (FP16_EXP_BITS - 1) |
           ((mnt & ((1ULL << FP16_EXP_BITS) - 1)) != 0));

    if (exp > 0) {
        biased_exp = exp;
        int_mant = mnt >> 2;
        error = mnt & 3;
    } else {
        biased_exp = 0;
        int_mant = lsr16(mnt, 3 - exp);
        error = (lsr16(mnt, 1 - exp) & 3) | !!(mnt & (lsl16(1, 1 - exp) - 1));
    }

    if (!biased_exp && error) { // xx should also check fpscr_val<11>
        *flags |= FPLIB_UFC;
    }

    // Round up:
    if ((rm == FPLIB_RN && (error == 3 ||
                            (error == 2 && (int_mant & 1)))) ||
        (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) {
        ++int_mant;
        if (int_mant == 1ULL << FP16_MANT_BITS) {
            // Rounded up from denormalized to normalized
            biased_exp = 1;
        }
        if (int_mant == 2ULL << FP16_MANT_BITS) {
            // Rounded up to next exponent
            ++biased_exp;
            int_mant >>= 1;
        }
    }

    // Handle rounding to odd aka Von Neumann rounding:
    if (error && rm == FPRounding_ODD)
        int_mant |= 1;

    // Handle overflow:
    if (!(mode & FPLIB_AHP)) {
        if (biased_exp >= (int)FP16_EXP_INF) {
            *flags |= FPLIB_OFC | FPLIB_IXC;
            if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) ||
                (rm == FPLIB_RM && sgn)) {
                return fp16_infinity(sgn);
            } else {
                return fp16_max_normal(sgn);
            }
        }
    } else {
        if (biased_exp >= (int)FP16_EXP_INF + 1) {
            *flags |= FPLIB_IOC;
            return fp16_pack(sgn, FP16_EXP_INF, -1);
        }
    }

    if (error) {
        *flags |= FPLIB_IXC;
    }

    return fp16_pack(sgn, biased_exp, int_mant);
}

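// Note (added): on entry to the rounding helpers the mantissa carries the
// result in its top bits; "error" packs the guard and sticky information
// into two bits, so error == 2 means the discarded fraction is exactly
// half a ULP. Round-to-nearest then ties to even: an odd int_mant of 0x401
// with error == 2 rounds up to 0x402, while an even 0x400 is left alone.
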
static uint16_t
fp16_round(int sgn, int exp, uint16_t mnt, int mode, int *flags)
{
    return fp16_round_(sgn, exp, mnt, mode & 3, mode, flags);
}

static uint32_t
fp32_round_(int sgn, int exp, uint32_t mnt, int rm, int mode, int *flags)
{
    int biased_exp; // non-negative exponent value for result
    uint32_t int_mant; // mantissa for result, less than (2 << FP32_MANT_BITS)
    int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5

    assert(rm != FPRounding_TIEAWAY);

    // Flush to zero:
    if ((mode & FPLIB_FZ) && exp < 1) {
        *flags |= FPLIB_UFC;
        return fp32_zero(sgn);
    }

    // The bottom FP32_EXP_BITS bits of mnt are ORed together:
    mnt = (4ULL << FP32_MANT_BITS | mnt >> (FP32_EXP_BITS - 1) |
           ((mnt & ((1ULL << FP32_EXP_BITS) - 1)) != 0));

    if (exp > 0) {
        biased_exp = exp;
        int_mant = mnt >> 2;
        error = mnt & 3;
    } else {
        biased_exp = 0;
        int_mant = lsr32(mnt, 3 - exp);
        error = (lsr32(mnt, 1 - exp) & 3) | !!(mnt & (lsl32(1, 1 - exp) - 1));
    }

    if (!biased_exp && error) { // xx should also check fpscr_val<11>
        *flags |= FPLIB_UFC;
    }

    // Round up:
    if ((rm == FPLIB_RN && (error == 3 ||
                            (error == 2 && (int_mant & 1)))) ||
        (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) {
        ++int_mant;
        if (int_mant == 1ULL << FP32_MANT_BITS) {
            // Rounded up from denormalized to normalized
            biased_exp = 1;
        }
        if (int_mant == 2ULL << FP32_MANT_BITS) {
            // Rounded up to next exponent
            ++biased_exp;
            int_mant >>= 1;
        }
    }

    // Handle rounding to odd aka Von Neumann rounding:
    if (error && rm == FPRounding_ODD)
        int_mant |= 1;

    // Handle overflow:
    if (biased_exp >= (int)FP32_EXP_INF) {
        *flags |= FPLIB_OFC | FPLIB_IXC;
        if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) ||
            (rm == FPLIB_RM && sgn)) {
            return fp32_infinity(sgn);
        } else {
            return fp32_max_normal(sgn);
        }
    }

    if (error) {
        *flags |= FPLIB_IXC;
    }

    return fp32_pack(sgn, biased_exp, int_mant);
}

static uint32_t
fp32_round(int sgn, int exp, uint32_t mnt, int mode, int *flags)
{
    return fp32_round_(sgn, exp, mnt, mode & 3, mode, flags);
}

static uint64_t
fp64_round_(int sgn, int exp, uint64_t mnt, int rm, int mode, int *flags)
{
    int biased_exp; // non-negative exponent value for result
    uint64_t int_mant; // mantissa for result, less than (2 << FP64_MANT_BITS)
    int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5

    assert(rm != FPRounding_TIEAWAY);

    // Flush to zero:
    if ((mode & FPLIB_FZ) && exp < 1) {
        *flags |= FPLIB_UFC;
        return fp64_zero(sgn);
    }

    // The bottom FP64_EXP_BITS bits of mnt are ORed together:
    mnt = (4ULL << FP64_MANT_BITS | mnt >> (FP64_EXP_BITS - 1) |
           ((mnt & ((1ULL << FP64_EXP_BITS) - 1)) != 0));

    if (exp > 0) {
        biased_exp = exp;
        int_mant = mnt >> 2;
        error = mnt & 3;
    } else {
        biased_exp = 0;
        int_mant = lsr64(mnt, 3 - exp);
        error = (lsr64(mnt, 1 - exp) & 3) | !!(mnt & (lsl64(1, 1 - exp) - 1));
    }

    if (!biased_exp && error) { // xx should also check fpscr_val<11>
        *flags |= FPLIB_UFC;
    }

    // Round up:
    if ((rm == FPLIB_RN && (error == 3 ||
                            (error == 2 && (int_mant & 1)))) ||
        (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) {
        ++int_mant;
        if (int_mant == 1ULL << FP64_MANT_BITS) {
            // Rounded up from denormalized to normalized
            biased_exp = 1;
        }
        if (int_mant == 2ULL << FP64_MANT_BITS) {
            // Rounded up to next exponent
            ++biased_exp;
            int_mant >>= 1;
        }
    }

    // Handle rounding to odd aka Von Neumann rounding:
    if (error && rm == FPRounding_ODD)
        int_mant |= 1;

    // Handle overflow:
    if (biased_exp >= (int)FP64_EXP_INF) {
        *flags |= FPLIB_OFC | FPLIB_IXC;
        if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) ||
            (rm == FPLIB_RM && sgn)) {
            return fp64_infinity(sgn);
        } else {
            return fp64_max_normal(sgn);
        }
    }

    if (error) {
        *flags |= FPLIB_IXC;
    }

    return fp64_pack(sgn, biased_exp, int_mant);
}

static uint64_t
fp64_round(int sgn, int exp, uint64_t mnt, int mode, int *flags)
{
    return fp64_round_(sgn, exp, mnt, mode & 3, mode, flags);
}

static int
fp16_compare_eq(uint16_t a, uint16_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint16_t a_mnt, b_mnt;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp16_is_NaN(a_exp, a_mnt) ||
        fp16_is_NaN(b_exp, b_mnt)) {
        if (fp16_is_signalling_NaN(a_exp, a_mnt) ||
            fp16_is_signalling_NaN(b_exp, b_mnt))
            *flags |= FPLIB_IOC;
        return 0;
    }
    return a == b || (!a_mnt && !b_mnt);
}

static int
fp16_compare_ge(uint16_t a, uint16_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint16_t a_mnt, b_mnt;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp16_is_NaN(a_exp, a_mnt) ||
        fp16_is_NaN(b_exp, b_mnt)) {
        *flags |= FPLIB_IOC;
        return 0;
    }
    if (!a_mnt && !b_mnt)
        return 1;
    if (a_sgn != b_sgn)
        return b_sgn;
    if (a_exp != b_exp)
        return a_sgn ^ (a_exp > b_exp);
    if (a_mnt != b_mnt)
        return a_sgn ^ (a_mnt > b_mnt);
    return 1;
}

static int
fp16_compare_gt(uint16_t a, uint16_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint16_t a_mnt, b_mnt;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp16_is_NaN(a_exp, a_mnt) ||
        fp16_is_NaN(b_exp, b_mnt)) {
        *flags |= FPLIB_IOC;
        return 0;
    }
    if (!a_mnt && !b_mnt)
        return 0;
    if (a_sgn != b_sgn)
        return b_sgn;
    if (a_exp != b_exp)
        return a_sgn ^ (a_exp > b_exp);
    if (a_mnt != b_mnt)
        return a_sgn ^ (a_mnt > b_mnt);
    return 0;
}

static int
fp16_compare_un(uint16_t a, uint16_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint16_t a_mnt, b_mnt;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp16_is_NaN(a_exp, a_mnt) ||
        fp16_is_NaN(b_exp, b_mnt)) {
        if (fp16_is_signalling_NaN(a_exp, a_mnt) ||
            fp16_is_signalling_NaN(b_exp, b_mnt))
            *flags |= FPLIB_IOC;
        return 1;
    }
    return 0;
}

static int
fp32_compare_eq(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint32_t a_mnt, b_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp32_is_NaN(a_exp, a_mnt) ||
        fp32_is_NaN(b_exp, b_mnt)) {
        if (fp32_is_signalling_NaN(a_exp, a_mnt) ||
            fp32_is_signalling_NaN(b_exp, b_mnt))
            *flags |= FPLIB_IOC;
        return 0;
    }
    return a == b || (!a_mnt && !b_mnt);
}

static int
fp32_compare_ge(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint32_t a_mnt, b_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp32_is_NaN(a_exp, a_mnt) ||
        fp32_is_NaN(b_exp, b_mnt)) {
        *flags |= FPLIB_IOC;
        return 0;
    }
    if (!a_mnt && !b_mnt)
        return 1;
    if (a_sgn != b_sgn)
        return b_sgn;
    if (a_exp != b_exp)
        return a_sgn ^ (a_exp > b_exp);
    if (a_mnt != b_mnt)
        return a_sgn ^ (a_mnt > b_mnt);
    return 1;
}

static int
fp32_compare_gt(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint32_t a_mnt, b_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp32_is_NaN(a_exp, a_mnt) ||
        fp32_is_NaN(b_exp, b_mnt)) {
        *flags |= FPLIB_IOC;
        return 0;
    }
    if (!a_mnt && !b_mnt)
        return 0;
    if (a_sgn != b_sgn)
        return b_sgn;
    if (a_exp != b_exp)
        return a_sgn ^ (a_exp > b_exp);
    if (a_mnt != b_mnt)
        return a_sgn ^ (a_mnt > b_mnt);
    return 0;
}

static int
fp32_compare_un(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint32_t a_mnt, b_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp32_is_NaN(a_exp, a_mnt) ||
        fp32_is_NaN(b_exp, b_mnt)) {
        if (fp32_is_signalling_NaN(a_exp, a_mnt) ||
            fp32_is_signalling_NaN(b_exp, b_mnt))
            *flags |= FPLIB_IOC;
        return 1;
    }
    return 0;
}

static int
fp64_compare_eq(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint64_t a_mnt, b_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp64_is_NaN(a_exp, a_mnt) ||
        fp64_is_NaN(b_exp, b_mnt)) {
        if (fp64_is_signalling_NaN(a_exp, a_mnt) ||
            fp64_is_signalling_NaN(b_exp, b_mnt))
            *flags |= FPLIB_IOC;
        return 0;
    }
    return a == b || (!a_mnt && !b_mnt);
}

static int
fp64_compare_ge(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint64_t a_mnt, b_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp64_is_NaN(a_exp, a_mnt) ||
        fp64_is_NaN(b_exp, b_mnt)) {
        *flags |= FPLIB_IOC;
        return 0;
    }
    if (!a_mnt && !b_mnt)
        return 1;
    if (a_sgn != b_sgn)
        return b_sgn;
    if (a_exp != b_exp)
        return a_sgn ^ (a_exp > b_exp);
    if (a_mnt != b_mnt)
        return a_sgn ^ (a_mnt > b_mnt);
    return 1;
}

static int
fp64_compare_gt(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint64_t a_mnt, b_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp64_is_NaN(a_exp, a_mnt) ||
        fp64_is_NaN(b_exp, b_mnt)) {
        *flags |= FPLIB_IOC;
        return 0;
    }
    if (!a_mnt && !b_mnt)
        return 0;
    if (a_sgn != b_sgn)
        return b_sgn;
    if (a_exp != b_exp)
        return a_sgn ^ (a_exp > b_exp);
    if (a_mnt != b_mnt)
        return a_sgn ^ (a_mnt > b_mnt);
    return 0;
}

static int
fp64_compare_un(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint64_t a_mnt, b_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp64_is_NaN(a_exp, a_mnt) ||
        fp64_is_NaN(b_exp, b_mnt)) {
        if (fp64_is_signalling_NaN(a_exp, a_mnt) ||
            fp64_is_signalling_NaN(b_exp, b_mnt))
            *flags |= FPLIB_IOC;
        return 1;
    }
    return 0;
}

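// Note (added): these follow the IEEE 754 predicate conventions: the
// equality compare is quiet (IOC only for a signalling NaN operand) while
// ge/gt are signalling (IOC for any NaN), and "un" reports the unordered
// relation. Zeroes compare equal regardless of sign, so, for example,
// fp32_compare_eq(0x80000000, 0x00000000, mode, &flags) returns 1.
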
static uint16_t
fp16_add(uint16_t a, uint16_t b, int neg, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint16_t a_mnt, b_mnt, x, x_mnt;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp16_process_NaNs(a, b, mode, flags))) {
        return x;
    }

    b_sgn ^= neg;

    // Handle infinities and zeroes:
    if (a_exp == FP16_EXP_INF && b_exp == FP16_EXP_INF && a_sgn != b_sgn) {
        *flags |= FPLIB_IOC;
        return fp16_defaultNaN();
    } else if (a_exp == FP16_EXP_INF) {
        return fp16_infinity(a_sgn);
    } else if (b_exp == FP16_EXP_INF) {
        return fp16_infinity(b_sgn);
    } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) {
        return fp16_zero(a_sgn);
    }

    a_mnt <<= 3;
    b_mnt <<= 3;
    if (a_exp >= b_exp) {
        b_mnt = (lsr16(b_mnt, a_exp - b_exp) |
                 !!(b_mnt & (lsl16(1, a_exp - b_exp) - 1)));
        b_exp = a_exp;
    } else {
        a_mnt = (lsr16(a_mnt, b_exp - a_exp) |
                 !!(a_mnt & (lsl16(1, b_exp - a_exp) - 1)));
        a_exp = b_exp;
    }
    x_sgn = a_sgn;
    x_exp = a_exp;
    if (a_sgn == b_sgn) {
        x_mnt = a_mnt + b_mnt;
    } else if (a_mnt >= b_mnt) {
        x_mnt = a_mnt - b_mnt;
    } else {
        x_sgn ^= 1;
        x_mnt = b_mnt - a_mnt;
    }

    if (!x_mnt) {
        // Sign of exact zero result depends on rounding mode
        return fp16_zero((mode & 3) == 2);
    }

    x_mnt = fp16_normalise(x_mnt, &x_exp);

    return fp16_round(x_sgn, x_exp + FP16_EXP_BITS - 3, x_mnt << 1,
                      mode, flags);
}

static uint32_t
fp32_add(uint32_t a, uint32_t b, int neg, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint32_t a_mnt, b_mnt, x, x_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp32_process_NaNs(a, b, mode, flags))) {
        return x;
    }

    b_sgn ^= neg;

    // Handle infinities and zeroes:
    if (a_exp == FP32_EXP_INF && b_exp == FP32_EXP_INF && a_sgn != b_sgn) {
        *flags |= FPLIB_IOC;
        return fp32_defaultNaN();
    } else if (a_exp == FP32_EXP_INF) {
        return fp32_infinity(a_sgn);
    } else if (b_exp == FP32_EXP_INF) {
        return fp32_infinity(b_sgn);
    } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) {
        return fp32_zero(a_sgn);
    }

    a_mnt <<= 3;
    b_mnt <<= 3;
    if (a_exp >= b_exp) {
        b_mnt = (lsr32(b_mnt, a_exp - b_exp) |
                 !!(b_mnt & (lsl32(1, a_exp - b_exp) - 1)));
        b_exp = a_exp;
    } else {
        a_mnt = (lsr32(a_mnt, b_exp - a_exp) |
                 !!(a_mnt & (lsl32(1, b_exp - a_exp) - 1)));
        a_exp = b_exp;
    }
    x_sgn = a_sgn;
    x_exp = a_exp;
    if (a_sgn == b_sgn) {
        x_mnt = a_mnt + b_mnt;
    } else if (a_mnt >= b_mnt) {
        x_mnt = a_mnt - b_mnt;
    } else {
        x_sgn ^= 1;
        x_mnt = b_mnt - a_mnt;
    }

    if (!x_mnt) {
        // Sign of exact zero result depends on rounding mode
        return fp32_zero((mode & 3) == 2);
    }

    x_mnt = fp32_normalise(x_mnt, &x_exp);

    return fp32_round(x_sgn, x_exp + FP32_EXP_BITS - 3, x_mnt << 1,
                      mode, flags);
}

static uint64_t
fp64_add(uint64_t a, uint64_t b, int neg, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint64_t a_mnt, b_mnt, x, x_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp64_process_NaNs(a, b, mode, flags))) {
        return x;
    }

    b_sgn ^= neg;

    // Handle infinities and zeroes:
    if (a_exp == FP64_EXP_INF && b_exp == FP64_EXP_INF && a_sgn != b_sgn) {
        *flags |= FPLIB_IOC;
        return fp64_defaultNaN();
    } else if (a_exp == FP64_EXP_INF) {
        return fp64_infinity(a_sgn);
    } else if (b_exp == FP64_EXP_INF) {
        return fp64_infinity(b_sgn);
    } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) {
        return fp64_zero(a_sgn);
    }

    a_mnt <<= 3;
    b_mnt <<= 3;
    if (a_exp >= b_exp) {
        b_mnt = (lsr64(b_mnt, a_exp - b_exp) |
                 !!(b_mnt & (lsl64(1, a_exp - b_exp) - 1)));
        b_exp = a_exp;
    } else {
        a_mnt = (lsr64(a_mnt, b_exp - a_exp) |
                 !!(a_mnt & (lsl64(1, b_exp - a_exp) - 1)));
        a_exp = b_exp;
    }
    x_sgn = a_sgn;
    x_exp = a_exp;
    if (a_sgn == b_sgn) {
        x_mnt = a_mnt + b_mnt;
    } else if (a_mnt >= b_mnt) {
        x_mnt = a_mnt - b_mnt;
    } else {
        x_sgn ^= 1;
        x_mnt = b_mnt - a_mnt;
    }

    if (!x_mnt) {
        // Sign of exact zero result depends on rounding mode
        return fp64_zero((mode & 3) == 2);
    }

    x_mnt = fp64_normalise(x_mnt, &x_exp);

    return fp64_round(x_sgn, x_exp + FP64_EXP_BITS - 3, x_mnt << 1,
                      mode, flags);
}

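// Note (added): the adders shift both mantissas left by 3 to create guard
// space, align the smaller operand with a sticky OR of the bits shifted
// out, and hand the normalised sum to fp*_round with the exponent adjusted
// by EXP_BITS - 3 to undo that scaling. An exact zero difference takes its
// sign from the rounding mode: negative only for round-to-minus-infinity
// ((mode & 3) == 2, i.e. FPLIB_RM).
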
static uint16_t
fp16_mul(uint16_t a, uint16_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint16_t a_mnt, b_mnt, x;
    uint32_t x_mnt;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp16_process_NaNs(a, b, mode, flags))) {
        return x;
    }

    // Handle infinities and zeroes:
    if ((a_exp == FP16_EXP_INF && !b_mnt) ||
        (b_exp == FP16_EXP_INF && !a_mnt)) {
        *flags |= FPLIB_IOC;
        return fp16_defaultNaN();
    } else if (a_exp == FP16_EXP_INF || b_exp == FP16_EXP_INF) {
        return fp16_infinity(a_sgn ^ b_sgn);
    } else if (!a_mnt || !b_mnt) {
        return fp16_zero(a_sgn ^ b_sgn);
    }

    // Multiply and normalise:
    x_sgn = a_sgn ^ b_sgn;
    x_exp = a_exp + b_exp - FP16_EXP_BIAS + 2 * FP16_EXP_BITS + 1;
    x_mnt = (uint32_t)a_mnt * b_mnt;
    x_mnt = fp32_normalise(x_mnt, &x_exp);

    // Convert to FP16_BITS bits, collapsing error into bottom bit:
    x_mnt = lsr32(x_mnt, FP16_BITS - 1) | !!lsl32(x_mnt, FP16_BITS + 1);

    return fp16_round(x_sgn, x_exp, x_mnt, mode, flags);
}

static uint32_t
fp32_mul(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint32_t a_mnt, b_mnt, x;
    uint64_t x_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp32_process_NaNs(a, b, mode, flags))) {
        return x;
    }

    // Handle infinities and zeroes:
    if ((a_exp == FP32_EXP_INF && !b_mnt) ||
        (b_exp == FP32_EXP_INF && !a_mnt)) {
        *flags |= FPLIB_IOC;
        return fp32_defaultNaN();
    } else if (a_exp == FP32_EXP_INF || b_exp == FP32_EXP_INF) {
        return fp32_infinity(a_sgn ^ b_sgn);
    } else if (!a_mnt || !b_mnt) {
        return fp32_zero(a_sgn ^ b_sgn);
    }

    // Multiply and normalise:
    x_sgn = a_sgn ^ b_sgn;
    x_exp = a_exp + b_exp - FP32_EXP_BIAS + 2 * FP32_EXP_BITS + 1;
    x_mnt = (uint64_t)a_mnt * b_mnt;
    x_mnt = fp64_normalise(x_mnt, &x_exp);

    // Convert to FP32_BITS bits, collapsing error into bottom bit:
    x_mnt = lsr64(x_mnt, FP32_BITS - 1) | !!lsl64(x_mnt, FP32_BITS + 1);

    return fp32_round(x_sgn, x_exp, x_mnt, mode, flags);
}

static uint64_t
fp64_mul(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint64_t a_mnt, b_mnt, x;
    uint64_t x0_mnt, x1_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp64_process_NaNs(a, b, mode, flags))) {
        return x;
    }

    // Handle infinities and zeroes:
    if ((a_exp == FP64_EXP_INF && !b_mnt) ||
        (b_exp == FP64_EXP_INF && !a_mnt)) {
        *flags |= FPLIB_IOC;
        return fp64_defaultNaN();
    } else if (a_exp == FP64_EXP_INF || b_exp == FP64_EXP_INF) {
        return fp64_infinity(a_sgn ^ b_sgn);
    } else if (!a_mnt || !b_mnt) {
        return fp64_zero(a_sgn ^ b_sgn);
    }

    // Multiply and normalise:
    x_sgn = a_sgn ^ b_sgn;
    x_exp = a_exp + b_exp - FP64_EXP_BIAS + 2 * FP64_EXP_BITS + 1;
    mul62x62(&x0_mnt, &x1_mnt, a_mnt, b_mnt);
    fp128_normalise(&x0_mnt, &x1_mnt, &x_exp);

    // Convert to FP64_BITS bits, collapsing error into bottom bit:
    x0_mnt = x1_mnt << 1 | !!x0_mnt;

    return fp64_round(x_sgn, x_exp, x0_mnt, mode, flags);
}

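// Note (added): multiplication forms the exact double-width product of the
// two (MANT_BITS + 1)-bit significands, normalises it, and then collapses
// it back to a single word, ORing every discarded bit into the bottom
// sticky bit, so the single rounding step at the end still sees all the
// information it needs.
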
static uint16_t
fp16_muladd(uint16_t a, uint16_t b, uint16_t c, int scale,
            int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp;
    uint16_t a_mnt, b_mnt, c_mnt, x;
    uint32_t x_mnt, y_mnt;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
    fp16_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags);

    x = fp16_process_NaNs3(a, b, c, mode, flags);

    // Quiet NaN added to product of zero and infinity:
    if (fp16_is_quiet_NaN(a_exp, a_mnt) &&
        ((!b_mnt && fp16_is_infinity(c_exp, c_mnt)) ||
         (!c_mnt && fp16_is_infinity(b_exp, b_mnt)))) {
        x = fp16_defaultNaN();
        *flags |= FPLIB_IOC;
    }

    if (x) {
        return x;
    }

    // Handle infinities and zeroes:
    if ((b_exp == FP16_EXP_INF && !c_mnt) ||
        (c_exp == FP16_EXP_INF && !b_mnt) ||
        (a_exp == FP16_EXP_INF &&
         (b_exp == FP16_EXP_INF || c_exp == FP16_EXP_INF) &&
         (a_sgn != (b_sgn ^ c_sgn)))) {
        *flags |= FPLIB_IOC;
        return fp16_defaultNaN();
    }
    if (a_exp == FP16_EXP_INF)
        return fp16_infinity(a_sgn);
    if (b_exp == FP16_EXP_INF || c_exp == FP16_EXP_INF)
        return fp16_infinity(b_sgn ^ c_sgn);
    if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn))
        return fp16_zero(a_sgn);

    x_sgn = a_sgn;
    x_exp = a_exp + 2 * FP16_EXP_BITS - 3;
    x_mnt = (uint32_t)a_mnt << (FP16_MANT_BITS + 4);

    // Multiply:
    y_sgn = b_sgn ^ c_sgn;
    y_exp = b_exp + c_exp - FP16_EXP_BIAS + 2 * FP16_EXP_BITS + 1 - 3;
    y_mnt = (uint32_t)b_mnt * c_mnt << 3;
    if (!y_mnt) {
        y_exp = x_exp;
    }

    // Add:
    if (x_exp >= y_exp) {
        y_mnt = (lsr32(y_mnt, x_exp - y_exp) |
                 !!(y_mnt & (lsl32(1, x_exp - y_exp) - 1)));
        y_exp = x_exp;
    } else {
        x_mnt = (lsr32(x_mnt, y_exp - x_exp) |
                 !!(x_mnt & (lsl32(1, y_exp - x_exp) - 1)));
        x_exp = y_exp;
    }
    if (x_sgn == y_sgn) {
        x_mnt = x_mnt + y_mnt;
    } else if (x_mnt >= y_mnt) {
        x_mnt = x_mnt - y_mnt;
    } else {
        x_sgn ^= 1;
        x_mnt = y_mnt - x_mnt;
    }

    if (!x_mnt) {
        // Sign of exact zero result depends on rounding mode
        return fp16_zero((mode & 3) == 2);
    }

    // Normalise into FP16_BITS bits, collapsing error into bottom bit:
    x_mnt = fp32_normalise(x_mnt, &x_exp);
    x_mnt = x_mnt >> (FP16_BITS - 1) | !!(uint16_t)(x_mnt << 1);

    return fp16_round(x_sgn, x_exp + scale, x_mnt, mode, flags);
}

static uint32_t
fp32_muladd(uint32_t a, uint32_t b, uint32_t c, int scale,
            int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp;
    uint32_t a_mnt, b_mnt, c_mnt, x;
    uint64_t x_mnt, y_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
    fp32_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags);

    x = fp32_process_NaNs3(a, b, c, mode, flags);

    // Quiet NaN added to product of zero and infinity:
    if (fp32_is_quiet_NaN(a_exp, a_mnt) &&
        ((!b_mnt && fp32_is_infinity(c_exp, c_mnt)) ||
         (!c_mnt && fp32_is_infinity(b_exp, b_mnt)))) {
        x = fp32_defaultNaN();
        *flags |= FPLIB_IOC;
    }

    if (x) {
        return x;
    }

    // Handle infinities and zeroes:
    if ((b_exp == FP32_EXP_INF && !c_mnt) ||
        (c_exp == FP32_EXP_INF && !b_mnt) ||
        (a_exp == FP32_EXP_INF &&
         (b_exp == FP32_EXP_INF || c_exp == FP32_EXP_INF) &&
         (a_sgn != (b_sgn ^ c_sgn)))) {
        *flags |= FPLIB_IOC;
        return fp32_defaultNaN();
    }
    if (a_exp == FP32_EXP_INF)
        return fp32_infinity(a_sgn);
    if (b_exp == FP32_EXP_INF || c_exp == FP32_EXP_INF)
        return fp32_infinity(b_sgn ^ c_sgn);
    if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn))
        return fp32_zero(a_sgn);

    x_sgn = a_sgn;
    x_exp = a_exp + 2 * FP32_EXP_BITS - 3;
    x_mnt = (uint64_t)a_mnt << (FP32_MANT_BITS + 4);

    // Multiply:
    y_sgn = b_sgn ^ c_sgn;
    y_exp = b_exp + c_exp - FP32_EXP_BIAS + 2 * FP32_EXP_BITS + 1 - 3;
    y_mnt = (uint64_t)b_mnt * c_mnt << 3;
    if (!y_mnt) {
        y_exp = x_exp;
    }

    // Add:
    if (x_exp >= y_exp) {
        y_mnt = (lsr64(y_mnt, x_exp - y_exp) |
                 !!(y_mnt & (lsl64(1, x_exp - y_exp) - 1)));
        y_exp = x_exp;
    } else {
        x_mnt = (lsr64(x_mnt, y_exp - x_exp) |
                 !!(x_mnt & (lsl64(1, y_exp - x_exp) - 1)));
        x_exp = y_exp;
    }
    if (x_sgn == y_sgn) {
        x_mnt = x_mnt + y_mnt;
    } else if (x_mnt >= y_mnt) {
        x_mnt = x_mnt - y_mnt;
    } else {
        x_sgn ^= 1;
        x_mnt = y_mnt - x_mnt;
    }

    if (!x_mnt) {
        // Sign of exact zero result depends on rounding mode
        return fp32_zero((mode & 3) == 2);
    }

    // Normalise into FP32_BITS bits, collapsing error into bottom bit:
    x_mnt = fp64_normalise(x_mnt, &x_exp);
    x_mnt = x_mnt >> (FP32_BITS - 1) | !!(uint32_t)(x_mnt << 1);

    return fp32_round(x_sgn, x_exp + scale, x_mnt, mode, flags);
}

static uint64_t
fp64_muladd(uint64_t a, uint64_t b, uint64_t c, int scale,
            int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp;
    uint64_t a_mnt, b_mnt, c_mnt, x;
    uint64_t x0_mnt, x1_mnt, y0_mnt, y1_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
    fp64_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags);

    x = fp64_process_NaNs3(a, b, c, mode, flags);

    // Quiet NaN added to product of zero and infinity:
    if (fp64_is_quiet_NaN(a_exp, a_mnt) &&
        ((!b_mnt && fp64_is_infinity(c_exp, c_mnt)) ||
         (!c_mnt && fp64_is_infinity(b_exp, b_mnt)))) {
        x = fp64_defaultNaN();
        *flags |= FPLIB_IOC;
    }

    if (x) {
        return x;
    }

    // Handle infinities and zeroes:
    if ((b_exp == FP64_EXP_INF && !c_mnt) ||
        (c_exp == FP64_EXP_INF && !b_mnt) ||
        (a_exp == FP64_EXP_INF &&
         (b_exp == FP64_EXP_INF || c_exp == FP64_EXP_INF) &&
         (a_sgn != (b_sgn ^ c_sgn)))) {
        *flags |= FPLIB_IOC;
        return fp64_defaultNaN();
    }
    if (a_exp == FP64_EXP_INF)
        return fp64_infinity(a_sgn);
    if (b_exp == FP64_EXP_INF || c_exp == FP64_EXP_INF)
        return fp64_infinity(b_sgn ^ c_sgn);
    if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn))
        return fp64_zero(a_sgn);

    x_sgn = a_sgn;
    x_exp = a_exp + FP64_EXP_BITS;
    x0_mnt = 0;
    x1_mnt = a_mnt;

    // Multiply:
    y_sgn = b_sgn ^ c_sgn;
    y_exp = b_exp + c_exp - FP64_EXP_BIAS + 2 * FP64_EXP_BITS + 1 - 3;
    mul62x62(&y0_mnt, &y1_mnt, b_mnt, c_mnt << 3);
    if (!y0_mnt && !y1_mnt) {
        y_exp = x_exp;
    }

    // Add:
    if (x_exp >= y_exp) {
        uint64_t t0, t1;
        lsl128(&t0, &t1, y0_mnt, y1_mnt,
               x_exp - y_exp < 128 ? 128 - (x_exp - y_exp) : 0);
        lsr128(&y0_mnt, &y1_mnt, y0_mnt, y1_mnt, x_exp - y_exp);
        y0_mnt |= !!(t0 | t1);
        y_exp = x_exp;
    } else {
        uint64_t t0, t1;
        lsl128(&t0, &t1, x0_mnt, x1_mnt,
               y_exp - x_exp < 128 ? 128 - (y_exp - x_exp) : 0);
        lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y_exp - x_exp);
        x0_mnt |= !!(t0 | t1);
        x_exp = y_exp;
    }
    if (x_sgn == y_sgn) {
        add128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y0_mnt, y1_mnt);
    } else if (cmp128(x0_mnt, x1_mnt, y0_mnt, y1_mnt) >= 0) {
        sub128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y0_mnt, y1_mnt);
    } else {
        x_sgn ^= 1;
        sub128(&x0_mnt, &x1_mnt, y0_mnt, y1_mnt, x0_mnt, x1_mnt);
    }

    if (!x0_mnt && !x1_mnt) {
        // Sign of exact zero result depends on rounding mode
        return fp64_zero((mode & 3) == 2);
    }

    // Normalise into FP64_BITS bits, collapsing error into bottom bit:
    fp128_normalise(&x0_mnt, &x1_mnt, &x_exp);
    x0_mnt = x1_mnt << 1 | !!x0_mnt;

    return fp64_round(x_sgn, x_exp + scale, x0_mnt, mode, flags);
}

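// Note (added): the fused multiply-adds keep the product exact (a full
// 128-bit intermediate in the fp64 case) and add the addend before the
// single fp*_round call, which is what gives fused semantics. The scale
// argument simply biases the final exponent before rounding; plain
// multiply-add callers pass 0.
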
1761static uint16_t
1762fp16_div(uint16_t a, uint16_t b, int mode, int *flags)
1763{
1764    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1765    uint16_t a_mnt, b_mnt, x;
1766    uint32_t x_mnt;
1767
1768    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1769    fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1770
1771    if ((x = fp16_process_NaNs(a, b, mode, flags)))
1772        return x;
1773
1774    // Handle infinities and zeroes:
1775    if ((a_exp == FP16_EXP_INF && b_exp == FP16_EXP_INF) ||
1776        (!a_mnt && !b_mnt)) {
1777        *flags |= FPLIB_IOC;
1778        return fp16_defaultNaN();
1779    }
1780    if (a_exp == FP16_EXP_INF || !b_mnt) {
1781        if (a_exp != FP16_EXP_INF)
1782            *flags |= FPLIB_DZC;
1783        return fp16_infinity(a_sgn ^ b_sgn);
1784    }
1785    if (!a_mnt || b_exp == FP16_EXP_INF)
1786        return fp16_zero(a_sgn ^ b_sgn);
1787
1788    // Divide, setting bottom bit if inexact:
1789    a_mnt = fp16_normalise(a_mnt, &a_exp);
1790    x_sgn = a_sgn ^ b_sgn;
1791    x_exp = a_exp - b_exp + (FP16_EXP_BIAS + FP16_BITS + 2 * FP16_EXP_BITS - 3);
1792    x_mnt = ((uint32_t)a_mnt << (FP16_MANT_BITS - FP16_EXP_BITS + 3)) / b_mnt;
1793    x_mnt |= (x_mnt * b_mnt !=
1794              (uint32_t)a_mnt << (FP16_MANT_BITS - FP16_EXP_BITS + 3));
1795
1796    // Normalise into FP16_BITS bits, collapsing error into bottom bit:
1797    x_mnt = fp32_normalise(x_mnt, &x_exp);
1798    x_mnt = x_mnt >> (FP16_BITS - 1) | !!(uint16_t)(x_mnt << 1);
1799
1800    return fp16_round(x_sgn, x_exp, x_mnt, mode, flags);
1801}
1802
1803static uint32_t
1804fp32_div(uint32_t a, uint32_t b, int mode, int *flags)
1805{
1806    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1807    uint32_t a_mnt, b_mnt, x;
1808    uint64_t x_mnt;
1809
1810    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1811    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1812
1813    if ((x = fp32_process_NaNs(a, b, mode, flags)))
1814        return x;
1815
1816    // Handle infinities and zeroes:
1817    if ((a_exp == FP32_EXP_INF && b_exp == FP32_EXP_INF) ||
1818        (!a_mnt && !b_mnt)) {
1819        *flags |= FPLIB_IOC;
1820        return fp32_defaultNaN();
1821    }
1822    if (a_exp == FP32_EXP_INF || !b_mnt) {
1823        if (a_exp != FP32_EXP_INF)
1824            *flags |= FPLIB_DZC;
1825        return fp32_infinity(a_sgn ^ b_sgn);
1826    }
1827    if (!a_mnt || b_exp == FP32_EXP_INF)
1828        return fp32_zero(a_sgn ^ b_sgn);
1829
1830    // Divide, setting bottom bit if inexact:
1831    a_mnt = fp32_normalise(a_mnt, &a_exp);
1832    x_sgn = a_sgn ^ b_sgn;
1833    x_exp = a_exp - b_exp + (FP32_EXP_BIAS + FP32_BITS + 2 * FP32_EXP_BITS - 3);
1834    x_mnt = ((uint64_t)a_mnt << (FP32_MANT_BITS - FP32_EXP_BITS + 3)) / b_mnt;
1835    x_mnt |= (x_mnt * b_mnt !=
1836              (uint64_t)a_mnt << (FP32_MANT_BITS - FP32_EXP_BITS + 3));
1837
1838    // Normalise into FP32_BITS bits, collapsing error into bottom bit:
1839    x_mnt = fp64_normalise(x_mnt, &x_exp);
1840    x_mnt = x_mnt >> (FP32_BITS - 1) | !!(uint32_t)(x_mnt << 1);
1841
1842    return fp32_round(x_sgn, x_exp, x_mnt, mode, flags);
1843}
1844
1845static uint64_t
1846fp64_div(uint64_t a, uint64_t b, int mode, int *flags)
1847{
1848    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp, c;
1849    uint64_t a_mnt, b_mnt, x, x_mnt, x0_mnt, x1_mnt;
1850
1851    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1852    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1853
1854    if ((x = fp64_process_NaNs(a, b, mode, flags)))
1855        return x;
1856
1857    // Handle infinities and zeroes:
1858    if ((a_exp == FP64_EXP_INF && b_exp == FP64_EXP_INF) ||
1859        (!a_mnt && !b_mnt)) {
1860        *flags |= FPLIB_IOC;
1861        return fp64_defaultNaN();
1862    }
1863    if (a_exp == FP64_EXP_INF || !b_mnt) {
1864        if (a_exp != FP64_EXP_INF)
1865            *flags |= FPLIB_DZC;
1866        return fp64_infinity(a_sgn ^ b_sgn);
1867    }
1868    if (!a_mnt || b_exp == FP64_EXP_INF)
1869        return fp64_zero(a_sgn ^ b_sgn);
1870
1871    // Find reciprocal of divisor with Newton-Raphson:
1872    a_mnt = fp64_normalise(a_mnt, &a_exp);
1873    b_mnt = fp64_normalise(b_mnt, &b_exp);
1874    x_mnt = ~(uint64_t)0 / (b_mnt >> 31);
1875    mul64x32(&x0_mnt, &x1_mnt, b_mnt, x_mnt);
1876    sub128(&x0_mnt, &x1_mnt, 0, (uint64_t)1 << 32, x0_mnt, x1_mnt);
1877    lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 32);
1878    mul64x32(&x0_mnt, &x1_mnt, x0_mnt, x_mnt);
1879    lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 33);
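    // The seed from the hardware divide is good to roughly 32 bits; the
    // Newton-Raphson step r' = r * (2 - b * r) above squares the relative
    // error, so the refined reciprocal is accurate to nearly 64 bits.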
1880
1881    // Multiply by dividend:
1882    x_sgn = a_sgn ^ b_sgn;
1883    x_exp = a_exp - b_exp + FP64_EXP_BIAS + 8;
1884    mul62x62(&x0_mnt, &x1_mnt, x0_mnt, a_mnt >> 2);
1885    lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 4);
1886    x_mnt = x1_mnt;
1887
1888    // This is an underestimate, so try adding one:
1889    mul62x62(&x0_mnt, &x1_mnt, b_mnt >> 2, x_mnt + 1);
1890    c = cmp128(x0_mnt, x1_mnt, 0, a_mnt >> 11);
1891    if (c <= 0) {
1892        ++x_mnt;
1893    }
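    // After the conditional increment, c also serves as the sticky
    // (inexact) indicator handed to fp64_round below.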
1894
1895    x_mnt = fp64_normalise(x_mnt, &x_exp);
1896
1897    return fp64_round(x_sgn, x_exp, x_mnt << 1 | !!c, mode, flags);
1898}
1899
1900static void
1901set_fpscr0(FPSCR &fpscr, int flags)
1902{
1903    if (flags & FPLIB_IDC) {
1904        fpscr.idc = 1;
1905    }
1906    if (flags & FPLIB_IOC) {
1907        fpscr.ioc = 1;
1908    }
1909    if (flags & FPLIB_DZC) {
1910        fpscr.dzc = 1;
1911    }
1912    if (flags & FPLIB_OFC) {
1913        fpscr.ofc = 1;
1914    }
1915    if (flags & FPLIB_UFC) {
1916        fpscr.ufc = 1;
1917    }
1918    if (flags & FPLIB_IXC) {
1919        fpscr.ixc = 1;
1920    }
1921}
1922
1923static uint16_t
1924fp16_scale(uint16_t a, int16_t b, int mode, int *flags)
1925{
1926    int a_sgn, a_exp;
1927    uint16_t a_mnt;
1928
1929    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1930
1931    // Handle NaNs:
1932    if (fp16_is_NaN(a_exp, a_mnt)) {
1933        return fp16_process_NaN(a, mode, flags);
1934    }
1935
1936    // Handle zeroes:
1937    if (!a_mnt) {
1938        return fp16_zero(a_sgn);
1939    }
1940
1941    // Handle infinities:
1942    if (a_exp == FP16_EXP_INF) {
1943        return fp16_infinity(a_sgn);
1944    }
1945
1946    b = b < -300 ? -300 : b;
1947    b = b >  300 ?  300 : b;
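    // Saturate the scale amount: +/-300 comfortably exceeds fp16's entire
    // exponent range, so clamping cannot change the rounded result but
    // keeps the exponent arithmetic in range.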
1948    a_exp += b;
1949    a_mnt <<= 3;
1950
1951    a_mnt = fp16_normalise(a_mnt, &a_exp);
1952
1953    return fp16_round(a_sgn, a_exp + FP16_EXP_BITS - 3, a_mnt << 1,
1954                      mode, flags);
1955}
1956
1957static uint32_t
1958fp32_scale(uint32_t a, int32_t b, int mode, int *flags)
1959{
1960    int a_sgn, a_exp;
1961    uint32_t a_mnt;
1962
1963    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1964
1965    // Handle NaNs:
1966    if (fp32_is_NaN(a_exp, a_mnt)) {
1967        return fp32_process_NaN(a, mode, flags);
1968    }
1969
1970    // Handle zeroes:
1971    if (!a_mnt) {
1972        return fp32_zero(a_sgn);
1973    }
1974
1975    // Handle infinities:
1976    if (a_exp == FP32_EXP_INF) {
1977        return fp32_infinity(a_sgn);
1978    }
1979
1980    b = b < -300 ? -300 : b;
1981    b = b >  300 ?  300 : b;
1982    a_exp += b;
1983    a_mnt <<= 3;
1984
1985    a_mnt = fp32_normalise(a_mnt, &a_exp);
1986
1987    return fp32_round(a_sgn, a_exp + FP32_EXP_BITS - 3, a_mnt << 1,
1988                      mode, flags);
1989}
1990
1991static uint64_t
1992fp64_scale(uint64_t a, int64_t b, int mode, int *flags)
1993{
1994    int a_sgn, a_exp;
1995    uint64_t a_mnt;
1996
1997    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1998
1999    // Handle NaNs:
2000    if (fp64_is_NaN(a_exp, a_mnt)) {
2001        return fp64_process_NaN(a, mode, flags);
2002    }
2003
2004    // Handle zeroes:
2005    if (!a_mnt) {
2006        return fp64_zero(a_sgn);
2007    }
2008
2009    // Handle infinities:
2010    if (a_exp == FP64_EXP_INF) {
2011        return fp64_infinity(a_sgn);
2012    }
2013
2014    b = b < -3000 ? -3000 : b;
2015    b = b >  3000 ?  3000 : b;
2016    a_exp += b;
2017    a_mnt <<= 3;
2018
2019    a_mnt = fp64_normalise(a_mnt, &a_exp);
2020
2021    return fp64_round(a_sgn, a_exp + FP64_EXP_BITS - 3, a_mnt << 1,
2022                      mode, flags);
2023}
2024
2025static uint16_t
2026fp16_sqrt(uint16_t a, int mode, int *flags)
2027{
2028    int a_sgn, a_exp, x_sgn, x_exp;
2029    uint16_t a_mnt, x_mnt;
2030    uint32_t x, t0, t1;
2031
2032    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
2033
2034    // Handle NaNs:
2035    if (fp16_is_NaN(a_exp, a_mnt))
2036        return fp16_process_NaN(a, mode, flags);
2037
2038    // Handle infinities and zeroes:
2039    if (!a_mnt)
2040        return fp16_zero(a_sgn);
2041    if (a_exp == FP16_EXP_INF && !a_sgn)
2042        return fp16_infinity(a_sgn);
2043    if (a_sgn) {
2044        *flags |= FPLIB_IOC;
2045        return fp16_defaultNaN();
2046    }
2047
2048    a_mnt = fp16_normalise(a_mnt, &a_exp);
2049    if (a_exp & 1) {
2050        ++a_exp;
2051        a_mnt >>= 1;
2052    }
2053
2054    // x = (a * 3 + 5) / 8
2055    x = ((uint32_t)a_mnt << 14) + ((uint32_t)a_mnt << 13) + ((uint32_t)5 << 28);
2056
2057    // x = (a / x + x) / 2; // 8-bit accuracy
2058    x = (((uint32_t)a_mnt << 16) / (x >> 15) + (x >> 16)) << 15;
2059
2060    // x = (a / x + x) / 2; // 16-bit accuracy
2061    x = (((uint32_t)a_mnt << 16) / (x >> 15) + (x >> 16)) << 15;
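    // Each Heron step x' = (a / x + x) / 2 roughly doubles the estimate's
    // accuracy, so two steps from the linear seed reach ~16 bits, more
    // than fp16's 11 significand bits need before the correction below.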
2062
2063    x_sgn = 0;
2064    x_exp = (a_exp + 27) >> 1;
2065    x_mnt = ((x - (1 << 18)) >> 19) + 1;
2066    t1 = (uint32_t)x_mnt * x_mnt;
2067    t0 = (uint32_t)a_mnt << 9;
2068    if (t1 > t0) {
2069        --x_mnt;
2070    }
2071
2072    x_mnt = fp16_normalise(x_mnt, &x_exp);
2073
2074    return fp16_round(x_sgn, x_exp, x_mnt << 1 | (t1 != t0), mode, flags);
2075}
2076
2077static uint32_t
2078fp32_sqrt(uint32_t a, int mode, int *flags)
2079{
2080    int a_sgn, a_exp, x_sgn, x_exp;
2081    uint32_t a_mnt, x, x_mnt;
2082    uint64_t t0, t1;
2083
2084    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
2085
2086    // Handle NaNs:
2087    if (fp32_is_NaN(a_exp, a_mnt))
2088        return fp32_process_NaN(a, mode, flags);
2089
2090    // Handle infinities and zeroes:
2091    if (!a_mnt)
2092        return fp32_zero(a_sgn);
2093    if (a_exp == FP32_EXP_INF && !a_sgn)
2094        return fp32_infinity(a_sgn);
2095    if (a_sgn) {
2096        *flags |= FPLIB_IOC;
2097        return fp32_defaultNaN();
2098    }
2099
2100    a_mnt = fp32_normalise(a_mnt, &a_exp);
2101    if (!(a_exp & 1)) {
2102        ++a_exp;
2103        a_mnt >>= 1;
2104    }
2105
2106    // x = (a * 3 + 5) / 8
2107    x = (a_mnt >> 2) + (a_mnt >> 3) + ((uint32_t)5 << 28);
2108
2109    // x = (a / x + x) / 2; // 8-bit accuracy
2110    x = (a_mnt / (x >> 15) + (x >> 16)) << 15;
2111
2112    // x = (a / x + x) / 2; // 16-bit accuracy
2113    x = (a_mnt / (x >> 15) + (x >> 16)) << 15;
2114
2115    // x = (a / x + x) / 2; // 32-bit accuracy
2116    x = ((((uint64_t)a_mnt << 32) / x) >> 2) + (x >> 1);
2117
2118    x_sgn = 0;
2119    x_exp = (a_exp + 147) >> 1;
2120    x_mnt = ((x - (1 << 5)) >> 6) + 1;
2121    t1 = (uint64_t)x_mnt * x_mnt;
2122    t0 = (uint64_t)a_mnt << 19;
2123    if (t1 > t0) {
2124        --x_mnt;
2125    }
2126
2127    x_mnt = fp32_normalise(x_mnt, &x_exp);
2128
2129    return fp32_round(x_sgn, x_exp, x_mnt << 1 | (t1 != t0), mode, flags);
2130}
2131
2132static uint64_t
2133fp64_sqrt(uint64_t a, int mode, int *flags)
2134{
2135    int a_sgn, a_exp, x_sgn, x_exp, c;
2136    uint64_t a_mnt, x_mnt, r, x0, x1;
2137    uint32_t x;
2138
2139    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
2140
2141    // Handle NaNs:
2142    if (fp64_is_NaN(a_exp, a_mnt))
2143        return fp64_process_NaN(a, mode, flags);
2144
2145    // Handle infinities and zeroes:
2146    if (!a_mnt)
2147        return fp64_zero(a_sgn);
2148    if (a_exp == FP64_EXP_INF && !a_sgn)
2149        return fp64_infinity(a_sgn);
2150    if (a_sgn) {
2151        *flags |= FPLIB_IOC;
2152        return fp64_defaultNaN();
2153    }
2154
2155    a_mnt = fp64_normalise(a_mnt, &a_exp);
2156    if (a_exp & 1) {
2157        ++a_exp;
2158        a_mnt >>= 1;
2159    }
2160
2161    // x = (a * 3 + 5) / 8
2162    x = (a_mnt >> 34) + (a_mnt >> 35) + ((uint32_t)5 << 28);
2163
2164    // x = (a / x + x) / 2; // 8-bit accuracy
2165    x = ((a_mnt >> 32) / (x >> 15) + (x >> 16)) << 15;
2166
2167    // x = (a / x + x) / 2; // 16-bit accuracy
2168    x = ((a_mnt >> 32) / (x >> 15) + (x >> 16)) << 15;
2169
2170    // x = (a / x + x) / 2; // 32-bit accuracy
2171    x = ((a_mnt / x) >> 2) + (x >> 1);
2172
2173    // r = 1 / x; // 32-bit accuracy
2174    r = ((uint64_t)1 << 62) / x;
2175
2176    // r = r * (2 - x * r); // 64-bit accuracy
2177    mul64x32(&x0, &x1, -(uint64_t)x * r << 1, r);
2178    lsr128(&x0, &x1, x0, x1, 31);
2179
2180    // x = (x + a * r) / 2; // 64-bit accuracy
2181    mul62x62(&x0, &x1, a_mnt >> 10, x0 >> 2);
2182    lsl128(&x0, &x1, x0, x1, 5);
2183    lsr128(&x0, &x1, x0, x1, 56);
2184
2185    x0 = ((uint64_t)x << 31) + (x0 >> 1);
2186
2187    x_sgn = 0;
2188    x_exp = (a_exp + 1053) >> 1;
2189    x_mnt = x0;
2190    x_mnt = ((x_mnt - (1 << 8)) >> 9) + 1;
2191    mul62x62(&x0, &x1, x_mnt, x_mnt);
2192    lsl128(&x0, &x1, x0, x1, 19);
2193    c = cmp128(x0, x1, 0, a_mnt);
2194    if (c > 0)
2195        --x_mnt;
2196
2197    x_mnt = fp64_normalise(x_mnt, &x_exp);
2198
2199    return fp64_round(x_sgn, x_exp, x_mnt << 1 | !!c, mode, flags);
2200}
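
// fp64_sqrt avoids a wide division by forming a / x as a * (1 / x): a
// 32-bit root estimate is combined with a Newton-refined reciprocal, and
// the closing compare-and-decrement pins the result to the floor of the
// true root, with c acting as the sticky bit.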
2201
2202static int
2203modeConv(FPSCR fpscr)
2204{
2205    uint32_t x = (uint32_t)fpscr;
2206    // The AHP bit is ignored here; only fplibConvert consults AHP.
2207    return (x >> 22 & 0xf) | (x >> 19 & 1 ? FPLIB_FZ16 : 0);
2208}
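
// Illustrative sketch (not part of the library's code): an FPSCR with
// RMode = 0b01 (round towards plus infinity), FZ = 1 and DN = 1 converts
// to
//
//   FPLIB_RP | FPLIB_FZ | FPLIB_DN == 0x1 | 0x4 | 0x8 == 0xd
//
// i.e. the low two mode bits select rounding and FZ/DN/FZ16 ride along
// as flags.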
2209
2210static void
2211set_fpscr(FPSCR &fpscr, int flags)
2212{
2213    // translate back to FPSCR
2214    bool underflow = false;
2215    if (flags & FPLIB_IDC) {
2216        fpscr.idc = 1;
2217    }
2218    if (flags & FPLIB_IOC) {
2219        fpscr.ioc = 1;
2220    }
2221    if (flags & FPLIB_DZC) {
2222        fpscr.dzc = 1;
2223    }
2224    if (flags & FPLIB_OFC) {
2225        fpscr.ofc = 1;
2226    }
2227    if (flags & FPLIB_UFC) {
2228        underflow = true; // remembered so inexact can be suppressed below
2229        fpscr.ufc = 1;
2230    }
2231    if ((flags & FPLIB_IXC) && !(underflow && fpscr.fz)) {
2232        fpscr.ixc = 1;
2233    }
2234}
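
// Unlike set_fpscr0 above, this variant suppresses the inexact flag when
// the underflow came from a flush-to-zero, matching the Arm convention
// that a flushed result signals underflow but not inexact.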
2235
2236template <>
2237bool
2238fplibCompareEQ(uint16_t a, uint16_t b, FPSCR &fpscr)
2239{
2240    int flags = 0;
2241    int x = fp16_compare_eq(a, b, modeConv(fpscr), &flags);
2242    set_fpscr(fpscr, flags);
2243    return x;
2244}
2245
2246template <>
2247bool
2248fplibCompareGE(uint16_t a, uint16_t b, FPSCR &fpscr)
2249{
2250    int flags = 0;
2251    int x = fp16_compare_ge(a, b, modeConv(fpscr), &flags);
2252    set_fpscr(fpscr, flags);
2253    return x;
2254}
2255
2256template <>
2257bool
2258fplibCompareGT(uint16_t a, uint16_t b, FPSCR &fpscr)
2259{
2260    int flags = 0;
2261    int x = fp16_compare_gt(a, b, modeConv(fpscr), &flags);
2262    set_fpscr(fpscr, flags);
2263    return x;
2264}
2265
2266template <>
2267bool
2268fplibCompareUN(uint16_t a, uint16_t b, FPSCR &fpscr)
2269{
2270    int flags = 0;
2271    int x = fp16_compare_un(a, b, modeConv(fpscr), &flags);
2272    set_fpscr(fpscr, flags);
2273    return x;
2274}
2275
2276template <>
2277bool
2278fplibCompareEQ(uint32_t a, uint32_t b, FPSCR &fpscr)
2279{
2280    int flags = 0;
2281    int x = fp32_compare_eq(a, b, modeConv(fpscr), &flags);
2282    set_fpscr(fpscr, flags);
2283    return x;
2284}
2285
2286template <>
2287bool
2288fplibCompareGE(uint32_t a, uint32_t b, FPSCR &fpscr)
2289{
2290    int flags = 0;
2291    int x = fp32_compare_ge(a, b, modeConv(fpscr), &flags);
2292    set_fpscr(fpscr, flags);
2293    return x;
2294}
2295
2296template <>
2297bool
2298fplibCompareGT(uint32_t a, uint32_t b, FPSCR &fpscr)
2299{
2300    int flags = 0;
2301    int x = fp32_compare_gt(a, b, modeConv(fpscr), &flags);
2302    set_fpscr(fpscr, flags);
2303    return x;
2304}
2305
2306template <>
2307bool
2308fplibCompareUN(uint32_t a, uint32_t b, FPSCR &fpscr)
2309{
2310    int flags = 0;
2311    int x = fp32_compare_un(a, b, modeConv(fpscr), &flags);
2312    set_fpscr(fpscr, flags);
2313    return x;
2314}
2315
2316template <>
2317bool
2318fplibCompareEQ(uint64_t a, uint64_t b, FPSCR &fpscr)
2319{
2320    int flags = 0;
2321    int x = fp64_compare_eq(a, b, modeConv(fpscr), &flags);
2322    set_fpscr(fpscr, flags);
2323    return x;
2324}
2325
2326template <>
2327bool
2328fplibCompareGE(uint64_t a, uint64_t b, FPSCR &fpscr)
2329{
2330    int flags = 0;
2331    int x = fp64_compare_ge(a, b, modeConv(fpscr), &flags);
2332    set_fpscr(fpscr, flags);
2333    return x;
2334}
2335
2336template <>
2337bool
2338fplibCompareGT(uint64_t a, uint64_t b, FPSCR &fpscr)
2339{
2340    int flags = 0;
2341    int x = fp64_compare_gt(a, b, modeConv(fpscr), &flags);
2342    set_fpscr(fpscr, flags);
2343    return x;
2344}
2345
2346template <>
2347bool
2348fplibCompareUN(uint64_t a, uint64_t b, FPSCR &fpscr)
2349{
2350    int flags = 0;
2351    int x = fp64_compare_un(a, b, modeConv(fpscr), &flags);
2352    set_fpscr(fpscr, flags);
2353    return x;
2354}
2355
2356template <>
2357uint16_t
2358fplibAbs(uint16_t op)
2359{
2360    return op & ~(1ULL << (FP16_BITS - 1));
2361}
2362
2363template <>
2364uint32_t
2365fplibAbs(uint32_t op)
2366{
2367    return op & ~(1ULL << (FP32_BITS - 1));
2368}
2369
2370template <>
2371uint64_t
2372fplibAbs(uint64_t op)
2373{
2374    return op & ~(1ULL << (FP64_BITS - 1));
2375}
2376
2377template <>
2378uint16_t
2379fplibAdd(uint16_t op1, uint16_t op2, FPSCR &fpscr)
2380{
2381    int flags = 0;
2382    uint16_t result = fp16_add(op1, op2, 0, modeConv(fpscr), &flags);
2383    set_fpscr0(fpscr, flags);
2384    return result;
2385}
2386
2387template <>
2388uint32_t
2389fplibAdd(uint32_t op1, uint32_t op2, FPSCR &fpscr)
2390{
2391    int flags = 0;
2392    uint32_t result = fp32_add(op1, op2, 0, modeConv(fpscr), &flags);
2393    set_fpscr0(fpscr, flags);
2394    return result;
2395}
2396
2397template <>
2398uint64_t
2399fplibAdd(uint64_t op1, uint64_t op2, FPSCR &fpscr)
2400{
2401    int flags = 0;
2402    uint64_t result = fp64_add(op1, op2, 0, modeConv(fpscr), &flags);
2403    set_fpscr0(fpscr, flags);
2404    return result;
2405}
2406
2407template <>
2408int
2409fplibCompare(uint16_t op1, uint16_t op2, bool signal_nans, FPSCR &fpscr)
2410{
2411    int mode = modeConv(fpscr);
2412    int flags = 0;
2413    int sgn1, exp1, sgn2, exp2, result;
2414    uint16_t mnt1, mnt2;
2415
2416    fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2417    fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2418
2419    if (fp16_is_NaN(exp1, mnt1) || fp16_is_NaN(exp2, mnt2)) {
2420        result = 3;
2421        if (fp16_is_signalling_NaN(exp1, mnt1) ||
2422            fp16_is_signalling_NaN(exp2, mnt2) || signal_nans)
2423            flags |= FPLIB_IOC;
2424    } else {
2425        if (op1 == op2 || (!mnt1 && !mnt2)) {
2426            result = 6;
2427        } else if (sgn1 != sgn2) {
2428            result = sgn1 ? 8 : 2;
2429        } else if (exp1 != exp2) {
2430            result = sgn1 ^ (exp1 < exp2) ? 8 : 2;
2431        } else {
2432            result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2;
2433        }
2434    }
2435
2436    set_fpscr0(fpscr, flags);
2437
2438    return result;
2439}
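
// The return value is an NZCV nibble as FCMP would set it: 8 (N) for
// less-than, 6 (ZC) for equal, 2 (C) for greater-than and 3 (CV) for
// unordered.  The fp32/fp64 specialisations below use the same encoding.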
2440
2441template <>
2442int
2443fplibCompare(uint32_t op1, uint32_t op2, bool signal_nans, FPSCR &fpscr)
2444{
2445    int mode = modeConv(fpscr);
2446    int flags = 0;
2447    int sgn1, exp1, sgn2, exp2, result;
2448    uint32_t mnt1, mnt2;
2449
2450    fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2451    fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2452
2453    if (fp32_is_NaN(exp1, mnt1) || fp32_is_NaN(exp2, mnt2)) {
2454        result = 3;
2455        if (fp32_is_signalling_NaN(exp1, mnt1) ||
2456            fp32_is_signalling_NaN(exp2, mnt2) || signal_nans)
2457            flags |= FPLIB_IOC;
2458    } else {
2459        if (op1 == op2 || (!mnt1 && !mnt2)) {
2460            result = 6;
2461        } else if (sgn1 != sgn2) {
2462            result = sgn1 ? 8 : 2;
2463        } else if (exp1 != exp2) {
2464            result = sgn1 ^ (exp1 < exp2) ? 8 : 2;
2465        } else {
2466            result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2;
2467        }
2468    }
2469
2470    set_fpscr0(fpscr, flags);
2471
2472    return result;
2473}
2474
2475template <>
2476int
2477fplibCompare(uint64_t op1, uint64_t op2, bool signal_nans, FPSCR &fpscr)
2478{
2479    int mode = modeConv(fpscr);
2480    int flags = 0;
2481    int sgn1, exp1, sgn2, exp2, result;
2482    uint64_t mnt1, mnt2;
2483
2484    fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2485    fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2486
2487    if (fp64_is_NaN(exp1, mnt1) || fp64_is_NaN(exp2, mnt2)) {
2488        result = 3;
2489        if (fp64_is_signalling_NaN(exp1, mnt1) ||
2490            fp64_is_signalling_NaN(exp2, mnt2) || signal_nans)
2491            flags |= FPLIB_IOC;
2492    } else {
2493        if (op1 == op2 || (!mnt1 && !mnt2)) {
2494            result = 6;
2495        } else if (sgn1 != sgn2) {
2496            result = sgn1 ? 8 : 2;
2497        } else if (exp1 != exp2) {
2498            result = sgn1 ^ (exp1 < exp2) ? 8 : 2;
2499        } else {
2500            result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2;
2501        }
2502    }
2503
2504    set_fpscr0(fpscr, flags);
2505
2506    return result;
2507}
2508
2509static uint16_t
2510fp16_FPConvertNaN_32(uint32_t op)
2511{
2512    return fp16_pack(op >> (FP32_BITS - 1), FP16_EXP_INF,
2513                     1ULL << (FP16_MANT_BITS - 1) |
2514                     op >> (FP32_MANT_BITS - FP16_MANT_BITS));
2515}
2516
2517static uint16_t
2518fp16_FPConvertNaN_64(uint64_t op)
2519{
2520    return fp16_pack(op >> (FP64_BITS - 1), FP16_EXP_INF,
2521                     1ULL << (FP16_MANT_BITS - 1) |
2522                     op >> (FP64_MANT_BITS - FP16_MANT_BITS));
2523}
2524
2525static uint32_t
2526fp32_FPConvertNaN_16(uint16_t op)
2527{
2528    return fp32_pack(op >> (FP16_BITS - 1), FP32_EXP_INF,
2529                     1ULL << (FP32_MANT_BITS - 1) |
2530                     (uint32_t)op << (FP32_MANT_BITS - FP16_MANT_BITS));
2531}
2532
2533static uint32_t
2534fp32_FPConvertNaN_64(uint64_t op)
2535{
2536    return fp32_pack(op >> (FP64_BITS - 1), FP32_EXP_INF,
2537                     1ULL << (FP32_MANT_BITS - 1) |
2538                     op >> (FP64_MANT_BITS - FP32_MANT_BITS));
2539}
2540
2541static uint64_t
2542fp64_FPConvertNaN_16(uint16_t op)
2543{
2544    return fp64_pack(op >> (FP16_BITS - 1), FP64_EXP_INF,
2545                     1ULL << (FP64_MANT_BITS - 1) |
2546                     (uint64_t)op << (FP64_MANT_BITS - FP16_MANT_BITS));
2547}
2548
2549static uint64_t
2550fp64_FPConvertNaN_32(uint32_t op)
2551{
2552    return fp64_pack(op >> (FP32_BITS - 1), FP64_EXP_INF,
2553                     1ULL << (FP64_MANT_BITS - 1) |
2554                     (uint64_t)op << (FP64_MANT_BITS - FP32_MANT_BITS));
2555}
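
// The FPConvertNaN helpers above change a NaN's width while forcing it
// quiet: the sign is kept, the top fraction bit is set, and the payload
// is shifted into the destination fraction, truncating on narrowing
// conversions.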
2556
2557static uint16_t
2558fp16_FPOnePointFive(int sgn)
2559{
2560    return fp16_pack(sgn, FP16_EXP_BIAS, 1ULL << (FP16_MANT_BITS - 1));
2561}
2562
2563static uint32_t
2564fp32_FPOnePointFive(int sgn)
2565{
2566    return fp32_pack(sgn, FP32_EXP_BIAS, 1ULL << (FP32_MANT_BITS - 1));
2567}
2568
2569static uint64_t
2570fp64_FPOnePointFive(int sgn)
2571{
2572    return fp64_pack(sgn, FP64_EXP_BIAS, 1ULL << (FP64_MANT_BITS - 1));
2573}
2574
2575static uint16_t
2576fp16_FPThree(int sgn)
2577{
2578    return fp16_pack(sgn, FP16_EXP_BIAS + 1, 1ULL << (FP16_MANT_BITS - 1));
2579}
2580
2581static uint32_t
2582fp32_FPThree(int sgn)
2583{
2584    return fp32_pack(sgn, FP32_EXP_BIAS + 1, 1ULL << (FP32_MANT_BITS - 1));
2585}
2586
2587static uint64_t
2588fp64_FPThree(int sgn)
2589{
2590    return fp64_pack(sgn, FP64_EXP_BIAS + 1, 1ULL << (FP64_MANT_BITS - 1));
2591}
2592
2593static uint16_t
2594fp16_FPTwo(int sgn)
2595{
2596    return fp16_pack(sgn, FP16_EXP_BIAS + 1, 0);
2597}
2598
2599static uint32_t
2600fp32_FPTwo(int sgn)
2601{
2602    return fp32_pack(sgn, FP32_EXP_BIAS + 1, 0);
2603}
2604
2605static uint64_t
2606fp64_FPTwo(int sgn)
2607{
2608    return fp64_pack(sgn, FP64_EXP_BIAS + 1, 0);
2609}
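
// The constants 1.5, 3.0 and 2.0 (with selectable sign) are the
// special-case results required by fplibMulX and the fused step
// operations below (e.g. fplibRSqrtStepFused) when one operand is zero
// and the other infinite.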
2610
2611template <>
2612uint16_t
2613fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr)
2614{
2615    int mode = modeConv(fpscr);
2616    int flags = 0;
2617    int sgn, exp;
2618    uint32_t mnt;
2619    uint16_t result;
2620
2621    // Unpack the operand, optionally flushing denormals to zero:
2622    fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2623
2624    bool alt_hp = fpscr.ahp;
2625
2626    if (fp32_is_NaN(exp, mnt)) {
2627        if (alt_hp) {
2628            result = fp16_zero(sgn);
2629        } else if (fpscr.dn) {
2630            result = fp16_defaultNaN();
2631        } else {
2632            result = fp16_FPConvertNaN_32(op);
2633        }
2634        if (!(mnt >> (FP32_MANT_BITS - 1) & 1) || alt_hp) {
2635            flags |= FPLIB_IOC;
2636        }
2637    } else if (exp == FP32_EXP_INF) {
2638        if (alt_hp) {
2639            result = ((uint16_t)sgn << (FP16_BITS - 1) |
2640                      ((1ULL << (FP16_BITS - 1)) - 1));
2641            flags |= FPLIB_IOC;
2642        } else {
2643            result = fp16_infinity(sgn);
2644        }
2645    } else if (!mnt) {
2646        result = fp16_zero(sgn);
2647    } else {
2648        result =
2649            fp16_round_(sgn, exp - FP32_EXP_BIAS + FP16_EXP_BIAS,
2650                        mnt >> (FP32_MANT_BITS - FP16_BITS) |
2651                        !!(mnt & ((1ULL << (FP32_MANT_BITS - FP16_BITS)) - 1)),
2652                        rounding, (mode & 0xf) | alt_hp << 4, &flags);
2653    }
2654
2655    set_fpscr0(fpscr, flags);
2656
2657    return result;
2658}
2659
2660template <>
2661uint16_t
2662fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr)
2663{
2664    int mode = modeConv(fpscr);
2665    int flags = 0;
2666    int sgn, exp;
2667    uint64_t mnt;
2668    uint16_t result;
2669
2670    // Unpack the operand, optionally flushing denormals to zero:
2671    fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2672
2673    bool alt_hp = fpscr.ahp;
2674
2675    if (fp64_is_NaN(exp, mnt)) {
2676        if (alt_hp) {
2677            result = fp16_zero(sgn);
2678        } else if (fpscr.dn) {
2679            result = fp16_defaultNaN();
2680        } else {
2681            result = fp16_FPConvertNaN_64(op);
2682        }
2683        if (!(mnt >> (FP64_MANT_BITS - 1) & 1) || alt_hp) {
2684            flags |= FPLIB_IOC;
2685        }
2686    } else if (exp == FP64_EXP_INF) {
2687        if (alt_hp) {
2688            result = ((uint16_t)sgn << (FP16_BITS - 1) |
2689                      ((1ULL << (FP16_BITS - 1)) - 1));
2690            flags |= FPLIB_IOC;
2691        } else {
2692            result = fp16_infinity(sgn);
2693        }
2694    } else if (!mnt) {
2695        result = fp16_zero(sgn);
2696    } else {
2697        result =
2698            fp16_round_(sgn, exp - FP64_EXP_BIAS + FP16_EXP_BIAS,
2699                        mnt >> (FP64_MANT_BITS - FP16_BITS) |
2700                        !!(mnt & ((1ULL << (FP64_MANT_BITS - FP16_BITS)) - 1)),
2701                        rounding, (mode & 0xf) | alt_hp << 4, &flags);
2702    }
2703
2704    set_fpscr0(fpscr, flags);
2705
2706    return result;
2707}
2708
2709template <>
2710uint32_t
2711fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr)
2712{
2713    int mode = modeConv(fpscr);
2714    int flags = 0;
2715    int sgn, exp;
2716    uint16_t mnt;
2717    uint32_t result;
2718
2719    // Unpack the operand, optionally flushing denormals to zero:
2720    fp16_unpack(&sgn, &exp, &mnt, op, mode & 0xf, &flags);
2721
2722    if (fp16_is_NaN(exp, mnt) && !fpscr.ahp) {
2723        if (fpscr.dn) {
2724            result = fp32_defaultNaN();
2725        } else {
2726            result = fp32_FPConvertNaN_16(op);
2727        }
2728        if (!(mnt >> (FP16_MANT_BITS - 1) & 1)) {
2729            flags |= FPLIB_IOC;
2730        }
2731    } else if (exp == FP16_EXP_INF && !fpscr.ahp) {
2732        result = fp32_infinity(sgn);
2733    } else if (!mnt) {
2734        result = fp32_zero(sgn);
2735    } else {
2736        mnt = fp16_normalise(mnt, &exp);
2737        result = fp32_pack(sgn, (exp - FP16_EXP_BIAS +
2738                                 FP32_EXP_BIAS + FP16_EXP_BITS),
2739                           (uint32_t)mnt << (FP32_MANT_BITS - FP16_BITS + 1));
2740    }
2741
2742    set_fpscr0(fpscr, flags);
2743
2744    return result;
2745}
2746
2747template <>
2748uint32_t
2749fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr)
2750{
2751    int mode = modeConv(fpscr);
2752    int flags = 0;
2753    int sgn, exp;
2754    uint64_t mnt;
2755    uint32_t result;
2756
2757    // Unpack the operand, optionally flushing denormals to zero:
2758    fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2759
2760    if (fp64_is_NaN(exp, mnt)) {
2761        if (fpscr.dn) {
2762            result = fp32_defaultNaN();
2763        } else {
2764            result = fp32_FPConvertNaN_64(op);
2765        }
2766        if (!(mnt >> (FP64_MANT_BITS - 1) & 1)) {
2767            flags |= FPLIB_IOC;
2768        }
2769    } else if (exp == FP64_EXP_INF) {
2770        result = fp32_infinity(sgn);
2771    } else if (!mnt) {
2772        result = fp32_zero(sgn);
2773    } else {
2774        result =
2775            fp32_round_(sgn, exp - FP64_EXP_BIAS + FP32_EXP_BIAS,
2776                        mnt >> (FP64_MANT_BITS - FP32_BITS) |
2777                        !!(mnt & ((1ULL << (FP64_MANT_BITS - FP32_BITS)) - 1)),
2778                        rounding, mode, &flags);
2779    }
2780
2781    set_fpscr0(fpscr, flags);
2782
2783    return result;
2784}
2785
2786template <>
2787uint64_t
2788fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr)
2789{
2790    int mode = modeConv(fpscr);
2791    int flags = 0;
2792    int sgn, exp;
2793    uint16_t mnt;
2794    uint64_t result;
2795
2796    // Unpack the operand, optionally flushing denormals to zero:
2797    fp16_unpack(&sgn, &exp, &mnt, op, mode & 0xf, &flags);
2798
2799    if (fp16_is_NaN(exp, mnt) && !fpscr.ahp) {
2800        if (fpscr.dn) {
2801            result = fp64_defaultNaN();
2802        } else {
2803            result = fp64_FPConvertNaN_16(op);
2804        }
2805        if (!(mnt >> (FP16_MANT_BITS - 1) & 1)) {
2806            flags |= FPLIB_IOC;
2807        }
2808    } else if (exp == FP16_EXP_INF && !fpscr.ahp) {
2809        result = fp64_infinity(sgn);
2810    } else if (!mnt) {
2811        result = fp64_zero(sgn);
2812    } else {
2813        mnt = fp16_normalise(mnt, &exp);
2814        result = fp64_pack(sgn, (exp - FP16_EXP_BIAS +
2815                                 FP64_EXP_BIAS + FP16_EXP_BITS),
2816                           (uint64_t)mnt << (FP64_MANT_BITS - FP16_BITS + 1));
2817    }
2818
2819    set_fpscr0(fpscr, flags);
2820
2821    return result;
2822}
2823
2824template <>
2825uint64_t
2826fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr)
2827{
2828    int mode = modeConv(fpscr);
2829    int flags = 0;
2830    int sgn, exp;
2831    uint32_t mnt;
2832    uint64_t result;
2833
2834    // Unpack the operand, optionally flushing denormals to zero:
2835    fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2836
2837    if (fp32_is_NaN(exp, mnt)) {
2838        if (fpscr.dn) {
2839            result = fp64_defaultNaN();
2840        } else {
2841            result = fp64_FPConvertNaN_32(op);
2842        }
2843        if (!(mnt >> (FP32_MANT_BITS - 1) & 1)) {
2844            flags |= FPLIB_IOC;
2845        }
2846    } else if (exp == FP32_EXP_INF) {
2847        result = fp64_infinity(sgn);
2848    } else if (!mnt) {
2849        result = fp64_zero(sgn);
2850    } else {
2851        mnt = fp32_normalise(mnt, &exp);
2852        result = fp64_pack(sgn, (exp - FP32_EXP_BIAS +
2853                                 FP64_EXP_BIAS + FP32_EXP_BITS),
2854                           (uint64_t)mnt << (FP64_MANT_BITS - FP32_BITS + 1));
2855    }
2856
2857    set_fpscr0(fpscr, flags);
2858
2859    return result;
2860}
2861
2862template <>
2863uint16_t
2864fplibMulAdd(uint16_t addend, uint16_t op1, uint16_t op2, FPSCR &fpscr)
2865{
2866    int flags = 0;
2867    uint16_t result = fp16_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags);
2868    set_fpscr0(fpscr, flags);
2869    return result;
2870}
2871
2872template <>
2873uint32_t
2874fplibMulAdd(uint32_t addend, uint32_t op1, uint32_t op2, FPSCR &fpscr)
2875{
2876    int flags = 0;
2877    uint32_t result = fp32_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags);
2878    set_fpscr0(fpscr, flags);
2879    return result;
2880}
2881
2882template <>
2883uint64_t
2884fplibMulAdd(uint64_t addend, uint64_t op1, uint64_t op2, FPSCR &fpscr)
2885{
2886    int flags = 0;
2887    uint64_t result = fp64_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags);
2888    set_fpscr0(fpscr, flags);
2889    return result;
2890}
2891
2892template <>
2893uint16_t
2894fplibDiv(uint16_t op1, uint16_t op2, FPSCR &fpscr)
2895{
2896    int flags = 0;
2897    uint16_t result = fp16_div(op1, op2, modeConv(fpscr), &flags);
2898    set_fpscr0(fpscr, flags);
2899    return result;
2900}
2901
2902template <>
2903uint32_t
2904fplibDiv(uint32_t op1, uint32_t op2, FPSCR &fpscr)
2905{
2906    int flags = 0;
2907    uint32_t result = fp32_div(op1, op2, modeConv(fpscr), &flags);
2908    set_fpscr0(fpscr, flags);
2909    return result;
2910}
2911
2912template <>
2913uint64_t
2914fplibDiv(uint64_t op1, uint64_t op2, FPSCR &fpscr)
2915{
2916    int flags = 0;
2917    uint64_t result = fp64_div(op1, op2, modeConv(fpscr), &flags);
2918    set_fpscr0(fpscr, flags);
2919    return result;
2920}
2921
2922template <>
2923uint16_t
2924fplibExpA(uint16_t op)
2925{
2926    static uint16_t coeff[32] = {
2927        0x0000,
2928        0x0016,
2929        0x002d,
2930        0x0045,
2931        0x005d,
2932        0x0075,
2933        0x008e,
2934        0x00a8,
2935        0x00c2,
2936        0x00dc,
2937        0x00f8,
2938        0x0114,
2939        0x0130,
2940        0x014d,
2941        0x016b,
2942        0x0189,
2943        0x01a8,
2944        0x01c8,
2945        0x01e8,
2946        0x0209,
2947        0x022b,
2948        0x024e,
2949        0x0271,
2950        0x0295,
2951        0x02ba,
2952        0x02e0,
2953        0x0306,
2954        0x032e,
2955        0x0356,
2956        0x037f,
2957        0x03a9,
2958        0x03d4
2959    };
2960    return ((((op >> 5) & ((1 << FP16_EXP_BITS) - 1)) << FP16_MANT_BITS) |
2961            coeff[op & ((1 << 5) - 1)]);
2962}
2963
2964template <>
2965uint32_t
2966fplibExpA(uint32_t op)
2967{
2968    static uint32_t coeff[64] = {
2969        0x000000,
2970        0x0164d2,
2971        0x02cd87,
2972        0x043a29,
2973        0x05aac3,
2974        0x071f62,
2975        0x08980f,
2976        0x0a14d5,
2977        0x0b95c2,
2978        0x0d1adf,
2979        0x0ea43a,
2980        0x1031dc,
2981        0x11c3d3,
2982        0x135a2b,
2983        0x14f4f0,
2984        0x16942d,
2985        0x1837f0,
2986        0x19e046,
2987        0x1b8d3a,
2988        0x1d3eda,
2989        0x1ef532,
2990        0x20b051,
2991        0x227043,
2992        0x243516,
2993        0x25fed7,
2994        0x27cd94,
2995        0x29a15b,
2996        0x2b7a3a,
2997        0x2d583f,
2998        0x2f3b79,
2999        0x3123f6,
3000        0x3311c4,
3001        0x3504f3,
3002        0x36fd92,
3003        0x38fbaf,
3004        0x3aff5b,
3005        0x3d08a4,
3006        0x3f179a,
3007        0x412c4d,
3008        0x4346cd,
3009        0x45672a,
3010        0x478d75,
3011        0x49b9be,
3012        0x4bec15,
3013        0x4e248c,
3014        0x506334,
3015        0x52a81e,
3016        0x54f35b,
3017        0x5744fd,
3018        0x599d16,
3019        0x5bfbb8,
3020        0x5e60f5,
3021        0x60ccdf,
3022        0x633f89,
3023        0x65b907,
3024        0x68396a,
3025        0x6ac0c7,
3026        0x6d4f30,
3027        0x6fe4ba,
3028        0x728177,
3029        0x75257d,
3030        0x77d0df,
3031        0x7a83b3,
3032        0x7d3e0c
3033    };
3034    return ((((op >> 6) & ((1 << FP32_EXP_BITS) - 1)) << FP32_MANT_BITS) |
3035            coeff[op & ((1 << 6) - 1)]);
3036}
3037
3038template <>
3039uint64_t
3040fplibExpA(uint64_t op)
3041{
3042    static uint64_t coeff[64] = {
3043        0x0000000000000ULL,
3044        0x02c9a3e778061ULL,
3045        0x059b0d3158574ULL,
3046        0x0874518759bc8ULL,
3047        0x0b5586cf9890fULL,
3048        0x0e3ec32d3d1a2ULL,
3049        0x11301d0125b51ULL,
3050        0x1429aaea92de0ULL,
3051        0x172b83c7d517bULL,
3052        0x1a35beb6fcb75ULL,
3053        0x1d4873168b9aaULL,
3054        0x2063b88628cd6ULL,
3055        0x2387a6e756238ULL,
3056        0x26b4565e27cddULL,
3057        0x29e9df51fdee1ULL,
3058        0x2d285a6e4030bULL,
3059        0x306fe0a31b715ULL,
3060        0x33c08b26416ffULL,
3061        0x371a7373aa9cbULL,
3062        0x3a7db34e59ff7ULL,
3063        0x3dea64c123422ULL,
3064        0x4160a21f72e2aULL,
3065        0x44e086061892dULL,
3066        0x486a2b5c13cd0ULL,
3067        0x4bfdad5362a27ULL,
3068        0x4f9b2769d2ca7ULL,
3069        0x5342b569d4f82ULL,
3070        0x56f4736b527daULL,
3071        0x5ab07dd485429ULL,
3072        0x5e76f15ad2148ULL,
3073        0x6247eb03a5585ULL,
3074        0x6623882552225ULL,
3075        0x6a09e667f3bcdULL,
3076        0x6dfb23c651a2fULL,
3077        0x71f75e8ec5f74ULL,
3078        0x75feb564267c9ULL,
3079        0x7a11473eb0187ULL,
3080        0x7e2f336cf4e62ULL,
3081        0x82589994cce13ULL,
3082        0x868d99b4492edULL,
3083        0x8ace5422aa0dbULL,
3084        0x8f1ae99157736ULL,
3085        0x93737b0cdc5e5ULL,
3086        0x97d829fde4e50ULL,
3087        0x9c49182a3f090ULL,
3088        0xa0c667b5de565ULL,
3089        0xa5503b23e255dULL,
3090        0xa9e6b5579fdbfULL,
3091        0xae89f995ad3adULL,
3092        0xb33a2b84f15fbULL,
3093        0xb7f76f2fb5e47ULL,
3094        0xbcc1e904bc1d2ULL,
3095        0xc199bdd85529cULL,
3096        0xc67f12e57d14bULL,
3097        0xcb720dcef9069ULL,
3098        0xd072d4a07897cULL,
3099        0xd5818dcfba487ULL,
3100        0xda9e603db3285ULL,
3101        0xdfc97337b9b5fULL,
3102        0xe502ee78b3ff6ULL,
3103        0xea4afa2a490daULL,
3104        0xefa1bee615a27ULL,
3105        0xf50765b6e4540ULL,
3106        0xfa7c1819e90d8ULL
3107    };
3108    return ((((op >> 6) & ((1 << FP64_EXP_BITS) - 1)) << FP64_MANT_BITS) |
3109            coeff[op & ((1 << 6) - 1)]);
3110}
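
// In each fplibExpA table the low index bits select the fraction of
// 2^(i/N) (N = 32 for fp16, 64 for fp32/fp64) and the remaining bits
// move into the exponent field.  As a spot check, coeff[32] of the fp64
// table, 0x6a09e667f3bcdULL, is the fraction field of 2^(32/64) =
// sqrt(2) ~= 1.4142135623730951.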
3111
3112static uint16_t
3113fp16_repack(int sgn, int exp, uint16_t mnt)
3114{
3115    return fp16_pack(sgn, mnt >> FP16_MANT_BITS ? exp : 0, mnt);
3116}
3117
3118static uint32_t
3119fp32_repack(int sgn, int exp, uint32_t mnt)
3120{
3121    return fp32_pack(sgn, mnt >> FP32_MANT_BITS ? exp : 0, mnt);
3122}
3123
3124static uint64_t
3125fp64_repack(int sgn, int exp, uint64_t mnt)
3126{
3127    return fp64_pack(sgn, mnt >> FP64_MANT_BITS ? exp : 0, mnt);
3128}
3129
3130static void
3131fp16_minmaxnum(uint16_t *op1, uint16_t *op2, int sgn)
3132{
3133    // Treat a single quiet-NaN as +Infinity/-Infinity
3134    if (!((uint16_t)~(*op1 << 1) >> FP16_MANT_BITS) &&
3135        (uint16_t)~(*op2 << 1) >> FP16_MANT_BITS)
3136        *op1 = fp16_infinity(sgn);
3137    if (!((uint16_t)~(*op2 << 1) >> FP16_MANT_BITS) &&
3138        (uint16_t)~(*op1 << 1) >> FP16_MANT_BITS)
3139        *op2 = fp16_infinity(sgn);
3140}
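
// The test ~(x << 1) >> MANT_BITS == 0 holds exactly when the exponent
// is all ones and the top fraction bit is set, i.e. when x is a quiet
// NaN of either sign, so each clause above reads "this operand is a
// quiet NaN and the other is not".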
3141
3142static void
3143fp32_minmaxnum(uint32_t *op1, uint32_t *op2, int sgn)
3144{
3145    // Treat a single quiet-NaN as +Infinity/-Infinity
3146    if (!((uint32_t)~(*op1 << 1) >> FP32_MANT_BITS) &&
3147        (uint32_t)~(*op2 << 1) >> FP32_MANT_BITS)
3148        *op1 = fp32_infinity(sgn);
3149    if (!((uint32_t)~(*op2 << 1) >> FP32_MANT_BITS) &&
3150        (uint32_t)~(*op1 << 1) >> FP32_MANT_BITS)
3151        *op2 = fp32_infinity(sgn);
3152}
3153
3154static void
3155fp64_minmaxnum(uint64_t *op1, uint64_t *op2, int sgn)
3156{
3157    // Treat a single quiet-NaN as +Infinity/-Infinity
3158    if (!((uint64_t)~(*op1 << 1) >> FP64_MANT_BITS) &&
3159        (uint64_t)~(*op2 << 1) >> FP64_MANT_BITS)
3160        *op1 = fp64_infinity(sgn);
3161    if (!((uint64_t)~(*op2 << 1) >> FP64_MANT_BITS) &&
3162        (uint64_t)~(*op1 << 1) >> FP64_MANT_BITS)
3163        *op2 = fp64_infinity(sgn);
3164}
3165
3166template <>
3167uint16_t
3168fplibMax(uint16_t op1, uint16_t op2, FPSCR &fpscr)
3169{
3170    int mode = modeConv(fpscr);
3171    int flags = 0;
3172    int sgn1, exp1, sgn2, exp2;
3173    uint16_t mnt1, mnt2, x, result;
3174
3175    fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3176    fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3177
3178    if ((x = fp16_process_NaNs(op1, op2, mode, &flags))) {
3179        result = x;
3180    } else {
3181        result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ?
3182                  fp16_repack(sgn1, exp1, mnt1) :
3183                  fp16_repack(sgn2, exp2, mnt2));
3184    }
3185    set_fpscr0(fpscr, flags);
3186    return result;
3187}
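
// Selection logic: with differing signs the positive operand wins; with
// matching signs the raw-bit comparison picks the larger magnitude for
// positive pairs and the smaller for negative pairs, which is what
// sgn1 ^ (op1 > op2) encodes.  fplibMin below mirrors this with the
// inequality reversed.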
3188
3189template <>
3190uint32_t
3191fplibMax(uint32_t op1, uint32_t op2, FPSCR &fpscr)
3192{
3193    int mode = modeConv(fpscr);
3194    int flags = 0;
3195    int sgn1, exp1, sgn2, exp2;
3196    uint32_t mnt1, mnt2, x, result;
3197
3198    fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3199    fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3200
3201    if ((x = fp32_process_NaNs(op1, op2, mode, &flags))) {
3202        result = x;
3203    } else {
3204        result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ?
3205                  fp32_repack(sgn1, exp1, mnt1) :
3206                  fp32_repack(sgn2, exp2, mnt2));
3207    }
3208    set_fpscr0(fpscr, flags);
3209    return result;
3210}
3211
3212template <>
3213uint64_t
3214fplibMax(uint64_t op1, uint64_t op2, FPSCR &fpscr)
3215{
3216    int mode = modeConv(fpscr);
3217    int flags = 0;
3218    int sgn1, exp1, sgn2, exp2;
3219    uint64_t mnt1, mnt2, x, result;
3220
3221    fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3222    fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3223
3224    if ((x = fp64_process_NaNs(op1, op2, mode, &flags))) {
3225        result = x;
3226    } else {
3227        result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ?
3228                  fp64_repack(sgn1, exp1, mnt1) :
3229                  fp64_repack(sgn2, exp2, mnt2));
3230    }
3231    set_fpscr0(fpscr, flags);
3232    return result;
3233}
3234
3235template <>
3236uint16_t
3237fplibMaxNum(uint16_t op1, uint16_t op2, FPSCR &fpscr)
3238{
3239    fp16_minmaxnum(&op1, &op2, 1);
3240    return fplibMax<uint16_t>(op1, op2, fpscr);
3241}
3242
3243template <>
3244uint32_t
3245fplibMaxNum(uint32_t op1, uint32_t op2, FPSCR &fpscr)
3246{
3247    fp32_minmaxnum(&op1, &op2, 1);
3248    return fplibMax<uint32_t>(op1, op2, fpscr);
3249}
3250
3251template <>
3252uint64_t
3253fplibMaxNum(uint64_t op1, uint64_t op2, FPSCR &fpscr)
3254{
3255    fp64_minmaxnum(&op1, &op2, 1);
3256    return fplibMax<uint64_t>(op1, op2, fpscr);
3257}
3258
3259template <>
3260uint16_t
3261fplibMin(uint16_t op1, uint16_t op2, FPSCR &fpscr)
3262{
3263    int mode = modeConv(fpscr);
3264    int flags = 0;
3265    int sgn1, exp1, sgn2, exp2;
3266    uint16_t mnt1, mnt2, x, result;
3267
3268    fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3269    fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3270
3271    if ((x = fp16_process_NaNs(op1, op2, mode, &flags))) {
3272        result = x;
3273    } else {
3274        result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ?
3275                  fp16_repack(sgn1, exp1, mnt1) :
3276                  fp16_repack(sgn2, exp2, mnt2));
3277    }
3278    set_fpscr0(fpscr, flags);
3279    return result;
3280}
3281
3282template <>
3283uint32_t
3284fplibMin(uint32_t op1, uint32_t op2, FPSCR &fpscr)
3285{
3286    int mode = modeConv(fpscr);
3287    int flags = 0;
3288    int sgn1, exp1, sgn2, exp2;
3289    uint32_t mnt1, mnt2, x, result;
3290
3291    fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3292    fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3293
3294    if ((x = fp32_process_NaNs(op1, op2, mode, &flags))) {
3295        result = x;
3296    } else {
3297        result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ?
3298                  fp32_repack(sgn1, exp1, mnt1) :
3299                  fp32_repack(sgn2, exp2, mnt2));
3300    }
3301    set_fpscr0(fpscr, flags);
3302    return result;
3303}
3304
3305template <>
3306uint64_t
3307fplibMin(uint64_t op1, uint64_t op2, FPSCR &fpscr)
3308{
3309    int mode = modeConv(fpscr);
3310    int flags = 0;
3311    int sgn1, exp1, sgn2, exp2;
3312    uint64_t mnt1, mnt2, x, result;
3313
3314    fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3315    fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3316
3317    if ((x = fp64_process_NaNs(op1, op2, mode, &flags))) {
3318        result = x;
3319    } else {
3320        result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ?
3321                  fp64_repack(sgn1, exp1, mnt1) :
3322                  fp64_repack(sgn2, exp2, mnt2));
3323    }
3324    set_fpscr0(fpscr, flags);
3325    return result;
3326}
3327
3328template <>
3329uint16_t
3330fplibMinNum(uint16_t op1, uint16_t op2, FPSCR &fpscr)
3331{
3332    fp16_minmaxnum(&op1, &op2, 0);
3333    return fplibMin<uint16_t>(op1, op2, fpscr);
3334}
3335
3336template <>
3337uint32_t
3338fplibMinNum(uint32_t op1, uint32_t op2, FPSCR &fpscr)
3339{
3340    fp32_minmaxnum(&op1, &op2, 0);
3341    return fplibMin<uint32_t>(op1, op2, fpscr);
3342}
3343
3344template <>
3345uint64_t
3346fplibMinNum(uint64_t op1, uint64_t op2, FPSCR &fpscr)
3347{
3348    fp64_minmaxnum(&op1, &op2, 0);
3349    return fplibMin<uint64_t>(op1, op2, fpscr);
3350}
3351
3352template <>
3353uint16_t
3354fplibMul(uint16_t op1, uint16_t op2, FPSCR &fpscr)
3355{
3356    int flags = 0;
3357    uint16_t result = fp16_mul(op1, op2, modeConv(fpscr), &flags);
3358    set_fpscr0(fpscr, flags);
3359    return result;
3360}
3361
3362template <>
3363uint32_t
3364fplibMul(uint32_t op1, uint32_t op2, FPSCR &fpscr)
3365{
3366    int flags = 0;
3367    uint32_t result = fp32_mul(op1, op2, modeConv(fpscr), &flags);
3368    set_fpscr0(fpscr, flags);
3369    return result;
3370}
3371
3372template <>
3373uint64_t
3374fplibMul(uint64_t op1, uint64_t op2, FPSCR &fpscr)
3375{
3376    int flags = 0;
3377    uint64_t result = fp64_mul(op1, op2, modeConv(fpscr), &flags);
3378    set_fpscr0(fpscr, flags);
3379    return result;
3380}
3381
3382template <>
3383uint16_t
3384fplibMulX(uint16_t op1, uint16_t op2, FPSCR &fpscr)
3385{
3386    int mode = modeConv(fpscr);
3387    int flags = 0;
3388    int sgn1, exp1, sgn2, exp2;
3389    uint16_t mnt1, mnt2, result;
3390
3391    fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3392    fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3393
3394    result = fp16_process_NaNs(op1, op2, mode, &flags);
3395    if (!result) {
3396        if ((exp1 == FP16_EXP_INF && !mnt2) ||
3397            (exp2 == FP16_EXP_INF && !mnt1)) {
3398            result = fp16_FPTwo(sgn1 ^ sgn2);
3399        } else if (exp1 == FP16_EXP_INF || exp2 == FP16_EXP_INF) {
3400            result = fp16_infinity(sgn1 ^ sgn2);
3401        } else if (!mnt1 || !mnt2) {
3402            result = fp16_zero(sgn1 ^ sgn2);
3403        } else {
3404            result = fp16_mul(op1, op2, mode, &flags);
3405        }
3406    }
3407
3408    set_fpscr0(fpscr, flags);
3409
3410    return result;
3411}
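
// FMULX semantics: infinity times zero yields +/-2.0 rather than the
// default NaN an ordinary multiply would produce; every other case
// reduces to fp16_mul.  The wider specialisations below are analogous.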
3412
3413template <>
3414uint32_t
3415fplibMulX(uint32_t op1, uint32_t op2, FPSCR &fpscr)
3416{
3417    int mode = modeConv(fpscr);
3418    int flags = 0;
3419    int sgn1, exp1, sgn2, exp2;
3420    uint32_t mnt1, mnt2, result;
3421
3422    fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3423    fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3424
3425    result = fp32_process_NaNs(op1, op2, mode, &flags);
3426    if (!result) {
3427        if ((exp1 == FP32_EXP_INF && !mnt2) ||
3428            (exp2 == FP32_EXP_INF && !mnt1)) {
3429            result = fp32_FPTwo(sgn1 ^ sgn2);
3430        } else if (exp1 == FP32_EXP_INF || exp2 == FP32_EXP_INF) {
3431            result = fp32_infinity(sgn1 ^ sgn2);
3432        } else if (!mnt1 || !mnt2) {
3433            result = fp32_zero(sgn1 ^ sgn2);
3434        } else {
3435            result = fp32_mul(op1, op2, mode, &flags);
3436        }
3437    }
3438
3439    set_fpscr0(fpscr, flags);
3440
3441    return result;
3442}
3443
3444template <>
3445uint64_t
3446fplibMulX(uint64_t op1, uint64_t op2, FPSCR &fpscr)
3447{
3448    int mode = modeConv(fpscr);
3449    int flags = 0;
3450    int sgn1, exp1, sgn2, exp2;
3451    uint64_t mnt1, mnt2, result;
3452
3453    fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3454    fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3455
3456    result = fp64_process_NaNs(op1, op2, mode, &flags);
3457    if (!result) {
3458        if ((exp1 == FP64_EXP_INF && !mnt2) ||
3459            (exp2 == FP64_EXP_INF && !mnt1)) {
3460            result = fp64_FPTwo(sgn1 ^ sgn2);
3461        } else if (exp1 == FP64_EXP_INF || exp2 == FP64_EXP_INF) {
3462            result = fp64_infinity(sgn1 ^ sgn2);
3463        } else if (!mnt1 || !mnt2) {
3464            result = fp64_zero(sgn1 ^ sgn2);
3465        } else {
3466            result = fp64_mul(op1, op2, mode, &flags);
3467        }
3468    }
3469
3470    set_fpscr0(fpscr, flags);
3471
3472    return result;
3473}
3474
3475template <>
3476uint16_t
3477fplibNeg(uint16_t op)
3478{
3479    return op ^ 1ULL << (FP16_BITS - 1);
3480}
3481
3482template <>
3483uint32_t
3484fplibNeg(uint32_t op)
3485{
3486    return op ^ 1ULL << (FP32_BITS - 1);
3487}
3488
3489template <>
3490uint64_t
3491fplibNeg(uint64_t op)
3492{
3493    return op ^ 1ULL << (FP64_BITS - 1);
3494}
3495
3496static const uint8_t recip_sqrt_estimate[256] = {
3497    255, 253, 251, 249, 247, 245, 243, 242, 240, 238, 236, 234, 233, 231, 229, 228,
3498    226, 224, 223, 221, 219, 218, 216, 215, 213, 212, 210, 209, 207, 206, 204, 203,
3499    201, 200, 198, 197, 196, 194, 193, 192, 190, 189, 188, 186, 185, 184, 183, 181,
3500    180, 179, 178, 176, 175, 174, 173, 172, 170, 169, 168, 167, 166, 165, 164, 163,
3501    162, 160, 159, 158, 157, 156, 155, 154, 153, 152, 151, 150, 149, 148, 147, 146,
3502    145, 144, 143, 142, 141, 140, 140, 139, 138, 137, 136, 135, 134, 133, 132, 131,
3503    131, 130, 129, 128, 127, 126, 126, 125, 124, 123, 122, 121, 121, 120, 119, 118,
3504    118, 117, 116, 115, 114, 114, 113, 112, 111, 111, 110, 109, 109, 108, 107, 106,
3505    105, 104, 103, 101, 100,  99,  97,  96,  95,  93,  92,  91,  90,  88,  87,  86,
3506    85,  84,  82,  81,  80,  79,  78,  77,  76,  75,  74,  72,  71,  70,  69,  68,
3507    67,  66,  65,  64,  63,  62,  61,  60,  60,  59,  58,  57,  56,  55,  54,  53,
3508    52,  51,  51,  50,  49,  48,  47,  46,  46,  45,  44,  43,  42,  42,  41,  40,
3509    39,  38,  38,  37,  36,  35,  35,  34,  33,  33,  32,  31,  30,  30,  29,  28,
3510    28,  27,  26,  26,  25,  24,  24,  23,  22,  22,  21,  20,  20,  19,  19,  18,
3511    17,  17,  16,  16,  15,  14,  14,  13,  13,  12,  11,  11,  10,  10,   9,   9,
3512    8,   8,   7,   6,   6,   5,   5,   4,   4,   3,   3,   2,   2,   1,   1,   0
3513};
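
// 256-entry, 8-bit table of reciprocal square-root estimates, indexed by
// the exponent's parity bit and the top fraction bits; it is intended to
// match the RecipSqrtEstimate lookup in the Arm ARM pseudocode.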
3514
3515template <>
3516uint16_t
3517fplibRSqrtEstimate(uint16_t op, FPSCR &fpscr)
3518{
3519    int mode = modeConv(fpscr);
3520    int flags = 0;
3521    int sgn, exp;
3522    uint16_t mnt, result;
3523
3524    fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags);
3525
3526    if (fp16_is_NaN(exp, mnt)) {
3527        result = fp16_process_NaN(op, mode, &flags);
3528    } else if (!mnt) {
3529        result = fp16_infinity(sgn);
3530        flags |= FPLIB_DZC;
3531    } else if (sgn) {
3532        result = fp16_defaultNaN();
3533        flags |= FPLIB_IOC;
3534    } else if (exp == FP16_EXP_INF) {
3535        result = fp16_zero(0);
3536    } else {
3537        exp += FP16_EXP_BITS;
3538        mnt = fp16_normalise(mnt, &exp);
3539        mnt = recip_sqrt_estimate[(~exp & 1) << 7 |
3540                                  (mnt >> (FP16_BITS - 8) & 127)];
3541        result = fp16_pack(0, (3 * FP16_EXP_BIAS - exp - 1) >> 1,
3542                           mnt << (FP16_MANT_BITS - 8));
3543    }
3544
3545    set_fpscr0(fpscr, flags);
3546
3547    return result;
3548}
3549
3550template <>
3551uint32_t
3552fplibRSqrtEstimate(uint32_t op, FPSCR &fpscr)
3553{
3554    int mode = modeConv(fpscr);
3555    int flags = 0;
3556    int sgn, exp;
3557    uint32_t mnt, result;
3558
3559    fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
3560
3561    if (fp32_is_NaN(exp, mnt)) {
3562        result = fp32_process_NaN(op, mode, &flags);
3563    } else if (!mnt) {
3564        result = fp32_infinity(sgn);
3565        flags |= FPLIB_DZC;
3566    } else if (sgn) {
3567        result = fp32_defaultNaN();
3568        flags |= FPLIB_IOC;
3569    } else if (exp == FP32_EXP_INF) {
3570        result = fp32_zero(0);
3571    } else {
3572        exp += FP32_EXP_BITS;
3573        mnt = fp32_normalise(mnt, &exp);
3574        mnt = recip_sqrt_estimate[(~exp & 1) << 7 |
3575                                  (mnt >> (FP32_BITS - 8) & 127)];
3576        result = fp32_pack(0, (3 * FP32_EXP_BIAS - exp - 1) >> 1,
3577                           mnt << (FP32_MANT_BITS - 8));
3578    }
3579
3580    set_fpscr0(fpscr, flags);
3581
3582    return result;
3583}
3584
3585template <>
3586uint64_t
3587fplibRSqrtEstimate(uint64_t op, FPSCR &fpscr)
3588{
3589    int mode = modeConv(fpscr);
3590    int flags = 0;
3591    int sgn, exp;
3592    uint64_t mnt, result;
3593
3594    fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
3595
3596    if (fp64_is_NaN(exp, mnt)) {
3597        result = fp64_process_NaN(op, mode, &flags);
3598    } else if (!mnt) {
3599        result = fp64_infinity(sgn);
3600        flags |= FPLIB_DZC;
3601    } else if (sgn) {
3602        result = fp64_defaultNaN();
3603        flags |= FPLIB_IOC;
3604    } else if (exp == FP64_EXP_INF) {
3605        result = fp64_zero(0);
3606    } else {
3607        exp += FP64_EXP_BITS;
3608        mnt = fp64_normalise(mnt, &exp);
3609        mnt = recip_sqrt_estimate[(~exp & 1) << 7 |
3610                                  (mnt >> (FP64_BITS - 8) & 127)];
3611        result = fp64_pack(0, (3 * FP64_EXP_BIAS - exp - 1) >> 1,
3612                           mnt << (FP64_MANT_BITS - 8));
3613    }
3614
3615    set_fpscr0(fpscr, flags);
3616
3617    return result;
3618}
3619
3620template <>
3621uint16_t
3622fplibRSqrtStepFused(uint16_t op1, uint16_t op2, FPSCR &fpscr)
3623{
3624    int mode = modeConv(fpscr);
3625    int flags = 0;
3626    int sgn1, exp1, sgn2, exp2;
3627    uint16_t mnt1, mnt2, result;
3628
3629    op1 = fplibNeg<uint16_t>(op1);
3630    fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3631    fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3632
3633    result = fp16_process_NaNs(op1, op2, mode, &flags);
3634    if (!result) {
3635        if ((exp1 == FP16_EXP_INF && !mnt2) ||
3636            (exp2 == FP16_EXP_INF && !mnt1)) {
3637            result = fp16_FPOnePointFive(0);
3638        } else if (exp1 == FP16_EXP_INF || exp2 == FP16_EXP_INF) {
3639            result = fp16_infinity(sgn1 ^ sgn2);
3640        } else {
3641            result = fp16_muladd(fp16_FPThree(0), op1, op2, -1, mode, &flags);
3642        }
3643    }
3644
3645    set_fpscr0(fpscr, flags);
3646
3647    return result;
3648}
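
// The arithmetic above is the fused reciprocal-square-root step
// (3 - op1 * op2) / 2: op1 is negated up front, the fused multiply-add
// supplies the 3, and the scale argument of -1 performs the final
// halving.  The fp32/fp64 specialisations below are identical in
// structure.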
3649
3650template <>
3651uint32_t
3652fplibRSqrtStepFused(uint32_t op1, uint32_t op2, FPSCR &fpscr)
3653{
3654    int mode = modeConv(fpscr);
3655    int flags = 0;
3656    int sgn1, exp1, sgn2, exp2;
3657    uint32_t mnt1, mnt2, result;
3658
3659    op1 = fplibNeg<uint32_t>(op1);
3660    fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3661    fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3662
3663    result = fp32_process_NaNs(op1, op2, mode, &flags);
3664    if (!result) {
3665        if ((exp1 == FP32_EXP_INF && !mnt2) ||
3666            (exp2 == FP32_EXP_INF && !mnt1)) {
3667            result = fp32_FPOnePointFive(0);
3668        } else if (exp1 == FP32_EXP_INF || exp2 == FP32_EXP_INF) {
3669            result = fp32_infinity(sgn1 ^ sgn2);
3670        } else {
3671            result = fp32_muladd(fp32_FPThree(0), op1, op2, -1, mode, &flags);
3672        }
3673    }
3674
3675    set_fpscr0(fpscr, flags);
3676
3677    return result;
3678}
3679
3680template <>
3681uint64_t
3682fplibRSqrtStepFused(uint64_t op1, uint64_t op2, FPSCR &fpscr)
3683{
3684    int mode = modeConv(fpscr);
3685    int flags = 0;
3686    int sgn1, exp1, sgn2, exp2;
3687    uint64_t mnt1, mnt2, result;
3688
3689    op1 = fplibNeg<uint64_t>(op1);
3690    fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3691    fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3692
3693    result = fp64_process_NaNs(op1, op2, mode, &flags);
3694    if (!result) {
3695        if ((exp1 == FP64_EXP_INF && !mnt2) ||
3696            (exp2 == FP64_EXP_INF && !mnt1)) {
3697            result = fp64_FPOnePointFive(0);
3698        } else if (exp1 == FP64_EXP_INF || exp2 == FP64_EXP_INF) {
3699            result = fp64_infinity(sgn1 ^ sgn2);
3700        } else {
3701            result = fp64_muladd(fp64_FPThree(0), op1, op2, -1, mode, &flags);
3702        }
3703    }
3704
3705    set_fpscr0(fpscr, flags);
3706
3707    return result;
3708}
3709
3710template <>
3711uint16_t
3712fplibRecipEstimate(uint16_t op, FPSCR &fpscr)
3713{
3714    int mode = modeConv(fpscr);
3715    int flags = 0;
3716    int sgn, exp;
3717    uint16_t mnt, result;
3718
3719    fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags);
3720
3721    if (fp16_is_NaN(exp, mnt)) {
3722        result = fp16_process_NaN(op, mode, &flags);
3723    } else if (exp == FP16_EXP_INF) {
3724        result = fp16_zero(sgn);
3725    } else if (!mnt) {
3726        result = fp16_infinity(sgn);
3727        flags |= FPLIB_DZC;
3728    } else if (!((uint16_t)(op << 1) >> (FP16_MANT_BITS - 1))) {
3729        bool overflow_to_inf = false;
3730        switch (FPCRRounding(fpscr)) {
3731          case FPRounding_TIEEVEN:
3732            overflow_to_inf = true;
3733            break;
3734          case FPRounding_POSINF:
3735            overflow_to_inf = !sgn;
3736            break;
3737          case FPRounding_NEGINF:
3738            overflow_to_inf = sgn;
3739            break;
3740          case FPRounding_ZERO:
3741            overflow_to_inf = false;
3742            break;
3743          default:
3744            panic("Unrecognized FP rounding mode");
3745        }
3746        result = overflow_to_inf ? fp16_infinity(sgn) : fp16_max_normal(sgn);
3747        flags |= FPLIB_OFC | FPLIB_IXC;
3748    } else if (fpscr.fz16 && exp >= 2 * FP16_EXP_BIAS - 1) {
3749        result = fp16_zero(sgn);
3750        flags |= FPLIB_UFC;
3751    } else {
3752        exp += FP16_EXP_BITS;
3753        mnt = fp16_normalise(mnt, &exp);
3754        int result_exp = 2 * FP16_EXP_BIAS - 1 - exp;
3755        uint16_t fraction = (((uint32_t)1 << 19) /
3756                             (mnt >> (FP16_BITS - 10) | 1) + 1) >> 1;
3757        fraction <<= FP16_MANT_BITS - 8;
3758        if (result_exp == 0) {
3759            fraction >>= 1;
3760        } else if (result_exp == -1) {
3761            fraction >>= 2;
3762            result_exp = 0;
3763        }
3764        result = fp16_pack(sgn, result_exp, fraction);
3765    }
3766
3767    set_fpscr0(fpscr, flags);
3768
3769    return result;
3770}
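
// The normal path above builds an ~8-bit reciprocal estimate by dividing
// 2^19 by the operand's top ten significand bits (low bit forced odd),
// as the Arm ARM RecipEstimate pseudocode does; the earlier branches
// handle NaNs, infinities, zeroes, tiny inputs whose reciprocal
// overflows to max-normal or infinity depending on the rounding mode,
// and inputs whose reciprocal flushes to zero under FZ16.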
3771
3772template <>
3773uint32_t
3774fplibRecipEstimate(uint32_t op, FPSCR &fpscr)
3775{
3776    int mode = modeConv(fpscr);
3777    int flags = 0;
3778    int sgn, exp;
3779    uint32_t mnt, result;
3780
3781    fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
3782
3783    if (fp32_is_NaN(exp, mnt)) {
3784        result = fp32_process_NaN(op, mode, &flags);
3785    } else if (exp == FP32_EXP_INF) {
3786        result = fp32_zero(sgn);
3787    } else if (!mnt) {
3788        result = fp32_infinity(sgn);
3789        flags |= FPLIB_DZC;
3790    } else if (!((uint32_t)(op << 1) >> (FP32_MANT_BITS - 1))) {
3791        bool overflow_to_inf = false;
3792        switch (FPCRRounding(fpscr)) {
3793          case FPRounding_TIEEVEN:
3794            overflow_to_inf = true;
3795            break;
3796          case FPRounding_POSINF:
3797            overflow_to_inf = !sgn;
3798            break;
3799          case FPRounding_NEGINF:
3800            overflow_to_inf = sgn;
3801            break;
3802          case FPRounding_ZERO:
3803            overflow_to_inf = false;
3804            break;
3805          default:
3806            panic("Unrecognized FP rounding mode");
3807        }
3808        result = overflow_to_inf ? fp32_infinity(sgn) : fp32_max_normal(sgn);
3809        flags |= FPLIB_OFC | FPLIB_IXC;
3810    } else if (fpscr.fz && exp >= 2 * FP32_EXP_BIAS - 1) {
3811        result = fp32_zero(sgn);
3812        flags |= FPLIB_UFC;
3813    } else {
3814        exp += FP32_EXP_BITS;
3815        mnt = fp32_normalise(mnt, &exp);
3816        int result_exp = 2 * FP32_EXP_BIAS - 1 - exp;
3817        uint32_t fraction = (((uint32_t)1 << 19) /
3818                             (mnt >> (FP32_BITS - 10) | 1) + 1) >> 1;
3819        fraction <<= FP32_MANT_BITS - 8;
3820        if (result_exp == 0) {
3821            fraction >>= 1;
3822        } else if (result_exp == -1) {
3823            fraction >>= 2;
3824            result_exp = 0;
3825        }
3826        result = fp32_pack(sgn, result_exp, fraction);
3827    }
3828
3829    set_fpscr0(fpscr, flags);
3830
3831    return result;
3832}
3833
3834template <>
3835uint64_t
3836fplibRecipEstimate(uint64_t op, FPSCR &fpscr)
3837{
3838    int mode = modeConv(fpscr);
3839    int flags = 0;
3840    int sgn, exp;
3841    uint64_t mnt, result;
3842
3843    fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
3844
3845    if (fp64_is_NaN(exp, mnt)) {
3846        result = fp64_process_NaN(op, mode, &flags);
3847    } else if (exp == FP64_EXP_INF) {
3848        result = fp64_zero(sgn);
3849    } else if (!mnt) {
3850        result = fp64_infinity(sgn);
3851        flags |= FPLIB_DZC;
3852    } else if (!((uint64_t)(op << 1) >> (FP64_MANT_BITS - 1))) {
3853        bool overflow_to_inf = false;
3854        switch (FPCRRounding(fpscr)) {
3855          case FPRounding_TIEEVEN:
3856            overflow_to_inf = true;
3857            break;
3858          case FPRounding_POSINF:
3859            overflow_to_inf = !sgn;
3860            break;
3861          case FPRounding_NEGINF:
3862            overflow_to_inf = sgn;
3863            break;
3864          case FPRounding_ZERO:
3865            overflow_to_inf = false;
3866            break;
3867          default:
3868            panic("Unrecognized FP rounding mode");
3869        }
3870        result = overflow_to_inf ? fp64_infinity(sgn) : fp64_max_normal(sgn);
3871        flags |= FPLIB_OFC | FPLIB_IXC;
3872    } else if (fpscr.fz && exp >= 2 * FP64_EXP_BIAS - 1) {
3873        result = fp64_zero(sgn);
3874        flags |= FPLIB_UFC;
3875    } else {
3876        exp += FP64_EXP_BITS;
3877        mnt = fp64_normalise(mnt, &exp);
3878        int result_exp = 2 * FP64_EXP_BIAS - 1 - exp;
3879        uint64_t fraction = (((uint32_t)1 << 19) /
3880                             (mnt >> (FP64_BITS - 10) | 1) + 1) >> 1;
3881        fraction <<= FP64_MANT_BITS - 8;
3882        if (result_exp == 0) {
3883            fraction >>= 1;
3884        } else if (result_exp == -1) {
3885            fraction >>= 2;
3886            result_exp = 0;
3887        }
3888        result = fp64_pack(sgn, result_exp, fraction);
3889    }
3890
3891    set_fpscr0(fpscr, flags);
3892
3893    return result;
3894}
3895
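// FPRecipStep: computes 2.0 - op1 * op2 in a single fused step, with op1
// negated so the muladd evaluates 2 + (-op1) * op2. An infinity paired with
// a zero yields 2.0, matching the Arm ARM FPRecipStepFused() pseudocode.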
3896template <>
3897uint16_t
3898fplibRecipStepFused(uint16_t op1, uint16_t op2, FPSCR &fpscr)
3899{
3900    int mode = modeConv(fpscr);
3901    int flags = 0;
3902    int sgn1, exp1, sgn2, exp2;
3903    uint16_t mnt1, mnt2, result;
3904
3905    op1 = fplibNeg<uint16_t>(op1);
3906    fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3907    fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3908
3909    result = fp16_process_NaNs(op1, op2, mode, &flags);
3910    if (!result) {
3911        if ((exp1 == FP16_EXP_INF && !mnt2) ||
3912            (exp2 == FP16_EXP_INF && !mnt1)) {
3913            result = fp16_FPTwo(0);
3914        } else if (exp1 == FP16_EXP_INF || exp2 == FP16_EXP_INF) {
3915            result = fp16_infinity(sgn1 ^ sgn2);
3916        } else {
3917            result = fp16_muladd(fp16_FPTwo(0), op1, op2, 0, mode, &flags);
3918        }
3919    }
3920
3921    set_fpscr0(fpscr, flags);
3922
3923    return result;
3924}
3925
3926template <>
3927uint32_t
3928fplibRecipStepFused(uint32_t op1, uint32_t op2, FPSCR &fpscr)
3929{
3930    int mode = modeConv(fpscr);
3931    int flags = 0;
3932    int sgn1, exp1, sgn2, exp2;
3933    uint32_t mnt1, mnt2, result;
3934
3935    op1 = fplibNeg<uint32_t>(op1);
3936    fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3937    fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3938
3939    result = fp32_process_NaNs(op1, op2, mode, &flags);
3940    if (!result) {
3941        if ((exp1 == FP32_EXP_INF && !mnt2) ||
3942            (exp2 == FP32_EXP_INF && !mnt1)) {
3943            result = fp32_FPTwo(0);
3944        } else if (exp1 == FP32_EXP_INF || exp2 == FP32_EXP_INF) {
3945            result = fp32_infinity(sgn1 ^ sgn2);
3946        } else {
3947            result = fp32_muladd(fp32_FPTwo(0), op1, op2, 0, mode, &flags);
3948        }
3949    }
3950
3951    set_fpscr0(fpscr, flags);
3952
3953    return result;
3954}
3955
3956template <>
3957uint64_t
3958fplibRecipStepFused(uint64_t op1, uint64_t op2, FPSCR &fpscr)
3959{
3960    int mode = modeConv(fpscr);
3961    int flags = 0;
3962    int sgn1, exp1, sgn2, exp2;
3963    uint64_t mnt1, mnt2, result;
3964
3965    op1 = fplibNeg<uint64_t>(op1);
3966    fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3967    fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3968
3969    result = fp64_process_NaNs(op1, op2, mode, &flags);
3970    if (!result) {
3971        if ((exp1 == FP64_EXP_INF && !mnt2) ||
3972            (exp2 == FP64_EXP_INF && !mnt1)) {
3973            result = fp64_FPTwo(0);
3974        } else if (exp1 == FP64_EXP_INF || exp2 == FP64_EXP_INF) {
3975            result = fp64_infinity(sgn1 ^ sgn2);
3976        } else {
3977            result = fp64_muladd(fp64_FPTwo(0), op1, op2, 0, mode, &flags);
3978        }
3979    }
3980
3981    set_fpscr0(fpscr, flags);
3982
3983    return result;
3984}
3985
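// FPRecpX (FRECPX): a coarse reciprocal used for argument scaling. NaNs are
// processed as usual; any other operand yields its own sign, a zero mantissa
// and the bitwise complement of its exponent field, with zeros and denormals
// mapping to the largest usable exponent, FP16_EXP_INF - 1.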
3986template <>
3987uint16_t
3988fplibRecpX(uint16_t op, FPSCR &fpscr)
3989{
3990    int mode = modeConv(fpscr);
3991    int flags = 0;
3992    int sgn, exp;
3993    uint16_t mnt, result;
3994
3995    fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags);
3996
3997    if (fp16_is_NaN(exp, mnt)) {
3998        result = fp16_process_NaN(op, mode, &flags);
    } else {
4001        if (!mnt) { // Zero and denormals
4002            result = fp16_pack(sgn, FP16_EXP_INF - 1, 0);
4003        } else { // Infinities and normals
4004            result = fp16_pack(sgn, exp ^ FP16_EXP_INF, 0);
4005        }
4006    }
4007
4008    set_fpscr0(fpscr, flags);
4009
4010    return result;
4011}
4012
4013template <>
4014uint32_t
4015fplibRecpX(uint32_t op, FPSCR &fpscr)
4016{
4017    int mode = modeConv(fpscr);
4018    int flags = 0;
4019    int sgn, exp;
4020    uint32_t mnt, result;
4021
4022    fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
4023
4024    if (fp32_is_NaN(exp, mnt)) {
4025        result = fp32_process_NaN(op, mode, &flags);
    } else {
4028        if (!mnt) { // Zero and denormals
4029            result = fp32_pack(sgn, FP32_EXP_INF - 1, 0);
4030        } else { // Infinities and normals
4031            result = fp32_pack(sgn, exp ^ FP32_EXP_INF, 0);
4032        }
4033    }
4034
4035    set_fpscr0(fpscr, flags);
4036
4037    return result;
4038}
4039
4040template <>
4041uint64_t
4042fplibRecpX(uint64_t op, FPSCR &fpscr)
4043{
4044    int mode = modeConv(fpscr);
4045    int flags = 0;
4046    int sgn, exp;
4047    uint64_t mnt, result;
4048
4049    fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
4050
4051    if (fp64_is_NaN(exp, mnt)) {
4052        result = fp64_process_NaN(op, mode, &flags);
    } else {
4055        if (!mnt) { // Zero and denormals
4056            result = fp64_pack(sgn, FP64_EXP_INF - 1, 0);
4057        } else { // Infinities and normals
4058            result = fp64_pack(sgn, exp ^ FP64_EXP_INF, 0);
4059        }
4060    }
4061
4062    set_fpscr0(fpscr, flags);
4063
4064    return result;
4065}
4066
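// FPRoundInt: round op to an integral value in the same floating-point
// format, as used by the FRINT* instructions; 'exact' requests an Inexact
// exception when the result differs from the operand (FRINTX). The three
// specialisations are identical apart from widths.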
4067template <>
4068uint16_t
4069fplibRoundInt(uint16_t op, FPRounding rounding, bool exact, FPSCR &fpscr)
4070{
4071    int expint = FP16_EXP_BIAS + FP16_MANT_BITS;
4072    int mode = modeConv(fpscr);
4073    int flags = 0;
4074    int sgn, exp;
4075    uint16_t mnt, result;
4076
4077    // Unpack using FPCR to determine if subnormals are flushed-to-zero:
4078    fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags);
4079
4080    // Handle NaNs, infinities and zeroes:
4081    if (fp16_is_NaN(exp, mnt)) {
4082        result = fp16_process_NaN(op, mode, &flags);
4083    } else if (exp == FP16_EXP_INF) {
4084        result = fp16_infinity(sgn);
4085    } else if (!mnt) {
4086        result = fp16_zero(sgn);
4087    } else if (exp >= expint) {
4088        // There are no fractional bits
4089        result = op;
4090    } else {
4091        // Truncate towards zero:
4092        uint16_t x = expint - exp >= FP16_BITS ? 0 : mnt >> (expint - exp);
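        // err encodes the discarded fraction relative to half a unit in
        // the last place: 0 exact, 1 below half, 2 exactly half, 3 above.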
4093        int err = exp < expint - FP16_BITS ? 1 :
4094            ((mnt << 1 >> (expint - exp - 1) & 3) |
4095             ((uint16_t)(mnt << 2 << (FP16_BITS + exp - expint)) != 0));
4096        switch (rounding) {
4097          case FPRounding_TIEEVEN:
4098            x += (err == 3 || (err == 2 && (x & 1)));
4099            break;
4100          case FPRounding_POSINF:
4101            x += err && !sgn;
4102            break;
4103          case FPRounding_NEGINF:
4104            x += err && sgn;
4105            break;
4106          case FPRounding_ZERO:
4107            break;
4108          case FPRounding_TIEAWAY:
4109            x += err >> 1;
4110            break;
4111          default:
4112            panic("Unrecognized FP rounding mode");
4113        }
4114
4115        if (x == 0) {
4116            result = fp16_zero(sgn);
4117        } else {
4118            exp = expint;
4119            mnt = fp16_normalise(x, &exp);
4120            result = fp16_pack(sgn, exp + FP16_EXP_BITS, mnt >> FP16_EXP_BITS);
4121        }
4122
4123        if (err && exact)
4124            flags |= FPLIB_IXC;
4125    }
4126
4127    set_fpscr0(fpscr, flags);
4128
4129    return result;
4130}
4131
4132template <>
4133uint32_t
4134fplibRoundInt(uint32_t op, FPRounding rounding, bool exact, FPSCR &fpscr)
4135{
4136    int expint = FP32_EXP_BIAS + FP32_MANT_BITS;
4137    int mode = modeConv(fpscr);
4138    int flags = 0;
4139    int sgn, exp;
4140    uint32_t mnt, result;
4141
4142    // Unpack using FPCR to determine if subnormals are flushed-to-zero:
4143    fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
4144
4145    // Handle NaNs, infinities and zeroes:
4146    if (fp32_is_NaN(exp, mnt)) {
4147        result = fp32_process_NaN(op, mode, &flags);
4148    } else if (exp == FP32_EXP_INF) {
4149        result = fp32_infinity(sgn);
4150    } else if (!mnt) {
4151        result = fp32_zero(sgn);
4152    } else if (exp >= expint) {
4153        // There are no fractional bits
4154        result = op;
4155    } else {
4156        // Truncate towards zero:
4157        uint32_t x = expint - exp >= FP32_BITS ? 0 : mnt >> (expint - exp);
4158        int err = exp < expint - FP32_BITS ? 1 :
4159            ((mnt << 1 >> (expint - exp - 1) & 3) |
4160             ((uint32_t)(mnt << 2 << (FP32_BITS + exp - expint)) != 0));
4161        switch (rounding) {
4162          case FPRounding_TIEEVEN:
4163            x += (err == 3 || (err == 2 && (x & 1)));
4164            break;
4165          case FPRounding_POSINF:
4166            x += err && !sgn;
4167            break;
4168          case FPRounding_NEGINF:
4169            x += err && sgn;
4170            break;
4171          case FPRounding_ZERO:
4172            break;
4173          case FPRounding_TIEAWAY:
4174            x += err >> 1;
4175            break;
4176          default:
4177            panic("Unrecognized FP rounding mode");
4178        }
4179
4180        if (x == 0) {
4181            result = fp32_zero(sgn);
4182        } else {
4183            exp = expint;
4184            mnt = fp32_normalise(x, &exp);
4185            result = fp32_pack(sgn, exp + FP32_EXP_BITS, mnt >> FP32_EXP_BITS);
4186        }
4187
4188        if (err && exact)
4189            flags |= FPLIB_IXC;
4190    }
4191
4192    set_fpscr0(fpscr, flags);
4193
4194    return result;
4195}
4196
4197template <>
4198uint64_t
4199fplibRoundInt(uint64_t op, FPRounding rounding, bool exact, FPSCR &fpscr)
4200{
4201    int expint = FP64_EXP_BIAS + FP64_MANT_BITS;
4202    int mode = modeConv(fpscr);
4203    int flags = 0;
4204    int sgn, exp;
4205    uint64_t mnt, result;
4206
4207    // Unpack using FPCR to determine if subnormals are flushed-to-zero:
4208    fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
4209
4210    // Handle NaNs, infinities and zeroes:
4211    if (fp64_is_NaN(exp, mnt)) {
4212        result = fp64_process_NaN(op, mode, &flags);
4213    } else if (exp == FP64_EXP_INF) {
4214        result = fp64_infinity(sgn);
4215    } else if (!mnt) {
4216        result = fp64_zero(sgn);
4217    } else if (exp >= expint) {
4218        // There are no fractional bits
4219        result = op;
4220    } else {
4221        // Truncate towards zero:
4222        uint64_t x = expint - exp >= FP64_BITS ? 0 : mnt >> (expint - exp);
4223        int err = exp < expint - FP64_BITS ? 1 :
4224            ((mnt << 1 >> (expint - exp - 1) & 3) |
4225             ((uint64_t)(mnt << 2 << (FP64_BITS + exp - expint)) != 0));
4226        switch (rounding) {
4227          case FPRounding_TIEEVEN:
4228            x += (err == 3 || (err == 2 && (x & 1)));
4229            break;
4230          case FPRounding_POSINF:
4231            x += err && !sgn;
4232            break;
4233          case FPRounding_NEGINF:
4234            x += err && sgn;
4235            break;
4236          case FPRounding_ZERO:
4237            break;
4238          case FPRounding_TIEAWAY:
4239            x += err >> 1;
4240            break;
4241          default:
4242            panic("Unrecognized FP rounding mode");
4243        }
4244
4245        if (x == 0) {
4246            result = fp64_zero(sgn);
4247        } else {
4248            exp = expint;
4249            mnt = fp64_normalise(x, &exp);
4250            result = fp64_pack(sgn, exp + FP64_EXP_BITS, mnt >> FP64_EXP_BITS);
4251        }
4252
4253        if (err && exact)
4254            flags |= FPLIB_IXC;
4255    }
4256
4257    set_fpscr0(fpscr, flags);
4258
4259    return result;
4260}
4261
4262template <>
4263uint16_t
4264fplibScale(uint16_t op1, uint16_t op2, FPSCR &fpscr)
4265{
4266    int flags = 0;
4267    uint16_t result = fp16_scale(op1, (int16_t)op2, modeConv(fpscr), &flags);
4268    set_fpscr0(fpscr, flags);
4269    return result;
4270}
4271
4272template <>
4273uint32_t
4274fplibScale(uint32_t op1, uint32_t op2, FPSCR &fpscr)
4275{
4276    int flags = 0;
4277    uint32_t result = fp32_scale(op1, (int32_t)op2, modeConv(fpscr), &flags);
4278    set_fpscr0(fpscr, flags);
4279    return result;
4280}
4281
4282template <>
4283uint64_t
4284fplibScale(uint64_t op1, uint64_t op2, FPSCR &fpscr)
4285{
4286    int flags = 0;
4287    uint64_t result = fp64_scale(op1, (int64_t)op2, modeConv(fpscr), &flags);
4288    set_fpscr0(fpscr, flags);
4289    return result;
4290}
4291
4292template <>
4293uint16_t
4294fplibSqrt(uint16_t op, FPSCR &fpscr)
4295{
4296    int flags = 0;
4297    uint16_t result = fp16_sqrt(op, modeConv(fpscr), &flags);
4298    set_fpscr0(fpscr, flags);
4299    return result;
4300}
4301
4302template <>
4303uint32_t
4304fplibSqrt(uint32_t op, FPSCR &fpscr)
4305{
4306    int flags = 0;
4307    uint32_t result = fp32_sqrt(op, modeConv(fpscr), &flags);
4308    set_fpscr0(fpscr, flags);
4309    return result;
4310}
4311
4312template <>
4313uint64_t
4314fplibSqrt(uint64_t op, FPSCR &fpscr)
4315{
4316    int flags = 0;
4317    uint64_t result = fp64_sqrt(op, modeConv(fpscr), &flags);
4318    set_fpscr0(fpscr, flags);
4319    return result;
4320}
4321
4322template <>
4323uint16_t
4324fplibSub(uint16_t op1, uint16_t op2, FPSCR &fpscr)
4325{
4326    int flags = 0;
4327    uint16_t result = fp16_add(op1, op2, 1, modeConv(fpscr), &flags);
4328    set_fpscr0(fpscr, flags);
4329    return result;
4330}
4331
4332template <>
4333uint32_t
4334fplibSub(uint32_t op1, uint32_t op2, FPSCR &fpscr)
4335{
4336    int flags = 0;
4337    uint32_t result = fp32_add(op1, op2, 1, modeConv(fpscr), &flags);
4338    set_fpscr0(fpscr, flags);
4339    return result;
4340}
4341
4342template <>
4343uint64_t
4344fplibSub(uint64_t op1, uint64_t op2, FPSCR &fpscr)
4345{
4346    int flags = 0;
4347    uint64_t result = fp64_add(op1, op2, 1, modeConv(fpscr), &flags);
4348    set_fpscr0(fpscr, flags);
4349    return result;
4350}
4351
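// FTMAD: returns coeff + op1 * |op2|, where the coefficient row is selected
// by the sign bit of op2 and the entry by coeff_index. The rows hold
// polynomial coefficients close to the Taylor series of sine (row 0:
// 1, -1/6, 1/120, ...) and cosine (row 1: 1, -1/2, 1/24, ...), slightly
// adjusted for accuracy, so repeated FTMAD steps build the two series.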
4352template <>
4353uint16_t
4354fplibTrigMulAdd(uint8_t coeff_index, uint16_t op1, uint16_t op2, FPSCR &fpscr)
4355{
    static const uint16_t coeff[2][8] = {
4357        {
4358            0x3c00,
4359            0xb155,
4360            0x2030,
4361            0x0000,
4362            0x0000,
4363            0x0000,
4364            0x0000,
4365            0x0000,
4366        },
4367        {
4368            0x3c00,
4369            0xb800,
4370            0x293a,
4371            0x0000,
4372            0x0000,
4373            0x0000,
4374            0x0000,
4375            0x0000
4376        }
4377    };
4378    int flags = 0;
4379    uint16_t result =
4380        fp16_muladd(coeff[op2 >> (FP16_BITS - 1)][coeff_index], op1,
4381                    fplibAbs(op2), 0, modeConv(fpscr), &flags);
4382    set_fpscr0(fpscr, flags);
4383    return result;
4384}
4385
4386template <>
4387uint32_t
4388fplibTrigMulAdd(uint8_t coeff_index, uint32_t op1, uint32_t op2, FPSCR &fpscr)
4389{
    static const uint32_t coeff[2][8] = {
4391        {
4392            0x3f800000,
4393            0xbe2aaaab,
4394            0x3c088886,
4395            0xb95008b9,
4396            0x36369d6d,
4397            0x00000000,
4398            0x00000000,
4399            0x00000000
4400        },
4401        {
4402            0x3f800000,
4403            0xbf000000,
4404            0x3d2aaaa6,
4405            0xbab60705,
4406            0x37cd37cc,
4407            0x00000000,
4408            0x00000000,
4409            0x00000000
4410        }
4411    };
4412    int flags = 0;
4413    uint32_t result =
4414        fp32_muladd(coeff[op2 >> (FP32_BITS - 1)][coeff_index], op1,
4415                    fplibAbs(op2), 0, modeConv(fpscr), &flags);
4416    set_fpscr0(fpscr, flags);
4417    return result;
4418}
4419
4420template <>
4421uint64_t
4422fplibTrigMulAdd(uint8_t coeff_index, uint64_t op1, uint64_t op2, FPSCR &fpscr)
4423{
    static const uint64_t coeff[2][8] = {
4425        {
4426            0x3ff0000000000000ULL,
4427            0xbfc5555555555543ULL,
4428            0x3f8111111110f30cULL,
4429            0xbf2a01a019b92fc6ULL,
4430            0x3ec71de351f3d22bULL,
4431            0xbe5ae5e2b60f7b91ULL,
4432            0x3de5d8408868552fULL,
4433            0x0000000000000000ULL
4434        },
4435        {
4436            0x3ff0000000000000ULL,
4437            0xbfe0000000000000ULL,
4438            0x3fa5555555555536ULL,
4439            0xbf56c16c16c13a0bULL,
4440            0x3efa01a019b1e8d8ULL,
4441            0xbe927e4f7282f468ULL,
4442            0x3e21ee96d2641b13ULL,
4443            0xbda8f76380fbb401ULL
4444        }
4445    };
4446    int flags = 0;
4447    uint64_t result =
4448        fp64_muladd(coeff[op2 >> (FP64_BITS - 1)][coeff_index], op1,
4449                    fplibAbs(op2), 0, modeConv(fpscr), &flags);
4450    set_fpscr0(fpscr, flags);
4451    return result;
4452}
4453
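// FTSMUL: square op1 and, unless the square is a NaN, replace its sign bit
// with bit 0 of op2 (only the low bit of op2 survives the truncation back
// to the operand width).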
4454template <>
4455uint16_t
4456fplibTrigSMul(uint16_t op1, uint16_t op2, FPSCR &fpscr)
4457{
4458    int flags = 0;
4459    int sgn, exp;
4460    uint16_t mnt;
4461
4462    int mode = modeConv(fpscr);
4463    uint16_t result = fp16_mul(op1, op1, mode, &flags);
4464    set_fpscr0(fpscr, flags);
4465
4466    fp16_unpack(&sgn, &exp, &mnt, result, mode, &flags);
4467    if (!fp16_is_NaN(exp, mnt)) {
4468        result = (result & ~(1ULL << (FP16_BITS - 1))) |
4469            op2 << (FP16_BITS - 1);
4470    }
4471    return result;
4472}
4473
4474template <>
4475uint32_t
4476fplibTrigSMul(uint32_t op1, uint32_t op2, FPSCR &fpscr)
4477{
4478    int flags = 0;
4479    int sgn, exp;
4480    uint32_t mnt;
4481
4482    int mode = modeConv(fpscr);
4483    uint32_t result = fp32_mul(op1, op1, mode, &flags);
4484    set_fpscr0(fpscr, flags);
4485
4486    fp32_unpack(&sgn, &exp, &mnt, result, mode, &flags);
4487    if (!fp32_is_NaN(exp, mnt)) {
        result = (result & ~(1ULL << (FP32_BITS - 1))) |
            op2 << (FP32_BITS - 1);
4489    }
4490    return result;
4491}
4492
4493template <>
4494uint64_t
4495fplibTrigSMul(uint64_t op1, uint64_t op2, FPSCR &fpscr)
4496{
4497    int flags = 0;
4498    int sgn, exp;
4499    uint64_t mnt;
4500
4501    int mode = modeConv(fpscr);
4502    uint64_t result = fp64_mul(op1, op1, mode, &flags);
4503    set_fpscr0(fpscr, flags);
4504
4505    fp64_unpack(&sgn, &exp, &mnt, result, mode, &flags);
4506    if (!fp64_is_NaN(exp, mnt)) {
        result = (result & ~(1ULL << (FP64_BITS - 1))) |
            op2 << (FP64_BITS - 1);
4508    }
4509    return result;
4510}
4511
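// FTSSEL: return op1, or 1.0 when bit 0 of op2 is set, with the sign
// flipped when bit 1 of op2 is set.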
4512template <>
4513uint16_t
4514fplibTrigSSel(uint16_t op1, uint16_t op2, FPSCR &fpscr)
4515{
4516    static constexpr uint16_t fpOne =
4517        (uint16_t)FP16_EXP_BIAS << FP16_MANT_BITS; // 1.0
4518    if (op2 & 1)
4519        op1 = fpOne;
4520    return op1 ^ ((op2 >> 1) << (FP16_BITS - 1));
4521}
4522
4523template <>
4524uint32_t
4525fplibTrigSSel(uint32_t op1, uint32_t op2, FPSCR &fpscr)
4526{
4527    static constexpr uint32_t fpOne =
4528        (uint32_t)FP32_EXP_BIAS << FP32_MANT_BITS; // 1.0
4529    if (op2 & 1)
4530        op1 = fpOne;
4531    return op1 ^ ((op2 >> 1) << (FP32_BITS - 1));
4532}
4533
4534template <>
4535uint64_t
4536fplibTrigSSel(uint64_t op1, uint64_t op2, FPSCR &fpscr)
4537{
4538    static constexpr uint64_t fpOne =
4539        (uint64_t)FP64_EXP_BIAS << FP64_MANT_BITS; // 1.0
4540    if (op2 & 1)
4541        op1 = fpOne;
4542    return op1 ^ ((op2 >> 1) << (FP64_BITS - 1));
4543}
4544
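// Convert an unpacked value, expressed in fp64 terms, to a 64-bit signed or
// unsigned integer. Exponents above expmax saturate immediately; otherwise
// err carries round and sticky information into the rounding step, and the
// final range check saturates with the Invalid Operation flag. In both
// places ((uint64_t)!u << (FP64_BITS - 1)) - !sgn produces UINT64_MAX or
// zero for unsigned overflow and INT64_MAX or INT64_MIN for signed overflow.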
4545static uint64_t
4546FPToFixed_64(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding,
4547             int *flags)
4548{
4549    int expmax = FP64_EXP_BIAS + FP64_BITS - 1;
4550    uint64_t x;
4551    int err;
4552
4553    if (exp > expmax) {
4554        *flags = FPLIB_IOC;
4555        return ((uint64_t)!u << (FP64_BITS - 1)) - !sgn;
4556    }
4557
4558    x = lsr64(mnt << FP64_EXP_BITS, expmax - exp);
4559    err = (exp > expmax - 2 ? 0 :
4560           (lsr64(mnt << FP64_EXP_BITS, expmax - 2 - exp) & 3) |
4561           !!(mnt << FP64_EXP_BITS & (lsl64(1, expmax - 2 - exp) - 1)));
4562
4563    switch (rounding) {
4564      case FPRounding_TIEEVEN:
4565        x += (err == 3 || (err == 2 && (x & 1)));
4566        break;
4567      case FPRounding_POSINF:
4568        x += err && !sgn;
4569        break;
4570      case FPRounding_NEGINF:
4571        x += err && sgn;
4572        break;
4573      case FPRounding_ZERO:
4574        break;
4575      case FPRounding_TIEAWAY:
4576        x += err >> 1;
4577        break;
4578      default:
4579        panic("Unrecognized FP rounding mode");
4580    }
4581
4582    if (u ? sgn && x : x > (1ULL << (FP64_BITS - 1)) - !sgn) {
4583        *flags = FPLIB_IOC;
4584        return ((uint64_t)!u << (FP64_BITS - 1)) - !sgn;
4585    }
4586
4587    if (err) {
4588        *flags = FPLIB_IXC;
4589    }
4590
4591    return sgn ? -x : x;
4592}
4593
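// The 32- and 16-bit conversions below reuse FPToFixed_64 and then clamp
// results outside the narrower signed or unsigned range, again raising
// Invalid Operation on saturation.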
4594static uint32_t
4595FPToFixed_32(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding,
4596             int *flags)
4597{
4598    uint64_t x = FPToFixed_64(sgn, exp, mnt, u, rounding, flags);
4599    if (u ? x >= 1ULL << FP32_BITS :
4600        !(x < 1ULL << (FP32_BITS - 1) ||
4601          (uint64_t)-x <= (uint64_t)1 << (FP32_BITS - 1))) {
4602        *flags = FPLIB_IOC;
4603        x = ((uint32_t)!u << (FP32_BITS - 1)) - !sgn;
4604    }
4605    return x;
4606}
4607
4608static uint16_t
4609FPToFixed_16(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding,
4610             int *flags)
4611{
4612    uint64_t x = FPToFixed_64(sgn, exp, mnt, u, rounding, flags);
4613    if (u ? x >= 1ULL << FP16_BITS :
4614        !(x < 1ULL << (FP16_BITS - 1) ||
4615          (uint64_t)-x <= (uint64_t)1 << (FP16_BITS - 1))) {
4616        *flags = FPLIB_IOC;
4617        x = ((uint16_t)!u << (FP16_BITS - 1)) - !sgn;
4618    }
4619    return x;
4620}
4621
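// fplibFPToFixed: each source width is rescaled into fp64 terms (the bias
// difference is added to the exponent and the mantissa is shifted up to the
// fp64 position) so that the helpers above serve every combination; adding
// fbits to the exponent places the fixed-point binary point.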
4622template <>
4623uint16_t
4624fplibFPToFixed(uint16_t op, int fbits, bool u, FPRounding rounding,
4625               FPSCR &fpscr)
4626{
4627    int flags = 0;
4628    int sgn, exp;
4629    uint16_t mnt, result;
4630
4631    // Unpack using FPCR to determine if subnormals are flushed-to-zero:
4632    fp16_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);
4633
4634    // If NaN, set cumulative flag or take exception:
4635    if (fp16_is_NaN(exp, mnt)) {
4636        flags = FPLIB_IOC;
4637        result = 0;
4638    } else {
4639        assert(fbits >= 0);
4640        // Infinity is treated as an ordinary normalised number that saturates.
4641        result =
4642            FPToFixed_16(sgn, exp + FP64_EXP_BIAS - FP16_EXP_BIAS + fbits,
4643                         (uint64_t)mnt << (FP64_MANT_BITS - FP16_MANT_BITS),
4644                         u, rounding, &flags);
4645    }
4646
4647    set_fpscr0(fpscr, flags);
4648
4649    return result;
4650}
4651
4652template <>
4653uint32_t
4654fplibFPToFixed(uint16_t op, int fbits, bool u, FPRounding rounding,
4655               FPSCR &fpscr)
4656{
4657    int flags = 0;
4658    int sgn, exp;
4659    uint16_t mnt;
4660    uint32_t result;
4661
4662    // Unpack using FPCR to determine if subnormals are flushed-to-zero:
4663    fp16_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);
4664
4665    // If NaN, set cumulative flag or take exception:
4666    if (fp16_is_NaN(exp, mnt)) {
4667        flags = FPLIB_IOC;
4668        result = 0;
4669    } else {
4670        assert(fbits >= 0);
        if (exp == FP16_EXP_INF) {
            // An fp16 infinity unpacks to exponent 31, which is not large
            // enough to saturate a 32-bit result, so substitute one that is:
            exp = 255;
        }
4673        result =
4674            FPToFixed_32(sgn, exp + FP64_EXP_BIAS - FP16_EXP_BIAS + fbits,
4675                         (uint64_t)mnt << (FP64_MANT_BITS - FP16_MANT_BITS),
4676                         u, rounding, &flags);
4677    }
4678
4679    set_fpscr0(fpscr, flags);
4680
4681    return result;
4682}
4683
4684template <>
4685uint32_t
4686fplibFPToFixed(uint32_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
4687{
4688    int flags = 0;
4689    int sgn, exp;
4690    uint32_t mnt, result;
4691
4692    // Unpack using FPCR to determine if subnormals are flushed-to-zero:
4693    fp32_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);
4694
4695    // If NaN, set cumulative flag or take exception:
4696    if (fp32_is_NaN(exp, mnt)) {
4697        flags = FPLIB_IOC;
4698        result = 0;
4699    } else {
4700        assert(fbits >= 0);
4701        // Infinity is treated as an ordinary normalised number that saturates.
4702        result =
4703            FPToFixed_32(sgn, exp + FP64_EXP_BIAS - FP32_EXP_BIAS + fbits,
4704                         (uint64_t)mnt << (FP64_MANT_BITS - FP32_MANT_BITS),
4705                         u, rounding, &flags);
4706    }
4707
4708    set_fpscr0(fpscr, flags);
4709
4710    return result;
4711}
4712
4713template <>
4714uint32_t
4715fplibFPToFixed(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
4716{
4717    int flags = 0;
4718    int sgn, exp;
4719    uint64_t mnt;
4720    uint32_t result;
4721
4722    // Unpack using FPCR to determine if subnormals are flushed-to-zero:
4723    fp64_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);
4724
4725    // If NaN, set cumulative flag or take exception:
4726    if (fp64_is_NaN(exp, mnt)) {
4727        flags = FPLIB_IOC;
4728        result = 0;
4729    } else {
4730        assert(fbits >= 0);
4731        // Infinity is treated as an ordinary normalised number that saturates.
4732        result = FPToFixed_32(sgn, exp + fbits, mnt, u, rounding, &flags);
4733    }
4734
4735    set_fpscr0(fpscr, flags);
4736
4737    return result;
4738}
4739
4740template <>
4741uint64_t
4742fplibFPToFixed(uint16_t op, int fbits, bool u, FPRounding rounding,
4743               FPSCR &fpscr)
4744{
4745    int flags = 0;
4746    int sgn, exp;
4747    uint16_t mnt;
4748    uint64_t result;
4749
4750    // Unpack using FPCR to determine if subnormals are flushed-to-zero:
4751    fp16_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);
4752
4753    // If NaN, set cumulative flag or take exception:
4754    if (fp16_is_NaN(exp, mnt)) {
4755        flags = FPLIB_IOC;
4756        result = 0;
4757    } else {
4758        assert(fbits >= 0);
        if (exp == FP16_EXP_INF) {
            // An fp16 infinity unpacks to exponent 31, which is not large
            // enough to saturate a 64-bit result, so substitute one that is:
            exp = 255;
        }
4761        result =
4762            FPToFixed_64(sgn, exp + FP64_EXP_BIAS - FP16_EXP_BIAS + fbits,
4763                         (uint64_t)mnt << (FP64_MANT_BITS - FP16_MANT_BITS),
4764                         u, rounding, &flags);
4765    }
4766
4767    set_fpscr0(fpscr, flags);
4768
4769    return result;
4770}
4771
4772template <>
4773uint64_t
4774fplibFPToFixed(uint32_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
4775{
4776    int flags = 0;
4777    int sgn, exp;
4778    uint32_t mnt;
4779    uint64_t result;
4780
4781    // Unpack using FPCR to determine if subnormals are flushed-to-zero:
4782    fp32_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);
4783
4784    // If NaN, set cumulative flag or take exception:
4785    if (fp32_is_NaN(exp, mnt)) {
4786        flags = FPLIB_IOC;
4787        result = 0;
4788    } else {
4789        assert(fbits >= 0);
4790        // Infinity is treated as an ordinary normalised number that saturates.
4791        result =
4792            FPToFixed_64(sgn, exp + FP64_EXP_BIAS - FP32_EXP_BIAS + fbits,
4793                         (uint64_t)mnt << (FP64_MANT_BITS - FP32_MANT_BITS),
4794                         u, rounding, &flags);
4795    }
4796
4797    set_fpscr0(fpscr, flags);
4798
4799    return result;
4800}
4801
4802template <>
4803uint64_t
4804fplibFPToFixed(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
4805{
4806    int flags = 0;
4807    int sgn, exp;
4808    uint64_t mnt, result;
4809
4810    // Unpack using FPCR to determine if subnormals are flushed-to-zero:
4811    fp64_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);
4812
4813    // If NaN, set cumulative flag or take exception:
4814    if (fp64_is_NaN(exp, mnt)) {
4815        flags = FPLIB_IOC;
4816        result = 0;
4817    } else {
4818        assert(fbits >= 0);
4819        // Infinity is treated as an ordinary normalised number that saturates.
4820        result = FPToFixed_64(sgn, exp + fbits, mnt, u, rounding, &flags);
4821    }
4822
4823    set_fpscr0(fpscr, flags);
4824
4825    return result;
4826}
4827
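// fpXX_cvtf: convert a 64-bit fixed-point integer, signed unless u is set
// and with fbits fractional bits, to floating point. The exponent starts at
// bias + FP64_BITS - 1 - fbits, as if the integer's top bit were the
// implicit one; normalisation slides the actual leading one into place,
// and, for the narrower targets, bits below the mantissa width are
// collapsed into a sticky bit before rounding.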
4828static uint16_t
4829fp16_cvtf(uint64_t a, int fbits, int u, int mode, int *flags)
4830{
4831    int x_sgn = !u && a >> (FP64_BITS - 1);
4832    int x_exp = FP16_EXP_BIAS + FP64_BITS - 1 - fbits;
4833    uint64_t x_mnt = x_sgn ? -a : a;
4834
4835    // Handle zero:
4836    if (!x_mnt) {
4837        return fp16_zero(0);
4838    }
4839
4840    // Normalise into FP16_BITS bits, collapsing error into bottom bit:
4841    x_mnt = fp64_normalise(x_mnt, &x_exp);
4842    x_mnt = (x_mnt >> (FP64_BITS - FP16_BITS - 1) |
4843             !!(x_mnt & ((1ULL << (FP64_BITS - FP16_BITS - 1)) - 1)));
4844
4845    return fp16_round(x_sgn, x_exp, x_mnt, mode, flags);
4846}
4847
4848static uint32_t
4849fp32_cvtf(uint64_t a, int fbits, int u, int mode, int *flags)
4850{
4851    int x_sgn = !u && a >> (FP64_BITS - 1);
4852    int x_exp = FP32_EXP_BIAS + FP64_BITS - 1 - fbits;
4853    uint64_t x_mnt = x_sgn ? -a : a;
4854
4855    // Handle zero:
4856    if (!x_mnt) {
4857        return fp32_zero(0);
4858    }
4859
4860    // Normalise into FP32_BITS bits, collapsing error into bottom bit:
4861    x_mnt = fp64_normalise(x_mnt, &x_exp);
4862    x_mnt = (x_mnt >> (FP64_BITS - FP32_BITS - 1) |
4863             !!(x_mnt & ((1ULL << (FP64_BITS - FP32_BITS - 1)) - 1)));
4864
4865    return fp32_round(x_sgn, x_exp, x_mnt, mode, flags);
4866}
4867
4868static uint64_t
4869fp64_cvtf(uint64_t a, int fbits, int u, int mode, int *flags)
4870{
4871    int x_sgn = !u && a >> (FP64_BITS - 1);
4872    int x_exp = FP64_EXP_BIAS + FP64_BITS - 1 - fbits;
4873    uint64_t x_mnt = x_sgn ? -a : a;
4874
4875    // Handle zero:
4876    if (!x_mnt) {
4877        return fp64_zero(0);
4878    }
4879
4880    x_mnt = fp64_normalise(x_mnt, &x_exp);
4881
4882    return fp64_round(x_sgn, x_exp, x_mnt << 1, mode, flags);
4883}
4884
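// The mode passed to the conversion combines the caller's rounding mode
// with the FZ and DN bits of the FPSCR: bits 24 and 25, shifted down by 22,
// land exactly on FPLIB_FZ (4) and FPLIB_DN (8).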
4885template <>
4886uint16_t
4887fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding,
4888               FPSCR &fpscr)
4889{
4890    int flags = 0;
4891    uint16_t res = fp16_cvtf(op, fbits, u,
4892                             (int)rounding | ((uint32_t)fpscr >> 22 & 12),
4893                             &flags);
4894    set_fpscr0(fpscr, flags);
4895    return res;
4896}
4897
4898template <>
4899uint32_t
4900fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
4901{
4902    int flags = 0;
4903    uint32_t res = fp32_cvtf(op, fbits, u,
4904                             (int)rounding | ((uint32_t)fpscr >> 22 & 12),
4905                             &flags);
4906    set_fpscr0(fpscr, flags);
4907    return res;
4908}
4909
4910template <>
4911uint64_t
4912fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
4913{
4914    int flags = 0;
4915    uint64_t res = fp64_cvtf(op, fbits, u,
4916                             (int)rounding | ((uint32_t)fpscr >> 22 & 12),
4917                             &flags);
4918    set_fpscr0(fpscr, flags);
4919    return res;
4920}
4921
4922template <>
4923uint16_t
4924fplibInfinity(int sgn)
4925{
4926    return fp16_infinity(sgn);
4927}
4928
4929template <>
4930uint32_t
4931fplibInfinity(int sgn)
4932{
4933    return fp32_infinity(sgn);
4934}
4935
4936template <>
4937uint64_t
4938fplibInfinity(int sgn)
4939{
4940    return fp64_infinity(sgn);
4941}
4942
4943template <>
4944uint16_t
4945fplibDefaultNaN()
4946{
4947    return fp16_defaultNaN();
4948}
4949
4950template <>
4951uint32_t
4952fplibDefaultNaN()
4953{
4954    return fp32_defaultNaN();
4955}
4956
4957template <>
4958uint64_t
4959fplibDefaultNaN()
4960{
4961    return fp64_defaultNaN();
4962}
4963
} // namespace ArmISA
4965