fplib.cc revision 13118:897ff5214d07
/*
 * Copyright (c) 2012-2013, 2017-2018 ARM Limited
 * All rights reserved
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder.  You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Edmund Grimley Evans
 *          Thomas Grocutt
 */

#include <stdint.h>

#include <cassert>

#include "fplib.hh"

namespace ArmISA
{

#define FPLIB_RN 0
#define FPLIB_RP 1
#define FPLIB_RM 2
#define FPLIB_RZ 3
#define FPLIB_FZ 4
#define FPLIB_DN 8
#define FPLIB_AHP 16
#define FPLIB_FZ16 32

#define FPLIB_IDC 128 // Input Denormal
#define FPLIB_IXC 16  // Inexact
#define FPLIB_UFC 8   // Underflow
#define FPLIB_OFC 4   // Overflow
#define FPLIB_DZC 2   // Division by Zero
#define FPLIB_IOC 1   // Invalid Operation

#define FP16_BITS 16
#define FP32_BITS 32
#define FP64_BITS 64

#define FP16_EXP_BITS 5
#define FP32_EXP_BITS 8
#define FP64_EXP_BITS 11

#define FP16_EXP_BIAS 15
#define FP32_EXP_BIAS 127
#define FP64_EXP_BIAS 1023

#define FP16_EXP_INF ((1ULL << FP16_EXP_BITS) - 1)
#define FP32_EXP_INF ((1ULL << FP32_EXP_BITS) - 1)
#define FP64_EXP_INF ((1ULL << FP64_EXP_BITS) - 1)

#define FP16_MANT_BITS (FP16_BITS - FP16_EXP_BITS - 1)
#define FP32_MANT_BITS (FP32_BITS - FP32_EXP_BITS - 1)
#define FP64_MANT_BITS (FP64_BITS - FP64_EXP_BITS - 1)

#define FP16_EXP(x) ((x) >> FP16_MANT_BITS & ((1ULL << FP16_EXP_BITS) - 1))
#define FP32_EXP(x) ((x) >> FP32_MANT_BITS & ((1ULL << FP32_EXP_BITS) - 1))
#define FP64_EXP(x) ((x) >> FP64_MANT_BITS & ((1ULL << FP64_EXP_BITS) - 1))

#define FP16_MANT(x) ((x) & ((1ULL << FP16_MANT_BITS) - 1))
#define FP32_MANT(x) ((x) & ((1ULL << FP32_MANT_BITS) - 1))
#define FP64_MANT(x) ((x) & ((1ULL << FP64_MANT_BITS) - 1))
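// Worked example of the field macros (a property of the IEEE 754
// encodings, not specific to this file): 1.0f is 0x3f800000, so
// FP32_EXP gives 0x7f (the biased exponent, bias 127) and FP32_MANT
// gives 0 -- the leading 1 of the significand is implicit. Likewise
// 0x3c00 is 1.0 in half precision and 0x3ff0000000000000 is 1.0 in
// double precision.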

static inline uint16_t
lsl16(uint16_t x, uint32_t shift)
{
    return shift < 16 ? x << shift : 0;
}

static inline uint16_t
lsr16(uint16_t x, uint32_t shift)
{
    return shift < 16 ? x >> shift : 0;
}

static inline uint32_t
lsl32(uint32_t x, uint32_t shift)
{
    return shift < 32 ? x << shift : 0;
}

static inline uint32_t
lsr32(uint32_t x, uint32_t shift)
{
    return shift < 32 ? x >> shift : 0;
}

static inline uint64_t
lsl64(uint64_t x, uint32_t shift)
{
    return shift < 64 ? x << shift : 0;
}

static inline uint64_t
lsr64(uint64_t x, uint32_t shift)
{
    return shift < 64 ? x >> shift : 0;
}

static inline void
lsl128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift)
{
    if (shift == 0) {
        *r1 = x1;
        *r0 = x0;
    } else if (shift < 64) {
        *r1 = x1 << shift | x0 >> (64 - shift);
        *r0 = x0 << shift;
    } else if (shift < 128) {
        *r1 = x0 << (shift - 64);
        *r0 = 0;
    } else {
        *r1 = 0;
        *r0 = 0;
    }
}

static inline void
lsr128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift)
{
    if (shift == 0) {
        *r1 = x1;
        *r0 = x0;
    } else if (shift < 64) {
        *r0 = x0 >> shift | x1 << (64 - shift);
        *r1 = x1 >> shift;
    } else if (shift < 128) {
        *r0 = x1 >> (shift - 64);
        *r1 = 0;
    } else {
        *r0 = 0;
        *r1 = 0;
    }
}

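// Karatsuba-style multiplication of two operands of at most 62 bits,
// producing a 124-bit product split across (*x1:*x0), with *x0 holding
// the low 64 bits. Each operand is split into 31-bit halves; the middle
// partial product is formed as (a0 + a1) * (b0 + b1) - p0 - p2, which
// cannot overflow 64 bits because the operands fit in 62 bits.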
static inline void
mul62x62(uint64_t *x0, uint64_t *x1, uint64_t a, uint64_t b)
{
    uint32_t mask = ((uint32_t)1 << 31) - 1;
    uint64_t a0 = a & mask;
    uint64_t a1 = a >> 31 & mask;
    uint64_t b0 = b & mask;
    uint64_t b1 = b >> 31 & mask;
    uint64_t p0 = a0 * b0;
    uint64_t p2 = a1 * b1;
    uint64_t p1 = (a0 + a1) * (b0 + b1) - p0 - p2;
    uint64_t s0 = p0;
    uint64_t s1 = (s0 >> 31) + p1;
    uint64_t s2 = (s1 >> 31) + p2;
    *x0 = (s0 & mask) | (s1 & mask) << 31 | s2 << 62;
    *x1 = s2 >> 2;
}

static inline void
mul64x32(uint64_t *x0, uint64_t *x1, uint64_t a, uint32_t b)
{
    uint64_t t0 = (uint64_t)(uint32_t)a * b;
    uint64_t t1 = (t0 >> 32) + (a >> 32) * b;
    *x0 = t1 << 32 | (uint32_t)t0;
    *x1 = t1 >> 32;
}

static inline void
add128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0,
       uint64_t b1)
{
    *x0 = a0 + b0;
    *x1 = a1 + b1 + (*x0 < a0);
}

static inline void
sub128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0,
       uint64_t b1)
{
    *x0 = a0 - b0;
    *x1 = a1 - b1 - (*x0 > a0);
}

static inline int
cmp128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1)
{
    return (a1 < b1 ? -1 : a1 > b1 ? 1 : a0 < b0 ? -1 : a0 > b0 ? 1 : 0);
}

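// The fpN_normalise functions shift a non-zero mantissa left until its
// most significant bit reaches the top of the word, decrementing *exp
// by the total shift. The loop halves the candidate shift each pass
// (8, 4, 2, 1 for 16 bits), a binary search that takes log2(N) steps
// rather than testing one bit position at a time.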
static inline uint16_t
fp16_normalise(uint16_t mnt, int *exp)
{
    int shift;

    if (!mnt) {
        return 0;
    }

    for (shift = 8; shift; shift >>= 1) {
        if (!(mnt >> (16 - shift))) {
            mnt <<= shift;
            *exp -= shift;
        }
    }
    return mnt;
}

static inline uint32_t
fp32_normalise(uint32_t mnt, int *exp)
{
    int shift;

    if (!mnt) {
        return 0;
    }

    for (shift = 16; shift; shift >>= 1) {
        if (!(mnt >> (32 - shift))) {
            mnt <<= shift;
            *exp -= shift;
        }
    }
    return mnt;
}

static inline uint64_t
fp64_normalise(uint64_t mnt, int *exp)
{
    int shift;

    if (!mnt) {
        return 0;
    }

    for (shift = 32; shift; shift >>= 1) {
        if (!(mnt >> (64 - shift))) {
            mnt <<= shift;
            *exp -= shift;
        }
    }
    return mnt;
}

static inline void
fp128_normalise(uint64_t *mnt0, uint64_t *mnt1, int *exp)
{
    uint64_t x0 = *mnt0;
    uint64_t x1 = *mnt1;
    int shift;

    if (!x0 && !x1) {
        return;
    }

    if (!x1) {
        x1 = x0;
        x0 = 0;
        *exp -= 64;
    }

    for (shift = 32; shift; shift >>= 1) {
        if (!(x1 >> (64 - shift))) {
            x1 = x1 << shift | x0 >> (64 - shift);
            x0 <<= shift;
            *exp -= shift;
        }
    }

    *mnt0 = x0;
    *mnt1 = x1;
}

static inline uint16_t
fp16_pack(uint16_t sgn, uint16_t exp, uint16_t mnt)
{
    return sgn << (FP16_BITS - 1) | exp << FP16_MANT_BITS | FP16_MANT(mnt);
}

static inline uint32_t
fp32_pack(uint32_t sgn, uint32_t exp, uint32_t mnt)
{
    return sgn << (FP32_BITS - 1) | exp << FP32_MANT_BITS | FP32_MANT(mnt);
}

static inline uint64_t
fp64_pack(uint64_t sgn, uint64_t exp, uint64_t mnt)
{
    return sgn << (FP64_BITS - 1) | exp << FP64_MANT_BITS | FP64_MANT(mnt);
}

static inline uint16_t
fp16_zero(int sgn)
{
    return fp16_pack(sgn, 0, 0);
}

static inline uint32_t
fp32_zero(int sgn)
{
    return fp32_pack(sgn, 0, 0);
}

static inline uint64_t
fp64_zero(int sgn)
{
    return fp64_pack(sgn, 0, 0);
}

static inline uint16_t
fp16_max_normal(int sgn)
{
    return fp16_pack(sgn, FP16_EXP_INF - 1, -1);
}

static inline uint32_t
fp32_max_normal(int sgn)
{
    return fp32_pack(sgn, FP32_EXP_INF - 1, -1);
}

static inline uint64_t
fp64_max_normal(int sgn)
{
    return fp64_pack(sgn, FP64_EXP_INF - 1, -1);
}

static inline uint16_t
fp16_infinity(int sgn)
{
    return fp16_pack(sgn, FP16_EXP_INF, 0);
}

static inline uint32_t
fp32_infinity(int sgn)
{
    return fp32_pack(sgn, FP32_EXP_INF, 0);
}

static inline uint64_t
fp64_infinity(int sgn)
{
    return fp64_pack(sgn, FP64_EXP_INF, 0);
}

static inline uint16_t
fp16_defaultNaN()
{
    return fp16_pack(0, FP16_EXP_INF, 1ULL << (FP16_MANT_BITS - 1));
}

static inline uint32_t
fp32_defaultNaN()
{
    return fp32_pack(0, FP32_EXP_INF, 1ULL << (FP32_MANT_BITS - 1));
}

static inline uint64_t
fp64_defaultNaN()
{
    return fp64_pack(0, FP64_EXP_INF, 1ULL << (FP64_MANT_BITS - 1));
}

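// The fpN_unpack functions split an operand into sign, biased exponent
// and mantissa, making the implicit leading 1 explicit for normal
// numbers. For subnormal inputs the exponent is incremented instead,
// putting the mantissa on the same scale as the smallest normals. In
// flush-to-zero mode a non-zero subnormal is replaced by zero; for
// single and double precision this raises Input Denormal (IDC), while
// half precision under FZ16 deliberately does not.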
static inline void
fp16_unpack(int *sgn, int *exp, uint16_t *mnt, uint16_t x, int mode,
            int *flags)
{
    *sgn = x >> (FP16_BITS - 1);
    *exp = FP16_EXP(x);
    *mnt = FP16_MANT(x);

    // Handle subnormals:
    if (*exp) {
        *mnt |= 1ULL << FP16_MANT_BITS;
    } else {
        ++*exp;
        // IDC (Input Denormal) is not set in this case.
        if (mode & FPLIB_FZ16)
            *mnt = 0;
    }
}

static inline void
fp32_unpack(int *sgn, int *exp, uint32_t *mnt, uint32_t x, int mode,
            int *flags)
{
    *sgn = x >> (FP32_BITS - 1);
    *exp = FP32_EXP(x);
    *mnt = FP32_MANT(x);

    // Handle subnormals:
    if (*exp) {
        *mnt |= 1ULL << FP32_MANT_BITS;
    } else {
        ++*exp;
        if ((mode & FPLIB_FZ) && *mnt) {
            *flags |= FPLIB_IDC;
            *mnt = 0;
        }
    }
}

static inline void
fp64_unpack(int *sgn, int *exp, uint64_t *mnt, uint64_t x, int mode,
            int *flags)
{
    *sgn = x >> (FP64_BITS - 1);
    *exp = FP64_EXP(x);
    *mnt = FP64_MANT(x);

    // Handle subnormals:
    if (*exp) {
        *mnt |= 1ULL << FP64_MANT_BITS;
    } else {
        ++*exp;
        if ((mode & FPLIB_FZ) && *mnt) {
            *flags |= FPLIB_IDC;
            *mnt = 0;
        }
    }
}

static inline int
fp16_is_NaN(int exp, uint16_t mnt)
{
    return exp == FP16_EXP_INF && FP16_MANT(mnt);
}

static inline int
fp32_is_NaN(int exp, uint32_t mnt)
{
    return exp == FP32_EXP_INF && FP32_MANT(mnt);
}

static inline int
fp64_is_NaN(int exp, uint64_t mnt)
{
    return exp == FP64_EXP_INF && FP64_MANT(mnt);
}

static inline int
fp16_is_signalling_NaN(int exp, uint16_t mnt)
{
    return fp16_is_NaN(exp, mnt) && !(mnt >> (FP16_MANT_BITS - 1) & 1);
}

static inline int
fp32_is_signalling_NaN(int exp, uint32_t mnt)
{
    return fp32_is_NaN(exp, mnt) && !(mnt >> (FP32_MANT_BITS - 1) & 1);
}

static inline int
fp64_is_signalling_NaN(int exp, uint64_t mnt)
{
    return fp64_is_NaN(exp, mnt) && !(mnt >> (FP64_MANT_BITS - 1) & 1);
}

static inline int
fp16_is_quiet_NaN(int exp, uint16_t mnt)
{
    return exp == FP16_EXP_INF && (mnt >> (FP16_MANT_BITS - 1) & 1);
}

static inline int
fp32_is_quiet_NaN(int exp, uint32_t mnt)
{
    return exp == FP32_EXP_INF && (mnt >> (FP32_MANT_BITS - 1) & 1);
}

static inline int
fp64_is_quiet_NaN(int exp, uint64_t mnt)
{
    return exp == FP64_EXP_INF && (mnt >> (FP64_MANT_BITS - 1) & 1);
}

static inline int
fp16_is_infinity(int exp, uint16_t mnt)
{
    return exp == FP16_EXP_INF && !FP16_MANT(mnt);
}

static inline int
fp32_is_infinity(int exp, uint32_t mnt)
{
    return exp == FP32_EXP_INF && !FP32_MANT(mnt);
}

static inline int
fp64_is_infinity(int exp, uint64_t mnt)
{
    return exp == FP64_EXP_INF && !FP64_MANT(mnt);
}

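// fpN_process_NaN quiets a signalling NaN by setting the top mantissa
// bit and raising Invalid Operation; in default-NaN mode the result is
// replaced by the default NaN. The fpN_process_NaNs{,3} helpers below
// use it to implement propagation priority: signalling NaNs before
// quiet NaNs, earlier operands before later ones. A zero return means
// no operand was a NaN; zero cannot be mistaken for a propagated NaN,
// since any NaN has an all-ones exponent field.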
static inline uint16_t
fp16_process_NaN(uint16_t a, int mode, int *flags)
{
    if (!(a >> (FP16_MANT_BITS - 1) & 1)) {
        *flags |= FPLIB_IOC;
        a |= 1ULL << (FP16_MANT_BITS - 1);
    }
    return mode & FPLIB_DN ? fp16_defaultNaN() : a;
}

static inline uint32_t
fp32_process_NaN(uint32_t a, int mode, int *flags)
{
    if (!(a >> (FP32_MANT_BITS - 1) & 1)) {
        *flags |= FPLIB_IOC;
        a |= 1ULL << (FP32_MANT_BITS - 1);
    }
    return mode & FPLIB_DN ? fp32_defaultNaN() : a;
}

static inline uint64_t
fp64_process_NaN(uint64_t a, int mode, int *flags)
{
    if (!(a >> (FP64_MANT_BITS - 1) & 1)) {
        *flags |= FPLIB_IOC;
        a |= 1ULL << (FP64_MANT_BITS - 1);
    }
    return mode & FPLIB_DN ? fp64_defaultNaN() : a;
}

static uint16_t
fp16_process_NaNs(uint16_t a, uint16_t b, int mode, int *flags)
{
    int a_exp = FP16_EXP(a);
    uint16_t a_mnt = FP16_MANT(a);
    int b_exp = FP16_EXP(b);
    uint16_t b_mnt = FP16_MANT(b);

    // Handle signalling NaNs:
    if (fp16_is_signalling_NaN(a_exp, a_mnt))
        return fp16_process_NaN(a, mode, flags);
    if (fp16_is_signalling_NaN(b_exp, b_mnt))
        return fp16_process_NaN(b, mode, flags);

    // Handle quiet NaNs:
    if (fp16_is_NaN(a_exp, a_mnt))
        return fp16_process_NaN(a, mode, flags);
    if (fp16_is_NaN(b_exp, b_mnt))
        return fp16_process_NaN(b, mode, flags);

    return 0;
}

static uint32_t
fp32_process_NaNs(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_exp = FP32_EXP(a);
    uint32_t a_mnt = FP32_MANT(a);
    int b_exp = FP32_EXP(b);
    uint32_t b_mnt = FP32_MANT(b);

    // Handle signalling NaNs:
    if (fp32_is_signalling_NaN(a_exp, a_mnt))
        return fp32_process_NaN(a, mode, flags);
    if (fp32_is_signalling_NaN(b_exp, b_mnt))
        return fp32_process_NaN(b, mode, flags);

    // Handle quiet NaNs:
    if (fp32_is_NaN(a_exp, a_mnt))
        return fp32_process_NaN(a, mode, flags);
    if (fp32_is_NaN(b_exp, b_mnt))
        return fp32_process_NaN(b, mode, flags);

    return 0;
}

static uint64_t
fp64_process_NaNs(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_exp = FP64_EXP(a);
    uint64_t a_mnt = FP64_MANT(a);
    int b_exp = FP64_EXP(b);
    uint64_t b_mnt = FP64_MANT(b);

    // Handle signalling NaNs:
    if (fp64_is_signalling_NaN(a_exp, a_mnt))
        return fp64_process_NaN(a, mode, flags);
    if (fp64_is_signalling_NaN(b_exp, b_mnt))
        return fp64_process_NaN(b, mode, flags);

    // Handle quiet NaNs:
    if (fp64_is_NaN(a_exp, a_mnt))
        return fp64_process_NaN(a, mode, flags);
    if (fp64_is_NaN(b_exp, b_mnt))
        return fp64_process_NaN(b, mode, flags);

    return 0;
}

static uint16_t
fp16_process_NaNs3(uint16_t a, uint16_t b, uint16_t c, int mode, int *flags)
{
    int a_exp = FP16_EXP(a);
    uint16_t a_mnt = FP16_MANT(a);
    int b_exp = FP16_EXP(b);
    uint16_t b_mnt = FP16_MANT(b);
    int c_exp = FP16_EXP(c);
    uint16_t c_mnt = FP16_MANT(c);

    // Handle signalling NaNs:
    if (fp16_is_signalling_NaN(a_exp, a_mnt))
        return fp16_process_NaN(a, mode, flags);
    if (fp16_is_signalling_NaN(b_exp, b_mnt))
        return fp16_process_NaN(b, mode, flags);
    if (fp16_is_signalling_NaN(c_exp, c_mnt))
        return fp16_process_NaN(c, mode, flags);

    // Handle quiet NaNs:
    if (fp16_is_NaN(a_exp, a_mnt))
        return fp16_process_NaN(a, mode, flags);
    if (fp16_is_NaN(b_exp, b_mnt))
        return fp16_process_NaN(b, mode, flags);
    if (fp16_is_NaN(c_exp, c_mnt))
        return fp16_process_NaN(c, mode, flags);

    return 0;
}

static uint32_t
fp32_process_NaNs3(uint32_t a, uint32_t b, uint32_t c, int mode, int *flags)
{
    int a_exp = FP32_EXP(a);
    uint32_t a_mnt = FP32_MANT(a);
    int b_exp = FP32_EXP(b);
    uint32_t b_mnt = FP32_MANT(b);
    int c_exp = FP32_EXP(c);
    uint32_t c_mnt = FP32_MANT(c);

    // Handle signalling NaNs:
    if (fp32_is_signalling_NaN(a_exp, a_mnt))
        return fp32_process_NaN(a, mode, flags);
    if (fp32_is_signalling_NaN(b_exp, b_mnt))
        return fp32_process_NaN(b, mode, flags);
    if (fp32_is_signalling_NaN(c_exp, c_mnt))
        return fp32_process_NaN(c, mode, flags);

    // Handle quiet NaNs:
    if (fp32_is_NaN(a_exp, a_mnt))
        return fp32_process_NaN(a, mode, flags);
    if (fp32_is_NaN(b_exp, b_mnt))
        return fp32_process_NaN(b, mode, flags);
    if (fp32_is_NaN(c_exp, c_mnt))
        return fp32_process_NaN(c, mode, flags);

    return 0;
}

static uint64_t
fp64_process_NaNs3(uint64_t a, uint64_t b, uint64_t c, int mode, int *flags)
{
    int a_exp = FP64_EXP(a);
    uint64_t a_mnt = FP64_MANT(a);
    int b_exp = FP64_EXP(b);
    uint64_t b_mnt = FP64_MANT(b);
    int c_exp = FP64_EXP(c);
    uint64_t c_mnt = FP64_MANT(c);

    // Handle signalling NaNs:
    if (fp64_is_signalling_NaN(a_exp, a_mnt))
        return fp64_process_NaN(a, mode, flags);
    if (fp64_is_signalling_NaN(b_exp, b_mnt))
        return fp64_process_NaN(b, mode, flags);
    if (fp64_is_signalling_NaN(c_exp, c_mnt))
        return fp64_process_NaN(c, mode, flags);

    // Handle quiet NaNs:
    if (fp64_is_NaN(a_exp, a_mnt))
        return fp64_process_NaN(a, mode, flags);
    if (fp64_is_NaN(b_exp, b_mnt))
        return fp64_process_NaN(b, mode, flags);
    if (fp64_is_NaN(c_exp, c_mnt))
        return fp64_process_NaN(c, mode, flags);

    return 0;
}

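// The fpN_round_ functions accept a mantissa aligned so that once the
// bottom two bits are dropped, the result mantissa remains. Those two
// bits, combined with a sticky OR of everything discarded earlier,
// form `error`: 0 is exact, 2 is exactly half an ULP, and 1 and 3 are
// below and above the halfway point. Round-to-nearest therefore rounds
// up when error == 3, and on an exact tie (error == 2) only when the
// mantissa is odd, giving ties-to-even.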
static uint16_t
fp16_round_(int sgn, int exp, uint16_t mnt, int rm, int mode, int *flags)
{
    int biased_exp; // non-negative exponent value for result
    uint16_t int_mant; // mantissa for result, less than (2 << FP16_MANT_BITS)
    int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5

    assert(rm != FPRounding_TIEAWAY);

    // Flush to zero:
    if ((mode & FPLIB_FZ16) && exp < 1) {
        *flags |= FPLIB_UFC;
        return fp16_zero(sgn);
    }

    // The bottom FP16_EXP_BITS bits of mnt are orred together:
    mnt = (4ULL << FP16_MANT_BITS | mnt >> (FP16_EXP_BITS - 1) |
           ((mnt & ((1ULL << FP16_EXP_BITS) - 1)) != 0));

    if (exp > 0) {
        biased_exp = exp;
        int_mant = mnt >> 2;
        error = mnt & 3;
    } else {
        biased_exp = 0;
        int_mant = lsr16(mnt, 3 - exp);
        error = (lsr16(mnt, 1 - exp) & 3) | !!(mnt & (lsl16(1, 1 - exp) - 1));
    }

    if (!biased_exp && error) { // xx should also check fpscr_val<11>
        *flags |= FPLIB_UFC;
    }

    // Round up:
    if ((rm == FPLIB_RN && (error == 3 ||
                            (error == 2 && (int_mant & 1)))) ||
        (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) {
        ++int_mant;
        if (int_mant == 1ULL << FP16_MANT_BITS) {
            // Rounded up from denormalized to normalized
            biased_exp = 1;
        }
        if (int_mant == 2ULL << FP16_MANT_BITS) {
            // Rounded up to next exponent
            ++biased_exp;
            int_mant >>= 1;
        }
    }

    // Handle rounding to odd aka Von Neumann rounding:
    if (error && rm == FPRounding_ODD)
        int_mant |= 1;

    // Handle overflow:
    if (!(mode & FPLIB_AHP)) {
        if (biased_exp >= (int)FP16_EXP_INF) {
            *flags |= FPLIB_OFC | FPLIB_IXC;
            if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) ||
                (rm == FPLIB_RM && sgn)) {
                return fp16_infinity(sgn);
            } else {
                return fp16_max_normal(sgn);
            }
        }
    } else {
        if (biased_exp >= (int)FP16_EXP_INF + 1) {
            *flags |= FPLIB_IOC;
            return fp16_pack(sgn, FP16_EXP_INF, -1);
        }
    }

    if (error) {
        *flags |= FPLIB_IXC;
    }

    return fp16_pack(sgn, biased_exp, int_mant);
}

static uint16_t
fp16_round(int sgn, int exp, uint16_t mnt, int mode, int *flags)
{
    return fp16_round_(sgn, exp, mnt, mode & 3, mode, flags);
}

static uint32_t
fp32_round_(int sgn, int exp, uint32_t mnt, int rm, int mode, int *flags)
{
    int biased_exp; // non-negative exponent value for result
    uint32_t int_mant; // mantissa for result, less than (2 << FP32_MANT_BITS)
    int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5

    assert(rm != FPRounding_TIEAWAY);

    // Flush to zero:
    if ((mode & FPLIB_FZ) && exp < 1) {
        *flags |= FPLIB_UFC;
        return fp32_zero(sgn);
    }

    // The bottom FP32_EXP_BITS bits of mnt are orred together:
    mnt = (4ULL << FP32_MANT_BITS | mnt >> (FP32_EXP_BITS - 1) |
           ((mnt & ((1ULL << FP32_EXP_BITS) - 1)) != 0));

    if (exp > 0) {
        biased_exp = exp;
        int_mant = mnt >> 2;
        error = mnt & 3;
    } else {
        biased_exp = 0;
        int_mant = lsr32(mnt, 3 - exp);
        error = (lsr32(mnt, 1 - exp) & 3) | !!(mnt & (lsl32(1, 1 - exp) - 1));
    }

    if (!biased_exp && error) { // xx should also check fpscr_val<11>
        *flags |= FPLIB_UFC;
    }

    // Round up:
    if ((rm == FPLIB_RN && (error == 3 ||
                            (error == 2 && (int_mant & 1)))) ||
        (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) {
        ++int_mant;
        if (int_mant == 1ULL << FP32_MANT_BITS) {
            // Rounded up from denormalized to normalized
            biased_exp = 1;
        }
        if (int_mant == 2ULL << FP32_MANT_BITS) {
            // Rounded up to next exponent
            ++biased_exp;
            int_mant >>= 1;
        }
    }

    // Handle rounding to odd aka Von Neumann rounding:
    if (error && rm == FPRounding_ODD)
        int_mant |= 1;

    // Handle overflow:
    if (biased_exp >= (int)FP32_EXP_INF) {
        *flags |= FPLIB_OFC | FPLIB_IXC;
        if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) ||
            (rm == FPLIB_RM && sgn)) {
            return fp32_infinity(sgn);
        } else {
            return fp32_max_normal(sgn);
        }
    }

    if (error) {
        *flags |= FPLIB_IXC;
    }

    return fp32_pack(sgn, biased_exp, int_mant);
}

static uint32_t
fp32_round(int sgn, int exp, uint32_t mnt, int mode, int *flags)
{
    return fp32_round_(sgn, exp, mnt, mode & 3, mode, flags);
}

static uint64_t
fp64_round_(int sgn, int exp, uint64_t mnt, int rm, int mode, int *flags)
{
    int biased_exp; // non-negative exponent value for result
    uint64_t int_mant; // mantissa for result, less than (2 << FP64_MANT_BITS)
    int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5

    assert(rm != FPRounding_TIEAWAY);

    // Flush to zero:
    if ((mode & FPLIB_FZ) && exp < 1) {
        *flags |= FPLIB_UFC;
        return fp64_zero(sgn);
    }

    // The bottom FP64_EXP_BITS bits of mnt are orred together:
    mnt = (4ULL << FP64_MANT_BITS | mnt >> (FP64_EXP_BITS - 1) |
           ((mnt & ((1ULL << FP64_EXP_BITS) - 1)) != 0));

    if (exp > 0) {
        biased_exp = exp;
        int_mant = mnt >> 2;
        error = mnt & 3;
    } else {
        biased_exp = 0;
        int_mant = lsr64(mnt, 3 - exp);
        error = (lsr64(mnt, 1 - exp) & 3) | !!(mnt & (lsl64(1, 1 - exp) - 1));
    }

    if (!biased_exp && error) { // xx should also check fpscr_val<11>
        *flags |= FPLIB_UFC;
    }

    // Round up:
    if ((rm == FPLIB_RN && (error == 3 ||
                            (error == 2 && (int_mant & 1)))) ||
        (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) {
        ++int_mant;
        if (int_mant == 1ULL << FP64_MANT_BITS) {
            // Rounded up from denormalized to normalized
            biased_exp = 1;
        }
        if (int_mant == 2ULL << FP64_MANT_BITS) {
            // Rounded up to next exponent
            ++biased_exp;
            int_mant >>= 1;
        }
    }

    // Handle rounding to odd aka Von Neumann rounding:
    if (error && rm == FPRounding_ODD)
        int_mant |= 1;

    // Handle overflow:
    if (biased_exp >= (int)FP64_EXP_INF) {
        *flags |= FPLIB_OFC | FPLIB_IXC;
        if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) ||
            (rm == FPLIB_RM && sgn)) {
            return fp64_infinity(sgn);
        } else {
            return fp64_max_normal(sgn);
        }
    }

    if (error) {
        *flags |= FPLIB_IXC;
    }

    return fp64_pack(sgn, biased_exp, int_mant);
}

static uint64_t
fp64_round(int sgn, int exp, uint64_t mnt, int mode, int *flags)
{
    return fp64_round_(sgn, exp, mnt, mode & 3, mode, flags);
}

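// The comparison helpers exploit the sign-magnitude ordering of IEEE
// encodings: once the signs agree, a larger biased exponent or mantissa
// means a larger magnitude, and XOR with the sign flips the outcome for
// negative operands. Zeroes are handled first so that +0 and -0 compare
// equal. The _eq and _un variants raise Invalid Operation only for
// signalling NaNs; the ordered _ge and _gt variants raise it for any
// NaN operand.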
static int
fp16_compare_eq(uint16_t a, uint16_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint16_t a_mnt, b_mnt;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp16_is_NaN(a_exp, a_mnt) ||
        fp16_is_NaN(b_exp, b_mnt)) {
        if (fp16_is_signalling_NaN(a_exp, a_mnt) ||
            fp16_is_signalling_NaN(b_exp, b_mnt))
            *flags |= FPLIB_IOC;
        return 0;
    }
    return a == b || (!a_mnt && !b_mnt);
}

static int
fp16_compare_ge(uint16_t a, uint16_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint16_t a_mnt, b_mnt;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp16_is_NaN(a_exp, a_mnt) ||
        fp16_is_NaN(b_exp, b_mnt)) {
        *flags |= FPLIB_IOC;
        return 0;
    }
    if (!a_mnt && !b_mnt)
        return 1;
    if (a_sgn != b_sgn)
        return b_sgn;
    if (a_exp != b_exp)
        return a_sgn ^ (a_exp > b_exp);
    if (a_mnt != b_mnt)
        return a_sgn ^ (a_mnt > b_mnt);
    return 1;
}

static int
fp16_compare_gt(uint16_t a, uint16_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint16_t a_mnt, b_mnt;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp16_is_NaN(a_exp, a_mnt) ||
        fp16_is_NaN(b_exp, b_mnt)) {
        *flags |= FPLIB_IOC;
        return 0;
    }
    if (!a_mnt && !b_mnt)
        return 0;
    if (a_sgn != b_sgn)
        return b_sgn;
    if (a_exp != b_exp)
        return a_sgn ^ (a_exp > b_exp);
    if (a_mnt != b_mnt)
        return a_sgn ^ (a_mnt > b_mnt);
    return 0;
}

static int
fp16_compare_un(uint16_t a, uint16_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint16_t a_mnt, b_mnt;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp16_is_NaN(a_exp, a_mnt) ||
        fp16_is_NaN(b_exp, b_mnt)) {
        if (fp16_is_signalling_NaN(a_exp, a_mnt) ||
            fp16_is_signalling_NaN(b_exp, b_mnt))
            *flags |= FPLIB_IOC;
        return 1;
    }
    return 0;
}

static int
fp32_compare_eq(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint32_t a_mnt, b_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp32_is_NaN(a_exp, a_mnt) ||
        fp32_is_NaN(b_exp, b_mnt)) {
        if (fp32_is_signalling_NaN(a_exp, a_mnt) ||
            fp32_is_signalling_NaN(b_exp, b_mnt))
            *flags |= FPLIB_IOC;
        return 0;
    }
    return a == b || (!a_mnt && !b_mnt);
}

static int
fp32_compare_ge(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint32_t a_mnt, b_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp32_is_NaN(a_exp, a_mnt) ||
        fp32_is_NaN(b_exp, b_mnt)) {
        *flags |= FPLIB_IOC;
        return 0;
    }
    if (!a_mnt && !b_mnt)
        return 1;
    if (a_sgn != b_sgn)
        return b_sgn;
    if (a_exp != b_exp)
        return a_sgn ^ (a_exp > b_exp);
    if (a_mnt != b_mnt)
        return a_sgn ^ (a_mnt > b_mnt);
    return 1;
}

static int
fp32_compare_gt(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint32_t a_mnt, b_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp32_is_NaN(a_exp, a_mnt) ||
        fp32_is_NaN(b_exp, b_mnt)) {
        *flags |= FPLIB_IOC;
        return 0;
    }
    if (!a_mnt && !b_mnt)
        return 0;
    if (a_sgn != b_sgn)
        return b_sgn;
    if (a_exp != b_exp)
        return a_sgn ^ (a_exp > b_exp);
    if (a_mnt != b_mnt)
        return a_sgn ^ (a_mnt > b_mnt);
    return 0;
}

static int
fp32_compare_un(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint32_t a_mnt, b_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp32_is_NaN(a_exp, a_mnt) ||
        fp32_is_NaN(b_exp, b_mnt)) {
        if (fp32_is_signalling_NaN(a_exp, a_mnt) ||
            fp32_is_signalling_NaN(b_exp, b_mnt))
            *flags |= FPLIB_IOC;
        return 1;
    }
    return 0;
}

static int
fp64_compare_eq(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint64_t a_mnt, b_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp64_is_NaN(a_exp, a_mnt) ||
        fp64_is_NaN(b_exp, b_mnt)) {
        if (fp64_is_signalling_NaN(a_exp, a_mnt) ||
            fp64_is_signalling_NaN(b_exp, b_mnt))
            *flags |= FPLIB_IOC;
        return 0;
    }
    return a == b || (!a_mnt && !b_mnt);
}

static int
fp64_compare_ge(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint64_t a_mnt, b_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp64_is_NaN(a_exp, a_mnt) ||
        fp64_is_NaN(b_exp, b_mnt)) {
        *flags |= FPLIB_IOC;
        return 0;
    }
    if (!a_mnt && !b_mnt)
        return 1;
    if (a_sgn != b_sgn)
        return b_sgn;
    if (a_exp != b_exp)
        return a_sgn ^ (a_exp > b_exp);
    if (a_mnt != b_mnt)
        return a_sgn ^ (a_mnt > b_mnt);
    return 1;
}

static int
fp64_compare_gt(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint64_t a_mnt, b_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp64_is_NaN(a_exp, a_mnt) ||
        fp64_is_NaN(b_exp, b_mnt)) {
        *flags |= FPLIB_IOC;
        return 0;
    }
    if (!a_mnt && !b_mnt)
        return 0;
    if (a_sgn != b_sgn)
        return b_sgn;
    if (a_exp != b_exp)
        return a_sgn ^ (a_exp > b_exp);
    if (a_mnt != b_mnt)
        return a_sgn ^ (a_mnt > b_mnt);
    return 0;
}

static int
fp64_compare_un(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint64_t a_mnt, b_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp64_is_NaN(a_exp, a_mnt) ||
        fp64_is_NaN(b_exp, b_mnt)) {
        if (fp64_is_signalling_NaN(a_exp, a_mnt) ||
            fp64_is_signalling_NaN(b_exp, b_mnt))
            *flags |= FPLIB_IOC;
        return 1;
    }
    return 0;
}

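// The fpN_add functions shift both mantissas left by 3 to create guard
// bits, align the operand with the smaller exponent by a right shift
// whose lost bits are ORed into a sticky bit, then add or subtract the
// aligned mantissas. An exact zero produced by cancellation is negative
// only when rounding towards minus infinity, as IEEE 754 requires.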
static uint16_t
fp16_add(uint16_t a, uint16_t b, int neg, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint16_t a_mnt, b_mnt, x, x_mnt;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp16_process_NaNs(a, b, mode, flags))) {
        return x;
    }

    b_sgn ^= neg;

    // Handle infinities and zeroes:
    if (a_exp == FP16_EXP_INF && b_exp == FP16_EXP_INF && a_sgn != b_sgn) {
        *flags |= FPLIB_IOC;
        return fp16_defaultNaN();
    } else if (a_exp == FP16_EXP_INF) {
        return fp16_infinity(a_sgn);
    } else if (b_exp == FP16_EXP_INF) {
        return fp16_infinity(b_sgn);
    } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) {
        return fp16_zero(a_sgn);
    }

    a_mnt <<= 3;
    b_mnt <<= 3;
    if (a_exp >= b_exp) {
        b_mnt = (lsr16(b_mnt, a_exp - b_exp) |
                 !!(b_mnt & (lsl16(1, a_exp - b_exp) - 1)));
        b_exp = a_exp;
    } else {
        a_mnt = (lsr16(a_mnt, b_exp - a_exp) |
                 !!(a_mnt & (lsl16(1, b_exp - a_exp) - 1)));
        a_exp = b_exp;
    }
    x_sgn = a_sgn;
    x_exp = a_exp;
    if (a_sgn == b_sgn) {
        x_mnt = a_mnt + b_mnt;
    } else if (a_mnt >= b_mnt) {
        x_mnt = a_mnt - b_mnt;
    } else {
        x_sgn ^= 1;
        x_mnt = b_mnt - a_mnt;
    }

    if (!x_mnt) {
        // Sign of exact zero result depends on rounding mode
        return fp16_zero((mode & 3) == 2);
    }

    x_mnt = fp16_normalise(x_mnt, &x_exp);

    return fp16_round(x_sgn, x_exp + FP16_EXP_BITS - 3, x_mnt << 1,
                      mode, flags);
}

static uint32_t
fp32_add(uint32_t a, uint32_t b, int neg, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint32_t a_mnt, b_mnt, x, x_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp32_process_NaNs(a, b, mode, flags))) {
        return x;
    }

    b_sgn ^= neg;

    // Handle infinities and zeroes:
    if (a_exp == FP32_EXP_INF && b_exp == FP32_EXP_INF && a_sgn != b_sgn) {
        *flags |= FPLIB_IOC;
        return fp32_defaultNaN();
    } else if (a_exp == FP32_EXP_INF) {
        return fp32_infinity(a_sgn);
    } else if (b_exp == FP32_EXP_INF) {
        return fp32_infinity(b_sgn);
    } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) {
        return fp32_zero(a_sgn);
    }

    a_mnt <<= 3;
    b_mnt <<= 3;
    if (a_exp >= b_exp) {
        b_mnt = (lsr32(b_mnt, a_exp - b_exp) |
                 !!(b_mnt & (lsl32(1, a_exp - b_exp) - 1)));
        b_exp = a_exp;
    } else {
        a_mnt = (lsr32(a_mnt, b_exp - a_exp) |
                 !!(a_mnt & (lsl32(1, b_exp - a_exp) - 1)));
        a_exp = b_exp;
    }
    x_sgn = a_sgn;
    x_exp = a_exp;
    if (a_sgn == b_sgn) {
        x_mnt = a_mnt + b_mnt;
    } else if (a_mnt >= b_mnt) {
        x_mnt = a_mnt - b_mnt;
    } else {
        x_sgn ^= 1;
        x_mnt = b_mnt - a_mnt;
    }

    if (!x_mnt) {
        // Sign of exact zero result depends on rounding mode
        return fp32_zero((mode & 3) == 2);
    }

    x_mnt = fp32_normalise(x_mnt, &x_exp);

    return fp32_round(x_sgn, x_exp + FP32_EXP_BITS - 3, x_mnt << 1,
                      mode, flags);
}

static uint64_t
fp64_add(uint64_t a, uint64_t b, int neg, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint64_t a_mnt, b_mnt, x, x_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp64_process_NaNs(a, b, mode, flags))) {
        return x;
    }

    b_sgn ^= neg;

    // Handle infinities and zeroes:
    if (a_exp == FP64_EXP_INF && b_exp == FP64_EXP_INF && a_sgn != b_sgn) {
        *flags |= FPLIB_IOC;
        return fp64_defaultNaN();
    } else if (a_exp == FP64_EXP_INF) {
        return fp64_infinity(a_sgn);
    } else if (b_exp == FP64_EXP_INF) {
        return fp64_infinity(b_sgn);
    } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) {
        return fp64_zero(a_sgn);
    }

    a_mnt <<= 3;
    b_mnt <<= 3;
    if (a_exp >= b_exp) {
        b_mnt = (lsr64(b_mnt, a_exp - b_exp) |
                 !!(b_mnt & (lsl64(1, a_exp - b_exp) - 1)));
        b_exp = a_exp;
    } else {
        a_mnt = (lsr64(a_mnt, b_exp - a_exp) |
                 !!(a_mnt & (lsl64(1, b_exp - a_exp) - 1)));
        a_exp = b_exp;
    }
    x_sgn = a_sgn;
    x_exp = a_exp;
    if (a_sgn == b_sgn) {
        x_mnt = a_mnt + b_mnt;
    } else if (a_mnt >= b_mnt) {
        x_mnt = a_mnt - b_mnt;
    } else {
        x_sgn ^= 1;
        x_mnt = b_mnt - a_mnt;
    }

    if (!x_mnt) {
        // Sign of exact zero result depends on rounding mode
        return fp64_zero((mode & 3) == 2);
    }

    x_mnt = fp64_normalise(x_mnt, &x_exp);

    return fp64_round(x_sgn, x_exp + FP64_EXP_BITS - 3, x_mnt << 1,
                      mode, flags);
}

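// The fpN_mul functions form the full double-width product of the two
// mantissas and renormalise it; the exponent arithmetic folds in a
// fixed correction (2 * EXP_BITS + 1, beyond removing one copy of the
// bias from the sum of biased exponents) that appears chosen to match
// the fixed-point position fpN_round expects.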
static uint16_t
fp16_mul(uint16_t a, uint16_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint16_t a_mnt, b_mnt, x;
    uint32_t x_mnt;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp16_process_NaNs(a, b, mode, flags))) {
        return x;
    }

    // Handle infinities and zeroes:
    if ((a_exp == FP16_EXP_INF && !b_mnt) ||
        (b_exp == FP16_EXP_INF && !a_mnt)) {
        *flags |= FPLIB_IOC;
        return fp16_defaultNaN();
    } else if (a_exp == FP16_EXP_INF || b_exp == FP16_EXP_INF) {
        return fp16_infinity(a_sgn ^ b_sgn);
    } else if (!a_mnt || !b_mnt) {
        return fp16_zero(a_sgn ^ b_sgn);
    }

    // Multiply and normalise:
    x_sgn = a_sgn ^ b_sgn;
    x_exp = a_exp + b_exp - FP16_EXP_BIAS + 2 * FP16_EXP_BITS + 1;
    x_mnt = (uint32_t)a_mnt * b_mnt;
    x_mnt = fp32_normalise(x_mnt, &x_exp);

    // Convert to FP16_BITS bits, collapsing error into bottom bit:
    x_mnt = lsr32(x_mnt, FP16_BITS - 1) | !!lsl32(x_mnt, FP16_BITS + 1);

    return fp16_round(x_sgn, x_exp, x_mnt, mode, flags);
}

static uint32_t
fp32_mul(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint32_t a_mnt, b_mnt, x;
    uint64_t x_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp32_process_NaNs(a, b, mode, flags))) {
        return x;
    }

    // Handle infinities and zeroes:
    if ((a_exp == FP32_EXP_INF && !b_mnt) ||
        (b_exp == FP32_EXP_INF && !a_mnt)) {
        *flags |= FPLIB_IOC;
        return fp32_defaultNaN();
    } else if (a_exp == FP32_EXP_INF || b_exp == FP32_EXP_INF) {
        return fp32_infinity(a_sgn ^ b_sgn);
    } else if (!a_mnt || !b_mnt) {
        return fp32_zero(a_sgn ^ b_sgn);
    }

    // Multiply and normalise:
    x_sgn = a_sgn ^ b_sgn;
    x_exp = a_exp + b_exp - FP32_EXP_BIAS + 2 * FP32_EXP_BITS + 1;
    x_mnt = (uint64_t)a_mnt * b_mnt;
    x_mnt = fp64_normalise(x_mnt, &x_exp);

    // Convert to FP32_BITS bits, collapsing error into bottom bit:
    x_mnt = lsr64(x_mnt, FP32_BITS - 1) | !!lsl64(x_mnt, FP32_BITS + 1);

    return fp32_round(x_sgn, x_exp, x_mnt, mode, flags);
}

static uint64_t
fp64_mul(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint64_t a_mnt, b_mnt, x;
    uint64_t x0_mnt, x1_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp64_process_NaNs(a, b, mode, flags))) {
        return x;
    }

    // Handle infinities and zeroes:
    if ((a_exp == FP64_EXP_INF && !b_mnt) ||
        (b_exp == FP64_EXP_INF && !a_mnt)) {
        *flags |= FPLIB_IOC;
        return fp64_defaultNaN();
    } else if (a_exp == FP64_EXP_INF || b_exp == FP64_EXP_INF) {
        return fp64_infinity(a_sgn ^ b_sgn);
    } else if (!a_mnt || !b_mnt) {
        return fp64_zero(a_sgn ^ b_sgn);
    }

    // Multiply and normalise:
    x_sgn = a_sgn ^ b_sgn;
    x_exp = a_exp + b_exp - FP64_EXP_BIAS + 2 * FP64_EXP_BITS + 1;
    mul62x62(&x0_mnt, &x1_mnt, a_mnt, b_mnt);
    fp128_normalise(&x0_mnt, &x1_mnt, &x_exp);

    // Convert to FP64_BITS bits, collapsing error into bottom bit:
    x0_mnt = x1_mnt << 1 | !!x0_mnt;

    return fp64_round(x_sgn, x_exp, x0_mnt, mode, flags);
}

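// The fpN_muladd functions compute a + b * c with a single rounding at
// the end, i.e. a fused multiply-add; the `scale` argument additionally
// multiplies the result by 2^scale before rounding.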
static uint16_t
fp16_muladd(uint16_t a, uint16_t b, uint16_t c, int scale,
            int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp;
    uint16_t a_mnt, b_mnt, c_mnt, x;
    uint32_t x_mnt, y_mnt;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
    fp16_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags);

    x = fp16_process_NaNs3(a, b, c, mode, flags);

    // Quiet NaN added to product of zero and infinity:
    if (fp16_is_quiet_NaN(a_exp, a_mnt) &&
        ((!b_mnt && fp16_is_infinity(c_exp, c_mnt)) ||
         (!c_mnt && fp16_is_infinity(b_exp, b_mnt)))) {
        x = fp16_defaultNaN();
        *flags |= FPLIB_IOC;
    }

    if (x) {
        return x;
    }

    // Handle infinities and zeroes:
    if ((b_exp == FP16_EXP_INF && !c_mnt) ||
        (c_exp == FP16_EXP_INF && !b_mnt) ||
        (a_exp == FP16_EXP_INF &&
         (b_exp == FP16_EXP_INF || c_exp == FP16_EXP_INF) &&
         (a_sgn != (b_sgn ^ c_sgn)))) {
        *flags |= FPLIB_IOC;
        return fp16_defaultNaN();
    }
    if (a_exp == FP16_EXP_INF)
        return fp16_infinity(a_sgn);
    if (b_exp == FP16_EXP_INF || c_exp == FP16_EXP_INF)
        return fp16_infinity(b_sgn ^ c_sgn);
    if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn))
        return fp16_zero(a_sgn);

    x_sgn = a_sgn;
    x_exp = a_exp + 2 * FP16_EXP_BITS - 3;
    x_mnt = (uint32_t)a_mnt << (FP16_MANT_BITS + 4);

    // Multiply:
    y_sgn = b_sgn ^ c_sgn;
    y_exp = b_exp + c_exp - FP16_EXP_BIAS + 2 * FP16_EXP_BITS + 1 - 3;
    y_mnt = (uint32_t)b_mnt * c_mnt << 3;
    if (!y_mnt) {
        y_exp = x_exp;
    }

    // Add:
    if (x_exp >= y_exp) {
        y_mnt = (lsr32(y_mnt, x_exp - y_exp) |
                 !!(y_mnt & (lsl32(1, x_exp - y_exp) - 1)));
        y_exp = x_exp;
    } else {
        x_mnt = (lsr32(x_mnt, y_exp - x_exp) |
                 !!(x_mnt & (lsl32(1, y_exp - x_exp) - 1)));
        x_exp = y_exp;
    }
    if (x_sgn == y_sgn) {
        x_mnt = x_mnt + y_mnt;
    } else if (x_mnt >= y_mnt) {
        x_mnt = x_mnt - y_mnt;
    } else {
        x_sgn ^= 1;
        x_mnt = y_mnt - x_mnt;
    }

    if (!x_mnt) {
        // Sign of exact zero result depends on rounding mode
        return fp16_zero((mode & 3) == 2);
    }

    // Normalise into FP16_BITS bits, collapsing error into bottom bit:
    x_mnt = fp32_normalise(x_mnt, &x_exp);
    x_mnt = x_mnt >> (FP16_BITS - 1) | !!(uint16_t)(x_mnt << 1);

    return fp16_round(x_sgn, x_exp + scale, x_mnt, mode, flags);
}

static uint32_t
fp32_muladd(uint32_t a, uint32_t b, uint32_t c, int scale,
            int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp;
    uint32_t a_mnt, b_mnt, c_mnt, x;
    uint64_t x_mnt, y_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
    fp32_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags);

    x = fp32_process_NaNs3(a, b, c, mode, flags);

    // Quiet NaN added to product of zero and infinity:
    if (fp32_is_quiet_NaN(a_exp, a_mnt) &&
        ((!b_mnt && fp32_is_infinity(c_exp, c_mnt)) ||
         (!c_mnt && fp32_is_infinity(b_exp, b_mnt)))) {
        x = fp32_defaultNaN();
        *flags |= FPLIB_IOC;
    }

    if (x) {
        return x;
    }

    // Handle infinities and zeroes:
    if ((b_exp == FP32_EXP_INF && !c_mnt) ||
        (c_exp == FP32_EXP_INF && !b_mnt) ||
        (a_exp == FP32_EXP_INF &&
         (b_exp == FP32_EXP_INF || c_exp == FP32_EXP_INF) &&
         (a_sgn != (b_sgn ^ c_sgn)))) {
        *flags |= FPLIB_IOC;
        return fp32_defaultNaN();
    }
    if (a_exp == FP32_EXP_INF)
        return fp32_infinity(a_sgn);
    if (b_exp == FP32_EXP_INF || c_exp == FP32_EXP_INF)
        return fp32_infinity(b_sgn ^ c_sgn);
    if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn))
        return fp32_zero(a_sgn);

    x_sgn = a_sgn;
    x_exp = a_exp + 2 * FP32_EXP_BITS - 3;
    x_mnt = (uint64_t)a_mnt << (FP32_MANT_BITS + 4);

    // Multiply:
    y_sgn = b_sgn ^ c_sgn;
    y_exp = b_exp + c_exp - FP32_EXP_BIAS + 2 * FP32_EXP_BITS + 1 - 3;
    y_mnt = (uint64_t)b_mnt * c_mnt << 3;
    if (!y_mnt) {
        y_exp = x_exp;
    }

    // Add:
    if (x_exp >= y_exp) {
        y_mnt = (lsr64(y_mnt, x_exp - y_exp) |
                 !!(y_mnt & (lsl64(1, x_exp - y_exp) - 1)));
        y_exp = x_exp;
    } else {
        x_mnt = (lsr64(x_mnt, y_exp - x_exp) |
                 !!(x_mnt & (lsl64(1, y_exp - x_exp) - 1)));
        x_exp = y_exp;
    }
    if (x_sgn == y_sgn) {
        x_mnt = x_mnt + y_mnt;
    } else if (x_mnt >= y_mnt) {
        x_mnt = x_mnt - y_mnt;
    } else {
        x_sgn ^= 1;
        x_mnt = y_mnt - x_mnt;
    }

    if (!x_mnt) {
        // Sign of exact zero result depends on rounding mode
        return fp32_zero((mode & 3) == 2);
    }

    // Normalise into FP32_BITS bits, collapsing error into bottom bit:
    x_mnt = fp64_normalise(x_mnt, &x_exp);
    x_mnt = x_mnt >> (FP32_BITS - 1) | !!(uint32_t)(x_mnt << 1);

    return fp32_round(x_sgn, x_exp + scale, x_mnt, mode, flags);
}

static uint64_t
fp64_muladd(uint64_t a, uint64_t b, uint64_t c, int scale,
            int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp;
    uint64_t a_mnt, b_mnt, c_mnt, x;
    uint64_t x0_mnt, x1_mnt, y0_mnt, y1_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
    fp64_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags);

    x = fp64_process_NaNs3(a, b, c, mode, flags);

    // Quiet NaN added to product of zero and infinity:
    if (fp64_is_quiet_NaN(a_exp, a_mnt) &&
        ((!b_mnt && fp64_is_infinity(c_exp, c_mnt)) ||
         (!c_mnt && fp64_is_infinity(b_exp, b_mnt)))) {
        x = fp64_defaultNaN();
        *flags |= FPLIB_IOC;
    }

    if (x) {
        return x;
    }

    // Handle infinities and zeroes:
    if ((b_exp == FP64_EXP_INF && !c_mnt) ||
        (c_exp == FP64_EXP_INF && !b_mnt) ||
        (a_exp == FP64_EXP_INF &&
         (b_exp == FP64_EXP_INF || c_exp == FP64_EXP_INF) &&
         (a_sgn != (b_sgn ^ c_sgn)))) {
        *flags |= FPLIB_IOC;
        return fp64_defaultNaN();
    }
    if (a_exp == FP64_EXP_INF)
        return fp64_infinity(a_sgn);
    if (b_exp == FP64_EXP_INF || c_exp == FP64_EXP_INF)
        return fp64_infinity(b_sgn ^ c_sgn);
    if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn))
        return fp64_zero(a_sgn);

    x_sgn = a_sgn;
    x_exp = a_exp + FP64_EXP_BITS;
    x0_mnt = 0;
    x1_mnt = a_mnt;

    // Multiply:
    y_sgn = b_sgn ^ c_sgn;
    y_exp = b_exp + c_exp - FP64_EXP_BIAS + 2 * FP64_EXP_BITS + 1 - 3;
    mul62x62(&y0_mnt, &y1_mnt, b_mnt, c_mnt << 3);
    if (!y0_mnt && !y1_mnt) {
        y_exp = x_exp;
    }

    // Add:
    if (x_exp >= y_exp) {
        uint64_t t0, t1;
        lsl128(&t0, &t1, y0_mnt, y1_mnt,
               x_exp - y_exp < 128 ? 128 - (x_exp - y_exp) : 0);
        lsr128(&y0_mnt, &y1_mnt, y0_mnt, y1_mnt, x_exp - y_exp);
        y0_mnt |= !!(t0 | t1);
        y_exp = x_exp;
    } else {
        uint64_t t0, t1;
        lsl128(&t0, &t1, x0_mnt, x1_mnt,
               y_exp - x_exp < 128 ? 128 - (y_exp - x_exp) : 0);
        lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y_exp - x_exp);
        x0_mnt |= !!(t0 | t1);
        x_exp = y_exp;
    }
    if (x_sgn == y_sgn) {
        add128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y0_mnt, y1_mnt);
    } else if (cmp128(x0_mnt, x1_mnt, y0_mnt, y1_mnt) >= 0) {
        sub128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y0_mnt, y1_mnt);
    } else {
        x_sgn ^= 1;
        sub128(&x0_mnt, &x1_mnt, y0_mnt, y1_mnt, x0_mnt, x1_mnt);
    }

    if (!x0_mnt && !x1_mnt) {
        // Sign of exact zero result depends on rounding mode
        return fp64_zero((mode & 3) == 2);
    }

    // Normalise into FP64_BITS bits, collapsing error into bottom bit:
    fp128_normalise(&x0_mnt, &x1_mnt, &x_exp);
    x0_mnt = x1_mnt << 1 | !!x0_mnt;

    return fp64_round(x_sgn, x_exp + scale, x0_mnt, mode, flags);
}

static uint16_t
fp16_div(uint16_t a, uint16_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint16_t a_mnt, b_mnt, x;
    uint32_t x_mnt;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp16_process_NaNs(a, b, mode, flags)))
        return x;

    // Handle infinities and zeroes:
    if ((a_exp == FP16_EXP_INF && b_exp == FP16_EXP_INF) ||
        (!a_mnt && !b_mnt)) {
        *flags |= FPLIB_IOC;
        return fp16_defaultNaN();
    }
    if (a_exp == FP16_EXP_INF || !b_mnt) {
        if (a_exp != FP16_EXP_INF)
            *flags |= FPLIB_DZC;
        return fp16_infinity(a_sgn ^ b_sgn);
    }
    if (!a_mnt || b_exp == FP16_EXP_INF)
        return fp16_zero(a_sgn ^ b_sgn);

    // Divide, setting bottom bit if inexact:
    a_mnt = fp16_normalise(a_mnt, &a_exp);
    x_sgn = a_sgn ^ b_sgn;
    x_exp = a_exp - b_exp + (FP16_EXP_BIAS + FP16_BITS + 2 * FP16_EXP_BITS - 3);
    x_mnt = ((uint32_t)a_mnt << (FP16_MANT_BITS - FP16_EXP_BITS + 3)) / b_mnt;
    x_mnt |= (x_mnt * b_mnt !=
              (uint32_t)a_mnt << (FP16_MANT_BITS - FP16_EXP_BITS + 3));

    // Normalise into FP16_BITS bits, collapsing error into bottom bit:
    x_mnt = fp32_normalise(x_mnt, &x_exp);
    x_mnt = x_mnt >> (FP16_BITS - 1) | !!(uint16_t)(x_mnt << 1);

    return fp16_round(x_sgn, x_exp, x_mnt, mode, flags);
}

static uint32_t
fp32_div(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint32_t a_mnt, b_mnt, x;
    uint64_t x_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp32_process_NaNs(a, b, mode, flags)))
        return x;

    // Handle infinities and zeroes:
    if ((a_exp == FP32_EXP_INF && b_exp == FP32_EXP_INF) ||
        (!a_mnt && !b_mnt)) {
        *flags |= FPLIB_IOC;
        return fp32_defaultNaN();
    }
    if (a_exp == FP32_EXP_INF || !b_mnt) {
        if (a_exp != FP32_EXP_INF)
            *flags |= FPLIB_DZC;
        return fp32_infinity(a_sgn ^ b_sgn);
    }
    if (!a_mnt || b_exp == FP32_EXP_INF)
        return fp32_zero(a_sgn ^ b_sgn);

    // Divide, setting bottom bit if inexact:
    a_mnt = fp32_normalise(a_mnt, &a_exp);
    x_sgn = a_sgn ^ b_sgn;
    x_exp = a_exp - b_exp + (FP32_EXP_BIAS + FP32_BITS + 2 * FP32_EXP_BITS - 3);
    x_mnt = ((uint64_t)a_mnt << (FP32_MANT_BITS - FP32_EXP_BITS + 3)) / b_mnt;
    x_mnt |= (x_mnt * b_mnt !=
              (uint64_t)a_mnt << (FP32_MANT_BITS - FP32_EXP_BITS + 3));

    // Normalise into FP32_BITS bits, collapsing error into bottom bit:
    x_mnt = fp64_normalise(x_mnt, &x_exp);
    x_mnt = x_mnt >> (FP32_BITS - 1) | !!(uint32_t)(x_mnt << 1);

    return fp32_round(x_sgn, x_exp, x_mnt, mode, flags);
}

1844static uint64_t
1845fp64_div(uint64_t a, uint64_t b, int mode, int *flags)
1846{
1847    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp, c;
1848    uint64_t a_mnt, b_mnt, x, x_mnt, x0_mnt, x1_mnt;
1849
1850    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1851    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1852
1853    if ((x = fp64_process_NaNs(a, b, mode, flags)))
1854        return x;
1855
1856    // Handle infinities and zeroes:
1857    if ((a_exp == FP64_EXP_INF && b_exp == FP64_EXP_INF) ||
1858        (!a_mnt && !b_mnt)) {
1859        *flags |= FPLIB_IOC;
1860        return fp64_defaultNaN();
1861    }
1862    if (a_exp == FP64_EXP_INF || !b_mnt) {
1863        if (a_exp != FP64_EXP_INF)
1864            *flags |= FPLIB_DZC;
1865        return fp64_infinity(a_sgn ^ b_sgn);
1866    }
1867    if (!a_mnt || b_exp == FP64_EXP_INF)
1868        return fp64_zero(a_sgn ^ b_sgn);
1869
1870    // Find reciprocal of divisor with Newton-Raphson:
1871    a_mnt = fp64_normalise(a_mnt, &a_exp);
1872    b_mnt = fp64_normalise(b_mnt, &b_exp);
1873    x_mnt = ~(uint64_t)0 / (b_mnt >> 31);
1874    mul64x32(&x0_mnt, &x1_mnt, b_mnt, x_mnt);
1875    sub128(&x0_mnt, &x1_mnt, 0, (uint64_t)1 << 32, x0_mnt, x1_mnt);
1876    lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 32);
1877    mul64x32(&x0_mnt, &x1_mnt, x0_mnt, x_mnt);
1878    lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 33);
1879
1880    // Multiply by dividend:
1881    x_sgn = a_sgn ^ b_sgn;
1882    x_exp = a_exp - b_exp + FP64_EXP_BIAS + 8;
1883    mul62x62(&x0_mnt, &x1_mnt, x0_mnt, a_mnt >> 2);
1884    lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 4);
1885    x_mnt = x1_mnt;
1886
1887    // This is an underestimate, so try adding one:
1888    mul62x62(&x0_mnt, &x1_mnt, b_mnt >> 2, x_mnt + 1);
1889    c = cmp128(x0_mnt, x1_mnt, 0, a_mnt >> 11);
1890    if (c <= 0) {
1891        ++x_mnt;
1892    }
1893
1894    x_mnt = fp64_normalise(x_mnt, &x_exp);
1895
1896    return fp64_round(x_sgn, x_exp, x_mnt << 1 | !!c, mode, flags);
1897}
1898
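// fp64_div has no wider integer division to lean on, so it refines a
// reciprocal of the divisor instead.  The block above is one standard
// Newton-Raphson step for f(r) = 1/r - b, which doubles the number of
// good bits:
//
//     r' = r * (2 - b * r)
//
// A ~32-bit seed comes from dividing ~0 by the top bits of b; after one
// step the reciprocal is accurate enough that multiplying by the dividend
// plus the final "try adding one" compare yield the correctly rounded
// quotient, with !!c supplying the sticky bit.
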
1899static void
1900set_fpscr0(FPSCR &fpscr, int flags)
1901{
1902    if (flags & FPLIB_IDC) {
1903        fpscr.idc = 1;
1904    }
1905    if (flags & FPLIB_IOC) {
1906        fpscr.ioc = 1;
1907    }
1908    if (flags & FPLIB_DZC) {
1909        fpscr.dzc = 1;
1910    }
1911    if (flags & FPLIB_OFC) {
1912        fpscr.ofc = 1;
1913    }
1914    if (flags & FPLIB_UFC) {
1915        fpscr.ufc = 1;
1916    }
1917    if (flags & FPLIB_IXC) {
1918        fpscr.ixc = 1;
1919    }
1920}
1921
1922static uint16_t
1923fp16_scale(uint16_t a, int16_t b, int mode, int *flags)
1924{
1925    int a_sgn, a_exp;
1926    uint16_t a_mnt;
1927
1928    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1929
1930    // Handle NaNs:
1931    if (fp16_is_NaN(a_exp, a_mnt)) {
1932        return fp16_process_NaN(a, mode, flags);
1933    }
1934
1935    // Handle zeroes:
1936    if (!a_mnt) {
1937        return fp16_zero(a_sgn);
1938    }
1939
1940    // Handle infinities:
1941    if (a_exp == FP16_EXP_INF) {
1942        return fp16_infinity(a_sgn);
1943    }
1944
1945    b = b < -300 ? -300 : b;
1946    b = b >  300 ?  300 : b;
1947    a_exp += b;
1948    a_mnt <<= 3;
1949
1950    a_mnt = fp16_normalise(a_mnt, &a_exp);
1951
1952    return fp16_round(a_sgn, a_exp + FP16_EXP_BITS - 3, a_mnt << 1,
1953                      mode, flags);
1954}
1955
1956static uint32_t
1957fp32_scale(uint32_t a, int32_t b, int mode, int *flags)
1958{
1959    int a_sgn, a_exp;
1960    uint32_t a_mnt;
1961
1962    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1963
1964    // Handle NaNs:
1965    if (fp32_is_NaN(a_exp, a_mnt)) {
1966        return fp32_process_NaN(a, mode, flags);
1967    }
1968
1969    // Handle zeroes:
1970    if (!a_mnt) {
1971        return fp32_zero(a_sgn);
1972    }
1973
1974    // Handle infinities:
1975    if (a_exp == FP32_EXP_INF) {
1976        return fp32_infinity(a_sgn);
1977    }
1978
1979    b = b < -300 ? -300 : b;
1980    b = b >  300 ?  300 : b;
1981    a_exp += b;
1982    a_mnt <<= 3;
1983
1984    a_mnt = fp32_normalise(a_mnt, &a_exp);
1985
1986    return fp32_round(a_sgn, a_exp + FP32_EXP_BITS - 3, a_mnt << 1,
1987                      mode, flags);
1988}
1989
1990static uint64_t
1991fp64_scale(uint64_t a, int64_t b, int mode, int *flags)
1992{
1993    int a_sgn, a_exp;
1994    uint64_t a_mnt;
1995
1996    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1997
1998    // Handle NaNs:
1999    if (fp64_is_NaN(a_exp, a_mnt)) {
2000        return fp64_process_NaN(a, mode, flags);
2001    }
2002
2003    // Handle zeroes:
2004    if (!a_mnt) {
2005        return fp64_zero(a_sgn);
2006    }
2007
2008    // Handle infinities:
2009    if (a_exp == FP64_EXP_INF) {
2010        return fp64_infinity(a_sgn);
2011    }
2012
2013    b = b < -3000 ? -3000 : b;
2014    b = b >  3000 ?  3000 : b;
2015    a_exp += b;
2016    a_mnt <<= 3;
2017
2018    a_mnt = fp64_normalise(a_mnt, &a_exp);
2019
2020    return fp64_round(a_sgn, a_exp + FP64_EXP_BITS - 3, a_mnt << 1,
2021                      mode, flags);
2022}
2023
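// The +/-300 (fp16/fp32) and +/-3000 (fp64) clamps above are safe because
// they exceed the full exponent span of each format: any larger |b|
// already guarantees an overflow or underflow, so saturating it keeps the
// exponent arithmetic in range without changing the rounded result.
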
2024static uint16_t
2025fp16_sqrt(uint16_t a, int mode, int *flags)
2026{
2027    int a_sgn, a_exp, x_sgn, x_exp;
2028    uint16_t a_mnt, x_mnt;
2029    uint32_t x, t0, t1;
2030
2031    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
2032
2033    // Handle NaNs:
2034    if (fp16_is_NaN(a_exp, a_mnt))
2035        return fp16_process_NaN(a, mode, flags);
2036
2037    // Handle infinities and zeroes:
2038    if (!a_mnt)
2039        return fp16_zero(a_sgn);
2040    if (a_exp == FP16_EXP_INF && !a_sgn)
2041        return fp16_infinity(a_sgn);
2042    if (a_sgn) {
2043        *flags |= FPLIB_IOC;
2044        return fp16_defaultNaN();
2045    }
2046
2047    a_mnt = fp16_normalise(a_mnt, &a_exp);
2048    if (a_exp & 1) {
2049        ++a_exp;
2050        a_mnt >>= 1;
2051    }
2052
2053    // x = (a * 3 + 5) / 8
2054    x = ((uint32_t)a_mnt << 14) + ((uint32_t)a_mnt << 13) + ((uint32_t)5 << 28);
2055
2056    // x = (a / x + x) / 2; // 8-bit accuracy
2057    x = (((uint32_t)a_mnt << 16) / (x >> 15) + (x >> 16)) << 15;
2058
2059    // x = (a / x + x) / 2; // 16-bit accuracy
2060    x = (((uint32_t)a_mnt << 16) / (x >> 15) + (x >> 16)) << 15;
2061
2062    x_sgn = 0;
2063    x_exp = (a_exp + 27) >> 1;
2064    x_mnt = ((x - (1 << 18)) >> 19) + 1;
2065    t1 = (uint32_t)x_mnt * x_mnt;
2066    t0 = (uint32_t)a_mnt << 9;
2067    if (t1 > t0) {
2068        --x_mnt;
2069    }
2070
2071    x_mnt = fp16_normalise(x_mnt, &x_exp);
2072
2073    return fp16_round(x_sgn, x_exp, x_mnt << 1 | (t1 != t0), mode, flags);
2074}
2075
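// The fp*_sqrt routines all follow the same pattern: a linear first guess
// x ~= (3a + 5)/8 in fixed point, then Heron/Newton iterations
//
//     x' = (a / x + x) / 2
//
// each of which roughly doubles the accurate bits, and finally a compare
// of x_mnt * x_mnt against the scaled input that both corrects an
// off-by-one estimate and supplies the exactness (sticky) bit.
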
2076static uint32_t
2077fp32_sqrt(uint32_t a, int mode, int *flags)
2078{
2079    int a_sgn, a_exp, x_sgn, x_exp;
2080    uint32_t a_mnt, x, x_mnt;
2081    uint64_t t0, t1;
2082
2083    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
2084
2085    // Handle NaNs:
2086    if (fp32_is_NaN(a_exp, a_mnt))
2087        return fp32_process_NaN(a, mode, flags);
2088
2089    // Handle infinities and zeroes:
2090    if (!a_mnt)
2091        return fp32_zero(a_sgn);
2092    if (a_exp == FP32_EXP_INF && !a_sgn)
2093        return fp32_infinity(a_sgn);
2094    if (a_sgn) {
2095        *flags |= FPLIB_IOC;
2096        return fp32_defaultNaN();
2097    }
2098
2099    a_mnt = fp32_normalise(a_mnt, &a_exp);
2100    if (!(a_exp & 1)) {
2101        ++a_exp;
2102        a_mnt >>= 1;
2103    }
2104
2105    // x = (a * 3 + 5) / 8
2106    x = (a_mnt >> 2) + (a_mnt >> 3) + ((uint32_t)5 << 28);
2107
2108    // x = (a / x + x) / 2; // 8-bit accuracy
2109    x = (a_mnt / (x >> 15) + (x >> 16)) << 15;
2110
2111    // x = (a / x + x) / 2; // 16-bit accuracy
2112    x = (a_mnt / (x >> 15) + (x >> 16)) << 15;
2113
2114    // x = (a / x + x) / 2; // 32-bit accuracy
2115    x = ((((uint64_t)a_mnt << 32) / x) >> 2) + (x >> 1);
2116
2117    x_sgn = 0;
2118    x_exp = (a_exp + 147) >> 1;
2119    x_mnt = ((x - (1 << 5)) >> 6) + 1;
2120    t1 = (uint64_t)x_mnt * x_mnt;
2121    t0 = (uint64_t)a_mnt << 19;
2122    if (t1 > t0) {
2123        --x_mnt;
2124    }
2125
2126    x_mnt = fp32_normalise(x_mnt, &x_exp);
2127
2128    return fp32_round(x_sgn, x_exp, x_mnt << 1 | (t1 != t0), mode, flags);
2129}
2130
2131static uint64_t
2132fp64_sqrt(uint64_t a, int mode, int *flags)
2133{
2134    int a_sgn, a_exp, x_sgn, x_exp, c;
2135    uint64_t a_mnt, x_mnt, r, x0, x1;
2136    uint32_t x;
2137
2138    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
2139
2140    // Handle NaNs:
2141    if (fp64_is_NaN(a_exp, a_mnt))
2142        return fp64_process_NaN(a, mode, flags);
2143
2144    // Handle infinities and zeroes:
2145    if (!a_mnt)
2146        return fp64_zero(a_sgn);
2147    if (a_exp == FP64_EXP_INF && !a_sgn)
2148        return fp64_infinity(a_sgn);
2149    if (a_sgn) {
2150        *flags |= FPLIB_IOC;
2151        return fp64_defaultNaN();
2152    }
2153
2154    a_mnt = fp64_normalise(a_mnt, &a_exp);
2155    if (a_exp & 1) {
2156        ++a_exp;
2157        a_mnt >>= 1;
2158    }
2159
2160    // x = (a * 3 + 5) / 8
2161    x = (a_mnt >> 34) + (a_mnt >> 35) + ((uint32_t)5 << 28);
2162
2163    // x = (a / x + x) / 2; // 8-bit accuracy
2164    x = ((a_mnt >> 32) / (x >> 15) + (x >> 16)) << 15;
2165
2166    // x = (a / x + x) / 2; // 16-bit accuracy
2167    x = ((a_mnt >> 32) / (x >> 15) + (x >> 16)) << 15;
2168
2169    // x = (a / x + x) / 2; // 32-bit accuracy
2170    x = ((a_mnt / x) >> 2) + (x >> 1);
2171
2172    // r = 1 / x; // 32-bit accuracy
2173    r = ((uint64_t)1 << 62) / x;
2174
2175    // r = r * (2 - x * r); // 64-bit accuracy
2176    mul64x32(&x0, &x1, -(uint64_t)x * r << 1, r);
2177    lsr128(&x0, &x1, x0, x1, 31);
2178
2179    // x = (x + a * r) / 2; // 64-bit accuracy
2180    mul62x62(&x0, &x1, a_mnt >> 10, x0 >> 2);
2181    lsl128(&x0, &x1, x0, x1, 5);
2182    lsr128(&x0, &x1, x0, x1, 56);
2183
2184    x0 = ((uint64_t)x << 31) + (x0 >> 1);
2185
2186    x_sgn = 0;
2187    x_exp = (a_exp + 1053) >> 1;
2188    x_mnt = x0;
2189    x_mnt = ((x_mnt - (1 << 8)) >> 9) + 1;
2190    mul62x62(&x0, &x1, x_mnt, x_mnt);
2191    lsl128(&x0, &x1, x0, x1, 19);
2192    c = cmp128(x0, x1, 0, a_mnt);
2193    if (c > 0)
2194        --x_mnt;
2195
2196    x_mnt = fp64_normalise(x_mnt, &x_exp);
2197
2198    return fp64_round(x_sgn, x_exp, x_mnt << 1 | !!c, mode, flags);
2199}
2200
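// For double precision the last Newton step cannot use a plain division,
// so fp64_sqrt first refines a reciprocal r ~= 1/x with r' = r*(2 - x*r)
// and then forms x' = (x + a*r)/2 in 128-bit arithmetic, as the inline
// comments above indicate.
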
2201static int
2202modeConv(FPSCR fpscr)
2203{
2204    uint32_t x = (uint32_t)fpscr;
2205    // The AHP bit is deliberately ignored; only fplibConvert uses it.
2206    return (x >> 22 & 0xf) | (x >> 19 & 1 ? FPLIB_FZ16 : 0);
2207}
2208
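// modeConv extracts FPSCR[25:22], which is exactly DN:FZ:RMode, so the
// nibble lines up with FPLIB_DN (8), FPLIB_FZ (4) and the FPLIB_RN..RZ
// rounding codes in bits 1:0; FZ16 lives at FPSCR[19] and is translated
// separately.
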
2209static void
2210set_fpscr(FPSCR &fpscr, int flags)
2211{
2212    // translate back to FPSCR
2213    bool underflow = false;
2214    if (flags & FPLIB_IDC) {
2215        fpscr.idc = 1;
2216    }
2217    if (flags & FPLIB_IOC) {
2218        fpscr.ioc = 1;
2219    }
2220    if (flags & FPLIB_DZC) {
2221        fpscr.dzc = 1;
2222    }
2223    if (flags & FPLIB_OFC) {
2224        fpscr.ofc = 1;
2225    }
2226    if (flags & FPLIB_UFC) {
2227        underflow = true; // an FZ-flushed underflow suppresses inexact below
2228        fpscr.ufc = 1;
2229    }
2230    if ((flags & FPLIB_IXC) && !(underflow && fpscr.fz)) {
2231        fpscr.ixc = 1;
2232    }
2233}
2234
2235template <>
2236bool
2237fplibCompareEQ(uint16_t a, uint16_t b, FPSCR &fpscr)
2238{
2239    int flags = 0;
2240    int x = fp16_compare_eq(a, b, modeConv(fpscr), &flags);
2241    set_fpscr(fpscr, flags);
2242    return x;
2243}
2244
2245template <>
2246bool
2247fplibCompareGE(uint16_t a, uint16_t b, FPSCR &fpscr)
2248{
2249    int flags = 0;
2250    int x = fp16_compare_ge(a, b, modeConv(fpscr), &flags);
2251    set_fpscr(fpscr, flags);
2252    return x;
2253}
2254
2255template <>
2256bool
2257fplibCompareGT(uint16_t a, uint16_t b, FPSCR &fpscr)
2258{
2259    int flags = 0;
2260    int x = fp16_compare_gt(a, b, modeConv(fpscr), &flags);
2261    set_fpscr(fpscr, flags);
2262    return x;
2263}
2264
2265template <>
2266bool
2267fplibCompareUN(uint16_t a, uint16_t b, FPSCR &fpscr)
2268{
2269    int flags = 0;
2270    int x = fp16_compare_un(a, b, modeConv(fpscr), &flags);
2271    set_fpscr(fpscr, flags);
2272    return x;
2273}
2274
2275template <>
2276bool
2277fplibCompareEQ(uint32_t a, uint32_t b, FPSCR &fpscr)
2278{
2279    int flags = 0;
2280    int x = fp32_compare_eq(a, b, modeConv(fpscr), &flags);
2281    set_fpscr(fpscr, flags);
2282    return x;
2283}
2284
2285template <>
2286bool
2287fplibCompareGE(uint32_t a, uint32_t b, FPSCR &fpscr)
2288{
2289    int flags = 0;
2290    int x = fp32_compare_ge(a, b, modeConv(fpscr), &flags);
2291    set_fpscr(fpscr, flags);
2292    return x;
2293}
2294
2295template <>
2296bool
2297fplibCompareGT(uint32_t a, uint32_t b, FPSCR &fpscr)
2298{
2299    int flags = 0;
2300    int x = fp32_compare_gt(a, b, modeConv(fpscr), &flags);
2301    set_fpscr(fpscr, flags);
2302    return x;
2303}
2304
2305template <>
2306bool
2307fplibCompareUN(uint32_t a, uint32_t b, FPSCR &fpscr)
2308{
2309    int flags = 0;
2310    int x = fp32_compare_un(a, b, modeConv(fpscr), &flags);
2311    set_fpscr(fpscr, flags);
2312    return x;
2313}
2314
2315template <>
2316bool
2317fplibCompareEQ(uint64_t a, uint64_t b, FPSCR &fpscr)
2318{
2319    int flags = 0;
2320    int x = fp64_compare_eq(a, b, modeConv(fpscr), &flags);
2321    set_fpscr(fpscr, flags);
2322    return x;
2323}
2324
2325template <>
2326bool
2327fplibCompareGE(uint64_t a, uint64_t b, FPSCR &fpscr)
2328{
2329    int flags = 0;
2330    int x = fp64_compare_ge(a, b, modeConv(fpscr), &flags);
2331    set_fpscr(fpscr, flags);
2332    return x;
2333}
2334
2335template <>
2336bool
2337fplibCompareGT(uint64_t a, uint64_t b, FPSCR &fpscr)
2338{
2339    int flags = 0;
2340    int x = fp64_compare_gt(a, b, modeConv(fpscr), &flags);
2341    set_fpscr(fpscr, flags);
2342    return x;
2343}
2344
2345template <>
2346bool
2347fplibCompareUN(uint64_t a, uint64_t b, FPSCR &fpscr)
2348{
2349    int flags = 0;
2350    int x = fp64_compare_un(a, b, modeConv(fpscr), &flags);
2351    set_fpscr(fpscr, flags);
2352    return x;
2353}
2354
2355template <>
2356uint16_t
2357fplibAbs(uint16_t op)
2358{
2359    return op & ~(1ULL << (FP16_BITS - 1));
2360}
2361
2362template <>
2363uint32_t
2364fplibAbs(uint32_t op)
2365{
2366    return op & ~(1ULL << (FP32_BITS - 1));
2367}
2368
2369template <>
2370uint64_t
2371fplibAbs(uint64_t op)
2372{
2373    return op & ~(1ULL << (FP64_BITS - 1));
2374}
2375
2376template <>
2377uint16_t
2378fplibAdd(uint16_t op1, uint16_t op2, FPSCR &fpscr)
2379{
2380    int flags = 0;
2381    uint16_t result = fp16_add(op1, op2, 0, modeConv(fpscr), &flags);
2382    set_fpscr0(fpscr, flags);
2383    return result;
2384}
2385
2386template <>
2387uint32_t
2388fplibAdd(uint32_t op1, uint32_t op2, FPSCR &fpscr)
2389{
2390    int flags = 0;
2391    uint32_t result = fp32_add(op1, op2, 0, modeConv(fpscr), &flags);
2392    set_fpscr0(fpscr, flags);
2393    return result;
2394}
2395
2396template <>
2397uint64_t
2398fplibAdd(uint64_t op1, uint64_t op2, FPSCR &fpscr)
2399{
2400    int flags = 0;
2401    uint64_t result = fp64_add(op1, op2, 0, modeConv(fpscr), &flags);
2402    set_fpscr0(fpscr, flags);
2403    return result;
2404}
2405
2406template <>
2407int
2408fplibCompare(uint16_t op1, uint16_t op2, bool signal_nans, FPSCR &fpscr)
2409{
2410    int mode = modeConv(fpscr);
2411    int flags = 0;
2412    int sgn1, exp1, sgn2, exp2, result;
2413    uint16_t mnt1, mnt2;
2414
2415    fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2416    fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2417
2418    if (fp16_is_NaN(exp1, mnt1) || fp16_is_NaN(exp2, mnt2)) {
2419        result = 3;
2420        if (fp16_is_signalling_NaN(exp1, mnt1) ||
2421            fp16_is_signalling_NaN(exp2, mnt2) || signal_nans)
2422            flags |= FPLIB_IOC;
2423    } else {
2424        if (op1 == op2 || (!mnt1 && !mnt2)) {
2425            result = 6;
2426        } else if (sgn1 != sgn2) {
2427            result = sgn1 ? 8 : 2;
2428        } else if (exp1 != exp2) {
2429            result = sgn1 ^ (exp1 < exp2) ? 8 : 2;
2430        } else {
2431            result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2;
2432        }
2433    }
2434
2435    set_fpscr0(fpscr, flags);
2436
2437    return result;
2438}
2439
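// The integer returned by the fplibCompare specialisations is the ARM
// NZCV flag nibble: 0b0011 (3) unordered, 0b0110 (6) equal, 0b1000 (8)
// less than, 0b0010 (2) greater than.
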
2440template <>
2441int
2442fplibCompare(uint32_t op1, uint32_t op2, bool signal_nans, FPSCR &fpscr)
2443{
2444    int mode = modeConv(fpscr);
2445    int flags = 0;
2446    int sgn1, exp1, sgn2, exp2, result;
2447    uint32_t mnt1, mnt2;
2448
2449    fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2450    fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2451
2452    if (fp32_is_NaN(exp1, mnt1) || fp32_is_NaN(exp2, mnt2)) {
2453        result = 3;
2454        if (fp32_is_signalling_NaN(exp1, mnt1) ||
2455            fp32_is_signalling_NaN(exp2, mnt2) || signal_nans)
2456            flags |= FPLIB_IOC;
2457    } else {
2458        if (op1 == op2 || (!mnt1 && !mnt2)) {
2459            result = 6;
2460        } else if (sgn1 != sgn2) {
2461            result = sgn1 ? 8 : 2;
2462        } else if (exp1 != exp2) {
2463            result = sgn1 ^ (exp1 < exp2) ? 8 : 2;
2464        } else {
2465            result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2;
2466        }
2467    }
2468
2469    set_fpscr0(fpscr, flags);
2470
2471    return result;
2472}
2473
2474template <>
2475int
2476fplibCompare(uint64_t op1, uint64_t op2, bool signal_nans, FPSCR &fpscr)
2477{
2478    int mode = modeConv(fpscr);
2479    int flags = 0;
2480    int sgn1, exp1, sgn2, exp2, result;
2481    uint64_t mnt1, mnt2;
2482
2483    fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2484    fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2485
2486    if (fp64_is_NaN(exp1, mnt1) || fp64_is_NaN(exp2, mnt2)) {
2487        result = 3;
2488        if (fp64_is_signalling_NaN(exp1, mnt1) ||
2489            fp64_is_signalling_NaN(exp2, mnt2) || signal_nans)
2490            flags |= FPLIB_IOC;
2491    } else {
2492        if (op1 == op2 || (!mnt1 && !mnt2)) {
2493            result = 6;
2494        } else if (sgn1 != sgn2) {
2495            result = sgn1 ? 8 : 2;
2496        } else if (exp1 != exp2) {
2497            result = sgn1 ^ (exp1 < exp2) ? 8 : 2;
2498        } else {
2499            result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2;
2500        }
2501    }
2502
2503    set_fpscr0(fpscr, flags);
2504
2505    return result;
2506}
2507
2508static uint16_t
2509fp16_FPConvertNaN_32(uint32_t op)
2510{
2511    return fp16_pack(op >> (FP32_BITS - 1), FP16_EXP_INF,
2512                     1ULL << (FP16_MANT_BITS - 1) |
2513                     op >> (FP32_MANT_BITS - FP16_MANT_BITS));
2514}
2515
2516static uint16_t
2517fp16_FPConvertNaN_64(uint64_t op)
2518{
2519    return fp16_pack(op >> (FP64_BITS - 1), FP16_EXP_INF,
2520                     1ULL << (FP16_MANT_BITS - 1) |
2521                     op >> (FP64_MANT_BITS - FP16_MANT_BITS));
2522}
2523
2524static uint32_t
2525fp32_FPConvertNaN_16(uint16_t op)
2526{
2527    return fp32_pack(op >> (FP16_BITS - 1), FP32_EXP_INF,
2528                     1ULL << (FP32_MANT_BITS - 1) |
2529                     (uint32_t)op << (FP32_MANT_BITS - FP16_MANT_BITS));
2530}
2531
2532static uint32_t
2533fp32_FPConvertNaN_64(uint64_t op)
2534{
2535    return fp32_pack(op >> (FP64_BITS - 1), FP32_EXP_INF,
2536                     1ULL << (FP32_MANT_BITS - 1) |
2537                     op >> (FP64_MANT_BITS - FP32_MANT_BITS));
2538}
2539
2540static uint64_t
2541fp64_FPConvertNaN_16(uint16_t op)
2542{
2543    return fp64_pack(op >> (FP16_BITS - 1), FP64_EXP_INF,
2544                     1ULL << (FP64_MANT_BITS - 1) |
2545                     (uint64_t)op << (FP64_MANT_BITS - FP16_MANT_BITS));
2546}
2547
2548static uint64_t
2549fp64_FPConvertNaN_32(uint32_t op)
2550{
2551    return fp64_pack(op >> (FP32_BITS - 1), FP64_EXP_INF,
2552                     1ULL << (FP64_MANT_BITS - 1) |
2553                     (uint64_t)op << (FP64_MANT_BITS - FP32_MANT_BITS));
2554}
2555
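// The fp*_FPConvertNaN_* helpers move a NaN between formats: keep the
// sign, force the exponent to all-ones and the quiet bit to 1, and shift
// the remaining payload so it sits just below the quiet bit, widening or
// narrowing as required.
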
2556static uint16_t
2557fp16_FPOnePointFive(int sgn)
2558{
2559    return fp16_pack(sgn, FP16_EXP_BIAS, 1ULL << (FP16_MANT_BITS - 1));
2560}
2561
2562static uint32_t
2563fp32_FPOnePointFive(int sgn)
2564{
2565    return fp32_pack(sgn, FP32_EXP_BIAS, 1ULL << (FP32_MANT_BITS - 1));
2566}
2567
2568static uint64_t
2569fp64_FPOnePointFive(int sgn)
2570{
2571    return fp64_pack(sgn, FP64_EXP_BIAS, 1ULL << (FP64_MANT_BITS - 1));
2572}
2573
2574static uint16_t
2575fp16_FPThree(int sgn)
2576{
2577    return fp16_pack(sgn, FP16_EXP_BIAS + 1, 1ULL << (FP16_MANT_BITS - 1));
2578}
2579
2580static uint32_t
2581fp32_FPThree(int sgn)
2582{
2583    return fp32_pack(sgn, FP32_EXP_BIAS + 1, 1ULL << (FP32_MANT_BITS - 1));
2584}
2585
2586static uint64_t
2587fp64_FPThree(int sgn)
2588{
2589    return fp64_pack(sgn, FP64_EXP_BIAS + 1, 1ULL << (FP64_MANT_BITS - 1));
2590}
2591
2592static uint16_t
2593fp16_FPTwo(int sgn)
2594{
2595    return fp16_pack(sgn, FP16_EXP_BIAS + 1, 0);
2596}
2597
2598static uint32_t
2599fp32_FPTwo(int sgn)
2600{
2601    return fp32_pack(sgn, FP32_EXP_BIAS + 1, 0);
2602}
2603
2604static uint64_t
2605fp64_FPTwo(int sgn)
2606{
2607    return fp64_pack(sgn, FP64_EXP_BIAS + 1, 0);
2608}
2609
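// fp*_FPTwo, fp*_FPThree and fp*_FPOnePointFive build the exact constants
// needed by the fused step instructions below; in half precision they
// encode as 0x4000, 0x4200 and 0x3e00 respectively.
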
2610template <>
2611uint16_t
2612fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr)
2613{
2614    int mode = modeConv(fpscr);
2615    int flags = 0;
2616    int sgn, exp;
2617    uint32_t mnt;
2618    uint16_t result;
2619
2620    // Unpack floating-point operand optionally with flush-to-zero:
2621    fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2622
2623    bool alt_hp = fpscr.ahp;
2624
2625    if (fp32_is_NaN(exp, mnt)) {
2626        if (alt_hp) {
2627            result = fp16_zero(sgn);
2628        } else if (fpscr.dn) {
2629            result = fp16_defaultNaN();
2630        } else {
2631            result = fp16_FPConvertNaN_32(op);
2632        }
2633        if (!(mnt >> (FP32_MANT_BITS - 1) & 1) || alt_hp) {
2634            flags |= FPLIB_IOC;
2635        }
2636    } else if (exp == FP32_EXP_INF) {
2637        if (alt_hp) {
2638            result = ((uint16_t)sgn << (FP16_BITS - 1) |
2639                      ((1ULL << (FP16_BITS - 1)) - 1));
2640            flags |= FPLIB_IOC;
2641        } else {
2642            result = fp16_infinity(sgn);
2643        }
2644    } else if (!mnt) {
2645        result = fp16_zero(sgn);
2646    } else {
2647        result =
2648            fp16_round_(sgn, exp - FP32_EXP_BIAS + FP16_EXP_BIAS,
2649                        mnt >> (FP32_MANT_BITS - FP16_BITS) |
2650                        !!(mnt & ((1ULL << (FP32_MANT_BITS - FP16_BITS)) - 1)),
2651                        rounding, (mode & 0xf) | alt_hp << 4, &flags);
2652    }
2653
2654    set_fpscr0(fpscr, flags);
2655
2656    return result;
2657}
2658
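// With FPSCR.AHP set, half precision is the alternative format: the
// all-ones exponent encodes large normals rather than infinities and
// NaNs.  Hence a NaN source converts to zero, an infinity to the
// maximum-magnitude pattern (sign | 0x7fff), and both raise Invalid
// Operation.
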
2659template <>
2660uint16_t
2661fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr)
2662{
2663    int mode = modeConv(fpscr);
2664    int flags = 0;
2665    int sgn, exp;
2666    uint64_t mnt;
2667    uint16_t result;
2668
2669    // Unpack floating-point operand optionally with flush-to-zero:
2670    fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2671
2672    bool alt_hp = fpscr.ahp;
2673
2674    if (fp64_is_NaN(exp, mnt)) {
2675        if (alt_hp) {
2676            result = fp16_zero(sgn);
2677        } else if (fpscr.dn) {
2678            result = fp16_defaultNaN();
2679        } else {
2680            result = fp16_FPConvertNaN_64(op);
2681        }
2682        if (!(mnt >> (FP64_MANT_BITS - 1) & 1) || alt_hp) {
2683            flags |= FPLIB_IOC;
2684        }
2685    } else if (exp == FP64_EXP_INF) {
2686        if (alt_hp) {
2687            result = ((uint16_t)sgn << (FP16_BITS - 1) |
2688                      ((1ULL << (FP16_BITS - 1)) - 1));
2689            flags |= FPLIB_IOC;
2690        } else {
2691            result = fp16_infinity(sgn);
2692        }
2693    } else if (!mnt) {
2694        result = fp16_zero(sgn);
2695    } else {
2696        result =
2697            fp16_round_(sgn, exp - FP64_EXP_BIAS + FP16_EXP_BIAS,
2698                        mnt >> (FP64_MANT_BITS - FP16_BITS) |
2699                        !!(mnt & ((1ULL << (FP64_MANT_BITS - FP16_BITS)) - 1)),
2700                        rounding, (mode & 0xf) | alt_hp << 4, &flags);
2701    }
2702
2703    set_fpscr0(fpscr, flags);
2704
2705    return result;
2706}
2707
2708template <>
2709uint32_t
2710fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr)
2711{
2712    int mode = modeConv(fpscr);
2713    int flags = 0;
2714    int sgn, exp;
2715    uint16_t mnt;
2716    uint32_t result;
2717
2718    // Unpack floating-point operand optionally with flush-to-zero:
2719    fp16_unpack(&sgn, &exp, &mnt, op, mode & 0xf, &flags);
2720
2721    if (fp16_is_NaN(exp, mnt) && !fpscr.ahp) {
2722        if (fpscr.dn) {
2723            result = fp32_defaultNaN();
2724        } else {
2725            result = fp32_FPConvertNaN_16(op);
2726        }
2727        if (!(mnt >> (FP16_MANT_BITS - 1) & 1)) {
2728            flags |= FPLIB_IOC;
2729        }
2730    } else if (exp == FP16_EXP_INF && !fpscr.ahp) {
2731        result = fp32_infinity(sgn);
2732    } else if (!mnt) {
2733        result = fp32_zero(sgn);
2734    } else {
2735        mnt = fp16_normalise(mnt, &exp);
2736        result = fp32_pack(sgn, (exp - FP16_EXP_BIAS +
2737                                 FP32_EXP_BIAS + FP16_EXP_BITS),
2738                           (uint32_t)mnt << (FP32_MANT_BITS - FP16_BITS + 1));
2739    }
2740
2741    set_fpscr0(fpscr, flags);
2742
2743    return result;
2744}
2745
2746template <>
2747uint32_t
2748fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr)
2749{
2750    int mode = modeConv(fpscr);
2751    int flags = 0;
2752    int sgn, exp;
2753    uint64_t mnt;
2754    uint32_t result;
2755
2756    // Unpack floating-point operand optionally with flush-to-zero:
2757    fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2758
2759    if (fp64_is_NaN(exp, mnt)) {
2760        if (fpscr.dn) {
2761            result = fp32_defaultNaN();
2762        } else {
2763            result = fp32_FPConvertNaN_64(op);
2764        }
2765        if (!(mnt >> (FP64_MANT_BITS - 1) & 1)) {
2766            flags |= FPLIB_IOC;
2767        }
2768    } else if (exp == FP64_EXP_INF) {
2769        result = fp32_infinity(sgn);
2770    } else if (!mnt) {
2771        result = fp32_zero(sgn);
2772    } else {
2773        result =
2774            fp32_round_(sgn, exp - FP64_EXP_BIAS + FP32_EXP_BIAS,
2775                        mnt >> (FP64_MANT_BITS - FP32_BITS) |
2776                        !!(mnt & ((1ULL << (FP64_MANT_BITS - FP32_BITS)) - 1)),
2777                        rounding, mode, &flags);
2778    }
2779
2780    set_fpscr0(fpscr, flags);
2781
2782    return result;
2783}
2784
2785template <>
2786uint64_t
2787fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr)
2788{
2789    int mode = modeConv(fpscr);
2790    int flags = 0;
2791    int sgn, exp;
2792    uint16_t mnt;
2793    uint64_t result;
2794
2795    // Unpack floating-point operand optionally with flush-to-zero:
2796    fp16_unpack(&sgn, &exp, &mnt, op, mode & 0xf, &flags);
2797
2798    if (fp16_is_NaN(exp, mnt) && !fpscr.ahp) {
2799        if (fpscr.dn) {
2800            result = fp64_defaultNaN();
2801        } else {
2802            result = fp64_FPConvertNaN_16(op);
2803        }
2804        if (!(mnt >> (FP16_MANT_BITS - 1) & 1)) {
2805            flags |= FPLIB_IOC;
2806        }
2807    } else if (exp == FP16_EXP_INF && !fpscr.ahp) {
2808        result = fp64_infinity(sgn);
2809    } else if (!mnt) {
2810        result = fp64_zero(sgn);
2811    } else {
2812        mnt = fp16_normalise(mnt, &exp);
2813        result = fp64_pack(sgn, (exp - FP16_EXP_BIAS +
2814                                 FP64_EXP_BIAS + FP16_EXP_BITS),
2815                           (uint64_t)mnt << (FP64_MANT_BITS - FP16_BITS + 1));
2816    }
2817
2818    set_fpscr0(fpscr, flags);
2819
2820    return result;
2821}
2822
2823template <>
2824uint64_t
2825fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr)
2826{
2827    int mode = modeConv(fpscr);
2828    int flags = 0;
2829    int sgn, exp;
2830    uint32_t mnt;
2831    uint64_t result;
2832
2833    // Unpack floating-point operand optionally with flush-to-zero:
2834    fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2835
2836    if (fp32_is_NaN(exp, mnt)) {
2837        if (fpscr.dn) {
2838            result = fp64_defaultNaN();
2839        } else {
2840            result = fp64_FPConvertNaN_32(op);
2841        }
2842        if (!(mnt >> (FP32_MANT_BITS - 1) & 1)) {
2843            flags |= FPLIB_IOC;
2844        }
2845    } else if (exp == FP32_EXP_INF) {
2846        result = fp64_infinity(sgn);
2847    } else if (!mnt) {
2848        result = fp64_zero(sgn);
2849    } else {
2850        mnt = fp32_normalise(mnt, &exp);
2851        result = fp64_pack(sgn, (exp - FP32_EXP_BIAS +
2852                                 FP64_EXP_BIAS + FP32_EXP_BITS),
2853                           (uint64_t)mnt << (FP64_MANT_BITS - FP32_BITS + 1));
2854    }
2855
2856    set_fpscr0(fpscr, flags);
2857
2858    return result;
2859}
2860
2861template <>
2862uint16_t
2863fplibMulAdd(uint16_t addend, uint16_t op1, uint16_t op2, FPSCR &fpscr)
2864{
2865    int flags = 0;
2866    uint16_t result = fp16_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags);
2867    set_fpscr0(fpscr, flags);
2868    return result;
2869}
2870
2871template <>
2872uint32_t
2873fplibMulAdd(uint32_t addend, uint32_t op1, uint32_t op2, FPSCR &fpscr)
2874{
2875    int flags = 0;
2876    uint32_t result = fp32_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags);
2877    set_fpscr0(fpscr, flags);
2878    return result;
2879}
2880
2881template <>
2882uint64_t
2883fplibMulAdd(uint64_t addend, uint64_t op1, uint64_t op2, FPSCR &fpscr)
2884{
2885    int flags = 0;
2886    uint64_t result = fp64_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags);
2887    set_fpscr0(fpscr, flags);
2888    return result;
2889}
2890
2891template <>
2892uint16_t
2893fplibDiv(uint16_t op1, uint16_t op2, FPSCR &fpscr)
2894{
2895    int flags = 0;
2896    uint16_t result = fp16_div(op1, op2, modeConv(fpscr), &flags);
2897    set_fpscr0(fpscr, flags);
2898    return result;
2899}
2900
2901template <>
2902uint32_t
2903fplibDiv(uint32_t op1, uint32_t op2, FPSCR &fpscr)
2904{
2905    int flags = 0;
2906    uint32_t result = fp32_div(op1, op2, modeConv(fpscr), &flags);
2907    set_fpscr0(fpscr, flags);
2908    return result;
2909}
2910
2911template <>
2912uint64_t
2913fplibDiv(uint64_t op1, uint64_t op2, FPSCR &fpscr)
2914{
2915    int flags = 0;
2916    uint64_t result = fp64_div(op1, op2, modeConv(fpscr), &flags);
2917    set_fpscr0(fpscr, flags);
2918    return result;
2919}
2920
2921template <>
2922uint16_t
2923fplibExpA(uint16_t op)
2924{
2925    static uint16_t coeff[32] = {
2926        0x0000,
2927        0x0016,
2928        0x002d,
2929        0x0045,
2930        0x005d,
2931        0x0075,
2932        0x008e,
2933        0x00a8,
2934        0x00c2,
2935        0x00dc,
2936        0x00f8,
2937        0x0114,
2938        0x0130,
2939        0x014d,
2940        0x016b,
2941        0x0189,
2942        0x01a8,
2943        0x01c8,
2944        0x01e8,
2945        0x0209,
2946        0x022b,
2947        0x024e,
2948        0x0271,
2949        0x0295,
2950        0x02ba,
2951        0x02e0,
2952        0x0306,
2953        0x032e,
2954        0x0356,
2955        0x037f,
2956        0x03a9,
2957        0x03d4
2958    };
2959    return ((((op >> 5) & ((1 << FP16_EXP_BITS) - 1)) << FP16_MANT_BITS) |
2960            coeff[op & ((1 << 5) - 1)]);
2961}
2962
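// The fplibExpA specialisations implement the FEXPA-style exponential
// accelerator: the low 5 (half) or 6 (single/double) bits of the operand
// index a table of coeff[i] ~= (2^(i/N) - 1) scaled to the mantissa
// width, and the next field of the operand is placed directly into the
// exponent; e.g. the fp64 entry 0x6a09e667f3bcd is (sqrt(2) - 1) * 2^52.
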
2963template <>
2964uint32_t
2965fplibExpA(uint32_t op)
2966{
2967    static uint32_t coeff[64] = {
2968        0x000000,
2969        0x0164d2,
2970        0x02cd87,
2971        0x043a29,
2972        0x05aac3,
2973        0x071f62,
2974        0x08980f,
2975        0x0a14d5,
2976        0x0b95c2,
2977        0x0d1adf,
2978        0x0ea43a,
2979        0x1031dc,
2980        0x11c3d3,
2981        0x135a2b,
2982        0x14f4f0,
2983        0x16942d,
2984        0x1837f0,
2985        0x19e046,
2986        0x1b8d3a,
2987        0x1d3eda,
2988        0x1ef532,
2989        0x20b051,
2990        0x227043,
2991        0x243516,
2992        0x25fed7,
2993        0x27cd94,
2994        0x29a15b,
2995        0x2b7a3a,
2996        0x2d583f,
2997        0x2f3b79,
2998        0x3123f6,
2999        0x3311c4,
3000        0x3504f3,
3001        0x36fd92,
3002        0x38fbaf,
3003        0x3aff5b,
3004        0x3d08a4,
3005        0x3f179a,
3006        0x412c4d,
3007        0x4346cd,
3008        0x45672a,
3009        0x478d75,
3010        0x49b9be,
3011        0x4bec15,
3012        0x4e248c,
3013        0x506334,
3014        0x52a81e,
3015        0x54f35b,
3016        0x5744fd,
3017        0x599d16,
3018        0x5bfbb8,
3019        0x5e60f5,
3020        0x60ccdf,
3021        0x633f89,
3022        0x65b907,
3023        0x68396a,
3024        0x6ac0c7,
3025        0x6d4f30,
3026        0x6fe4ba,
3027        0x728177,
3028        0x75257d,
3029        0x77d0df,
3030        0x7a83b3,
3031        0x7d3e0c
3032    };
3033    return ((((op >> 6) & ((1 << FP32_EXP_BITS) - 1)) << FP32_MANT_BITS) |
3034            coeff[op & ((1 << 6) - 1)]);
3035}
3036
3037template <>
3038uint64_t
3039fplibExpA(uint64_t op)
3040{
3041    static uint64_t coeff[64] = {
3042        0x0000000000000ULL,
3043        0x02c9a3e778061ULL,
3044        0x059b0d3158574ULL,
3045        0x0874518759bc8ULL,
3046        0x0b5586cf9890fULL,
3047        0x0e3ec32d3d1a2ULL,
3048        0x11301d0125b51ULL,
3049        0x1429aaea92de0ULL,
3050        0x172b83c7d517bULL,
3051        0x1a35beb6fcb75ULL,
3052        0x1d4873168b9aaULL,
3053        0x2063b88628cd6ULL,
3054        0x2387a6e756238ULL,
3055        0x26b4565e27cddULL,
3056        0x29e9df51fdee1ULL,
3057        0x2d285a6e4030bULL,
3058        0x306fe0a31b715ULL,
3059        0x33c08b26416ffULL,
3060        0x371a7373aa9cbULL,
3061        0x3a7db34e59ff7ULL,
3062        0x3dea64c123422ULL,
3063        0x4160a21f72e2aULL,
3064        0x44e086061892dULL,
3065        0x486a2b5c13cd0ULL,
3066        0x4bfdad5362a27ULL,
3067        0x4f9b2769d2ca7ULL,
3068        0x5342b569d4f82ULL,
3069        0x56f4736b527daULL,
3070        0x5ab07dd485429ULL,
3071        0x5e76f15ad2148ULL,
3072        0x6247eb03a5585ULL,
3073        0x6623882552225ULL,
3074        0x6a09e667f3bcdULL,
3075        0x6dfb23c651a2fULL,
3076        0x71f75e8ec5f74ULL,
3077        0x75feb564267c9ULL,
3078        0x7a11473eb0187ULL,
3079        0x7e2f336cf4e62ULL,
3080        0x82589994cce13ULL,
3081        0x868d99b4492edULL,
3082        0x8ace5422aa0dbULL,
3083        0x8f1ae99157736ULL,
3084        0x93737b0cdc5e5ULL,
3085        0x97d829fde4e50ULL,
3086        0x9c49182a3f090ULL,
3087        0xa0c667b5de565ULL,
3088        0xa5503b23e255dULL,
3089        0xa9e6b5579fdbfULL,
3090        0xae89f995ad3adULL,
3091        0xb33a2b84f15fbULL,
3092        0xb7f76f2fb5e47ULL,
3093        0xbcc1e904bc1d2ULL,
3094        0xc199bdd85529cULL,
3095        0xc67f12e57d14bULL,
3096        0xcb720dcef9069ULL,
3097        0xd072d4a07897cULL,
3098        0xd5818dcfba487ULL,
3099        0xda9e603db3285ULL,
3100        0xdfc97337b9b5fULL,
3101        0xe502ee78b3ff6ULL,
3102        0xea4afa2a490daULL,
3103        0xefa1bee615a27ULL,
3104        0xf50765b6e4540ULL,
3105        0xfa7c1819e90d8ULL
3106    };
3107    return ((((op >> 6) & ((1 << FP64_EXP_BITS) - 1)) << FP64_MANT_BITS) |
3108            coeff[op & ((1 << 6) - 1)]);
3109}
3110
3111static uint16_t
3112fp16_repack(int sgn, int exp, uint16_t mnt)
3113{
3114    return fp16_pack(sgn, mnt >> FP16_MANT_BITS ? exp : 0, mnt);
3115}
3116
3117static uint32_t
3118fp32_repack(int sgn, int exp, uint32_t mnt)
3119{
3120    return fp32_pack(sgn, mnt >> FP32_MANT_BITS ? exp : 0, mnt);
3121}
3122
3123static uint64_t
3124fp64_repack(int sgn, int exp, uint64_t mnt)
3125{
3126    return fp64_pack(sgn, mnt >> FP64_MANT_BITS ? exp : 0, mnt);
3127}
3128
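// fp*_repack rebuilds a packed value from unpacked fields.  Unpacking ORs
// the implicit leading 1 into normal mantissas, so 'mnt >> FP*_MANT_BITS'
// tests that hidden bit: if it is clear the value is denormal (or zero)
// and the exponent field must be encoded as 0.
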
3129static void
3130fp16_minmaxnum(uint16_t *op1, uint16_t *op2, int sgn)
3131{
3132    // Treat a single quiet-NaN as +Infinity/-Infinity
3133    if (!((uint16_t)~(*op1 << 1) >> FP16_MANT_BITS) &&
3134        (uint16_t)~(*op2 << 1) >> FP16_MANT_BITS)
3135        *op1 = fp16_infinity(sgn);
3136    if (!((uint16_t)~(*op2 << 1) >> FP16_MANT_BITS) &&
3137        (uint16_t)~(*op1 << 1) >> FP16_MANT_BITS)
3138        *op2 = fp16_infinity(sgn);
3139}
3140
3141static void
3142fp32_minmaxnum(uint32_t *op1, uint32_t *op2, int sgn)
3143{
3144    // Treat a single quiet-NaN as +Infinity/-Infinity
3145    if (!((uint32_t)~(*op1 << 1) >> FP32_MANT_BITS) &&
3146        (uint32_t)~(*op2 << 1) >> FP32_MANT_BITS)
3147        *op1 = fp32_infinity(sgn);
3148    if (!((uint32_t)~(*op2 << 1) >> FP32_MANT_BITS) &&
3149        (uint32_t)~(*op1 << 1) >> FP32_MANT_BITS)
3150        *op2 = fp32_infinity(sgn);
3151}
3152
3153static void
3154fp64_minmaxnum(uint64_t *op1, uint64_t *op2, int sgn)
3155{
3156    // Treat a single quiet-NaN as +Infinity/-Infinity
3157    if (!((uint64_t)~(*op1 << 1) >> FP64_MANT_BITS) &&
3158        (uint64_t)~(*op2 << 1) >> FP64_MANT_BITS)
3159        *op1 = fp64_infinity(sgn);
3160    if (!((uint64_t)~(*op2 << 1) >> FP64_MANT_BITS) &&
3161        (uint64_t)~(*op1 << 1) >> FP64_MANT_BITS)
3162        *op2 = fp64_infinity(sgn);
3163}
3164
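// The test !((T)~(op << 1) >> FP*_MANT_BITS) above is true exactly when
// the exponent and the quiet bit are all ones once the sign is shifted
// out, i.e. when op is a quiet NaN.  Replacing a lone quiet NaN with
// +/-infinity makes the ordinary max/min that follows pick the other,
// numeric, operand, which is the maxnum/minnum semantics.
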
3165template <>
3166uint16_t
3167fplibMax(uint16_t op1, uint16_t op2, FPSCR &fpscr)
3168{
3169    int mode = modeConv(fpscr);
3170    int flags = 0;
3171    int sgn1, exp1, sgn2, exp2;
3172    uint16_t mnt1, mnt2, x, result;
3173
3174    fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3175    fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3176
3177    if ((x = fp16_process_NaNs(op1, op2, mode, &flags))) {
3178        result = x;
3179    } else {
3180        result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ?
3181                  fp16_repack(sgn1, exp1, mnt1) :
3182                  fp16_repack(sgn2, exp2, mnt2));
3183    }
3184    set_fpscr0(fpscr, flags);
3185    return result;
3186}
3187
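// The selection expression above also orders signed zeroes correctly:
// with differing signs the positive operand wins, and with equal signs
// the raw encodings are compared directly (inverted for negatives), which
// works because same-sign IEEE values order like unsigned integers.
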
3188template <>
3189uint32_t
3190fplibMax(uint32_t op1, uint32_t op2, FPSCR &fpscr)
3191{
3192    int mode = modeConv(fpscr);
3193    int flags = 0;
3194    int sgn1, exp1, sgn2, exp2;
3195    uint32_t mnt1, mnt2, x, result;
3196
3197    fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3198    fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3199
3200    if ((x = fp32_process_NaNs(op1, op2, mode, &flags))) {
3201        result = x;
3202    } else {
3203        result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ?
3204                  fp32_repack(sgn1, exp1, mnt1) :
3205                  fp32_repack(sgn2, exp2, mnt2));
3206    }
3207    set_fpscr0(fpscr, flags);
3208    return result;
3209}
3210
3211template <>
3212uint64_t
3213fplibMax(uint64_t op1, uint64_t op2, FPSCR &fpscr)
3214{
3215    int mode = modeConv(fpscr);
3216    int flags = 0;
3217    int sgn1, exp1, sgn2, exp2;
3218    uint64_t mnt1, mnt2, x, result;
3219
3220    fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3221    fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3222
3223    if ((x = fp64_process_NaNs(op1, op2, mode, &flags))) {
3224        result = x;
3225    } else {
3226        result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ?
3227                  fp64_repack(sgn1, exp1, mnt1) :
3228                  fp64_repack(sgn2, exp2, mnt2));
3229    }
3230    set_fpscr0(fpscr, flags);
3231    return result;
3232}
3233
3234template <>
3235uint16_t
3236fplibMaxNum(uint16_t op1, uint16_t op2, FPSCR &fpscr)
3237{
3238    fp16_minmaxnum(&op1, &op2, 1);
3239    return fplibMax<uint16_t>(op1, op2, fpscr);
3240}
3241
3242template <>
3243uint32_t
3244fplibMaxNum(uint32_t op1, uint32_t op2, FPSCR &fpscr)
3245{
3246    fp32_minmaxnum(&op1, &op2, 1);
3247    return fplibMax<uint32_t>(op1, op2, fpscr);
3248}
3249
3250template <>
3251uint64_t
3252fplibMaxNum(uint64_t op1, uint64_t op2, FPSCR &fpscr)
3253{
3254    fp64_minmaxnum(&op1, &op2, 1);
3255    return fplibMax<uint64_t>(op1, op2, fpscr);
3256}
3257
3258template <>
3259uint16_t
3260fplibMin(uint16_t op1, uint16_t op2, FPSCR &fpscr)
3261{
3262    int mode = modeConv(fpscr);
3263    int flags = 0;
3264    int sgn1, exp1, sgn2, exp2;
3265    uint16_t mnt1, mnt2, x, result;
3266
3267    fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3268    fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3269
3270    if ((x = fp16_process_NaNs(op1, op2, mode, &flags))) {
3271        result = x;
3272    } else {
3273        result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ?
3274                  fp16_repack(sgn1, exp1, mnt1) :
3275                  fp16_repack(sgn2, exp2, mnt2));
3276    }
3277    set_fpscr0(fpscr, flags);
3278    return result;
3279}
3280
3281template <>
3282uint32_t
3283fplibMin(uint32_t op1, uint32_t op2, FPSCR &fpscr)
3284{
3285    int mode = modeConv(fpscr);
3286    int flags = 0;
3287    int sgn1, exp1, sgn2, exp2;
3288    uint32_t mnt1, mnt2, x, result;
3289
3290    fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3291    fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3292
3293    if ((x = fp32_process_NaNs(op1, op2, mode, &flags))) {
3294        result = x;
3295    } else {
3296        result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ?
3297                  fp32_repack(sgn1, exp1, mnt1) :
3298                  fp32_repack(sgn2, exp2, mnt2));
3299    }
3300    set_fpscr0(fpscr, flags);
3301    return result;
3302}
3303
3304template <>
3305uint64_t
3306fplibMin(uint64_t op1, uint64_t op2, FPSCR &fpscr)
3307{
3308    int mode = modeConv(fpscr);
3309    int flags = 0;
3310    int sgn1, exp1, sgn2, exp2;
3311    uint64_t mnt1, mnt2, x, result;
3312
3313    fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3314    fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3315
3316    if ((x = fp64_process_NaNs(op1, op2, mode, &flags))) {
3317        result = x;
3318    } else {
3319        result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ?
3320                  fp64_repack(sgn1, exp1, mnt1) :
3321                  fp64_repack(sgn2, exp2, mnt2));
3322    }
3323    set_fpscr0(fpscr, flags);
3324    return result;
3325}
3326
3327template <>
3328uint16_t
3329fplibMinNum(uint16_t op1, uint16_t op2, FPSCR &fpscr)
3330{
3331    fp16_minmaxnum(&op1, &op2, 0);
3332    return fplibMin<uint16_t>(op1, op2, fpscr);
3333}
3334
3335template <>
3336uint32_t
3337fplibMinNum(uint32_t op1, uint32_t op2, FPSCR &fpscr)
3338{
3339    fp32_minmaxnum(&op1, &op2, 0);
3340    return fplibMin<uint32_t>(op1, op2, fpscr);
3341}
3342
3343template <>
3344uint64_t
3345fplibMinNum(uint64_t op1, uint64_t op2, FPSCR &fpscr)
3346{
3347    fp64_minmaxnum(&op1, &op2, 0);
3348    return fplibMin<uint64_t>(op1, op2, fpscr);
3349}
3350
3351template <>
3352uint16_t
3353fplibMul(uint16_t op1, uint16_t op2, FPSCR &fpscr)
3354{
3355    int flags = 0;
3356    uint16_t result = fp16_mul(op1, op2, modeConv(fpscr), &flags);
3357    set_fpscr0(fpscr, flags);
3358    return result;
3359}
3360
3361template <>
3362uint32_t
3363fplibMul(uint32_t op1, uint32_t op2, FPSCR &fpscr)
3364{
3365    int flags = 0;
3366    uint32_t result = fp32_mul(op1, op2, modeConv(fpscr), &flags);
3367    set_fpscr0(fpscr, flags);
3368    return result;
3369}
3370
3371template <>
3372uint64_t
3373fplibMul(uint64_t op1, uint64_t op2, FPSCR &fpscr)
3374{
3375    int flags = 0;
3376    uint64_t result = fp64_mul(op1, op2, modeConv(fpscr), &flags);
3377    set_fpscr0(fpscr, flags);
3378    return result;
3379}
3380
3381template <>
3382uint16_t
3383fplibMulX(uint16_t op1, uint16_t op2, FPSCR &fpscr)
3384{
3385    int mode = modeConv(fpscr);
3386    int flags = 0;
3387    int sgn1, exp1, sgn2, exp2;
3388    uint16_t mnt1, mnt2, result;
3389
3390    fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3391    fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3392
3393    result = fp16_process_NaNs(op1, op2, mode, &flags);
3394    if (!result) {
3395        if ((exp1 == FP16_EXP_INF && !mnt2) ||
3396            (exp2 == FP16_EXP_INF && !mnt1)) {
3397            result = fp16_FPTwo(sgn1 ^ sgn2);
3398        } else if (exp1 == FP16_EXP_INF || exp2 == FP16_EXP_INF) {
3399            result = fp16_infinity(sgn1 ^ sgn2);
3400        } else if (!mnt1 || !mnt2) {
3401            result = fp16_zero(sgn1 ^ sgn2);
3402        } else {
3403            result = fp16_mul(op1, op2, mode, &flags);
3404        }
3405    }
3406
3407    set_fpscr0(fpscr, flags);
3408
3409    return result;
3410}
3411
3412template <>
3413uint32_t
3414fplibMulX(uint32_t op1, uint32_t op2, FPSCR &fpscr)
3415{
3416    int mode = modeConv(fpscr);
3417    int flags = 0;
3418    int sgn1, exp1, sgn2, exp2;
3419    uint32_t mnt1, mnt2, result;
3420
3421    fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3422    fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3423
3424    result = fp32_process_NaNs(op1, op2, mode, &flags);
3425    if (!result) {
3426        if ((exp1 == FP32_EXP_INF && !mnt2) ||
3427            (exp2 == FP32_EXP_INF && !mnt1)) {
3428            result = fp32_FPTwo(sgn1 ^ sgn2);
3429        } else if (exp1 == FP32_EXP_INF || exp2 == FP32_EXP_INF) {
3430            result = fp32_infinity(sgn1 ^ sgn2);
3431        } else if (!mnt1 || !mnt2) {
3432            result = fp32_zero(sgn1 ^ sgn2);
3433        } else {
3434            result = fp32_mul(op1, op2, mode, &flags);
3435        }
3436    }
3437
3438    set_fpscr0(fpscr, flags);
3439
3440    return result;
3441}
3442
3443template <>
3444uint64_t
3445fplibMulX(uint64_t op1, uint64_t op2, FPSCR &fpscr)
3446{
3447    int mode = modeConv(fpscr);
3448    int flags = 0;
3449    int sgn1, exp1, sgn2, exp2;
3450    uint64_t mnt1, mnt2, result;
3451
3452    fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3453    fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3454
3455    result = fp64_process_NaNs(op1, op2, mode, &flags);
3456    if (!result) {
3457        if ((exp1 == FP64_EXP_INF && !mnt2) ||
3458            (exp2 == FP64_EXP_INF && !mnt1)) {
3459            result = fp64_FPTwo(sgn1 ^ sgn2);
3460        } else if (exp1 == FP64_EXP_INF || exp2 == FP64_EXP_INF) {
3461            result = fp64_infinity(sgn1 ^ sgn2);
3462        } else if (!mnt1 || !mnt2) {
3463            result = fp64_zero(sgn1 ^ sgn2);
3464        } else {
3465            result = fp64_mul(op1, op2, mode, &flags);
3466        }
3467    }
3468
3469    set_fpscr0(fpscr, flags);
3470
3471    return result;
3472}
3473
3474template <>
3475uint16_t
3476fplibNeg(uint16_t op)
3477{
3478    return op ^ 1ULL << (FP16_BITS - 1);
3479}
3480
3481template <>
3482uint32_t
3483fplibNeg(uint32_t op)
3484{
3485    return op ^ 1ULL << (FP32_BITS - 1);
3486}
3487
3488template <>
3489uint64_t
3490fplibNeg(uint64_t op)
3491{
3492    return op ^ 1ULL << (FP64_BITS - 1);
3493}
3494
3495static const uint8_t recip_sqrt_estimate[256] = {
3496    255, 253, 251, 249, 247, 245, 243, 242, 240, 238, 236, 234, 233, 231, 229, 228,
3497    226, 224, 223, 221, 219, 218, 216, 215, 213, 212, 210, 209, 207, 206, 204, 203,
3498    201, 200, 198, 197, 196, 194, 193, 192, 190, 189, 188, 186, 185, 184, 183, 181,
3499    180, 179, 178, 176, 175, 174, 173, 172, 170, 169, 168, 167, 166, 165, 164, 163,
3500    162, 160, 159, 158, 157, 156, 155, 154, 153, 152, 151, 150, 149, 148, 147, 146,
3501    145, 144, 143, 142, 141, 140, 140, 139, 138, 137, 136, 135, 134, 133, 132, 131,
3502    131, 130, 129, 128, 127, 126, 126, 125, 124, 123, 122, 121, 121, 120, 119, 118,
3503    118, 117, 116, 115, 114, 114, 113, 112, 111, 111, 110, 109, 109, 108, 107, 106,
3504    105, 104, 103, 101, 100,  99,  97,  96,  95,  93,  92,  91,  90,  88,  87,  86,
3505    85,  84,  82,  81,  80,  79,  78,  77,  76,  75,  74,  72,  71,  70,  69,  68,
3506    67,  66,  65,  64,  63,  62,  61,  60,  60,  59,  58,  57,  56,  55,  54,  53,
3507    52,  51,  51,  50,  49,  48,  47,  46,  46,  45,  44,  43,  42,  42,  41,  40,
3508    39,  38,  38,  37,  36,  35,  35,  34,  33,  33,  32,  31,  30,  30,  29,  28,
3509    28,  27,  26,  26,  25,  24,  24,  23,  22,  22,  21,  20,  20,  19,  19,  18,
3510    17,  17,  16,  16,  15,  14,  14,  13,  13,  12,  11,  11,  10,  10,   9,   9,
3511    8,   8,   7,   6,   6,   5,   5,   4,   4,   3,   3,   2,   2,   1,   1,   0
3512};
3513
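// recip_sqrt_estimate matches the 8-bit table in the ARM ARM pseudocode.
// The index is the (inverted) exponent parity followed by the top seven
// mantissa bits, mapping the input onto x in [0.25, 1.0), and entry i is
// approximately (1/sqrt(x) - 1) * 256, falling from 255 down to 0.
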
3514template <>
3515uint16_t
3516fplibRSqrtEstimate(uint16_t op, FPSCR &fpscr)
3517{
3518    int mode = modeConv(fpscr);
3519    int flags = 0;
3520    int sgn, exp;
3521    uint16_t mnt, result;
3522
3523    fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags);
3524
3525    if (fp16_is_NaN(exp, mnt)) {
3526        result = fp16_process_NaN(op, mode, &flags);
3527    } else if (!mnt) {
3528        result = fp16_infinity(sgn);
3529        flags |= FPLIB_DZC;
3530    } else if (sgn) {
3531        result = fp16_defaultNaN();
3532        flags |= FPLIB_IOC;
3533    } else if (exp == FP16_EXP_INF) {
3534        result = fp16_zero(0);
3535    } else {
3536        exp += FP16_EXP_BITS;
3537        mnt = fp16_normalise(mnt, &exp);
3538        mnt = recip_sqrt_estimate[(~exp & 1) << 7 |
3539                                  (mnt >> (FP16_BITS - 8) & 127)];
3540        result = fp16_pack(0, (3 * FP16_EXP_BIAS - exp - 1) >> 1,
3541                           mnt << (FP16_MANT_BITS - 8));
3542    }
3543
3544    set_fpscr0(fpscr, flags);
3545
3546    return result;
3547}
3548
3549template <>
3550uint32_t
3551fplibRSqrtEstimate(uint32_t op, FPSCR &fpscr)
3552{
3553    int mode = modeConv(fpscr);
3554    int flags = 0;
3555    int sgn, exp;
3556    uint32_t mnt, result;
3557
3558    fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
3559
3560    if (fp32_is_NaN(exp, mnt)) {
3561        result = fp32_process_NaN(op, mode, &flags);
3562    } else if (!mnt) {
3563        result = fp32_infinity(sgn);
3564        flags |= FPLIB_DZC;
3565    } else if (sgn) {
3566        result = fp32_defaultNaN();
3567        flags |= FPLIB_IOC;
3568    } else if (exp == FP32_EXP_INF) {
3569        result = fp32_zero(0);
3570    } else {
3571        exp += FP32_EXP_BITS;
3572        mnt = fp32_normalise(mnt, &exp);
3573        mnt = recip_sqrt_estimate[(~exp & 1) << 7 |
3574                                  (mnt >> (FP32_BITS - 8) & 127)];
3575        result = fp32_pack(0, (3 * FP32_EXP_BIAS - exp - 1) >> 1,
3576                           mnt << (FP32_MANT_BITS - 8));
3577    }
3578
3579    set_fpscr0(fpscr, flags);
3580
3581    return result;
3582}
3583
3584template <>
3585uint64_t
3586fplibRSqrtEstimate(uint64_t op, FPSCR &fpscr)
3587{
3588    int mode = modeConv(fpscr);
3589    int flags = 0;
3590    int sgn, exp;
3591    uint64_t mnt, result;
3592
3593    fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
3594
3595    if (fp64_is_NaN(exp, mnt)) {
3596        result = fp64_process_NaN(op, mode, &flags);
3597    } else if (!mnt) {
3598        result = fp64_infinity(sgn);
3599        flags |= FPLIB_DZC;
3600    } else if (sgn) {
3601        result = fp64_defaultNaN();
3602        flags |= FPLIB_IOC;
3603    } else if (exp == FP64_EXP_INF) {
3604        result = fp64_zero(0);
3605    } else {
3606        exp += FP64_EXP_BITS;
3607        mnt = fp64_normalise(mnt, &exp);
3608        mnt = recip_sqrt_estimate[(~exp & 1) << 7 |
3609                                  (mnt >> (FP64_BITS - 8) & 127)];
3610        result = fp64_pack(0, (3 * FP64_EXP_BIAS - exp - 1) >> 1,
3611                           mnt << (FP64_MANT_BITS - 8));
3612    }
3613
3614    set_fpscr0(fpscr, flags);
3615
3616    return result;
3617}
3618
3619template <>
3620uint16_t
3621fplibRSqrtStepFused(uint16_t op1, uint16_t op2, FPSCR &fpscr)
3622{
3623    int mode = modeConv(fpscr);
3624    int flags = 0;
3625    int sgn1, exp1, sgn2, exp2;
3626    uint16_t mnt1, mnt2, result;
3627
3628    op1 = fplibNeg<uint16_t>(op1);
3629    fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3630    fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3631
3632    result = fp16_process_NaNs(op1, op2, mode, &flags);
3633    if (!result) {
3634        if ((exp1 == FP16_EXP_INF && !mnt2) ||
3635            (exp2 == FP16_EXP_INF && !mnt1)) {
3636            result = fp16_FPOnePointFive(0);
3637        } else if (exp1 == FP16_EXP_INF || exp2 == FP16_EXP_INF) {
3638            result = fp16_infinity(sgn1 ^ sgn2);
3639        } else {
3640            result = fp16_muladd(fp16_FPThree(0), op1, op2, -1, mode, &flags);
3641        }
3642    }
3643
3644    set_fpscr0(fpscr, flags);
3645
3646    return result;
3647}
3648
3649template <>
3650uint32_t
3651fplibRSqrtStepFused(uint32_t op1, uint32_t op2, FPSCR &fpscr)
3652{
3653    int mode = modeConv(fpscr);
3654    int flags = 0;
3655    int sgn1, exp1, sgn2, exp2;
3656    uint32_t mnt1, mnt2, result;
3657
3658    op1 = fplibNeg<uint32_t>(op1);
3659    fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3660    fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3661
3662    result = fp32_process_NaNs(op1, op2, mode, &flags);
3663    if (!result) {
3664        if ((exp1 == FP32_EXP_INF && !mnt2) ||
3665            (exp2 == FP32_EXP_INF && !mnt1)) {
3666            result = fp32_FPOnePointFive(0);
3667        } else if (exp1 == FP32_EXP_INF || exp2 == FP32_EXP_INF) {
3668            result = fp32_infinity(sgn1 ^ sgn2);
3669        } else {
3670            result = fp32_muladd(fp32_FPThree(0), op1, op2, -1, mode, &flags);
3671        }
3672    }
3673
3674    set_fpscr0(fpscr, flags);
3675
3676    return result;
3677}
3678
3679template <>
3680uint64_t
3681fplibRSqrtStepFused(uint64_t op1, uint64_t op2, FPSCR &fpscr)
3682{
3683    int mode = modeConv(fpscr);
3684    int flags = 0;
3685    int sgn1, exp1, sgn2, exp2;
3686    uint64_t mnt1, mnt2, result;
3687
3688    op1 = fplibNeg<uint64_t>(op1);
3689    fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3690    fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3691
3692    result = fp64_process_NaNs(op1, op2, mode, &flags);
3693    if (!result) {
3694        if ((exp1 == FP64_EXP_INF && !mnt2) ||
3695            (exp2 == FP64_EXP_INF && !mnt1)) {
3696            result = fp64_FPOnePointFive(0);
3697        } else if (exp1 == FP64_EXP_INF || exp2 == FP64_EXP_INF) {
3698            result = fp64_infinity(sgn1 ^ sgn2);
3699        } else {
3700            result = fp64_muladd(fp64_FPThree(0), op1, op2, -1, mode, &flags);
3701        }
3702    }
3703
3704    set_fpscr0(fpscr, flags);
3705
3706    return result;
3707}
3708
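// FRSQRTS computes (3 - op1 * op2) / 2.  Negating op1 up front and
// evaluating it as a scaled fused multiply-add, muladd(3, -op1, op2,
// scale = -1), keeps the subtraction and the halving exact so that only
// the final result is rounded.
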
3709template <>
3710uint16_t
3711fplibRecipEstimate(uint16_t op, FPSCR &fpscr)
3712{
3713    int mode = modeConv(fpscr);
3714    int flags = 0;
3715    int sgn, exp;
3716    uint16_t mnt, result;
3717
3718    fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags);
3719
3720    if (fp16_is_NaN(exp, mnt)) {
3721        result = fp16_process_NaN(op, mode, &flags);
3722    } else if (exp == FP16_EXP_INF) {
3723        result = fp16_zero(sgn);
3724    } else if (!mnt) {
3725        result = fp16_infinity(sgn);
3726        flags |= FPLIB_DZC;
3727    } else if (!((uint16_t)(op << 1) >> (FP16_MANT_BITS - 1))) {
3728        bool overflow_to_inf = false;
3729        switch (FPCRRounding(fpscr)) {
3730          case FPRounding_TIEEVEN:
3731            overflow_to_inf = true;
3732            break;
3733          case FPRounding_POSINF:
3734            overflow_to_inf = !sgn;
3735            break;
3736          case FPRounding_NEGINF:
3737            overflow_to_inf = sgn;
3738            break;
3739          case FPRounding_ZERO:
3740            overflow_to_inf = false;
3741            break;
3742          default:
3743            assert(0);
3744        }
3745        result = overflow_to_inf ? fp16_infinity(sgn) : fp16_max_normal(sgn);
3746        flags |= FPLIB_OFC | FPLIB_IXC;
3747    } else if (fpscr.fz16 && exp >= 2 * FP16_EXP_BIAS - 1) {
3748        result = fp16_zero(sgn);
3749        flags |= FPLIB_UFC;
3750    } else {
3751        exp += FP16_EXP_BITS;
3752        mnt = fp16_normalise(mnt, &exp);
3753        int result_exp = 2 * FP16_EXP_BIAS - 1 - exp;
3754        uint16_t fraction = (((uint32_t)1 << 19) /
3755                             (mnt >> (FP16_BITS - 10) | 1) + 1) >> 1;
3756        fraction <<= FP16_MANT_BITS - 8;
3757        if (result_exp == 0) {
3758            fraction >>= 1;
3759        } else if (result_exp == -1) {
3760            fraction >>= 2;
3761            result_exp = 0;
3762        }
3763        result = fp16_pack(sgn, result_exp, fraction);
3764    }
3765
3766    set_fpscr0(fpscr, flags);
3767
3768    return result;
3769}
3770
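// FRECPE's normal path follows the ARM ARM recipe: with the mantissa read
// as a fixed-point value, a 9-bit rounded reciprocal is formed as
//
//     estimate = ((1 << 19) / (top_bits | 1) + 1) >> 1
//
// and becomes the result fraction; result exponents of 0 and -1 denote a
// denormal result and are handled by the extra 1- or 2-bit shifts.  The
// branches above cover, in order: NaNs, infinities (-> zero), zeroes
// (-> infinity, Division by Zero), inputs so tiny the reciprocal
// overflows (rounded to infinity or max-normal, Overflow + Inexact) and,
// with flush-to-zero, inputs so large it underflows (-> zero, Underflow).
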
3771template <>
3772uint32_t
3773fplibRecipEstimate(uint32_t op, FPSCR &fpscr)
3774{
3775    int mode = modeConv(fpscr);
3776    int flags = 0;
3777    int sgn, exp;
3778    uint32_t mnt, result;
3779
3780    fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
3781
3782    if (fp32_is_NaN(exp, mnt)) {
3783        result = fp32_process_NaN(op, mode, &flags);
3784    } else if (exp == FP32_EXP_INF) {
3785        result = fp32_zero(sgn);
3786    } else if (!mnt) {
3787        result = fp32_infinity(sgn);
3788        flags |= FPLIB_DZC;
3789    } else if (!((uint32_t)(op << 1) >> (FP32_MANT_BITS - 1))) {
3790        bool overflow_to_inf = false;
3791        switch (FPCRRounding(fpscr)) {
3792          case FPRounding_TIEEVEN:
3793            overflow_to_inf = true;
3794            break;
3795          case FPRounding_POSINF:
3796            overflow_to_inf = !sgn;
3797            break;
3798          case FPRounding_NEGINF:
3799            overflow_to_inf = sgn;
3800            break;
3801          case FPRounding_ZERO:
3802            overflow_to_inf = false;
3803            break;
3804          default:
3805            assert(0);
3806        }
3807        result = overflow_to_inf ? fp32_infinity(sgn) : fp32_max_normal(sgn);
3808        flags |= FPLIB_OFC | FPLIB_IXC;
3809    } else if (fpscr.fz && exp >= 2 * FP32_EXP_BIAS - 1) {
3810        result = fp32_zero(sgn);
3811        flags |= FPLIB_UFC;
3812    } else {
3813        exp += FP32_EXP_BITS;
3814        mnt = fp32_normalise(mnt, &exp);
3815        int result_exp = 2 * FP32_EXP_BIAS - 1 - exp;
3816        uint32_t fraction = (((uint32_t)1 << 19) /
3817                             (mnt >> (FP32_BITS - 10) | 1) + 1) >> 1;
3818        fraction <<= FP32_MANT_BITS - 8;
3819        if (result_exp == 0) {
3820            fraction >>= 1;
3821        } else if (result_exp == -1) {
3822            fraction >>= 2;
3823            result_exp = 0;
3824        }
3825        result = fp32_pack(sgn, result_exp, fraction);
3826    }
3827
3828    set_fpscr0(fpscr, flags);
3829
3830    return result;
3831}
3832
3833template <>
3834uint64_t
3835fplibRecipEstimate(uint64_t op, FPSCR &fpscr)
3836{
3837    int mode = modeConv(fpscr);
3838    int flags = 0;
3839    int sgn, exp;
3840    uint64_t mnt, result;
3841
3842    fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
3843
3844    if (fp64_is_NaN(exp, mnt)) {
3845        result = fp64_process_NaN(op, mode, &flags);
3846    } else if (exp == FP64_EXP_INF) {
3847        result = fp64_zero(sgn);
3848    } else if (!mnt) {
3849        result = fp64_infinity(sgn);
3850        flags |= FPLIB_DZC;
3851    } else if (!((uint64_t)(op << 1) >> (FP64_MANT_BITS - 1))) {
3852        bool overflow_to_inf = false;
3853        switch (FPCRRounding(fpscr)) {
3854          case FPRounding_TIEEVEN:
3855            overflow_to_inf = true;
3856            break;
3857          case FPRounding_POSINF:
3858            overflow_to_inf = !sgn;
3859            break;
3860          case FPRounding_NEGINF:
3861            overflow_to_inf = sgn;
3862            break;
3863          case FPRounding_ZERO:
3864            overflow_to_inf = false;
3865            break;
3866          default:
3867            assert(0);
3868        }
3869        result = overflow_to_inf ? fp64_infinity(sgn) : fp64_max_normal(sgn);
3870        flags |= FPLIB_OFC | FPLIB_IXC;
3871    } else if (fpscr.fz && exp >= 2 * FP64_EXP_BIAS - 1) {
3872        result = fp64_zero(sgn);
3873        flags |= FPLIB_UFC;
3874    } else {
3875        exp += FP64_EXP_BITS;
3876        mnt = fp64_normalise(mnt, &exp);
3877        int result_exp = 2 * FP64_EXP_BIAS - 1 - exp;
3878        uint64_t fraction = (((uint32_t)1 << 19) /
3879                             (mnt >> (FP64_BITS - 10) | 1) + 1) >> 1;
3880        fraction <<= FP64_MANT_BITS - 8;
3881        if (result_exp == 0) {
3882            fraction >>= 1;
3883        } else if (result_exp == -1) {
3884            fraction >>= 2;
3885            result_exp = 0;
3886        }
3887        result = fp64_pack(sgn, result_exp, fraction);
3888    }
3889
3890    set_fpscr0(fpscr, flags);
3891
3892    return result;
3893}
3894
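// Fused reciprocal step (FPRecipStepFused): computes 2.0 - op1 * op2
// with a single rounding, the Newton-Raphson step behind FRECPS-style
// instructions. op1 is negated up front so the shared muladd helper can
// be reused, and the infinity-times-zero special case returns exactly
// 2.0 instead of raising Invalid Operation.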
template <>
uint16_t
fplibRecipStepFused(uint16_t op1, uint16_t op2, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint16_t mnt1, mnt2, result;

    op1 = fplibNeg<uint16_t>(op1);
    fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    result = fp16_process_NaNs(op1, op2, mode, &flags);
    if (!result) {
        if ((exp1 == FP16_EXP_INF && !mnt2) ||
            (exp2 == FP16_EXP_INF && !mnt1)) {
            result = fp16_FPTwo(0);
        } else if (exp1 == FP16_EXP_INF || exp2 == FP16_EXP_INF) {
            result = fp16_infinity(sgn1 ^ sgn2);
        } else {
            result = fp16_muladd(fp16_FPTwo(0), op1, op2, 0, mode, &flags);
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint32_t
fplibRecipStepFused(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint32_t mnt1, mnt2, result;

    op1 = fplibNeg<uint32_t>(op1);
    fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    result = fp32_process_NaNs(op1, op2, mode, &flags);
    if (!result) {
        if ((exp1 == FP32_EXP_INF && !mnt2) ||
            (exp2 == FP32_EXP_INF && !mnt1)) {
            result = fp32_FPTwo(0);
        } else if (exp1 == FP32_EXP_INF || exp2 == FP32_EXP_INF) {
            result = fp32_infinity(sgn1 ^ sgn2);
        } else {
            result = fp32_muladd(fp32_FPTwo(0), op1, op2, 0, mode, &flags);
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint64_t
fplibRecipStepFused(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint64_t mnt1, mnt2, result;

    op1 = fplibNeg<uint64_t>(op1);
    fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    result = fp64_process_NaNs(op1, op2, mode, &flags);
    if (!result) {
        if ((exp1 == FP64_EXP_INF && !mnt2) ||
            (exp2 == FP64_EXP_INF && !mnt1)) {
            result = fp64_FPTwo(0);
        } else if (exp1 == FP64_EXP_INF || exp2 == FP64_EXP_INF) {
            result = fp64_infinity(sgn1 ^ sgn2);
        } else {
            result = fp64_muladd(fp64_FPTwo(0), op1, op2, 0, mode, &flags);
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}

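// FPRecpX: an exponent-only reciprocal estimate. The result keeps the
// operand's sign, complements its exponent bits and zeroes the mantissa;
// zeroes and denormals map onto the largest finite exponent.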
template <>
uint16_t
fplibRecpX(uint16_t op, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint16_t mnt, result;

    fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    if (fp16_is_NaN(exp, mnt)) {
        result = fp16_process_NaN(op, mode, &flags);
    } else {
        if (!mnt) { // Zero and denormals
            result = fp16_pack(sgn, FP16_EXP_INF - 1, 0);
        } else { // Infinities and normals
            result = fp16_pack(sgn, exp ^ FP16_EXP_INF, 0);
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint32_t
fplibRecpX(uint32_t op, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint32_t mnt, result;

    fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    if (fp32_is_NaN(exp, mnt)) {
        result = fp32_process_NaN(op, mode, &flags);
    } else {
        if (!mnt) { // Zero and denormals
            result = fp32_pack(sgn, FP32_EXP_INF - 1, 0);
        } else { // Infinities and normals
            result = fp32_pack(sgn, exp ^ FP32_EXP_INF, 0);
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint64_t
fplibRecpX(uint64_t op, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint64_t mnt, result;

    fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    if (fp64_is_NaN(exp, mnt)) {
        result = fp64_process_NaN(op, mode, &flags);
    } else {
        if (!mnt) { // Zero and denormals
            result = fp64_pack(sgn, FP64_EXP_INF - 1, 0);
        } else { // Infinities and normals
            result = fp64_pack(sgn, exp ^ FP64_EXP_INF, 0);
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}

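// Round to an integral value in floating-point format (FPRoundInt). The
// operand is first truncated towards zero; the discarded bits are
// summarised in err, whose bit 1 is the first discarded (round) bit and
// whose bit 0 is the OR of all remaining (sticky) bits, so err == 2
// marks an exact tie. IXC is raised for inexact results only when the
// exact variant of the operation was requested.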
template <>
uint16_t
fplibRoundInt(uint16_t op, FPRounding rounding, bool exact, FPSCR &fpscr)
{
    int expint = FP16_EXP_BIAS + FP16_MANT_BITS;
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint16_t mnt, result;

    // Unpack using FPCR to determine if subnormals are flushed-to-zero:
    fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    // Handle NaNs, infinities and zeroes:
    if (fp16_is_NaN(exp, mnt)) {
        result = fp16_process_NaN(op, mode, &flags);
    } else if (exp == FP16_EXP_INF) {
        result = fp16_infinity(sgn);
    } else if (!mnt) {
        result = fp16_zero(sgn);
    } else if (exp >= expint) {
        // There are no fractional bits
        result = op;
    } else {
        // Truncate towards zero:
        uint16_t x = expint - exp >= FP16_BITS ? 0 : mnt >> (expint - exp);
        int err = exp < expint - FP16_BITS ? 1 :
            ((mnt << 1 >> (expint - exp - 1) & 3) |
             ((uint16_t)(mnt << 2 << (FP16_BITS + exp - expint)) != 0));
        switch (rounding) {
          case FPRounding_TIEEVEN:
            x += (err == 3 || (err == 2 && (x & 1)));
            break;
          case FPRounding_POSINF:
            x += err && !sgn;
            break;
          case FPRounding_NEGINF:
            x += err && sgn;
            break;
          case FPRounding_ZERO:
            break;
          case FPRounding_TIEAWAY:
            x += err >> 1;
            break;
          default:
            assert(0);
        }

        if (x == 0) {
            result = fp16_zero(sgn);
        } else {
            exp = expint;
            mnt = fp16_normalise(x, &exp);
            result = fp16_pack(sgn, exp + FP16_EXP_BITS, mnt >> FP16_EXP_BITS);
        }

        if (err && exact)
            flags |= FPLIB_IXC;
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint32_t
fplibRoundInt(uint32_t op, FPRounding rounding, bool exact, FPSCR &fpscr)
{
    int expint = FP32_EXP_BIAS + FP32_MANT_BITS;
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint32_t mnt, result;

    // Unpack using FPCR to determine if subnormals are flushed-to-zero:
    fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    // Handle NaNs, infinities and zeroes:
    if (fp32_is_NaN(exp, mnt)) {
        result = fp32_process_NaN(op, mode, &flags);
    } else if (exp == FP32_EXP_INF) {
        result = fp32_infinity(sgn);
    } else if (!mnt) {
        result = fp32_zero(sgn);
    } else if (exp >= expint) {
        // There are no fractional bits
        result = op;
    } else {
        // Truncate towards zero:
        uint32_t x = expint - exp >= FP32_BITS ? 0 : mnt >> (expint - exp);
        int err = exp < expint - FP32_BITS ? 1 :
            ((mnt << 1 >> (expint - exp - 1) & 3) |
             ((uint32_t)(mnt << 2 << (FP32_BITS + exp - expint)) != 0));
        switch (rounding) {
          case FPRounding_TIEEVEN:
            x += (err == 3 || (err == 2 && (x & 1)));
            break;
          case FPRounding_POSINF:
            x += err && !sgn;
            break;
          case FPRounding_NEGINF:
            x += err && sgn;
            break;
          case FPRounding_ZERO:
            break;
          case FPRounding_TIEAWAY:
            x += err >> 1;
            break;
          default:
            assert(0);
        }

        if (x == 0) {
            result = fp32_zero(sgn);
        } else {
            exp = expint;
            mnt = fp32_normalise(x, &exp);
            result = fp32_pack(sgn, exp + FP32_EXP_BITS, mnt >> FP32_EXP_BITS);
        }

        if (err && exact)
            flags |= FPLIB_IXC;
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint64_t
fplibRoundInt(uint64_t op, FPRounding rounding, bool exact, FPSCR &fpscr)
{
    int expint = FP64_EXP_BIAS + FP64_MANT_BITS;
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint64_t mnt, result;

    // Unpack using FPCR to determine if subnormals are flushed-to-zero:
    fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    // Handle NaNs, infinities and zeroes:
    if (fp64_is_NaN(exp, mnt)) {
        result = fp64_process_NaN(op, mode, &flags);
    } else if (exp == FP64_EXP_INF) {
        result = fp64_infinity(sgn);
    } else if (!mnt) {
        result = fp64_zero(sgn);
    } else if (exp >= expint) {
        // There are no fractional bits
        result = op;
    } else {
        // Truncate towards zero:
        uint64_t x = expint - exp >= FP64_BITS ? 0 : mnt >> (expint - exp);
        int err = exp < expint - FP64_BITS ? 1 :
            ((mnt << 1 >> (expint - exp - 1) & 3) |
             ((uint64_t)(mnt << 2 << (FP64_BITS + exp - expint)) != 0));
        switch (rounding) {
          case FPRounding_TIEEVEN:
            x += (err == 3 || (err == 2 && (x & 1)));
            break;
          case FPRounding_POSINF:
            x += err && !sgn;
            break;
          case FPRounding_NEGINF:
            x += err && sgn;
            break;
          case FPRounding_ZERO:
            break;
          case FPRounding_TIEAWAY:
            x += err >> 1;
            break;
          default:
            assert(0);
        }

        if (x == 0) {
            result = fp64_zero(sgn);
        } else {
            exp = expint;
            mnt = fp64_normalise(x, &exp);
            result = fp64_pack(sgn, exp + FP64_EXP_BITS, mnt >> FP64_EXP_BITS);
        }

        if (err && exact)
            flags |= FPLIB_IXC;
    }

    set_fpscr0(fpscr, flags);

    return result;
}

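// The specializations below are thin wrappers around the shared
// fp16/fp32/fp64 helpers: fplibScale multiplies op1 by 2^op2 with op2
// reinterpreted as a signed integer, fplibSqrt defers to the square-root
// helpers, and fplibSub is an addition with the negate flag set.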
template <>
uint16_t
fplibScale(uint16_t op1, uint16_t op2, FPSCR &fpscr)
{
    int flags = 0;
    uint16_t result = fp16_scale(op1, (int16_t)op2, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}

template <>
uint32_t
fplibScale(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    int flags = 0;
    uint32_t result = fp32_scale(op1, (int32_t)op2, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}

template <>
uint64_t
fplibScale(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    int flags = 0;
    uint64_t result = fp64_scale(op1, (int64_t)op2, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}

template <>
uint16_t
fplibSqrt(uint16_t op, FPSCR &fpscr)
{
    int flags = 0;
    uint16_t result = fp16_sqrt(op, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}

template <>
uint32_t
fplibSqrt(uint32_t op, FPSCR &fpscr)
{
    int flags = 0;
    uint32_t result = fp32_sqrt(op, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}

template <>
uint64_t
fplibSqrt(uint64_t op, FPSCR &fpscr)
{
    int flags = 0;
    uint64_t result = fp64_sqrt(op, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}

template <>
uint16_t
fplibSub(uint16_t op1, uint16_t op2, FPSCR &fpscr)
{
    int flags = 0;
    uint16_t result = fp16_add(op1, op2, 1, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}

template <>
uint32_t
fplibSub(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    int flags = 0;
    uint32_t result = fp32_add(op1, op2, 1, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}

template <>
uint64_t
fplibSub(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    int flags = 0;
    uint64_t result = fp64_add(op1, op2, 1, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}

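// FPTrigMAdd step: coeff holds the floating-point encodings of the
// polynomial coefficients used to approximate sine (row 0) and cosine
// (row 1). The sign bit of op2 selects the row, and the chosen
// coefficient is fused-multiply-added with op1 and |op2|.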
template <>
uint16_t
fplibTrigMulAdd(uint8_t coeff_index, uint16_t op1, uint16_t op2, FPSCR &fpscr)
{
    static uint16_t coeff[2][8] = {
        {
            0x3c00,
            0xb155,
            0x2030,
            0x0000,
            0x0000,
            0x0000,
            0x0000,
            0x0000,
        },
        {
            0x3c00,
            0xb800,
            0x293a,
            0x0000,
            0x0000,
            0x0000,
            0x0000,
            0x0000
        }
    };
    int flags = 0;
    uint16_t result =
        fp16_muladd(coeff[op2 >> (FP16_BITS - 1)][coeff_index], op1,
                    fplibAbs(op2), 0, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}

template <>
uint32_t
fplibTrigMulAdd(uint8_t coeff_index, uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    static uint32_t coeff[2][8] = {
        {
            0x3f800000,
            0xbe2aaaab,
            0x3c088886,
            0xb95008b9,
            0x36369d6d,
            0x00000000,
            0x00000000,
            0x00000000
        },
        {
            0x3f800000,
            0xbf000000,
            0x3d2aaaa6,
            0xbab60705,
            0x37cd37cc,
            0x00000000,
            0x00000000,
            0x00000000
        }
    };
    int flags = 0;
    uint32_t result =
        fp32_muladd(coeff[op2 >> (FP32_BITS - 1)][coeff_index], op1,
                    fplibAbs(op2), 0, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}

template <>
uint64_t
fplibTrigMulAdd(uint8_t coeff_index, uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    static uint64_t coeff[2][8] = {
        {
            0x3ff0000000000000ULL,
            0xbfc5555555555543ULL,
            0x3f8111111110f30cULL,
            0xbf2a01a019b92fc6ULL,
            0x3ec71de351f3d22bULL,
            0xbe5ae5e2b60f7b91ULL,
            0x3de5d8408868552fULL,
            0x0000000000000000ULL
        },
        {
            0x3ff0000000000000ULL,
            0xbfe0000000000000ULL,
            0x3fa5555555555536ULL,
            0xbf56c16c16c13a0bULL,
            0x3efa01a019b1e8d8ULL,
            0xbe927e4f7282f468ULL,
            0x3e21ee96d2641b13ULL,
            0xbda8f76380fbb401ULL
        }
    };
    int flags = 0;
    uint64_t result =
        fp64_muladd(coeff[op2 >> (FP64_BITS - 1)][coeff_index], op1,
                    fplibAbs(op2), 0, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}

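// FPTrigSMul: squares op1 with the usual rounding and flag behaviour,
// then, if the product is not a NaN, replaces its sign bit with bit 0
// of op2.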
template <>
uint16_t
fplibTrigSMul(uint16_t op1, uint16_t op2, FPSCR &fpscr)
{
    int flags = 0;
    int sgn, exp;
    uint16_t mnt;

    int mode = modeConv(fpscr);
    uint16_t result = fp16_mul(op1, op1, mode, &flags);
    set_fpscr0(fpscr, flags);

    fp16_unpack(&sgn, &exp, &mnt, result, mode, &flags);
    if (!fp16_is_NaN(exp, mnt)) {
        result = (result & ~(1ULL << (FP16_BITS - 1))) |
            op2 << (FP16_BITS - 1);
    }
    return result;
}

template <>
uint32_t
fplibTrigSMul(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    int flags = 0;
    int sgn, exp;
    uint32_t mnt;

    int mode = modeConv(fpscr);
    uint32_t result = fp32_mul(op1, op1, mode, &flags);
    set_fpscr0(fpscr, flags);

    fp32_unpack(&sgn, &exp, &mnt, result, mode, &flags);
    if (!fp32_is_NaN(exp, mnt)) {
        result = (result & ~(1ULL << (FP32_BITS - 1))) | op2 << (FP32_BITS - 1);
    }
    return result;
}

template <>
uint64_t
fplibTrigSMul(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    int flags = 0;
    int sgn, exp;
    uint64_t mnt;

    int mode = modeConv(fpscr);
    uint64_t result = fp64_mul(op1, op1, mode, &flags);
    set_fpscr0(fpscr, flags);

    fp64_unpack(&sgn, &exp, &mnt, result, mode, &flags);
    if (!fp64_is_NaN(exp, mnt)) {
        result = (result & ~(1ULL << (FP64_BITS - 1))) | op2 << (FP64_BITS - 1);
    }
    return result;
}

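// FPTrigSSel needs no arithmetic: bit 0 of op2 selects between op1 and
// a constant 1.0, and bit 1 conditionally flips the sign of whichever
// was selected.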
template <>
uint16_t
fplibTrigSSel(uint16_t op1, uint16_t op2, FPSCR &fpscr)
{
    static constexpr uint16_t fpOne =
        (uint16_t)FP16_EXP_BIAS << FP16_MANT_BITS; // 1.0
    if (op2 & 1)
        op1 = fpOne;
    return op1 ^ ((op2 >> 1) << (FP16_BITS - 1));
}

template <>
uint32_t
fplibTrigSSel(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    static constexpr uint32_t fpOne =
        (uint32_t)FP32_EXP_BIAS << FP32_MANT_BITS; // 1.0
    if (op2 & 1)
        op1 = fpOne;
    return op1 ^ ((op2 >> 1) << (FP32_BITS - 1));
}

template <>
uint64_t
fplibTrigSSel(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    static constexpr uint64_t fpOne =
        (uint64_t)FP64_EXP_BIAS << FP64_MANT_BITS; // 1.0
    if (op2 & 1)
        op1 = fpOne;
    return op1 ^ ((op2 >> 1) << (FP64_BITS - 1));
}

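// Float-to-fixed conversion helpers. FPToFixed_64 converts an unpacked
// operand whose exponent has already been re-biased for the
// double-precision format into a 64-bit integer, rounding with the same
// round/sticky err encoding used by fplibRoundInt, saturating with IOC
// on overflow and raising IXC when discarded bits are lost. The 32- and
// 16-bit variants narrow its result with an extra saturation check.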
static uint64_t
FPToFixed_64(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding,
             int *flags)
{
    int expmax = FP64_EXP_BIAS + FP64_BITS - 1;
    uint64_t x;
    int err;

    if (exp > expmax) {
        *flags = FPLIB_IOC;
        return ((uint64_t)!u << (FP64_BITS - 1)) - !sgn;
    }

    x = lsr64(mnt << FP64_EXP_BITS, expmax - exp);
    err = (exp > expmax - 2 ? 0 :
           (lsr64(mnt << FP64_EXP_BITS, expmax - 2 - exp) & 3) |
           !!(mnt << FP64_EXP_BITS & (lsl64(1, expmax - 2 - exp) - 1)));

    switch (rounding) {
      case FPRounding_TIEEVEN:
        x += (err == 3 || (err == 2 && (x & 1)));
        break;
      case FPRounding_POSINF:
        x += err && !sgn;
        break;
      case FPRounding_NEGINF:
        x += err && sgn;
        break;
      case FPRounding_ZERO:
        break;
      case FPRounding_TIEAWAY:
        x += err >> 1;
        break;
      default:
        assert(0);
    }

    if (u ? sgn && x : x > (1ULL << (FP64_BITS - 1)) - !sgn) {
        *flags = FPLIB_IOC;
        return ((uint64_t)!u << (FP64_BITS - 1)) - !sgn;
    }

    if (err) {
        *flags = FPLIB_IXC;
    }

    return sgn ? -x : x;
}

static uint32_t
FPToFixed_32(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding,
             int *flags)
{
    uint64_t x = FPToFixed_64(sgn, exp, mnt, u, rounding, flags);
    if (u ? x >= 1ULL << FP32_BITS :
        !(x < 1ULL << (FP32_BITS - 1) ||
          (uint64_t)-x <= (uint64_t)1 << (FP32_BITS - 1))) {
        *flags = FPLIB_IOC;
        x = ((uint32_t)!u << (FP32_BITS - 1)) - !sgn;
    }
    return x;
}

static uint16_t
FPToFixed_16(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding,
             int *flags)
{
    uint64_t x = FPToFixed_64(sgn, exp, mnt, u, rounding, flags);
    if (u ? x >= 1ULL << FP16_BITS :
        !(x < 1ULL << (FP16_BITS - 1) ||
          (uint64_t)-x <= (uint64_t)1 << (FP16_BITS - 1))) {
        *flags = FPLIB_IOC;
        x = ((uint16_t)!u << (FP16_BITS - 1)) - !sgn;
    }
    return x;
}

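// The fplibFPToFixed specializations widen every source format to the
// double-precision layout first, re-biasing the exponent and shifting
// the mantissa up, so the fixed-point helpers above serve all input
// sizes; adding fbits to the exponent folds the fixed-point scaling
// into the same conversion.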
template <>
uint16_t
fplibFPToFixed(uint16_t op, int fbits, bool u, FPRounding rounding,
               FPSCR &fpscr)
{
    int flags = 0;
    int sgn, exp;
    uint16_t mnt, result;

    // Unpack using FPCR to determine if subnormals are flushed-to-zero:
    fp16_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);

    // If NaN, set cumulative flag or take exception:
    if (fp16_is_NaN(exp, mnt)) {
        flags = FPLIB_IOC;
        result = 0;
    } else {
        assert(fbits >= 0);
        // Infinity is treated as an ordinary normalised number that saturates.
        result =
            FPToFixed_16(sgn, exp + FP64_EXP_BIAS - FP16_EXP_BIAS + fbits,
                         (uint64_t)mnt << (FP64_MANT_BITS - FP16_MANT_BITS),
                         u, rounding, &flags);
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint32_t
fplibFPToFixed(uint16_t op, int fbits, bool u, FPRounding rounding,
               FPSCR &fpscr)
{
    int flags = 0;
    int sgn, exp;
    uint16_t mnt;
    uint32_t result;

    // Unpack using FPCR to determine if subnormals are flushed-to-zero:
    fp16_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);

    // If NaN, set cumulative flag or take exception:
    if (fp16_is_NaN(exp, mnt)) {
        flags = FPLIB_IOC;
        result = 0;
    } else {
        assert(fbits >= 0);
        if (exp == FP16_EXP_INF)
            exp = 255; // infinity: make it big enough to saturate
        result =
            FPToFixed_32(sgn, exp + FP64_EXP_BIAS - FP16_EXP_BIAS + fbits,
                         (uint64_t)mnt << (FP64_MANT_BITS - FP16_MANT_BITS),
                         u, rounding, &flags);
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint32_t
fplibFPToFixed(uint32_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
{
    int flags = 0;
    int sgn, exp;
    uint32_t mnt, result;

    // Unpack using FPCR to determine if subnormals are flushed-to-zero:
    fp32_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);

    // If NaN, set cumulative flag or take exception:
    if (fp32_is_NaN(exp, mnt)) {
        flags = FPLIB_IOC;
        result = 0;
    } else {
        assert(fbits >= 0);
        // Infinity is treated as an ordinary normalised number that saturates.
        result =
            FPToFixed_32(sgn, exp + FP64_EXP_BIAS - FP32_EXP_BIAS + fbits,
                         (uint64_t)mnt << (FP64_MANT_BITS - FP32_MANT_BITS),
                         u, rounding, &flags);
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint32_t
fplibFPToFixed(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
{
    int flags = 0;
    int sgn, exp;
    uint64_t mnt;
    uint32_t result;

    // Unpack using FPCR to determine if subnormals are flushed-to-zero:
    fp64_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);

    // If NaN, set cumulative flag or take exception:
    if (fp64_is_NaN(exp, mnt)) {
        flags = FPLIB_IOC;
        result = 0;
    } else {
        assert(fbits >= 0);
        // Infinity is treated as an ordinary normalised number that saturates.
        result = FPToFixed_32(sgn, exp + fbits, mnt, u, rounding, &flags);
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint64_t
fplibFPToFixed(uint16_t op, int fbits, bool u, FPRounding rounding,
               FPSCR &fpscr)
{
    int flags = 0;
    int sgn, exp;
    uint16_t mnt;
    uint64_t result;

    // Unpack using FPCR to determine if subnormals are flushed-to-zero:
    fp16_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);

    // If NaN, set cumulative flag or take exception:
    if (fp16_is_NaN(exp, mnt)) {
        flags = FPLIB_IOC;
        result = 0;
    } else {
        assert(fbits >= 0);
        if (exp == FP16_EXP_INF)
            exp = 255; // infinity: make it big enough to saturate
        result =
            FPToFixed_64(sgn, exp + FP64_EXP_BIAS - FP16_EXP_BIAS + fbits,
                         (uint64_t)mnt << (FP64_MANT_BITS - FP16_MANT_BITS),
                         u, rounding, &flags);
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint64_t
fplibFPToFixed(uint32_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
{
    int flags = 0;
    int sgn, exp;
    uint32_t mnt;
    uint64_t result;

    // Unpack using FPCR to determine if subnormals are flushed-to-zero:
    fp32_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);

    // If NaN, set cumulative flag or take exception:
    if (fp32_is_NaN(exp, mnt)) {
        flags = FPLIB_IOC;
        result = 0;
    } else {
        assert(fbits >= 0);
        // Infinity is treated as an ordinary normalised number that saturates.
        result =
            FPToFixed_64(sgn, exp + FP64_EXP_BIAS - FP32_EXP_BIAS + fbits,
                         (uint64_t)mnt << (FP64_MANT_BITS - FP32_MANT_BITS),
                         u, rounding, &flags);
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint64_t
fplibFPToFixed(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
{
    int flags = 0;
    int sgn, exp;
    uint64_t mnt, result;

    // Unpack using FPCR to determine if subnormals are flushed-to-zero:
    fp64_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);

    // If NaN, set cumulative flag or take exception:
    if (fp64_is_NaN(exp, mnt)) {
        flags = FPLIB_IOC;
        result = 0;
    } else {
        assert(fbits >= 0);
        // Infinity is treated as an ordinary normalised number that saturates.
        result = FPToFixed_64(sgn, exp + fbits, mnt, u, rounding, &flags);
    }

    set_fpscr0(fpscr, flags);

    return result;
}

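// Fixed-point/integer to floating-point conversion helpers: split the
// input into sign and magnitude, start from an exponent that treats the
// value as a 64-bit integer scaled by 2^-fbits, normalise, and round in
// the destination format. For the narrower formats the shifted-out
// mantissa bits are first collapsed into a sticky bit.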
static uint16_t
fp16_cvtf(uint64_t a, int fbits, int u, int mode, int *flags)
{
    int x_sgn = !u && a >> (FP64_BITS - 1);
    int x_exp = FP16_EXP_BIAS + FP64_BITS - 1 - fbits;
    uint64_t x_mnt = x_sgn ? -a : a;

    // Handle zero:
    if (!x_mnt) {
        return fp16_zero(0);
    }

    // Normalise into FP16_BITS bits, collapsing error into bottom bit:
    x_mnt = fp64_normalise(x_mnt, &x_exp);
    x_mnt = (x_mnt >> (FP64_BITS - FP16_BITS - 1) |
             !!(x_mnt & ((1ULL << (FP64_BITS - FP16_BITS - 1)) - 1)));

    return fp16_round(x_sgn, x_exp, x_mnt, mode, flags);
}

static uint32_t
fp32_cvtf(uint64_t a, int fbits, int u, int mode, int *flags)
{
    int x_sgn = !u && a >> (FP64_BITS - 1);
    int x_exp = FP32_EXP_BIAS + FP64_BITS - 1 - fbits;
    uint64_t x_mnt = x_sgn ? -a : a;

    // Handle zero:
    if (!x_mnt) {
        return fp32_zero(0);
    }

    // Normalise into FP32_BITS bits, collapsing error into bottom bit:
    x_mnt = fp64_normalise(x_mnt, &x_exp);
    x_mnt = (x_mnt >> (FP64_BITS - FP32_BITS - 1) |
             !!(x_mnt & ((1ULL << (FP64_BITS - FP32_BITS - 1)) - 1)));

    return fp32_round(x_sgn, x_exp, x_mnt, mode, flags);
}

static uint64_t
fp64_cvtf(uint64_t a, int fbits, int u, int mode, int *flags)
{
    int x_sgn = !u && a >> (FP64_BITS - 1);
    int x_exp = FP64_EXP_BIAS + FP64_BITS - 1 - fbits;
    uint64_t x_mnt = x_sgn ? -a : a;

    // Handle zero:
    if (!x_mnt) {
        return fp64_zero(0);
    }

    x_mnt = fp64_normalise(x_mnt, &x_exp);

    return fp64_round(x_sgn, x_exp, x_mnt << 1, mode, flags);
}

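// fplibFixedToFP takes the rounding mode as an argument rather than from
// the FPSCR; "(uint32_t)fpscr >> 22 & 12" moves the FPSCR FZ and DN bits
// onto FPLIB_FZ and FPLIB_DN so flush-to-zero and default-NaN behaviour
// are still honoured by the helpers.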
template <>
uint16_t
fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding,
               FPSCR &fpscr)
{
    int flags = 0;
    uint16_t res = fp16_cvtf(op, fbits, u,
                             (int)rounding | ((uint32_t)fpscr >> 22 & 12),
                             &flags);
    set_fpscr0(fpscr, flags);
    return res;
}

template <>
uint32_t
fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
{
    int flags = 0;
    uint32_t res = fp32_cvtf(op, fbits, u,
                             (int)rounding | ((uint32_t)fpscr >> 22 & 12),
                             &flags);
    set_fpscr0(fpscr, flags);
    return res;
}

template <>
uint64_t
fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
{
    int flags = 0;
    uint64_t res = fp64_cvtf(op, fbits, u,
                             (int)rounding | ((uint32_t)fpscr >> 22 & 12),
                             &flags);
    set_fpscr0(fpscr, flags);
    return res;
}

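// Simple constructors: canonical infinities and each format's
// default NaN.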
template <>
uint16_t
fplibInfinity(int sgn)
{
    return fp16_infinity(sgn);
}

template <>
uint32_t
fplibInfinity(int sgn)
{
    return fp32_infinity(sgn);
}

template <>
uint64_t
fplibInfinity(int sgn)
{
    return fp64_infinity(sgn);
}

template <>
uint16_t
fplibDefaultNaN()
{
    return fp16_defaultNaN();
}

template <>
uint32_t
fplibDefaultNaN()
{
    return fp32_defaultNaN();
}

template <>
uint64_t
fplibDefaultNaN()
{
    return fp64_defaultNaN();
}

} // namespace ArmISA