// fplib.cc revision 10272:336c7d36ac00
/*
 * Copyright (c) 2012-2013 ARM Limited
 * All rights reserved
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder.  You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Edmund Grimley Evans
 *          Thomas Grocutt
 */

#include <stdint.h>

#include <cassert>

#include "fplib.hh"

namespace ArmISA
{

#define FPLIB_RN 0
#define FPLIB_RP 1
#define FPLIB_RM 2
#define FPLIB_RZ 3
#define FPLIB_FZ 4
#define FPLIB_DN 8
#define FPLIB_AHP 16

#define FPLIB_IDC 128 // Input Denormal
#define FPLIB_IXC 16  // Inexact
#define FPLIB_UFC 8   // Underflow
#define FPLIB_OFC 4   // Overflow
#define FPLIB_DZC 2   // Division by Zero
#define FPLIB_IOC 1   // Invalid Operation

static inline uint16_t
lsl16(uint16_t x, uint32_t shift)
{
    return shift < 16 ? x << shift : 0;
}

static inline uint16_t
lsr16(uint16_t x, uint32_t shift)
{
    return shift < 16 ? x >> shift : 0;
}

static inline uint32_t
lsl32(uint32_t x, uint32_t shift)
{
    return shift < 32 ? x << shift : 0;
}

static inline uint32_t
lsr32(uint32_t x, uint32_t shift)
{
    return shift < 32 ? x >> shift : 0;
}

static inline uint64_t
lsl64(uint64_t x, uint32_t shift)
{
    return shift < 64 ? x << shift : 0;
}

static inline uint64_t
lsr64(uint64_t x, uint32_t shift)
{
    return shift < 64 ? x >> shift : 0;
}

static inline void
lsl128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift)
{
    if (shift == 0) {
        // Pass a zero shift straight through: the "x0 >> (64 - shift)"
        // below would shift a uint64_t by 64, which is undefined behaviour.
        *r1 = x1;
        *r0 = x0;
    } else if (shift < 64) {
        *r1 = x1 << shift | x0 >> (64 - shift);
        *r0 = x0 << shift;
    } else if (shift < 128) {
        *r1 = x0 << (shift - 64);
        *r0 = 0;
    } else {
        *r1 = 0;
        *r0 = 0;
    }
}

static inline void
lsr128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift)
{
    if (shift == 0) {
        // As in lsl128(), a zero shift must be special-cased to avoid an
        // undefined 64-bit shift by 64.
        *r0 = x0;
        *r1 = x1;
    } else if (shift < 64) {
        *r0 = x0 >> shift | x1 << (64 - shift);
        *r1 = x1 >> shift;
    } else if (shift < 128) {
        *r0 = x1 >> (shift - 64);
        *r1 = 0;
    } else {
        *r0 = 0;
        *r1 = 0;
    }
}

static inline void
mul62x62(uint64_t *x0, uint64_t *x1, uint64_t a, uint64_t b)
{
    uint32_t mask = ((uint32_t)1 << 31) - 1;
    uint64_t a0 = a & mask;
    uint64_t a1 = a >> 31 & mask;
    uint64_t b0 = b & mask;
    uint64_t b1 = b >> 31 & mask;
    uint64_t p0 = a0 * b0;
    uint64_t p2 = a1 * b1;
    uint64_t p1 = (a0 + a1) * (b0 + b1) - p0 - p2;
    uint64_t s0 = p0;
    uint64_t s1 = (s0 >> 31) + p1;
    uint64_t s2 = (s1 >> 31) + p2;
    *x0 = (s0 & mask) | (s1 & mask) << 31 | s2 << 62;
    *x1 = s2 >> 2;
}

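// mul62x62() forms the full product of two operands of at most 62 bits
// using a single Karatsuba step on 31-bit limbs: with a = a1:a0 and
// b = b1:b0, the middle partial product is recovered as
//     p1 = (a0 + a1) * (b0 + b1) - p0 - p2,
// so only three 64-bit multiplications are needed, and every intermediate
// sum fits in 64 bits because each limb is below 2^31.
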
static inline void
mul64x32(uint64_t *x0, uint64_t *x1, uint64_t a, uint32_t b)
{
    uint64_t t0 = (uint64_t)(uint32_t)a * b;
    uint64_t t1 = (t0 >> 32) + (a >> 32) * b;
    *x0 = t1 << 32 | (uint32_t)t0;
    *x1 = t1 >> 32;
}

static inline void
add128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0,
       uint64_t b1)
{
    *x0 = a0 + b0;
    *x1 = a1 + b1 + (*x0 < a0);
}

static inline void
sub128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0,
       uint64_t b1)
{
    *x0 = a0 - b0;
    *x1 = a1 - b1 - (*x0 > a0);
}

static inline int
cmp128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1)
{
    return (a1 < b1 ? -1 : a1 > b1 ? 1 : a0 < b0 ? -1 : a0 > b0 ? 1 : 0);
}

static inline uint16_t
fp16_normalise(uint16_t mnt, int *exp)
{
    int shift;

    if (!mnt) {
        return 0;
    }

    for (shift = 8; shift; shift >>= 1) {
        if (!(mnt >> (16 - shift))) {
            mnt <<= shift;
            *exp -= shift;
        }
    }
    return mnt;
}

static inline uint32_t
fp32_normalise(uint32_t mnt, int *exp)
{
    int shift;

    if (!mnt) {
        return 0;
    }

    for (shift = 16; shift; shift >>= 1) {
        if (!(mnt >> (32 - shift))) {
            mnt <<= shift;
            *exp -= shift;
        }
    }
    return mnt;
}

static inline uint64_t
fp64_normalise(uint64_t mnt, int *exp)
{
    int shift;

    if (!mnt) {
        return 0;
    }

    for (shift = 32; shift; shift >>= 1) {
        if (!(mnt >> (64 - shift))) {
            mnt <<= shift;
            *exp -= shift;
        }
    }
    return mnt;
}

static inline void
fp128_normalise(uint64_t *mnt0, uint64_t *mnt1, int *exp)
{
    uint64_t x0 = *mnt0;
    uint64_t x1 = *mnt1;
    int shift;

    if (!x0 && !x1) {
        return;
    }

    if (!x1) {
        x1 = x0;
        x0 = 0;
        *exp -= 64;
    }

    for (shift = 32; shift; shift >>= 1) {
        if (!(x1 >> (64 - shift))) {
            x1 = x1 << shift | x0 >> (64 - shift);
            x0 <<= shift;
            *exp -= shift;
        }
    }

    *mnt0 = x0;
    *mnt1 = x1;
}

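// The fpN_normalise() helpers shift the mantissa left until its most
// significant set bit reaches the top of the word, decrementing *exp by
// the total shift. The loop is a binary search over shift amounts
// (32, 16, 8, 4, 2, 1 for 64 bits), so it runs in a fixed log2(N) number
// of steps rather than one step per leading zero.
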
static inline uint16_t
fp16_pack(uint16_t sgn, uint16_t exp, uint16_t mnt)
{
    return sgn << 15 | exp << 10 | (mnt & (((uint16_t)1 << 10) - 1));
}

static inline uint32_t
fp32_pack(uint32_t sgn, uint32_t exp, uint32_t mnt)
{
    return sgn << 31 | exp << 23 | (mnt & (((uint32_t)1 << 23) - 1));
}

static inline uint64_t
fp64_pack(uint64_t sgn, uint64_t exp, uint64_t mnt)
{
    return (uint64_t)sgn << 63 | exp << 52 | (mnt & (((uint64_t)1 << 52) - 1));
}

static inline uint16_t
fp16_zero(int sgn)
{
    return fp16_pack(sgn, 0, 0);
}

static inline uint32_t
fp32_zero(int sgn)
{
    return fp32_pack(sgn, 0, 0);
}

static inline uint64_t
fp64_zero(int sgn)
{
    return fp64_pack(sgn, 0, 0);
}

static inline uint16_t
fp16_max_normal(int sgn)
{
    return fp16_pack(sgn, 30, -1);
}

static inline uint32_t
fp32_max_normal(int sgn)
{
    return fp32_pack(sgn, 254, -1);
}

static inline uint64_t
fp64_max_normal(int sgn)
{
    return fp64_pack(sgn, 2046, -1);
}

static inline uint16_t
fp16_infinity(int sgn)
{
    return fp16_pack(sgn, 31, 0);
}

static inline uint32_t
fp32_infinity(int sgn)
{
    return fp32_pack(sgn, 255, 0);
}

static inline uint64_t
fp64_infinity(int sgn)
{
    return fp64_pack(sgn, 2047, 0);
}

static inline uint16_t
fp16_defaultNaN()
{
    return fp16_pack(0, 31, (uint16_t)1 << 9);
}

static inline uint32_t
fp32_defaultNaN()
{
    return fp32_pack(0, 255, (uint32_t)1 << 22);
}

static inline uint64_t
fp64_defaultNaN()
{
    return fp64_pack(0, 2047, (uint64_t)1 << 51);
}

static inline void
fp16_unpack(int *sgn, int *exp, uint16_t *mnt, uint16_t x, int mode,
            int *flags)
{
    *sgn = x >> 15;
    *exp = x >> 10 & 31;
    *mnt = x & (((uint16_t)1 << 10) - 1);

    // Handle subnormals:
    if (*exp) {
        *mnt |= (uint16_t)1 << 10;
    } else {
        ++*exp;
        // There is no flush to zero in this case!
    }
}

static inline void
fp32_unpack(int *sgn, int *exp, uint32_t *mnt, uint32_t x, int mode,
            int *flags)
{
    *sgn = x >> 31;
    *exp = x >> 23 & 255;
    *mnt = x & (((uint32_t)1 << 23) - 1);

    // Handle subnormals:
    if (*exp) {
        *mnt |= (uint32_t)1 << 23;
    } else {
        ++*exp;
        if ((mode & FPLIB_FZ) && *mnt) {
            *flags |= FPLIB_IDC;
            *mnt = 0;
        }
    }
}

static inline void
fp64_unpack(int *sgn, int *exp, uint64_t *mnt, uint64_t x, int mode,
            int *flags)
{
    *sgn = x >> 63;
    *exp = x >> 52 & 2047;
    *mnt = x & (((uint64_t)1 << 52) - 1);

    // Handle subnormals:
    if (*exp) {
        *mnt |= (uint64_t)1 << 52;
    } else {
        ++*exp;
        if ((mode & FPLIB_FZ) && *mnt) {
            *flags |= FPLIB_IDC;
            *mnt = 0;
        }
    }
}

static inline uint32_t
fp32_process_NaN(uint32_t a, int mode, int *flags)
{
    if (!(a >> 22 & 1)) {
        *flags |= FPLIB_IOC;
        a |= (uint32_t)1 << 22;
    }
    return mode & FPLIB_DN ? fp32_defaultNaN() : a;
}

static inline uint64_t
fp64_process_NaN(uint64_t a, int mode, int *flags)
{
    if (!(a >> 51 & 1)) {
        *flags |= FPLIB_IOC;
        a |= (uint64_t)1 << 51;
    }
    return mode & FPLIB_DN ? fp64_defaultNaN() : a;
}

static uint32_t
fp32_process_NaNs(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_exp = a >> 23 & 255;
    uint32_t a_mnt = a & (((uint32_t)1 << 23) - 1);
    int b_exp = b >> 23 & 255;
    uint32_t b_mnt = b & (((uint32_t)1 << 23) - 1);

    // Handle signalling NaNs:
    if (a_exp == 255 && a_mnt && !(a_mnt >> 22 & 1))
        return fp32_process_NaN(a, mode, flags);
    if (b_exp == 255 && b_mnt && !(b_mnt >> 22 & 1))
        return fp32_process_NaN(b, mode, flags);

    // Handle quiet NaNs:
    if (a_exp == 255 && a_mnt)
        return fp32_process_NaN(a, mode, flags);
    if (b_exp == 255 && b_mnt)
        return fp32_process_NaN(b, mode, flags);

    return 0;
}

static uint64_t
fp64_process_NaNs(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_exp = a >> 52 & 2047;
    uint64_t a_mnt = a & (((uint64_t)1 << 52) - 1);
    int b_exp = b >> 52 & 2047;
    uint64_t b_mnt = b & (((uint64_t)1 << 52) - 1);

    // Handle signalling NaNs:
    if (a_exp == 2047 && a_mnt && !(a_mnt >> 51 & 1))
        return fp64_process_NaN(a, mode, flags);
    if (b_exp == 2047 && b_mnt && !(b_mnt >> 51 & 1))
        return fp64_process_NaN(b, mode, flags);

    // Handle quiet NaNs:
    if (a_exp == 2047 && a_mnt)
        return fp64_process_NaN(a, mode, flags);
    if (b_exp == 2047 && b_mnt)
        return fp64_process_NaN(b, mode, flags);

    return 0;
}

static uint32_t
fp32_process_NaNs3(uint32_t a, uint32_t b, uint32_t c, int mode, int *flags)
{
    int a_exp = a >> 23 & 255;
    uint32_t a_mnt = a & (((uint32_t)1 << 23) - 1);
    int b_exp = b >> 23 & 255;
    uint32_t b_mnt = b & (((uint32_t)1 << 23) - 1);
    int c_exp = c >> 23 & 255;
    uint32_t c_mnt = c & (((uint32_t)1 << 23) - 1);

    // Handle signalling NaNs:
    if (a_exp == 255 && a_mnt && !(a_mnt >> 22 & 1))
        return fp32_process_NaN(a, mode, flags);
    if (b_exp == 255 && b_mnt && !(b_mnt >> 22 & 1))
        return fp32_process_NaN(b, mode, flags);
    if (c_exp == 255 && c_mnt && !(c_mnt >> 22 & 1))
        return fp32_process_NaN(c, mode, flags);

    // Handle quiet NaNs:
    if (a_exp == 255 && a_mnt)
        return fp32_process_NaN(a, mode, flags);
    if (b_exp == 255 && b_mnt)
        return fp32_process_NaN(b, mode, flags);
    if (c_exp == 255 && c_mnt)
        return fp32_process_NaN(c, mode, flags);

    return 0;
}

static uint64_t
fp64_process_NaNs3(uint64_t a, uint64_t b, uint64_t c, int mode, int *flags)
{
    int a_exp = a >> 52 & 2047;
    uint64_t a_mnt = a & (((uint64_t)1 << 52) - 1);
    int b_exp = b >> 52 & 2047;
    uint64_t b_mnt = b & (((uint64_t)1 << 52) - 1);
    int c_exp = c >> 52 & 2047;
    uint64_t c_mnt = c & (((uint64_t)1 << 52) - 1);

    // Handle signalling NaNs:
    if (a_exp == 2047 && a_mnt && !(a_mnt >> 51 & 1))
        return fp64_process_NaN(a, mode, flags);
    if (b_exp == 2047 && b_mnt && !(b_mnt >> 51 & 1))
        return fp64_process_NaN(b, mode, flags);
    if (c_exp == 2047 && c_mnt && !(c_mnt >> 51 & 1))
        return fp64_process_NaN(c, mode, flags);

    // Handle quiet NaNs:
    if (a_exp == 2047 && a_mnt)
        return fp64_process_NaN(a, mode, flags);
    if (b_exp == 2047 && b_mnt)
        return fp64_process_NaN(b, mode, flags);
    if (c_exp == 2047 && c_mnt)
        return fp64_process_NaN(c, mode, flags);

    return 0;
}

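// The propagation order in the process_NaNs helpers above mirrors the ARM
// ARM pseudocode FPProcessNaNs()/FPProcessNaNs3(): signalling NaNs take
// priority over quiet NaNs, and earlier operands over later ones. A zero
// return value means no NaN was seen and tells the caller to go on and
// compute the arithmetic result.
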
static uint16_t
fp16_round_(int sgn, int exp, uint16_t mnt, int rm, int mode, int *flags)
{
    int biased_exp; // non-negative exponent value for result
    uint16_t int_mant; // mantissa for result, less than (1 << 11)
    int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5

    assert(rm != FPRounding_TIEAWAY);

    // There is no flush to zero in this case!

    // The bottom 5 bits of mnt are ORed together:
    mnt = (uint16_t)1 << 12 | mnt >> 4 | ((mnt & 31) != 0);

    if (exp > 0) {
        biased_exp = exp;
        int_mant = mnt >> 2;
        error = mnt & 3;
    } else {
        biased_exp = 0;
        int_mant = lsr16(mnt, 3 - exp);
        error = (lsr16(mnt, 1 - exp) & 3) | !!(mnt & (lsl16(1, 1 - exp) - 1));
    }

    if (!biased_exp && error) { // xx should also check fpscr_val<11>
        *flags |= FPLIB_UFC;
    }

    // Round up:
    if ((rm == FPLIB_RN && (error == 3 ||
                            (error == 2 && (int_mant & 1)))) ||
        (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) {
        ++int_mant;
        if (int_mant == (uint32_t)1 << 10) {
            // Rounded up from denormalized to normalized
            biased_exp = 1;
        }
        if (int_mant == (uint32_t)1 << 11) {
            // Rounded up to next exponent
            ++biased_exp;
            int_mant >>= 1;
        }
    }

    // Handle rounding to odd, aka von Neumann rounding:
    if (error && rm == FPRounding_ODD)
        int_mant |= 1;

    // Handle overflow:
    if (!(mode & FPLIB_AHP)) {
        if (biased_exp >= 31) {
            *flags |= FPLIB_OFC | FPLIB_IXC;
            if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) ||
                (rm == FPLIB_RM && sgn)) {
                return fp16_infinity(sgn);
            } else {
                return fp16_max_normal(sgn);
            }
        }
    } else {
        if (biased_exp >= 32) {
            *flags |= FPLIB_IOC;
            return fp16_pack(sgn, 31, -1);
        }
    }

    if (error) {
        *flags |= FPLIB_IXC;
    }

    return fp16_pack(sgn, biased_exp, int_mant);
}

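// In fp16_round_() above and the 32- and 64-bit variants below, the
// two-bit "error" value encodes what was rounded away: bit 1 is the round
// bit (worth exactly half a unit in the last place) and bit 0 is the
// sticky OR of all lower bits. Thus error == 2 is the exact tie case,
// which round-to-nearest resolves using the parity of int_mant
// (ties-to-even).
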
static uint32_t
fp32_round_(int sgn, int exp, uint32_t mnt, int rm, int mode, int *flags)
{
    int biased_exp; // non-negative exponent value for result
    uint32_t int_mant; // mantissa for result, less than (1 << 24)
    int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5

    assert(rm != FPRounding_TIEAWAY);

    // Flush to zero:
    if ((mode & FPLIB_FZ) && exp < 1) {
        *flags |= FPLIB_UFC;
        return fp32_zero(sgn);
    }

    // The bottom 8 bits of mnt are ORed together:
    mnt = (uint32_t)1 << 25 | mnt >> 7 | ((mnt & 255) != 0);

    if (exp > 0) {
        biased_exp = exp;
        int_mant = mnt >> 2;
        error = mnt & 3;
    } else {
        biased_exp = 0;
        int_mant = lsr32(mnt, 3 - exp);
        error = (lsr32(mnt, 1 - exp) & 3) | !!(mnt & (lsl32(1, 1 - exp) - 1));
    }

    if (!biased_exp && error) { // xx should also check fpscr_val<11>
        *flags |= FPLIB_UFC;
    }

    // Round up:
    if ((rm == FPLIB_RN && (error == 3 ||
                            (error == 2 && (int_mant & 1)))) ||
        (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) {
        ++int_mant;
        if (int_mant == (uint32_t)1 << 23) {
            // Rounded up from denormalized to normalized
            biased_exp = 1;
        }
        if (int_mant == (uint32_t)1 << 24) {
            // Rounded up to next exponent
            ++biased_exp;
            int_mant >>= 1;
        }
    }

    // Handle rounding to odd, aka von Neumann rounding:
    if (error && rm == FPRounding_ODD)
        int_mant |= 1;

    // Handle overflow:
    if (biased_exp >= 255) {
        *flags |= FPLIB_OFC | FPLIB_IXC;
        if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) ||
            (rm == FPLIB_RM && sgn)) {
            return fp32_infinity(sgn);
        } else {
            return fp32_max_normal(sgn);
        }
    }

    if (error) {
        *flags |= FPLIB_IXC;
    }

    return fp32_pack(sgn, biased_exp, int_mant);
}

static uint32_t
fp32_round(int sgn, int exp, uint32_t mnt, int mode, int *flags)
{
    return fp32_round_(sgn, exp, mnt, mode & 3, mode, flags);
}

static uint64_t
fp64_round_(int sgn, int exp, uint64_t mnt, int rm, int mode, int *flags)
{
    int biased_exp; // non-negative exponent value for result
    uint64_t int_mant; // mantissa for result, less than (1 << 52)
    int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5

    assert(rm != FPRounding_TIEAWAY);

    // Flush to zero:
    if ((mode & FPLIB_FZ) && exp < 1) {
        *flags |= FPLIB_UFC;
        return fp64_zero(sgn);
    }

    // The bottom 11 bits of mnt are ORed together:
    mnt = (uint64_t)1 << 54 | mnt >> 10 | ((mnt & 0x3ff) != 0);

    if (exp > 0) {
        biased_exp = exp;
        int_mant = mnt >> 2;
        error = mnt & 3;
    } else {
        biased_exp = 0;
        int_mant = lsr64(mnt, 3 - exp);
        error = (lsr64(mnt, 1 - exp) & 3) | !!(mnt & (lsl64(1, 1 - exp) - 1));
    }

    if (!biased_exp && error) { // xx should also check fpscr_val<11>
        *flags |= FPLIB_UFC;
    }

    // Round up:
    if ((rm == FPLIB_RN && (error == 3 ||
                            (error == 2 && (int_mant & 1)))) ||
        (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) {
        ++int_mant;
        if (int_mant == (uint64_t)1 << 52) {
            // Rounded up from denormalized to normalized
            biased_exp = 1;
        }
        if (int_mant == (uint64_t)1 << 53) {
            // Rounded up to next exponent
            ++biased_exp;
            int_mant >>= 1;
        }
    }

    // Handle rounding to odd, aka von Neumann rounding:
    if (error && rm == FPRounding_ODD)
        int_mant |= 1;

    // Handle overflow:
    if (biased_exp >= 2047) {
        *flags |= FPLIB_OFC | FPLIB_IXC;
        if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) ||
            (rm == FPLIB_RM && sgn)) {
            return fp64_infinity(sgn);
        } else {
            return fp64_max_normal(sgn);
        }
    }

    if (error) {
        *flags |= FPLIB_IXC;
    }

    return fp64_pack(sgn, biased_exp, int_mant);
}

static uint64_t
fp64_round(int sgn, int exp, uint64_t mnt, int mode, int *flags)
{
    return fp64_round_(sgn, exp, mnt, mode & 3, mode, flags);
}

static int
fp32_compare_eq(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint32_t a_mnt, b_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((a_exp == 255 && (uint32_t)(a_mnt << 9)) ||
        (b_exp == 255 && (uint32_t)(b_mnt << 9))) {
        if ((a_exp == 255 && (uint32_t)(a_mnt << 9) && !(a >> 22 & 1)) ||
            (b_exp == 255 && (uint32_t)(b_mnt << 9) && !(b >> 22 & 1)))
            *flags |= FPLIB_IOC;
        return 0;
    }
    return a == b || (!a_mnt && !b_mnt);
}

static int
fp32_compare_ge(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint32_t a_mnt, b_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((a_exp == 255 && (uint32_t)(a_mnt << 9)) ||
        (b_exp == 255 && (uint32_t)(b_mnt << 9))) {
        *flags |= FPLIB_IOC;
        return 0;
    }
    if (!a_mnt && !b_mnt)
        return 1;
    if (a_sgn != b_sgn)
        return b_sgn;
    if (a_exp != b_exp)
        return a_sgn ^ (a_exp > b_exp);
    if (a_mnt != b_mnt)
        return a_sgn ^ (a_mnt > b_mnt);
    return 1;
}

static int
fp32_compare_gt(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint32_t a_mnt, b_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((a_exp == 255 && (uint32_t)(a_mnt << 9)) ||
        (b_exp == 255 && (uint32_t)(b_mnt << 9))) {
        *flags |= FPLIB_IOC;
        return 0;
    }
    if (!a_mnt && !b_mnt)
        return 0;
    if (a_sgn != b_sgn)
        return b_sgn;
    if (a_exp != b_exp)
        return a_sgn ^ (a_exp > b_exp);
    if (a_mnt != b_mnt)
        return a_sgn ^ (a_mnt > b_mnt);
    return 0;
}

static int
fp64_compare_eq(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint64_t a_mnt, b_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((a_exp == 2047 && (uint64_t)(a_mnt << 12)) ||
        (b_exp == 2047 && (uint64_t)(b_mnt << 12))) {
        if ((a_exp == 2047 && (uint64_t)(a_mnt << 12) && !(a >> 51 & 1)) ||
            (b_exp == 2047 && (uint64_t)(b_mnt << 12) && !(b >> 51 & 1)))
            *flags |= FPLIB_IOC;
        return 0;
    }
    return a == b || (!a_mnt && !b_mnt);
}

static int
fp64_compare_ge(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint64_t a_mnt, b_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((a_exp == 2047 && (uint64_t)(a_mnt << 12)) ||
        (b_exp == 2047 && (uint64_t)(b_mnt << 12))) {
        *flags |= FPLIB_IOC;
        return 0;
    }
    if (!a_mnt && !b_mnt)
        return 1;
    if (a_sgn != b_sgn)
        return b_sgn;
    if (a_exp != b_exp)
        return a_sgn ^ (a_exp > b_exp);
    if (a_mnt != b_mnt)
        return a_sgn ^ (a_mnt > b_mnt);
    return 1;
}

static int
fp64_compare_gt(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint64_t a_mnt, b_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((a_exp == 2047 && (uint64_t)(a_mnt << 12)) ||
        (b_exp == 2047 && (uint64_t)(b_mnt << 12))) {
        *flags |= FPLIB_IOC;
        return 0;
    }
    if (!a_mnt && !b_mnt)
        return 0;
    if (a_sgn != b_sgn)
        return b_sgn;
    if (a_exp != b_exp)
        return a_sgn ^ (a_exp > b_exp);
    if (a_mnt != b_mnt)
        return a_sgn ^ (a_mnt > b_mnt);
    return 0;
}

static uint32_t
fp32_add(uint32_t a, uint32_t b, int neg, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint32_t a_mnt, b_mnt, x, x_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp32_process_NaNs(a, b, mode, flags))) {
        return x;
    }

    b_sgn ^= neg;

    // Handle infinities and zeroes:
    if (a_exp == 255 && b_exp == 255 && a_sgn != b_sgn) {
        *flags |= FPLIB_IOC;
        return fp32_defaultNaN();
    } else if (a_exp == 255) {
        return fp32_infinity(a_sgn);
    } else if (b_exp == 255) {
        return fp32_infinity(b_sgn);
    } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) {
        return fp32_zero(a_sgn);
    }

    a_mnt <<= 3;
    b_mnt <<= 3;
    if (a_exp >= b_exp) {
        b_mnt = (lsr32(b_mnt, a_exp - b_exp) |
                 !!(b_mnt & (lsl32(1, a_exp - b_exp) - 1)));
        b_exp = a_exp;
    } else {
        a_mnt = (lsr32(a_mnt, b_exp - a_exp) |
                 !!(a_mnt & (lsl32(1, b_exp - a_exp) - 1)));
        a_exp = b_exp;
    }
    x_sgn = a_sgn;
    x_exp = a_exp;
    if (a_sgn == b_sgn) {
        x_mnt = a_mnt + b_mnt;
    } else if (a_mnt >= b_mnt) {
        x_mnt = a_mnt - b_mnt;
    } else {
        x_sgn ^= 1;
        x_mnt = b_mnt - a_mnt;
    }

    if (!x_mnt) {
        // Sign of exact zero result depends on rounding mode
        return fp32_zero((mode & 3) == 2);
    }

    x_mnt = fp32_normalise(x_mnt, &x_exp);

    return fp32_round(x_sgn, x_exp + 5, x_mnt << 1, mode, flags);
}

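// A note on the alignment step used by the add routines: the operand with
// the smaller exponent is shifted right by the exponent difference d, and
// any bits shifted out are ORed into the bottom (sticky) bit:
//     b_mnt = lsr32(b_mnt, d) | !!(b_mnt & (lsl32(1, d) - 1));
// Since lsl32() returns 0 once d >= 32, the mask degenerates to all ones,
// so an operand shifted entirely out of range still contributes a sticky
// bit and the sum rounds correctly.
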
static uint64_t
fp64_add(uint64_t a, uint64_t b, int neg, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint64_t a_mnt, b_mnt, x, x_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp64_process_NaNs(a, b, mode, flags))) {
        return x;
    }

    b_sgn ^= neg;

    // Handle infinities and zeroes:
    if (a_exp == 2047 && b_exp == 2047 && a_sgn != b_sgn) {
        *flags |= FPLIB_IOC;
        return fp64_defaultNaN();
    } else if (a_exp == 2047) {
        return fp64_infinity(a_sgn);
    } else if (b_exp == 2047) {
        return fp64_infinity(b_sgn);
    } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) {
        return fp64_zero(a_sgn);
    }

    a_mnt <<= 3;
    b_mnt <<= 3;
    if (a_exp >= b_exp) {
        b_mnt = (lsr64(b_mnt, a_exp - b_exp) |
                 !!(b_mnt & (lsl64(1, a_exp - b_exp) - 1)));
        b_exp = a_exp;
    } else {
        a_mnt = (lsr64(a_mnt, b_exp - a_exp) |
                 !!(a_mnt & (lsl64(1, b_exp - a_exp) - 1)));
        a_exp = b_exp;
    }
    x_sgn = a_sgn;
    x_exp = a_exp;
    if (a_sgn == b_sgn) {
        x_mnt = a_mnt + b_mnt;
    } else if (a_mnt >= b_mnt) {
        x_mnt = a_mnt - b_mnt;
    } else {
        x_sgn ^= 1;
        x_mnt = b_mnt - a_mnt;
    }

    if (!x_mnt) {
        // Sign of exact zero result depends on rounding mode
        return fp64_zero((mode & 3) == 2);
    }

    x_mnt = fp64_normalise(x_mnt, &x_exp);

    return fp64_round(x_sgn, x_exp + 8, x_mnt << 1, mode, flags);
}

static uint32_t
fp32_mul(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint32_t a_mnt, b_mnt, x;
    uint64_t x_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp32_process_NaNs(a, b, mode, flags))) {
        return x;
    }

    // Handle infinities and zeroes:
    if ((a_exp == 255 && !b_mnt) || (b_exp == 255 && !a_mnt)) {
        *flags |= FPLIB_IOC;
        return fp32_defaultNaN();
    } else if (a_exp == 255 || b_exp == 255) {
        return fp32_infinity(a_sgn ^ b_sgn);
    } else if (!a_mnt || !b_mnt) {
        return fp32_zero(a_sgn ^ b_sgn);
    }

    // Multiply and normalise:
    x_sgn = a_sgn ^ b_sgn;
    x_exp = a_exp + b_exp - 110;
    x_mnt = (uint64_t)a_mnt * b_mnt;
    x_mnt = fp64_normalise(x_mnt, &x_exp);

    // Convert to 32 bits, collapsing error into bottom bit:
    x_mnt = lsr64(x_mnt, 31) | !!lsl64(x_mnt, 33);

    return fp32_round(x_sgn, x_exp, x_mnt, mode, flags);
}

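// The "collapsing" idiom above reduces the 64-bit product to a 32-bit
// scale mantissa without losing rounding information: lsr64(x_mnt, 31)
// keeps the top 33 bits, and !!lsl64(x_mnt, 33) ORs a sticky bit in at the
// bottom if any of the 31 discarded bits were set, which is exactly the
// information the rounding step needs.
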
static uint64_t
fp64_mul(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint64_t a_mnt, b_mnt, x;
    uint64_t x0_mnt, x1_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp64_process_NaNs(a, b, mode, flags))) {
        return x;
    }

    // Handle infinities and zeroes:
    if ((a_exp == 2047 && !b_mnt) || (b_exp == 2047 && !a_mnt)) {
        *flags |= FPLIB_IOC;
        return fp64_defaultNaN();
    } else if (a_exp == 2047 || b_exp == 2047) {
        return fp64_infinity(a_sgn ^ b_sgn);
    } else if (!a_mnt || !b_mnt) {
        return fp64_zero(a_sgn ^ b_sgn);
    }

    // Multiply and normalise:
    x_sgn = a_sgn ^ b_sgn;
    x_exp = a_exp + b_exp - 1000;
    mul62x62(&x0_mnt, &x1_mnt, a_mnt, b_mnt);
    fp128_normalise(&x0_mnt, &x1_mnt, &x_exp);

    // Convert to 64 bits, collapsing error into bottom bit:
    x0_mnt = x1_mnt << 1 | !!x0_mnt;

    return fp64_round(x_sgn, x_exp, x0_mnt, mode, flags);
}

static uint32_t
fp32_muladd(uint32_t a, uint32_t b, uint32_t c, int scale,
            int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp;
    uint32_t a_mnt, b_mnt, c_mnt, x;
    uint64_t x_mnt, y_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
    fp32_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags);

    x = fp32_process_NaNs3(a, b, c, mode, flags);

    // Quiet NaN added to product of zero and infinity:
    if (a_exp == 255 && (a_mnt >> 22 & 1) &&
        ((!b_mnt && c_exp == 255 && !(uint32_t)(c_mnt << 9)) ||
         (!c_mnt && b_exp == 255 && !(uint32_t)(b_mnt << 9)))) {
        x = fp32_defaultNaN();
        *flags |= FPLIB_IOC;
    }

    if (x) {
        return x;
    }

    // Handle infinities and zeroes:
    if ((b_exp == 255 && !c_mnt) ||
        (c_exp == 255 && !b_mnt) ||
        (a_exp == 255 && (b_exp == 255 || c_exp == 255) &&
         (a_sgn != (b_sgn ^ c_sgn)))) {
        *flags |= FPLIB_IOC;
        return fp32_defaultNaN();
    }
    if (a_exp == 255)
        return fp32_infinity(a_sgn);
    if (b_exp == 255 || c_exp == 255)
        return fp32_infinity(b_sgn ^ c_sgn);
    if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn))
        return fp32_zero(a_sgn);

    x_sgn = a_sgn;
    x_exp = a_exp + 13;
    x_mnt = (uint64_t)a_mnt << 27;

    // Multiply:
    y_sgn = b_sgn ^ c_sgn;
    y_exp = b_exp + c_exp - 113;
    y_mnt = (uint64_t)b_mnt * c_mnt << 3;
    if (!y_mnt) {
        y_exp = x_exp;
    }

    // Add:
    if (x_exp >= y_exp) {
        y_mnt = (lsr64(y_mnt, x_exp - y_exp) |
                 !!(y_mnt & (lsl64(1, x_exp - y_exp) - 1)));
        y_exp = x_exp;
    } else {
        x_mnt = (lsr64(x_mnt, y_exp - x_exp) |
                 !!(x_mnt & (lsl64(1, y_exp - x_exp) - 1)));
        x_exp = y_exp;
    }
    if (x_sgn == y_sgn) {
        x_mnt = x_mnt + y_mnt;
    } else if (x_mnt >= y_mnt) {
        x_mnt = x_mnt - y_mnt;
    } else {
        x_sgn ^= 1;
        x_mnt = y_mnt - x_mnt;
    }

    if (!x_mnt) {
        // Sign of exact zero result depends on rounding mode
        return fp32_zero((mode & 3) == 2);
    }

    // Normalise and convert to 32 bits, collapsing error into bottom bit:
    x_mnt = fp64_normalise(x_mnt, &x_exp);
    x_mnt = x_mnt >> 31 | !!(uint32_t)(x_mnt << 1);

    return fp32_round(x_sgn, x_exp + scale, x_mnt, mode, flags);
}

static uint64_t
fp64_muladd(uint64_t a, uint64_t b, uint64_t c, int scale,
            int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp;
    uint64_t a_mnt, b_mnt, c_mnt, x;
    uint64_t x0_mnt, x1_mnt, y0_mnt, y1_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
    fp64_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags);

    x = fp64_process_NaNs3(a, b, c, mode, flags);

    // Quiet NaN added to product of zero and infinity:
    if (a_exp == 2047 && (a_mnt >> 51 & 1) &&
        ((!b_mnt && c_exp == 2047 && !(uint64_t)(c_mnt << 12)) ||
         (!c_mnt && b_exp == 2047 && !(uint64_t)(b_mnt << 12)))) {
        x = fp64_defaultNaN();
        *flags |= FPLIB_IOC;
    }

    if (x) {
        return x;
    }

    // Handle infinities and zeroes:
    if ((b_exp == 2047 && !c_mnt) ||
        (c_exp == 2047 && !b_mnt) ||
        (a_exp == 2047 && (b_exp == 2047 || c_exp == 2047) &&
         (a_sgn != (b_sgn ^ c_sgn)))) {
        *flags |= FPLIB_IOC;
        return fp64_defaultNaN();
    }
    if (a_exp == 2047)
        return fp64_infinity(a_sgn);
    if (b_exp == 2047 || c_exp == 2047)
        return fp64_infinity(b_sgn ^ c_sgn);
    if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn))
        return fp64_zero(a_sgn);

    x_sgn = a_sgn;
    x_exp = a_exp + 11;
    x0_mnt = 0;
    x1_mnt = a_mnt;

    // Multiply:
    y_sgn = b_sgn ^ c_sgn;
    y_exp = b_exp + c_exp - 1003;
    mul62x62(&y0_mnt, &y1_mnt, b_mnt, c_mnt << 3);
    if (!y0_mnt && !y1_mnt) {
        y_exp = x_exp;
    }

    // Add:
    if (x_exp >= y_exp) {
        uint64_t t0, t1;
        lsl128(&t0, &t1, y0_mnt, y1_mnt,
               x_exp - y_exp < 128 ? 128 - (x_exp - y_exp) : 0);
        lsr128(&y0_mnt, &y1_mnt, y0_mnt, y1_mnt, x_exp - y_exp);
        y0_mnt |= !!(t0 | t1);
        y_exp = x_exp;
    } else {
        uint64_t t0, t1;
        lsl128(&t0, &t1, x0_mnt, x1_mnt,
               y_exp - x_exp < 128 ? 128 - (y_exp - x_exp) : 0);
        lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y_exp - x_exp);
        x0_mnt |= !!(t0 | t1);
        x_exp = y_exp;
    }
    if (x_sgn == y_sgn) {
        add128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y0_mnt, y1_mnt);
    } else if (cmp128(x0_mnt, x1_mnt, y0_mnt, y1_mnt) >= 0) {
        sub128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y0_mnt, y1_mnt);
    } else {
        x_sgn ^= 1;
        sub128(&x0_mnt, &x1_mnt, y0_mnt, y1_mnt, x0_mnt, x1_mnt);
    }

    if (!x0_mnt && !x1_mnt) {
        // Sign of exact zero result depends on rounding mode
        return fp64_zero((mode & 3) == 2);
    }

    // Normalise and convert to 64 bits, collapsing error into bottom bit:
    fp128_normalise(&x0_mnt, &x1_mnt, &x_exp);
    x0_mnt = x1_mnt << 1 | !!x0_mnt;

    return fp64_round(x_sgn, x_exp + scale, x0_mnt, mode, flags);
}

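// fp64_muladd() plays the same sticky trick at 128-bit width: before
// lsr128() discards the low (x_exp - y_exp) bits, lsl128() by the
// complementary amount captures exactly those bits in t0:t1, and
// !!(t0 | t1) folds them into the sticky bit. The "? ... : 0" guard keeps
// the capture shift meaningful when the exponent difference is 128 or
// more, in which case the whole operand becomes sticky.
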
static uint32_t
fp32_div(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint32_t a_mnt, b_mnt, x;
    uint64_t x_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp32_process_NaNs(a, b, mode, flags)))
        return x;

    // Handle infinities and zeroes:
    if ((a_exp == 255 && b_exp == 255) || (!a_mnt && !b_mnt)) {
        *flags |= FPLIB_IOC;
        return fp32_defaultNaN();
    }
    if (a_exp == 255 || !b_mnt) {
        if (a_exp != 255)
            *flags |= FPLIB_DZC;
        return fp32_infinity(a_sgn ^ b_sgn);
    }
    if (!a_mnt || b_exp == 255)
        return fp32_zero(a_sgn ^ b_sgn);

    // Divide, setting bottom bit if inexact:
    a_mnt = fp32_normalise(a_mnt, &a_exp);
    x_sgn = a_sgn ^ b_sgn;
    x_exp = a_exp - b_exp + 172;
    x_mnt = ((uint64_t)a_mnt << 18) / b_mnt;
    x_mnt |= (x_mnt * b_mnt != (uint64_t)a_mnt << 18);

    // Normalise and convert to 32 bits, collapsing error into bottom bit:
    x_mnt = fp64_normalise(x_mnt, &x_exp);
    x_mnt = x_mnt >> 31 | !!(uint32_t)(x_mnt << 1);

    return fp32_round(x_sgn, x_exp, x_mnt, mode, flags);
}

static uint64_t
fp64_div(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp, c;
    uint64_t a_mnt, b_mnt, x, x_mnt, x0_mnt, x1_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp64_process_NaNs(a, b, mode, flags)))
        return x;

    // Handle infinities and zeroes:
    if ((a_exp == 2047 && b_exp == 2047) || (!a_mnt && !b_mnt)) {
        *flags |= FPLIB_IOC;
        return fp64_defaultNaN();
    }
    if (a_exp == 2047 || !b_mnt) {
        if (a_exp != 2047)
            *flags |= FPLIB_DZC;
        return fp64_infinity(a_sgn ^ b_sgn);
    }
    if (!a_mnt || b_exp == 2047)
        return fp64_zero(a_sgn ^ b_sgn);

    // Find reciprocal of divisor with Newton-Raphson:
    a_mnt = fp64_normalise(a_mnt, &a_exp);
    b_mnt = fp64_normalise(b_mnt, &b_exp);
    x_mnt = ~(uint64_t)0 / (b_mnt >> 31);
    mul64x32(&x0_mnt, &x1_mnt, b_mnt, x_mnt);
    sub128(&x0_mnt, &x1_mnt, 0, (uint64_t)1 << 32, x0_mnt, x1_mnt);
    lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 32);
    mul64x32(&x0_mnt, &x1_mnt, x0_mnt, x_mnt);
    lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 33);

    // Multiply by dividend:
    x_sgn = a_sgn ^ b_sgn;
    x_exp = a_exp - b_exp + 1031;
    mul62x62(&x0_mnt, &x1_mnt, x0_mnt, a_mnt >> 2); // xx 62x62 is enough
    lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 4);
    x_mnt = x1_mnt;

    // This is an underestimate, so try adding one:
    mul62x62(&x0_mnt, &x1_mnt, b_mnt >> 2, x_mnt + 1); // xx 62x62 is enough
    c = cmp128(x0_mnt, x1_mnt, 0, a_mnt >> 11);
    if (c <= 0) {
        ++x_mnt;
    }

    x_mnt = fp64_normalise(x_mnt, &x_exp);

    return fp64_round(x_sgn, x_exp, x_mnt << 1 | !!c, mode, flags);
}

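// fp64_div() has no wide enough native division, so it builds a
// fixed-point reciprocal instead: a native 64-bit division gives roughly
// 32 good bits of 1/b, and one Newton-Raphson step,
//     x1 = x0 * (2 - b * x0),
// doubles that. Multiplying by the dividend then yields a quotient that
// can be short by one, hence the b * (q + 1) probe above; c == 0 there
// means the corrected quotient is exact, so !!c doubles as the sticky bit.
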
static void
set_fpscr0(FPSCR &fpscr, int flags)
{
    if (flags & FPLIB_IDC) {
        fpscr.idc = 1;
    }
    if (flags & FPLIB_IOC) {
        fpscr.ioc = 1;
    }
    if (flags & FPLIB_DZC) {
        fpscr.dzc = 1;
    }
    if (flags & FPLIB_OFC) {
        fpscr.ofc = 1;
    }
    if (flags & FPLIB_UFC) {
        fpscr.ufc = 1;
    }
    if (flags & FPLIB_IXC) {
        fpscr.ixc = 1;
    }
}

static uint32_t
fp32_sqrt(uint32_t a, int mode, int *flags)
{
    int a_sgn, a_exp, x_sgn, x_exp;
    uint32_t a_mnt, x, x_mnt;
    uint64_t t0, t1;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);

    // Handle NaNs:
    if (a_exp == 255 && (uint32_t)(a_mnt << 9))
        return fp32_process_NaN(a, mode, flags);

    // Handle infinities and zeroes:
    if (!a_mnt) {
        return fp32_zero(a_sgn);
    }
    if (a_exp == 255 && !a_sgn) {
        return fp32_infinity(a_sgn);
    }
    if (a_sgn) {
        *flags |= FPLIB_IOC;
        return fp32_defaultNaN();
    }

    a_mnt = fp32_normalise(a_mnt, &a_exp);
    if (!(a_exp & 1)) {
        ++a_exp;
        a_mnt >>= 1;
    }

    // x = (a * 3 + 5) / 8
    x = (a_mnt >> 2) + (a_mnt >> 3) + (5 << 28);

    // x = (a / x + x) / 2; // 16-bit accuracy
    x = (a_mnt / (x >> 15) + (x >> 16)) << 15;

    // x = (a / x + x) / 2; // 16-bit accuracy
    x = (a_mnt / (x >> 15) + (x >> 16)) << 15;

    // x = (a / x + x) / 2; // 32-bit accuracy
    x = ((((uint64_t)a_mnt << 32) / x) >> 2) + (x >> 1);

    x_sgn = 0;
    x_exp = (a_exp + 147) >> 1;
    x_mnt = ((x - (1 << 5)) >> 6) + 1;
    t1 = (uint64_t)x_mnt * x_mnt;
    t0 = (uint64_t)a_mnt << 19;
    if (t1 > t0) {
        --x_mnt;
    }

    x_mnt = fp32_normalise(x_mnt, &x_exp);

    return fp32_round(x_sgn, x_exp, x_mnt << 1 | (t1 != t0), mode, flags);
}

static uint64_t
fp64_sqrt(uint64_t a, int mode, int *flags)
{
    int a_sgn, a_exp, x_sgn, x_exp, c;
    uint64_t a_mnt, x_mnt, r, x0, x1;
    uint32_t x;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);

    // Handle NaNs:
    if (a_exp == 2047 && (uint64_t)(a_mnt << 12)) {
        return fp64_process_NaN(a, mode, flags);
    }

    // Handle infinities and zeroes:
    if (!a_mnt)
        return fp64_zero(a_sgn);
    if (a_exp == 2047 && !a_sgn)
        return fp64_infinity(a_sgn);
    if (a_sgn) {
        *flags |= FPLIB_IOC;
        return fp64_defaultNaN();
    }

    a_mnt = fp64_normalise(a_mnt, &a_exp);
    if (a_exp & 1) {
        ++a_exp;
        a_mnt >>= 1;
    }

    // x = (a * 3 + 5) / 8
    x = (a_mnt >> 34) + (a_mnt >> 35) + (5 << 28);

    // x = (a / x + x) / 2; // 16-bit accuracy
    x = ((a_mnt >> 32) / (x >> 15) + (x >> 16)) << 15;

    // x = (a / x + x) / 2; // 16-bit accuracy
    x = ((a_mnt >> 32) / (x >> 15) + (x >> 16)) << 15;

    // x = (a / x + x) / 2; // 32-bit accuracy
    x = ((a_mnt / x) >> 2) + (x >> 1);

    // r = 1 / x; // 32-bit accuracy
    r = ((uint64_t)1 << 62) / x;

    // r = r * (2 - x * r); // 64-bit accuracy
    mul64x32(&x0, &x1, -(uint64_t)x * r << 1, r);
    lsr128(&x0, &x1, x0, x1, 31);

    // x = (x + a * r) / 2; // 64-bit accuracy
    mul62x62(&x0, &x1, a_mnt >> 10, x0 >> 2);
    lsl128(&x0, &x1, x0, x1, 5);
    lsr128(&x0, &x1, x0, x1, 56);

    x0 = ((uint64_t)x << 31) + (x0 >> 1);

    x_sgn = 0;
    x_exp = (a_exp + 1053) >> 1;
    x_mnt = x0;
    x_mnt = ((x_mnt - (1 << 8)) >> 9) + 1;
    mul62x62(&x0, &x1, x_mnt, x_mnt);
    lsl128(&x0, &x1, x0, x1, 19);
    c = cmp128(x0, x1, 0, a_mnt);
    if (c > 0)
        --x_mnt;

    x_mnt = fp64_normalise(x_mnt, &x_exp);

    return fp64_round(x_sgn, x_exp, x_mnt << 1 | !!c, mode, flags);
}

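// Both square roots use the classical Newton iteration
//     x = (a / x + x) / 2,
// seeded with the linear estimate (3a + 5) / 8 and roughly doubling in
// accuracy each step; fp64_sqrt() also refines a reciprocal via
// r = r * (2 - x * r) so the final step avoids a wide division. The
// candidate root is then squared and compared against the input,
// decremented if it overshoots, and the comparison result doubles as the
// sticky bit.
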
static int
modeConv(FPSCR fpscr)
{
    return (((int) fpscr) >> 22) & 0xF;
}

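// modeConv() extracts FPSCR bits <25:22>, which line up with the FPLIB_*
// mode constants above: RMode<1:0> maps onto FPLIB_RN/RP/RM/RZ (0-3), FZ
// onto FPLIB_FZ (4) and DN onto FPLIB_DN (8). AHP (FPSCR bit 26) is not
// captured here; the half-precision conversions OR it in separately as
// FPLIB_AHP.
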
static void
set_fpscr(FPSCR &fpscr, int flags)
{
    // translate back to FPSCR
    bool underflow = false;
    if (flags & FPLIB_IDC) {
        fpscr.idc = 1;
    }
    if (flags & FPLIB_IOC) {
        fpscr.ioc = 1;
    }
    if (flags & FPLIB_DZC) {
        fpscr.dzc = 1;
    }
    if (flags & FPLIB_OFC) {
        fpscr.ofc = 1;
    }
    if (flags & FPLIB_UFC) {
        underflow = true; //xx Why is this required?
        fpscr.ufc = 1;
    }
    if ((flags & FPLIB_IXC) && !(underflow && fpscr.fz)) {
        fpscr.ixc = 1;
    }
}

template <>
bool
fplibCompareEQ(uint32_t a, uint32_t b, FPSCR &fpscr)
{
    int flags = 0;
    int x = fp32_compare_eq(a, b, modeConv(fpscr), &flags);
    set_fpscr(fpscr, flags);
    return x;
}

template <>
bool
fplibCompareGE(uint32_t a, uint32_t b, FPSCR &fpscr)
{
    int flags = 0;
    int x = fp32_compare_ge(a, b, modeConv(fpscr), &flags);
    set_fpscr(fpscr, flags);
    return x;
}

template <>
bool
fplibCompareGT(uint32_t a, uint32_t b, FPSCR &fpscr)
{
    int flags = 0;
    int x = fp32_compare_gt(a, b, modeConv(fpscr), &flags);
    set_fpscr(fpscr, flags);
    return x;
}

template <>
bool
fplibCompareEQ(uint64_t a, uint64_t b, FPSCR &fpscr)
{
    int flags = 0;
    int x = fp64_compare_eq(a, b, modeConv(fpscr), &flags);
    set_fpscr(fpscr, flags);
    return x;
}

template <>
bool
fplibCompareGE(uint64_t a, uint64_t b, FPSCR &fpscr)
{
    int flags = 0;
    int x = fp64_compare_ge(a, b, modeConv(fpscr), &flags);
    set_fpscr(fpscr, flags);
    return x;
}

template <>
bool
fplibCompareGT(uint64_t a, uint64_t b, FPSCR &fpscr)
{
    int flags = 0;
    int x = fp64_compare_gt(a, b, modeConv(fpscr), &flags);
    set_fpscr(fpscr, flags);
    return x;
}

template <>
uint32_t
fplibAbs(uint32_t op)
{
    return op & ~((uint32_t)1 << 31);
}

template <>
uint64_t
fplibAbs(uint64_t op)
{
    return op & ~((uint64_t)1 << 63);
}

template <>
uint32_t
fplibAdd(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    int flags = 0;
    uint32_t result = fp32_add(op1, op2, 0, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}

template <>
uint64_t
fplibAdd(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    int flags = 0;
    uint64_t result = fp64_add(op1, op2, 0, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}

template <>
int
fplibCompare(uint32_t op1, uint32_t op2, bool signal_nans, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2, result;
    uint32_t mnt1, mnt2;

    fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    if ((exp1 == 255 && (uint32_t)(mnt1 << 9)) ||
        (exp2 == 255 && (uint32_t)(mnt2 << 9))) {
        result = 3;
        if ((exp1 == 255 && (uint32_t)(mnt1 << 9) && !(mnt1 >> 22 & 1)) ||
            (exp2 == 255 && (uint32_t)(mnt2 << 9) && !(mnt2 >> 22 & 1)) ||
            signal_nans)
            flags |= FPLIB_IOC;
    } else {
        if (op1 == op2 || (!mnt1 && !mnt2)) {
            result = 6;
        } else if (sgn1 != sgn2) {
            result = sgn1 ? 8 : 2;
        } else if (exp1 != exp2) {
            result = sgn1 ^ (exp1 < exp2) ? 8 : 2;
        } else {
            result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2;
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
int
fplibCompare(uint64_t op1, uint64_t op2, bool signal_nans, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2, result;
    uint64_t mnt1, mnt2;

    fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    if ((exp1 == 2047 && (uint64_t)(mnt1 << 12)) ||
        (exp2 == 2047 && (uint64_t)(mnt2 << 12))) {
        result = 3;
        if ((exp1 == 2047 && (uint64_t)(mnt1 << 12) && !(mnt1 >> 51 & 1)) ||
            (exp2 == 2047 && (uint64_t)(mnt2 << 12) && !(mnt2 >> 51 & 1)) ||
            signal_nans)
            flags |= FPLIB_IOC;
    } else {
        if (op1 == op2 || (!mnt1 && !mnt2)) {
            result = 6;
        } else if (sgn1 != sgn2) {
            result = sgn1 ? 8 : 2;
        } else if (exp1 != exp2) {
            result = sgn1 ^ (exp1 < exp2) ? 8 : 2;
        } else {
            result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2;
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}

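// fplibCompare() returns the ARM NZCV condition flags packed into a
// nibble, as delivered by the compare instructions: 2 (0b0010) for
// greater than, 6 (0b0110) for equal, 8 (0b1000) for less than and
// 3 (0b0011) for unordered.
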
static uint16_t
fp16_FPConvertNaN_32(uint32_t op)
{
    return fp16_pack(op >> 31, 31, (uint16_t)1 << 9 | op >> 13);
}

static uint16_t
fp16_FPConvertNaN_64(uint64_t op)
{
    return fp16_pack(op >> 63, 31, (uint16_t)1 << 9 | op >> 42);
}

static uint32_t
fp32_FPConvertNaN_16(uint16_t op)
{
    return fp32_pack(op >> 15, 255, (uint32_t)1 << 22 | (uint32_t)op << 13);
}

static uint32_t
fp32_FPConvertNaN_64(uint64_t op)
{
    return fp32_pack(op >> 63, 255, (uint32_t)1 << 22 | op >> 29);
}

static uint64_t
fp64_FPConvertNaN_16(uint16_t op)
{
    return fp64_pack(op >> 15, 2047, (uint64_t)1 << 51 | (uint64_t)op << 42);
}

static uint64_t
fp64_FPConvertNaN_32(uint32_t op)
{
    return fp64_pack(op >> 31, 2047, (uint64_t)1 << 51 | (uint64_t)op << 29);
}

static uint32_t
fp32_FPOnePointFive(int sgn)
{
    return fp32_pack(sgn, 127, (uint32_t)1 << 22);
}

static uint64_t
fp64_FPOnePointFive(int sgn)
{
    return fp64_pack(sgn, 1023, (uint64_t)1 << 51);
}

static uint32_t
fp32_FPThree(int sgn)
{
    return fp32_pack(sgn, 128, (uint32_t)1 << 22);
}

static uint64_t
fp64_FPThree(int sgn)
{
    return fp64_pack(sgn, 1024, (uint64_t)1 << 51);
}

static uint32_t
fp32_FPTwo(int sgn)
{
    return fp32_pack(sgn, 128, 0);
}

static uint64_t
fp64_FPTwo(int sgn)
{
    return fp64_pack(sgn, 1024, 0);
}

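// The constant builders above encode their values directly: 1.5 has an
// unbiased exponent of 0 (field value 127/1023) and the top mantissa bit
// set, 3.0 keeps that mantissa with exponent 1, and 2.0 has exponent 1
// and an empty mantissa. These are the exact operands needed by the
// reciprocal and reciprocal-square-root step operations (cf. ARM's
// FPRecipStep/FPRSqrtStep pseudocode).
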
1777template <>
1778uint16_t
1779fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr)
1780{
1781    int mode = modeConv(fpscr);
1782    int flags = 0;
1783    int sgn, exp;
1784    uint32_t mnt;
1785    uint16_t result;
1786
1787    // Unpack floating-point operand optionally with flush-to-zero:
1788    fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
1789
1790    bool alt_hp = fpscr.ahp;
1791
1792    if (exp == 255 && (uint32_t)(mnt << 9)) {
1793        if (alt_hp) {
1794            result = fp16_zero(sgn);
1795        } else if (fpscr.dn) {
1796            result = fp16_defaultNaN();
1797        } else {
1798            result = fp16_FPConvertNaN_32(op);
1799        }
1800        if (!(mnt >> 22 & 1) || alt_hp) {
1801            flags |= FPLIB_IOC;
1802        }
1803    } else if (exp == 255) {
1804        if (alt_hp) {
1805            result = sgn << 15 | (uint16_t)0x7fff;
1806            flags |= FPLIB_IOC;
1807        } else {
1808            result = fp16_infinity(sgn);
1809        }
1810    } else if (!mnt) {
1811        result = fp16_zero(sgn);
1812    } else {
1813        result = fp16_round_(sgn, exp - 127 + 15,
1814                             mnt >> 7 | !!(uint32_t)(mnt << 25),
1815                             rounding, mode | alt_hp << 4, &flags);
1816    }
1817
1818    set_fpscr0(fpscr, flags);
1819
1820    return result;
1821}
1822
1823template <>
1824uint16_t
1825fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr)
1826{
1827    int mode = modeConv(fpscr);
1828    int flags = 0;
1829    int sgn, exp;
1830    uint64_t mnt;
1831    uint16_t result;
1832
1833    // Unpack floating-point operand optionally with flush-to-zero:
1834    fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
1835
1836    bool alt_hp = fpscr.ahp;
1837
1838    if (exp == 2047 && (uint64_t)(mnt << 12)) {
1839        if (alt_hp) {
1840            result = fp16_zero(sgn);
1841        } else if (fpscr.dn) {
1842            result = fp16_defaultNaN();
1843        } else {
1844            result = fp16_FPConvertNaN_64(op);
1845        }
1846        if (!(mnt >> 51 & 1) || alt_hp) {
1847            flags |= FPLIB_IOC;
1848        }
1849    } else if (exp == 2047) {
1850        if (alt_hp) {
1851            result = sgn << 15 | (uint16_t)0x7fff;
1852            flags |= FPLIB_IOC;
1853        } else {
1854            result = fp16_infinity(sgn);
1855        }
1856    } else if (!mnt) {
1857        result = fp16_zero(sgn);
1858    } else {
1859        result = fp16_round_(sgn, exp - 1023 + 15,
1860                             mnt >> 36 | !!(uint64_t)(mnt << 28),
1861                             rounding, mode | alt_hp << 4, &flags);
1862    }
1863
1864    set_fpscr0(fpscr, flags);
1865
1866    return result;
1867}
1868
1869template <>
1870uint32_t
1871fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr)
1872{
1873    int mode = modeConv(fpscr);
1874    int flags = 0;
1875    int sgn, exp;
1876    uint16_t mnt;
1877    uint32_t result;
1878
1879    // Unpack floating-point operand optionally with flush-to-zero:
1880    fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags);
1881
1882    if (exp == 31 && !fpscr.ahp && (uint16_t)(mnt << 6)) {
1883        if (fpscr.dn) {
1884            result = fp32_defaultNaN();
1885        } else {
1886            result = fp32_FPConvertNaN_16(op);
1887        }
1888        if (!(mnt >> 9 & 1)) {
1889            flags |= FPLIB_IOC;
1890        }
1891    } else if (exp == 31 && !fpscr.ahp) {
1892        result = fp32_infinity(sgn);
1893    } else if (!mnt) {
1894        result = fp32_zero(sgn);
1895    } else {
1896        mnt = fp16_normalise(mnt, &exp);
1897        result = fp32_pack(sgn, exp - 15 + 127 + 5, (uint32_t)mnt << 8);
1898    }
1899
1900    set_fpscr0(fpscr, flags);
1901
1902    return result;
1903}
1904
template <>
uint32_t
fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint64_t mnt;
    uint32_t result;

    // Unpack floating-point operand optionally with flush-to-zero:
    fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    if (exp == 2047 && (uint64_t)(mnt << 12)) {
        if (fpscr.dn) {
            result = fp32_defaultNaN();
        } else {
            result = fp32_FPConvertNaN_64(op);
        }
        if (!(mnt >> 51 & 1)) {
            flags |= FPLIB_IOC;
        }
    } else if (exp == 2047) {
        result = fp32_infinity(sgn);
    } else if (!mnt) {
        result = fp32_zero(sgn);
    } else {
        result = fp32_round_(sgn, exp - 1023 + 127,
                             mnt >> 20 | !!(uint64_t)(mnt << 44),
                             rounding, mode, &flags);
    }

    set_fpscr0(fpscr, flags);

    return result;
}

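// The remaining two conversions widen to double precision. Like the
// half-to-single case above they are exact, so they normalise and repack
// without rounding.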
template <>
uint64_t
fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint16_t mnt;
    uint64_t result;

    // Unpack floating-point operand optionally with flush-to-zero:
    fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    if (exp == 31 && !fpscr.ahp && (uint16_t)(mnt << 6)) {
        if (fpscr.dn) {
            result = fp64_defaultNaN();
        } else {
            result = fp64_FPConvertNaN_16(op);
        }
        if (!(mnt >> 9 & 1)) {
            flags |= FPLIB_IOC;
        }
    } else if (exp == 31 && !fpscr.ahp) {
        result = fp64_infinity(sgn);
    } else if (!mnt) {
        result = fp64_zero(sgn);
    } else {
        mnt = fp16_normalise(mnt, &exp);
        result = fp64_pack(sgn, exp - 15 + 1023 + 5, (uint64_t)mnt << 37);
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint64_t
fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint32_t mnt;
    uint64_t result;

    // Unpack floating-point operand optionally with flush-to-zero:
    fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    if (exp == 255 && (uint32_t)(mnt << 9)) {
        if (fpscr.dn) {
            result = fp64_defaultNaN();
        } else {
            result = fp64_FPConvertNaN_32(op);
        }
        if (!(mnt >> 22 & 1)) {
            flags |= FPLIB_IOC;
        }
    } else if (exp == 255) {
        result = fp64_infinity(sgn);
    } else if (!mnt) {
        result = fp64_zero(sgn);
    } else {
        mnt = fp32_normalise(mnt, &exp);
        result = fp64_pack(sgn, exp - 127 + 1023 + 8, (uint64_t)mnt << 21);
    }

    set_fpscr0(fpscr, flags);

    return result;
}

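// Fused multiply-add: addend + op1 * op2 with a single rounding at the
// end. The fourth argument of fp32_muladd/fp64_muladd appears to be a
// power-of-two scale applied before rounding; it is zero here and -1 in
// the reciprocal-square-root step functions further down.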
template <>
uint32_t
fplibMulAdd(uint32_t addend, uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    int flags = 0;
    uint32_t result = fp32_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}

template <>
uint64_t
fplibMulAdd(uint64_t addend, uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    int flags = 0;
    uint64_t result = fp64_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}

template <>
uint32_t
fplibDiv(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    int flags = 0;
    uint32_t result = fp32_div(op1, op2, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}

template <>
uint64_t
fplibDiv(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    int flags = 0;
    uint64_t result = fp64_div(op1, op2, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}

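// Re-pack an unpacked sign/exponent/mantissa triple without rounding,
// forcing the stored exponent field to zero when the mantissa's hidden bit
// is clear so that zeros and subnormals round-trip correctly.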
static uint32_t
fp32_repack(int sgn, int exp, uint32_t mnt)
{
    return fp32_pack(sgn, mnt >> 23 ? exp : 0, mnt);
}

static uint64_t
fp64_repack(int sgn, int exp, uint64_t mnt)
{
    return fp64_pack(sgn, mnt >> 52 ? exp : 0, mnt);
}

static void
fp32_minmaxnum(uint32_t *op1, uint32_t *op2, int sgn)
{
    // Treat a single quiet NaN as an infinity of the given sign, so the
    // other, numeric operand wins the subsequent min/max
    if (!((uint32_t)~(*op1 << 1) >> 23) && (uint32_t)~(*op2 << 1) >> 23)
        *op1 = fp32_infinity(sgn);
    if (!((uint32_t)~(*op2 << 1) >> 23) && (uint32_t)~(*op1 << 1) >> 23)
        *op2 = fp32_infinity(sgn);
}

static void
fp64_minmaxnum(uint64_t *op1, uint64_t *op2, int sgn)
{
    // Treat a single quiet NaN as an infinity of the given sign, so the
    // other, numeric operand wins the subsequent min/max
    if (!((uint64_t)~(*op1 << 1) >> 52) && (uint64_t)~(*op2 << 1) >> 52)
        *op1 = fp64_infinity(sgn);
    if (!((uint64_t)~(*op2 << 1) >> 52) && (uint64_t)~(*op1 << 1) >> 52)
        *op2 = fp64_infinity(sgn);
}

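// FMAX: once NaNs are dealt with, the selection works on the raw bit
// patterns. For operands of equal sign, a larger pattern means a larger
// magnitude, so XORing the comparison with the sign also picks the greater
// value when both are negative; for mixed signs the positive operand wins.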
template <>
uint32_t
fplibMax(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint32_t mnt1, mnt2, x, result;

    fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    if ((x = fp32_process_NaNs(op1, op2, mode, &flags))) {
        result = x;
    } else {
        result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ?
                  fp32_repack(sgn1, exp1, mnt1) :
                  fp32_repack(sgn2, exp2, mnt2));
    }
    set_fpscr0(fpscr, flags);
    return result;
}

template <>
uint64_t
fplibMax(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint64_t mnt1, mnt2, x, result;

    fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    if ((x = fp64_process_NaNs(op1, op2, mode, &flags))) {
        result = x;
    } else {
        result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ?
                  fp64_repack(sgn1, exp1, mnt1) :
                  fp64_repack(sgn2, exp2, mnt2));
    }
    set_fpscr0(fpscr, flags);
    return result;
}

template <>
uint32_t
fplibMaxNum(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    fp32_minmaxnum(&op1, &op2, 1);
    return fplibMax<uint32_t>(op1, op2, fpscr);
}

template <>
uint64_t
fplibMaxNum(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    fp64_minmaxnum(&op1, &op2, 1);
    return fplibMax<uint64_t>(op1, op2, fpscr);
}

template <>
uint32_t
fplibMin(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint32_t mnt1, mnt2, x, result;

    fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    if ((x = fp32_process_NaNs(op1, op2, mode, &flags))) {
        result = x;
    } else {
        result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ?
                  fp32_repack(sgn1, exp1, mnt1) :
                  fp32_repack(sgn2, exp2, mnt2));
    }
    set_fpscr0(fpscr, flags);
    return result;
}

template <>
uint64_t
fplibMin(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint64_t mnt1, mnt2, x, result;

    fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    if ((x = fp64_process_NaNs(op1, op2, mode, &flags))) {
        result = x;
    } else {
        result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ?
                  fp64_repack(sgn1, exp1, mnt1) :
                  fp64_repack(sgn2, exp2, mnt2));
    }
    set_fpscr0(fpscr, flags);
    return result;
}

template <>
uint32_t
fplibMinNum(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    fp32_minmaxnum(&op1, &op2, 0);
    return fplibMin<uint32_t>(op1, op2, fpscr);
}

template <>
uint64_t
fplibMinNum(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    fp64_minmaxnum(&op1, &op2, 0);
    return fplibMin<uint64_t>(op1, op2, fpscr);
}

template <>
uint32_t
fplibMul(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    int flags = 0;
    uint32_t result = fp32_mul(op1, op2, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}

template <>
uint64_t
fplibMul(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    int flags = 0;
    uint64_t result = fp64_mul(op1, op2, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}

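// FMULX behaves like an ordinary multiply except that 0 * infinity (in
// either order) returns +/-2.0 instead of raising Invalid Operation and
// producing a NaN, which is what the FPTwo special case below implements.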
template <>
uint32_t
fplibMulX(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint32_t mnt1, mnt2, result;

    fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    result = fp32_process_NaNs(op1, op2, mode, &flags);
    if (!result) {
        if ((exp1 == 255 && !mnt2) || (exp2 == 255 && !mnt1)) {
            result = fp32_FPTwo(sgn1 ^ sgn2);
        } else if (exp1 == 255 || exp2 == 255) {
            result = fp32_infinity(sgn1 ^ sgn2);
        } else if (!mnt1 || !mnt2) {
            result = fp32_zero(sgn1 ^ sgn2);
        } else {
            result = fp32_mul(op1, op2, mode, &flags);
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint64_t
fplibMulX(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint64_t mnt1, mnt2, result;

    fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    result = fp64_process_NaNs(op1, op2, mode, &flags);
    if (!result) {
        if ((exp1 == 2047 && !mnt2) || (exp2 == 2047 && !mnt1)) {
            result = fp64_FPTwo(sgn1 ^ sgn2);
        } else if (exp1 == 2047 || exp2 == 2047) {
            result = fp64_infinity(sgn1 ^ sgn2);
        } else if (!mnt1 || !mnt2) {
            result = fp64_zero(sgn1 ^ sgn2);
        } else {
            result = fp64_mul(op1, op2, mode, &flags);
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint32_t
fplibNeg(uint32_t op)
{
    return op ^ (uint32_t)1 << 31;
}

template <>
uint64_t
fplibNeg(uint64_t op)
{
    return op ^ (uint64_t)1 << 63;
}

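// 256-entry table of 8-bit reciprocal square root estimate fractions,
// following the ARM ARM's FPRSqrtEstimate pseudocode. It is indexed by the
// low bit of the normalised exponent and the top seven fraction bits, the
// two halves covering inputs scaled into [0.25, 0.5) and [0.5, 1.0).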
static const uint8_t recip_sqrt_estimate[256] = {
    255, 253, 251, 249, 247, 245, 243, 242, 240, 238, 236, 234, 233, 231, 229, 228,
    226, 224, 223, 221, 219, 218, 216, 215, 213, 212, 210, 209, 207, 206, 204, 203,
    201, 200, 198, 197, 196, 194, 193, 192, 190, 189, 188, 186, 185, 184, 183, 181,
    180, 179, 178, 176, 175, 174, 173, 172, 170, 169, 168, 167, 166, 165, 164, 163,
    162, 160, 159, 158, 157, 156, 155, 154, 153, 152, 151, 150, 149, 148, 147, 146,
    145, 144, 143, 142, 141, 140, 140, 139, 138, 137, 136, 135, 134, 133, 132, 131,
    131, 130, 129, 128, 127, 126, 126, 125, 124, 123, 122, 121, 121, 120, 119, 118,
    118, 117, 116, 115, 114, 114, 113, 112, 111, 111, 110, 109, 109, 108, 107, 106,
    105, 104, 103, 101, 100,  99,  97,  96,  95,  93,  92,  91,  90,  88,  87,  86,
    85,  84,  82,  81,  80,  79,  78,  77,  76,  75,  74,  72,  71,  70,  69,  68,
    67,  66,  65,  64,  63,  62,  61,  60,  60,  59,  58,  57,  56,  55,  54,  53,
    52,  51,  51,  50,  49,  48,  47,  46,  46,  45,  44,  43,  42,  42,  41,  40,
    39,  38,  38,  37,  36,  35,  35,  34,  33,  33,  32,  31,  30,  30,  29,  28,
    28,  27,  26,  26,  25,  24,  24,  23,  22,  22,  21,  20,  20,  19,  19,  18,
    17,  17,  16,  16,  15,  14,  14,  13,  13,  12,  11,  11,  10,  10,   9,   9,
    8,   8,   7,   6,   6,   5,   5,   4,   4,   3,   3,   2,   2,   1,   1,   0
};

template <>
uint32_t
fplibRSqrtEstimate(uint32_t op, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint32_t mnt, result;

    fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    if (exp == 255 && (uint32_t)(mnt << 9)) {
        result = fp32_process_NaN(op, mode, &flags);
    } else if (!mnt) {
        result = fp32_infinity(sgn);
        flags |= FPLIB_DZC;
    } else if (sgn) {
        result = fp32_defaultNaN();
        flags |= FPLIB_IOC;
    } else if (exp == 255) {
        result = fp32_zero(0);
    } else {
        exp += 8;
        mnt = fp32_normalise(mnt, &exp);
        mnt = recip_sqrt_estimate[(~exp & 1) << 7 | (mnt >> 24 & 127)];
        result = fp32_pack(0, (380 - exp) >> 1, mnt << 15);
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint64_t
fplibRSqrtEstimate(uint64_t op, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint64_t mnt, result;

    fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    if (exp == 2047 && (uint64_t)(mnt << 12)) {
        result = fp64_process_NaN(op, mode, &flags);
    } else if (!mnt) {
        result = fp64_infinity(sgn);
        flags |= FPLIB_DZC;
    } else if (sgn) {
        result = fp64_defaultNaN();
        flags |= FPLIB_IOC;
    } else if (exp == 2047) {
        result = fp64_zero(0);
    } else {
        exp += 11;
        mnt = fp64_normalise(mnt, &exp);
        mnt = recip_sqrt_estimate[(~exp & 1) << 7 | (mnt >> 56 & 127)];
        result = fp64_pack(0, (3068 - exp) >> 1, mnt << 44);
    }

    set_fpscr0(fpscr, flags);

    return result;
}

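// FRSQRTS: the Newton-Raphson step (3 - op1 * op2) / 2 for refining a
// reciprocal square root estimate, evaluated as a single fused operation
// (op1 is negated up front; the -1 argument scales the result by half).
// 0 * infinity is defined to yield exactly 1.5 rather than a NaN.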
template <>
uint32_t
fplibRSqrtStepFused(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint32_t mnt1, mnt2, result;

    op1 = fplibNeg<uint32_t>(op1);
    fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    result = fp32_process_NaNs(op1, op2, mode, &flags);
    if (!result) {
        if ((exp1 == 255 && !mnt2) || (exp2 == 255 && !mnt1)) {
            result = fp32_FPOnePointFive(0);
        } else if (exp1 == 255 || exp2 == 255) {
            result = fp32_infinity(sgn1 ^ sgn2);
        } else {
            result = fp32_muladd(fp32_FPThree(0), op1, op2, -1, mode, &flags);
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint64_t
fplibRSqrtStepFused(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint64_t mnt1, mnt2, result;

    op1 = fplibNeg<uint64_t>(op1);
    fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    result = fp64_process_NaNs(op1, op2, mode, &flags);
    if (!result) {
        if ((exp1 == 2047 && !mnt2) || (exp2 == 2047 && !mnt1)) {
            result = fp64_FPOnePointFive(0);
        } else if (exp1 == 2047 || exp2 == 2047) {
            result = fp64_infinity(sgn1 ^ sgn2);
        } else {
            result = fp64_muladd(fp64_FPThree(0), op1, op2, -1, mode, &flags);
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}

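// FRECPS: the Newton-Raphson step 2 - op1 * op2 for refining a reciprocal
// estimate, again evaluated as one fused operation with op1 negated up
// front; 0 * infinity is defined to yield exactly 2.0.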
template <>
uint32_t
fplibRecipStepFused(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint32_t mnt1, mnt2, result;

    op1 = fplibNeg<uint32_t>(op1);
    fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    result = fp32_process_NaNs(op1, op2, mode, &flags);
    if (!result) {
        if ((exp1 == 255 && !mnt2) || (exp2 == 255 && !mnt1)) {
            result = fp32_FPTwo(0);
        } else if (exp1 == 255 || exp2 == 255) {
            result = fp32_infinity(sgn1 ^ sgn2);
        } else {
            result = fp32_muladd(fp32_FPTwo(0), op1, op2, 0, mode, &flags);
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}

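// FRECPE: reciprocal estimate. In the common case the top ten bits d
// (512..1023) of the normalised significand seed the estimate as
// ((1 << 19) / d + 1) >> 1, matching the ARM ARM pseudocode; inputs small
// enough to overflow the reciprocal, and (with flush-to-zero) large enough
// to underflow it, take the explicit paths below.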
template <>
uint32_t
fplibRecipEstimate(uint32_t op, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint32_t mnt, result;

    fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    if (exp == 255 && (uint32_t)(mnt << 9)) {
        result = fp32_process_NaN(op, mode, &flags);
    } else if (exp == 255) {
        result = fp32_zero(sgn);
    } else if (!mnt) {
        result = fp32_infinity(sgn);
        flags |= FPLIB_DZC;
    } else if (!((uint32_t)(op << 1) >> 22)) {
        bool overflow_to_inf = false;
        switch (FPCRRounding(fpscr)) {
          case FPRounding_TIEEVEN:
            overflow_to_inf = true;
            break;
          case FPRounding_POSINF:
            overflow_to_inf = !sgn;
            break;
          case FPRounding_NEGINF:
            overflow_to_inf = sgn;
            break;
          case FPRounding_ZERO:
            overflow_to_inf = false;
            break;
          default:
            assert(0);
        }
        result = overflow_to_inf ? fp32_infinity(sgn) : fp32_max_normal(sgn);
        flags |= FPLIB_OFC | FPLIB_IXC;
    } else if (fpscr.fz && exp >= 253) {
        result = fp32_zero(sgn);
        flags |= FPLIB_UFC;
    } else {
        exp += 8;
        mnt = fp32_normalise(mnt, &exp);
        int result_exp = 253 - exp;
        uint32_t fraction = (((uint32_t)1 << 19) / (mnt >> 22 | 1) + 1) >> 1;
        fraction <<= 15;
        if (result_exp == 0) {
            fraction >>= 1;
        } else if (result_exp == -1) {
            fraction >>= 2;
            result_exp = 0;
        }
        result = fp32_pack(sgn, result_exp, fraction);
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint64_t
fplibRecipEstimate(uint64_t op, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint64_t mnt, result;

    fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    if (exp == 2047 && (uint64_t)(mnt << 12)) {
        result = fp64_process_NaN(op, mode, &flags);
    } else if (exp == 2047) {
        result = fp64_zero(sgn);
    } else if (!mnt) {
        result = fp64_infinity(sgn);
        flags |= FPLIB_DZC;
    } else if (!((uint64_t)(op << 1) >> 51)) {
        bool overflow_to_inf = false;
        switch (FPCRRounding(fpscr)) {
          case FPRounding_TIEEVEN:
            overflow_to_inf = true;
            break;
          case FPRounding_POSINF:
            overflow_to_inf = !sgn;
            break;
          case FPRounding_NEGINF:
            overflow_to_inf = sgn;
            break;
          case FPRounding_ZERO:
            overflow_to_inf = false;
            break;
          default:
            assert(0);
        }
        result = overflow_to_inf ? fp64_infinity(sgn) : fp64_max_normal(sgn);
        flags |= FPLIB_OFC | FPLIB_IXC;
    } else if (fpscr.fz && exp >= 2045) {
        result = fp64_zero(sgn);
        flags |= FPLIB_UFC;
    } else {
        exp += 11;
        mnt = fp64_normalise(mnt, &exp);
        int result_exp = 2045 - exp;
        uint64_t fraction = (((uint32_t)1 << 19) / (mnt >> 54 | 1) + 1) >> 1;
        fraction <<= 44;
        if (result_exp == 0) {
            fraction >>= 1;
        } else if (result_exp == -1) {
            fraction >>= 2;
            result_exp = 0;
        }
        result = fp64_pack(sgn, result_exp, fraction);
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint64_t
fplibRecipStepFused(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint64_t mnt1, mnt2, result;

    op1 = fplibNeg<uint64_t>(op1);
    fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    result = fp64_process_NaNs(op1, op2, mode, &flags);
    if (!result) {
        if ((exp1 == 2047 && !mnt2) || (exp2 == 2047 && !mnt1)) {
            result = fp64_FPTwo(0);
        } else if (exp1 == 2047 || exp2 == 2047) {
            result = fp64_infinity(sgn1 ^ sgn2);
        } else {
            result = fp64_muladd(fp64_FPTwo(0), op1, op2, 0, mode, &flags);
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}

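// FRECPX: a crude exponent-only reciprocal used for scaling: the result
// has an empty fraction and the bitwise complement of the input's exponent
// field, with zeros and denormals mapping to the maximum normal exponent.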
template <>
uint32_t
fplibRecpX(uint32_t op, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint32_t mnt, result;

    fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    if (exp == 255 && (uint32_t)(mnt << 9)) {
        result = fp32_process_NaN(op, mode, &flags);
    } else {
        if (!mnt) { // Zero and denormals
            result = fp32_pack(sgn, 254, 0);
        } else { // Infinities and normals
            result = fp32_pack(sgn, exp ^ 255, 0);
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint64_t
fplibRecpX(uint64_t op, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint64_t mnt, result;

    fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    if (exp == 2047 && (uint64_t)(mnt << 12)) {
        result = fp64_process_NaN(op, mode, &flags);
    } else {
        if (!mnt) { // Zero and denormals
            result = fp64_pack(sgn, 2046, 0);
        } else { // Infinities and normals
            result = fp64_pack(sgn, exp ^ 2047, 0);
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}

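// FRINT*: round to an integral value in floating-point format. The 'err'
// computed below is a two-bit round/sticky code for the discarded
// fraction: 0 exact, 1 below half a unit, 2 exactly half, 3 above half.
// Each rounding mode then decides from 'err' (and, for ties-to-even, the
// low bit of the truncated result) whether to increment.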
template <>
uint32_t
fplibRoundInt(uint32_t op, FPRounding rounding, bool exact, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint32_t mnt, result;

    // Unpack using FPCR to determine if subnormals are flushed-to-zero:
    fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    // Handle NaNs, infinities and zeroes:
    if (exp == 255 && (uint32_t)(mnt << 9)) {
        result = fp32_process_NaN(op, mode, &flags);
    } else if (exp == 255) {
        result = fp32_infinity(sgn);
    } else if (!mnt) {
        result = fp32_zero(sgn);
    } else if (exp >= 150) {
        // There are no fractional bits
        result = op;
    } else {
        // Truncate towards zero:
        uint32_t x = 150 - exp >= 32 ? 0 : mnt >> (150 - exp);
        int err = exp < 118 ? 1 :
            (mnt << 1 >> (149 - exp) & 3) | (mnt << 2 << (exp - 118) != 0);
        switch (rounding) {
          case FPRounding_TIEEVEN:
            x += (err == 3 || (err == 2 && (x & 1)));
            break;
          case FPRounding_POSINF:
            x += err && !sgn;
            break;
          case FPRounding_NEGINF:
            x += err && sgn;
            break;
          case FPRounding_ZERO:
            break;
          case FPRounding_TIEAWAY:
            x += err >> 1;
            break;
          default:
            assert(0);
        }

        if (x == 0) {
            result = fp32_zero(sgn);
        } else {
            exp = 150;
            mnt = fp32_normalise(x, &exp);
            result = fp32_pack(sgn, exp + 8, mnt >> 8);
        }

        if (err && exact)
            flags |= FPLIB_IXC;
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint64_t
fplibRoundInt(uint64_t op, FPRounding rounding, bool exact, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint64_t mnt, result;

    // Unpack using FPCR to determine if subnormals are flushed-to-zero:
    fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    // Handle NaNs, infinities and zeroes:
    if (exp == 2047 && (uint64_t)(mnt << 12)) {
        result = fp64_process_NaN(op, mode, &flags);
    } else if (exp == 2047) {
        result = fp64_infinity(sgn);
    } else if (!mnt) {
        result = fp64_zero(sgn);
    } else if (exp >= 1075) {
        // There are no fractional bits
        result = op;
    } else {
        // Truncate towards zero:
        uint64_t x = 1075 - exp >= 64 ? 0 : mnt >> (1075 - exp);
        int err = exp < 1011 ? 1 :
            (mnt << 1 >> (1074 - exp) & 3) | (mnt << 2 << (exp - 1011) != 0);
        switch (rounding) {
          case FPRounding_TIEEVEN:
            x += (err == 3 || (err == 2 && (x & 1)));
            break;
          case FPRounding_POSINF:
            x += err && !sgn;
            break;
          case FPRounding_NEGINF:
            x += err && sgn;
            break;
          case FPRounding_ZERO:
            break;
          case FPRounding_TIEAWAY:
            x += err >> 1;
            break;
          default:
            assert(0);
        }

        if (x == 0) {
            result = fp64_zero(sgn);
        } else {
            exp = 1075;
            mnt = fp64_normalise(x, &exp);
            result = fp64_pack(sgn, exp + 11, mnt >> 11);
        }

        if (err && exact)
            flags |= FPLIB_IXC;
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint32_t
fplibSqrt(uint32_t op, FPSCR &fpscr)
{
    int flags = 0;
    uint32_t result = fp32_sqrt(op, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}

template <>
uint64_t
fplibSqrt(uint64_t op, FPSCR &fpscr)
{
    int flags = 0;
    uint64_t result = fp64_sqrt(op, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}

template <>
uint32_t
fplibSub(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    int flags = 0;
    uint32_t result = fp32_add(op1, op2, 1, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}

template <>
uint64_t
fplibSub(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    int flags = 0;
    uint64_t result = fp64_add(op1, op2, 1, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}

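// Convert an unpacked value to a 64-bit integer or fixed-point result;
// 'exp' arrives with fbits already folded in. Values with exp beyond
// 1023 + 63 cannot fit and saturate with Invalid Operation, where
// ((uint64_t)!u << 63) - !sgn yields INT64_MAX/INT64_MIN for signed and
// UINT64_MAX/0 for unsigned conversions. Rounding uses the same two-bit
// round/sticky code as fplibRoundInt above.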
static uint64_t
FPToFixed_64(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding,
             int *flags)
{
    uint64_t x;
    int err;

    if (exp > 1023 + 63) {
        *flags = FPLIB_IOC;
        return ((uint64_t)!u << 63) - !sgn;
    }

    x = lsr64(mnt << 11, 1023 + 63 - exp);
    err = (exp > 1023 + 63 - 2 ? 0 :
           (lsr64(mnt << 11, 1023 + 63 - 2 - exp) & 3) |
           !!(mnt << 11 & (lsl64(1, 1023 + 63 - 2 - exp) - 1)));

    switch (rounding) {
      case FPRounding_TIEEVEN:
        x += (err == 3 || (err == 2 && (x & 1)));
        break;
      case FPRounding_POSINF:
        x += err && !sgn;
        break;
      case FPRounding_NEGINF:
        x += err && sgn;
        break;
      case FPRounding_ZERO:
        break;
      case FPRounding_TIEAWAY:
        x += err >> 1;
        break;
      default:
        assert(0);
    }

    if (u ? sgn && x : x > ((uint64_t)1 << 63) - !sgn) {
        *flags = FPLIB_IOC;
        return ((uint64_t)!u << 63) - !sgn;
    }

    if (err) {
        *flags = FPLIB_IXC;
    }

    return sgn ? -x : x;
}

static uint32_t
FPToFixed_32(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding,
             int *flags)
{
    uint64_t x = FPToFixed_64(sgn, exp, mnt, u, rounding, flags);
    if (u ? x >= (uint64_t)1 << 32 :
        !(x < (uint64_t)1 << 31 ||
          (uint64_t)-x <= (uint64_t)1 << 31)) {
        *flags = FPLIB_IOC;
        x = ((uint32_t)!u << 31) - !sgn;
    }
    return x;
}

template <>
uint32_t
fplibFPToFixed(uint32_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
{
    int flags = 0;
    int sgn, exp;
    uint32_t mnt, result;

    // Unpack using FPCR to determine if subnormals are flushed-to-zero:
    fp32_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);

    // If NaN, set cumulative flag or take exception:
    if (exp == 255 && (uint32_t)(mnt << 9)) {
        flags = FPLIB_IOC;
        result = 0;
    } else {
        result = FPToFixed_32(sgn, exp + 1023 - 127 + fbits,
                              (uint64_t)mnt << (52 - 23), u, rounding, &flags);
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint32_t
fplibFPToFixed(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
{
    int flags = 0;
    int sgn, exp;
    uint64_t mnt;
    uint32_t result;

    // Unpack using FPCR to determine if subnormals are flushed-to-zero:
    fp64_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);

    // If NaN, set cumulative flag or take exception:
    if (exp == 2047 && (uint64_t)(mnt << 12)) {
        flags = FPLIB_IOC;
        result = 0;
    } else {
        result = FPToFixed_32(sgn, exp + fbits, mnt, u, rounding, &flags);
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint64_t
fplibFPToFixed(uint32_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
{
    int flags = 0;
    int sgn, exp;
    uint32_t mnt;
    uint64_t result;

    // Unpack using FPCR to determine if subnormals are flushed-to-zero:
    fp32_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);

    // If NaN, set cumulative flag or take exception:
    if (exp == 255 && (uint32_t)(mnt << 9)) {
        flags = FPLIB_IOC;
        result = 0;
    } else {
        result = FPToFixed_64(sgn, exp + 1023 - 127 + fbits,
                              (uint64_t)mnt << (52 - 23), u, rounding, &flags);
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint64_t
fplibFPToFixed(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
{
    int flags = 0;
    int sgn, exp;
    uint64_t mnt, result;

    // Unpack using FPCR to determine if subnormals are flushed-to-zero:
    fp64_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);

    // If NaN, set cumulative flag or take exception:
    if (exp == 2047 && (uint64_t)(mnt << 12)) {
        flags = FPLIB_IOC;
        result = 0;
    } else {
        result = FPToFixed_64(sgn, exp + fbits, mnt, u, rounding, &flags);
    }

    set_fpscr0(fpscr, flags);

    return result;
}

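// Fixed-point to floating-point conversion. The seed exponent places the
// binary point: with fbits == 0 an input of 1 normalises to biased
// exponent 190 - 63 = 127, i.e. exactly 1.0f, and each fraction bit lowers
// the result by one power of two. For instance (a worked example, not a
// call that appears elsewhere in this file):
//
//     fp32_cvtf(3, 1, 0, mode, &flags)   // 3 with 1 fraction bit
//
// normalises 3 to biased exponent 127 and rounds to 1.5f (0x3fc00000).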
static uint32_t
fp32_cvtf(uint64_t a, int fbits, int u, int mode, int *flags)
{
    int x_sgn = !u && a >> 63;
    int x_exp = 190 - fbits;
    uint64_t x_mnt = x_sgn ? -a : a;

    // Handle zero:
    if (!x_mnt) {
        return fp32_zero(0);
    }

    // Normalise and convert to 32 bits, collapsing error into bottom bit:
    x_mnt = fp64_normalise(x_mnt, &x_exp);
    x_mnt = x_mnt >> 31 | !!(uint32_t)(x_mnt << 1);

    return fp32_round(x_sgn, x_exp, x_mnt, mode, flags);
}

static uint64_t
fp64_cvtf(uint64_t a, int fbits, int u, int mode, int *flags)
{
    int x_sgn = !u && a >> 63;
    int x_exp = 1024 + 62 - fbits;
    uint64_t x_mnt = x_sgn ? -a : a;

    // Handle zero:
    if (!x_mnt) {
        return fp64_zero(0);
    }

    x_mnt = fp64_normalise(x_mnt, &x_exp);

    return fp64_round(x_sgn, x_exp, x_mnt << 1, mode, flags);
}

template <>
uint32_t
fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
{
    int flags = 0;
    uint32_t res = fp32_cvtf(op, fbits, u,
                             (int)rounding | ((uint32_t)fpscr >> 22 & 12),
                             &flags);
    set_fpscr0(fpscr, flags);
    return res;
}

template <>
uint64_t
fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
{
    int flags = 0;
    uint64_t res = fp64_cvtf(op, fbits, u,
                             (int)rounding | ((uint32_t)fpscr >> 22 & 12),
                             &flags);
    set_fpscr0(fpscr, flags);
    return res;
}

}
