fplib.cc revision 10104:ff709c429b7b
/*
 * Copyright (c) 2012-2013 ARM Limited
 * All rights reserved
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder.  You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Edmund Grimley Evans
 *          Thomas Grocutt
 */

#include <stdint.h>

#include <cassert>

#include "fplib.hh"

namespace ArmISA
{

#define FPLIB_RN 0
#define FPLIB_RP 1
#define FPLIB_RM 2
#define FPLIB_RZ 3
#define FPLIB_FZ 4
#define FPLIB_DN 8
#define FPLIB_AHP 16

#define FPLIB_IDC 128 // Input Denormal
#define FPLIB_IXC 16  // Inexact
#define FPLIB_UFC 8   // Underflow
#define FPLIB_OFC 4   // Overflow
#define FPLIB_DZC 2   // Division by Zero
#define FPLIB_IOC 1   // Invalid Operation

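// The "mode" word passed around below packs the FPSCR controls used here:
// bits 0-1 hold the rounding mode (FPLIB_RN/RP/RM/RZ), and FPLIB_FZ,
// FPLIB_DN and FPLIB_AHP mirror the flush-to-zero, default-NaN and
// alternative-half-precision bits.  The FPLIB_*C values match the FPSCR's
// cumulative exception flags, which the helpers accumulate into a plain int
// before set_fpscr()/set_fpscr0() copy them back.  An illustrative
// (hypothetical) call, assuming round-to-nearest with flush-to-zero:
//
//     int flags = 0;
//     uint32_t sum = fp32_add(a, b, 0, FPLIB_RN | FPLIB_FZ, &flags);
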
static inline uint16_t
lsl16(uint16_t x, uint32_t shift)
{
    return shift < 16 ? x << shift : 0;
}

static inline uint16_t
lsr16(uint16_t x, uint32_t shift)
{
    return shift < 16 ? x >> shift : 0;
}

static inline uint32_t
lsl32(uint32_t x, uint32_t shift)
{
    return shift < 32 ? x << shift : 0;
}

static inline uint32_t
lsr32(uint32_t x, uint32_t shift)
{
    return shift < 32 ? x >> shift : 0;
}

static inline uint64_t
lsl64(uint64_t x, uint32_t shift)
{
    return shift < 64 ? x << shift : 0;
}

static inline uint64_t
lsr64(uint64_t x, uint32_t shift)
{
    return shift < 64 ? x >> shift : 0;
}

static inline void
lsl128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift)
{
    if (!shift) {
        // A zero shift must be special-cased: the x0 >> (64 - shift) below
        // would shift by 64, which is undefined behaviour.
        *r1 = x1;
        *r0 = x0;
    } else if (shift < 64) {
        *r1 = x1 << shift | x0 >> (64 - shift);
        *r0 = x0 << shift;
    } else if (shift < 128) {
        *r1 = x0 << (shift - 64);
        *r0 = 0;
    } else {
        *r1 = 0;
        *r0 = 0;
    }
}

static inline void
lsr128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift)
{
    if (!shift) {
        // Avoid the undefined x1 << 64 below.
        *r0 = x0;
        *r1 = x1;
    } else if (shift < 64) {
        *r0 = x0 >> shift | x1 << (64 - shift);
        *r1 = x1 >> shift;
    } else if (shift < 128) {
        *r0 = x1 >> (shift - 64);
        *r1 = 0;
    } else {
        *r0 = 0;
        *r1 = 0;
    }
}

static inline void
mul62x62(uint64_t *x0, uint64_t *x1, uint64_t a, uint64_t b)
{
    uint32_t mask = ((uint32_t)1 << 31) - 1;
    uint64_t a0 = a & mask;
    uint64_t a1 = a >> 31 & mask;
    uint64_t b0 = b & mask;
    uint64_t b1 = b >> 31 & mask;
    uint64_t p0 = a0 * b0;
    uint64_t p2 = a1 * b1;
    uint64_t p1 = (a0 + a1) * (b0 + b1) - p0 - p2;
    uint64_t s0 = p0;
    uint64_t s1 = (s0 >> 31) + p1;
    uint64_t s2 = (s1 >> 31) + p2;
    *x0 = (s0 & mask) | (s1 & mask) << 31 | s2 << 62;
    *x1 = s2 >> 2;
}
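
// mul62x62() above forms the full 124-bit product of two operands of at
// most 62 bits with a Karatsuba-style decomposition: each operand is split
// into two 31-bit halves and the middle partial product is derived as
// (a0 + a1)*(b0 + b1) - p0 - p2, saving one multiplication.  The partial
// sums s1 and s2 absorb the carries when the pieces are reassembled at
// 31-bit offsets into *x1:*x0.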

static inline void
mul64x32(uint64_t *x0, uint64_t *x1, uint64_t a, uint32_t b)
{
    uint64_t t0 = (uint64_t)(uint32_t)a * b;
    uint64_t t1 = (t0 >> 32) + (a >> 32) * b;
    *x0 = t1 << 32 | (uint32_t)t0;
    *x1 = t1 >> 32;
}

static inline void
mul64x64(uint64_t *x0, uint64_t *x1, uint64_t a, uint64_t b)
{
    uint64_t a0 = (uint32_t)a;
    uint64_t a1 = a >> 32;
    uint64_t b0 = (uint32_t)b;
    uint64_t b1 = b >> 32;
    uint64_t t1 = (a0 * b0 >> 32) + a1 * b0;
    uint64_t t2 = a0 * b1;
    uint64_t x = ((uint64_t)(uint32_t)t1 + (uint32_t)t2) >> 32;
    x += t1 >> 32;
    x += t2 >> 32;
    x += a1 * b1;
    *x0 = a * b;
    *x1 = x;
}

static inline void
add128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0,
       uint64_t b1)
{
    *x0 = a0 + b0;
    *x1 = a1 + b1 + (*x0 < a0); // unsigned wrap-around signals a carry
}

static inline void
sub128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0,
       uint64_t b1)
{
    *x0 = a0 - b0;
    *x1 = a1 - b1 - (*x0 > a0); // unsigned wrap-around signals a borrow
}

static inline int
cmp128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1)
{
    return (a1 < b1 ? -1 : a1 > b1 ? 1 : a0 < b0 ? -1 : a0 > b0 ? 1 : 0);
}

static inline uint16_t
fp16_normalise(uint16_t mnt, int *exp)
{
    int shift;

    if (!mnt) {
        return 0;
    }

    for (shift = 8; shift; shift >>= 1) {
        if (!(mnt >> (16 - shift))) {
            mnt <<= shift;
            *exp -= shift;
        }
    }
    return mnt;
}

static inline uint32_t
fp32_normalise(uint32_t mnt, int *exp)
{
    int shift;

    if (!mnt) {
        return 0;
    }

    for (shift = 16; shift; shift >>= 1) {
        if (!(mnt >> (32 - shift))) {
            mnt <<= shift;
            *exp -= shift;
        }
    }
    return mnt;
}

static inline uint64_t
fp64_normalise(uint64_t mnt, int *exp)
{
    int shift;

    if (!mnt) {
        return 0;
    }

    for (shift = 32; shift; shift >>= 1) {
        if (!(mnt >> (64 - shift))) {
            mnt <<= shift;
            *exp -= shift;
        }
    }
    return mnt;
}
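
// The normalise helpers shift the mantissa left until its top bit is set,
// adjusting *exp to compensate.  The loop is a binary search over the
// shift amount (half the width, then a quarter, and so on), so it takes a
// fixed log2(width) number of steps rather than one step per leading zero.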

static inline void
fp128_normalise(uint64_t *mnt0, uint64_t *mnt1, int *exp)
{
    uint64_t x0 = *mnt0;
    uint64_t x1 = *mnt1;
    int shift;

    if (!x0 && !x1) {
        return;
    }

    if (!x1) {
        x1 = x0;
        x0 = 0;
        *exp -= 64;
    }

    for (shift = 32; shift; shift >>= 1) {
        if (!(x1 >> (64 - shift))) {
            x1 = x1 << shift | x0 >> (64 - shift);
            x0 <<= shift;
            *exp -= shift;
        }
    }

    *mnt0 = x0;
    *mnt1 = x1;
}

static inline uint16_t
fp16_pack(uint16_t sgn, uint16_t exp, uint16_t mnt)
{
    return sgn << 15 | exp << 10 | (mnt & (((uint16_t)1 << 10) - 1));
}

static inline uint32_t
fp32_pack(uint32_t sgn, uint32_t exp, uint32_t mnt)
{
    return sgn << 31 | exp << 23 | (mnt & (((uint32_t)1 << 23) - 1));
}

static inline uint64_t
fp64_pack(uint64_t sgn, uint64_t exp, uint64_t mnt)
{
    return (uint64_t)sgn << 63 | exp << 52 | (mnt & (((uint64_t)1 << 52) - 1));
}

static inline uint16_t
fp16_zero(int sgn)
{
    return fp16_pack(sgn, 0, 0);
}

static inline uint32_t
fp32_zero(int sgn)
{
    return fp32_pack(sgn, 0, 0);
}

static inline uint64_t
fp64_zero(int sgn)
{
    return fp64_pack(sgn, 0, 0);
}

static inline uint16_t
fp16_max_normal(int sgn)
{
    return fp16_pack(sgn, 30, -1);
}

static inline uint32_t
fp32_max_normal(int sgn)
{
    return fp32_pack(sgn, 254, -1);
}

static inline uint64_t
fp64_max_normal(int sgn)
{
    return fp64_pack(sgn, 2046, -1);
}

static inline uint16_t
fp16_infinity(int sgn)
{
    return fp16_pack(sgn, 31, 0);
}

static inline uint32_t
fp32_infinity(int sgn)
{
    return fp32_pack(sgn, 255, 0);
}

static inline uint64_t
fp64_infinity(int sgn)
{
    return fp64_pack(sgn, 2047, 0);
}

static inline uint16_t
fp16_defaultNaN()
{
    return fp16_pack(0, 31, (uint16_t)1 << 9);
}

static inline uint32_t
fp32_defaultNaN()
{
    return fp32_pack(0, 255, (uint32_t)1 << 22);
}

static inline uint64_t
fp64_defaultNaN()
{
    return fp64_pack(0, 2047, (uint64_t)1 << 51);
}

static inline void
fp16_unpack(int *sgn, int *exp, uint16_t *mnt, uint16_t x, int mode,
            int *flags)
{
    *sgn = x >> 15;
    *exp = x >> 10 & 31;
    *mnt = x & (((uint16_t)1 << 10) - 1);

    // Handle subnormals:
    if (*exp) {
        *mnt |= (uint16_t)1 << 10;
    } else {
        ++*exp;
        // There is no flush to zero in this case!
    }
}

static inline void
fp32_unpack(int *sgn, int *exp, uint32_t *mnt, uint32_t x, int mode,
            int *flags)
{
    *sgn = x >> 31;
    *exp = x >> 23 & 255;
    *mnt = x & (((uint32_t)1 << 23) - 1);

    // Handle subnormals:
    if (*exp) {
        *mnt |= (uint32_t)1 << 23;
    } else {
        ++*exp;
        if ((mode & FPLIB_FZ) && *mnt) {
            *flags |= FPLIB_IDC;
            *mnt = 0;
        }
    }
}

static inline void
fp64_unpack(int *sgn, int *exp, uint64_t *mnt, uint64_t x, int mode,
            int *flags)
{
    *sgn = x >> 63;
    *exp = x >> 52 & 2047;
    *mnt = x & (((uint64_t)1 << 52) - 1);

    // Handle subnormals:
    if (*exp) {
        *mnt |= (uint64_t)1 << 52;
    } else {
        ++*exp;
        if ((mode & FPLIB_FZ) && *mnt) {
            *flags |= FPLIB_IDC;
            *mnt = 0;
        }
    }
}

static inline uint32_t
fp32_process_NaN(uint32_t a, int mode, int *flags)
{
    if (!(a >> 22 & 1)) {
        *flags |= FPLIB_IOC;
        a |= (uint32_t)1 << 22;
    }
    return mode & FPLIB_DN ? fp32_defaultNaN() : a;
}

static inline uint64_t
fp64_process_NaN(uint64_t a, int mode, int *flags)
{
    if (!(a >> 51 & 1)) {
        *flags |= FPLIB_IOC;
        a |= (uint64_t)1 << 51;
    }
    return mode & FPLIB_DN ? fp64_defaultNaN() : a;
}

static uint32_t
fp32_process_NaNs(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_exp = a >> 23 & 255;
    uint32_t a_mnt = a & (((uint32_t)1 << 23) - 1);
    int b_exp = b >> 23 & 255;
    uint32_t b_mnt = b & (((uint32_t)1 << 23) - 1);

    // Handle signalling NaNs:
    if (a_exp == 255 && a_mnt && !(a_mnt >> 22 & 1))
        return fp32_process_NaN(a, mode, flags);
    if (b_exp == 255 && b_mnt && !(b_mnt >> 22 & 1))
        return fp32_process_NaN(b, mode, flags);

    // Handle quiet NaNs:
    if (a_exp == 255 && a_mnt)
        return fp32_process_NaN(a, mode, flags);
    if (b_exp == 255 && b_mnt)
        return fp32_process_NaN(b, mode, flags);

    return 0;
}

static uint64_t
fp64_process_NaNs(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_exp = a >> 52 & 2047;
    uint64_t a_mnt = a & (((uint64_t)1 << 52) - 1);
    int b_exp = b >> 52 & 2047;
    uint64_t b_mnt = b & (((uint64_t)1 << 52) - 1);

    // Handle signalling NaNs:
    if (a_exp == 2047 && a_mnt && !(a_mnt >> 51 & 1))
        return fp64_process_NaN(a, mode, flags);
    if (b_exp == 2047 && b_mnt && !(b_mnt >> 51 & 1))
        return fp64_process_NaN(b, mode, flags);

    // Handle quiet NaNs:
    if (a_exp == 2047 && a_mnt)
        return fp64_process_NaN(a, mode, flags);
    if (b_exp == 2047 && b_mnt)
        return fp64_process_NaN(b, mode, flags);

    return 0;
}

static uint32_t
fp32_process_NaNs3(uint32_t a, uint32_t b, uint32_t c, int mode, int *flags)
{
    int a_exp = a >> 23 & 255;
    uint32_t a_mnt = a & (((uint32_t)1 << 23) - 1);
    int b_exp = b >> 23 & 255;
    uint32_t b_mnt = b & (((uint32_t)1 << 23) - 1);
    int c_exp = c >> 23 & 255;
    uint32_t c_mnt = c & (((uint32_t)1 << 23) - 1);

    // Handle signalling NaNs:
    if (a_exp == 255 && a_mnt && !(a_mnt >> 22 & 1))
        return fp32_process_NaN(a, mode, flags);
    if (b_exp == 255 && b_mnt && !(b_mnt >> 22 & 1))
        return fp32_process_NaN(b, mode, flags);
    if (c_exp == 255 && c_mnt && !(c_mnt >> 22 & 1))
        return fp32_process_NaN(c, mode, flags);

    // Handle quiet NaNs:
    if (a_exp == 255 && a_mnt)
        return fp32_process_NaN(a, mode, flags);
    if (b_exp == 255 && b_mnt)
        return fp32_process_NaN(b, mode, flags);
    if (c_exp == 255 && c_mnt)
        return fp32_process_NaN(c, mode, flags);

    return 0;
}

static uint64_t
fp64_process_NaNs3(uint64_t a, uint64_t b, uint64_t c, int mode, int *flags)
{
    int a_exp = a >> 52 & 2047;
    uint64_t a_mnt = a & (((uint64_t)1 << 52) - 1);
    int b_exp = b >> 52 & 2047;
    uint64_t b_mnt = b & (((uint64_t)1 << 52) - 1);
    int c_exp = c >> 52 & 2047;
    uint64_t c_mnt = c & (((uint64_t)1 << 52) - 1);

    // Handle signalling NaNs:
    if (a_exp == 2047 && a_mnt && !(a_mnt >> 51 & 1))
        return fp64_process_NaN(a, mode, flags);
    if (b_exp == 2047 && b_mnt && !(b_mnt >> 51 & 1))
        return fp64_process_NaN(b, mode, flags);
    if (c_exp == 2047 && c_mnt && !(c_mnt >> 51 & 1))
        return fp64_process_NaN(c, mode, flags);

    // Handle quiet NaNs:
    if (a_exp == 2047 && a_mnt)
        return fp64_process_NaN(a, mode, flags);
    if (b_exp == 2047 && b_mnt)
        return fp64_process_NaN(b, mode, flags);
    if (c_exp == 2047 && c_mnt)
        return fp64_process_NaN(c, mode, flags);

    return 0;
}

static uint16_t
fp16_round_(int sgn, int exp, uint16_t mnt, int rm, int mode, int *flags)
{
    int biased_exp; // non-negative exponent value for result
    uint16_t int_mant; // mantissa for result, less than (1 << 11)
    int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5

    assert(rm != FPRounding_TIEAWAY);

    // There is no flush to zero in this case!

    // The bottom 5 bits of mnt are ORed together:
    mnt = (uint16_t)1 << 12 | mnt >> 4 | ((mnt & 31) != 0);

    if (exp > 0) {
        biased_exp = exp;
        int_mant = mnt >> 2;
        error = mnt & 3;
    } else {
        biased_exp = 0;
        int_mant = lsr16(mnt, 3 - exp);
        error = (lsr16(mnt, 1 - exp) & 3) | !!(mnt & (lsl16(1, 1 - exp) - 1));
    }

    if (!biased_exp && error) { // xx should also check fpscr_val<11>
        *flags |= FPLIB_UFC;
    }

    // Round up:
    if ((rm == FPLIB_RN && (error == 3 ||
                            (error == 2 && (int_mant & 1)))) ||
        (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) {
        ++int_mant;
        if (int_mant == (uint32_t)1 << 10) {
            // Rounded up from denormalized to normalized
            biased_exp = 1;
        }
        if (int_mant == (uint32_t)1 << 11) {
            // Rounded up to next exponent
            ++biased_exp;
            int_mant >>= 1;
        }
    }

    // Handle rounding to odd aka Von Neumann rounding:
    if (error && rm == FPRounding_ODD)
        int_mant |= 1;

    // Handle overflow:
    if (!(mode & FPLIB_AHP)) {
        if (biased_exp >= 31) {
            *flags |= FPLIB_OFC | FPLIB_IXC;
            if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) ||
                (rm == FPLIB_RM && sgn)) {
                return fp16_infinity(sgn);
            } else {
                return fp16_max_normal(sgn);
            }
        }
    } else {
        if (biased_exp >= 32) {
            *flags |= FPLIB_IOC;
            return fp16_pack(sgn, 31, -1);
        }
    }

    if (error) {
        *flags |= FPLIB_IXC;
    }

    return fp16_pack(sgn, biased_exp, int_mant);
}
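
// In the rounding helpers the mantissa arrives with two extra low-order
// bits plus a sticky bit made by ORing together everything below them.
// "error" encodes what was discarded: bit 1 is the guard bit (the first
// bit beyond the result) and bit 0 is the sticky bit, so error == 2 means
// the result is off by exactly half a unit in the last place, which is the
// tie case that round-to-nearest resolves towards the even mantissa.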

static uint32_t
fp32_round_(int sgn, int exp, uint32_t mnt, int rm, int mode, int *flags)
{
    int biased_exp; // non-negative exponent value for result
    uint32_t int_mant; // mantissa for result, less than (1 << 24)
    int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5

    assert(rm != FPRounding_TIEAWAY);

    // Flush to zero:
    if ((mode & FPLIB_FZ) && exp < 1) {
        *flags |= FPLIB_UFC;
        return fp32_zero(sgn);
    }

    // The bottom 8 bits of mnt are ORed together:
    mnt = (uint32_t)1 << 25 | mnt >> 7 | ((mnt & 255) != 0);

    if (exp > 0) {
        biased_exp = exp;
        int_mant = mnt >> 2;
        error = mnt & 3;
    } else {
        biased_exp = 0;
        int_mant = lsr32(mnt, 3 - exp);
        error = (lsr32(mnt, 1 - exp) & 3) | !!(mnt & (lsl32(1, 1 - exp) - 1));
    }

    if (!biased_exp && error) { // xx should also check fpscr_val<11>
        *flags |= FPLIB_UFC;
    }

    // Round up:
    if ((rm == FPLIB_RN && (error == 3 ||
                            (error == 2 && (int_mant & 1)))) ||
        (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) {
        ++int_mant;
        if (int_mant == (uint32_t)1 << 23) {
            // Rounded up from denormalized to normalized
            biased_exp = 1;
        }
        if (int_mant == (uint32_t)1 << 24) {
            // Rounded up to next exponent
            ++biased_exp;
            int_mant >>= 1;
        }
    }

    // Handle rounding to odd aka Von Neumann rounding:
    if (error && rm == FPRounding_ODD)
        int_mant |= 1;

    // Handle overflow:
    if (biased_exp >= 255) {
        *flags |= FPLIB_OFC | FPLIB_IXC;
        if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) ||
            (rm == FPLIB_RM && sgn)) {
            return fp32_infinity(sgn);
        } else {
            return fp32_max_normal(sgn);
        }
    }

    if (error) {
        *flags |= FPLIB_IXC;
    }

    return fp32_pack(sgn, biased_exp, int_mant);
}

static uint32_t
fp32_round(int sgn, int exp, uint32_t mnt, int mode, int *flags)
{
    return fp32_round_(sgn, exp, mnt, mode & 3, mode, flags);
}

static uint64_t
fp64_round_(int sgn, int exp, uint64_t mnt, int rm, int mode, int *flags)
{
    int biased_exp; // non-negative exponent value for result
    uint64_t int_mant; // mantissa for result, less than (1 << 53)
    int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5

    assert(rm != FPRounding_TIEAWAY);

    // Flush to zero:
    if ((mode & FPLIB_FZ) && exp < 1) {
        *flags |= FPLIB_UFC;
        return fp64_zero(sgn);
    }

    // The bottom 11 bits of mnt are ORed together:
    mnt = (uint64_t)1 << 54 | mnt >> 10 | ((mnt & 0x3ff) != 0);

    if (exp > 0) {
        biased_exp = exp;
        int_mant = mnt >> 2;
        error = mnt & 3;
    } else {
        biased_exp = 0;
        int_mant = lsr64(mnt, 3 - exp);
        error = (lsr64(mnt, 1 - exp) & 3) | !!(mnt & (lsl64(1, 1 - exp) - 1));
    }

    if (!biased_exp && error) { // xx should also check fpscr_val<11>
        *flags |= FPLIB_UFC;
    }

    // Round up:
    if ((rm == FPLIB_RN && (error == 3 ||
                            (error == 2 && (int_mant & 1)))) ||
        (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) {
        ++int_mant;
        if (int_mant == (uint64_t)1 << 52) {
            // Rounded up from denormalized to normalized
            biased_exp = 1;
        }
        if (int_mant == (uint64_t)1 << 53) {
            // Rounded up to next exponent
            ++biased_exp;
            int_mant >>= 1;
        }
    }

    // Handle rounding to odd aka Von Neumann rounding:
    if (error && rm == FPRounding_ODD)
        int_mant |= 1;

    // Handle overflow:
    if (biased_exp >= 2047) {
        *flags |= FPLIB_OFC | FPLIB_IXC;
        if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) ||
            (rm == FPLIB_RM && sgn)) {
            return fp64_infinity(sgn);
        } else {
            return fp64_max_normal(sgn);
        }
    }

    if (error) {
        *flags |= FPLIB_IXC;
    }

    return fp64_pack(sgn, biased_exp, int_mant);
}

static uint64_t
fp64_round(int sgn, int exp, uint64_t mnt, int mode, int *flags)
{
    return fp64_round_(sgn, exp, mnt, mode & 3, mode, flags);
}

static int
fp32_compare_eq(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint32_t a_mnt, b_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((a_exp == 255 && (uint32_t)(a_mnt << 9)) ||
        (b_exp == 255 && (uint32_t)(b_mnt << 9))) {
        if ((a_exp == 255 && (uint32_t)(a_mnt << 9) && !(a >> 22 & 1)) ||
            (b_exp == 255 && (uint32_t)(b_mnt << 9) && !(b >> 22 & 1)))
            *flags |= FPLIB_IOC;
        return 0;
    }
    return a == b || (!a_mnt && !b_mnt);
}

static int
fp32_compare_ge(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint32_t a_mnt, b_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((a_exp == 255 && (uint32_t)(a_mnt << 9)) ||
        (b_exp == 255 && (uint32_t)(b_mnt << 9))) {
        *flags |= FPLIB_IOC;
        return 0;
    }
    if (!a_mnt && !b_mnt)
        return 1;
    if (a_sgn != b_sgn)
        return b_sgn;
    if (a_exp != b_exp)
        return a_sgn ^ (a_exp > b_exp);
    if (a_mnt != b_mnt)
        return a_sgn ^ (a_mnt > b_mnt);
    return 1;
}

static int
fp32_compare_gt(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint32_t a_mnt, b_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((a_exp == 255 && (uint32_t)(a_mnt << 9)) ||
        (b_exp == 255 && (uint32_t)(b_mnt << 9))) {
        *flags |= FPLIB_IOC;
        return 0;
    }
    if (!a_mnt && !b_mnt)
        return 0;
    if (a_sgn != b_sgn)
        return b_sgn;
    if (a_exp != b_exp)
        return a_sgn ^ (a_exp > b_exp);
    if (a_mnt != b_mnt)
        return a_sgn ^ (a_mnt > b_mnt);
    return 0;
}

static int
fp64_compare_eq(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint64_t a_mnt, b_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((a_exp == 2047 && (uint64_t)(a_mnt << 12)) ||
        (b_exp == 2047 && (uint64_t)(b_mnt << 12))) {
        if ((a_exp == 2047 && (uint64_t)(a_mnt << 12) && !(a >> 51 & 1)) ||
            (b_exp == 2047 && (uint64_t)(b_mnt << 12) && !(b >> 51 & 1)))
            *flags |= FPLIB_IOC;
        return 0;
    }
    return a == b || (!a_mnt && !b_mnt);
}

static int
fp64_compare_ge(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint64_t a_mnt, b_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((a_exp == 2047 && (uint64_t)(a_mnt << 12)) ||
        (b_exp == 2047 && (uint64_t)(b_mnt << 12))) {
        *flags |= FPLIB_IOC;
        return 0;
    }
    if (!a_mnt && !b_mnt)
        return 1;
    if (a_sgn != b_sgn)
        return b_sgn;
    if (a_exp != b_exp)
        return a_sgn ^ (a_exp > b_exp);
    if (a_mnt != b_mnt)
        return a_sgn ^ (a_mnt > b_mnt);
    return 1;
}

static int
fp64_compare_gt(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint64_t a_mnt, b_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((a_exp == 2047 && (uint64_t)(a_mnt << 12)) ||
        (b_exp == 2047 && (uint64_t)(b_mnt << 12))) {
        *flags |= FPLIB_IOC;
        return 0;
    }
    if (!a_mnt && !b_mnt)
        return 0;
    if (a_sgn != b_sgn)
        return b_sgn;
    if (a_exp != b_exp)
        return a_sgn ^ (a_exp > b_exp);
    if (a_mnt != b_mnt)
        return a_sgn ^ (a_mnt > b_mnt);
    return 0;
}
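
// The ordered comparisons above work directly on the unpacked fields: with
// equal signs, a larger exponent or mantissa means a larger magnitude, and
// XORing with the sign flips the answer for negatives.  The !a_mnt &&
// !b_mnt tests make +0 and -0 compare equal, and any NaN operand makes the
// comparison unordered; signalling NaNs always raise Invalid Operation,
// while quiet NaNs do so for the ordered predicates but not for equality.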

static uint32_t
fp32_add(uint32_t a, uint32_t b, int neg, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint32_t a_mnt, b_mnt, x, x_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp32_process_NaNs(a, b, mode, flags))) {
        return x;
    }

    b_sgn ^= neg;

    // Handle infinities and zeroes:
    if (a_exp == 255 && b_exp == 255 && a_sgn != b_sgn) {
        *flags |= FPLIB_IOC;
        return fp32_defaultNaN();
    } else if (a_exp == 255) {
        return fp32_infinity(a_sgn);
    } else if (b_exp == 255) {
        return fp32_infinity(b_sgn);
    } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) {
        return fp32_zero(a_sgn);
    }

    a_mnt <<= 3;
    b_mnt <<= 3;
    if (a_exp >= b_exp) {
        b_mnt = (lsr32(b_mnt, a_exp - b_exp) |
                 !!(b_mnt & (lsl32(1, a_exp - b_exp) - 1)));
        b_exp = a_exp;
    } else {
        a_mnt = (lsr32(a_mnt, b_exp - a_exp) |
                 !!(a_mnt & (lsl32(1, b_exp - a_exp) - 1)));
        a_exp = b_exp;
    }
    x_sgn = a_sgn;
    x_exp = a_exp;
    if (a_sgn == b_sgn) {
        x_mnt = a_mnt + b_mnt;
    } else if (a_mnt >= b_mnt) {
        x_mnt = a_mnt - b_mnt;
    } else {
        x_sgn ^= 1;
        x_mnt = b_mnt - a_mnt;
    }

    if (!x_mnt) {
        // Sign of exact zero result depends on rounding mode
        return fp32_zero((mode & 3) == 2);
    }

    x_mnt = fp32_normalise(x_mnt, &x_exp);

    return fp32_round(x_sgn, x_exp + 5, x_mnt << 1, mode, flags);
}

static uint64_t
fp64_add(uint64_t a, uint64_t b, int neg, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint64_t a_mnt, b_mnt, x, x_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp64_process_NaNs(a, b, mode, flags))) {
        return x;
    }

    b_sgn ^= neg;

    // Handle infinities and zeroes:
    if (a_exp == 2047 && b_exp == 2047 && a_sgn != b_sgn) {
        *flags |= FPLIB_IOC;
        return fp64_defaultNaN();
    } else if (a_exp == 2047) {
        return fp64_infinity(a_sgn);
    } else if (b_exp == 2047) {
        return fp64_infinity(b_sgn);
    } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) {
        return fp64_zero(a_sgn);
    }

    a_mnt <<= 3;
    b_mnt <<= 3;
    if (a_exp >= b_exp) {
        b_mnt = (lsr64(b_mnt, a_exp - b_exp) |
                 !!(b_mnt & (lsl64(1, a_exp - b_exp) - 1)));
        b_exp = a_exp;
    } else {
        a_mnt = (lsr64(a_mnt, b_exp - a_exp) |
                 !!(a_mnt & (lsl64(1, b_exp - a_exp) - 1)));
        a_exp = b_exp;
    }
    x_sgn = a_sgn;
    x_exp = a_exp;
    if (a_sgn == b_sgn) {
        x_mnt = a_mnt + b_mnt;
    } else if (a_mnt >= b_mnt) {
        x_mnt = a_mnt - b_mnt;
    } else {
        x_sgn ^= 1;
        x_mnt = b_mnt - a_mnt;
    }

    if (!x_mnt) {
        // Sign of exact zero result depends on rounding mode
        return fp64_zero((mode & 3) == 2);
    }

    x_mnt = fp64_normalise(x_mnt, &x_exp);

    return fp64_round(x_sgn, x_exp + 8, x_mnt << 1, mode, flags);
}
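
// fp32_add()/fp64_add() align the operands by shifting the smaller one
// right, ORing every shifted-out bit into the bottom ("sticky") bit so no
// inexactness is lost, then add or subtract mantissas according to the
// signs.  An exact zero result gets its sign from the IEEE rule that only
// rounding towards minus infinity yields -0, which is what the
// fp32_zero((mode & 3) == 2) calls encode (2 being FPLIB_RM).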

static uint32_t
fp32_mul(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint32_t a_mnt, b_mnt, x;
    uint64_t x_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp32_process_NaNs(a, b, mode, flags))) {
        return x;
    }

    // Handle infinities and zeroes:
    if ((a_exp == 255 && !b_mnt) || (b_exp == 255 && !a_mnt)) {
        *flags |= FPLIB_IOC;
        return fp32_defaultNaN();
    } else if (a_exp == 255 || b_exp == 255) {
        return fp32_infinity(a_sgn ^ b_sgn);
    } else if (!a_mnt || !b_mnt) {
        return fp32_zero(a_sgn ^ b_sgn);
    }

    // Multiply and normalise:
    x_sgn = a_sgn ^ b_sgn;
    x_exp = a_exp + b_exp - 110;
    x_mnt = (uint64_t)a_mnt * b_mnt;
    x_mnt = fp64_normalise(x_mnt, &x_exp);

    // Convert to 32 bits, collapsing error into bottom bit:
    x_mnt = lsr64(x_mnt, 31) | !!lsl64(x_mnt, 33);

    return fp32_round(x_sgn, x_exp, x_mnt, mode, flags);
}

static uint64_t
fp64_mul(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint64_t a_mnt, b_mnt, x;
    uint64_t x0_mnt, x1_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp64_process_NaNs(a, b, mode, flags))) {
        return x;
    }

    // Handle infinities and zeroes:
    if ((a_exp == 2047 && !b_mnt) || (b_exp == 2047 && !a_mnt)) {
        *flags |= FPLIB_IOC;
        return fp64_defaultNaN();
    } else if (a_exp == 2047 || b_exp == 2047) {
        return fp64_infinity(a_sgn ^ b_sgn);
    } else if (!a_mnt || !b_mnt) {
        return fp64_zero(a_sgn ^ b_sgn);
    }

    // Multiply and normalise:
    x_sgn = a_sgn ^ b_sgn;
    x_exp = a_exp + b_exp - 1000;
    mul62x62(&x0_mnt, &x1_mnt, a_mnt, b_mnt);
    fp128_normalise(&x0_mnt, &x1_mnt, &x_exp);

    // Convert to 64 bits, collapsing error into bottom bit:
    x0_mnt = x1_mnt << 1 | !!x0_mnt;

    return fp64_round(x_sgn, x_exp, x0_mnt, mode, flags);
}
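
// For multiplication the mantissa product is formed exactly: 24 x 24 bits
// fit in a uint64_t for single precision, and mul62x62() produces a
// 128-bit pair for double precision.  The product is then normalised and
// collapsed back to the rounding width, with any discarded non-zero bits
// ORed into the bottom bit, so fp32_round()/fp64_round() see an
// exact-or-sticky image of the infinitely precise result.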

static uint32_t
fp32_muladd(uint32_t a, uint32_t b, uint32_t c, int scale,
            int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp;
    uint32_t a_mnt, b_mnt, c_mnt, x;
    uint64_t x_mnt, y_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
    fp32_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags);

    x = fp32_process_NaNs3(a, b, c, mode, flags);

    // Quiet NaN added to product of zero and infinity:
    if (a_exp == 255 && (a_mnt >> 22 & 1) &&
        ((!b_mnt && c_exp == 255 && !(uint32_t)(c_mnt << 9)) ||
         (!c_mnt && b_exp == 255 && !(uint32_t)(b_mnt << 9)))) {
        x = fp32_defaultNaN();
        *flags |= FPLIB_IOC;
    }

    if (x) {
        return x;
    }

    // Handle infinities and zeroes:
    if ((b_exp == 255 && !c_mnt) ||
        (c_exp == 255 && !b_mnt) ||
        (a_exp == 255 && (b_exp == 255 || c_exp == 255) &&
         (a_sgn != (b_sgn ^ c_sgn)))) {
        *flags |= FPLIB_IOC;
        return fp32_defaultNaN();
    }
    if (a_exp == 255)
        return fp32_infinity(a_sgn);
    if (b_exp == 255 || c_exp == 255)
        return fp32_infinity(b_sgn ^ c_sgn);
    if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn))
        return fp32_zero(a_sgn);

    x_sgn = a_sgn;
    x_exp = a_exp + 13;
    x_mnt = (uint64_t)a_mnt << 27;

    // Multiply:
    y_sgn = b_sgn ^ c_sgn;
    y_exp = b_exp + c_exp - 113;
    y_mnt = (uint64_t)b_mnt * c_mnt << 3;
    if (!y_mnt) {
        y_exp = x_exp;
    }

    // Add:
    if (x_exp >= y_exp) {
        y_mnt = (lsr64(y_mnt, x_exp - y_exp) |
                 !!(y_mnt & (lsl64(1, x_exp - y_exp) - 1)));
        y_exp = x_exp;
    } else {
        x_mnt = (lsr64(x_mnt, y_exp - x_exp) |
                 !!(x_mnt & (lsl64(1, y_exp - x_exp) - 1)));
        x_exp = y_exp;
    }
    if (x_sgn == y_sgn) {
        x_mnt = x_mnt + y_mnt;
    } else if (x_mnt >= y_mnt) {
        x_mnt = x_mnt - y_mnt;
    } else {
        x_sgn ^= 1;
        x_mnt = y_mnt - x_mnt;
    }

    if (!x_mnt) {
        // Sign of exact zero result depends on rounding mode
        return fp32_zero((mode & 3) == 2);
    }

    // Normalise and convert to 32 bits, collapsing error into bottom bit:
    x_mnt = fp64_normalise(x_mnt, &x_exp);
    x_mnt = x_mnt >> 31 | !!(uint32_t)(x_mnt << 1);

    return fp32_round(x_sgn, x_exp + scale, x_mnt, mode, flags);
}

static uint64_t
fp64_muladd(uint64_t a, uint64_t b, uint64_t c, int scale,
            int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp;
    uint64_t a_mnt, b_mnt, c_mnt, x;
    uint64_t x0_mnt, x1_mnt, y0_mnt, y1_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
    fp64_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags);

    x = fp64_process_NaNs3(a, b, c, mode, flags);

    // Quiet NaN added to product of zero and infinity:
    if (a_exp == 2047 && (a_mnt >> 51 & 1) &&
        ((!b_mnt && c_exp == 2047 && !(uint64_t)(c_mnt << 12)) ||
         (!c_mnt && b_exp == 2047 && !(uint64_t)(b_mnt << 12)))) {
        x = fp64_defaultNaN();
        *flags |= FPLIB_IOC;
    }

    if (x) {
        return x;
    }

    // Handle infinities and zeroes:
    if ((b_exp == 2047 && !c_mnt) ||
        (c_exp == 2047 && !b_mnt) ||
        (a_exp == 2047 && (b_exp == 2047 || c_exp == 2047) &&
         (a_sgn != (b_sgn ^ c_sgn)))) {
        *flags |= FPLIB_IOC;
        return fp64_defaultNaN();
    }
    if (a_exp == 2047)
        return fp64_infinity(a_sgn);
    if (b_exp == 2047 || c_exp == 2047)
        return fp64_infinity(b_sgn ^ c_sgn);
    if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn))
        return fp64_zero(a_sgn);

    x_sgn = a_sgn;
    x_exp = a_exp + 11;
    x0_mnt = 0;
    x1_mnt = a_mnt;

    // Multiply:
    y_sgn = b_sgn ^ c_sgn;
    y_exp = b_exp + c_exp - 1003;
    mul62x62(&y0_mnt, &y1_mnt, b_mnt, c_mnt << 3);
    if (!y0_mnt && !y1_mnt) {
        y_exp = x_exp;
    }

    // Add:
    if (x_exp >= y_exp) {
        uint64_t t0, t1;
        lsl128(&t0, &t1, y0_mnt, y1_mnt,
               x_exp - y_exp < 128 ? 128 - (x_exp - y_exp) : 0);
        lsr128(&y0_mnt, &y1_mnt, y0_mnt, y1_mnt, x_exp - y_exp);
        y0_mnt |= !!(t0 | t1);
        y_exp = x_exp;
    } else {
        uint64_t t0, t1;
        lsl128(&t0, &t1, x0_mnt, x1_mnt,
               y_exp - x_exp < 128 ? 128 - (y_exp - x_exp) : 0);
        lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y_exp - x_exp);
        x0_mnt |= !!(t0 | t1);
        x_exp = y_exp;
    }
    if (x_sgn == y_sgn) {
        add128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y0_mnt, y1_mnt);
    } else if (cmp128(x0_mnt, x1_mnt, y0_mnt, y1_mnt) >= 0) {
        sub128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y0_mnt, y1_mnt);
    } else {
        x_sgn ^= 1;
        sub128(&x0_mnt, &x1_mnt, y0_mnt, y1_mnt, x0_mnt, x1_mnt);
    }

    if (!x0_mnt && !x1_mnt) {
        // Sign of exact zero result depends on rounding mode
        return fp64_zero((mode & 3) == 2);
    }

    // Normalise and convert to 64 bits, collapsing error into bottom bit:
    fp128_normalise(&x0_mnt, &x1_mnt, &x_exp);
    x0_mnt = x1_mnt << 1 | !!x0_mnt;

    return fp64_round(x_sgn, x_exp + scale, x0_mnt, mode, flags);
}
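
// The multiply-add helpers are fused: the b*c product is kept exactly
// (via mul62x62() in the double-precision case), aligned against the
// addend with sticky-bit accumulation, and only the final sum passes
// through fp32_round()/fp64_round().  A separate multiply followed by an
// add would round twice; rounding once at the end is what makes the
// operation an FMA.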

static uint32_t
fp32_div(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint32_t a_mnt, b_mnt, x;
    uint64_t x_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp32_process_NaNs(a, b, mode, flags)))
        return x;

    // Handle infinities and zeroes:
    if ((a_exp == 255 && b_exp == 255) || (!a_mnt && !b_mnt)) {
        *flags |= FPLIB_IOC;
        return fp32_defaultNaN();
    }
    if (a_exp == 255 || !b_mnt) {
        if (a_exp != 255)
            *flags |= FPLIB_DZC;
        return fp32_infinity(a_sgn ^ b_sgn);
    }
    if (!a_mnt || b_exp == 255)
        return fp32_zero(a_sgn ^ b_sgn);

    // Divide, setting bottom bit if inexact:
    a_mnt = fp32_normalise(a_mnt, &a_exp);
    x_sgn = a_sgn ^ b_sgn;
    x_exp = a_exp - b_exp + 172;
    x_mnt = ((uint64_t)a_mnt << 18) / b_mnt;
    x_mnt |= (x_mnt * b_mnt != (uint64_t)a_mnt << 18);

    // Normalise and convert to 32 bits, collapsing error into bottom bit:
    x_mnt = fp64_normalise(x_mnt, &x_exp);
    x_mnt = x_mnt >> 31 | !!(uint32_t)(x_mnt << 1);

    return fp32_round(x_sgn, x_exp, x_mnt, mode, flags);
}

static uint64_t
fp64_div(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp, c;
    uint64_t a_mnt, b_mnt, x, x_mnt, x0_mnt, x1_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp64_process_NaNs(a, b, mode, flags)))
        return x;

    // Handle infinities and zeroes:
    if ((a_exp == 2047 && b_exp == 2047) || (!a_mnt && !b_mnt)) {
        *flags |= FPLIB_IOC;
        return fp64_defaultNaN();
    }
    if (a_exp == 2047 || !b_mnt) {
        if (a_exp != 2047)
            *flags |= FPLIB_DZC;
        return fp64_infinity(a_sgn ^ b_sgn);
    }
    if (!a_mnt || b_exp == 2047)
        return fp64_zero(a_sgn ^ b_sgn);

    // Find reciprocal of divisor with Newton-Raphson:
    a_mnt = fp64_normalise(a_mnt, &a_exp);
    b_mnt = fp64_normalise(b_mnt, &b_exp);
    x_mnt = ~(uint64_t)0 / (b_mnt >> 31);
    mul64x32(&x0_mnt, &x1_mnt, b_mnt, x_mnt);
    sub128(&x0_mnt, &x1_mnt, 0, (uint64_t)1 << 32, x0_mnt, x1_mnt);
    lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 32);
    mul64x32(&x0_mnt, &x1_mnt, x0_mnt, x_mnt);
    lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 33);

    // Multiply by dividend:
    x_sgn = a_sgn ^ b_sgn;
    x_exp = a_exp - b_exp + 1031;
    mul62x62(&x0_mnt, &x1_mnt, x0_mnt, a_mnt >> 2); // xx 62x62 is enough
    lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 4);
    x_mnt = x1_mnt;

    // This is an underestimate, so try adding one:
    mul62x62(&x0_mnt, &x1_mnt, b_mnt >> 2, x_mnt + 1); // xx 62x62 is enough
    c = cmp128(x0_mnt, x1_mnt, 0, a_mnt >> 11);
    if (c <= 0) {
        ++x_mnt;
    }

    x_mnt = fp64_normalise(x_mnt, &x_exp);

    return fp64_round(x_sgn, x_exp, x_mnt << 1 | !!c, mode, flags);
}
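
// fp64_div() has no 128-by-64-bit divide to lean on, so it approximates
// 1/b by Newton-Raphson (each x = x*(2 - b*x) step roughly doubles the
// number of correct bits), multiplies by the dividend to get a quotient
// estimate that may be one ulp low, and compares b*(x_mnt + 1) with the
// dividend to decide whether to bump the estimate.  The comparison result
// c doubles as the sticky bit for the final rounding, since c == 0 means
// the quotient is exact.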

static void
set_fpscr0(FPSCR &fpscr, int flags)
{
    if (flags & FPLIB_IDC) {
        fpscr.idc = 1;
    }
    if (flags & FPLIB_IOC) {
        fpscr.ioc = 1;
    }
    if (flags & FPLIB_DZC) {
        fpscr.dzc = 1;
    }
    if (flags & FPLIB_OFC) {
        fpscr.ofc = 1;
    }
    if (flags & FPLIB_UFC) {
        fpscr.ufc = 1;
    }
    if (flags & FPLIB_IXC) {
        fpscr.ixc = 1;
    }
}

static uint32_t
fp32_sqrt(uint32_t a, int mode, int *flags)
{
    int a_sgn, a_exp, x_sgn, x_exp;
    uint32_t a_mnt, x, x_mnt;
    uint64_t t0, t1;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);

    // Handle NaNs:
    if (a_exp == 255 && (uint32_t)(a_mnt << 9))
        return fp32_process_NaN(a, mode, flags);

    // Handle infinities and zeroes:
    if (!a_mnt) {
        return fp32_zero(a_sgn);
    }
    if (a_exp == 255 && !a_sgn) {
        return fp32_infinity(a_sgn);
    }
    if (a_sgn) {
        *flags |= FPLIB_IOC;
        return fp32_defaultNaN();
    }

    a_mnt = fp32_normalise(a_mnt, &a_exp);
    if (!(a_exp & 1)) {
        ++a_exp;
        a_mnt >>= 1;
    }

    // x = (a * 3 + 5) / 8
    x = (a_mnt >> 2) + (a_mnt >> 3) + (5 << 28);

    // x = (a / x + x) / 2; // 16-bit accuracy
    x = (a_mnt / (x >> 15) + (x >> 16)) << 15;

    // x = (a / x + x) / 2; // 16-bit accuracy
    x = (a_mnt / (x >> 15) + (x >> 16)) << 15;

    // x = (a / x + x) / 2; // 32-bit accuracy
    x = ((((uint64_t)a_mnt << 32) / x) >> 2) + (x >> 1);

    x_sgn = 0;
    x_exp = (a_exp + 147) >> 1;
    x_mnt = ((x - (1 << 5)) >> 6) + 1;
    t1 = (uint64_t)x_mnt * x_mnt;
    t0 = (uint64_t)a_mnt << 19;
    if (t1 > t0) {
        --x_mnt;
    }

    x_mnt = fp32_normalise(x_mnt, &x_exp);

    return fp32_round(x_sgn, x_exp, x_mnt << 1 | (t1 != t0), mode, flags);
}

static uint64_t
fp64_sqrt(uint64_t a, int mode, int *flags)
{
    int a_sgn, a_exp, x_sgn, x_exp, c;
    uint64_t a_mnt, x_mnt, r, x0, x1;
    uint32_t x;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);

    // Handle NaNs:
    if (a_exp == 2047 && (uint64_t)(a_mnt << 12)) {
        return fp64_process_NaN(a, mode, flags);
    }

    // Handle infinities and zeroes:
    if (!a_mnt)
        return fp64_zero(a_sgn);
    if (a_exp == 2047 && !a_sgn)
        return fp64_infinity(a_sgn);
    if (a_sgn) {
        *flags |= FPLIB_IOC;
        return fp64_defaultNaN();
    }

    a_mnt = fp64_normalise(a_mnt, &a_exp);
    if (a_exp & 1) {
        ++a_exp;
        a_mnt >>= 1;
    }

    // x = (a * 3 + 5) / 8
    x = (a_mnt >> 34) + (a_mnt >> 35) + (5 << 28);

    // x = (a / x + x) / 2; // 16-bit accuracy
    x = ((a_mnt >> 32) / (x >> 15) + (x >> 16)) << 15;

    // x = (a / x + x) / 2; // 16-bit accuracy
    x = ((a_mnt >> 32) / (x >> 15) + (x >> 16)) << 15;

    // x = (a / x + x) / 2; // 32-bit accuracy
    x = ((a_mnt / x) >> 2) + (x >> 1);

    // r = 1 / x; // 32-bit accuracy
    r = ((uint64_t)1 << 62) / x;

    // r = r * (2 - x * r); // 64-bit accuracy
    mul64x32(&x0, &x1, -(uint64_t)x * r << 1, r);
    lsr128(&x0, &x1, x0, x1, 31);

    // x = (x + a * r) / 2; // 64-bit accuracy
    mul62x62(&x0, &x1, a_mnt >> 10, x0 >> 2);
    lsl128(&x0, &x1, x0, x1, 5);
    lsr128(&x0, &x1, x0, x1, 56);

    x0 = ((uint64_t)x << 31) + (x0 >> 1);

    x_sgn = 0;
    x_exp = (a_exp + 1053) >> 1;
    x_mnt = x0;
    x_mnt = ((x_mnt - (1 << 8)) >> 9) + 1;
    mul62x62(&x0, &x1, x_mnt, x_mnt);
    lsl128(&x0, &x1, x0, x1, 19);
    c = cmp128(x0, x1, 0, a_mnt);
    if (c > 0)
        --x_mnt;

    x_mnt = fp64_normalise(x_mnt, &x_exp);

    return fp64_round(x_sgn, x_exp, x_mnt << 1 | !!c, mode, flags);
}
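
// Both square roots use the Heron/Newton iteration x = (a/x + x)/2 noted
// in the comments, starting from the cheap linear estimate (a*3 + 5)/8
// and roughly doubling the accuracy at each step; the double-precision
// version also refines a reciprocal to avoid a second wide division.  The
// candidate mantissa is then squared and compared with the operand so the
// result can be corrected downwards if it is too big, with the comparison
// outcome supplying the sticky bit.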

static int
modeConv(FPSCR fpscr)
{
    return (((int) fpscr) >> 22) & 0xF;
}
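
// modeConv() extracts FPSCR bits 22-25, i.e. RMode (bits 22-23) plus the
// FZ and DN controls, which lands them exactly on the FPLIB_RN..FPLIB_RZ,
// FPLIB_FZ and FPLIB_DN positions of the mode word.  The AHP bit (26) is
// not part of the extraction, which is why fplibConvert() passes it
// separately as "mode | alt_hp << 4".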

static void
set_fpscr(FPSCR &fpscr, int flags)
{
    // translate back to FPSCR
    bool underflow = false;
    if (flags & FPLIB_IDC) {
        fpscr.idc = 1;
    }
    if (flags & FPLIB_IOC) {
        fpscr.ioc = 1;
    }
    if (flags & FPLIB_DZC) {
        fpscr.dzc = 1;
    }
    if (flags & FPLIB_OFC) {
        fpscr.ofc = 1;
    }
    if (flags & FPLIB_UFC) {
        underflow = true; //xx Why is this required?
        fpscr.ufc = 1;
    }
    if ((flags & FPLIB_IXC) && !(underflow && fpscr.fz)) {
        fpscr.ixc = 1;
    }
}

template <>
bool
fplibCompareEQ(uint32_t a, uint32_t b, FPSCR &fpscr)
{
    int flags = 0;
    int x = fp32_compare_eq(a, b, modeConv(fpscr), &flags);
    set_fpscr(fpscr, flags);
    return x;
}

template <>
bool
fplibCompareGE(uint32_t a, uint32_t b, FPSCR &fpscr)
{
    int flags = 0;
    int x = fp32_compare_ge(a, b, modeConv(fpscr), &flags);
    set_fpscr(fpscr, flags);
    return x;
}

template <>
bool
fplibCompareGT(uint32_t a, uint32_t b, FPSCR &fpscr)
{
    int flags = 0;
    int x = fp32_compare_gt(a, b, modeConv(fpscr), &flags);
    set_fpscr(fpscr, flags);
    return x;
}

template <>
bool
fplibCompareEQ(uint64_t a, uint64_t b, FPSCR &fpscr)
{
    int flags = 0;
    int x = fp64_compare_eq(a, b, modeConv(fpscr), &flags);
    set_fpscr(fpscr, flags);
    return x;
}

template <>
bool
fplibCompareGE(uint64_t a, uint64_t b, FPSCR &fpscr)
{
    int flags = 0;
    int x = fp64_compare_ge(a, b, modeConv(fpscr), &flags);
    set_fpscr(fpscr, flags);
    return x;
}

template <>
bool
fplibCompareGT(uint64_t a, uint64_t b, FPSCR &fpscr)
{
    int flags = 0;
    int x = fp64_compare_gt(a, b, modeConv(fpscr), &flags);
    set_fpscr(fpscr, flags);
    return x;
}

template <>
uint32_t
fplibAbs(uint32_t op)
{
    return op & ~((uint32_t)1 << 31);
}

template <>
uint64_t
fplibAbs(uint64_t op)
{
    return op & ~((uint64_t)1 << 63);
}

template <>
uint32_t
fplibAdd(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    int flags = 0;
    uint32_t result = fp32_add(op1, op2, 0, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}

template <>
uint64_t
fplibAdd(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    int flags = 0;
    uint64_t result = fp64_add(op1, op2, 0, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}

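// The fplibCompare() specialisations below return the ARM condition-flag
// nibble (NZCV) that VCMP/FCMP-style instructions write back: 6 (0110)
// for equal, 8 (1000) for less than, 2 (0010) for greater than and
// 3 (0011) for unordered.
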
template <>
int
fplibCompare(uint32_t op1, uint32_t op2, bool signal_nans, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2, result;
    uint32_t mnt1, mnt2;

    fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    if ((exp1 == 255 && (uint32_t)(mnt1 << 9)) ||
        (exp2 == 255 && (uint32_t)(mnt2 << 9))) {
        result = 3;
        if ((exp1 == 255 && (uint32_t)(mnt1 << 9) && !(mnt1 >> 22 & 1)) ||
            (exp2 == 255 && (uint32_t)(mnt2 << 9) && !(mnt2 >> 22 & 1)) ||
            signal_nans)
            flags |= FPLIB_IOC;
    } else {
        if (op1 == op2 || (!mnt1 && !mnt2)) {
            result = 6;
        } else if (sgn1 != sgn2) {
            result = sgn1 ? 8 : 2;
        } else if (exp1 != exp2) {
            result = sgn1 ^ (exp1 < exp2) ? 8 : 2;
        } else {
            result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2;
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
int
fplibCompare(uint64_t op1, uint64_t op2, bool signal_nans, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2, result;
    uint64_t mnt1, mnt2;

    fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    if ((exp1 == 2047 && (uint64_t)(mnt1 << 12)) ||
        (exp2 == 2047 && (uint64_t)(mnt2 << 12))) {
        result = 3;
        if ((exp1 == 2047 && (uint64_t)(mnt1 << 12) && !(mnt1 >> 51 & 1)) ||
            (exp2 == 2047 && (uint64_t)(mnt2 << 12) && !(mnt2 >> 51 & 1)) ||
            signal_nans)
            flags |= FPLIB_IOC;
    } else {
        if (op1 == op2 || (!mnt1 && !mnt2)) {
            result = 6;
        } else if (sgn1 != sgn2) {
            result = sgn1 ? 8 : 2;
        } else if (exp1 != exp2) {
            result = sgn1 ^ (exp1 < exp2) ? 8 : 2;
        } else {
            result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2;
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}

static uint16_t
fp16_FPConvertNaN_32(uint32_t op)
{
    return fp16_pack(op >> 31, 31, (uint16_t)1 << 9 | op >> 13);
}

static uint16_t
fp16_FPConvertNaN_64(uint64_t op)
{
    return fp16_pack(op >> 63, 31, (uint16_t)1 << 9 | op >> 42);
}

static uint32_t
fp32_FPConvertNaN_16(uint16_t op)
{
    return fp32_pack(op >> 15, 255, (uint32_t)1 << 22 | (uint32_t)op << 13);
}

static uint32_t
fp32_FPConvertNaN_64(uint64_t op)
{
    return fp32_pack(op >> 63, 255, (uint32_t)1 << 22 | op >> 29);
}

static uint64_t
fp64_FPConvertNaN_16(uint16_t op)
{
    return fp64_pack(op >> 15, 2047, (uint64_t)1 << 51 | (uint64_t)op << 42);
}

static uint64_t
fp64_FPConvertNaN_32(uint32_t op)
{
    return fp64_pack(op >> 31, 2047, (uint64_t)1 << 51 | (uint64_t)op << 29);
}

static uint32_t
fp32_FPOnePointFive(int sgn)
{
    return fp32_pack(sgn, 127, (uint64_t)1 << 22);
}

static uint64_t
fp64_FPOnePointFive(int sgn)
{
    return fp64_pack(sgn, 1023, (uint64_t)1 << 51);
}

static uint32_t
fp32_FPThree(int sgn)
{
    return fp32_pack(sgn, 128, (uint64_t)1 << 22);
}

static uint64_t
fp64_FPThree(int sgn)
{
    return fp64_pack(sgn, 1024, (uint64_t)1 << 51);
}

static uint32_t
fp32_FPTwo(int sgn)
{
    return fp32_pack(sgn, 128, 0);
}

static uint64_t
fp64_FPTwo(int sgn)
{
    return fp64_pack(sgn, 1024, 0);
}
1793
1794template <>
1795uint16_t
1796fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr)
1797{
1798    int mode = modeConv(fpscr);
1799    int flags = 0;
1800    int sgn, exp;
1801    uint32_t mnt;
1802    uint16_t result;
1803
1804    // Unpack floating-point operand optionally with flush-to-zero:
1805    fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
1806
1807    bool alt_hp = fpscr.ahp;
1808
1809    if (exp == 255 && (uint32_t)(mnt << 9)) {
1810        if (alt_hp) {
1811            result = fp16_zero(sgn);
1812        } else if (fpscr.dn) {
1813            result = fp16_defaultNaN();
1814        } else {
1815            result = fp16_FPConvertNaN_32(op);
1816        }
1817        if (!(mnt >> 22 & 1) || alt_hp) {
1818            flags |= FPLIB_IOC;
1819        }
1820    } else if (exp == 255) {
1821        if (alt_hp) {
1822            result = sgn << 15 | (uint16_t)0x7fff;
1823            flags |= FPLIB_IOC;
1824        } else {
1825            result = fp16_infinity(sgn);
1826        }
1827    } else if (!mnt) {
1828        result = fp16_zero(sgn);
1829    } else {
1830        result = fp16_round_(sgn, exp - 127 + 15,
1831                             mnt >> 7 | !!(uint32_t)(mnt << 25),
1832                             rounding, mode | alt_hp << 4, &flags);
1833    }
1834
1835    set_fpscr0(fpscr, flags);
1836
1837    return result;
1838}
1839
1840template <>
1841uint16_t
1842fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr)
1843{
1844    int mode = modeConv(fpscr);
1845    int flags = 0;
1846    int sgn, exp;
1847    uint64_t mnt;
1848    uint16_t result;
1849
1850    // Unpack floating-point operand optionally with flush-to-zero:
1851    fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
1852
1853    bool alt_hp = fpscr.ahp;
1854
1855    if (exp == 2047 && (uint64_t)(mnt << 12)) {
1856        if (alt_hp) {
1857            result = fp16_zero(sgn);
1858        } else if (fpscr.dn) {
1859            result = fp16_defaultNaN();
1860        } else {
1861            result = fp16_FPConvertNaN_64(op);
1862        }
1863        if (!(mnt >> 51 & 1) || alt_hp) {
1864            flags |= FPLIB_IOC;
1865        }
1866    } else if (exp == 2047) {
1867        if (alt_hp) {
1868            result = sgn << 15 | (uint16_t)0x7fff;
1869            flags |= FPLIB_IOC;
1870        } else {
1871            result = fp16_infinity(sgn);
1872        }
1873    } else if (!mnt) {
1874        result = fp16_zero(sgn);
1875    } else {
1876        result = fp16_round_(sgn, exp - 1023 + 15,
1877                             mnt >> 36 | !!(uint64_t)(mnt << 28),
1878                             rounding, mode | alt_hp << 4, &flags);
1879    }
1880
1881    set_fpscr0(fpscr, flags);
1882
1883    return result;
1884}
1885
template <>
uint32_t
fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint16_t mnt;
    uint32_t result;

    // Unpack floating-point operand optionally with flush-to-zero:
    fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    if (exp == 31 && !fpscr.ahp && (uint16_t)(mnt << 6)) {
        if (fpscr.dn) {
            result = fp32_defaultNaN();
        } else {
            result = fp32_FPConvertNaN_16(op);
        }
        if (!(mnt >> 9 & 1)) {
            flags |= FPLIB_IOC;
        }
    } else if (exp == 31 && !fpscr.ahp) {
        result = fp32_infinity(sgn);
    } else if (!mnt) {
        result = fp32_zero(sgn);
    } else {
        mnt = fp16_normalise(mnt, &exp);
        result = fp32_pack(sgn, exp - 15 + 127 + 5, (uint32_t)mnt << 8);
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint32_t
fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint64_t mnt;
    uint32_t result;

    // Unpack floating-point operand optionally with flush-to-zero:
    fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    if (exp == 2047 && (uint64_t)(mnt << 12)) {
        if (fpscr.dn) {
            result = fp32_defaultNaN();
        } else {
            result = fp32_FPConvertNaN_64(op);
        }
        if (!(mnt >> 51 & 1)) {
            flags |= FPLIB_IOC;
        }
    } else if (exp == 2047) {
        result = fp32_infinity(sgn);
    } else if (!mnt) {
        result = fp32_zero(sgn);
    } else {
        result = fp32_round_(sgn, exp - 1023 + 127,
                             mnt >> 20 | !!(uint64_t)(mnt << 44),
                             rounding, mode, &flags);
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint64_t
fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint16_t mnt;
    uint64_t result;

    // Unpack floating-point operand optionally with flush-to-zero:
    fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    if (exp == 31 && !fpscr.ahp && (uint16_t)(mnt << 6)) {
        if (fpscr.dn) {
            result = fp64_defaultNaN();
        } else {
            result = fp64_FPConvertNaN_16(op);
        }
        if (!(mnt >> 9 & 1)) {
            flags |= FPLIB_IOC;
        }
    } else if (exp == 31 && !fpscr.ahp) {
        result = fp64_infinity(sgn);
    } else if (!mnt) {
        result = fp64_zero(sgn);
    } else {
        mnt = fp16_normalise(mnt, &exp);
        result = fp64_pack(sgn, exp - 15 + 1023 + 5, (uint64_t)mnt << 37);
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint64_t
fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint32_t mnt;
    uint64_t result;

    // Unpack floating-point operand optionally with flush-to-zero:
    fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    if (exp == 255 && (uint32_t)(mnt << 9)) {
        if (fpscr.dn) {
            result = fp64_defaultNaN();
        } else {
            result = fp64_FPConvertNaN_32(op);
        }
        if (!(mnt >> 22 & 1)) {
            flags |= FPLIB_IOC;
        }
    } else if (exp == 255) {
        result = fp64_infinity(sgn);
    } else if (!mnt) {
        result = fp64_zero(sgn);
    } else {
        mnt = fp32_normalise(mnt, &exp);
        result = fp64_pack(sgn, exp - 127 + 1023 + 8, (uint64_t)mnt << 21);
    }

    set_fpscr0(fpscr, flags);

    return result;
}

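/*
 * Fused multiply-add: addend + op1 * op2 with a single rounding of
 * the final sum rather than of the intermediate product. The fourth
 * argument of fp32_muladd()/fp64_muladd() scales the result by that
 * power of two; it is 0 here and -1 in the reciprocal-square-root
 * step further down, where it supplies the division by two.
 */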
template <>
uint32_t
fplibMulAdd(uint32_t addend, uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    int flags = 0;
    uint32_t result = fp32_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}

template <>
uint64_t
fplibMulAdd(uint64_t addend, uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    int flags = 0;
    uint64_t result = fp64_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}

template <>
uint32_t
fplibDiv(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    int flags = 0;
    uint32_t result = fp32_div(op1, op2, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}

template <>
uint64_t
fplibDiv(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    int flags = 0;
    uint64_t result = fp64_div(op1, op2, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}

static uint32_t
fp32_repack(int sgn, int exp, uint32_t mnt)
{
    return fp32_pack(sgn, mnt >> 23 ? exp : 0, mnt);
}

static uint64_t
fp64_repack(int sgn, int exp, uint64_t mnt)
{
    return fp64_pack(sgn, mnt >> 52 ? exp : 0, mnt);
}

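/*
 * Helpers for FMAXNM/FMINNM: when exactly one operand is a quiet NaN
 * it is replaced with the infinity that always loses the comparison
 * (-infinity for max, +infinity for min), so the numeric operand is
 * returned. The test ~(x << 1) >> 23 (>> 52 for doubles) is zero
 * precisely when the exponent and the top mantissa bit are all ones,
 * i.e. when x is a quiet NaN.
 */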
static void
fp32_minmaxnum(uint32_t *op1, uint32_t *op2, int sgn)
{
    // Treat a single quiet-NaN as +Infinity/-Infinity
    if (!((uint32_t)~(*op1 << 1) >> 23) && (uint32_t)~(*op2 << 1) >> 23)
        *op1 = fp32_infinity(sgn);
    if (!((uint32_t)~(*op2 << 1) >> 23) && (uint32_t)~(*op1 << 1) >> 23)
        *op2 = fp32_infinity(sgn);
}

static void
fp64_minmaxnum(uint64_t *op1, uint64_t *op2, int sgn)
{
    // Treat a single quiet-NaN as +Infinity/-Infinity
    if (!((uint64_t)~(*op1 << 1) >> 52) && (uint64_t)~(*op2 << 1) >> 52)
        *op1 = fp64_infinity(sgn);
    if (!((uint64_t)~(*op2 << 1) >> 52) && (uint64_t)~(*op1 << 1) >> 52)
        *op2 = fp64_infinity(sgn);
}

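/*
 * For max, op1 is selected when the signs differ and op2 is the
 * negative operand, or when the signs agree and op1 has the larger
 * bit pattern; the XOR with the sign inverts the latter test for two
 * negative operands, whose unsigned ordering is reversed. fplibMin
 * mirrors the test. The repack helpers restore subnormal operands,
 * which unpacking represents with an adjusted exponent, to their
 * original encoding.
 */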
template <>
uint32_t
fplibMax(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint32_t mnt1, mnt2, x, result;

    fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    if ((x = fp32_process_NaNs(op1, op2, mode, &flags))) {
        result = x;
    } else {
        result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ?
                  fp32_repack(sgn1, exp1, mnt1) :
                  fp32_repack(sgn2, exp2, mnt2));
    }
    set_fpscr0(fpscr, flags);
    return result;
}

template <>
uint64_t
fplibMax(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint64_t mnt1, mnt2, x, result;

    fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    if ((x = fp64_process_NaNs(op1, op2, mode, &flags))) {
        result = x;
    } else {
        result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ?
                  fp64_repack(sgn1, exp1, mnt1) :
                  fp64_repack(sgn2, exp2, mnt2));
    }
    set_fpscr0(fpscr, flags);
    return result;
}

template <>
uint32_t
fplibMaxNum(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    fp32_minmaxnum(&op1, &op2, 1);
    return fplibMax<uint32_t>(op1, op2, fpscr);
}

template <>
uint64_t
fplibMaxNum(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    fp64_minmaxnum(&op1, &op2, 1);
    return fplibMax<uint64_t>(op1, op2, fpscr);
}

template <>
uint32_t
fplibMin(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint32_t mnt1, mnt2, x, result;

    fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    if ((x = fp32_process_NaNs(op1, op2, mode, &flags))) {
        result = x;
    } else {
        result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ?
                  fp32_repack(sgn1, exp1, mnt1) :
                  fp32_repack(sgn2, exp2, mnt2));
    }
    set_fpscr0(fpscr, flags);
    return result;
}

template <>
uint64_t
fplibMin(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint64_t mnt1, mnt2, x, result;

    fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    if ((x = fp64_process_NaNs(op1, op2, mode, &flags))) {
        result = x;
    } else {
        result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ?
                  fp64_repack(sgn1, exp1, mnt1) :
                  fp64_repack(sgn2, exp2, mnt2));
    }
    set_fpscr0(fpscr, flags);
    return result;
}

template <>
uint32_t
fplibMinNum(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    fp32_minmaxnum(&op1, &op2, 0);
    return fplibMin<uint32_t>(op1, op2, fpscr);
}

template <>
uint64_t
fplibMinNum(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    fp64_minmaxnum(&op1, &op2, 0);
    return fplibMin<uint64_t>(op1, op2, fpscr);
}

template <>
uint32_t
fplibMul(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    int flags = 0;
    uint32_t result = fp32_mul(op1, op2, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}

template <>
uint64_t
fplibMul(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    int flags = 0;
    uint64_t result = fp64_mul(op1, op2, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}

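/*
 * FMULX behaves like an ordinary multiply except that 0 * infinity
 * returns +/-2.0 (with the usual sign XOR) instead of raising
 * Invalid Operation and producing the default NaN.
 */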
template <>
uint32_t
fplibMulX(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint32_t mnt1, mnt2, result;

    fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    result = fp32_process_NaNs(op1, op2, mode, &flags);
    if (!result) {
        if ((exp1 == 255 && !mnt2) || (exp2 == 255 && !mnt1)) {
            result = fp32_FPTwo(sgn1 ^ sgn2);
        } else if (exp1 == 255 || exp2 == 255) {
            result = fp32_infinity(sgn1 ^ sgn2);
        } else if (!mnt1 || !mnt2) {
            result = fp32_zero(sgn1 ^ sgn2);
        } else {
            result = fp32_mul(op1, op2, mode, &flags);
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint64_t
fplibMulX(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint64_t mnt1, mnt2, result;

    fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    result = fp64_process_NaNs(op1, op2, mode, &flags);
    if (!result) {
        if ((exp1 == 2047 && !mnt2) || (exp2 == 2047 && !mnt1)) {
            result = fp64_FPTwo(sgn1 ^ sgn2);
        } else if (exp1 == 2047 || exp2 == 2047) {
            result = fp64_infinity(sgn1 ^ sgn2);
        } else if (!mnt1 || !mnt2) {
            result = fp64_zero(sgn1 ^ sgn2);
        } else {
            result = fp64_mul(op1, op2, mode, &flags);
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint32_t
fplibNeg(uint32_t op)
{
    return op ^ (uint32_t)1 << 31;
}

template <>
uint64_t
fplibNeg(uint64_t op)
{
    return op ^ (uint64_t)1 << 63;
}

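/*
 * 256-entry estimate table for FRSQRTE. The index combines one bit
 * of exponent parity with the top seven fraction bits of the
 * normalised mantissa; each entry holds eight fraction bits of the
 * estimate, which are packed directly below the implicit bit of the
 * result. The values mirror the recip_sqrt_estimate() function in
 * the ARM ARM pseudocode.
 */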
static const uint8_t recip_sqrt_estimate[256] = {
    255, 253, 251, 249, 247, 245, 243, 242, 240, 238, 236, 234, 233, 231, 229, 228,
    226, 224, 223, 221, 219, 218, 216, 215, 213, 212, 210, 209, 207, 206, 204, 203,
    201, 200, 198, 197, 196, 194, 193, 192, 190, 189, 188, 186, 185, 184, 183, 181,
    180, 179, 178, 176, 175, 174, 173, 172, 170, 169, 168, 167, 166, 165, 164, 163,
    162, 160, 159, 158, 157, 156, 155, 154, 153, 152, 151, 150, 149, 148, 147, 146,
    145, 144, 143, 142, 141, 140, 140, 139, 138, 137, 136, 135, 134, 133, 132, 131,
    131, 130, 129, 128, 127, 126, 126, 125, 124, 123, 122, 121, 121, 120, 119, 118,
    118, 117, 116, 115, 114, 114, 113, 112, 111, 111, 110, 109, 109, 108, 107, 106,
    105, 104, 103, 101, 100,  99,  97,  96,  95,  93,  92,  91,  90,  88,  87,  86,
    85,  84,  82,  81,  80,  79,  78,  77,  76,  75,  74,  72,  71,  70,  69,  68,
    67,  66,  65,  64,  63,  62,  61,  60,  60,  59,  58,  57,  56,  55,  54,  53,
    52,  51,  51,  50,  49,  48,  47,  46,  46,  45,  44,  43,  42,  42,  41,  40,
    39,  38,  38,  37,  36,  35,  35,  34,  33,  33,  32,  31,  30,  30,  29,  28,
    28,  27,  26,  26,  25,  24,  24,  23,  22,  22,  21,  20,  20,  19,  19,  18,
    17,  17,  16,  16,  15,  14,  14,  13,  13,  12,  11,  11,  10,  10,   9,   9,
    8,   8,   7,   6,   6,   5,   5,   4,   4,   3,   3,   2,   2,   1,   1,   0
};

template <>
uint32_t
fplibRSqrtEstimate(uint32_t op, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint32_t mnt, result;

    fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    if (exp == 255 && (uint32_t)(mnt << 9)) {
        result = fp32_process_NaN(op, mode, &flags);
    } else if (!mnt) {
        result = fp32_infinity(sgn);
        flags |= FPLIB_DZC;
    } else if (sgn) {
        result = fp32_defaultNaN();
        flags |= FPLIB_IOC;
    } else if (exp == 255) {
        result = fp32_zero(0);
    } else {
        exp += 8;
        mnt = fp32_normalise(mnt, &exp);
        mnt = recip_sqrt_estimate[(~exp & 1) << 7 | (mnt >> 24 & 127)];
        result = fp32_pack(0, (380 - exp) >> 1, mnt << 15);
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint64_t
fplibRSqrtEstimate(uint64_t op, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint64_t mnt, result;

    fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    if (exp == 2047 && (uint64_t)(mnt << 12)) {
        result = fp64_process_NaN(op, mode, &flags);
    } else if (!mnt) {
        result = fp64_infinity(sgn);
        flags |= FPLIB_DZC;
    } else if (sgn) {
        result = fp64_defaultNaN();
        flags |= FPLIB_IOC;
    } else if (exp == 2047) {
        result = fp64_zero(0);
    } else {
        exp += 11;
        mnt = fp64_normalise(mnt, &exp);
        mnt = recip_sqrt_estimate[(~exp & 1) << 7 | (mnt >> 56 & 127)];
        result = fp64_pack(0, (3068 - exp) >> 1, mnt << 44);
    }

    set_fpscr0(fpscr, flags);

    return result;
}

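/*
 * Fused reciprocal-square-root step (FRSQRTS): computes
 * (3.0 - op1 * op2) / 2.0 with a single rounding. op1 is negated up
 * front and the muladd scale of -1 supplies the halving. By
 * definition 0 * infinity yields 1.5 here rather than an Invalid
 * Operation.
 */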
template <>
uint32_t
fplibRSqrtStepFused(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint32_t mnt1, mnt2, result;

    op1 = fplibNeg<uint32_t>(op1);
    fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    result = fp32_process_NaNs(op1, op2, mode, &flags);
    if (!result) {
        if ((exp1 == 255 && !mnt2) || (exp2 == 255 && !mnt1)) {
            result = fp32_FPOnePointFive(0);
        } else if (exp1 == 255 || exp2 == 255) {
            result = fp32_infinity(sgn1 ^ sgn2);
        } else {
            result = fp32_muladd(fp32_FPThree(0), op1, op2, -1, mode, &flags);
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint64_t
fplibRSqrtStepFused(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint64_t mnt1, mnt2, result;

    op1 = fplibNeg<uint64_t>(op1);
    fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    result = fp64_process_NaNs(op1, op2, mode, &flags);
    if (!result) {
        if ((exp1 == 2047 && !mnt2) || (exp2 == 2047 && !mnt1)) {
            result = fp64_FPOnePointFive(0);
        } else if (exp1 == 2047 || exp2 == 2047) {
            result = fp64_infinity(sgn1 ^ sgn2);
        } else {
            result = fp64_muladd(fp64_FPThree(0), op1, op2, -1, mode, &flags);
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}

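/*
 * Fused reciprocal step (FRECPS): computes 2.0 - op1 * op2 with a
 * single rounding, again negating op1 first; 0 * infinity yields 2.0
 * by definition. The double-precision specialisation follows the
 * reciprocal estimate below.
 */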
template <>
uint32_t
fplibRecipStepFused(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint32_t mnt1, mnt2, result;

    op1 = fplibNeg<uint32_t>(op1);
    fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    result = fp32_process_NaNs(op1, op2, mode, &flags);
    if (!result) {
        if ((exp1 == 255 && !mnt2) || (exp2 == 255 && !mnt1)) {
            result = fp32_FPTwo(0);
        } else if (exp1 == 255 || exp2 == 255) {
            result = fp32_infinity(sgn1 ^ sgn2);
        } else {
            result = fp32_muladd(fp32_FPTwo(0), op1, op2, 0, mode, &flags);
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}

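/*
 * Reciprocal estimate (FRECPE). Inputs small enough that the
 * reciprocal would overflow saturate to infinity or to the largest
 * normal, depending on the rounding mode; inputs large enough that
 * the reciprocal would be subnormal are flushed to zero when FZ is
 * set. Otherwise the result exponent mirrors the input exponent
 * about the bias and the fraction comes from a short integer
 * division, giving roughly eight fraction bits of precision.
 */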
template <>
uint32_t
fplibRecipEstimate(uint32_t op, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint32_t mnt, result;

    fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    if (exp == 255 && (uint32_t)(mnt << 9)) {
        result = fp32_process_NaN(op, mode, &flags);
    } else if (exp == 255) {
        result = fp32_zero(sgn);
    } else if (!mnt) {
        result = fp32_infinity(sgn);
        flags |= FPLIB_DZC;
    } else if (!((uint32_t)(op << 1) >> 22)) {
        bool overflow_to_inf = false;
        switch (FPCRRounding(fpscr)) {
          case FPRounding_TIEEVEN:
            overflow_to_inf = true;
            break;
          case FPRounding_POSINF:
            overflow_to_inf = !sgn;
            break;
          case FPRounding_NEGINF:
            overflow_to_inf = sgn;
            break;
          case FPRounding_ZERO:
            overflow_to_inf = false;
            break;
          default:
            assert(0);
        }
        result = overflow_to_inf ? fp32_infinity(sgn) : fp32_max_normal(sgn);
        flags |= FPLIB_OFC | FPLIB_IXC;
    } else if (fpscr.fz && exp >= 253) {
        result = fp32_zero(sgn);
        flags |= FPLIB_UFC;
    } else {
        exp += 8;
        mnt = fp32_normalise(mnt, &exp);
        int result_exp = 253 - exp;
        uint32_t fraction = (((uint32_t)1 << 19) / (mnt >> 22 | 1) + 1) >> 1;
        fraction <<= 15;
        if (result_exp == 0) {
            fraction >>= 1;
        } else if (result_exp == -1) {
            fraction >>= 2;
            result_exp = 0;
        }
        result = fp32_pack(sgn, result_exp, fraction);
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint64_t
fplibRecipEstimate(uint64_t op, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint64_t mnt, result;

    fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    if (exp == 2047 && (uint64_t)(mnt << 12)) {
        result = fp64_process_NaN(op, mode, &flags);
    } else if (exp == 2047) {
        result = fp64_zero(sgn);
    } else if (!mnt) {
        result = fp64_infinity(sgn);
        flags |= FPLIB_DZC;
    } else if (!((uint64_t)(op << 1) >> 51)) {
        bool overflow_to_inf = false;
        switch (FPCRRounding(fpscr)) {
          case FPRounding_TIEEVEN:
            overflow_to_inf = true;
            break;
          case FPRounding_POSINF:
            overflow_to_inf = !sgn;
            break;
          case FPRounding_NEGINF:
            overflow_to_inf = sgn;
            break;
          case FPRounding_ZERO:
            overflow_to_inf = false;
            break;
          default:
            assert(0);
        }
        result = overflow_to_inf ? fp64_infinity(sgn) : fp64_max_normal(sgn);
        flags |= FPLIB_OFC | FPLIB_IXC;
    } else if (fpscr.fz && exp >= 2045) {
        result = fp64_zero(sgn);
        flags |= FPLIB_UFC;
    } else {
        exp += 11;
        mnt = fp64_normalise(mnt, &exp);
        int result_exp = 2045 - exp;
        uint64_t fraction = (((uint32_t)1 << 19) / (mnt >> 54 | 1) + 1) >> 1;
        fraction <<= 44;
        if (result_exp == 0) {
            fraction >>= 1;
        } else if (result_exp == -1) {
            fraction >>= 2;
            result_exp = 0;
        }
        result = fp64_pack(sgn, result_exp, fraction);
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint64_t
fplibRecipStepFused(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint64_t mnt1, mnt2, result;

    op1 = fplibNeg<uint64_t>(op1);
    fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    result = fp64_process_NaNs(op1, op2, mode, &flags);
    if (!result) {
        if ((exp1 == 2047 && !mnt2) || (exp2 == 2047 && !mnt1)) {
            result = fp64_FPTwo(0);
        } else if (exp1 == 2047 || exp2 == 2047) {
            result = fp64_infinity(sgn1 ^ sgn2);
        } else {
            result = fp64_muladd(fp64_FPTwo(0), op1, op2, 0, mode, &flags);
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}

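/*
 * FRECPX: an exponent-only reciprocal used for argument scaling. The
 * result has a zero mantissa and the one's complement of the input
 * exponent (zeros and denormals map to the largest normal exponent,
 * infinities to zero), following the FPRecpX pseudocode in the ARM
 * ARM.
 */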
template <>
uint32_t
fplibRecpX(uint32_t op, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint32_t mnt, result;

    fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    if (exp == 255 && (uint32_t)(mnt << 9)) {
        result = fp32_process_NaN(op, mode, &flags);
    } else {
        if (!mnt) { // Zero and denormals
            result = fp32_pack(sgn, 254, 0);
        } else { // Infinities and normals
            result = fp32_pack(sgn, exp ^ 255, 0);
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint64_t
fplibRecpX(uint64_t op, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint64_t mnt, result;

    fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    if (exp == 2047 && (uint64_t)(mnt << 12)) {
        result = fp64_process_NaN(op, mode, &flags);
    } else {
        if (!mnt) { // Zero and denormals
            result = fp64_pack(sgn, 2046, 0);
        } else { // Infinities and normals
            result = fp64_pack(sgn, exp ^ 2047, 0);
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}

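/*
 * Round to an integral value in floating-point format (FRINT*). The
 * value is truncated towards zero first and the discarded bits are
 * summarised in err: bit 1 is the first bit shifted out and bit 0 is
 * the OR of everything below it, so err is 0 when exact, 2 on an
 * exact halfway case, and 1 or 3 otherwise. Each rounding mode then
 * decides from that summary whether to increment.
 */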
template <>
uint32_t
fplibRoundInt(uint32_t op, FPRounding rounding, bool exact, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint32_t mnt, result;

    // Unpack using FPCR to determine if subnormals are flushed-to-zero:
    fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    // Handle NaNs, infinities and zeroes:
    if (exp == 255 && (uint32_t)(mnt << 9)) {
        result = fp32_process_NaN(op, mode, &flags);
    } else if (exp == 255) {
        result = fp32_infinity(sgn);
    } else if (!mnt) {
        result = fp32_zero(sgn);
    } else if (exp >= 150) {
        // There are no fractional bits
        result = op;
    } else {
        // Truncate towards zero:
        uint32_t x = 150 - exp >= 32 ? 0 : mnt >> (150 - exp);
        int err = exp < 118 ? 1 :
            (mnt << 1 >> (149 - exp) & 3) | (mnt << 2 << (exp - 118) != 0);
        switch (rounding) {
          case FPRounding_TIEEVEN:
            x += (err == 3 || (err == 2 && (x & 1)));
            break;
          case FPRounding_POSINF:
            x += err && !sgn;
            break;
          case FPRounding_NEGINF:
            x += err && sgn;
            break;
          case FPRounding_ZERO:
            break;
          case FPRounding_TIEAWAY:
            x += err >> 1;
            break;
          default:
            assert(0);
        }

        if (x == 0) {
            result = fp32_zero(sgn);
        } else {
            exp = 150;
            mnt = fp32_normalise(x, &exp);
            result = fp32_pack(sgn, exp + 8, mnt >> 8);
        }

        if (err && exact)
            flags |= FPLIB_IXC;
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint64_t
fplibRoundInt(uint64_t op, FPRounding rounding, bool exact, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint64_t mnt, result;

    // Unpack using FPCR to determine if subnormals are flushed-to-zero:
    fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    // Handle NaNs, infinities and zeroes:
    if (exp == 2047 && (uint64_t)(mnt << 12)) {
        result = fp64_process_NaN(op, mode, &flags);
    } else if (exp == 2047) {
        result = fp64_infinity(sgn);
    } else if (!mnt) {
        result = fp64_zero(sgn);
    } else if (exp >= 1075) {
        // There are no fractional bits
        result = op;
    } else {
        // Truncate towards zero:
        uint64_t x = 1075 - exp >= 64 ? 0 : mnt >> (1075 - exp);
        int err = exp < 1011 ? 1 :
            (mnt << 1 >> (1074 - exp) & 3) | (mnt << 2 << (exp - 1011) != 0);
        switch (rounding) {
          case FPRounding_TIEEVEN:
            x += (err == 3 || (err == 2 && (x & 1)));
            break;
          case FPRounding_POSINF:
            x += err && !sgn;
            break;
          case FPRounding_NEGINF:
            x += err && sgn;
            break;
          case FPRounding_ZERO:
            break;
          case FPRounding_TIEAWAY:
            x += err >> 1;
            break;
          default:
            assert(0);
        }

        if (x == 0) {
            result = fp64_zero(sgn);
        } else {
            exp = 1075;
            mnt = fp64_normalise(x, &exp);
            result = fp64_pack(sgn, exp + 11, mnt >> 11);
        }

        if (err && exact)
            flags |= FPLIB_IXC;
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint32_t
fplibSqrt(uint32_t op, FPSCR &fpscr)
{
    int flags = 0;
    uint32_t result = fp32_sqrt(op, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}

template <>
uint64_t
fplibSqrt(uint64_t op, FPSCR &fpscr)
{
    int flags = 0;
    uint64_t result = fp64_sqrt(op, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}

template <>
uint32_t
fplibSub(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    int flags = 0;
    uint32_t result = fp32_add(op1, op2, 1, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}

template <>
uint64_t
fplibSub(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    int flags = 0;
    uint64_t result = fp64_add(op1, op2, 1, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}

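/*
 * Common float-to-fixed conversion core: the mantissa is shifted so
 * that the binary point lands at bit 0, rounded, then range-checked.
 * The saturation value ((uint64_t)!u << 63) - !sgn encodes all four
 * limits (INT64_MAX, INT64_MIN, UINT64_MAX and 0), selected by the
 * signedness of the target and the sign of the operand.
 */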
static uint64_t
FPToFixed_64(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding,
             int *flags)
{
    uint64_t x;
    int err;

    if (exp > 1023 + 63) {
        *flags = FPLIB_IOC;
        return ((uint64_t)!u << 63) - !sgn;
    }

    x = lsr64(mnt << 11, 1023 + 63 - exp);
    err = (exp > 1023 + 63 - 2 ? 0 :
           (lsr64(mnt << 11, 1023 + 63 - 2 - exp) & 3) |
           !!(mnt << 11 & (lsl64(1, 1023 + 63 - 2 - exp) - 1)));

    switch (rounding) {
      case FPRounding_TIEEVEN:
        x += (err == 3 || (err == 2 && (x & 1)));
        break;
      case FPRounding_POSINF:
        x += err && !sgn;
        break;
      case FPRounding_NEGINF:
        x += err && sgn;
        break;
      case FPRounding_ZERO:
        break;
      case FPRounding_TIEAWAY:
        x += err >> 1;
        break;
      default:
        assert(0);
    }

    if (u ? sgn && x : x > ((uint64_t)1 << 63) - !sgn) {
        *flags = FPLIB_IOC;
        return ((uint64_t)!u << 63) - !sgn;
    }

    if (err) {
        *flags = FPLIB_IXC;
    }

    return sgn ? -x : x;
}

static uint32_t
FPToFixed_32(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding,
             int *flags)
{
    uint64_t x = FPToFixed_64(sgn, exp, mnt, u, rounding, flags);
    if (u ? x >= (uint64_t)1 << 32 :
        !(x < (uint64_t)1 << 31 ||
          (uint64_t)-x <= (uint64_t)1 << 31)) {
        *flags = FPLIB_IOC;
        x = ((uint32_t)!u << 31) - !sgn;
    }
    return x;
}

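/*
 * Single-precision operands are widened to the double-precision
 * layout (mantissa shifted up by 52 - 23, exponent re-biased by
 * 1023 - 127) before entering the shared core, and fbits is added to
 * the exponent so that the fixed-point scaling by 2^fbits is folded
 * into the conversion.
 */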
template <>
uint32_t
fplibFPToFixed(uint32_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
{
    int flags = 0;
    int sgn, exp;
    uint32_t mnt, result;

    // Unpack using FPCR to determine if subnormals are flushed-to-zero:
    fp32_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);

    // If NaN, set cumulative flag or take exception:
    if (exp == 255 && (uint32_t)(mnt << 9)) {
        flags = FPLIB_IOC;
        result = 0;
    } else {
        result = FPToFixed_32(sgn, exp + 1023 - 127 + fbits,
                              (uint64_t)mnt << (52 - 23), u, rounding, &flags);
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint32_t
fplibFPToFixed(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
{
    int flags = 0;
    int sgn, exp;
    uint64_t mnt;
    uint32_t result;

    // Unpack using FPCR to determine if subnormals are flushed-to-zero:
    fp64_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);

    // If NaN, set cumulative flag or take exception:
    if (exp == 2047 && (uint64_t)(mnt << 12)) {
        flags = FPLIB_IOC;
        result = 0;
    } else {
        result = FPToFixed_32(sgn, exp + fbits, mnt, u, rounding, &flags);
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint64_t
fplibFPToFixed(uint32_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
{
    int flags = 0;
    int sgn, exp;
    uint32_t mnt;
    uint64_t result;

    // Unpack using FPCR to determine if subnormals are flushed-to-zero:
    fp32_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);

    // If NaN, set cumulative flag or take exception:
    if (exp == 255 && (uint32_t)(mnt << 9)) {
        flags = FPLIB_IOC;
        result = 0;
    } else {
        result = FPToFixed_64(sgn, exp + 1023 - 127 + fbits,
                              (uint64_t)mnt << (52 - 23), u, rounding, &flags);
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint64_t
fplibFPToFixed(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
{
    int flags = 0;
    int sgn, exp;
    uint64_t mnt, result;

    // Unpack using FPCR to determine if subnormals are flushed-to-zero:
    fp64_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);

    // If NaN, set cumulative flag or take exception:
    if (exp == 2047 && (uint64_t)(mnt << 12)) {
        flags = FPLIB_IOC;
        result = 0;
    } else {
        result = FPToFixed_64(sgn, exp + fbits, mnt, u, rounding, &flags);
    }

    set_fpscr0(fpscr, flags);

    return result;
}

static uint32_t
fp32_cvtf(uint64_t a, int fbits, int u, int mode, int *flags)
{
    int x_sgn = !u && a >> 63;
    int x_exp = 190 - fbits;
    uint64_t x_mnt = x_sgn ? -a : a;

    // Handle zero:
    if (!x_mnt) {
        return fp32_zero(0);
    }

    // Normalise and convert to 32 bits, collapsing error into bottom bit:
    x_mnt = fp64_normalise(x_mnt, &x_exp);
    x_mnt = x_mnt >> 31 | !!(uint32_t)(x_mnt << 1);

    return fp32_round(x_sgn, x_exp, x_mnt, mode, flags);
}

static uint64_t
fp64_cvtf(uint64_t a, int fbits, int u, int mode, int *flags)
{
    int x_sgn = !u && a >> 63;
    int x_exp = 1024 + 62 - fbits;
    uint64_t x_mnt = x_sgn ? -a : a;

    // Handle zero:
    if (!x_mnt) {
        return fp64_zero(0);
    }

    x_mnt = fp64_normalise(x_mnt, &x_exp);

    return fp64_round(x_sgn, x_exp, x_mnt << 1, mode, flags);
}

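/*
 * For fixed-to-float the rounding mode comes from the explicit
 * rounding argument (the FPRounding values coincide with
 * FPLIB_RN..FPLIB_RZ), so only the DN and FZ bits of the FPSCR
 * (bits 25 and 24, extracted by the shift and mask below) are merged
 * into the mode word.
 */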
template <>
uint32_t
fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
{
    int flags = 0;
    uint32_t res = fp32_cvtf(op, fbits, u,
                             (int)rounding | ((uint32_t)fpscr >> 22 & 12),
                             &flags);
    set_fpscr0(fpscr, flags);
    return res;
}

template <>
uint64_t
fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
{
    int flags = 0;
    uint64_t res = fp64_cvtf(op, fbits, u,
                             (int)rounding | ((uint32_t)fpscr >> 22 & 12),
                             &flags);
    set_fpscr0(fpscr, flags);
    return res;
}

}