/*
 * Copyright (c) 2012-2013 ARM Limited
 * All rights reserved
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder.  You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Edmund Grimley Evans
 *          Thomas Grocutt
 */

#include <stdint.h>

#include <cassert>

#include "fplib.hh"

namespace ArmISA
{

#define FPLIB_RN 0
#define FPLIB_RP 1
#define FPLIB_RM 2
#define FPLIB_RZ 3
#define FPLIB_FZ 4
#define FPLIB_DN 8
#define FPLIB_AHP 16

#define FPLIB_IDC 128 // Input Denormal
#define FPLIB_IXC 16  // Inexact
#define FPLIB_UFC 8   // Underflow
#define FPLIB_OFC 4   // Overflow
#define FPLIB_DZC 2   // Division by Zero
#define FPLIB_IOC 1   // Invalid Operation

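// Shift helpers: in C++, shifting by an amount >= the operand width is
// undefined behaviour, so these wrappers return 0 in that case, giving the
// well-defined "shifted out to zero" semantics the code below relies on.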
static inline uint16_t
lsl16(uint16_t x, uint32_t shift)
{
    return shift < 16 ? x << shift : 0;
}

static inline uint16_t
lsr16(uint16_t x, uint32_t shift)
{
    return shift < 16 ? x >> shift : 0;
}

static inline uint32_t
lsl32(uint32_t x, uint32_t shift)
{
    return shift < 32 ? x << shift : 0;
}

static inline uint32_t
lsr32(uint32_t x, uint32_t shift)
{
    return shift < 32 ? x >> shift : 0;
}

static inline uint64_t
lsl64(uint64_t x, uint32_t shift)
{
    return shift < 64 ? x << shift : 0;
}

static inline uint64_t
lsr64(uint64_t x, uint32_t shift)
{
    return shift < 64 ? x >> shift : 0;
}

static inline void
lsl128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift)
{
    if (shift == 0) {
        *r1 = x1;
        *r0 = x0;
    } else if (shift < 64) {
        *r1 = x1 << shift | x0 >> (64 - shift);
        *r0 = x0 << shift;
    } else if (shift < 128) {
        *r1 = x0 << (shift - 64);
        *r0 = 0;
    } else {
        *r1 = 0;
        *r0 = 0;
    }
}

static inline void
lsr128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift)
{
    if (shift == 0) {
        *r1 = x1;
        *r0 = x0;
    } else if (shift < 64) {
        *r0 = x0 >> shift | x1 << (64 - shift);
        *r1 = x1 >> shift;
    } else if (shift < 128) {
        *r0 = x1 >> (shift - 64);
        *r1 = 0;
    } else {
        *r0 = 0;
        *r1 = 0;
    }
}

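// Karatsuba-style 62x62-bit multiply producing a 124-bit result in x1:x0.
// The operands are split into 31-bit limbs so that every partial product
// and running sum fits in 64 bits, and the middle term is formed as
// (a0 + a1) * (b0 + b1) - p0 - p2 to save one multiplication.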
static inline void
mul62x62(uint64_t *x0, uint64_t *x1, uint64_t a, uint64_t b)
{
    uint32_t mask = ((uint32_t)1 << 31) - 1;
    uint64_t a0 = a & mask;
    uint64_t a1 = a >> 31 & mask;
    uint64_t b0 = b & mask;
    uint64_t b1 = b >> 31 & mask;
    uint64_t p0 = a0 * b0;
    uint64_t p2 = a1 * b1;
    uint64_t p1 = (a0 + a1) * (b0 + b1) - p0 - p2;
    uint64_t s0 = p0;
    uint64_t s1 = (s0 >> 31) + p1;
    uint64_t s2 = (s1 >> 31) + p2;
    *x0 = (s0 & mask) | (s1 & mask) << 31 | s2 << 62;
    *x1 = s2 >> 2;
}

static inline void
mul64x32(uint64_t *x0, uint64_t *x1, uint64_t a, uint32_t b)
{
    uint64_t t0 = (uint64_t)(uint32_t)a * b;
    uint64_t t1 = (t0 >> 32) + (a >> 32) * b;
    *x0 = t1 << 32 | (uint32_t)t0;
    *x1 = t1 >> 32;
}

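// 128-bit arithmetic on (low, high) word pairs. Carries and borrows are
// recovered from unsigned wraparound: after *x0 = a0 + b0, a carry out
// occurred iff *x0 < a0, and after *x0 = a0 - b0, a borrow iff *x0 > a0.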
static inline void
add128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0,
       uint64_t b1)
{
    *x0 = a0 + b0;
    *x1 = a1 + b1 + (*x0 < a0);
}

static inline void
sub128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0,
       uint64_t b1)
{
    *x0 = a0 - b0;
    *x1 = a1 - b1 - (*x0 > a0);
}

static inline int
cmp128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1)
{
    return (a1 < b1 ? -1 : a1 > b1 ? 1 : a0 < b0 ? -1 : a0 > b0 ? 1 : 0);
}

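// Normalisation shifts a mantissa left until its top bit is set, adjusting
// the exponent to match. The loop binary-searches the shift amount
// (e.g. 8, 4, 2, 1 for 16 bits), so it takes log2(width) steps rather than
// one per bit.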
static inline uint16_t
fp16_normalise(uint16_t mnt, int *exp)
{
    int shift;

    if (!mnt) {
        return 0;
    }

    for (shift = 8; shift; shift >>= 1) {
        if (!(mnt >> (16 - shift))) {
            mnt <<= shift;
            *exp -= shift;
        }
    }
    return mnt;
}

static inline uint32_t
fp32_normalise(uint32_t mnt, int *exp)
{
    int shift;

    if (!mnt) {
        return 0;
    }

    for (shift = 16; shift; shift >>= 1) {
        if (!(mnt >> (32 - shift))) {
            mnt <<= shift;
            *exp -= shift;
        }
    }
    return mnt;
}

static inline uint64_t
fp64_normalise(uint64_t mnt, int *exp)
{
    int shift;

    if (!mnt) {
        return 0;
    }

    for (shift = 32; shift; shift >>= 1) {
        if (!(mnt >> (64 - shift))) {
            mnt <<= shift;
            *exp -= shift;
        }
    }
    return mnt;
}

static inline void
fp128_normalise(uint64_t *mnt0, uint64_t *mnt1, int *exp)
{
    uint64_t x0 = *mnt0;
    uint64_t x1 = *mnt1;
    int shift;

    if (!x0 && !x1) {
        return;
    }

    if (!x1) {
        x1 = x0;
        x0 = 0;
        *exp -= 64;
    }

    for (shift = 32; shift; shift >>= 1) {
        if (!(x1 >> (64 - shift))) {
            x1 = x1 << shift | x0 >> (64 - shift);
            x0 <<= shift;
            *exp -= shift;
        }
    }

    *mnt0 = x0;
    *mnt1 = x1;
}

static inline uint16_t
fp16_pack(uint16_t sgn, uint16_t exp, uint16_t mnt)
{
    return sgn << 15 | exp << 10 | (mnt & (((uint16_t)1 << 10) - 1));
}

static inline uint32_t
fp32_pack(uint32_t sgn, uint32_t exp, uint32_t mnt)
{
    return sgn << 31 | exp << 23 | (mnt & (((uint32_t)1 << 23) - 1));
}

static inline uint64_t
fp64_pack(uint64_t sgn, uint64_t exp, uint64_t mnt)
{
    return (uint64_t)sgn << 63 | exp << 52 | (mnt & (((uint64_t)1 << 52) - 1));
}

static inline uint16_t
fp16_zero(int sgn)
{
    return fp16_pack(sgn, 0, 0);
}

static inline uint32_t
fp32_zero(int sgn)
{
    return fp32_pack(sgn, 0, 0);
}

static inline uint64_t
fp64_zero(int sgn)
{
    return fp64_pack(sgn, 0, 0);
}

static inline uint16_t
fp16_max_normal(int sgn)
{
    return fp16_pack(sgn, 30, -1);
}

static inline uint32_t
fp32_max_normal(int sgn)
{
    return fp32_pack(sgn, 254, -1);
}

static inline uint64_t
fp64_max_normal(int sgn)
{
    return fp64_pack(sgn, 2046, -1);
}

static inline uint16_t
fp16_infinity(int sgn)
{
    return fp16_pack(sgn, 31, 0);
}

static inline uint32_t
fp32_infinity(int sgn)
{
    return fp32_pack(sgn, 255, 0);
}

static inline uint64_t
fp64_infinity(int sgn)
{
    return fp64_pack(sgn, 2047, 0);
}

static inline uint16_t
fp16_defaultNaN()
{
    return fp16_pack(0, 31, (uint16_t)1 << 9);
}

static inline uint32_t
fp32_defaultNaN()
{
    return fp32_pack(0, 255, (uint32_t)1 << 22);
}

static inline uint64_t
fp64_defaultNaN()
{
    return fp64_pack(0, 2047, (uint64_t)1 << 51);
}

static inline void
fp16_unpack(int *sgn, int *exp, uint16_t *mnt, uint16_t x, int mode,
            int *flags)
{
    *sgn = x >> 15;
    *exp = x >> 10 & 31;
    *mnt = x & (((uint16_t)1 << 10) - 1);

    // Handle subnormals:
    if (*exp) {
        *mnt |= (uint16_t)1 << 10;
    } else {
        ++*exp;
        // There is no flush to zero in this case!
    }
}

static inline void
fp32_unpack(int *sgn, int *exp, uint32_t *mnt, uint32_t x, int mode,
            int *flags)
{
    *sgn = x >> 31;
    *exp = x >> 23 & 255;
    *mnt = x & (((uint32_t)1 << 23) - 1);

    // Handle subnormals:
    if (*exp) {
        *mnt |= (uint32_t)1 << 23;
    } else {
        ++*exp;
        if ((mode & FPLIB_FZ) && *mnt) {
            *flags |= FPLIB_IDC;
            *mnt = 0;
        }
    }
}

static inline void
fp64_unpack(int *sgn, int *exp, uint64_t *mnt, uint64_t x, int mode,
            int *flags)
{
    *sgn = x >> 63;
    *exp = x >> 52 & 2047;
    *mnt = x & (((uint64_t)1 << 52) - 1);

    // Handle subnormals:
    if (*exp) {
        *mnt |= (uint64_t)1 << 52;
    } else {
        ++*exp;
        if ((mode & FPLIB_FZ) && *mnt) {
            *flags |= FPLIB_IDC;
            *mnt = 0;
        }
    }
}

static inline uint32_t
fp32_process_NaN(uint32_t a, int mode, int *flags)
{
    if (!(a >> 22 & 1)) {
        *flags |= FPLIB_IOC;
        a |= (uint32_t)1 << 22;
    }
    return mode & FPLIB_DN ? fp32_defaultNaN() : a;
}

static inline uint64_t
fp64_process_NaN(uint64_t a, int mode, int *flags)
{
    if (!(a >> 51 & 1)) {
        *flags |= FPLIB_IOC;
        a |= (uint64_t)1 << 51;
    }
    return mode & FPLIB_DN ? fp64_defaultNaN() : a;
}

static uint32_t
fp32_process_NaNs(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_exp = a >> 23 & 255;
    uint32_t a_mnt = a & (((uint32_t)1 << 23) - 1);
    int b_exp = b >> 23 & 255;
    uint32_t b_mnt = b & (((uint32_t)1 << 23) - 1);

    // Handle signalling NaNs:
    if (a_exp == 255 && a_mnt && !(a_mnt >> 22 & 1))
        return fp32_process_NaN(a, mode, flags);
    if (b_exp == 255 && b_mnt && !(b_mnt >> 22 & 1))
        return fp32_process_NaN(b, mode, flags);

    // Handle quiet NaNs:
    if (a_exp == 255 && a_mnt)
        return fp32_process_NaN(a, mode, flags);
    if (b_exp == 255 && b_mnt)
        return fp32_process_NaN(b, mode, flags);

    return 0;
}

static uint64_t
fp64_process_NaNs(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_exp = a >> 52 & 2047;
    uint64_t a_mnt = a & (((uint64_t)1 << 52) - 1);
    int b_exp = b >> 52 & 2047;
    uint64_t b_mnt = b & (((uint64_t)1 << 52) - 1);

    // Handle signalling NaNs:
    if (a_exp == 2047 && a_mnt && !(a_mnt >> 51 & 1))
        return fp64_process_NaN(a, mode, flags);
    if (b_exp == 2047 && b_mnt && !(b_mnt >> 51 & 1))
        return fp64_process_NaN(b, mode, flags);

    // Handle quiet NaNs:
    if (a_exp == 2047 && a_mnt)
        return fp64_process_NaN(a, mode, flags);
    if (b_exp == 2047 && b_mnt)
        return fp64_process_NaN(b, mode, flags);

    return 0;
}

static uint32_t
fp32_process_NaNs3(uint32_t a, uint32_t b, uint32_t c, int mode, int *flags)
{
    int a_exp = a >> 23 & 255;
    uint32_t a_mnt = a & (((uint32_t)1 << 23) - 1);
    int b_exp = b >> 23 & 255;
    uint32_t b_mnt = b & (((uint32_t)1 << 23) - 1);
    int c_exp = c >> 23 & 255;
    uint32_t c_mnt = c & (((uint32_t)1 << 23) - 1);

    // Handle signalling NaNs:
    if (a_exp == 255 && a_mnt && !(a_mnt >> 22 & 1))
        return fp32_process_NaN(a, mode, flags);
    if (b_exp == 255 && b_mnt && !(b_mnt >> 22 & 1))
        return fp32_process_NaN(b, mode, flags);
    if (c_exp == 255 && c_mnt && !(c_mnt >> 22 & 1))
        return fp32_process_NaN(c, mode, flags);

    // Handle quiet NaNs:
    if (a_exp == 255 && a_mnt)
        return fp32_process_NaN(a, mode, flags);
    if (b_exp == 255 && b_mnt)
        return fp32_process_NaN(b, mode, flags);
    if (c_exp == 255 && c_mnt)
        return fp32_process_NaN(c, mode, flags);

    return 0;
}

static uint64_t
fp64_process_NaNs3(uint64_t a, uint64_t b, uint64_t c, int mode, int *flags)
{
    int a_exp = a >> 52 & 2047;
    uint64_t a_mnt = a & (((uint64_t)1 << 52) - 1);
    int b_exp = b >> 52 & 2047;
    uint64_t b_mnt = b & (((uint64_t)1 << 52) - 1);
    int c_exp = c >> 52 & 2047;
    uint64_t c_mnt = c & (((uint64_t)1 << 52) - 1);

    // Handle signalling NaNs:
    if (a_exp == 2047 && a_mnt && !(a_mnt >> 51 & 1))
        return fp64_process_NaN(a, mode, flags);
    if (b_exp == 2047 && b_mnt && !(b_mnt >> 51 & 1))
        return fp64_process_NaN(b, mode, flags);
    if (c_exp == 2047 && c_mnt && !(c_mnt >> 51 & 1))
        return fp64_process_NaN(c, mode, flags);

    // Handle quiet NaNs:
    if (a_exp == 2047 && a_mnt)
        return fp64_process_NaN(a, mode, flags);
    if (b_exp == 2047 && b_mnt)
        return fp64_process_NaN(b, mode, flags);
    if (c_exp == 2047 && c_mnt)
        return fp64_process_NaN(c, mode, flags);

    return 0;
}

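// Rounding: on entry the mantissa carries two extra bits below the ulp of
// the result, and "error" encodes them in quarter-ulp units: 0 exact,
// 1 below the halfway point, 2 exactly halfway, 3 above it. That is enough
// to decide all the supported rounding modes, including round-to-odd.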
static uint16_t
fp16_round_(int sgn, int exp, uint16_t mnt, int rm, int mode, int *flags)
{
    int biased_exp; // non-negative exponent value for result
    uint16_t int_mant; // mantissa for result, less than (1 << 11)
    int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5

    assert(rm != FPRounding_TIEAWAY);

    // There is no flush to zero in this case!

    // The bottom 5 bits of mnt are orred together:
    mnt = (uint16_t)1 << 12 | mnt >> 4 | ((mnt & 31) != 0);

    if (exp > 0) {
        biased_exp = exp;
        int_mant = mnt >> 2;
        error = mnt & 3;
    } else {
        biased_exp = 0;
        int_mant = lsr16(mnt, 3 - exp);
        error = (lsr16(mnt, 1 - exp) & 3) | !!(mnt & (lsl16(1, 1 - exp) - 1));
    }

    if (!biased_exp && error) { // xx should also check fpscr_val<11>
        *flags |= FPLIB_UFC;
    }

    // Round up:
    if ((rm == FPLIB_RN && (error == 3 ||
                            (error == 2 && (int_mant & 1)))) ||
        (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) {
        ++int_mant;
        if (int_mant == (uint32_t)1 << 10) {
            // Rounded up from denormalized to normalized
            biased_exp = 1;
        }
        if (int_mant == (uint32_t)1 << 11) {
            // Rounded up to next exponent
            ++biased_exp;
            int_mant >>= 1;
        }
    }

    // Handle rounding to odd aka Von Neumann rounding:
    if (error && rm == FPRounding_ODD)
        int_mant |= 1;

    // Handle overflow:
    if (!(mode & FPLIB_AHP)) {
        if (biased_exp >= 31) {
            *flags |= FPLIB_OFC | FPLIB_IXC;
            if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) ||
                (rm == FPLIB_RM && sgn)) {
                return fp16_infinity(sgn);
            } else {
                return fp16_max_normal(sgn);
            }
        }
    } else {
        if (biased_exp >= 32) {
            *flags |= FPLIB_IOC;
            return fp16_pack(sgn, 31, -1);
        }
    }

    if (error) {
        *flags |= FPLIB_IXC;
    }

    return fp16_pack(sgn, biased_exp, int_mant);
}

static uint32_t
fp32_round_(int sgn, int exp, uint32_t mnt, int rm, int mode, int *flags)
{
    int biased_exp; // non-negative exponent value for result
    uint32_t int_mant; // mantissa for result, less than (1 << 24)
    int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5

    assert(rm != FPRounding_TIEAWAY);

    // Flush to zero:
    if ((mode & FPLIB_FZ) && exp < 1) {
        *flags |= FPLIB_UFC;
        return fp32_zero(sgn);
    }

    // The bottom 8 bits of mnt are orred together:
    mnt = (uint32_t)1 << 25 | mnt >> 7 | ((mnt & 255) != 0);

    if (exp > 0) {
        biased_exp = exp;
        int_mant = mnt >> 2;
        error = mnt & 3;
    } else {
        biased_exp = 0;
        int_mant = lsr32(mnt, 3 - exp);
        error = (lsr32(mnt, 1 - exp) & 3) | !!(mnt & (lsl32(1, 1 - exp) - 1));
    }

    if (!biased_exp && error) { // xx should also check fpscr_val<11>
        *flags |= FPLIB_UFC;
    }

    // Round up:
    if ((rm == FPLIB_RN && (error == 3 ||
                            (error == 2 && (int_mant & 1)))) ||
        (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) {
        ++int_mant;
        if (int_mant == (uint32_t)1 << 23) {
            // Rounded up from denormalized to normalized
            biased_exp = 1;
        }
        if (int_mant == (uint32_t)1 << 24) {
            // Rounded up to next exponent
            ++biased_exp;
            int_mant >>= 1;
        }
    }

    // Handle rounding to odd aka Von Neumann rounding:
    if (error && rm == FPRounding_ODD)
        int_mant |= 1;

    // Handle overflow:
    if (biased_exp >= 255) {
        *flags |= FPLIB_OFC | FPLIB_IXC;
        if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) ||
            (rm == FPLIB_RM && sgn)) {
            return fp32_infinity(sgn);
        } else {
            return fp32_max_normal(sgn);
        }
    }

    if (error) {
        *flags |= FPLIB_IXC;
    }

    return fp32_pack(sgn, biased_exp, int_mant);
}

static uint32_t
fp32_round(int sgn, int exp, uint32_t mnt, int mode, int *flags)
{
    return fp32_round_(sgn, exp, mnt, mode & 3, mode, flags);
}

static uint64_t
fp64_round_(int sgn, int exp, uint64_t mnt, int rm, int mode, int *flags)
{
    int biased_exp; // non-negative exponent value for result
    uint64_t int_mant; // mantissa for result, less than (1 << 53)
    int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5

    assert(rm != FPRounding_TIEAWAY);

    // Flush to zero:
    if ((mode & FPLIB_FZ) && exp < 1) {
        *flags |= FPLIB_UFC;
        return fp64_zero(sgn);
    }

    // The bottom 11 bits of mnt are orred together:
    mnt = (uint64_t)1 << 54 | mnt >> 10 | ((mnt & 0x3ff) != 0);

    if (exp > 0) {
        biased_exp = exp;
        int_mant = mnt >> 2;
        error = mnt & 3;
    } else {
        biased_exp = 0;
        int_mant = lsr64(mnt, 3 - exp);
        error = (lsr64(mnt, 1 - exp) & 3) | !!(mnt & (lsl64(1, 1 - exp) - 1));
    }

    if (!biased_exp && error) { // xx should also check fpscr_val<11>
        *flags |= FPLIB_UFC;
    }

    // Round up:
    if ((rm == FPLIB_RN && (error == 3 ||
                            (error == 2 && (int_mant & 1)))) ||
        (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) {
        ++int_mant;
        if (int_mant == (uint64_t)1 << 52) {
            // Rounded up from denormalized to normalized
            biased_exp = 1;
        }
        if (int_mant == (uint64_t)1 << 53) {
            // Rounded up to next exponent
            ++biased_exp;
            int_mant >>= 1;
        }
    }

    // Handle rounding to odd aka Von Neumann rounding:
    if (error && rm == FPRounding_ODD)
        int_mant |= 1;

    // Handle overflow:
    if (biased_exp >= 2047) {
        *flags |= FPLIB_OFC | FPLIB_IXC;
        if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) ||
            (rm == FPLIB_RM && sgn)) {
            return fp64_infinity(sgn);
        } else {
            return fp64_max_normal(sgn);
        }
    }

    if (error) {
        *flags |= FPLIB_IXC;
    }

    return fp64_pack(sgn, biased_exp, int_mant);
}

static uint64_t
fp64_round(int sgn, int exp, uint64_t mnt, int mode, int *flags)
{
    return fp64_round_(sgn, exp, mnt, mode & 3, mode, flags);
}

static int
fp32_compare_eq(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint32_t a_mnt, b_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((a_exp == 255 && (uint32_t)(a_mnt << 9)) ||
        (b_exp == 255 && (uint32_t)(b_mnt << 9))) {
        if ((a_exp == 255 && (uint32_t)(a_mnt << 9) && !(a >> 22 & 1)) ||
            (b_exp == 255 && (uint32_t)(b_mnt << 9) && !(b >> 22 & 1)))
            *flags |= FPLIB_IOC;
        return 0;
    }
    return a == b || (!a_mnt && !b_mnt);
}

static int
fp32_compare_ge(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint32_t a_mnt, b_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((a_exp == 255 && (uint32_t)(a_mnt << 9)) ||
        (b_exp == 255 && (uint32_t)(b_mnt << 9))) {
        *flags |= FPLIB_IOC;
        return 0;
    }
    if (!a_mnt && !b_mnt)
        return 1;
    if (a_sgn != b_sgn)
        return b_sgn;
    if (a_exp != b_exp)
        return a_sgn ^ (a_exp > b_exp);
    if (a_mnt != b_mnt)
        return a_sgn ^ (a_mnt > b_mnt);
    return 1;
}

static int
fp32_compare_gt(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint32_t a_mnt, b_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((a_exp == 255 && (uint32_t)(a_mnt << 9)) ||
        (b_exp == 255 && (uint32_t)(b_mnt << 9))) {
        *flags |= FPLIB_IOC;
        return 0;
    }
    if (!a_mnt && !b_mnt)
        return 0;
    if (a_sgn != b_sgn)
        return b_sgn;
    if (a_exp != b_exp)
        return a_sgn ^ (a_exp > b_exp);
    if (a_mnt != b_mnt)
        return a_sgn ^ (a_mnt > b_mnt);
    return 0;
}

static int
fp64_compare_eq(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint64_t a_mnt, b_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((a_exp == 2047 && (uint64_t)(a_mnt << 12)) ||
        (b_exp == 2047 && (uint64_t)(b_mnt << 12))) {
        if ((a_exp == 2047 && (uint64_t)(a_mnt << 12) && !(a >> 51 & 1)) ||
            (b_exp == 2047 && (uint64_t)(b_mnt << 12) && !(b >> 51 & 1)))
            *flags |= FPLIB_IOC;
        return 0;
    }
    return a == b || (!a_mnt && !b_mnt);
}

static int
fp64_compare_ge(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint64_t a_mnt, b_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((a_exp == 2047 && (uint64_t)(a_mnt << 12)) ||
        (b_exp == 2047 && (uint64_t)(b_mnt << 12))) {
        *flags |= FPLIB_IOC;
        return 0;
    }
    if (!a_mnt && !b_mnt)
        return 1;
    if (a_sgn != b_sgn)
        return b_sgn;
    if (a_exp != b_exp)
        return a_sgn ^ (a_exp > b_exp);
    if (a_mnt != b_mnt)
        return a_sgn ^ (a_mnt > b_mnt);
    return 1;
}

static int
fp64_compare_gt(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint64_t a_mnt, b_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((a_exp == 2047 && (uint64_t)(a_mnt << 12)) ||
        (b_exp == 2047 && (uint64_t)(b_mnt << 12))) {
        *flags |= FPLIB_IOC;
        return 0;
    }
    if (!a_mnt && !b_mnt)
        return 0;
    if (a_sgn != b_sgn)
        return b_sgn;
    if (a_exp != b_exp)
        return a_sgn ^ (a_exp > b_exp);
    if (a_mnt != b_mnt)
        return a_sgn ^ (a_mnt > b_mnt);
    return 0;
}

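// Addition: the mantissas get 3 extra low-order bits before alignment so
// that, when the smaller operand is shifted right, the shifted-out bits
// can be collapsed into a sticky bit instead of being lost.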
static uint32_t
fp32_add(uint32_t a, uint32_t b, int neg, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint32_t a_mnt, b_mnt, x, x_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp32_process_NaNs(a, b, mode, flags))) {
        return x;
    }

    b_sgn ^= neg;

    // Handle infinities and zeroes:
    if (a_exp == 255 && b_exp == 255 && a_sgn != b_sgn) {
        *flags |= FPLIB_IOC;
        return fp32_defaultNaN();
    } else if (a_exp == 255) {
        return fp32_infinity(a_sgn);
    } else if (b_exp == 255) {
        return fp32_infinity(b_sgn);
    } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) {
        return fp32_zero(a_sgn);
    }

    a_mnt <<= 3;
    b_mnt <<= 3;
    if (a_exp >= b_exp) {
        b_mnt = (lsr32(b_mnt, a_exp - b_exp) |
                 !!(b_mnt & (lsl32(1, a_exp - b_exp) - 1)));
        b_exp = a_exp;
    } else {
        a_mnt = (lsr32(a_mnt, b_exp - a_exp) |
                 !!(a_mnt & (lsl32(1, b_exp - a_exp) - 1)));
        a_exp = b_exp;
    }
    x_sgn = a_sgn;
    x_exp = a_exp;
    if (a_sgn == b_sgn) {
        x_mnt = a_mnt + b_mnt;
    } else if (a_mnt >= b_mnt) {
        x_mnt = a_mnt - b_mnt;
    } else {
        x_sgn ^= 1;
        x_mnt = b_mnt - a_mnt;
    }

    if (!x_mnt) {
        // Sign of exact zero result depends on rounding mode
        return fp32_zero((mode & 3) == 2);
    }

    x_mnt = fp32_normalise(x_mnt, &x_exp);

    return fp32_round(x_sgn, x_exp + 5, x_mnt << 1, mode, flags);
}

static uint64_t
fp64_add(uint64_t a, uint64_t b, int neg, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint64_t a_mnt, b_mnt, x, x_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp64_process_NaNs(a, b, mode, flags))) {
        return x;
    }

    b_sgn ^= neg;

    // Handle infinities and zeroes:
    if (a_exp == 2047 && b_exp == 2047 && a_sgn != b_sgn) {
        *flags |= FPLIB_IOC;
        return fp64_defaultNaN();
    } else if (a_exp == 2047) {
        return fp64_infinity(a_sgn);
    } else if (b_exp == 2047) {
        return fp64_infinity(b_sgn);
    } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) {
        return fp64_zero(a_sgn);
    }

    a_mnt <<= 3;
    b_mnt <<= 3;
    if (a_exp >= b_exp) {
        b_mnt = (lsr64(b_mnt, a_exp - b_exp) |
                 !!(b_mnt & (lsl64(1, a_exp - b_exp) - 1)));
        b_exp = a_exp;
    } else {
        a_mnt = (lsr64(a_mnt, b_exp - a_exp) |
                 !!(a_mnt & (lsl64(1, b_exp - a_exp) - 1)));
        a_exp = b_exp;
    }
    x_sgn = a_sgn;
    x_exp = a_exp;
    if (a_sgn == b_sgn) {
        x_mnt = a_mnt + b_mnt;
    } else if (a_mnt >= b_mnt) {
        x_mnt = a_mnt - b_mnt;
    } else {
        x_sgn ^= 1;
        x_mnt = b_mnt - a_mnt;
    }

    if (!x_mnt) {
        // Sign of exact zero result depends on rounding mode
        return fp64_zero((mode & 3) == 2);
    }

    x_mnt = fp64_normalise(x_mnt, &x_exp);

    return fp64_round(x_sgn, x_exp + 8, x_mnt << 1, mode, flags);
}

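// Multiplication forms the full double-width product and then collapses it
// to rounding precision with a sticky bottom bit. The exponent constants
// (a_exp + b_exp - 110 here, - 1000 in the fp64 version) fold together the
// removal of the doubled IEEE bias and the fixed-point position the
// normalised product hands to fp*_round.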
static uint32_t
fp32_mul(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint32_t a_mnt, b_mnt, x;
    uint64_t x_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp32_process_NaNs(a, b, mode, flags))) {
        return x;
    }

    // Handle infinities and zeroes:
    if ((a_exp == 255 && !b_mnt) || (b_exp == 255 && !a_mnt)) {
        *flags |= FPLIB_IOC;
        return fp32_defaultNaN();
    } else if (a_exp == 255 || b_exp == 255) {
        return fp32_infinity(a_sgn ^ b_sgn);
    } else if (!a_mnt || !b_mnt) {
        return fp32_zero(a_sgn ^ b_sgn);
    }

    // Multiply and normalise:
    x_sgn = a_sgn ^ b_sgn;
    x_exp = a_exp + b_exp - 110;
    x_mnt = (uint64_t)a_mnt * b_mnt;
    x_mnt = fp64_normalise(x_mnt, &x_exp);

    // Convert to 32 bits, collapsing error into bottom bit:
    x_mnt = lsr64(x_mnt, 31) | !!lsl64(x_mnt, 33);

    return fp32_round(x_sgn, x_exp, x_mnt, mode, flags);
}

static uint64_t
fp64_mul(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint64_t a_mnt, b_mnt, x;
    uint64_t x0_mnt, x1_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp64_process_NaNs(a, b, mode, flags))) {
        return x;
    }

    // Handle infinities and zeroes:
    if ((a_exp == 2047 && !b_mnt) || (b_exp == 2047 && !a_mnt)) {
        *flags |= FPLIB_IOC;
        return fp64_defaultNaN();
    } else if (a_exp == 2047 || b_exp == 2047) {
        return fp64_infinity(a_sgn ^ b_sgn);
    } else if (!a_mnt || !b_mnt) {
        return fp64_zero(a_sgn ^ b_sgn);
    }

    // Multiply and normalise:
    x_sgn = a_sgn ^ b_sgn;
    x_exp = a_exp + b_exp - 1000;
    mul62x62(&x0_mnt, &x1_mnt, a_mnt, b_mnt);
    fp128_normalise(&x0_mnt, &x1_mnt, &x_exp);

    // Convert to 64 bits, collapsing error into bottom bit:
    x0_mnt = x1_mnt << 1 | !!x0_mnt;

    return fp64_round(x_sgn, x_exp, x0_mnt, mode, flags);
}

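// Fused multiply-add: the product is computed exactly in double width,
// added to the (suitably scaled) addend with a sticky bit, and rounded
// only once, so no intermediate rounding error is introduced.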
static uint32_t
fp32_muladd(uint32_t a, uint32_t b, uint32_t c, int scale,
            int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp;
    uint32_t a_mnt, b_mnt, c_mnt, x;
    uint64_t x_mnt, y_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
    fp32_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags);

    x = fp32_process_NaNs3(a, b, c, mode, flags);

    // Quiet NaN added to product of zero and infinity:
    if (a_exp == 255 && (a_mnt >> 22 & 1) &&
        ((!b_mnt && c_exp == 255 && !(uint32_t)(c_mnt << 9)) ||
         (!c_mnt && b_exp == 255 && !(uint32_t)(b_mnt << 9)))) {
        x = fp32_defaultNaN();
        *flags |= FPLIB_IOC;
    }

    if (x) {
        return x;
    }

    // Handle infinities and zeroes:
    if ((b_exp == 255 && !c_mnt) ||
        (c_exp == 255 && !b_mnt) ||
        (a_exp == 255 && (b_exp == 255 || c_exp == 255) &&
         (a_sgn != (b_sgn ^ c_sgn)))) {
        *flags |= FPLIB_IOC;
        return fp32_defaultNaN();
    }
    if (a_exp == 255)
        return fp32_infinity(a_sgn);
    if (b_exp == 255 || c_exp == 255)
        return fp32_infinity(b_sgn ^ c_sgn);
    if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn))
        return fp32_zero(a_sgn);

    x_sgn = a_sgn;
    x_exp = a_exp + 13;
    x_mnt = (uint64_t)a_mnt << 27;

    // Multiply:
    y_sgn = b_sgn ^ c_sgn;
    y_exp = b_exp + c_exp - 113;
    y_mnt = (uint64_t)b_mnt * c_mnt << 3;
    if (!y_mnt) {
        y_exp = x_exp;
    }

    // Add:
    if (x_exp >= y_exp) {
        y_mnt = (lsr64(y_mnt, x_exp - y_exp) |
                 !!(y_mnt & (lsl64(1, x_exp - y_exp) - 1)));
        y_exp = x_exp;
    } else {
        x_mnt = (lsr64(x_mnt, y_exp - x_exp) |
                 !!(x_mnt & (lsl64(1, y_exp - x_exp) - 1)));
        x_exp = y_exp;
    }
    if (x_sgn == y_sgn) {
        x_mnt = x_mnt + y_mnt;
    } else if (x_mnt >= y_mnt) {
        x_mnt = x_mnt - y_mnt;
    } else {
        x_sgn ^= 1;
        x_mnt = y_mnt - x_mnt;
    }

    if (!x_mnt) {
        // Sign of exact zero result depends on rounding mode
        return fp32_zero((mode & 3) == 2);
    }

    // Normalise and convert to 32 bits, collapsing error into bottom bit:
    x_mnt = fp64_normalise(x_mnt, &x_exp);
    x_mnt = x_mnt >> 31 | !!(uint32_t)(x_mnt << 1);

    return fp32_round(x_sgn, x_exp + scale, x_mnt, mode, flags);
}

static uint64_t
fp64_muladd(uint64_t a, uint64_t b, uint64_t c, int scale,
            int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp;
    uint64_t a_mnt, b_mnt, c_mnt, x;
    uint64_t x0_mnt, x1_mnt, y0_mnt, y1_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
    fp64_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags);

    x = fp64_process_NaNs3(a, b, c, mode, flags);

    // Quiet NaN added to product of zero and infinity:
    if (a_exp == 2047 && (a_mnt >> 51 & 1) &&
        ((!b_mnt && c_exp == 2047 && !(uint64_t)(c_mnt << 12)) ||
         (!c_mnt && b_exp == 2047 && !(uint64_t)(b_mnt << 12)))) {
        x = fp64_defaultNaN();
        *flags |= FPLIB_IOC;
    }

    if (x) {
        return x;
    }

    // Handle infinities and zeroes:
    if ((b_exp == 2047 && !c_mnt) ||
        (c_exp == 2047 && !b_mnt) ||
        (a_exp == 2047 && (b_exp == 2047 || c_exp == 2047) &&
         (a_sgn != (b_sgn ^ c_sgn)))) {
        *flags |= FPLIB_IOC;
        return fp64_defaultNaN();
    }
    if (a_exp == 2047)
        return fp64_infinity(a_sgn);
    if (b_exp == 2047 || c_exp == 2047)
        return fp64_infinity(b_sgn ^ c_sgn);
    if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn))
        return fp64_zero(a_sgn);

    x_sgn = a_sgn;
    x_exp = a_exp + 11;
    x0_mnt = 0;
    x1_mnt = a_mnt;

    // Multiply:
    y_sgn = b_sgn ^ c_sgn;
    y_exp = b_exp + c_exp - 1003;
    mul62x62(&y0_mnt, &y1_mnt, b_mnt, c_mnt << 3);
    if (!y0_mnt && !y1_mnt) {
        y_exp = x_exp;
    }

    // Add:
    if (x_exp >= y_exp) {
        uint64_t t0, t1;
        lsl128(&t0, &t1, y0_mnt, y1_mnt,
               x_exp - y_exp < 128 ? 128 - (x_exp - y_exp) : 0);
        lsr128(&y0_mnt, &y1_mnt, y0_mnt, y1_mnt, x_exp - y_exp);
        y0_mnt |= !!(t0 | t1);
        y_exp = x_exp;
    } else {
        uint64_t t0, t1;
        lsl128(&t0, &t1, x0_mnt, x1_mnt,
               y_exp - x_exp < 128 ? 128 - (y_exp - x_exp) : 0);
        lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y_exp - x_exp);
        x0_mnt |= !!(t0 | t1);
        x_exp = y_exp;
    }
    if (x_sgn == y_sgn) {
        add128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y0_mnt, y1_mnt);
    } else if (cmp128(x0_mnt, x1_mnt, y0_mnt, y1_mnt) >= 0) {
        sub128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y0_mnt, y1_mnt);
    } else {
        x_sgn ^= 1;
        sub128(&x0_mnt, &x1_mnt, y0_mnt, y1_mnt, x0_mnt, x1_mnt);
    }

    if (!x0_mnt && !x1_mnt) {
        // Sign of exact zero result depends on rounding mode
        return fp64_zero((mode & 3) == 2);
    }

    // Normalise and convert to 64 bits, collapsing error into bottom bit:
    fp128_normalise(&x0_mnt, &x1_mnt, &x_exp);
    x0_mnt = x1_mnt << 1 | !!x0_mnt;

    return fp64_round(x_sgn, x_exp + scale, x0_mnt, mode, flags);
}

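// fp32 division fits in a 64-by-32-bit integer divide; the bottom bit is
// set whenever a remainder was discarded so rounding still sees that the
// quotient is inexact.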
static uint32_t
fp32_div(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint32_t a_mnt, b_mnt, x;
    uint64_t x_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp32_process_NaNs(a, b, mode, flags)))
        return x;

    // Handle infinities and zeroes:
    if ((a_exp == 255 && b_exp == 255) || (!a_mnt && !b_mnt)) {
        *flags |= FPLIB_IOC;
        return fp32_defaultNaN();
    }
    if (a_exp == 255 || !b_mnt) {
        if (a_exp != 255)
            *flags |= FPLIB_DZC;
        return fp32_infinity(a_sgn ^ b_sgn);
    }
    if (!a_mnt || b_exp == 255)
        return fp32_zero(a_sgn ^ b_sgn);

    // Divide, setting bottom bit if inexact:
    a_mnt = fp32_normalise(a_mnt, &a_exp);
    x_sgn = a_sgn ^ b_sgn;
    x_exp = a_exp - b_exp + 172;
    x_mnt = ((uint64_t)a_mnt << 18) / b_mnt;
    x_mnt |= (x_mnt * b_mnt != (uint64_t)a_mnt << 18);

    // Normalise and convert to 32 bits, collapsing error into bottom bit:
    x_mnt = fp64_normalise(x_mnt, &x_exp);
    x_mnt = x_mnt >> 31 | !!(uint32_t)(x_mnt << 1);

    return fp32_round(x_sgn, x_exp, x_mnt, mode, flags);
}

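// fp64 division has no wide-enough integer divide, so a reciprocal of the
// divisor is refined by Newton-Raphson (x <- x * (2 - b * x), roughly
// doubling the number of correct bits per step), multiplied by the
// dividend, and finally corrected by at most one ulp with an exact
// 128-bit check that also yields the sticky bit.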
static uint64_t
fp64_div(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp, c;
    uint64_t a_mnt, b_mnt, x, x_mnt, x0_mnt, x1_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp64_process_NaNs(a, b, mode, flags)))
        return x;

    // Handle infinities and zeroes:
    if ((a_exp == 2047 && b_exp == 2047) || (!a_mnt && !b_mnt)) {
        *flags |= FPLIB_IOC;
        return fp64_defaultNaN();
    }
    if (a_exp == 2047 || !b_mnt) {
        if (a_exp != 2047)
            *flags |= FPLIB_DZC;
        return fp64_infinity(a_sgn ^ b_sgn);
    }
    if (!a_mnt || b_exp == 2047)
        return fp64_zero(a_sgn ^ b_sgn);

    // Find reciprocal of divisor with Newton-Raphson:
    a_mnt = fp64_normalise(a_mnt, &a_exp);
    b_mnt = fp64_normalise(b_mnt, &b_exp);
    x_mnt = ~(uint64_t)0 / (b_mnt >> 31);
    mul64x32(&x0_mnt, &x1_mnt, b_mnt, x_mnt);
    sub128(&x0_mnt, &x1_mnt, 0, (uint64_t)1 << 32, x0_mnt, x1_mnt);
    lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 32);
    mul64x32(&x0_mnt, &x1_mnt, x0_mnt, x_mnt);
    lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 33);

    // Multiply by dividend:
    x_sgn = a_sgn ^ b_sgn;
    x_exp = a_exp - b_exp + 1031;
    mul62x62(&x0_mnt, &x1_mnt, x0_mnt, a_mnt >> 2); // xx 62x62 is enough
    lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 4);
    x_mnt = x1_mnt;

    // This is an underestimate, so try adding one:
    mul62x62(&x0_mnt, &x1_mnt, b_mnt >> 2, x_mnt + 1); // xx 62x62 is enough
    c = cmp128(x0_mnt, x1_mnt, 0, a_mnt >> 11);
    if (c <= 0) {
        ++x_mnt;
    }

    x_mnt = fp64_normalise(x_mnt, &x_exp);

    return fp64_round(x_sgn, x_exp, x_mnt << 1 | !!c, mode, flags);
}

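// Merge the accumulated fplib exception flags into the FPSCR status bits.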
static void
set_fpscr0(FPSCR &fpscr, int flags)
{
    if (flags & FPLIB_IDC) {
        fpscr.idc = 1;
    }
    if (flags & FPLIB_IOC) {
        fpscr.ioc = 1;
    }
    if (flags & FPLIB_DZC) {
        fpscr.dzc = 1;
    }
    if (flags & FPLIB_OFC) {
        fpscr.ofc = 1;
    }
    if (flags & FPLIB_UFC) {
        fpscr.ufc = 1;
    }
    if (flags & FPLIB_IXC) {
        fpscr.ixc = 1;
    }
}

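// Square root: a linear first guess (the code's (a * 3 + 5) / 8) is
// refined with Heron's iteration x <- (a / x + x) / 2, i.e. Newton's
// method on x^2 - a, which converges quadratically; the last ulp is then
// fixed up by comparing the square of the candidate against the operand,
// which also supplies the sticky bit for rounding.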
static uint32_t
fp32_sqrt(uint32_t a, int mode, int *flags)
{
    int a_sgn, a_exp, x_sgn, x_exp;
    uint32_t a_mnt, x, x_mnt;
    uint64_t t0, t1;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);

    // Handle NaNs:
    if (a_exp == 255 && (uint32_t)(a_mnt << 9))
        return fp32_process_NaN(a, mode, flags);

    // Handle infinities and zeroes:
    if (!a_mnt) {
        return fp32_zero(a_sgn);
    }
    if (a_exp == 255 && !a_sgn) {
        return fp32_infinity(a_sgn);
    }
    if (a_sgn) {
        *flags |= FPLIB_IOC;
        return fp32_defaultNaN();
    }

    a_mnt = fp32_normalise(a_mnt, &a_exp);
    if (!(a_exp & 1)) {
        ++a_exp;
        a_mnt >>= 1;
    }

    // x = (a * 3 + 5) / 8
    x = (a_mnt >> 2) + (a_mnt >> 3) + (5 << 28);

    // x = (a / x + x) / 2; // 16-bit accuracy
    x = (a_mnt / (x >> 15) + (x >> 16)) << 15;

    // x = (a / x + x) / 2; // 16-bit accuracy
    x = (a_mnt / (x >> 15) + (x >> 16)) << 15;

    // x = (a / x + x) / 2; // 32-bit accuracy
    x = ((((uint64_t)a_mnt << 32) / x) >> 2) + (x >> 1);

    x_sgn = 0;
    x_exp = (a_exp + 147) >> 1;
    x_mnt = ((x - (1 << 5)) >> 6) + 1;
    t1 = (uint64_t)x_mnt * x_mnt;
    t0 = (uint64_t)a_mnt << 19;
    if (t1 > t0) {
        --x_mnt;
    }

    x_mnt = fp32_normalise(x_mnt, &x_exp);

    return fp32_round(x_sgn, x_exp, x_mnt << 1 | (t1 != t0), mode, flags);
}

static uint64_t
fp64_sqrt(uint64_t a, int mode, int *flags)
{
    int a_sgn, a_exp, x_sgn, x_exp, c;
    uint64_t a_mnt, x_mnt, r, x0, x1;
    uint32_t x;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);

    // Handle NaNs:
    if (a_exp == 2047 && (uint64_t)(a_mnt << 12)) {
        return fp64_process_NaN(a, mode, flags);
    }

    // Handle infinities and zeroes:
    if (!a_mnt)
        return fp64_zero(a_sgn);
    if (a_exp == 2047 && !a_sgn)
        return fp64_infinity(a_sgn);
    if (a_sgn) {
        *flags |= FPLIB_IOC;
        return fp64_defaultNaN();
    }

    a_mnt = fp64_normalise(a_mnt, &a_exp);
    if (a_exp & 1) {
        ++a_exp;
        a_mnt >>= 1;
    }

    // x = (a * 3 + 5) / 8
    x = (a_mnt >> 34) + (a_mnt >> 35) + (5 << 28);

    // x = (a / x + x) / 2; // 16-bit accuracy
    x = ((a_mnt >> 32) / (x >> 15) + (x >> 16)) << 15;

    // x = (a / x + x) / 2; // 16-bit accuracy
    x = ((a_mnt >> 32) / (x >> 15) + (x >> 16)) << 15;

    // x = (a / x + x) / 2; // 32-bit accuracy
    x = ((a_mnt / x) >> 2) + (x >> 1);

    // r = 1 / x; // 32-bit accuracy
    r = ((uint64_t)1 << 62) / x;

    // r = r * (2 - x * r); // 64-bit accuracy
    mul64x32(&x0, &x1, -(uint64_t)x * r << 1, r);
    lsr128(&x0, &x1, x0, x1, 31);

    // x = (x + a * r) / 2; // 64-bit accuracy
    mul62x62(&x0, &x1, a_mnt >> 10, x0 >> 2);
    lsl128(&x0, &x1, x0, x1, 5);
    lsr128(&x0, &x1, x0, x1, 56);

    x0 = ((uint64_t)x << 31) + (x0 >> 1);

    x_sgn = 0;
    x_exp = (a_exp + 1053) >> 1;
    x_mnt = x0;
    x_mnt = ((x_mnt - (1 << 8)) >> 9) + 1;
    mul62x62(&x0, &x1, x_mnt, x_mnt);
    lsl128(&x0, &x1, x0, x1, 19);
    c = cmp128(x0, x1, 0, a_mnt);
    if (c > 0)
        --x_mnt;

    x_mnt = fp64_normalise(x_mnt, &x_exp);

    return fp64_round(x_sgn, x_exp, x_mnt << 1 | !!c, mode, flags);
}

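// Convert FPSCR fields to the FPLIB_* mode bits above: bits 23:22 (RMode),
// 24 (FZ) and 25 (DN) line up directly. AHP (bit 26) is not included here;
// fplibConvert ors it in separately.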
static int
modeConv(FPSCR fpscr)
{
    return (((int) fpscr) >> 22) & 0xF;
}

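// Like set_fpscr0, but the Inexact flag is suppressed when an underflow
// occurred while flush-to-zero is enabled.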
static void
set_fpscr(FPSCR &fpscr, int flags)
{
    // translate back to FPSCR
    bool underflow = false;
    if (flags & FPLIB_IDC) {
        fpscr.idc = 1;
    }
    if (flags & FPLIB_IOC) {
        fpscr.ioc = 1;
    }
    if (flags & FPLIB_DZC) {
        fpscr.dzc = 1;
    }
    if (flags & FPLIB_OFC) {
        fpscr.ofc = 1;
    }
    if (flags & FPLIB_UFC) {
        underflow = true; //xx Why is this required?
        fpscr.ufc = 1;
    }
    if ((flags & FPLIB_IXC) && !(underflow && fpscr.fz)) {
        fpscr.ixc = 1;
    }
}

template <>
bool
fplibCompareEQ(uint32_t a, uint32_t b, FPSCR &fpscr)
{
    int flags = 0;
    int x = fp32_compare_eq(a, b, modeConv(fpscr), &flags);
    set_fpscr(fpscr, flags);
    return x;
}

template <>
bool
fplibCompareGE(uint32_t a, uint32_t b, FPSCR &fpscr)
{
    int flags = 0;
    int x = fp32_compare_ge(a, b, modeConv(fpscr), &flags);
    set_fpscr(fpscr, flags);
    return x;
}

template <>
bool
fplibCompareGT(uint32_t a, uint32_t b, FPSCR &fpscr)
{
    int flags = 0;
    int x = fp32_compare_gt(a, b, modeConv(fpscr), &flags);
    set_fpscr(fpscr, flags);
    return x;
}

template <>
bool
fplibCompareEQ(uint64_t a, uint64_t b, FPSCR &fpscr)
{
    int flags = 0;
    int x = fp64_compare_eq(a, b, modeConv(fpscr), &flags);
    set_fpscr(fpscr, flags);
    return x;
}

template <>
bool
fplibCompareGE(uint64_t a, uint64_t b, FPSCR &fpscr)
{
    int flags = 0;
    int x = fp64_compare_ge(a, b, modeConv(fpscr), &flags);
    set_fpscr(fpscr, flags);
    return x;
}

template <>
bool
fplibCompareGT(uint64_t a, uint64_t b, FPSCR &fpscr)
{
    int flags = 0;
    int x = fp64_compare_gt(a, b, modeConv(fpscr), &flags);
    set_fpscr(fpscr, flags);
    return x;
}

template <>
uint32_t
fplibAbs(uint32_t op)
{
    return op & ~((uint32_t)1 << 31);
}

template <>
uint64_t
fplibAbs(uint64_t op)
{
    return op & ~((uint64_t)1 << 63);
}

template <>
uint32_t
fplibAdd(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    int flags = 0;
    uint32_t result = fp32_add(op1, op2, 0, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}

template <>
uint64_t
fplibAdd(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    int flags = 0;
    uint64_t result = fp64_add(op1, op2, 0, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}

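// fplibCompare returns the ARM NZCV condition flags packed into four bits:
// 0110 for equal, 1000 for less than, 0010 for greater than and 0011 for
// unordered.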
template <>
int
fplibCompare(uint32_t op1, uint32_t op2, bool signal_nans, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2, result;
    uint32_t mnt1, mnt2;

    fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    if ((exp1 == 255 && (uint32_t)(mnt1 << 9)) ||
        (exp2 == 255 && (uint32_t)(mnt2 << 9))) {
        result = 3;
        if ((exp1 == 255 && (uint32_t)(mnt1 << 9) && !(mnt1 >> 22 & 1)) ||
            (exp2 == 255 && (uint32_t)(mnt2 << 9) && !(mnt2 >> 22 & 1)) ||
            signal_nans)
            flags |= FPLIB_IOC;
    } else {
        if (op1 == op2 || (!mnt1 && !mnt2)) {
            result = 6;
        } else if (sgn1 != sgn2) {
            result = sgn1 ? 8 : 2;
        } else if (exp1 != exp2) {
            result = sgn1 ^ (exp1 < exp2) ? 8 : 2;
        } else {
            result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2;
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
int
fplibCompare(uint64_t op1, uint64_t op2, bool signal_nans, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2, result;
    uint64_t mnt1, mnt2;

    fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    if ((exp1 == 2047 && (uint64_t)(mnt1 << 12)) ||
        (exp2 == 2047 && (uint64_t)(mnt2 << 12))) {
        result = 3;
        if ((exp1 == 2047 && (uint64_t)(mnt1 << 12) && !(mnt1 >> 51 & 1)) ||
            (exp2 == 2047 && (uint64_t)(mnt2 << 12) && !(mnt2 >> 51 & 1)) ||
            signal_nans)
            flags |= FPLIB_IOC;
    } else {
        if (op1 == op2 || (!mnt1 && !mnt2)) {
            result = 6;
        } else if (sgn1 != sgn2) {
            result = sgn1 ? 8 : 2;
        } else if (exp1 != exp2) {
            result = sgn1 ^ (exp1 < exp2) ? 8 : 2;
        } else {
            result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2;
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}

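// Converting a NaN between formats keeps the sign, forces the quiet bit,
// and carries across as many of the top payload bits as fit.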
static uint16_t
fp16_FPConvertNaN_32(uint32_t op)
{
    return fp16_pack(op >> 31, 31, (uint16_t)1 << 9 | op >> 13);
}

static uint16_t
fp16_FPConvertNaN_64(uint64_t op)
{
    return fp16_pack(op >> 63, 31, (uint16_t)1 << 9 | op >> 42);
}

static uint32_t
fp32_FPConvertNaN_16(uint16_t op)
{
    return fp32_pack(op >> 15, 255, (uint32_t)1 << 22 | (uint32_t)op << 13);
}

static uint32_t
fp32_FPConvertNaN_64(uint64_t op)
{
    return fp32_pack(op >> 63, 255, (uint32_t)1 << 22 | op >> 29);
}

static uint64_t
fp64_FPConvertNaN_16(uint16_t op)
{
    return fp64_pack(op >> 15, 2047, (uint64_t)1 << 51 | (uint64_t)op << 42);
}

static uint64_t
fp64_FPConvertNaN_32(uint32_t op)
{
    return fp64_pack(op >> 31, 2047, (uint64_t)1 << 51 | (uint64_t)op << 29);
}

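// Constants (1.5, 3.0 and 2.0) used by the reciprocal and reciprocal
// square root step operations.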
static uint32_t
fp32_FPOnePointFive(int sgn)
{
    return fp32_pack(sgn, 127, (uint32_t)1 << 22);
}

static uint64_t
fp64_FPOnePointFive(int sgn)
{
    return fp64_pack(sgn, 1023, (uint64_t)1 << 51);
}

static uint32_t
fp32_FPThree(int sgn)
{
    return fp32_pack(sgn, 128, (uint32_t)1 << 22);
}

static uint64_t
fp64_FPThree(int sgn)
{
    return fp64_pack(sgn, 1024, (uint64_t)1 << 51);
}

static uint32_t
fp32_FPTwo(int sgn)
{
    return fp32_pack(sgn, 128, 0);
}

static uint64_t
fp64_FPTwo(int sgn)
{
    return fp64_pack(sgn, 1024, 0);
}

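// Conversions to half precision honour FPSCR.AHP: in alternative
// half-precision format there are no infinities or NaNs, so a NaN input
// converts to zero and an infinity to the maximum-magnitude pattern, both
// raising Invalid Operation.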
1783template <>
1784uint16_t
1785fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr)
1786{
1787    int mode = modeConv(fpscr);
1788    int flags = 0;
1789    int sgn, exp;
1790    uint32_t mnt;
1791    uint16_t result;
1792
1793    // Unpack floating-point operand optionally with flush-to-zero:
1794    fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
1795
1796    bool alt_hp = fpscr.ahp;
1797
1798    if (exp == 255 && (uint32_t)(mnt << 9)) {
1799        if (alt_hp) {
1800            result = fp16_zero(sgn);
1801        } else if (fpscr.dn) {
1802            result = fp16_defaultNaN();
1803        } else {
1804            result = fp16_FPConvertNaN_32(op);
1805        }
1806        if (!(mnt >> 22 & 1) || alt_hp) {
1807            flags |= FPLIB_IOC;
1808        }
1809    } else if (exp == 255) {
1810        if (alt_hp) {
1811            result = sgn << 15 | (uint16_t)0x7fff;
1812            flags |= FPLIB_IOC;
1813        } else {
1814            result = fp16_infinity(sgn);
1815        }
1816    } else if (!mnt) {
1817        result = fp16_zero(sgn);
1818    } else {
1819        result = fp16_round_(sgn, exp - 127 + 15,
1820                             mnt >> 7 | !!(uint32_t)(mnt << 25),
1821                             rounding, mode | alt_hp << 4, &flags);
1822    }
1823
1824    set_fpscr0(fpscr, flags);
1825
1826    return result;
1827}
1828
1829template <>
1830uint16_t
1831fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr)
1832{
1833    int mode = modeConv(fpscr);
1834    int flags = 0;
1835    int sgn, exp;
1836    uint64_t mnt;
1837    uint16_t result;
1838
1839    // Unpack floating-point operand optionally with flush-to-zero:
1840    fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
1841
1842    bool alt_hp = fpscr.ahp;
1843
1844    if (exp == 2047 && (uint64_t)(mnt << 12)) {
1845        if (alt_hp) {
1846            result = fp16_zero(sgn);
1847        } else if (fpscr.dn) {
1848            result = fp16_defaultNaN();
1849        } else {
1850            result = fp16_FPConvertNaN_64(op);
1851        }
1852        if (!(mnt >> 51 & 1) || alt_hp) {
1853            flags |= FPLIB_IOC;
1854        }
1855    } else if (exp == 2047) {
1856        if (alt_hp) {
1857            result = sgn << 15 | (uint16_t)0x7fff;
1858            flags |= FPLIB_IOC;
1859        } else {
1860            result = fp16_infinity(sgn);
1861        }
1862    } else if (!mnt) {
1863        result = fp16_zero(sgn);
1864    } else {
1865        result = fp16_round_(sgn, exp - 1023 + 15,
1866                             mnt >> 36 | !!(uint64_t)(mnt << 28),
1867                             rounding, mode | alt_hp << 4, &flags);
1868    }
1869
1870    set_fpscr0(fpscr, flags);
1871
1872    return result;
1873}
1874
1875template <>
1876uint32_t
1877fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr)
1878{
1879    int mode = modeConv(fpscr);
1880    int flags = 0;
1881    int sgn, exp;
1882    uint16_t mnt;
1883    uint32_t result;
1884
1885    // Unpack floating-point operand optionally with flush-to-zero:
1886    fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags);
1887
1888    if (exp == 31 && !fpscr.ahp && (uint16_t)(mnt << 6)) {
1889        if (fpscr.dn) {
1890            result = fp32_defaultNaN();
1891        } else {
1892            result = fp32_FPConvertNaN_16(op);
1893        }
1894        if (!(mnt >> 9 & 1)) {
1895            flags |= FPLIB_IOC;
1896        }
1897    } else if (exp == 31 && !fpscr.ahp) {
1898        result = fp32_infinity(sgn);
1899    } else if (!mnt) {
1900        result = fp32_zero(sgn);
1901    } else {
1902        mnt = fp16_normalise(mnt, &exp);
1903        result = fp32_pack(sgn, exp - 15 + 127 + 5, (uint32_t)mnt << 8);
1904    }
1905
1906    set_fpscr0(fpscr, flags);
1907
1908    return result;
1909}
1910
1911template <>
1912uint32_t
1913fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr)
1914{
1915    int mode = modeConv(fpscr);
1916    int flags = 0;
1917    int sgn, exp;
1918    uint64_t mnt;
1919    uint32_t result;
1920
1921    // Unpack floating-point operand optionally with flush-to-zero:
1922    fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
1923
1924    if (exp == 2047 && (uint64_t)(mnt << 12)) {
1925        if (fpscr.dn) {
1926            result = fp32_defaultNaN();
1927        } else {
1928            result = fp32_FPConvertNaN_64(op);
1929        }
1930        if (!(mnt >> 51 & 1)) {
1931            flags |= FPLIB_IOC;
1932        }
1933    } else if (exp == 2047) {
1934        result = fp32_infinity(sgn);
1935    } else if (!mnt) {
1936        result = fp32_zero(sgn);
1937    } else {
1938        result = fp32_round_(sgn, exp - 1023 + 127,
1939                             mnt >> 20 | !!(uint64_t)(mnt << 44),
1940                             rounding, mode, &flags);
1941    }
1942
1943    set_fpscr0(fpscr, flags);
1944
1945    return result;
1946}
1947
1948template <>
1949uint64_t
1950fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr)
1951{
1952    int mode = modeConv(fpscr);
1953    int flags = 0;
1954    int sgn, exp;
1955    uint16_t mnt;
1956    uint64_t result;
1957
1958    // Unpack floating-point operand optionally with flush-to-zero:
1959    fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags);
1960
1961    if (exp == 31 && !fpscr.ahp && (uint16_t)(mnt << 6)) {
1962        if (fpscr.dn) {
1963            result = fp64_defaultNaN();
1964        } else {
1965            result = fp64_FPConvertNaN_16(op);
1966        }
1967        if (!(mnt >> 9 & 1)) {
1968            flags |= FPLIB_IOC;
1969        }
1970    } else if (exp == 31 && !fpscr.ahp) {
1971        result = fp64_infinity(sgn);
1972    } else if (!mnt) {
1973        result = fp64_zero(sgn);
1974    } else {
1975        mnt = fp16_normalise(mnt, &exp);
1976        result = fp64_pack(sgn, exp - 15 + 1023 + 5, (uint64_t)mnt << 37);
1977    }
1978
1979    set_fpscr0(fpscr, flags);
1980
1981    return result;
1982}
1983
1984template <>
1985uint64_t
1986fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr)
1987{
1988    int mode = modeConv(fpscr);
1989    int flags = 0;
1990    int sgn, exp;
1991    uint32_t mnt;
1992    uint64_t result;
1993
1994    // Unpack floating-point operand optionally with flush-to-zero:
1995    fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
1996
1997    if (exp == 255 && (uint32_t)(mnt << 9)) {
1998        if (fpscr.dn) {
1999            result = fp64_defaultNaN();
2000        } else {
2001            result = fp64_FPConvertNaN_32(op);
2002        }
2003        if (!(mnt >> 22 & 1)) {
2004            flags |= FPLIB_IOC;
2005        }
2006    } else if (exp == 255) {
2007        result = fp64_infinity(sgn);
2008    } else if (!mnt) {
2009        result = fp64_zero(sgn);
2010    } else {
2011        mnt = fp32_normalise(mnt, &exp);
2012        result = fp64_pack(sgn, exp - 127 + 1023 + 8, (uint64_t)mnt << 21);
2013    }
2014
2015    set_fpscr0(fpscr, flags);
2016
2017    return result;
2018}
2019
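// Fused multiply-add: addend + op1 * op2 is computed with a single rounding,
// so the intermediate product is never rounded to the destination format.
// The fourth argument to fp32_muladd/fp64_muladd appears to be a
// result-scaling exponent (0 here; compare the -1 used by
// fplibRSqrtStepFused below to halve its result).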
2020template <>
2021uint32_t
2022fplibMulAdd(uint32_t addend, uint32_t op1, uint32_t op2, FPSCR &fpscr)
2023{
2024    int flags = 0;
2025    uint32_t result = fp32_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags);
2026    set_fpscr0(fpscr, flags);
2027    return result;
2028}
2029
2030template <>
2031uint64_t
2032fplibMulAdd(uint64_t addend, uint64_t op1, uint64_t op2, FPSCR &fpscr)
2033{
2034    int flags = 0;
2035    uint64_t result = fp64_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags);
2036    set_fpscr0(fpscr, flags);
2037    return result;
2038}
2039
2040template <>
2041uint32_t
2042fplibDiv(uint32_t op1, uint32_t op2, FPSCR &fpscr)
2043{
2044    int flags = 0;
2045    uint32_t result = fp32_div(op1, op2, modeConv(fpscr), &flags);
2046    set_fpscr0(fpscr, flags);
2047    return result;
2048}
2049
2050template <>
2051uint64_t
2052fplibDiv(uint64_t op1, uint64_t op2, FPSCR &fpscr)
2053{
2054    int flags = 0;
2055    uint64_t result = fp64_div(op1, op2, modeConv(fpscr), &flags);
2056    set_fpscr0(fpscr, flags);
2057    return result;
2058}
2059
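// Re-pack an unpacked (sign, exponent, mantissa) triple. If the hidden bit
// (bit 23 for fp32, bit 52 for fp64) is clear the value is subnormal, so the
// exponent field must be encoded as zero.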
2060static uint32_t
2061fp32_repack(int sgn, int exp, uint32_t mnt)
2062{
2063    return fp32_pack(sgn, mnt >> 23 ? exp : 0, mnt);
2064}
2065
2066static uint64_t
2067fp64_repack(int sgn, int exp, uint64_t mnt)
2068{
2069    return fp64_pack(sgn, mnt >> 52 ? exp : 0, mnt);
2070}
2071
2072static void
2073fp32_minmaxnum(uint32_t *op1, uint32_t *op2, int sgn)
2074{
2075    // Treat a single quiet-NaN as +Infinity/-Infinity
2076    // Treat a single quiet NaN as +/-infinity so the numeric operand wins
2077        *op1 = fp32_infinity(sgn);
2078    if (!((uint32_t)~(*op2 << 1) >> 23) && (uint32_t)~(*op1 << 1) >> 23)
2079        *op2 = fp32_infinity(sgn);
2080}
2081
2082static void
2083fp64_minmaxnum(uint64_t *op1, uint64_t *op2, int sgn)
2084{
2085    // Treat a single quiet-NaN as +Infinity/-Infinity
2086    // Treat a single quiet NaN as +/-infinity so the numeric operand wins
2087        *op1 = fp64_infinity(sgn);
2088    if (!((uint64_t)~(*op2 << 1) >> 52) && (uint64_t)~(*op1 << 1) >> 52)
2089        *op2 = fp64_infinity(sgn);
2090}
2091
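// For finite operands, IEEE max/min can be decided on the raw bit patterns:
// if the signs differ, the non-negative operand wins; if the signs agree,
// unsigned comparison of the encodings orders the magnitudes (inverted for
// negative values), which is what the sgn1 ^ (op1 > op2) expression below
// implements.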
2092template <>
2093uint32_t
2094fplibMax(uint32_t op1, uint32_t op2, FPSCR &fpscr)
2095{
2096    int mode = modeConv(fpscr);
2097    int flags = 0;
2098    int sgn1, exp1, sgn2, exp2;
2099    uint32_t mnt1, mnt2, x, result;
2100
2101    fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2102    fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2103
2104    if ((x = fp32_process_NaNs(op1, op2, mode, &flags))) {
2105        result = x;
2106    } else {
2107        result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ?
2108                  fp32_repack(sgn1, exp1, mnt1) :
2109                  fp32_repack(sgn2, exp2, mnt2));
2110    }
2111    set_fpscr0(fpscr, flags);
2112    return result;
2113}
2114
2115template <>
2116uint64_t
2117fplibMax(uint64_t op1, uint64_t op2, FPSCR &fpscr)
2118{
2119    int mode = modeConv(fpscr);
2120    int flags = 0;
2121    int sgn1, exp1, sgn2, exp2;
2122    uint64_t mnt1, mnt2, x, result;
2123
2124    fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2125    fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2126
2127    if ((x = fp64_process_NaNs(op1, op2, mode, &flags))) {
2128        result = x;
2129    } else {
2130        result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ?
2131                  fp64_repack(sgn1, exp1, mnt1) :
2132                  fp64_repack(sgn2, exp2, mnt2));
2133    }
2134    set_fpscr0(fpscr, flags);
2135    return result;
2136}
2137
2138template <>
2139uint32_t
2140fplibMaxNum(uint32_t op1, uint32_t op2, FPSCR &fpscr)
2141{
2142    fp32_minmaxnum(&op1, &op2, 1);
2143    return fplibMax<uint32_t>(op1, op2, fpscr);
2144}
2145
2146template <>
2147uint64_t
2148fplibMaxNum(uint64_t op1, uint64_t op2, FPSCR &fpscr)
2149{
2150    fp64_minmaxnum(&op1, &op2, 1);
2151    return fplibMax<uint64_t>(op1, op2, fpscr);
2152}
2153
2154template <>
2155uint32_t
2156fplibMin(uint32_t op1, uint32_t op2, FPSCR &fpscr)
2157{
2158    int mode = modeConv(fpscr);
2159    int flags = 0;
2160    int sgn1, exp1, sgn2, exp2;
2161    uint32_t mnt1, mnt2, x, result;
2162
2163    fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2164    fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2165
2166    if ((x = fp32_process_NaNs(op1, op2, mode, &flags))) {
2167        result = x;
2168    } else {
2169        result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ?
2170                  fp32_repack(sgn1, exp1, mnt1) :
2171                  fp32_repack(sgn2, exp2, mnt2));
2172    }
2173    set_fpscr0(fpscr, flags);
2174    return result;
2175}
2176
2177template <>
2178uint64_t
2179fplibMin(uint64_t op1, uint64_t op2, FPSCR &fpscr)
2180{
2181    int mode = modeConv(fpscr);
2182    int flags = 0;
2183    int sgn1, exp1, sgn2, exp2;
2184    uint64_t mnt1, mnt2, x, result;
2185
2186    fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2187    fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2188
2189    if ((x = fp64_process_NaNs(op1, op2, mode, &flags))) {
2190        result = x;
2191    } else {
2192        result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ?
2193                  fp64_repack(sgn1, exp1, mnt1) :
2194                  fp64_repack(sgn2, exp2, mnt2));
2195    }
2196    set_fpscr0(fpscr, flags);
2197    return result;
2198}
2199
2200template <>
2201uint32_t
2202fplibMinNum(uint32_t op1, uint32_t op2, FPSCR &fpscr)
2203{
2204    fp32_minmaxnum(&op1, &op2, 0);
2205    return fplibMin<uint32_t>(op1, op2, fpscr);
2206}
2207
2208template <>
2209uint64_t
2210fplibMinNum(uint64_t op1, uint64_t op2, FPSCR &fpscr)
2211{
2212    fp64_minmaxnum(&op1, &op2, 0);
2213    return fplibMin<uint64_t>(op1, op2, fpscr);
2214}
2215
2216template <>
2217uint32_t
2218fplibMul(uint32_t op1, uint32_t op2, FPSCR &fpscr)
2219{
2220    int flags = 0;
2221    uint32_t result = fp32_mul(op1, op2, modeConv(fpscr), &flags);
2222    set_fpscr0(fpscr, flags);
2223    return result;
2224}
2225
2226template <>
2227uint64_t
2228fplibMul(uint64_t op1, uint64_t op2, FPSCR &fpscr)
2229{
2230    int flags = 0;
2231    uint64_t result = fp64_mul(op1, op2, modeConv(fpscr), &flags);
2232    set_fpscr0(fpscr, flags);
2233    return result;
2234}
2235
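// FMULX differs from an ordinary multiply only in the 0 * infinity case,
// which returns +/-2.0 instead of raising Invalid Operation and returning a
// NaN. This makes it usable inside the Newton-Raphson reciprocal
// iterations, where that combination can legitimately occur.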
2236template <>
2237uint32_t
2238fplibMulX(uint32_t op1, uint32_t op2, FPSCR &fpscr)
2239{
2240    int mode = modeConv(fpscr);
2241    int flags = 0;
2242    int sgn1, exp1, sgn2, exp2;
2243    uint32_t mnt1, mnt2, result;
2244
2245    fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2246    fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2247
2248    result = fp32_process_NaNs(op1, op2, mode, &flags);
2249    if (!result) {
2250        if ((exp1 == 255 && !mnt2) || (exp2 == 255 && !mnt1)) {
2251            result = fp32_FPTwo(sgn1 ^ sgn2);
2252        } else if (exp1 == 255 || exp2 == 255) {
2253            result = fp32_infinity(sgn1 ^ sgn2);
2254        } else if (!mnt1 || !mnt2) {
2255            result = fp32_zero(sgn1 ^ sgn2);
2256        } else {
2257            result = fp32_mul(op1, op2, mode, &flags);
2258        }
2259    }
2260
2261    set_fpscr0(fpscr, flags);
2262
2263    return result;
2264}
2265
2266template <>
2267uint64_t
2268fplibMulX(uint64_t op1, uint64_t op2, FPSCR &fpscr)
2269{
2270    int mode = modeConv(fpscr);
2271    int flags = 0;
2272    int sgn1, exp1, sgn2, exp2;
2273    uint64_t mnt1, mnt2, result;
2274
2275    fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2276    fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2277
2278    result = fp64_process_NaNs(op1, op2, mode, &flags);
2279    if (!result) {
2280        if ((exp1 == 2047 && !mnt2) || (exp2 == 2047 && !mnt1)) {
2281            result = fp64_FPTwo(sgn1 ^ sgn2);
2282        } else if (exp1 == 2047 || exp2 == 2047) {
2283            result = fp64_infinity(sgn1 ^ sgn2);
2284        } else if (!mnt1 || !mnt2) {
2285            result = fp64_zero(sgn1 ^ sgn2);
2286        } else {
2287            result = fp64_mul(op1, op2, mode, &flags);
2288        }
2289    }
2290
2291    set_fpscr0(fpscr, flags);
2292
2293    return result;
2294}
2295
2296template <>
2297uint32_t
2298fplibNeg(uint32_t op)
2299{
2300    return op ^ (uint32_t)1 << 31;
2301}
2302
2303template <>
2304uint64_t
2305fplibNeg(uint64_t op)
2306{
2307    return op ^ (uint64_t)1 << 63;
2308}
2309
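// 256-entry lookup table for the initial reciprocal square-root estimate.
// The index is formed below as (~exp & 1) << 7 | top fraction bits: the
// high index bit selects between inputs in [1, 2) and [2, 4) (odd versus
// even exponent), and the entry supplies the leading fraction bits of the
// roughly 8-bit estimate.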
2310static const uint8_t recip_sqrt_estimate[256] = {
2311    255, 253, 251, 249, 247, 245, 243, 242, 240, 238, 236, 234, 233, 231, 229, 228,
2312    226, 224, 223, 221, 219, 218, 216, 215, 213, 212, 210, 209, 207, 206, 204, 203,
2313    201, 200, 198, 197, 196, 194, 193, 192, 190, 189, 188, 186, 185, 184, 183, 181,
2314    180, 179, 178, 176, 175, 174, 173, 172, 170, 169, 168, 167, 166, 165, 164, 163,
2315    162, 160, 159, 158, 157, 156, 155, 154, 153, 152, 151, 150, 149, 148, 147, 146,
2316    145, 144, 143, 142, 141, 140, 140, 139, 138, 137, 136, 135, 134, 133, 132, 131,
2317    131, 130, 129, 128, 127, 126, 126, 125, 124, 123, 122, 121, 121, 120, 119, 118,
2318    118, 117, 116, 115, 114, 114, 113, 112, 111, 111, 110, 109, 109, 108, 107, 106,
2319    105, 104, 103, 101, 100,  99,  97,  96,  95,  93,  92,  91,  90,  88,  87,  86,
2320     85,  84,  82,  81,  80,  79,  78,  77,  76,  75,  74,  72,  71,  70,  69,  68,
2321     67,  66,  65,  64,  63,  62,  61,  60,  60,  59,  58,  57,  56,  55,  54,  53,
2322     52,  51,  51,  50,  49,  48,  47,  46,  46,  45,  44,  43,  42,  42,  41,  40,
2323     39,  38,  38,  37,  36,  35,  35,  34,  33,  33,  32,  31,  30,  30,  29,  28,
2324     28,  27,  26,  26,  25,  24,  24,  23,  22,  22,  21,  20,  20,  19,  19,  18,
2325     17,  17,  16,  16,  15,  14,  14,  13,  13,  12,  11,  11,  10,  10,   9,   9,
2326      8,   8,   7,   6,   6,   5,   5,   4,   4,   3,   3,   2,   2,   1,   1,   0
2327};
2328
2329template <>
2330uint32_t
2331fplibRSqrtEstimate(uint32_t op, FPSCR &fpscr)
2332{
2333    int mode = modeConv(fpscr);
2334    int flags = 0;
2335    int sgn, exp;
2336    uint32_t mnt, result;
2337
2338    fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2339
2340    if (exp == 255 && (uint32_t)(mnt << 9)) {
2341        result = fp32_process_NaN(op, mode, &flags);
2342    } else if (!mnt) {
2343        result = fp32_infinity(sgn);
2344        flags |= FPLIB_DZC;
2345    } else if (sgn) {
2346        result = fp32_defaultNaN();
2347        flags |= FPLIB_IOC;
2348    } else if (exp == 255) {
2349        result = fp32_zero(0);
2350    } else {
2351        exp += 8;
2352        mnt = fp32_normalise(mnt, &exp);
2353        mnt = recip_sqrt_estimate[(~exp & 1) << 7 | (mnt >> 24 & 127)];
2354        result = fp32_pack(0, (380 - exp) >> 1, mnt << 15);
2355    }
2356
2357    set_fpscr0(fpscr, flags);
2358
2359    return result;
2360}
2361
2362template <>
2363uint64_t
2364fplibRSqrtEstimate(uint64_t op, FPSCR &fpscr)
2365{
2366    int mode = modeConv(fpscr);
2367    int flags = 0;
2368    int sgn, exp;
2369    uint64_t mnt, result;
2370
2371    fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2372
2373    if (exp == 2047 && (uint64_t)(mnt << 12)) {
2374        result = fp64_process_NaN(op, mode, &flags);
2375    } else if (!mnt) {
2376        result = fp64_infinity(sgn);
2377        flags |= FPLIB_DZC;
2378    } else if (sgn) {
2379        result = fp64_defaultNaN();
2380        flags |= FPLIB_IOC;
2381    } else if (exp == 2047) {
2382        result = fp64_zero(0);
2383    } else {
2384        exp += 11;
2385        mnt = fp64_normalise(mnt, &exp);
2386        mnt = recip_sqrt_estimate[(~exp & 1) << 7 | (mnt >> 56 & 127)];
2387        result = fp64_pack(0, (3068 - exp) >> 1, mnt << 44);
2388    }
2389
2390    set_fpscr0(fpscr, flags);
2391
2392    return result;
2393}
2394
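// Reciprocal square-root step (FRSQRTS): computes (3 - op1 * op2) / 2 with
// a single rounding, op1 having been negated up front so the fused
// multiply-add can be used directly; the -1 scale argument performs the
// halving. The 0 * infinity special case yields the exact 1.5 the
// Newton-Raphson iteration expects.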
2395template <>
2396uint32_t
2397fplibRSqrtStepFused(uint32_t op1, uint32_t op2, FPSCR &fpscr)
2398{
2399    int mode = modeConv(fpscr);
2400    int flags = 0;
2401    int sgn1, exp1, sgn2, exp2;
2402    uint32_t mnt1, mnt2, result;
2403
2404    op1 = fplibNeg<uint32_t>(op1);
2405    fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2406    fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2407
2408    result = fp32_process_NaNs(op1, op2, mode, &flags);
2409    if (!result) {
2410        if ((exp1 == 255 && !mnt2) || (exp2 == 255 && !mnt1)) {
2411            result = fp32_FPOnePointFive(0);
2412        } else if (exp1 == 255 || exp2 == 255) {
2413            result = fp32_infinity(sgn1 ^ sgn2);
2414        } else {
2415            result = fp32_muladd(fp32_FPThree(0), op1, op2, -1, mode, &flags);
2416        }
2417    }
2418
2419    set_fpscr0(fpscr, flags);
2420
2421    return result;
2422}
2423
2424template <>
2425uint64_t
2426fplibRSqrtStepFused(uint64_t op1, uint64_t op2, FPSCR &fpscr)
2427{
2428    int mode = modeConv(fpscr);
2429    int flags = 0;
2430    int sgn1, exp1, sgn2, exp2;
2431    uint64_t mnt1, mnt2, result;
2432
2433    op1 = fplibNeg<uint64_t>(op1);
2434    fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2435    fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2436
2437    result = fp64_process_NaNs(op1, op2, mode, &flags);
2438    if (!result) {
2439        if ((exp1 == 2047 && !mnt2) || (exp2 == 2047 && !mnt1)) {
2440            result = fp64_FPOnePointFive(0);
2441        } else if (exp1 == 2047 || exp2 == 2047) {
2442            result = fp64_infinity(sgn1 ^ sgn2);
2443        } else {
2444            result = fp64_muladd(fp64_FPThree(0), op1, op2, -1, mode, &flags);
2445        }
2446    }
2447
2448    set_fpscr0(fpscr, flags);
2449
2450    return result;
2451}
2452
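// Reciprocal step (FRECPS): computes 2 - op1 * op2 with a single rounding,
// again via a fused multiply-add of the pre-negated op1. With
// op1 = FRECPE(x) and op2 = x this is one Newton-Raphson refinement factor
// for 1/x.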
2453template <>
2454uint32_t
2455fplibRecipStepFused(uint32_t op1, uint32_t op2, FPSCR &fpscr)
2456{
2457    int mode = modeConv(fpscr);
2458    int flags = 0;
2459    int sgn1, exp1, sgn2, exp2;
2460    uint32_t mnt1, mnt2, result;
2461
2462    op1 = fplibNeg<uint32_t>(op1);
2463    fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2464    fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2465
2466    result = fp32_process_NaNs(op1, op2, mode, &flags);
2467    if (!result) {
2468        if ((exp1 == 255 && !mnt2) || (exp2 == 255 && !mnt1)) {
2469            result = fp32_FPTwo(0);
2470        } else if (exp1 == 255 || exp2 == 255) {
2471            result = fp32_infinity(sgn1 ^ sgn2);
2472        } else {
2473            result = fp32_muladd(fp32_FPTwo(0), op1, op2, 0, mode, &flags);
2474        }
2475    }
2476
2477    set_fpscr0(fpscr, flags);
2478
2479    return result;
2480}
2481
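// Reciprocal estimate (FRECPE). For a normal input the significand is
// normalised and its top ten bits divide 2^19 to produce a roughly 8-bit
// estimate of the reciprocal. Inputs whose reciprocal would overflow are
// saturated to infinity or the largest normal according to the rounding
// mode, and inputs whose reciprocal would be subnormal are flushed to zero
// when FZ is set.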
2482template <>
2483uint32_t
2484fplibRecipEstimate(uint32_t op, FPSCR &fpscr)
2485{
2486    int mode = modeConv(fpscr);
2487    int flags = 0;
2488    int sgn, exp;
2489    uint32_t mnt, result;
2490
2491    fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2492
2493    if (exp == 255 && (uint32_t)(mnt << 9)) {
2494        result = fp32_process_NaN(op, mode, &flags);
2495    } else if (exp == 255) {
2496        result = fp32_zero(sgn);
2497    } else if (!mnt) {
2498        result = fp32_infinity(sgn);
2499        flags |= FPLIB_DZC;
2500    } else if (!((uint32_t)(op << 1) >> 22)) {
2501        bool overflow_to_inf = false;
2502        switch (FPCRRounding(fpscr)) {
2503          case FPRounding_TIEEVEN:
2504            overflow_to_inf = true;
2505            break;
2506          case FPRounding_POSINF:
2507            overflow_to_inf = !sgn;
2508            break;
2509          case FPRounding_NEGINF:
2510            overflow_to_inf = sgn;
2511            break;
2512          case FPRounding_ZERO:
2513            overflow_to_inf = false;
2514            break;
2515          default:
2516            assert(0);
2517        }
2518        result = overflow_to_inf ? fp32_infinity(sgn) : fp32_max_normal(sgn);
2519        flags |= FPLIB_OFC | FPLIB_IXC;
2520    } else if (fpscr.fz && exp >= 253) {
2521        result = fp32_zero(sgn);
2522        flags |= FPLIB_UFC;
2523    } else {
2524        exp += 8;
2525        mnt = fp32_normalise(mnt, &exp);
2526        int result_exp = 253 - exp;
2527        uint32_t fraction = (((uint32_t)1 << 19) / (mnt >> 22 | 1) + 1) >> 1;
2528        fraction <<= 15;
2529        if (result_exp == 0) {
2530            fraction >>= 1;
2531        } else if (result_exp == -1) {
2532            fraction >>= 2;
2533            result_exp = 0;
2534        }
2535        result = fp32_pack(sgn, result_exp, fraction);
2536    }
2537
2538    set_fpscr0(fpscr, flags);
2539
2540    return result;
2541}
2542
2543template <>
2544uint64_t
2545fplibRecipEstimate(uint64_t op, FPSCR &fpscr)
2546{
2547    int mode = modeConv(fpscr);
2548    int flags = 0;
2549    int sgn, exp;
2550    uint64_t mnt, result;
2551
2552    fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2553
2554    if (exp == 2047 && (uint64_t)(mnt << 12)) {
2555        result = fp64_process_NaN(op, mode, &flags);
2556    } else if (exp == 2047) {
2557        result = fp64_zero(sgn);
2558    } else if (!mnt) {
2559        result = fp64_infinity(sgn);
2560        flags |= FPLIB_DZC;
2561    } else if (!((uint64_t)(op << 1) >> 51)) {
2562        bool overflow_to_inf = false;
2563        switch (FPCRRounding(fpscr)) {
2564          case FPRounding_TIEEVEN:
2565            overflow_to_inf = true;
2566            break;
2567          case FPRounding_POSINF:
2568            overflow_to_inf = !sgn;
2569            break;
2570          case FPRounding_NEGINF:
2571            overflow_to_inf = sgn;
2572            break;
2573          case FPRounding_ZERO:
2574            overflow_to_inf = false;
2575            break;
2576          default:
2577            assert(0);
2578        }
2579        result = overflow_to_inf ? fp64_infinity(sgn) : fp64_max_normal(sgn);
2580        flags |= FPLIB_OFC | FPLIB_IXC;
2581    } else if (fpscr.fz && exp >= 2045) {
2582        result = fp64_zero(sgn);
2583        flags |= FPLIB_UFC;
2584    } else {
2585        exp += 11;
2586        mnt = fp64_normalise(mnt, &exp);
2587        int result_exp = 2045 - exp;
2588        uint64_t fraction = (((uint32_t)1 << 19) / (mnt >> 54 | 1) + 1) >> 1;
2589        fraction <<= 44;
2590        if (result_exp == 0) {
2591            fraction >>= 1;
2592        } else if (result_exp == -1) {
2593            fraction >>= 2;
2594            result_exp = 0;
2595        }
2596        result = fp64_pack(sgn, result_exp, fraction);
2597    }
2598
2599    set_fpscr0(fpscr, flags);
2600
2601    return result;
2602}
2603
2604template <>
2605uint64_t
2606fplibRecipStepFused(uint64_t op1, uint64_t op2, FPSCR &fpscr)
2607{
2608    int mode = modeConv(fpscr);
2609    int flags = 0;
2610    int sgn1, exp1, sgn2, exp2;
2611    uint64_t mnt1, mnt2, result;
2612
2613    op1 = fplibNeg<uint64_t>(op1);
2614    fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2615    fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2616
2617    result = fp64_process_NaNs(op1, op2, mode, &flags);
2618    if (!result) {
2619        if ((exp1 == 2047 && !mnt2) || (exp2 == 2047 && !mnt1)) {
2620            result = fp64_FPTwo(0);
2621        } else if (exp1 == 2047 || exp2 == 2047) {
2622            result = fp64_infinity(sgn1 ^ sgn2);
2623        } else {
2624            result = fp64_muladd(fp64_FPTwo(0), op1, op2, 0, mode, &flags);
2625        }
2626    }
2627
2628    set_fpscr0(fpscr, flags);
2629
2630    return result;
2631}
2632
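// FRECPX scales a value towards 1.0 by replacing its exponent field with
// its bitwise complement and zeroing the fraction, approximately inverting
// the exponent part only; zeros and denormals map to the largest normal
// exponent, and infinities map to zero.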
2633template <>
2634uint32_t
2635fplibRecpX(uint32_t op, FPSCR &fpscr)
2636{
2637    int mode = modeConv(fpscr);
2638    int flags = 0;
2639    int sgn, exp;
2640    uint32_t mnt, result;
2641
2642    fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2643
2644    if (exp == 255 && (uint32_t)(mnt << 9)) {
2645        result = fp32_process_NaN(op, mode, &flags);
2646    } else {
2648        if (!mnt) { // Zero and denormals
2649            result = fp32_pack(sgn, 254, 0);
2650        } else { // Infinities and normals
2651            result = fp32_pack(sgn, exp ^ 255, 0);
2652        }
2653    }
2654
2655    set_fpscr0(fpscr, flags);
2656
2657    return result;
2658}
2659
2660template <>
2661uint64_t
2662fplibRecpX(uint64_t op, FPSCR &fpscr)
2663{
2664    int mode = modeConv(fpscr);
2665    int flags = 0;
2666    int sgn, exp;
2667    uint64_t mnt, result;
2668
2669    fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2670
2671    if (exp == 2047 && (uint64_t)(mnt << 12)) {
2672        result = fp64_process_NaN(op, mode, &flags);
2673    } else {
2675        if (!mnt) { // Zero and denormals
2676            result = fp64_pack(sgn, 2046, 0);
2677        } else { // Infinities and normals
2678            result = fp64_pack(sgn, exp ^ 2047, 0);
2679        }
2680    }
2681
2682    set_fpscr0(fpscr, flags);
2683
2684    return result;
2685}
2686
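// Round to an integral value in floating-point format. Values with
// exp >= 150 (fp32) or exp >= 1075 (fp64) already have no fraction bits.
// Otherwise the significand is truncated towards zero and `err` encodes the
// discarded part as a 2-bit round/sticky code (0 exact, 1 below half,
// 2 exactly half, 3 above half), which the switch applies per rounding
// mode: 2.5 rounds to 2.0 under TIEEVEN but to 3.0 under TIEAWAY.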
2687template <>
2688uint32_t
2689fplibRoundInt(uint32_t op, FPRounding rounding, bool exact, FPSCR &fpscr)
2690{
2691    int mode = modeConv(fpscr);
2692    int flags = 0;
2693    int sgn, exp;
2694    uint32_t mnt, result;
2695
2696    // Unpack using FPCR to determine if subnormals are flushed-to-zero:
2697    fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2698
2699    // Handle NaNs, infinities and zeroes:
2700    if (exp == 255 && (uint32_t)(mnt << 9)) {
2701        result = fp32_process_NaN(op, mode, &flags);
2702    } else if (exp == 255) {
2703        result = fp32_infinity(sgn);
2704    } else if (!mnt) {
2705        result = fp32_zero(sgn);
2706    } else if (exp >= 150) {
2707        // There are no fractional bits
2708        result = op;
2709    } else {
2710        // Truncate towards zero:
2711        uint32_t x = 150 - exp >= 32 ? 0 : mnt >> (150 - exp);
2712        int err = exp < 118 ? 1 :
2713            (mnt << 1 >> (149 - exp) & 3) | (mnt << 2 << (exp - 118) != 0);
2714        switch (rounding) {
2715          case FPRounding_TIEEVEN:
2716            x += (err == 3 || (err == 2 && (x & 1)));
2717            break;
2718          case FPRounding_POSINF:
2719            x += err && !sgn;
2720            break;
2721          case FPRounding_NEGINF:
2722            x += err && sgn;
2723            break;
2724          case FPRounding_ZERO:
2725            break;
2726          case FPRounding_TIEAWAY:
2727            x += err >> 1;
2728            break;
2729          default:
2730            assert(0);
2731        }
2732
2733        if (x == 0) {
2734            result = fp32_zero(sgn);
2735        } else {
2736            exp = 150;
2737            mnt = fp32_normalise(x, &exp);
2738            result = fp32_pack(sgn, exp + 8, mnt >> 8);
2739        }
2740
2741        if (err && exact)
2742            flags |= FPLIB_IXC;
2743    }
2744
2745    set_fpscr0(fpscr, flags);
2746
2747    return result;
2748}
2749
2750template <>
2751uint64_t
2752fplibRoundInt(uint64_t op, FPRounding rounding, bool exact, FPSCR &fpscr)
2753{
2754    int mode = modeConv(fpscr);
2755    int flags = 0;
2756    int sgn, exp;
2757    uint64_t mnt, result;
2758
2759    // Unpack using FPCR to determine if subnormals are flushed-to-zero:
2760    fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2761
2762    // Handle NaNs, infinities and zeroes:
2763    if (exp == 2047 && (uint64_t)(mnt << 12)) {
2764        result = fp64_process_NaN(op, mode, &flags);
2765    } else if (exp == 2047) {
2766        result = fp64_infinity(sgn);
2767    } else if (!mnt) {
2768        result = fp64_zero(sgn);
2769    } else if (exp >= 1075) {
2770        // There are no fractional bits
2771        result = op;
2772    } else {
2773        // Truncate towards zero:
2774        uint64_t x = 1075 - exp >= 64 ? 0 : mnt >> (1075 - exp);
2775        int err = exp < 1011 ? 1 :
2776            (mnt << 1 >> (1074 - exp) & 3) | (mnt << 2 << (exp - 1011) != 0);
2777        switch (rounding) {
2778          case FPRounding_TIEEVEN:
2779            x += (err == 3 || (err == 2 && (x & 1)));
2780            break;
2781          case FPRounding_POSINF:
2782            x += err && !sgn;
2783            break;
2784          case FPRounding_NEGINF:
2785            x += err && sgn;
2786            break;
2787          case FPRounding_ZERO:
2788            break;
2789          case FPRounding_TIEAWAY:
2790            x += err >> 1;
2791            break;
2792          default:
2793            assert(0);
2794        }
2795
2796        if (x == 0) {
2797            result = fp64_zero(sgn);
2798        } else {
2799            exp = 1075;
2800            mnt = fp64_normalise(x, &exp);
2801            result = fp64_pack(sgn, exp + 11, mnt >> 11);
2802        }
2803
2804        if (err && exact)
2805            flags |= FPLIB_IXC;
2806    }
2807
2808    set_fpscr0(fpscr, flags);
2809
2810    return result;
2811}
2812
2813template <>
2814uint32_t
2815fplibSqrt(uint32_t op, FPSCR &fpscr)
2816{
2817    int flags = 0;
2818    uint32_t result = fp32_sqrt(op, modeConv(fpscr), &flags);
2819    set_fpscr0(fpscr, flags);
2820    return result;
2821}
2822
2823template <>
2824uint64_t
2825fplibSqrt(uint64_t op, FPSCR &fpscr)
2826{
2827    int flags = 0;
2828    uint64_t result = fp64_sqrt(op, modeConv(fpscr), &flags);
2829    set_fpscr0(fpscr, flags);
2830    return result;
2831}
2832
2833template <>
2834uint32_t
2835fplibSub(uint32_t op1, uint32_t op2, FPSCR &fpscr)
2836{
2837    int flags = 0;
2838    uint32_t result = fp32_add(op1, op2, 1, modeConv(fpscr), &flags);
2839    set_fpscr0(fpscr, flags);
2840    return result;
2841}
2842
2843template <>
2844uint64_t
2845fplibSub(uint64_t op1, uint64_t op2, FPSCR &fpscr)
2846{
2847    int flags = 0;
2848    uint64_t result = fp64_add(op1, op2, 1, modeConv(fpscr), &flags);
2849    set_fpscr0(fpscr, flags);
2850    return result;
2851}
2852
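// Common float-to-fixed conversion on an unpacked operand. The initial
// exponent test catches values too large for 64 bits; otherwise the
// significand is shifted into integer position, a 2-bit round/sticky code
// drives the rounding switch, and out-of-range results saturate to the type
// limits with Invalid Operation. `u` selects unsigned (true) versus signed
// (false) targets; the callers fold fbits into exp before calling.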
2853static uint64_t
2854FPToFixed_64(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding,
2855             int *flags)
2856{
2857    uint64_t x;
2858    int err;
2859
2860    if (exp > 1023 + 63) {
2861        *flags = FPLIB_IOC;
2862        return ((uint64_t)!u << 63) - !sgn;
2863    }
2864
2865    x = lsr64(mnt << 11, 1023 + 63 - exp);
2866    err = (exp > 1023 + 63 - 2 ? 0 :
2867           (lsr64(mnt << 11, 1023 + 63 - 2 - exp) & 3) |
2868           !!(mnt << 11 & (lsl64(1, 1023 + 63 - 2 - exp) - 1)));
2869
2870    switch (rounding) {
2871      case FPRounding_TIEEVEN:
2872        x += (err == 3 || (err == 2 && (x & 1)));
2873        break;
2874      case FPRounding_POSINF:
2875        x += err && !sgn;
2876        break;
2877      case FPRounding_NEGINF:
2878        x += err && sgn;
2879        break;
2880      case FPRounding_ZERO:
2881        break;
2882      case FPRounding_TIEAWAY:
2883        x += err >> 1;
2884        break;
2885      default:
2886        assert(0);
2887    }
2888
2889    if (u ? sgn && x : x > ((uint64_t)1 << 63) - !sgn) {
2890        *flags = FPLIB_IOC;
2891        return ((uint64_t)!u << 63) - !sgn;
2892    }
2893
2894    if (err) {
2895        *flags = FPLIB_IXC;
2896    }
2897
2898    return sgn ? -x : x;
2899}
2900
2901static uint32_t
2902FPToFixed_32(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding,
2903             int *flags)
2904{
2905    uint64_t x = FPToFixed_64(sgn, exp, mnt, u, rounding, flags);
2906    if (u ? x >= (uint64_t)1 << 32 :
2907        !(x < (uint64_t)1 << 31 ||
2908          (uint64_t)-x <= (uint64_t)1 << 31)) {
2909        *flags = FPLIB_IOC;
2910        x = ((uint32_t)!u << 31) - !sgn;
2911    }
2912    return x;
2913}
2914
2915template <>
2916uint32_t
2917fplibFPToFixed(uint32_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
2918{
2919    int flags = 0;
2920    int sgn, exp;
2921    uint32_t mnt, result;
2922
2923    // Unpack using FPCR to determine if subnormals are flushed-to-zero:
2924    fp32_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);
2925
2926    // If NaN, set cumulative flag or take exception:
2927    if (exp == 255 && (uint32_t)(mnt << 9)) {
2928        flags = FPLIB_IOC;
2929        result = 0;
2930    } else {
2931        result = FPToFixed_32(sgn, exp + 1023 - 127 + fbits,
2932                              (uint64_t)mnt << (52 - 23), u, rounding, &flags);
2933    }
2934
2935    set_fpscr0(fpscr, flags);
2936
2937    return result;
2938}
2939
2940template <>
2941uint32_t
2942fplibFPToFixed(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
2943{
2944    int flags = 0;
2945    int sgn, exp;
2946    uint64_t mnt;
2947    uint32_t result;
2948
2949    // Unpack using FPCR to determine if subnormals are flushed-to-zero:
2950    fp64_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);
2951
2952    // If NaN, set cumulative flag or take exception:
2953    if (exp == 2047 && (uint64_t)(mnt << 12)) {
2954        flags = FPLIB_IOC;
2955        result = 0;
2956    } else {
2957        result = FPToFixed_32(sgn, exp + fbits, mnt, u, rounding, &flags);
2958    }
2959
2960    set_fpscr0(fpscr, flags);
2961
2962    return result;
2963}
2964
2965template <>
2966uint64_t
2967fplibFPToFixed(uint32_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
2968{
2969    int flags = 0;
2970    int sgn, exp;
2971    uint32_t mnt;
2972    uint64_t result;
2973
2974    // Unpack using FPCR to determine if subnormals are flushed-to-zero:
2975    fp32_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);
2976
2977    // If NaN, set cumulative flag or take exception:
2978    if (exp == 255 && (uint32_t)(mnt << 9)) {
2979        flags = FPLIB_IOC;
2980        result = 0;
2981    } else {
2982        result = FPToFixed_64(sgn, exp + 1023 - 127 + fbits,
2983                              (uint64_t)mnt << (52 - 23), u, rounding, &flags);
2984    }
2985
2986    set_fpscr0(fpscr, flags);
2987
2988    return result;
2989}
2990
2991template <>
2992uint64_t
2993fplibFPToFixed(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
2994{
2995    int flags = 0;
2996    int sgn, exp;
2997    uint64_t mnt, result;
2998
2999    // Unpack using FPCR to determine if subnormals are flushed-to-zero:
3000    fp64_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);
3001
3002    // If NaN, set cumulative flag or take exception:
3003    if (exp == 2047 && (uint64_t)(mnt << 12)) {
3004        flags = FPLIB_IOC;
3005        result = 0;
3006    } else {
3007        result = FPToFixed_64(sgn, exp + fbits, mnt, u, rounding, &flags);
3008    }
3009
3010    set_fpscr0(fpscr, flags);
3011
3012    return result;
3013}
3014
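// Fixed/integer-to-float conversion: the 64-bit input is treated as a
// significand with value a * 2^-fbits, so its exponent starts at
// 63 + 127 - fbits = 190 - fbits for fp32 (63 + 1023 - fbits for fp64)
// before normalisation, and rounding then produces the final encoding. For
// example, converting the integer 1 with fbits = 0 yields 1.0.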
3015static uint32_t
3016fp32_cvtf(uint64_t a, int fbits, int u, int mode, int *flags)
3017{
3018    int x_sgn = !u && a >> 63;
3019    int x_exp = 190 - fbits;
3020    uint64_t x_mnt = x_sgn ? -a : a;
3021
3022    // Handle zero:
3023    if (!x_mnt) {
3024        return fp32_zero(0);
3025    }
3026
3027    // Normalise and convert to 32 bits, collapsing error into bottom bit:
3028    x_mnt = fp64_normalise(x_mnt, &x_exp);
3029    x_mnt = x_mnt >> 31 | !!(uint32_t)(x_mnt << 1);
3030
3031    return fp32_round(x_sgn, x_exp, x_mnt, mode, flags);
3032}
3033
3034static uint64_t
3035fp64_cvtf(uint64_t a, int fbits, int u, int mode, int *flags)
3036{
3037    int x_sgn = !u && a >> 63;
3038    int x_exp = 1024 + 62 - fbits;
3039    uint64_t x_mnt = x_sgn ? -a : a;
3040
3041    // Handle zero:
3042    if (!x_mnt) {
3043        return fp64_zero(0);
3044    }
3045
3046    x_mnt = fp64_normalise(x_mnt, &x_exp);
3047
3048    return fp64_round(x_sgn, x_exp, x_mnt << 1, mode, flags);
3049}
3050
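// The mode word passed to the cvtf helpers combines the requested rounding
// with the flush-to-zero and default-NaN controls: FPSCR.FZ (bit 24) and
// FPSCR.DN (bit 25), shifted right by 22, land on FPLIB_FZ (4) and
// FPLIB_DN (8).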
3051template <>
3052uint32_t
3053fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
3054{
3055    int flags = 0;
3056    uint32_t res = fp32_cvtf(op, fbits, u,
3057                             (int)rounding | ((uint32_t)fpscr >> 22 & 12),
3058                             &flags);
3059    set_fpscr0(fpscr, flags);
3060    return res;
3061}
3062
3063template <>
3064uint64_t
3065fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
3066{
3067    int flags = 0;
3068    uint64_t res = fp64_cvtf(op, fbits, u,
3069                             (int)rounding | ((uint32_t)fpscr >> 22 & 12),
3070                             &flags);
3071    set_fpscr0(fpscr, flags);
3072    return res;
3073}
3074
3075}
3076