fplib.cc (10104:ff709c429b7b) fplib.cc (10272:336c7d36ac00)
1/*
2* Copyright (c) 2012-2013 ARM Limited
3* All rights reserved
4*
5* The license below extends only to copyright in the software and shall
6* not be construed as granting a license to any other intellectual
7* property including but not limited to intellectual property relating
8* to a hardware implementation of the functionality of the software
9* licensed hereunder. You may use the software subject to the license
10* terms below provided that you ensure that this notice is replicated
11* unmodified and in its entirety in all distributions of the software,
12* modified or unmodified, in source code or in binary form.
13*
14* Redistribution and use in source and binary forms, with or without
15* modification, are permitted provided that the following conditions are
16* met: redistributions of source code must retain the above copyright
17* notice, this list of conditions and the following disclaimer;
18* redistributions in binary form must reproduce the above copyright
19* notice, this list of conditions and the following disclaimer in the
20* documentation and/or other materials provided with the distribution;
21* neither the name of the copyright holders nor the names of its
22* contributors may be used to endorse or promote products derived from
23* this software without specific prior written permission.
24*
25* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36*
37* Authors: Edmund Grimley Evans
38* Thomas Grocutt
39*/
40
41#include <stdint.h>
42
43#include <cassert>
44
45#include "fplib.hh"
46
47namespace ArmISA
48{
49
50#define FPLIB_RN 0
51#define FPLIB_RP 1
52#define FPLIB_RM 2
53#define FPLIB_RZ 3
54#define FPLIB_FZ 4
55#define FPLIB_DN 8
56#define FPLIB_AHP 16
57
58#define FPLIB_IDC 128 // Input Denormal
59#define FPLIB_IXC 16 // Inexact
60#define FPLIB_UFC 8 // Underflow
61#define FPLIB_OFC 4 // Overflow
62#define FPLIB_DZC 2 // Division by Zero
63#define FPLIB_IOC 1 // Invalid Operation
64
65static inline uint16_t
66lsl16(uint16_t x, uint32_t shift)
67{
68 return shift < 16 ? x << shift : 0;
69}
70
71static inline uint16_t
72lsr16(uint16_t x, uint32_t shift)
73{
74 return shift < 16 ? x >> shift : 0;
75}
76
77static inline uint32_t
78lsl32(uint32_t x, uint32_t shift)
79{
80 return shift < 32 ? x << shift : 0;
81}
82
83static inline uint32_t
84lsr32(uint32_t x, uint32_t shift)
85{
86 return shift < 32 ? x >> shift : 0;
87}
88
89static inline uint64_t
90lsl64(uint64_t x, uint32_t shift)
91{
92 return shift < 64 ? x << shift : 0;
93}
94
95static inline uint64_t
96lsr64(uint64_t x, uint32_t shift)
97{
98 return shift < 64 ? x >> shift : 0;
99}
100
101static inline void
102lsl128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift)
103{
104 if (shift < 64) {
105 *r1 = x1 << shift | x0 >> (64 - shift);
106 *r0 = x0 << shift;
107 } else if (shift < 128) {
108 *r1 = x0 << (shift - 64);
109 *r0 = 0;
110 } else {
111 *r1 = 0;
112 *r0 = 0;
113 }
114}
115
116static inline void
117lsr128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift)
118{
119 if (shift < 64) {
120 *r0 = x0 >> shift | x1 << (64 - shift);
121 *r1 = x1 >> shift;
122 } else if (shift < 128) {
123 *r0 = x1 >> (shift - 64);
124 *r1 = 0;
125 } else {
126 *r0 = 0;
127 *r1 = 0;
128 }
129}
130
131static inline void
132mul62x62(uint64_t *x0, uint64_t *x1, uint64_t a, uint64_t b)
133{
134 uint32_t mask = ((uint32_t)1 << 31) - 1;
135 uint64_t a0 = a & mask;
136 uint64_t a1 = a >> 31 & mask;
137 uint64_t b0 = b & mask;
138 uint64_t b1 = b >> 31 & mask;
139 uint64_t p0 = a0 * b0;
140 uint64_t p2 = a1 * b1;
141 uint64_t p1 = (a0 + a1) * (b0 + b1) - p0 - p2;
142 uint64_t s0 = p0;
143 uint64_t s1 = (s0 >> 31) + p1;
144 uint64_t s2 = (s1 >> 31) + p2;
145 *x0 = (s0 & mask) | (s1 & mask) << 31 | s2 << 62;
146 *x1 = s2 >> 2;
147}
148
149static inline
150void mul64x32(uint64_t *x0, uint64_t *x1, uint64_t a, uint32_t b)
151{
152 uint64_t t0 = (uint64_t)(uint32_t)a * b;
153 uint64_t t1 = (t0 >> 32) + (a >> 32) * b;
154 *x0 = t1 << 32 | (uint32_t)t0;
155 *x1 = t1 >> 32;
156}
157
158static inline void
1/*
2* Copyright (c) 2012-2013 ARM Limited
3* All rights reserved
4*
5* The license below extends only to copyright in the software and shall
6* not be construed as granting a license to any other intellectual
7* property including but not limited to intellectual property relating
8* to a hardware implementation of the functionality of the software
9* licensed hereunder. You may use the software subject to the license
10* terms below provided that you ensure that this notice is replicated
11* unmodified and in its entirety in all distributions of the software,
12* modified or unmodified, in source code or in binary form.
13*
14* Redistribution and use in source and binary forms, with or without
15* modification, are permitted provided that the following conditions are
16* met: redistributions of source code must retain the above copyright
17* notice, this list of conditions and the following disclaimer;
18* redistributions in binary form must reproduce the above copyright
19* notice, this list of conditions and the following disclaimer in the
20* documentation and/or other materials provided with the distribution;
21* neither the name of the copyright holders nor the names of its
22* contributors may be used to endorse or promote products derived from
23* this software without specific prior written permission.
24*
25* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36*
37* Authors: Edmund Grimley Evans
38* Thomas Grocutt
39*/
40
41#include <stdint.h>
42
43#include <cassert>
44
45#include "fplib.hh"
46
47namespace ArmISA
48{
49
50#define FPLIB_RN 0
51#define FPLIB_RP 1
52#define FPLIB_RM 2
53#define FPLIB_RZ 3
54#define FPLIB_FZ 4
55#define FPLIB_DN 8
56#define FPLIB_AHP 16
57
58#define FPLIB_IDC 128 // Input Denormal
59#define FPLIB_IXC 16 // Inexact
60#define FPLIB_UFC 8 // Underflow
61#define FPLIB_OFC 4 // Overflow
62#define FPLIB_DZC 2 // Division by Zero
63#define FPLIB_IOC 1 // Invalid Operation
64
65static inline uint16_t
66lsl16(uint16_t x, uint32_t shift)
67{
68 return shift < 16 ? x << shift : 0;
69}
70
71static inline uint16_t
72lsr16(uint16_t x, uint32_t shift)
73{
74 return shift < 16 ? x >> shift : 0;
75}
76
77static inline uint32_t
78lsl32(uint32_t x, uint32_t shift)
79{
80 return shift < 32 ? x << shift : 0;
81}
82
83static inline uint32_t
84lsr32(uint32_t x, uint32_t shift)
85{
86 return shift < 32 ? x >> shift : 0;
87}
88
89static inline uint64_t
90lsl64(uint64_t x, uint32_t shift)
91{
92 return shift < 64 ? x << shift : 0;
93}
94
95static inline uint64_t
96lsr64(uint64_t x, uint32_t shift)
97{
98 return shift < 64 ? x >> shift : 0;
99}
100
101static inline void
102lsl128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift)
103{
104 if (shift < 64) {
105 *r1 = x1 << shift | x0 >> (64 - shift);
106 *r0 = x0 << shift;
107 } else if (shift < 128) {
108 *r1 = x0 << (shift - 64);
109 *r0 = 0;
110 } else {
111 *r1 = 0;
112 *r0 = 0;
113 }
114}
115
116static inline void
117lsr128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift)
118{
119 if (shift < 64) {
120 *r0 = x0 >> shift | x1 << (64 - shift);
121 *r1 = x1 >> shift;
122 } else if (shift < 128) {
123 *r0 = x1 >> (shift - 64);
124 *r1 = 0;
125 } else {
126 *r0 = 0;
127 *r1 = 0;
128 }
129}
130
131static inline void
132mul62x62(uint64_t *x0, uint64_t *x1, uint64_t a, uint64_t b)
133{
134 uint32_t mask = ((uint32_t)1 << 31) - 1;
135 uint64_t a0 = a & mask;
136 uint64_t a1 = a >> 31 & mask;
137 uint64_t b0 = b & mask;
138 uint64_t b1 = b >> 31 & mask;
139 uint64_t p0 = a0 * b0;
140 uint64_t p2 = a1 * b1;
141 uint64_t p1 = (a0 + a1) * (b0 + b1) - p0 - p2;
142 uint64_t s0 = p0;
143 uint64_t s1 = (s0 >> 31) + p1;
144 uint64_t s2 = (s1 >> 31) + p2;
145 *x0 = (s0 & mask) | (s1 & mask) << 31 | s2 << 62;
146 *x1 = s2 >> 2;
147}
148
149static inline
150void mul64x32(uint64_t *x0, uint64_t *x1, uint64_t a, uint32_t b)
151{
152 uint64_t t0 = (uint64_t)(uint32_t)a * b;
153 uint64_t t1 = (t0 >> 32) + (a >> 32) * b;
154 *x0 = t1 << 32 | (uint32_t)t0;
155 *x1 = t1 >> 32;
156}
157
158static inline void
159mul64x64(uint64_t *x0, uint64_t *x1, uint64_t a, uint64_t b)
160{
161 uint64_t a0 = (uint32_t)a;
162 uint64_t a1 = a >> 32;
163 uint64_t b0 = (uint32_t)b;
164 uint64_t b1 = b >> 32;
165 uint64_t t1 = (a0 * b0 >> 32) + a1 * b0;
166 uint64_t t2 = a0 * b1;
167 uint64_t x = ((uint64_t)(uint32_t)t1 + (uint32_t)t2) >> 32;
168 x += t1 >> 32;
169 x += t2 >> 32;
170 x += a1 * b1;
171 *x0 = a * b;
172 *x1 = x;
173}
174
175static inline void
176add128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0,
177 uint64_t b1)
178{
179 *x0 = a0 + b0;
180 *x1 = a1 + b1 + (*x0 < a0);
181}
182
183static inline void
184sub128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0,
185 uint64_t b1)
186{
187 *x0 = a0 - b0;
188 *x1 = a1 - b1 - (*x0 > a0);
189}
190
191static inline int
192cmp128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1)
193{
194 return (a1 < b1 ? -1 : a1 > b1 ? 1 : a0 < b0 ? -1 : a0 > b0 ? 1 : 0);
195}
196
197static inline uint16_t
198fp16_normalise(uint16_t mnt, int *exp)
199{
200 int shift;
201
202 if (!mnt) {
203 return 0;
204 }
205
206 for (shift = 8; shift; shift >>= 1) {
207 if (!(mnt >> (16 - shift))) {
208 mnt <<= shift;
209 *exp -= shift;
210 }
211 }
212 return mnt;
213}
214
215static inline uint32_t
216fp32_normalise(uint32_t mnt, int *exp)
217{
218 int shift;
219
220 if (!mnt) {
221 return 0;
222 }
223
224 for (shift = 16; shift; shift >>= 1) {
225 if (!(mnt >> (32 - shift))) {
226 mnt <<= shift;
227 *exp -= shift;
228 }
229 }
230 return mnt;
231}
232
233static inline uint64_t
234fp64_normalise(uint64_t mnt, int *exp)
235{
236 int shift;
237
238 if (!mnt) {
239 return 0;
240 }
241
242 for (shift = 32; shift; shift >>= 1) {
243 if (!(mnt >> (64 - shift))) {
244 mnt <<= shift;
245 *exp -= shift;
246 }
247 }
248 return mnt;
249}
250
251static inline void
252fp128_normalise(uint64_t *mnt0, uint64_t *mnt1, int *exp)
253{
254 uint64_t x0 = *mnt0;
255 uint64_t x1 = *mnt1;
256 int shift;
257
258 if (!x0 && !x1) {
259 return;
260 }
261
262 if (!x1) {
263 x1 = x0;
264 x0 = 0;
265 *exp -= 64;
266 }
267
268 for (shift = 32; shift; shift >>= 1) {
269 if (!(x1 >> (64 - shift))) {
270 x1 = x1 << shift | x0 >> (64 - shift);
271 x0 <<= shift;
272 *exp -= shift;
273 }
274 }
275
276 *mnt0 = x0;
277 *mnt1 = x1;
278}
279
280static inline uint16_t
281fp16_pack(uint16_t sgn, uint16_t exp, uint16_t mnt)
282{
283 return sgn << 15 | exp << 10 | (mnt & (((uint16_t)1 << 10) - 1));
284}
285
286static inline uint32_t
287fp32_pack(uint32_t sgn, uint32_t exp, uint32_t mnt)
288{
289 return sgn << 31 | exp << 23 | (mnt & (((uint32_t)1 << 23) - 1));
290}
291
292static inline uint64_t
293fp64_pack(uint64_t sgn, uint64_t exp, uint64_t mnt)
294{
295 return (uint64_t)sgn << 63 | exp << 52 | (mnt & (((uint64_t)1 << 52) - 1));
296}
297
298static inline uint16_t
299fp16_zero(int sgn)
300{
301 return fp16_pack(sgn, 0, 0);
302}
303
304static inline uint32_t
305fp32_zero(int sgn)
306{
307 return fp32_pack(sgn, 0, 0);
308}
309
310static inline uint64_t
311fp64_zero(int sgn)
312{
313 return fp64_pack(sgn, 0, 0);
314}
315
316static inline uint16_t
317fp16_max_normal(int sgn)
318{
319 return fp16_pack(sgn, 30, -1);
320}
321
322static inline uint32_t
323fp32_max_normal(int sgn)
324{
325 return fp32_pack(sgn, 254, -1);
326}
327
328static inline uint64_t
329fp64_max_normal(int sgn)
330{
331 return fp64_pack(sgn, 2046, -1);
332}
333
334static inline uint16_t
335fp16_infinity(int sgn)
336{
337 return fp16_pack(sgn, 31, 0);
338}
339
340static inline uint32_t
341fp32_infinity(int sgn)
342{
343 return fp32_pack(sgn, 255, 0);
344}
345
346static inline uint64_t
347fp64_infinity(int sgn)
348{
349 return fp64_pack(sgn, 2047, 0);
350}
351
352static inline uint16_t
353fp16_defaultNaN()
354{
355 return fp16_pack(0, 31, (uint16_t)1 << 9);
356}
357
358static inline uint32_t
359fp32_defaultNaN()
360{
361 return fp32_pack(0, 255, (uint32_t)1 << 22);
362}
363
364static inline uint64_t
365fp64_defaultNaN()
366{
367 return fp64_pack(0, 2047, (uint64_t)1 << 51);
368}
369
370static inline void
371fp16_unpack(int *sgn, int *exp, uint16_t *mnt, uint16_t x, int mode,
372 int *flags)
373{
374 *sgn = x >> 15;
375 *exp = x >> 10 & 31;
376 *mnt = x & (((uint16_t)1 << 10) - 1);
377
378 // Handle subnormals:
379 if (*exp) {
380 *mnt |= (uint16_t)1 << 10;
381 } else {
382 ++*exp;
383 // There is no flush to zero in this case!
384 }
385}
386
387static inline void
388fp32_unpack(int *sgn, int *exp, uint32_t *mnt, uint32_t x, int mode,
389 int *flags)
390{
391 *sgn = x >> 31;
392 *exp = x >> 23 & 255;
393 *mnt = x & (((uint32_t)1 << 23) - 1);
394
395 // Handle subnormals:
396 if (*exp) {
397 *mnt |= (uint32_t)1 << 23;
398 } else {
399 ++*exp;
400 if ((mode & FPLIB_FZ) && *mnt) {
401 *flags |= FPLIB_IDC;
402 *mnt = 0;
403 }
404 }
405}
406
407static inline void
408fp64_unpack(int *sgn, int *exp, uint64_t *mnt, uint64_t x, int mode,
409 int *flags)
410{
411 *sgn = x >> 63;
412 *exp = x >> 52 & 2047;
413 *mnt = x & (((uint64_t)1 << 52) - 1);
414
415 // Handle subnormals:
416 if (*exp) {
417 *mnt |= (uint64_t)1 << 52;
418 } else {
419 ++*exp;
420 if ((mode & FPLIB_FZ) && *mnt) {
421 *flags |= FPLIB_IDC;
422 *mnt = 0;
423 }
424 }
425}
426
427static inline uint32_t
428fp32_process_NaN(uint32_t a, int mode, int *flags)
429{
430 if (!(a >> 22 & 1)) {
431 *flags |= FPLIB_IOC;
432 a |= (uint32_t)1 << 22;
433 }
434 return mode & FPLIB_DN ? fp32_defaultNaN() : a;
435}
436
437static inline uint64_t
438fp64_process_NaN(uint64_t a, int mode, int *flags)
439{
440 if (!(a >> 51 & 1)) {
441 *flags |= FPLIB_IOC;
442 a |= (uint64_t)1 << 51;
443 }
444 return mode & FPLIB_DN ? fp64_defaultNaN() : a;
445}
446
447static uint32_t
448fp32_process_NaNs(uint32_t a, uint32_t b, int mode, int *flags)
449{
450 int a_exp = a >> 23 & 255;
451 uint32_t a_mnt = a & (((uint32_t)1 << 23) - 1);
452 int b_exp = b >> 23 & 255;
453 uint32_t b_mnt = b & (((uint32_t)1 << 23) - 1);
454
455 // Handle signalling NaNs:
456 if (a_exp == 255 && a_mnt && !(a_mnt >> 22 & 1))
457 return fp32_process_NaN(a, mode, flags);
458 if (b_exp == 255 && b_mnt && !(b_mnt >> 22 & 1))
459 return fp32_process_NaN(b, mode, flags);
460
461 // Handle quiet NaNs:
462 if (a_exp == 255 && a_mnt)
463 return fp32_process_NaN(a, mode, flags);
464 if (b_exp == 255 && b_mnt)
465 return fp32_process_NaN(b, mode, flags);
466
467 return 0;
468}
469
470static uint64_t
471fp64_process_NaNs(uint64_t a, uint64_t b, int mode, int *flags)
472{
473 int a_exp = a >> 52 & 2047;
474 uint64_t a_mnt = a & (((uint64_t)1 << 52) - 1);
475 int b_exp = b >> 52 & 2047;
476 uint64_t b_mnt = b & (((uint64_t)1 << 52) - 1);
477
478 // Handle signalling NaNs:
479 if (a_exp == 2047 && a_mnt && !(a_mnt >> 51 & 1))
480 return fp64_process_NaN(a, mode, flags);
481 if (b_exp == 2047 && b_mnt && !(b_mnt >> 51 & 1))
482 return fp64_process_NaN(b, mode, flags);
483
484 // Handle quiet NaNs:
485 if (a_exp == 2047 && a_mnt)
486 return fp64_process_NaN(a, mode, flags);
487 if (b_exp == 2047 && b_mnt)
488 return fp64_process_NaN(b, mode, flags);
489
490 return 0;
491}
492
493static uint32_t
494fp32_process_NaNs3(uint32_t a, uint32_t b, uint32_t c, int mode, int *flags)
495{
496 int a_exp = a >> 23 & 255;
497 uint32_t a_mnt = a & (((uint32_t)1 << 23) - 1);
498 int b_exp = b >> 23 & 255;
499 uint32_t b_mnt = b & (((uint32_t)1 << 23) - 1);
500 int c_exp = c >> 23 & 255;
501 uint32_t c_mnt = c & (((uint32_t)1 << 23) - 1);
502
503 // Handle signalling NaNs:
504 if (a_exp == 255 && a_mnt && !(a_mnt >> 22 & 1))
505 return fp32_process_NaN(a, mode, flags);
506 if (b_exp == 255 && b_mnt && !(b_mnt >> 22 & 1))
507 return fp32_process_NaN(b, mode, flags);
508 if (c_exp == 255 && c_mnt && !(c_mnt >> 22 & 1))
509 return fp32_process_NaN(c, mode, flags);
510
511 // Handle quiet NaNs:
512 if (a_exp == 255 && a_mnt)
513 return fp32_process_NaN(a, mode, flags);
514 if (b_exp == 255 && b_mnt)
515 return fp32_process_NaN(b, mode, flags);
516 if (c_exp == 255 && c_mnt)
517 return fp32_process_NaN(c, mode, flags);
518
519 return 0;
520}
521
522static uint64_t
523fp64_process_NaNs3(uint64_t a, uint64_t b, uint64_t c, int mode, int *flags)
524{
525 int a_exp = a >> 52 & 2047;
526 uint64_t a_mnt = a & (((uint64_t)1 << 52) - 1);
527 int b_exp = b >> 52 & 2047;
528 uint64_t b_mnt = b & (((uint64_t)1 << 52) - 1);
529 int c_exp = c >> 52 & 2047;
530 uint64_t c_mnt = c & (((uint64_t)1 << 52) - 1);
531
532 // Handle signalling NaNs:
533 if (a_exp == 2047 && a_mnt && !(a_mnt >> 51 & 1))
534 return fp64_process_NaN(a, mode, flags);
535 if (b_exp == 2047 && b_mnt && !(b_mnt >> 51 & 1))
536 return fp64_process_NaN(b, mode, flags);
537 if (c_exp == 2047 && c_mnt && !(c_mnt >> 51 & 1))
538 return fp64_process_NaN(c, mode, flags);
539
540 // Handle quiet NaNs:
541 if (a_exp == 2047 && a_mnt)
542 return fp64_process_NaN(a, mode, flags);
543 if (b_exp == 2047 && b_mnt)
544 return fp64_process_NaN(b, mode, flags);
545 if (c_exp == 2047 && c_mnt)
546 return fp64_process_NaN(c, mode, flags);
547
548 return 0;
549}
550
551static uint16_t
552fp16_round_(int sgn, int exp, uint16_t mnt, int rm, int mode, int *flags)
553{
554 int biased_exp; // non-negative exponent value for result
555 uint16_t int_mant; // mantissa for result, less than (1 << 11)
556 int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5
557
558 assert(rm != FPRounding_TIEAWAY);
559
560 // There is no flush to zero in this case!
561
562 // The bottom 5 bits of mnt are orred together:
563 mnt = (uint16_t)1 << 12 | mnt >> 4 | ((mnt & 31) != 0);
564
565 if (exp > 0) {
566 biased_exp = exp;
567 int_mant = mnt >> 2;
568 error = mnt & 3;
569 } else {
570 biased_exp = 0;
571 int_mant = lsr16(mnt, 3 - exp);
572 error = (lsr16(mnt, 1 - exp) & 3) | !!(mnt & (lsl16(1, 1 - exp) - 1));
573 }
574
575 if (!biased_exp && error) { // xx should also check fpscr_val<11>
576 *flags |= FPLIB_UFC;
577 }
578
579 // Round up:
580 if ((rm == FPLIB_RN && (error == 3 ||
581 (error == 2 && (int_mant & 1)))) ||
582 (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) {
583 ++int_mant;
584 if (int_mant == (uint32_t)1 << 10) {
585 // Rounded up from denormalized to normalized
586 biased_exp = 1;
587 }
588 if (int_mant == (uint32_t)1 << 11) {
589 // Rounded up to next exponent
590 ++biased_exp;
591 int_mant >>= 1;
592 }
593 }
594
595 // Handle rounding to odd aka Von Neumann rounding:
596 if (error && rm == FPRounding_ODD)
597 int_mant |= 1;
598
599 // Handle overflow:
600 if (!(mode & FPLIB_AHP)) {
601 if (biased_exp >= 31) {
602 *flags |= FPLIB_OFC | FPLIB_IXC;
603 if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) ||
604 (rm == FPLIB_RM && sgn)) {
605 return fp16_infinity(sgn);
606 } else {
607 return fp16_max_normal(sgn);
608 }
609 }
610 } else {
611 if (biased_exp >= 32) {
612 *flags |= FPLIB_IOC;
613 return fp16_pack(sgn, 31, -1);
614 }
615 }
616
617 if (error) {
618 *flags |= FPLIB_IXC;
619 }
620
621 return fp16_pack(sgn, biased_exp, int_mant);
622}
623
624static uint32_t
625fp32_round_(int sgn, int exp, uint32_t mnt, int rm, int mode, int *flags)
626{
627 int biased_exp; // non-negative exponent value for result
628 uint32_t int_mant; // mantissa for result, less than (1 << 24)
629 int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5
630
631 assert(rm != FPRounding_TIEAWAY);
632
633 // Flush to zero:
634 if ((mode & FPLIB_FZ) && exp < 1) {
635 *flags |= FPLIB_UFC;
636 return fp32_zero(sgn);
637 }
638
639 // The bottom 8 bits of mnt are orred together:
640 mnt = (uint32_t)1 << 25 | mnt >> 7 | ((mnt & 255) != 0);
641
642 if (exp > 0) {
643 biased_exp = exp;
644 int_mant = mnt >> 2;
645 error = mnt & 3;
646 } else {
647 biased_exp = 0;
648 int_mant = lsr32(mnt, 3 - exp);
649 error = (lsr32(mnt, 1 - exp) & 3) | !!(mnt & (lsl32(1, 1 - exp) - 1));
650 }
651
652 if (!biased_exp && error) { // xx should also check fpscr_val<11>
653 *flags |= FPLIB_UFC;
654 }
655
656 // Round up:
657 if ((rm == FPLIB_RN && (error == 3 ||
658 (error == 2 && (int_mant & 1)))) ||
659 (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) {
660 ++int_mant;
661 if (int_mant == (uint32_t)1 << 23) {
662 // Rounded up from denormalized to normalized
663 biased_exp = 1;
664 }
665 if (int_mant == (uint32_t)1 << 24) {
666 // Rounded up to next exponent
667 ++biased_exp;
668 int_mant >>= 1;
669 }
670 }
671
672 // Handle rounding to odd aka Von Neumann rounding:
673 if (error && rm == FPRounding_ODD)
674 int_mant |= 1;
675
676 // Handle overflow:
677 if (biased_exp >= 255) {
678 *flags |= FPLIB_OFC | FPLIB_IXC;
679 if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) ||
680 (rm == FPLIB_RM && sgn)) {
681 return fp32_infinity(sgn);
682 } else {
683 return fp32_max_normal(sgn);
684 }
685 }
686
687 if (error) {
688 *flags |= FPLIB_IXC;
689 }
690
691 return fp32_pack(sgn, biased_exp, int_mant);
692}
693
694static uint32_t
695fp32_round(int sgn, int exp, uint32_t mnt, int mode, int *flags)
696{
697 return fp32_round_(sgn, exp, mnt, mode & 3, mode, flags);
698}
699
700static uint64_t
701fp64_round_(int sgn, int exp, uint64_t mnt, int rm, int mode, int *flags)
702{
703 int biased_exp; // non-negative exponent value for result
704 uint64_t int_mant; // mantissa for result, less than (1 << 52)
705 int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5
706
707 assert(rm != FPRounding_TIEAWAY);
708
709 // Flush to zero:
710 if ((mode & FPLIB_FZ) && exp < 1) {
711 *flags |= FPLIB_UFC;
712 return fp64_zero(sgn);
713 }
714
715 // The bottom 11 bits of mnt are orred together:
716 mnt = (uint64_t)1 << 54 | mnt >> 10 | ((mnt & 0x3ff) != 0);
717
718 if (exp > 0) {
719 biased_exp = exp;
720 int_mant = mnt >> 2;
721 error = mnt & 3;
722 } else {
723 biased_exp = 0;
724 int_mant = lsr64(mnt, 3 - exp);
725 error = (lsr64(mnt, 1 - exp) & 3) | !!(mnt & (lsl64(1, 1 - exp) - 1));
726 }
727
728 if (!biased_exp && error) { // xx should also check fpscr_val<11>
729 *flags |= FPLIB_UFC;
730 }
731
732 // Round up:
733 if ((rm == FPLIB_RN && (error == 3 ||
734 (error == 2 && (int_mant & 1)))) ||
735 (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) {
736 ++int_mant;
737 if (int_mant == (uint64_t)1 << 52) {
738 // Rounded up from denormalized to normalized
739 biased_exp = 1;
740 }
741 if (int_mant == (uint64_t)1 << 53) {
742 // Rounded up to next exponent
743 ++biased_exp;
744 int_mant >>= 1;
745 }
746 }
747
748 // Handle rounding to odd aka Von Neumann rounding:
749 if (error && rm == FPRounding_ODD)
750 int_mant |= 1;
751
752 // Handle overflow:
753 if (biased_exp >= 2047) {
754 *flags |= FPLIB_OFC | FPLIB_IXC;
755 if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) ||
756 (rm == FPLIB_RM && sgn)) {
757 return fp64_infinity(sgn);
758 } else {
759 return fp64_max_normal(sgn);
760 }
761 }
762
763 if (error) {
764 *flags |= FPLIB_IXC;
765 }
766
767 return fp64_pack(sgn, biased_exp, int_mant);
768}
769
770static uint64_t
771fp64_round(int sgn, int exp, uint64_t mnt, int mode, int *flags)
772{
773 return fp64_round_(sgn, exp, mnt, mode & 3, mode, flags);
774}
775
776static int
777fp32_compare_eq(uint32_t a, uint32_t b, int mode, int *flags)
778{
779 int a_sgn, a_exp, b_sgn, b_exp;
780 uint32_t a_mnt, b_mnt;
781
782 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
783 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
784
785 if ((a_exp == 255 && (uint32_t)(a_mnt << 9)) ||
786 (b_exp == 255 && (uint32_t)(b_mnt << 9))) {
787 if ((a_exp == 255 && (uint32_t)(a_mnt << 9) && !(a >> 22 & 1)) ||
788 (b_exp == 255 && (uint32_t)(b_mnt << 9) && !(b >> 22 & 1)))
789 *flags |= FPLIB_IOC;
790 return 0;
791 }
792 return a == b || (!a_mnt && !b_mnt);
793}
794
795static int
796fp32_compare_ge(uint32_t a, uint32_t b, int mode, int *flags)
797{
798 int a_sgn, a_exp, b_sgn, b_exp;
799 uint32_t a_mnt, b_mnt;
800
801 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
802 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
803
804 if ((a_exp == 255 && (uint32_t)(a_mnt << 9)) ||
805 (b_exp == 255 && (uint32_t)(b_mnt << 9))) {
806 *flags |= FPLIB_IOC;
807 return 0;
808 }
809 if (!a_mnt && !b_mnt)
810 return 1;
811 if (a_sgn != b_sgn)
812 return b_sgn;
813 if (a_exp != b_exp)
814 return a_sgn ^ (a_exp > b_exp);
815 if (a_mnt != b_mnt)
816 return a_sgn ^ (a_mnt > b_mnt);
817 return 1;
818}
819
820static int
821fp32_compare_gt(uint32_t a, uint32_t b, int mode, int *flags)
822{
823 int a_sgn, a_exp, b_sgn, b_exp;
824 uint32_t a_mnt, b_mnt;
825
826 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
827 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
828
829 if ((a_exp == 255 && (uint32_t)(a_mnt << 9)) ||
830 (b_exp == 255 && (uint32_t)(b_mnt << 9))) {
831 *flags |= FPLIB_IOC;
832 return 0;
833 }
834 if (!a_mnt && !b_mnt)
835 return 0;
836 if (a_sgn != b_sgn)
837 return b_sgn;
838 if (a_exp != b_exp)
839 return a_sgn ^ (a_exp > b_exp);
840 if (a_mnt != b_mnt)
841 return a_sgn ^ (a_mnt > b_mnt);
842 return 0;
843}
844
845static int
846fp64_compare_eq(uint64_t a, uint64_t b, int mode, int *flags)
847{
848 int a_sgn, a_exp, b_sgn, b_exp;
849 uint64_t a_mnt, b_mnt;
850
851 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
852 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
853
854 if ((a_exp == 2047 && (uint64_t)(a_mnt << 12)) ||
855 (b_exp == 2047 && (uint64_t)(b_mnt << 12))) {
856 if ((a_exp == 2047 && (uint64_t)(a_mnt << 12) && !(a >> 51 & 1)) ||
857 (b_exp == 2047 && (uint64_t)(b_mnt << 12) && !(b >> 51 & 1)))
858 *flags |= FPLIB_IOC;
859 return 0;
860 }
861 return a == b || (!a_mnt && !b_mnt);
862}
863
864static int
865fp64_compare_ge(uint64_t a, uint64_t b, int mode, int *flags)
866{
867 int a_sgn, a_exp, b_sgn, b_exp;
868 uint64_t a_mnt, b_mnt;
869
870 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
871 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
872
873 if ((a_exp == 2047 && (uint64_t)(a_mnt << 12)) ||
874 (b_exp == 2047 && (uint64_t)(b_mnt << 12))) {
875 *flags |= FPLIB_IOC;
876 return 0;
877 }
878 if (!a_mnt && !b_mnt)
879 return 1;
880 if (a_sgn != b_sgn)
881 return b_sgn;
882 if (a_exp != b_exp)
883 return a_sgn ^ (a_exp > b_exp);
884 if (a_mnt != b_mnt)
885 return a_sgn ^ (a_mnt > b_mnt);
886 return 1;
887}
888
889static int
890fp64_compare_gt(uint64_t a, uint64_t b, int mode, int *flags)
891{
892 int a_sgn, a_exp, b_sgn, b_exp;
893 uint64_t a_mnt, b_mnt;
894
895 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
896 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
897
898 if ((a_exp == 2047 && (uint64_t)(a_mnt << 12)) ||
899 (b_exp == 2047 && (uint64_t)(b_mnt << 12))) {
900 *flags |= FPLIB_IOC;
901 return 0;
902 }
903 if (!a_mnt && !b_mnt)
904 return 0;
905 if (a_sgn != b_sgn)
906 return b_sgn;
907 if (a_exp != b_exp)
908 return a_sgn ^ (a_exp > b_exp);
909 if (a_mnt != b_mnt)
910 return a_sgn ^ (a_mnt > b_mnt);
911 return 0;
912}
913
914static uint32_t
915fp32_add(uint32_t a, uint32_t b, int neg, int mode, int *flags)
916{
917 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
918 uint32_t a_mnt, b_mnt, x, x_mnt;
919
920 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
921 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
922
923 if ((x = fp32_process_NaNs(a, b, mode, flags))) {
924 return x;
925 }
926
927 b_sgn ^= neg;
928
929 // Handle infinities and zeroes:
930 if (a_exp == 255 && b_exp == 255 && a_sgn != b_sgn) {
931 *flags |= FPLIB_IOC;
932 return fp32_defaultNaN();
933 } else if (a_exp == 255) {
934 return fp32_infinity(a_sgn);
935 } else if (b_exp == 255) {
936 return fp32_infinity(b_sgn);
937 } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) {
938 return fp32_zero(a_sgn);
939 }
940
941 a_mnt <<= 3;
942 b_mnt <<= 3;
943 if (a_exp >= b_exp) {
944 b_mnt = (lsr32(b_mnt, a_exp - b_exp) |
945 !!(b_mnt & (lsl32(1, a_exp - b_exp) - 1)));
946 b_exp = a_exp;
947 } else {
948 a_mnt = (lsr32(a_mnt, b_exp - a_exp) |
949 !!(a_mnt & (lsl32(1, b_exp - a_exp) - 1)));
950 a_exp = b_exp;
951 }
952 x_sgn = a_sgn;
953 x_exp = a_exp;
954 if (a_sgn == b_sgn) {
955 x_mnt = a_mnt + b_mnt;
956 } else if (a_mnt >= b_mnt) {
957 x_mnt = a_mnt - b_mnt;
958 } else {
959 x_sgn ^= 1;
960 x_mnt = b_mnt - a_mnt;
961 }
962
963 if (!x_mnt) {
964 // Sign of exact zero result depends on rounding mode
965 return fp32_zero((mode & 3) == 2);
966 }
967
968 x_mnt = fp32_normalise(x_mnt, &x_exp);
969
970 return fp32_round(x_sgn, x_exp + 5, x_mnt << 1, mode, flags);
971}
972
973static uint64_t
974fp64_add(uint64_t a, uint64_t b, int neg, int mode, int *flags)
975{
976 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
977 uint64_t a_mnt, b_mnt, x, x_mnt;
978
979 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
980 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
981
982 if ((x = fp64_process_NaNs(a, b, mode, flags))) {
983 return x;
984 }
985
986 b_sgn ^= neg;
987
988 // Handle infinities and zeroes:
989 if (a_exp == 2047 && b_exp == 2047 && a_sgn != b_sgn) {
990 *flags |= FPLIB_IOC;
991 return fp64_defaultNaN();
992 } else if (a_exp == 2047) {
993 return fp64_infinity(a_sgn);
994 } else if (b_exp == 2047) {
995 return fp64_infinity(b_sgn);
996 } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) {
997 return fp64_zero(a_sgn);
998 }
999
1000 a_mnt <<= 3;
1001 b_mnt <<= 3;
1002 if (a_exp >= b_exp) {
1003 b_mnt = (lsr64(b_mnt, a_exp - b_exp) |
1004 !!(b_mnt & (lsl64(1, a_exp - b_exp) - 1)));
1005 b_exp = a_exp;
1006 } else {
1007 a_mnt = (lsr64(a_mnt, b_exp - a_exp) |
1008 !!(a_mnt & (lsl64(1, b_exp - a_exp) - 1)));
1009 a_exp = b_exp;
1010 }
1011 x_sgn = a_sgn;
1012 x_exp = a_exp;
1013 if (a_sgn == b_sgn) {
1014 x_mnt = a_mnt + b_mnt;
1015 } else if (a_mnt >= b_mnt) {
1016 x_mnt = a_mnt - b_mnt;
1017 } else {
1018 x_sgn ^= 1;
1019 x_mnt = b_mnt - a_mnt;
1020 }
1021
1022 if (!x_mnt) {
1023 // Sign of exact zero result depends on rounding mode
1024 return fp64_zero((mode & 3) == 2);
1025 }
1026
1027 x_mnt = fp64_normalise(x_mnt, &x_exp);
1028
1029 return fp64_round(x_sgn, x_exp + 8, x_mnt << 1, mode, flags);
1030}
1031
1032static uint32_t
1033fp32_mul(uint32_t a, uint32_t b, int mode, int *flags)
1034{
1035 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1036 uint32_t a_mnt, b_mnt, x;
1037 uint64_t x_mnt;
1038
1039 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1040 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1041
1042 if ((x = fp32_process_NaNs(a, b, mode, flags))) {
1043 return x;
1044 }
1045
1046 // Handle infinities and zeroes:
1047 if ((a_exp == 255 && !b_mnt) || (b_exp == 255 && !a_mnt)) {
1048 *flags |= FPLIB_IOC;
1049 return fp32_defaultNaN();
1050 } else if (a_exp == 255 || b_exp == 255) {
1051 return fp32_infinity(a_sgn ^ b_sgn);
1052 } else if (!a_mnt || !b_mnt) {
1053 return fp32_zero(a_sgn ^ b_sgn);
1054 }
1055
1056 // Multiply and normalise:
1057 x_sgn = a_sgn ^ b_sgn;
1058 x_exp = a_exp + b_exp - 110;
1059 x_mnt = (uint64_t)a_mnt * b_mnt;
1060 x_mnt = fp64_normalise(x_mnt, &x_exp);
1061
1062 // Convert to 32 bits, collapsing error into bottom bit:
1063 x_mnt = lsr64(x_mnt, 31) | !!lsl64(x_mnt, 33);
1064
1065 return fp32_round(x_sgn, x_exp, x_mnt, mode, flags);
1066}
1067
1068static uint64_t
1069fp64_mul(uint64_t a, uint64_t b, int mode, int *flags)
1070{
1071 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1072 uint64_t a_mnt, b_mnt, x;
1073 uint64_t x0_mnt, x1_mnt;
1074
1075 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1076 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1077
1078 if ((x = fp64_process_NaNs(a, b, mode, flags))) {
1079 return x;
1080 }
1081
1082 // Handle infinities and zeroes:
1083 if ((a_exp == 2047 && !b_mnt) || (b_exp == 2047 && !a_mnt)) {
1084 *flags |= FPLIB_IOC;
1085 return fp64_defaultNaN();
1086 } else if (a_exp == 2047 || b_exp == 2047) {
1087 return fp64_infinity(a_sgn ^ b_sgn);
1088 } else if (!a_mnt || !b_mnt) {
1089 return fp64_zero(a_sgn ^ b_sgn);
1090 }
1091
1092 // Multiply and normalise:
1093 x_sgn = a_sgn ^ b_sgn;
1094 x_exp = a_exp + b_exp - 1000;
1095 mul62x62(&x0_mnt, &x1_mnt, a_mnt, b_mnt);
1096 fp128_normalise(&x0_mnt, &x1_mnt, &x_exp);
1097
1098 // Convert to 64 bits, collapsing error into bottom bit:
1099 x0_mnt = x1_mnt << 1 | !!x0_mnt;
1100
1101 return fp64_round(x_sgn, x_exp, x0_mnt, mode, flags);
1102}
1103
1104static uint32_t
1105fp32_muladd(uint32_t a, uint32_t b, uint32_t c, int scale,
1106 int mode, int *flags)
1107{
1108 int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp;
1109 uint32_t a_mnt, b_mnt, c_mnt, x;
1110 uint64_t x_mnt, y_mnt;
1111
1112 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1113 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1114 fp32_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags);
1115
1116 x = fp32_process_NaNs3(a, b, c, mode, flags);
1117
1118 // Quiet NaN added to product of zero and infinity:
1119 if (a_exp == 255 && (a_mnt >> 22 & 1) &&
1120 ((!b_mnt && c_exp == 255 && !(uint32_t)(c_mnt << 9)) ||
1121 (!c_mnt && b_exp == 255 && !(uint32_t)(b_mnt << 9)))) {
1122 x = fp32_defaultNaN();
1123 *flags |= FPLIB_IOC;
1124 }
1125
1126 if (x) {
1127 return x;
1128 }
1129
1130 // Handle infinities and zeroes:
1131 if ((b_exp == 255 && !c_mnt) ||
1132 (c_exp == 255 && !b_mnt) ||
1133 (a_exp == 255 && (b_exp == 255 || c_exp == 255) &&
1134 (a_sgn != (b_sgn ^ c_sgn)))) {
1135 *flags |= FPLIB_IOC;
1136 return fp32_defaultNaN();
1137 }
1138 if (a_exp == 255)
1139 return fp32_infinity(a_sgn);
1140 if (b_exp == 255 || c_exp == 255)
1141 return fp32_infinity(b_sgn ^ c_sgn);
1142 if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn))
1143 return fp32_zero(a_sgn);
1144
1145 x_sgn = a_sgn;
1146 x_exp = a_exp + 13;
1147 x_mnt = (uint64_t)a_mnt << 27;
1148
1149 // Multiply:
1150 y_sgn = b_sgn ^ c_sgn;
1151 y_exp = b_exp + c_exp - 113;
1152 y_mnt = (uint64_t)b_mnt * c_mnt << 3;
1153 if (!y_mnt) {
1154 y_exp = x_exp;
1155 }
1156
1157 // Add:
1158 if (x_exp >= y_exp) {
1159 y_mnt = (lsr64(y_mnt, x_exp - y_exp) |
1160 !!(y_mnt & (lsl64(1, x_exp - y_exp) - 1)));
1161 y_exp = x_exp;
1162 } else {
1163 x_mnt = (lsr64(x_mnt, y_exp - x_exp) |
1164 !!(x_mnt & (lsl64(1, y_exp - x_exp) - 1)));
1165 x_exp = y_exp;
1166 }
1167 if (x_sgn == y_sgn) {
1168 x_mnt = x_mnt + y_mnt;
1169 } else if (x_mnt >= y_mnt) {
1170 x_mnt = x_mnt - y_mnt;
1171 } else {
1172 x_sgn ^= 1;
1173 x_mnt = y_mnt - x_mnt;
1174 }
1175
1176 if (!x_mnt) {
1177 // Sign of exact zero result depends on rounding mode
1178 return fp32_zero((mode & 3) == 2);
1179 }
1180
1181 // Normalise and convert to 32 bits, collapsing error into bottom bit:
1182 x_mnt = fp64_normalise(x_mnt, &x_exp);
1183 x_mnt = x_mnt >> 31 | !!(uint32_t)(x_mnt << 1);
1184
1185 return fp32_round(x_sgn, x_exp + scale, x_mnt, mode, flags);
1186}
1187
1188static uint64_t
1189fp64_muladd(uint64_t a, uint64_t b, uint64_t c, int scale,
1190 int mode, int *flags)
1191{
1192 int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp;
1193 uint64_t a_mnt, b_mnt, c_mnt, x;
1194 uint64_t x0_mnt, x1_mnt, y0_mnt, y1_mnt;
1195
1196 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1197 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1198 fp64_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags);
1199
1200 x = fp64_process_NaNs3(a, b, c, mode, flags);
1201
1202 // Quiet NaN added to product of zero and infinity:
1203 if (a_exp == 2047 && (a_mnt >> 51 & 1) &&
1204 ((!b_mnt && c_exp == 2047 && !(uint64_t)(c_mnt << 12)) ||
1205 (!c_mnt && b_exp == 2047 && !(uint64_t)(b_mnt << 12)))) {
1206 x = fp64_defaultNaN();
1207 *flags |= FPLIB_IOC;
1208 }
1209
1210 if (x) {
1211 return x;
1212 }
1213
1214 // Handle infinities and zeroes:
1215 if ((b_exp == 2047 && !c_mnt) ||
1216 (c_exp == 2047 && !b_mnt) ||
1217 (a_exp == 2047 && (b_exp == 2047 || c_exp == 2047) &&
1218 (a_sgn != (b_sgn ^ c_sgn)))) {
1219 *flags |= FPLIB_IOC;
1220 return fp64_defaultNaN();
1221 }
1222 if (a_exp == 2047)
1223 return fp64_infinity(a_sgn);
1224 if (b_exp == 2047 || c_exp == 2047)
1225 return fp64_infinity(b_sgn ^ c_sgn);
1226 if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn))
1227 return fp64_zero(a_sgn);
1228
1229 x_sgn = a_sgn;
1230 x_exp = a_exp + 11;
1231 x0_mnt = 0;
1232 x1_mnt = a_mnt;
1233
1234 // Multiply:
1235 y_sgn = b_sgn ^ c_sgn;
1236 y_exp = b_exp + c_exp - 1003;
1237 mul62x62(&y0_mnt, &y1_mnt, b_mnt, c_mnt << 3);
1238 if (!y0_mnt && !y1_mnt) {
1239 y_exp = x_exp;
1240 }
1241
1242 // Add:
1243 if (x_exp >= y_exp) {
1244 uint64_t t0, t1;
1245 lsl128(&t0, &t1, y0_mnt, y1_mnt,
1246 x_exp - y_exp < 128 ? 128 - (x_exp - y_exp) : 0);
1247 lsr128(&y0_mnt, &y1_mnt, y0_mnt, y1_mnt, x_exp - y_exp);
1248 y0_mnt |= !!(t0 | t1);
1249 y_exp = x_exp;
1250 } else {
1251 uint64_t t0, t1;
1252 lsl128(&t0, &t1, x0_mnt, x1_mnt,
1253 y_exp - x_exp < 128 ? 128 - (y_exp - x_exp) : 0);
1254 lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y_exp - x_exp);
1255 x0_mnt |= !!(t0 | t1);
1256 x_exp = y_exp;
1257 }
1258 if (x_sgn == y_sgn) {
1259 add128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y0_mnt, y1_mnt);
1260 } else if (cmp128(x0_mnt, x1_mnt, y0_mnt, y1_mnt) >= 0) {
1261 sub128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y0_mnt, y1_mnt);
1262 } else {
1263 x_sgn ^= 1;
1264 sub128(&x0_mnt, &x1_mnt, y0_mnt, y1_mnt, x0_mnt, x1_mnt);
1265 }
1266
1267 if (!x0_mnt && !x1_mnt) {
1268 // Sign of exact zero result depends on rounding mode
1269 return fp64_zero((mode & 3) == 2);
1270 }
1271
1272 // Normalise and convert to 64 bits, collapsing error into bottom bit:
1273 fp128_normalise(&x0_mnt, &x1_mnt, &x_exp);
1274 x0_mnt = x1_mnt << 1 | !!x0_mnt;
1275
1276 return fp64_round(x_sgn, x_exp + scale, x0_mnt, mode, flags);
1277}
1278
1279static uint32_t
1280fp32_div(uint32_t a, uint32_t b, int mode, int *flags)
1281{
1282 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1283 uint32_t a_mnt, b_mnt, x;
1284 uint64_t x_mnt;
1285
1286 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1287 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1288
1289 if ((x = fp32_process_NaNs(a, b, mode, flags)))
1290 return x;
1291
1292 // Handle infinities and zeroes:
1293 if ((a_exp == 255 && b_exp == 255) || (!a_mnt && !b_mnt)) {
1294 *flags |= FPLIB_IOC;
1295 return fp32_defaultNaN();
1296 }
1297 if (a_exp == 255 || !b_mnt) {
1298 if (a_exp != 255)
1299 *flags |= FPLIB_DZC;
1300 return fp32_infinity(a_sgn ^ b_sgn);
1301 }
1302 if (!a_mnt || b_exp == 255)
1303 return fp32_zero(a_sgn ^ b_sgn);
1304
1305 // Divide, setting bottom bit if inexact:
1306 a_mnt = fp32_normalise(a_mnt, &a_exp);
1307 x_sgn = a_sgn ^ b_sgn;
1308 x_exp = a_exp - b_exp + 172;
1309 x_mnt = ((uint64_t)a_mnt << 18) / b_mnt;
1310 x_mnt |= (x_mnt * b_mnt != (uint64_t)a_mnt << 18);
1311
1312 // Normalise and convert to 32 bits, collapsing error into bottom bit:
1313 x_mnt = fp64_normalise(x_mnt, &x_exp);
1314 x_mnt = x_mnt >> 31 | !!(uint32_t)(x_mnt << 1);
1315
1316 return fp32_round(x_sgn, x_exp, x_mnt, mode, flags);
1317}
1318
1319static uint64_t
1320fp64_div(uint64_t a, uint64_t b, int mode, int *flags)
1321{
1322 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp, c;
1323 uint64_t a_mnt, b_mnt, x, x_mnt, x0_mnt, x1_mnt;
1324
1325 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1326 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1327
1328 if ((x = fp64_process_NaNs(a, b, mode, flags)))
1329 return x;
1330
1331 // Handle infinities and zeroes:
1332 if ((a_exp == 2047 && b_exp == 2047) || (!a_mnt && !b_mnt)) {
1333 *flags |= FPLIB_IOC;
1334 return fp64_defaultNaN();
1335 }
1336 if (a_exp == 2047 || !b_mnt) {
1337 if (a_exp != 2047)
1338 *flags |= FPLIB_DZC;
1339 return fp64_infinity(a_sgn ^ b_sgn);
1340 }
1341 if (!a_mnt || b_exp == 2047)
1342 return fp64_zero(a_sgn ^ b_sgn);
1343
1344 // Find reciprocal of divisor with Newton-Raphson:
1345 a_mnt = fp64_normalise(a_mnt, &a_exp);
1346 b_mnt = fp64_normalise(b_mnt, &b_exp);
1347 x_mnt = ~(uint64_t)0 / (b_mnt >> 31);
1348 mul64x32(&x0_mnt, &x1_mnt, b_mnt, x_mnt);
1349 sub128(&x0_mnt, &x1_mnt, 0, (uint64_t)1 << 32, x0_mnt, x1_mnt);
1350 lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 32);
1351 mul64x32(&x0_mnt, &x1_mnt, x0_mnt, x_mnt);
1352 lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 33);
1353
1354 // Multiply by dividend:
1355 x_sgn = a_sgn ^ b_sgn;
1356 x_exp = a_exp - b_exp + 1031;
1357 mul62x62(&x0_mnt, &x1_mnt, x0_mnt, a_mnt >> 2); // xx 62x62 is enough
1358 lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 4);
1359 x_mnt = x1_mnt;
1360
1361 // This is an underestimate, so try adding one:
1362 mul62x62(&x0_mnt, &x1_mnt, b_mnt >> 2, x_mnt + 1); // xx 62x62 is enough
1363 c = cmp128(x0_mnt, x1_mnt, 0, a_mnt >> 11);
1364 if (c <= 0) {
1365 ++x_mnt;
1366 }
1367
1368 x_mnt = fp64_normalise(x_mnt, &x_exp);
1369
1370 return fp64_round(x_sgn, x_exp, x_mnt << 1 | !!c, mode, flags);
1371}
1372
1373static void
1374set_fpscr0(FPSCR &fpscr, int flags)
1375{
1376 if (flags & FPLIB_IDC) {
1377 fpscr.idc = 1;
1378 }
1379 if (flags & FPLIB_IOC) {
1380 fpscr.ioc = 1;
1381 }
1382 if (flags & FPLIB_DZC) {
1383 fpscr.dzc = 1;
1384 }
1385 if (flags & FPLIB_OFC) {
1386 fpscr.ofc = 1;
1387 }
1388 if (flags & FPLIB_UFC) {
1389 fpscr.ufc = 1;
1390 }
1391 if (flags & FPLIB_IXC) {
1392 fpscr.ixc = 1;
1393 }
1394}
1395
1396static uint32_t
1397fp32_sqrt(uint32_t a, int mode, int *flags)
1398{
1399 int a_sgn, a_exp, x_sgn, x_exp;
1400 uint32_t a_mnt, x, x_mnt;
1401 uint64_t t0, t1;
1402
1403 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1404
1405 // Handle NaNs:
1406 if (a_exp == 255 && (uint32_t)(a_mnt << 9))
1407 return fp32_process_NaN(a, mode, flags);
1408
1409 // Handle infinities and zeroes:
1410 if (!a_mnt) {
1411 return fp32_zero(a_sgn);
1412 }
1413 if (a_exp == 255 && !a_sgn) {
1414 return fp32_infinity(a_sgn);
1415 }
1416 if (a_sgn) {
1417 *flags |= FPLIB_IOC;
1418 return fp32_defaultNaN();
1419 }
1420
1421 a_mnt = fp32_normalise(a_mnt, &a_exp);
1422 if (!(a_exp & 1)) {
1423 ++a_exp;
1424 a_mnt >>= 1;
1425 }
1426
1427 // x = (a * 3 + 5) / 8
1428 x = (a_mnt >> 2) + (a_mnt >> 3) + (5 << 28);
1429
1430 // x = (a / x + x) / 2; // 16-bit accuracy
1431 x = (a_mnt / (x >> 15) + (x >> 16)) << 15;
1432
1433 // x = (a / x + x) / 2; // 16-bit accuracy
1434 x = (a_mnt / (x >> 15) + (x >> 16)) << 15;
1435
1436 // x = (a / x + x) / 2; // 32-bit accuracy
1437 x = ((((uint64_t)a_mnt << 32) / x) >> 2) + (x >> 1);
1438
1439 x_sgn = 0;
1440 x_exp = (a_exp + 147) >> 1;
1441 x_mnt = ((x - (1 << 5)) >> 6) + 1;
1442 t1 = (uint64_t)x_mnt * x_mnt;
1443 t0 = (uint64_t)a_mnt << 19;
1444 if (t1 > t0) {
1445 --x_mnt;
1446 }
1447
1448 x_mnt = fp32_normalise(x_mnt, &x_exp);
1449
1450 return fp32_round(x_sgn, x_exp, x_mnt << 1 | (t1 != t0), mode, flags);
1451}
1452
1453static uint64_t
1454fp64_sqrt(uint64_t a, int mode, int *flags)
1455{
1456 int a_sgn, a_exp, x_sgn, x_exp, c;
1457 uint64_t a_mnt, x_mnt, r, x0, x1;
1458 uint32_t x;
1459
1460 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1461
1462 // Handle NaNs:
1463 if (a_exp == 2047 && (uint64_t)(a_mnt << 12)) {
1464 return fp64_process_NaN(a, mode, flags);
1465 }
1466
1467 // Handle infinities and zeroes:
1468 if (!a_mnt)
1469 return fp64_zero(a_sgn);
1470 if (a_exp == 2047 && !a_sgn)
1471 return fp64_infinity(a_sgn);
1472 if (a_sgn) {
1473 *flags |= FPLIB_IOC;
1474 return fp64_defaultNaN();
1475 }
1476
1477 a_mnt = fp64_normalise(a_mnt, &a_exp);
1478 if (a_exp & 1) {
1479 ++a_exp;
1480 a_mnt >>= 1;
1481 }
1482
1483 // x = (a * 3 + 5) / 8
1484 x = (a_mnt >> 34) + (a_mnt >> 35) + (5 << 28);
1485
1486 // x = (a / x + x) / 2; // 16-bit accuracy
1487 x = ((a_mnt >> 32) / (x >> 15) + (x >> 16)) << 15;
1488
1489 // x = (a / x + x) / 2; // 16-bit accuracy
1490 x = ((a_mnt >> 32) / (x >> 15) + (x >> 16)) << 15;
1491
1492 // x = (a / x + x) / 2; // 32-bit accuracy
1493 x = ((a_mnt / x) >> 2) + (x >> 1);
1494
1495 // r = 1 / x; // 32-bit accuracy
1496 r = ((uint64_t)1 << 62) / x;
1497
1498 // r = r * (2 - x * r); // 64-bit accuracy
1499 mul64x32(&x0, &x1, -(uint64_t)x * r << 1, r);
1500 lsr128(&x0, &x1, x0, x1, 31);
1501
1502 // x = (x + a * r) / 2; // 64-bit accuracy
1503 mul62x62(&x0, &x1, a_mnt >> 10, x0 >> 2);
1504 lsl128(&x0, &x1, x0, x1, 5);
1505 lsr128(&x0, &x1, x0, x1, 56);
1506
1507 x0 = ((uint64_t)x << 31) + (x0 >> 1);
1508
1509 x_sgn = 0;
1510 x_exp = (a_exp + 1053) >> 1;
1511 x_mnt = x0;
1512 x_mnt = ((x_mnt - (1 << 8)) >> 9) + 1;
1513 mul62x62(&x0, &x1, x_mnt, x_mnt);
1514 lsl128(&x0, &x1, x0, x1, 19);
1515 c = cmp128(x0, x1, 0, a_mnt);
1516 if (c > 0)
1517 --x_mnt;
1518
1519 x_mnt = fp64_normalise(x_mnt, &x_exp);
1520
1521 return fp64_round(x_sgn, x_exp, x_mnt << 1 | !!c, mode, flags);
1522}
1523
1524static int
1525modeConv(FPSCR fpscr)
1526{
1527 return (((int) fpscr) >> 22) & 0xF;
1528}
1529
1530static void
1531set_fpscr(FPSCR &fpscr, int flags)
1532{
1533 // translate back to FPSCR
1534 bool underflow = false;
1535 if (flags & FPLIB_IDC) {
1536 fpscr.idc = 1;
1537 }
1538 if (flags & FPLIB_IOC) {
1539 fpscr.ioc = 1;
1540 }
1541 if (flags & FPLIB_DZC) {
1542 fpscr.dzc = 1;
1543 }
1544 if (flags & FPLIB_OFC) {
1545 fpscr.ofc = 1;
1546 }
1547 if (flags & FPLIB_UFC) {
1548 underflow = true; //xx Why is this required?
1549 fpscr.ufc = 1;
1550 }
1551 if ((flags & FPLIB_IXC) && !(underflow && fpscr.fz)) {
1552 fpscr.ixc = 1;
1553 }
1554}
1555
1556template <>
1557bool
1558fplibCompareEQ(uint32_t a, uint32_t b, FPSCR &fpscr)
1559{
1560 int flags = 0;
1561 int x = fp32_compare_eq(a, b, modeConv(fpscr), &flags);
1562 set_fpscr(fpscr, flags);
1563 return x;
1564}
1565
1566template <>
1567bool
1568fplibCompareGE(uint32_t a, uint32_t b, FPSCR &fpscr)
1569{
1570 int flags = 0;
1571 int x = fp32_compare_ge(a, b, modeConv(fpscr), &flags);
1572 set_fpscr(fpscr, flags);
1573 return x;
1574}
1575
1576template <>
1577bool
1578fplibCompareGT(uint32_t a, uint32_t b, FPSCR &fpscr)
1579{
1580 int flags = 0;
1581 int x = fp32_compare_gt(a, b, modeConv(fpscr), &flags);
1582 set_fpscr(fpscr, flags);
1583 return x;
1584}
1585
1586template <>
1587bool
1588fplibCompareEQ(uint64_t a, uint64_t b, FPSCR &fpscr)
1589{
1590 int flags = 0;
1591 int x = fp64_compare_eq(a, b, modeConv(fpscr), &flags);
1592 set_fpscr(fpscr, flags);
1593 return x;
1594}
1595
1596template <>
1597bool
1598fplibCompareGE(uint64_t a, uint64_t b, FPSCR &fpscr)
1599{
1600 int flags = 0;
1601 int x = fp64_compare_ge(a, b, modeConv(fpscr), &flags);
1602 set_fpscr(fpscr, flags);
1603 return x;
1604}
1605
1606template <>
1607bool
1608fplibCompareGT(uint64_t a, uint64_t b, FPSCR &fpscr)
1609{
1610 int flags = 0;
1611 int x = fp64_compare_gt(a, b, modeConv(fpscr), &flags);
1612 set_fpscr(fpscr, flags);
1613 return x;
1614}
1615
1616template <>
1617uint32_t
1618fplibAbs(uint32_t op)
1619{
1620 return op & ~((uint32_t)1 << 31);
1621}
1622
1623template <>
1624uint64_t
1625fplibAbs(uint64_t op)
1626{
1627 return op & ~((uint64_t)1 << 63);
1628}
1629
1630template <>
1631uint32_t
1632fplibAdd(uint32_t op1, uint32_t op2, FPSCR &fpscr)
1633{
1634 int flags = 0;
1635 uint32_t result = fp32_add(op1, op2, 0, modeConv(fpscr), &flags);
1636 set_fpscr0(fpscr, flags);
1637 return result;
1638}
1639
1640template <>
1641uint64_t
1642fplibAdd(uint64_t op1, uint64_t op2, FPSCR &fpscr)
1643{
1644 int flags = 0;
1645 uint64_t result = fp64_add(op1, op2, 0, modeConv(fpscr), &flags);
1646 set_fpscr0(fpscr, flags);
1647 return result;
1648}
1649
1650template <>
1651int
1652fplibCompare(uint32_t op1, uint32_t op2, bool signal_nans, FPSCR &fpscr)
1653{
1654 int mode = modeConv(fpscr);
1655 int flags = 0;
1656 int sgn1, exp1, sgn2, exp2, result;
1657 uint32_t mnt1, mnt2;
1658
1659 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
1660 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
1661
1662 if ((exp1 == 255 && (uint32_t)(mnt1 << 9)) ||
1663 (exp2 == 255 && (uint32_t)(mnt2 << 9))) {
1664 result = 3;
1665 if ((exp1 == 255 && (uint32_t)(mnt1 << 9) && !(mnt1 >> 22 & 1)) ||
1666 (exp2 == 255 && (uint32_t)(mnt2 << 9) && !(mnt2 >> 22 & 1)) ||
1667 signal_nans)
1668 flags |= FPLIB_IOC;
1669 } else {
1670 if (op1 == op2 || (!mnt1 && !mnt2)) {
1671 result = 6;
1672 } else if (sgn1 != sgn2) {
1673 result = sgn1 ? 8 : 2;
1674 } else if (exp1 != exp2) {
1675 result = sgn1 ^ (exp1 < exp2) ? 8 : 2;
1676 } else {
1677 result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2;
1678 }
1679 }
1680
1681 set_fpscr0(fpscr, flags);
1682
1683 return result;
1684}
1685
1686template <>
1687int
1688fplibCompare(uint64_t op1, uint64_t op2, bool signal_nans, FPSCR &fpscr)
1689{
1690 int mode = modeConv(fpscr);
1691 int flags = 0;
1692 int sgn1, exp1, sgn2, exp2, result;
1693 uint64_t mnt1, mnt2;
1694
1695 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
1696 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
1697
1698 if ((exp1 == 2047 && (uint64_t)(mnt1 << 12)) ||
1699 (exp2 == 2047 && (uint64_t)(mnt2 << 12))) {
1700 result = 3;
1701 if ((exp1 == 2047 && (uint64_t)(mnt1 << 12) && !(mnt1 >> 51 & 1)) ||
1702 (exp2 == 2047 && (uint64_t)(mnt2 << 12) && !(mnt2 >> 51 & 1)) ||
1703 signal_nans)
1704 flags |= FPLIB_IOC;
1705 } else {
1706 if (op1 == op2 || (!mnt1 && !mnt2)) {
1707 result = 6;
1708 } else if (sgn1 != sgn2) {
1709 result = sgn1 ? 8 : 2;
1710 } else if (exp1 != exp2) {
1711 result = sgn1 ^ (exp1 < exp2) ? 8 : 2;
1712 } else {
1713 result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2;
1714 }
1715 }
1716
1717 set_fpscr0(fpscr, flags);
1718
1719 return result;
1720}
1721
1722static uint16_t
1723fp16_FPConvertNaN_32(uint32_t op)
1724{
1725 return fp16_pack(op >> 31, 31, (uint16_t)1 << 9 | op >> 13);
1726}
1727
1728static uint16_t
1729fp16_FPConvertNaN_64(uint64_t op)
1730{
1731 return fp16_pack(op >> 63, 31, (uint16_t)1 << 9 | op >> 42);
1732}
1733
1734static uint32_t
1735fp32_FPConvertNaN_16(uint16_t op)
1736{
1737 return fp32_pack(op >> 15, 255, (uint32_t)1 << 22 | (uint32_t)op << 13);
1738}
1739
1740static uint32_t
1741fp32_FPConvertNaN_64(uint64_t op)
1742{
1743 return fp32_pack(op >> 63, 255, (uint32_t)1 << 22 | op >> 29);
1744}
1745
1746static uint64_t
1747fp64_FPConvertNaN_16(uint16_t op)
1748{
1749 return fp64_pack(op >> 15, 2047, (uint64_t)1 << 51 | (uint64_t)op << 42);
1750}
1751
1752static uint64_t
1753fp64_FPConvertNaN_32(uint32_t op)
1754{
1755 return fp64_pack(op >> 31, 2047, (uint64_t)1 << 51 | (uint64_t)op << 29);
1756}
1757
1758static uint32_t
1759fp32_FPOnePointFive(int sgn)
1760{
1761 return fp32_pack(sgn, 127, (uint64_t)1 << 22);
1762}
1763
1764static uint64_t
1765fp64_FPOnePointFive(int sgn)
1766{
1767 return fp64_pack(sgn, 1023, (uint64_t)1 << 51);
1768}
1769
1770static uint32_t
1771fp32_FPThree(int sgn)
1772{
1773 return fp32_pack(sgn, 128, (uint64_t)1 << 22);
1774}
1775
1776static uint64_t
1777fp64_FPThree(int sgn)
1778{
1779 return fp64_pack(sgn, 1024, (uint64_t)1 << 51);
1780}
1781
1782static uint32_t
1783fp32_FPTwo(int sgn)
1784{
1785 return fp32_pack(sgn, 128, 0);
1786}
1787
1788static uint64_t
1789fp64_FPTwo(int sgn)
1790{
1791 return fp64_pack(sgn, 1024, 0);
1792}
1793
1794template <>
1795uint16_t
1796fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr)
1797{
1798 int mode = modeConv(fpscr);
1799 int flags = 0;
1800 int sgn, exp;
1801 uint32_t mnt;
1802 uint16_t result;
1803
1804 // Unpack floating-point operand optionally with flush-to-zero:
1805 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
1806
1807 bool alt_hp = fpscr.ahp;
1808
1809 if (exp == 255 && (uint32_t)(mnt << 9)) {
1810 if (alt_hp) {
1811 result = fp16_zero(sgn);
1812 } else if (fpscr.dn) {
1813 result = fp16_defaultNaN();
1814 } else {
1815 result = fp16_FPConvertNaN_32(op);
1816 }
1817 if (!(mnt >> 22 & 1) || alt_hp) {
1818 flags |= FPLIB_IOC;
1819 }
1820 } else if (exp == 255) {
1821 if (alt_hp) {
1822 result = sgn << 15 | (uint16_t)0x7fff;
1823 flags |= FPLIB_IOC;
1824 } else {
1825 result = fp16_infinity(sgn);
1826 }
1827 } else if (!mnt) {
1828 result = fp16_zero(sgn);
1829 } else {
1830 result = fp16_round_(sgn, exp - 127 + 15,
1831 mnt >> 7 | !!(uint32_t)(mnt << 25),
1832 rounding, mode | alt_hp << 4, &flags);
1833 }
1834
1835 set_fpscr0(fpscr, flags);
1836
1837 return result;
1838}
1839
1840template <>
1841uint16_t
1842fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr)
1843{
1844 int mode = modeConv(fpscr);
1845 int flags = 0;
1846 int sgn, exp;
1847 uint64_t mnt;
1848 uint16_t result;
1849
1850 // Unpack floating-point operand optionally with flush-to-zero:
1851 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
1852
1853 bool alt_hp = fpscr.ahp;
1854
1855 if (exp == 2047 && (uint64_t)(mnt << 12)) {
1856 if (alt_hp) {
1857 result = fp16_zero(sgn);
1858 } else if (fpscr.dn) {
1859 result = fp16_defaultNaN();
1860 } else {
1861 result = fp16_FPConvertNaN_64(op);
1862 }
1863 if (!(mnt >> 51 & 1) || alt_hp) {
1864 flags |= FPLIB_IOC;
1865 }
1866 } else if (exp == 2047) {
1867 if (alt_hp) {
1868 result = sgn << 15 | (uint16_t)0x7fff;
1869 flags |= FPLIB_IOC;
1870 } else {
1871 result = fp16_infinity(sgn);
1872 }
1873 } else if (!mnt) {
1874 result = fp16_zero(sgn);
1875 } else {
1876 result = fp16_round_(sgn, exp - 1023 + 15,
1877 mnt >> 36 | !!(uint64_t)(mnt << 28),
1878 rounding, mode | alt_hp << 4, &flags);
1879 }
1880
1881 set_fpscr0(fpscr, flags);
1882
1883 return result;
1884}
1885
1886template <>
1887uint32_t
1888fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr)
1889{
1890 int mode = modeConv(fpscr);
1891 int flags = 0;
1892 int sgn, exp;
1893 uint16_t mnt;
1894 uint32_t result;
1895
1896 // Unpack floating-point operand optionally with flush-to-zero:
1897 fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags);
1898
1899 if (exp == 31 && !fpscr.ahp && (uint16_t)(mnt << 6)) {
1900 if (fpscr.dn) {
1901 result = fp32_defaultNaN();
1902 } else {
1903 result = fp32_FPConvertNaN_16(op);
1904 }
1905 if (!(mnt >> 9 & 1)) {
1906 flags |= FPLIB_IOC;
1907 }
1908 } else if (exp == 31 && !fpscr.ahp) {
1909 result = fp32_infinity(sgn);
1910 } else if (!mnt) {
1911 result = fp32_zero(sgn);
1912 } else {
1913 mnt = fp16_normalise(mnt, &exp);
1914 result = fp32_pack(sgn, exp - 15 + 127 + 5, (uint32_t)mnt << 8);
1915 }
1916
1917 set_fpscr0(fpscr, flags);
1918
1919 return result;
1920}
1921
1922template <>
1923uint32_t
1924fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr)
1925{
1926 int mode = modeConv(fpscr);
1927 int flags = 0;
1928 int sgn, exp;
1929 uint64_t mnt;
1930 uint32_t result;
1931
1932 // Unpack floating-point operand optionally with flush-to-zero:
1933 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
1934
1935 if (exp == 2047 && (uint64_t)(mnt << 12)) {
1936 if (fpscr.dn) {
1937 result = fp32_defaultNaN();
1938 } else {
1939 result = fp32_FPConvertNaN_64(op);
1940 }
1941 if (!(mnt >> 51 & 1)) {
1942 flags |= FPLIB_IOC;
1943 }
1944 } else if (exp == 2047) {
1945 result = fp32_infinity(sgn);
1946 } else if (!mnt) {
1947 result = fp32_zero(sgn);
1948 } else {
1949 result = fp32_round_(sgn, exp - 1023 + 127,
1950 mnt >> 20 | !!(uint64_t)(mnt << 44),
1951 rounding, mode, &flags);
1952 }
1953
1954 set_fpscr0(fpscr, flags);
1955
1956 return result;
1957}
1958
1959template <>
1960uint64_t
1961fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr)
1962{
1963 int mode = modeConv(fpscr);
1964 int flags = 0;
1965 int sgn, exp;
1966 uint16_t mnt;
1967 uint64_t result;
1968
1969 // Unpack floating-point operand optionally with flush-to-zero:
1970 fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags);
1971
1972 if (exp == 31 && !fpscr.ahp && (uint16_t)(mnt << 6)) {
1973 if (fpscr.dn) {
1974 result = fp64_defaultNaN();
1975 } else {
1976 result = fp64_FPConvertNaN_16(op);
1977 }
1978 if (!(mnt >> 9 & 1)) {
1979 flags |= FPLIB_IOC;
1980 }
1981 } else if (exp == 31 && !fpscr.ahp) {
1982 result = fp64_infinity(sgn);
1983 } else if (!mnt) {
1984 result = fp64_zero(sgn);
1985 } else {
1986 mnt = fp16_normalise(mnt, &exp);
1987 result = fp64_pack(sgn, exp - 15 + 1023 + 5, (uint64_t)mnt << 37);
1988 }
1989
1990 set_fpscr0(fpscr, flags);
1991
1992 return result;
1993}
1994
1995template <>
1996uint64_t
1997fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr)
1998{
1999 int mode = modeConv(fpscr);
2000 int flags = 0;
2001 int sgn, exp;
2002 uint32_t mnt;
2003 uint64_t result;
2004
2005 // Unpack floating-point operand optionally with flush-to-zero:
2006 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2007
2008 if (exp == 255 && (uint32_t)(mnt << 9)) {
2009 if (fpscr.dn) {
2010 result = fp64_defaultNaN();
2011 } else {
2012 result = fp64_FPConvertNaN_32(op);
2013 }
2014 if (!(mnt >> 22 & 1)) {
2015 flags |= FPLIB_IOC;
2016 }
2017 } else if (exp == 255) {
2018 result = fp64_infinity(sgn);
2019 } else if (!mnt) {
2020 result = fp64_zero(sgn);
2021 } else {
2022 mnt = fp32_normalise(mnt, &exp);
2023 result = fp64_pack(sgn, exp - 127 + 1023 + 8, (uint64_t)mnt << 21);
2024 }
2025
2026 set_fpscr0(fpscr, flags);
2027
2028 return result;
2029}
2030
2031template <>
2032uint32_t
2033fplibMulAdd(uint32_t addend, uint32_t op1, uint32_t op2, FPSCR &fpscr)
2034{
2035 int flags = 0;
2036 uint32_t result = fp32_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags);
2037 set_fpscr0(fpscr, flags);
2038 return result;
2039}
2040
2041template <>
2042uint64_t
2043fplibMulAdd(uint64_t addend, uint64_t op1, uint64_t op2, FPSCR &fpscr)
2044{
2045 int flags = 0;
2046 uint64_t result = fp64_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags);
2047 set_fpscr0(fpscr, flags);
2048 return result;
2049}
2050
2051template <>
2052uint32_t
2053fplibDiv(uint32_t op1, uint32_t op2, FPSCR &fpscr)
2054{
2055 int flags = 0;
2056 uint32_t result = fp32_div(op1, op2, modeConv(fpscr), &flags);
2057 set_fpscr0(fpscr, flags);
2058 return result;
2059}
2060
2061template <>
2062uint64_t
2063fplibDiv(uint64_t op1, uint64_t op2, FPSCR &fpscr)
2064{
2065 int flags = 0;
2066 uint64_t result = fp64_div(op1, op2, modeConv(fpscr), &flags);
2067 set_fpscr0(fpscr, flags);
2068 return result;
2069}
2070
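// Repack an unpacked sign/exponent/mantissa triple: if the mantissa's
// implicit bit is clear the value is subnormal or zero, so a biased
// exponent of zero is stored.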
2071static uint32_t
2072fp32_repack(int sgn, int exp, uint32_t mnt)
2073{
2074 return fp32_pack(sgn, mnt >> 23 ? exp : 0, mnt);
2075}
2076
2077static uint64_t
2078fp64_repack(int sgn, int exp, uint64_t mnt)
2079{
2080 return fp64_pack(sgn, mnt >> 52 ? exp : 0, mnt);
2081}
2082
2083static void
2084fp32_minmaxnum(uint32_t *op1, uint32_t *op2, int sgn)
2085{
2086 // Treat a single quiet-NaN as +Infinity/-Infinity
2087 if (!((uint32_t)~(*op1 << 1) >> 23) && (uint32_t)~(*op2 << 1) >> 23)
2088 *op1 = fp32_infinity(sgn);
2089 if (!((uint32_t)~(*op2 << 1) >> 23) && (uint32_t)~(*op1 << 1) >> 23)
2090 *op2 = fp32_infinity(sgn);
2091}
2092
2093static void
2094fp64_minmaxnum(uint64_t *op1, uint64_t *op2, int sgn)
2095{
2096 // Treat a single quiet-NaN as +Infinity/-Infinity
2097 if (!((uint64_t)~(*op1 << 1) >> 52) && (uint64_t)~(*op2 << 1) >> 52)
2098 *op1 = fp64_infinity(sgn);
2099 if (!((uint64_t)~(*op2 << 1) >> 52) && (uint64_t)~(*op1 << 1) >> 52)
2100 *op2 = fp64_infinity(sgn);
2101}
2102
2103template <>
2104uint32_t
2105fplibMax(uint32_t op1, uint32_t op2, FPSCR &fpscr)
2106{
2107 int mode = modeConv(fpscr);
2108 int flags = 0;
2109 int sgn1, exp1, sgn2, exp2;
2110 uint32_t mnt1, mnt2, x, result;
2111
2112 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2113 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2114
2115 if ((x = fp32_process_NaNs(op1, op2, mode, &flags))) {
2116 result = x;
2117 } else {
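        // If the signs differ the non-negative operand is the maximum;
        // otherwise compare the raw encodings, whose ordering is reversed
        // for negative values.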
2118 result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ?
2119 fp32_repack(sgn1, exp1, mnt1) :
2120 fp32_repack(sgn2, exp2, mnt2));
2121 }
2122 set_fpscr0(fpscr, flags);
2123 return result;
2124}
2125
2126template <>
2127uint64_t
2128fplibMax(uint64_t op1, uint64_t op2, FPSCR &fpscr)
2129{
2130 int mode = modeConv(fpscr);
2131 int flags = 0;
2132 int sgn1, exp1, sgn2, exp2;
2133 uint64_t mnt1, mnt2, x, result;
2134
2135 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2136 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2137
2138 if ((x = fp64_process_NaNs(op1, op2, mode, &flags))) {
2139 result = x;
2140 } else {
2141 result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ?
2142 fp64_repack(sgn1, exp1, mnt1) :
2143 fp64_repack(sgn2, exp2, mnt2));
2144 }
2145 set_fpscr0(fpscr, flags);
2146 return result;
2147}
2148
2149template <>
2150uint32_t
2151fplibMaxNum(uint32_t op1, uint32_t op2, FPSCR &fpscr)
2152{
2153 fp32_minmaxnum(&op1, &op2, 1);
2154 return fplibMax<uint32_t>(op1, op2, fpscr);
2155}
2156
2157template <>
2158uint64_t
2159fplibMaxNum(uint64_t op1, uint64_t op2, FPSCR &fpscr)
2160{
2161 fp64_minmaxnum(&op1, &op2, 1);
2162 return fplibMax<uint64_t>(op1, op2, fpscr);
2163}
2164
2165template <>
2166uint32_t
2167fplibMin(uint32_t op1, uint32_t op2, FPSCR &fpscr)
2168{
2169 int mode = modeConv(fpscr);
2170 int flags = 0;
2171 int sgn1, exp1, sgn2, exp2;
2172 uint32_t mnt1, mnt2, x, result;
2173
2174 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2175 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2176
2177 if ((x = fp32_process_NaNs(op1, op2, mode, &flags))) {
2178 result = x;
2179 } else {
2180 result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ?
2181 fp32_repack(sgn1, exp1, mnt1) :
2182 fp32_repack(sgn2, exp2, mnt2));
2183 }
2184 set_fpscr0(fpscr, flags);
2185 return result;
2186}
2187
2188template <>
2189uint64_t
2190fplibMin(uint64_t op1, uint64_t op2, FPSCR &fpscr)
2191{
2192 int mode = modeConv(fpscr);
2193 int flags = 0;
2194 int sgn1, exp1, sgn2, exp2;
2195 uint64_t mnt1, mnt2, x, result;
2196
2197 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2198 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2199
2200 if ((x = fp64_process_NaNs(op1, op2, mode, &flags))) {
2201 result = x;
2202 } else {
2203 result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ?
2204 fp64_repack(sgn1, exp1, mnt1) :
2205 fp64_repack(sgn2, exp2, mnt2));
2206 }
2207 set_fpscr0(fpscr, flags);
2208 return result;
2209}
2210
2211template <>
2212uint32_t
2213fplibMinNum(uint32_t op1, uint32_t op2, FPSCR &fpscr)
2214{
2215 fp32_minmaxnum(&op1, &op2, 0);
2216 return fplibMin<uint32_t>(op1, op2, fpscr);
2217}
2218
2219template <>
2220uint64_t
2221fplibMinNum(uint64_t op1, uint64_t op2, FPSCR &fpscr)
2222{
2223 fp64_minmaxnum(&op1, &op2, 0);
2224 return fplibMin<uint64_t>(op1, op2, fpscr);
2225}
2226
2227template <>
2228uint32_t
2229fplibMul(uint32_t op1, uint32_t op2, FPSCR &fpscr)
2230{
2231 int flags = 0;
2232 uint32_t result = fp32_mul(op1, op2, modeConv(fpscr), &flags);
2233 set_fpscr0(fpscr, flags);
2234 return result;
2235}
2236
2237template <>
2238uint64_t
2239fplibMul(uint64_t op1, uint64_t op2, FPSCR &fpscr)
2240{
2241 int flags = 0;
2242 uint64_t result = fp64_mul(op1, op2, modeConv(fpscr), &flags);
2243 set_fpscr0(fpscr, flags);
2244 return result;
2245}
2246
2247template <>
2248uint32_t
2249fplibMulX(uint32_t op1, uint32_t op2, FPSCR &fpscr)
2250{
2251 int mode = modeConv(fpscr);
2252 int flags = 0;
2253 int sgn1, exp1, sgn2, exp2;
2254 uint32_t mnt1, mnt2, result;
2255
2256 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2257 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2258
2259 result = fp32_process_NaNs(op1, op2, mode, &flags);
2260 if (!result) {
2261 if ((exp1 == 255 && !mnt2) || (exp2 == 255 && !mnt1)) {
2262 result = fp32_FPTwo(sgn1 ^ sgn2);
2263 } else if (exp1 == 255 || exp2 == 255) {
2264 result = fp32_infinity(sgn1 ^ sgn2);
2265 } else if (!mnt1 || !mnt2) {
2266 result = fp32_zero(sgn1 ^ sgn2);
2267 } else {
2268 result = fp32_mul(op1, op2, mode, &flags);
2269 }
2270 }
2271
2272 set_fpscr0(fpscr, flags);
2273
2274 return result;
2275}
2276
2277template <>
2278uint64_t
2279fplibMulX(uint64_t op1, uint64_t op2, FPSCR &fpscr)
2280{
2281 int mode = modeConv(fpscr);
2282 int flags = 0;
2283 int sgn1, exp1, sgn2, exp2;
2284 uint64_t mnt1, mnt2, result;
2285
2286 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2287 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2288
2289 result = fp64_process_NaNs(op1, op2, mode, &flags);
2290 if (!result) {
2291 if ((exp1 == 2047 && !mnt2) || (exp2 == 2047 && !mnt1)) {
2292 result = fp64_FPTwo(sgn1 ^ sgn2);
2293 } else if (exp1 == 2047 || exp2 == 2047) {
2294 result = fp64_infinity(sgn1 ^ sgn2);
2295 } else if (!mnt1 || !mnt2) {
2296 result = fp64_zero(sgn1 ^ sgn2);
2297 } else {
2298 result = fp64_mul(op1, op2, mode, &flags);
2299 }
2300 }
2301
2302 set_fpscr0(fpscr, flags);
2303
2304 return result;
2305}
2306
2307template <>
2308uint32_t
2309fplibNeg(uint32_t op)
2310{
2311 return op ^ (uint32_t)1 << 31;
2312}
2313
2314template <>
2315uint64_t
2316fplibNeg(uint64_t op)
2317{
2318 return op ^ (uint64_t)1 << 63;
2319}
2320
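// 256-entry lookup table for the reciprocal square-root estimate: bit 7 of
// the index comes from the parity of the exponent and bits 6:0 from the top
// of the normalised mantissa (see fplibRSqrtEstimate below).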
2321static const uint8_t recip_sqrt_estimate[256] = {
2322 255, 253, 251, 249, 247, 245, 243, 242, 240, 238, 236, 234, 233, 231, 229, 228,
2323 226, 224, 223, 221, 219, 218, 216, 215, 213, 212, 210, 209, 207, 206, 204, 203,
2324 201, 200, 198, 197, 196, 194, 193, 192, 190, 189, 188, 186, 185, 184, 183, 181,
2325 180, 179, 178, 176, 175, 174, 173, 172, 170, 169, 168, 167, 166, 165, 164, 163,
2326 162, 160, 159, 158, 157, 156, 155, 154, 153, 152, 151, 150, 149, 148, 147, 146,
2327 145, 144, 143, 142, 141, 140, 140, 139, 138, 137, 136, 135, 134, 133, 132, 131,
2328 131, 130, 129, 128, 127, 126, 126, 125, 124, 123, 122, 121, 121, 120, 119, 118,
2329 118, 117, 116, 115, 114, 114, 113, 112, 111, 111, 110, 109, 109, 108, 107, 106,
2330 105, 104, 103, 101, 100, 99, 97, 96, 95, 93, 92, 91, 90, 88, 87, 86,
2331 85, 84, 82, 81, 80, 79, 78, 77, 76, 75, 74, 72, 71, 70, 69, 68,
2332 67, 66, 65, 64, 63, 62, 61, 60, 60, 59, 58, 57, 56, 55, 54, 53,
2333 52, 51, 51, 50, 49, 48, 47, 46, 46, 45, 44, 43, 42, 42, 41, 40,
2334 39, 38, 38, 37, 36, 35, 35, 34, 33, 33, 32, 31, 30, 30, 29, 28,
2335 28, 27, 26, 26, 25, 24, 24, 23, 22, 22, 21, 20, 20, 19, 19, 18,
2336 17, 17, 16, 16, 15, 14, 14, 13, 13, 12, 11, 11, 10, 10, 9, 9,
2337 8, 8, 7, 6, 6, 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 0
2338};
2339
2340template <>
2341uint32_t
2342fplibRSqrtEstimate(uint32_t op, FPSCR &fpscr)
2343{
2344 int mode = modeConv(fpscr);
2345 int flags = 0;
2346 int sgn, exp;
2347 uint32_t mnt, result;
2348
2349 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2350
2351 if (exp == 255 && (uint32_t)(mnt << 9)) {
2352 result = fp32_process_NaN(op, mode, &flags);
2353 } else if (!mnt) {
2354 result = fp32_infinity(sgn);
2355 flags |= FPLIB_DZC;
2356 } else if (sgn) {
2357 result = fp32_defaultNaN();
2358 flags |= FPLIB_IOC;
2359 } else if (exp == 255) {
2360 result = fp32_zero(0);
2361 } else {
2362 exp += 8;
2363 mnt = fp32_normalise(mnt, &exp);
2364 mnt = recip_sqrt_estimate[(~exp & 1) << 7 | (mnt >> 24 & 127)];
2365 result = fp32_pack(0, (380 - exp) >> 1, mnt << 15);
2366 }
2367
2368 set_fpscr0(fpscr, flags);
2369
2370 return result;
2371}
2372
2373template <>
2374uint64_t
2375fplibRSqrtEstimate(uint64_t op, FPSCR &fpscr)
2376{
2377 int mode = modeConv(fpscr);
2378 int flags = 0;
2379 int sgn, exp;
2380 uint64_t mnt, result;
2381
2382 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2383
2384 if (exp == 2047 && (uint64_t)(mnt << 12)) {
2385 result = fp64_process_NaN(op, mode, &flags);
2386 } else if (!mnt) {
2387 result = fp64_infinity(sgn);
2388 flags |= FPLIB_DZC;
2389 } else if (sgn) {
2390 result = fp64_defaultNaN();
2391 flags |= FPLIB_IOC;
2392 } else if (exp == 2047) {
2393         result = fp64_zero(0);
2394 } else {
2395 exp += 11;
2396 mnt = fp64_normalise(mnt, &exp);
2397 mnt = recip_sqrt_estimate[(~exp & 1) << 7 | (mnt >> 56 & 127)];
2398 result = fp64_pack(0, (3068 - exp) >> 1, mnt << 44);
2399 }
2400
2401 set_fpscr0(fpscr, flags);
2402
2403 return result;
2404}
2405
2406template <>
2407uint32_t
2408fplibRSqrtStepFused(uint32_t op1, uint32_t op2, FPSCR &fpscr)
2409{
2410 int mode = modeConv(fpscr);
2411 int flags = 0;
2412 int sgn1, exp1, sgn2, exp2;
2413 uint32_t mnt1, mnt2, result;
2414
2415 op1 = fplibNeg<uint32_t>(op1);
2416 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2417 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2418
2419 result = fp32_process_NaNs(op1, op2, mode, &flags);
2420 if (!result) {
2421 if ((exp1 == 255 && !mnt2) || (exp2 == 255 && !mnt1)) {
2422 result = fp32_FPOnePointFive(0);
2423 } else if (exp1 == 255 || exp2 == 255) {
2424 result = fp32_infinity(sgn1 ^ sgn2);
2425 } else {
2426 result = fp32_muladd(fp32_FPThree(0), op1, op2, -1, mode, &flags);
2427 }
2428 }
2429
2430 set_fpscr0(fpscr, flags);
2431
2432 return result;
2433}
2434
2435template <>
2436uint64_t
2437fplibRSqrtStepFused(uint64_t op1, uint64_t op2, FPSCR &fpscr)
2438{
2439 int mode = modeConv(fpscr);
2440 int flags = 0;
2441 int sgn1, exp1, sgn2, exp2;
2442 uint64_t mnt1, mnt2, result;
2443
2444 op1 = fplibNeg<uint64_t>(op1);
2445 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2446 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2447
2448 result = fp64_process_NaNs(op1, op2, mode, &flags);
2449 if (!result) {
2450 if ((exp1 == 2047 && !mnt2) || (exp2 == 2047 && !mnt1)) {
2451 result = fp64_FPOnePointFive(0);
2452 } else if (exp1 == 2047 || exp2 == 2047) {
2453 result = fp64_infinity(sgn1 ^ sgn2);
2454 } else {
2455 result = fp64_muladd(fp64_FPThree(0), op1, op2, -1, mode, &flags);
2456 }
2457 }
2458
2459 set_fpscr0(fpscr, flags);
2460
2461 return result;
2462}
2463
2464template <>
2465uint32_t
2466fplibRecipStepFused(uint32_t op1, uint32_t op2, FPSCR &fpscr)
2467{
2468 int mode = modeConv(fpscr);
2469 int flags = 0;
2470 int sgn1, exp1, sgn2, exp2;
2471 uint32_t mnt1, mnt2, result;
2472
2473 op1 = fplibNeg<uint32_t>(op1);
2474 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2475 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2476
2477 result = fp32_process_NaNs(op1, op2, mode, &flags);
2478 if (!result) {
2479 if ((exp1 == 255 && !mnt2) || (exp2 == 255 && !mnt1)) {
2480 result = fp32_FPTwo(0);
2481 } else if (exp1 == 255 || exp2 == 255) {
2482 result = fp32_infinity(sgn1 ^ sgn2);
2483 } else {
2484 result = fp32_muladd(fp32_FPTwo(0), op1, op2, 0, mode, &flags);
2485 }
2486 }
2487
2488 set_fpscr0(fpscr, flags);
2489
2490 return result;
2491}
2492
2493template <>
2494uint32_t
2495fplibRecipEstimate(uint32_t op, FPSCR &fpscr)
2496{
2497 int mode = modeConv(fpscr);
2498 int flags = 0;
2499 int sgn, exp;
2500 uint32_t mnt, result;
2501
2502 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2503
2504 if (exp == 255 && (uint32_t)(mnt << 9)) {
2505 result = fp32_process_NaN(op, mode, &flags);
2506 } else if (exp == 255) {
2507 result = fp32_zero(sgn);
2508 } else if (!mnt) {
2509 result = fp32_infinity(sgn);
2510 flags |= FPLIB_DZC;
2511 } else if (!((uint32_t)(op << 1) >> 22)) {
2512 bool overflow_to_inf = false;
2513 switch (FPCRRounding(fpscr)) {
2514 case FPRounding_TIEEVEN:
2515 overflow_to_inf = true;
2516 break;
2517 case FPRounding_POSINF:
2518 overflow_to_inf = !sgn;
2519 break;
2520 case FPRounding_NEGINF:
2521 overflow_to_inf = sgn;
2522 break;
2523 case FPRounding_ZERO:
2524 overflow_to_inf = false;
2525 break;
2526 default:
2527 assert(0);
2528 }
2529 result = overflow_to_inf ? fp32_infinity(sgn) : fp32_max_normal(sgn);
2530 flags |= FPLIB_OFC | FPLIB_IXC;
2531 } else if (fpscr.fz && exp >= 253) {
2532 result = fp32_zero(sgn);
2533 flags |= FPLIB_UFC;
2534 } else {
2535 exp += 8;
2536 mnt = fp32_normalise(mnt, &exp);
2537 int result_exp = 253 - exp;
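        // Estimate the reciprocal from the top bits of the normalised
        // mantissa; the "+ 1) >> 1" rounds the quotient to nearest.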
2538 uint32_t fraction = (((uint32_t)1 << 19) / (mnt >> 22 | 1) + 1) >> 1;
2539 fraction <<= 15;
2540 if (result_exp == 0) {
2541 fraction >>= 1;
2542 } else if (result_exp == -1) {
2543 fraction >>= 2;
2544 result_exp = 0;
2545 }
2546 result = fp32_pack(sgn, result_exp, fraction);
2547 }
2548
2549 set_fpscr0(fpscr, flags);
2550
2551 return result;
2552}
2553
2554template <>
2555uint64_t
2556fplibRecipEstimate(uint64_t op, FPSCR &fpscr)
2557{
2558 int mode = modeConv(fpscr);
2559 int flags = 0;
2560 int sgn, exp;
2561 uint64_t mnt, result;
2562
2563 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2564
2565 if (exp == 2047 && (uint64_t)(mnt << 12)) {
2566 result = fp64_process_NaN(op, mode, &flags);
2567 } else if (exp == 2047) {
2568 result = fp64_zero(sgn);
2569 } else if (!mnt) {
2570 result = fp64_infinity(sgn);
2571 flags |= FPLIB_DZC;
2572 } else if (!((uint64_t)(op << 1) >> 51)) {
2573 bool overflow_to_inf = false;
2574 switch (FPCRRounding(fpscr)) {
2575 case FPRounding_TIEEVEN:
2576 overflow_to_inf = true;
2577 break;
2578 case FPRounding_POSINF:
2579 overflow_to_inf = !sgn;
2580 break;
2581 case FPRounding_NEGINF:
2582 overflow_to_inf = sgn;
2583 break;
2584 case FPRounding_ZERO:
2585 overflow_to_inf = false;
2586 break;
2587 default:
2588 assert(0);
2589 }
2590 result = overflow_to_inf ? fp64_infinity(sgn) : fp64_max_normal(sgn);
2591 flags |= FPLIB_OFC | FPLIB_IXC;
2592 } else if (fpscr.fz && exp >= 2045) {
2593 result = fp64_zero(sgn);
2594 flags |= FPLIB_UFC;
2595 } else {
2596 exp += 11;
2597 mnt = fp64_normalise(mnt, &exp);
2598 int result_exp = 2045 - exp;
2599 uint64_t fraction = (((uint32_t)1 << 19) / (mnt >> 54 | 1) + 1) >> 1;
2600 fraction <<= 44;
2601 if (result_exp == 0) {
2602 fraction >>= 1;
2603 } else if (result_exp == -1) {
2604 fraction >>= 2;
2605 result_exp = 0;
2606 }
2607 result = fp64_pack(sgn, result_exp, fraction);
2608 }
2609
2610 set_fpscr0(fpscr, flags);
2611
2612 return result;
2613}
2614
2615template <>
2616uint64_t
2617fplibRecipStepFused(uint64_t op1, uint64_t op2, FPSCR &fpscr)
2618{
2619 int mode = modeConv(fpscr);
2620 int flags = 0;
2621 int sgn1, exp1, sgn2, exp2;
2622 uint64_t mnt1, mnt2, result;
2623
2624 op1 = fplibNeg<uint64_t>(op1);
2625 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2626 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2627
2628 result = fp64_process_NaNs(op1, op2, mode, &flags);
2629 if (!result) {
2630 if ((exp1 == 2047 && !mnt2) || (exp2 == 2047 && !mnt1)) {
2631 result = fp64_FPTwo(0);
2632 } else if (exp1 == 2047 || exp2 == 2047) {
2633 result = fp64_infinity(sgn1 ^ sgn2);
2634 } else {
2635 result = fp64_muladd(fp64_FPTwo(0), op1, op2, 0, mode, &flags);
2636 }
2637 }
2638
2639 set_fpscr0(fpscr, flags);
2640
2641 return result;
2642}
2643
2644template <>
2645uint32_t
2646fplibRecpX(uint32_t op, FPSCR &fpscr)
2647{
2648 int mode = modeConv(fpscr);
2649 int flags = 0;
2650 int sgn, exp;
2651 uint32_t mnt, result;
2652
2653 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2654
2655 if (exp == 255 && (uint32_t)(mnt << 9)) {
2656 result = fp32_process_NaN(op, mode, &flags);
2657 }
2658 else {
2659 if (!mnt) { // Zero and denormals
2660 result = fp32_pack(sgn, 254, 0);
2661 } else { // Infinities and normals
2662 result = fp32_pack(sgn, exp ^ 255, 0);
2663 }
2664 }
2665
2666 set_fpscr0(fpscr, flags);
2667
2668 return result;
2669}
2670
2671template <>
2672uint64_t
2673fplibRecpX(uint64_t op, FPSCR &fpscr)
2674{
2675 int mode = modeConv(fpscr);
2676 int flags = 0;
2677 int sgn, exp;
2678 uint64_t mnt, result;
2679
2680 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2681
2682 if (exp == 2047 && (uint64_t)(mnt << 12)) {
2683 result = fp64_process_NaN(op, mode, &flags);
2684 }
2685 else {
2686 if (!mnt) { // Zero and denormals
2687 result = fp64_pack(sgn, 2046, 0);
2688 } else { // Infinities and normals
2689 result = fp64_pack(sgn, exp ^ 2047, 0);
2690 }
2691 }
2692
2693 set_fpscr0(fpscr, flags);
2694
2695 return result;
2696}
2697
2698template <>
2699uint32_t
2700fplibRoundInt(uint32_t op, FPRounding rounding, bool exact, FPSCR &fpscr)
2701{
2702 int mode = modeConv(fpscr);
2703 int flags = 0;
2704 int sgn, exp;
2705 uint32_t mnt, result;
2706
2707 // Unpack using FPCR to determine if subnormals are flushed-to-zero:
2708 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2709
2710 // Handle NaNs, infinities and zeroes:
2711 if (exp == 255 && (uint32_t)(mnt << 9)) {
2712 result = fp32_process_NaN(op, mode, &flags);
2713 } else if (exp == 255) {
2714 result = fp32_infinity(sgn);
2715 } else if (!mnt) {
2716 result = fp32_zero(sgn);
2717 } else if (exp >= 150) {
2718 // There are no fractional bits
2719 result = op;
2720 } else {
2721 // Truncate towards zero:
2722 uint32_t x = 150 - exp >= 32 ? 0 : mnt >> (150 - exp);
2723 int err = exp < 118 ? 1 :
2724 (mnt << 1 >> (149 - exp) & 3) | (mnt << 2 << (exp - 118) != 0);
2725 switch (rounding) {
2726 case FPRounding_TIEEVEN:
2727 x += (err == 3 || (err == 2 && (x & 1)));
2728 break;
2729 case FPRounding_POSINF:
2730 x += err && !sgn;
2731 break;
2732 case FPRounding_NEGINF:
2733 x += err && sgn;
2734 break;
2735 case FPRounding_ZERO:
2736 break;
2737 case FPRounding_TIEAWAY:
2738 x += err >> 1;
2739 break;
2740 default:
2741 assert(0);
2742 }
2743
2744 if (x == 0) {
2745 result = fp32_zero(sgn);
2746 } else {
2747 exp = 150;
2748 mnt = fp32_normalise(x, &exp);
2749 result = fp32_pack(sgn, exp + 8, mnt >> 8);
2750 }
2751
2752 if (err && exact)
2753 flags |= FPLIB_IXC;
2754 }
2755
2756 set_fpscr0(fpscr, flags);
2757
2758 return result;
2759}
2760
2761template <>
2762uint64_t
2763fplibRoundInt(uint64_t op, FPRounding rounding, bool exact, FPSCR &fpscr)
2764{
2765 int mode = modeConv(fpscr);
2766 int flags = 0;
2767 int sgn, exp;
2768 uint64_t mnt, result;
2769
2770 // Unpack using FPCR to determine if subnormals are flushed-to-zero:
2771 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2772
2773 // Handle NaNs, infinities and zeroes:
2774 if (exp == 2047 && (uint64_t)(mnt << 12)) {
2775 result = fp64_process_NaN(op, mode, &flags);
2776 } else if (exp == 2047) {
2777 result = fp64_infinity(sgn);
2778 } else if (!mnt) {
2779 result = fp64_zero(sgn);
2780 } else if (exp >= 1075) {
2781 // There are no fractional bits
2782 result = op;
2783 } else {
2784 // Truncate towards zero:
2785 uint64_t x = 1075 - exp >= 64 ? 0 : mnt >> (1075 - exp);
2786 int err = exp < 1011 ? 1 :
2787 (mnt << 1 >> (1074 - exp) & 3) | (mnt << 2 << (exp - 1011) != 0);
2788 switch (rounding) {
2789 case FPRounding_TIEEVEN:
2790 x += (err == 3 || (err == 2 && (x & 1)));
2791 break;
2792 case FPRounding_POSINF:
2793 x += err && !sgn;
2794 break;
2795 case FPRounding_NEGINF:
2796 x += err && sgn;
2797 break;
2798 case FPRounding_ZERO:
2799 break;
2800 case FPRounding_TIEAWAY:
2801 x += err >> 1;
2802 break;
2803 default:
2804 assert(0);
2805 }
2806
2807 if (x == 0) {
2808 result = fp64_zero(sgn);
2809 } else {
2810 exp = 1075;
2811 mnt = fp64_normalise(x, &exp);
2812 result = fp64_pack(sgn, exp + 11, mnt >> 11);
2813 }
2814
2815 if (err && exact)
2816 flags |= FPLIB_IXC;
2817 }
2818
2819 set_fpscr0(fpscr, flags);
2820
2821 return result;
2822}
2823
2824template <>
2825uint32_t
2826fplibSqrt(uint32_t op, FPSCR &fpscr)
2827{
2828 int flags = 0;
2829 uint32_t result = fp32_sqrt(op, modeConv(fpscr), &flags);
2830 set_fpscr0(fpscr, flags);
2831 return result;
2832}
2833
2834template <>
2835uint64_t
2836fplibSqrt(uint64_t op, FPSCR &fpscr)
2837{
2838 int flags = 0;
2839 uint64_t result = fp64_sqrt(op, modeConv(fpscr), &flags);
2840 set_fpscr0(fpscr, flags);
2841 return result;
2842}
2843
2844template <>
2845uint32_t
2846fplibSub(uint32_t op1, uint32_t op2, FPSCR &fpscr)
2847{
2848 int flags = 0;
2849 uint32_t result = fp32_add(op1, op2, 1, modeConv(fpscr), &flags);
2850 set_fpscr0(fpscr, flags);
2851 return result;
2852}
2853
2854template <>
2855uint64_t
2856fplibSub(uint64_t op1, uint64_t op2, FPSCR &fpscr)
2857{
2858 int flags = 0;
2859 uint64_t result = fp64_add(op1, op2, 1, modeConv(fpscr), &flags);
2860 set_fpscr0(fpscr, flags);
2861 return result;
2862}
2863
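// Convert an unpacked floating-point value to a 64-bit fixed-point integer
// (unsigned if u is set), rounding as requested and saturating with the
// Invalid Operation flag on overflow.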
2864static uint64_t
2865FPToFixed_64(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding,
2866 int *flags)
2867{
2868 uint64_t x;
2869 int err;
2870
2871 if (exp > 1023 + 63) {
2872 *flags = FPLIB_IOC;
2873 return ((uint64_t)!u << 63) - !sgn;
2874 }
2875
2876 x = lsr64(mnt << 11, 1023 + 63 - exp);
2877 err = (exp > 1023 + 63 - 2 ? 0 :
2878 (lsr64(mnt << 11, 1023 + 63 - 2 - exp) & 3) |
2879 !!(mnt << 11 & (lsl64(1, 1023 + 63 - 2 - exp) - 1)));
2880
2881 switch (rounding) {
2882 case FPRounding_TIEEVEN:
2883 x += (err == 3 || (err == 2 && (x & 1)));
2884 break;
2885 case FPRounding_POSINF:
2886 x += err && !sgn;
2887 break;
2888 case FPRounding_NEGINF:
2889 x += err && sgn;
2890 break;
2891 case FPRounding_ZERO:
2892 break;
2893 case FPRounding_TIEAWAY:
2894 x += err >> 1;
2895 break;
2896 default:
2897 assert(0);
2898 }
2899
2900 if (u ? sgn && x : x > ((uint64_t)1 << 63) - !sgn) {
2901 *flags = FPLIB_IOC;
2902 return ((uint64_t)!u << 63) - !sgn;
2903 }
2904
2905 if (err) {
2906 *flags = FPLIB_IXC;
2907 }
2908
2909 return sgn ? -x : x;
2910}
2911
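// As above, but narrow the result to 32 bits, saturating if it does not fit.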
2912static uint32_t
2913FPToFixed_32(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding,
2914 int *flags)
2915{
2916 uint64_t x = FPToFixed_64(sgn, exp, mnt, u, rounding, flags);
2917 if (u ? x >= (uint64_t)1 << 32 :
2918 !(x < (uint64_t)1 << 31 ||
2919 (uint64_t)-x <= (uint64_t)1 << 31)) {
2920 *flags = FPLIB_IOC;
2921 x = ((uint32_t)!u << 31) - !sgn;
2922 }
2923 return x;
2924}
2925
2926template <>
2927uint32_t
2928fplibFPToFixed(uint32_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
2929{
2930 int flags = 0;
2931 int sgn, exp;
2932 uint32_t mnt, result;
2933
2934 // Unpack using FPCR to determine if subnormals are flushed-to-zero:
2935 fp32_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);
2936
2937 // If NaN, set cumulative flag or take exception:
2938 if (exp == 255 && (uint32_t)(mnt << 9)) {
2939 flags = FPLIB_IOC;
2940 result = 0;
2941 } else {
2942 result = FPToFixed_32(sgn, exp + 1023 - 127 + fbits,
2943 (uint64_t)mnt << (52 - 23), u, rounding, &flags);
2944 }
2945
2946 set_fpscr0(fpscr, flags);
2947
2948 return result;
2949}
2950
2951template <>
2952uint32_t
2953fplibFPToFixed(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
2954{
2955 int flags = 0;
2956 int sgn, exp;
2957 uint64_t mnt;
2958 uint32_t result;
2959
2960 // Unpack using FPCR to determine if subnormals are flushed-to-zero:
2961 fp64_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);
2962
2963 // If NaN, set cumulative flag or take exception:
2964 if (exp == 2047 && (uint64_t)(mnt << 12)) {
2965 flags = FPLIB_IOC;
2966 result = 0;
2967 } else {
2968 result = FPToFixed_32(sgn, exp + fbits, mnt, u, rounding, &flags);
2969 }
2970
2971 set_fpscr0(fpscr, flags);
2972
2973 return result;
2974}
2975
2976template <>
2977uint64_t
2978fplibFPToFixed(uint32_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
2979{
2980 int flags = 0;
2981 int sgn, exp;
2982 uint32_t mnt;
2983 uint64_t result;
2984
2985 // Unpack using FPCR to determine if subnormals are flushed-to-zero:
2986 fp32_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);
2987
2988 // If NaN, set cumulative flag or take exception:
2989 if (exp == 255 && (uint32_t)(mnt << 9)) {
2990 flags = FPLIB_IOC;
2991 result = 0;
2992 } else {
2993 result = FPToFixed_64(sgn, exp + 1023 - 127 + fbits,
2994 (uint64_t)mnt << (52 - 23), u, rounding, &flags);
2995 }
2996
2997 set_fpscr0(fpscr, flags);
2998
2999 return result;
3000}
3001
3002template <>
3003uint64_t
3004fplibFPToFixed(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
3005{
3006 int flags = 0;
3007 int sgn, exp;
3008 uint64_t mnt, result;
3009
3010 // Unpack using FPCR to determine if subnormals are flushed-to-zero:
3011 fp64_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);
3012
3013 // If NaN, set cumulative flag or take exception:
3014 if (exp == 2047 && (uint64_t)(mnt << 12)) {
3015 flags = FPLIB_IOC;
3016 result = 0;
3017 } else {
3018 result = FPToFixed_64(sgn, exp + fbits, mnt, u, rounding, &flags);
3019 }
3020
3021 set_fpscr0(fpscr, flags);
3022
3023 return result;
3024}
3025
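// Convert a 64-bit fixed-point value with 'fbits' fraction bits (unsigned if
// u is set) to single precision.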
3026static uint32_t
3027fp32_cvtf(uint64_t a, int fbits, int u, int mode, int *flags)
3028{
3029 int x_sgn = !u && a >> 63;
3030 int x_exp = 190 - fbits;
3031 uint64_t x_mnt = x_sgn ? -a : a;
3032
3033 // Handle zero:
3034 if (!x_mnt) {
3035 return fp32_zero(0);
3036 }
3037
3038 // Normalise and convert to 32 bits, collapsing error into bottom bit:
3039 x_mnt = fp64_normalise(x_mnt, &x_exp);
3040 x_mnt = x_mnt >> 31 | !!(uint32_t)(x_mnt << 1);
3041
3042 return fp32_round(x_sgn, x_exp, x_mnt, mode, flags);
3043}
3044
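// As above, but to double precision.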
3045static uint64_t
3046fp64_cvtf(uint64_t a, int fbits, int u, int mode, int *flags)
3047{
3048 int x_sgn = !u && a >> 63;
3049 int x_exp = 1024 + 62 - fbits;
3050 uint64_t x_mnt = x_sgn ? -a : a;
3051
3052 // Handle zero:
3053 if (!x_mnt) {
3054 return fp64_zero(0);
3055 }
3056
3057 x_mnt = fp64_normalise(x_mnt, &x_exp);
3058
3059 return fp64_round(x_sgn, x_exp, x_mnt << 1, mode, flags);
3060}
3061
3062template <>
3063uint32_t
3064fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
3065{
3066 int flags = 0;
3067 uint32_t res = fp32_cvtf(op, fbits, u,
3068 (int)rounding | ((uint32_t)fpscr >> 22 & 12),
3069 &flags);
3070 set_fpscr0(fpscr, flags);
3071 return res;
3072}
3073
3074template <>
3075uint64_t
3076fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
3077{
3078 int flags = 0;
3079 uint64_t res = fp64_cvtf(op, fbits, u,
3080 (int)rounding | ((uint32_t)fpscr >> 22 & 12),
3081 &flags);
3082 set_fpscr0(fpscr, flags);
3083 return res;
3084}
3085
3086}
158static inline void
159add128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0,
160 uint64_t b1)
161{
162 *x0 = a0 + b0;
163 *x1 = a1 + b1 + (*x0 < a0);
164}
165
166static inline void
167sub128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0,
168 uint64_t b1)
169{
170 *x0 = a0 - b0;
171 *x1 = a1 - b1 - (*x0 > a0);
172}
173
174static inline int
175cmp128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1)
176{
177 return (a1 < b1 ? -1 : a1 > b1 ? 1 : a0 < b0 ? -1 : a0 > b0 ? 1 : 0);
178}
179
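// Shift a non-zero mantissa left until its top bit is set, decreasing *exp
// by the total shift (binary search over power-of-two shift amounts).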
180static inline uint16_t
181fp16_normalise(uint16_t mnt, int *exp)
182{
183 int shift;
184
185 if (!mnt) {
186 return 0;
187 }
188
189 for (shift = 8; shift; shift >>= 1) {
190 if (!(mnt >> (16 - shift))) {
191 mnt <<= shift;
192 *exp -= shift;
193 }
194 }
195 return mnt;
196}
197
198static inline uint32_t
199fp32_normalise(uint32_t mnt, int *exp)
200{
201 int shift;
202
203 if (!mnt) {
204 return 0;
205 }
206
207 for (shift = 16; shift; shift >>= 1) {
208 if (!(mnt >> (32 - shift))) {
209 mnt <<= shift;
210 *exp -= shift;
211 }
212 }
213 return mnt;
214}
215
216static inline uint64_t
217fp64_normalise(uint64_t mnt, int *exp)
218{
219 int shift;
220
221 if (!mnt) {
222 return 0;
223 }
224
225 for (shift = 32; shift; shift >>= 1) {
226 if (!(mnt >> (64 - shift))) {
227 mnt <<= shift;
228 *exp -= shift;
229 }
230 }
231 return mnt;
232}
233
234static inline void
235fp128_normalise(uint64_t *mnt0, uint64_t *mnt1, int *exp)
236{
237 uint64_t x0 = *mnt0;
238 uint64_t x1 = *mnt1;
239 int shift;
240
241 if (!x0 && !x1) {
242 return;
243 }
244
245 if (!x1) {
246 x1 = x0;
247 x0 = 0;
248 *exp -= 64;
249 }
250
251 for (shift = 32; shift; shift >>= 1) {
252 if (!(x1 >> (64 - shift))) {
253 x1 = x1 << shift | x0 >> (64 - shift);
254 x0 <<= shift;
255 *exp -= shift;
256 }
257 }
258
259 *mnt0 = x0;
260 *mnt1 = x1;
261}
262
263static inline uint16_t
264fp16_pack(uint16_t sgn, uint16_t exp, uint16_t mnt)
265{
266 return sgn << 15 | exp << 10 | (mnt & (((uint16_t)1 << 10) - 1));
267}
268
269static inline uint32_t
270fp32_pack(uint32_t sgn, uint32_t exp, uint32_t mnt)
271{
272 return sgn << 31 | exp << 23 | (mnt & (((uint32_t)1 << 23) - 1));
273}
274
275static inline uint64_t
276fp64_pack(uint64_t sgn, uint64_t exp, uint64_t mnt)
277{
278 return (uint64_t)sgn << 63 | exp << 52 | (mnt & (((uint64_t)1 << 52) - 1));
279}
280
281static inline uint16_t
282fp16_zero(int sgn)
283{
284 return fp16_pack(sgn, 0, 0);
285}
286
287static inline uint32_t
288fp32_zero(int sgn)
289{
290 return fp32_pack(sgn, 0, 0);
291}
292
293static inline uint64_t
294fp64_zero(int sgn)
295{
296 return fp64_pack(sgn, 0, 0);
297}
298
299static inline uint16_t
300fp16_max_normal(int sgn)
301{
302 return fp16_pack(sgn, 30, -1);
303}
304
305static inline uint32_t
306fp32_max_normal(int sgn)
307{
308 return fp32_pack(sgn, 254, -1);
309}
310
311static inline uint64_t
312fp64_max_normal(int sgn)
313{
314 return fp64_pack(sgn, 2046, -1);
315}
316
317static inline uint16_t
318fp16_infinity(int sgn)
319{
320 return fp16_pack(sgn, 31, 0);
321}
322
323static inline uint32_t
324fp32_infinity(int sgn)
325{
326 return fp32_pack(sgn, 255, 0);
327}
328
329static inline uint64_t
330fp64_infinity(int sgn)
331{
332 return fp64_pack(sgn, 2047, 0);
333}
334
335static inline uint16_t
336fp16_defaultNaN()
337{
338 return fp16_pack(0, 31, (uint16_t)1 << 9);
339}
340
341static inline uint32_t
342fp32_defaultNaN()
343{
344 return fp32_pack(0, 255, (uint32_t)1 << 22);
345}
346
347static inline uint64_t
348fp64_defaultNaN()
349{
350 return fp64_pack(0, 2047, (uint64_t)1 << 51);
351}
352
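// Split an IEEE encoding into sign, biased exponent and mantissa, making the
// implicit bit explicit for normal values; for single and double precision a
// subnormal input is flushed to zero (raising Input Denormal) when FPLIB_FZ
// is set.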
353static inline void
354fp16_unpack(int *sgn, int *exp, uint16_t *mnt, uint16_t x, int mode,
355 int *flags)
356{
357 *sgn = x >> 15;
358 *exp = x >> 10 & 31;
359 *mnt = x & (((uint16_t)1 << 10) - 1);
360
361 // Handle subnormals:
362 if (*exp) {
363 *mnt |= (uint16_t)1 << 10;
364 } else {
365 ++*exp;
366 // There is no flush to zero in this case!
367 }
368}
369
370static inline void
371fp32_unpack(int *sgn, int *exp, uint32_t *mnt, uint32_t x, int mode,
372 int *flags)
373{
374 *sgn = x >> 31;
375 *exp = x >> 23 & 255;
376 *mnt = x & (((uint32_t)1 << 23) - 1);
377
378 // Handle subnormals:
379 if (*exp) {
380 *mnt |= (uint32_t)1 << 23;
381 } else {
382 ++*exp;
383 if ((mode & FPLIB_FZ) && *mnt) {
384 *flags |= FPLIB_IDC;
385 *mnt = 0;
386 }
387 }
388}
389
390static inline void
391fp64_unpack(int *sgn, int *exp, uint64_t *mnt, uint64_t x, int mode,
392 int *flags)
393{
394 *sgn = x >> 63;
395 *exp = x >> 52 & 2047;
396 *mnt = x & (((uint64_t)1 << 52) - 1);
397
398 // Handle subnormals:
399 if (*exp) {
400 *mnt |= (uint64_t)1 << 52;
401 } else {
402 ++*exp;
403 if ((mode & FPLIB_FZ) && *mnt) {
404 *flags |= FPLIB_IDC;
405 *mnt = 0;
406 }
407 }
408}
409
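// Quieten a signalling NaN (raising Invalid Operation) and substitute the
// default NaN when default-NaN mode is selected.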
410static inline uint32_t
411fp32_process_NaN(uint32_t a, int mode, int *flags)
412{
413 if (!(a >> 22 & 1)) {
414 *flags |= FPLIB_IOC;
415 a |= (uint32_t)1 << 22;
416 }
417 return mode & FPLIB_DN ? fp32_defaultNaN() : a;
418}
419
420static inline uint64_t
421fp64_process_NaN(uint64_t a, int mode, int *flags)
422{
423 if (!(a >> 51 & 1)) {
424 *flags |= FPLIB_IOC;
425 a |= (uint64_t)1 << 51;
426 }
427 return mode & FPLIB_DN ? fp64_defaultNaN() : a;
428}
429
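// Return the result NaN (if any) for a two-operand operation: signalling
// NaNs take priority over quiet NaNs, and the first operand over the second.
// Returns 0 when neither operand is a NaN.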
430static uint32_t
431fp32_process_NaNs(uint32_t a, uint32_t b, int mode, int *flags)
432{
433 int a_exp = a >> 23 & 255;
434 uint32_t a_mnt = a & (((uint32_t)1 << 23) - 1);
435 int b_exp = b >> 23 & 255;
436 uint32_t b_mnt = b & (((uint32_t)1 << 23) - 1);
437
438 // Handle signalling NaNs:
439 if (a_exp == 255 && a_mnt && !(a_mnt >> 22 & 1))
440 return fp32_process_NaN(a, mode, flags);
441 if (b_exp == 255 && b_mnt && !(b_mnt >> 22 & 1))
442 return fp32_process_NaN(b, mode, flags);
443
444 // Handle quiet NaNs:
445 if (a_exp == 255 && a_mnt)
446 return fp32_process_NaN(a, mode, flags);
447 if (b_exp == 255 && b_mnt)
448 return fp32_process_NaN(b, mode, flags);
449
450 return 0;
451}
452
453static uint64_t
454fp64_process_NaNs(uint64_t a, uint64_t b, int mode, int *flags)
455{
456 int a_exp = a >> 52 & 2047;
457 uint64_t a_mnt = a & (((uint64_t)1 << 52) - 1);
458 int b_exp = b >> 52 & 2047;
459 uint64_t b_mnt = b & (((uint64_t)1 << 52) - 1);
460
461 // Handle signalling NaNs:
462 if (a_exp == 2047 && a_mnt && !(a_mnt >> 51 & 1))
463 return fp64_process_NaN(a, mode, flags);
464 if (b_exp == 2047 && b_mnt && !(b_mnt >> 51 & 1))
465 return fp64_process_NaN(b, mode, flags);
466
467 // Handle quiet NaNs:
468 if (a_exp == 2047 && a_mnt)
469 return fp64_process_NaN(a, mode, flags);
470 if (b_exp == 2047 && b_mnt)
471 return fp64_process_NaN(b, mode, flags);
472
473 return 0;
474}
475
476static uint32_t
477fp32_process_NaNs3(uint32_t a, uint32_t b, uint32_t c, int mode, int *flags)
478{
479 int a_exp = a >> 23 & 255;
480 uint32_t a_mnt = a & (((uint32_t)1 << 23) - 1);
481 int b_exp = b >> 23 & 255;
482 uint32_t b_mnt = b & (((uint32_t)1 << 23) - 1);
483 int c_exp = c >> 23 & 255;
484 uint32_t c_mnt = c & (((uint32_t)1 << 23) - 1);
485
486 // Handle signalling NaNs:
487 if (a_exp == 255 && a_mnt && !(a_mnt >> 22 & 1))
488 return fp32_process_NaN(a, mode, flags);
489 if (b_exp == 255 && b_mnt && !(b_mnt >> 22 & 1))
490 return fp32_process_NaN(b, mode, flags);
491 if (c_exp == 255 && c_mnt && !(c_mnt >> 22 & 1))
492 return fp32_process_NaN(c, mode, flags);
493
494 // Handle quiet NaNs:
495 if (a_exp == 255 && a_mnt)
496 return fp32_process_NaN(a, mode, flags);
497 if (b_exp == 255 && b_mnt)
498 return fp32_process_NaN(b, mode, flags);
499 if (c_exp == 255 && c_mnt)
500 return fp32_process_NaN(c, mode, flags);
501
502 return 0;
503}
504
505static uint64_t
506fp64_process_NaNs3(uint64_t a, uint64_t b, uint64_t c, int mode, int *flags)
507{
508 int a_exp = a >> 52 & 2047;
509 uint64_t a_mnt = a & (((uint64_t)1 << 52) - 1);
510 int b_exp = b >> 52 & 2047;
511 uint64_t b_mnt = b & (((uint64_t)1 << 52) - 1);
512 int c_exp = c >> 52 & 2047;
513 uint64_t c_mnt = c & (((uint64_t)1 << 52) - 1);
514
515 // Handle signalling NaNs:
516 if (a_exp == 2047 && a_mnt && !(a_mnt >> 51 & 1))
517 return fp64_process_NaN(a, mode, flags);
518 if (b_exp == 2047 && b_mnt && !(b_mnt >> 51 & 1))
519 return fp64_process_NaN(b, mode, flags);
520 if (c_exp == 2047 && c_mnt && !(c_mnt >> 51 & 1))
521 return fp64_process_NaN(c, mode, flags);
522
523 // Handle quiet NaNs:
524 if (a_exp == 2047 && a_mnt)
525 return fp64_process_NaN(a, mode, flags);
526 if (b_exp == 2047 && b_mnt)
527 return fp64_process_NaN(b, mode, flags);
528 if (c_exp == 2047 && c_mnt)
529 return fp64_process_NaN(c, mode, flags);
530
531 return 0;
532}
533
534static uint16_t
535fp16_round_(int sgn, int exp, uint16_t mnt, int rm, int mode, int *flags)
536{
537 int biased_exp; // non-negative exponent value for result
538 uint16_t int_mant; // mantissa for result, less than (1 << 11)
539 int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5
540
541 assert(rm != FPRounding_TIEAWAY);
542
543 // There is no flush to zero in this case!
544
545 // The bottom 5 bits of mnt are orred together:
546 mnt = (uint16_t)1 << 12 | mnt >> 4 | ((mnt & 31) != 0);
547
548 if (exp > 0) {
549 biased_exp = exp;
550 int_mant = mnt >> 2;
551 error = mnt & 3;
552 } else {
553 biased_exp = 0;
554 int_mant = lsr16(mnt, 3 - exp);
555 error = (lsr16(mnt, 1 - exp) & 3) | !!(mnt & (lsl16(1, 1 - exp) - 1));
556 }
557
558 if (!biased_exp && error) { // xx should also check fpscr_val<11>
559 *flags |= FPLIB_UFC;
560 }
561
562 // Round up:
563 if ((rm == FPLIB_RN && (error == 3 ||
564 (error == 2 && (int_mant & 1)))) ||
565 (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) {
566 ++int_mant;
567 if (int_mant == (uint32_t)1 << 10) {
568 // Rounded up from denormalized to normalized
569 biased_exp = 1;
570 }
571 if (int_mant == (uint32_t)1 << 11) {
572 // Rounded up to next exponent
573 ++biased_exp;
574 int_mant >>= 1;
575 }
576 }
577
578 // Handle rounding to odd aka Von Neumann rounding:
579 if (error && rm == FPRounding_ODD)
580 int_mant |= 1;
581
582 // Handle overflow:
583 if (!(mode & FPLIB_AHP)) {
584 if (biased_exp >= 31) {
585 *flags |= FPLIB_OFC | FPLIB_IXC;
586 if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) ||
587 (rm == FPLIB_RM && sgn)) {
588 return fp16_infinity(sgn);
589 } else {
590 return fp16_max_normal(sgn);
591 }
592 }
593 } else {
594 if (biased_exp >= 32) {
595 *flags |= FPLIB_IOC;
596 return fp16_pack(sgn, 31, -1);
597 }
598 }
599
600 if (error) {
601 *flags |= FPLIB_IXC;
602 }
603
604 return fp16_pack(sgn, biased_exp, int_mant);
605}
606
607static uint32_t
608fp32_round_(int sgn, int exp, uint32_t mnt, int rm, int mode, int *flags)
609{
610 int biased_exp; // non-negative exponent value for result
611 uint32_t int_mant; // mantissa for result, less than (1 << 24)
612 int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5
613
614 assert(rm != FPRounding_TIEAWAY);
615
616 // Flush to zero:
617 if ((mode & FPLIB_FZ) && exp < 1) {
618 *flags |= FPLIB_UFC;
619 return fp32_zero(sgn);
620 }
621
622 // The bottom 8 bits of mnt are orred together:
623 mnt = (uint32_t)1 << 25 | mnt >> 7 | ((mnt & 255) != 0);
624
625 if (exp > 0) {
626 biased_exp = exp;
627 int_mant = mnt >> 2;
628 error = mnt & 3;
629 } else {
630 biased_exp = 0;
631 int_mant = lsr32(mnt, 3 - exp);
632 error = (lsr32(mnt, 1 - exp) & 3) | !!(mnt & (lsl32(1, 1 - exp) - 1));
633 }
634
635 if (!biased_exp && error) { // xx should also check fpscr_val<11>
636 *flags |= FPLIB_UFC;
637 }
638
639 // Round up:
640 if ((rm == FPLIB_RN && (error == 3 ||
641 (error == 2 && (int_mant & 1)))) ||
642 (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) {
643 ++int_mant;
644 if (int_mant == (uint32_t)1 << 23) {
645 // Rounded up from denormalized to normalized
646 biased_exp = 1;
647 }
648 if (int_mant == (uint32_t)1 << 24) {
649 // Rounded up to next exponent
650 ++biased_exp;
651 int_mant >>= 1;
652 }
653 }
654
655 // Handle rounding to odd aka Von Neumann rounding:
656 if (error && rm == FPRounding_ODD)
657 int_mant |= 1;
658
659 // Handle overflow:
660 if (biased_exp >= 255) {
661 *flags |= FPLIB_OFC | FPLIB_IXC;
662 if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) ||
663 (rm == FPLIB_RM && sgn)) {
664 return fp32_infinity(sgn);
665 } else {
666 return fp32_max_normal(sgn);
667 }
668 }
669
670 if (error) {
671 *flags |= FPLIB_IXC;
672 }
673
674 return fp32_pack(sgn, biased_exp, int_mant);
675}
676
677static uint32_t
678fp32_round(int sgn, int exp, uint32_t mnt, int mode, int *flags)
679{
680 return fp32_round_(sgn, exp, mnt, mode & 3, mode, flags);
681}
682
683static uint64_t
684fp64_round_(int sgn, int exp, uint64_t mnt, int rm, int mode, int *flags)
685{
686 int biased_exp; // non-negative exponent value for result
687     uint64_t int_mant; // mantissa for result, less than (1 << 53)
688 int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5
689
690 assert(rm != FPRounding_TIEAWAY);
691
692 // Flush to zero:
693 if ((mode & FPLIB_FZ) && exp < 1) {
694 *flags |= FPLIB_UFC;
695 return fp64_zero(sgn);
696 }
697
698 // The bottom 11 bits of mnt are orred together:
699 mnt = (uint64_t)1 << 54 | mnt >> 10 | ((mnt & 0x3ff) != 0);
700
701 if (exp > 0) {
702 biased_exp = exp;
703 int_mant = mnt >> 2;
704 error = mnt & 3;
705 } else {
706 biased_exp = 0;
707 int_mant = lsr64(mnt, 3 - exp);
708 error = (lsr64(mnt, 1 - exp) & 3) | !!(mnt & (lsl64(1, 1 - exp) - 1));
709 }
710
711 if (!biased_exp && error) { // xx should also check fpscr_val<11>
712 *flags |= FPLIB_UFC;
713 }
714
715 // Round up:
716 if ((rm == FPLIB_RN && (error == 3 ||
717 (error == 2 && (int_mant & 1)))) ||
718 (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) {
719 ++int_mant;
720 if (int_mant == (uint64_t)1 << 52) {
721 // Rounded up from denormalized to normalized
722 biased_exp = 1;
723 }
724 if (int_mant == (uint64_t)1 << 53) {
725 // Rounded up to next exponent
726 ++biased_exp;
727 int_mant >>= 1;
728 }
729 }
730
731 // Handle rounding to odd aka Von Neumann rounding:
732 if (error && rm == FPRounding_ODD)
733 int_mant |= 1;
734
735 // Handle overflow:
736 if (biased_exp >= 2047) {
737 *flags |= FPLIB_OFC | FPLIB_IXC;
738 if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) ||
739 (rm == FPLIB_RM && sgn)) {
740 return fp64_infinity(sgn);
741 } else {
742 return fp64_max_normal(sgn);
743 }
744 }
745
746 if (error) {
747 *flags |= FPLIB_IXC;
748 }
749
750 return fp64_pack(sgn, biased_exp, int_mant);
751}
752
753static uint64_t
754fp64_round(int sgn, int exp, uint64_t mnt, int mode, int *flags)
755{
756 return fp64_round_(sgn, exp, mnt, mode & 3, mode, flags);
757}
758
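// Comparison helpers. A NaN operand makes the result unordered: the equality
// test returns false and raises Invalid Operation only for signalling NaNs,
// while the >= and > tests return false and always raise Invalid Operation.
// Zeroes of either sign compare equal.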
759static int
760fp32_compare_eq(uint32_t a, uint32_t b, int mode, int *flags)
761{
762 int a_sgn, a_exp, b_sgn, b_exp;
763 uint32_t a_mnt, b_mnt;
764
765 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
766 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
767
768 if ((a_exp == 255 && (uint32_t)(a_mnt << 9)) ||
769 (b_exp == 255 && (uint32_t)(b_mnt << 9))) {
770 if ((a_exp == 255 && (uint32_t)(a_mnt << 9) && !(a >> 22 & 1)) ||
771 (b_exp == 255 && (uint32_t)(b_mnt << 9) && !(b >> 22 & 1)))
772 *flags |= FPLIB_IOC;
773 return 0;
774 }
775 return a == b || (!a_mnt && !b_mnt);
776}
777
778static int
779fp32_compare_ge(uint32_t a, uint32_t b, int mode, int *flags)
780{
781 int a_sgn, a_exp, b_sgn, b_exp;
782 uint32_t a_mnt, b_mnt;
783
784 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
785 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
786
787 if ((a_exp == 255 && (uint32_t)(a_mnt << 9)) ||
788 (b_exp == 255 && (uint32_t)(b_mnt << 9))) {
789 *flags |= FPLIB_IOC;
790 return 0;
791 }
792 if (!a_mnt && !b_mnt)
793 return 1;
794 if (a_sgn != b_sgn)
795 return b_sgn;
796 if (a_exp != b_exp)
797 return a_sgn ^ (a_exp > b_exp);
798 if (a_mnt != b_mnt)
799 return a_sgn ^ (a_mnt > b_mnt);
800 return 1;
801}
802
803static int
804fp32_compare_gt(uint32_t a, uint32_t b, int mode, int *flags)
805{
806 int a_sgn, a_exp, b_sgn, b_exp;
807 uint32_t a_mnt, b_mnt;
808
809 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
810 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
811
812 if ((a_exp == 255 && (uint32_t)(a_mnt << 9)) ||
813 (b_exp == 255 && (uint32_t)(b_mnt << 9))) {
814 *flags |= FPLIB_IOC;
815 return 0;
816 }
817 if (!a_mnt && !b_mnt)
818 return 0;
819 if (a_sgn != b_sgn)
820 return b_sgn;
821 if (a_exp != b_exp)
822 return a_sgn ^ (a_exp > b_exp);
823 if (a_mnt != b_mnt)
824 return a_sgn ^ (a_mnt > b_mnt);
825 return 0;
826}
827
828static int
829fp64_compare_eq(uint64_t a, uint64_t b, int mode, int *flags)
830{
831 int a_sgn, a_exp, b_sgn, b_exp;
832 uint64_t a_mnt, b_mnt;
833
834 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
835 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
836
837 if ((a_exp == 2047 && (uint64_t)(a_mnt << 12)) ||
838 (b_exp == 2047 && (uint64_t)(b_mnt << 12))) {
839 if ((a_exp == 2047 && (uint64_t)(a_mnt << 12) && !(a >> 51 & 1)) ||
840 (b_exp == 2047 && (uint64_t)(b_mnt << 12) && !(b >> 51 & 1)))
841 *flags |= FPLIB_IOC;
842 return 0;
843 }
844 return a == b || (!a_mnt && !b_mnt);
845}
846
847static int
848fp64_compare_ge(uint64_t a, uint64_t b, int mode, int *flags)
849{
850 int a_sgn, a_exp, b_sgn, b_exp;
851 uint64_t a_mnt, b_mnt;
852
853 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
854 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
855
856 if ((a_exp == 2047 && (uint64_t)(a_mnt << 12)) ||
857 (b_exp == 2047 && (uint64_t)(b_mnt << 12))) {
858 *flags |= FPLIB_IOC;
859 return 0;
860 }
861 if (!a_mnt && !b_mnt)
862 return 1;
863 if (a_sgn != b_sgn)
864 return b_sgn;
865 if (a_exp != b_exp)
866 return a_sgn ^ (a_exp > b_exp);
867 if (a_mnt != b_mnt)
868 return a_sgn ^ (a_mnt > b_mnt);
869 return 1;
870}
871
872static int
873fp64_compare_gt(uint64_t a, uint64_t b, int mode, int *flags)
874{
875 int a_sgn, a_exp, b_sgn, b_exp;
876 uint64_t a_mnt, b_mnt;
877
878 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
879 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
880
881 if ((a_exp == 2047 && (uint64_t)(a_mnt << 12)) ||
882 (b_exp == 2047 && (uint64_t)(b_mnt << 12))) {
883 *flags |= FPLIB_IOC;
884 return 0;
885 }
886 if (!a_mnt && !b_mnt)
887 return 0;
888 if (a_sgn != b_sgn)
889 return b_sgn;
890 if (a_exp != b_exp)
891 return a_sgn ^ (a_exp > b_exp);
892 if (a_mnt != b_mnt)
893 return a_sgn ^ (a_mnt > b_mnt);
894 return 0;
895}
896
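// Add two single-precision values; if 'neg' is set the sign of b is flipped
// first, giving subtraction.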
897static uint32_t
898fp32_add(uint32_t a, uint32_t b, int neg, int mode, int *flags)
899{
900 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
901 uint32_t a_mnt, b_mnt, x, x_mnt;
902
903 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
904 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
905
906 if ((x = fp32_process_NaNs(a, b, mode, flags))) {
907 return x;
908 }
909
910 b_sgn ^= neg;
911
912 // Handle infinities and zeroes:
913 if (a_exp == 255 && b_exp == 255 && a_sgn != b_sgn) {
914 *flags |= FPLIB_IOC;
915 return fp32_defaultNaN();
916 } else if (a_exp == 255) {
917 return fp32_infinity(a_sgn);
918 } else if (b_exp == 255) {
919 return fp32_infinity(b_sgn);
920 } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) {
921 return fp32_zero(a_sgn);
922 }
923
924 a_mnt <<= 3;
925 b_mnt <<= 3;
926 if (a_exp >= b_exp) {
927 b_mnt = (lsr32(b_mnt, a_exp - b_exp) |
928 !!(b_mnt & (lsl32(1, a_exp - b_exp) - 1)));
929 b_exp = a_exp;
930 } else {
931 a_mnt = (lsr32(a_mnt, b_exp - a_exp) |
932 !!(a_mnt & (lsl32(1, b_exp - a_exp) - 1)));
933 a_exp = b_exp;
934 }
935 x_sgn = a_sgn;
936 x_exp = a_exp;
937 if (a_sgn == b_sgn) {
938 x_mnt = a_mnt + b_mnt;
939 } else if (a_mnt >= b_mnt) {
940 x_mnt = a_mnt - b_mnt;
941 } else {
942 x_sgn ^= 1;
943 x_mnt = b_mnt - a_mnt;
944 }
945
946 if (!x_mnt) {
947 // Sign of exact zero result depends on rounding mode
948 return fp32_zero((mode & 3) == 2);
949 }
950
951 x_mnt = fp32_normalise(x_mnt, &x_exp);
952
953 return fp32_round(x_sgn, x_exp + 5, x_mnt << 1, mode, flags);
954}
955
956static uint64_t
957fp64_add(uint64_t a, uint64_t b, int neg, int mode, int *flags)
958{
959 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
960 uint64_t a_mnt, b_mnt, x, x_mnt;
961
962 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
963 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
964
965 if ((x = fp64_process_NaNs(a, b, mode, flags))) {
966 return x;
967 }
968
969 b_sgn ^= neg;
970
971 // Handle infinities and zeroes:
972 if (a_exp == 2047 && b_exp == 2047 && a_sgn != b_sgn) {
973 *flags |= FPLIB_IOC;
974 return fp64_defaultNaN();
975 } else if (a_exp == 2047) {
976 return fp64_infinity(a_sgn);
977 } else if (b_exp == 2047) {
978 return fp64_infinity(b_sgn);
979 } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) {
980 return fp64_zero(a_sgn);
981 }
982
983 a_mnt <<= 3;
984 b_mnt <<= 3;
985 if (a_exp >= b_exp) {
986 b_mnt = (lsr64(b_mnt, a_exp - b_exp) |
987 !!(b_mnt & (lsl64(1, a_exp - b_exp) - 1)));
988 b_exp = a_exp;
989 } else {
990 a_mnt = (lsr64(a_mnt, b_exp - a_exp) |
991 !!(a_mnt & (lsl64(1, b_exp - a_exp) - 1)));
992 a_exp = b_exp;
993 }
994 x_sgn = a_sgn;
995 x_exp = a_exp;
996 if (a_sgn == b_sgn) {
997 x_mnt = a_mnt + b_mnt;
998 } else if (a_mnt >= b_mnt) {
999 x_mnt = a_mnt - b_mnt;
1000 } else {
1001 x_sgn ^= 1;
1002 x_mnt = b_mnt - a_mnt;
1003 }
1004
1005 if (!x_mnt) {
1006 // Sign of exact zero result depends on rounding mode
1007 return fp64_zero((mode & 3) == 2);
1008 }
1009
1010 x_mnt = fp64_normalise(x_mnt, &x_exp);
1011
1012 return fp64_round(x_sgn, x_exp + 8, x_mnt << 1, mode, flags);
1013}
1014
1015static uint32_t
1016fp32_mul(uint32_t a, uint32_t b, int mode, int *flags)
1017{
1018 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1019 uint32_t a_mnt, b_mnt, x;
1020 uint64_t x_mnt;
1021
1022 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1023 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1024
1025 if ((x = fp32_process_NaNs(a, b, mode, flags))) {
1026 return x;
1027 }
1028
1029 // Handle infinities and zeroes:
1030 if ((a_exp == 255 && !b_mnt) || (b_exp == 255 && !a_mnt)) {
1031 *flags |= FPLIB_IOC;
1032 return fp32_defaultNaN();
1033 } else if (a_exp == 255 || b_exp == 255) {
1034 return fp32_infinity(a_sgn ^ b_sgn);
1035 } else if (!a_mnt || !b_mnt) {
1036 return fp32_zero(a_sgn ^ b_sgn);
1037 }
1038
1039 // Multiply and normalise:
1040 x_sgn = a_sgn ^ b_sgn;
1041 x_exp = a_exp + b_exp - 110;
1042 x_mnt = (uint64_t)a_mnt * b_mnt;
1043 x_mnt = fp64_normalise(x_mnt, &x_exp);
1044
1045 // Convert to 32 bits, collapsing error into bottom bit:
1046 x_mnt = lsr64(x_mnt, 31) | !!lsl64(x_mnt, 33);
1047
1048 return fp32_round(x_sgn, x_exp, x_mnt, mode, flags);
1049}
1050
1051static uint64_t
1052fp64_mul(uint64_t a, uint64_t b, int mode, int *flags)
1053{
1054 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1055 uint64_t a_mnt, b_mnt, x;
1056 uint64_t x0_mnt, x1_mnt;
1057
1058 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1059 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1060
1061 if ((x = fp64_process_NaNs(a, b, mode, flags))) {
1062 return x;
1063 }
1064
1065 // Handle infinities and zeroes:
1066 if ((a_exp == 2047 && !b_mnt) || (b_exp == 2047 && !a_mnt)) {
1067 *flags |= FPLIB_IOC;
1068 return fp64_defaultNaN();
1069 } else if (a_exp == 2047 || b_exp == 2047) {
1070 return fp64_infinity(a_sgn ^ b_sgn);
1071 } else if (!a_mnt || !b_mnt) {
1072 return fp64_zero(a_sgn ^ b_sgn);
1073 }
1074
1075 // Multiply and normalise:
1076 x_sgn = a_sgn ^ b_sgn;
1077 x_exp = a_exp + b_exp - 1000;
1078 mul62x62(&x0_mnt, &x1_mnt, a_mnt, b_mnt);
1079 fp128_normalise(&x0_mnt, &x1_mnt, &x_exp);
1080
1081 // Convert to 64 bits, collapsing error into bottom bit:
1082 x0_mnt = x1_mnt << 1 | !!x0_mnt;
1083
1084 return fp64_round(x_sgn, x_exp, x0_mnt, mode, flags);
1085}
1086
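// Fused multiply-add: compute a + b * c with a single rounding. 'scale' is a
// power-of-two exponent adjustment applied before the final rounding; for
// example, fplibRSqrtStepFused passes -1 to halve (3 - a * b).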
1087static uint32_t
1088fp32_muladd(uint32_t a, uint32_t b, uint32_t c, int scale,
1089 int mode, int *flags)
1090{
1091 int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp;
1092 uint32_t a_mnt, b_mnt, c_mnt, x;
1093 uint64_t x_mnt, y_mnt;
1094
1095 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1096 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1097 fp32_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags);
1098
1099 x = fp32_process_NaNs3(a, b, c, mode, flags);
1100
1101 // Quiet NaN added to product of zero and infinity:
1102 if (a_exp == 255 && (a_mnt >> 22 & 1) &&
1103 ((!b_mnt && c_exp == 255 && !(uint32_t)(c_mnt << 9)) ||
1104 (!c_mnt && b_exp == 255 && !(uint32_t)(b_mnt << 9)))) {
1105 x = fp32_defaultNaN();
1106 *flags |= FPLIB_IOC;
1107 }
1108
1109 if (x) {
1110 return x;
1111 }
1112
1113 // Handle infinities and zeroes:
1114 if ((b_exp == 255 && !c_mnt) ||
1115 (c_exp == 255 && !b_mnt) ||
1116 (a_exp == 255 && (b_exp == 255 || c_exp == 255) &&
1117 (a_sgn != (b_sgn ^ c_sgn)))) {
1118 *flags |= FPLIB_IOC;
1119 return fp32_defaultNaN();
1120 }
1121 if (a_exp == 255)
1122 return fp32_infinity(a_sgn);
1123 if (b_exp == 255 || c_exp == 255)
1124 return fp32_infinity(b_sgn ^ c_sgn);
1125 if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn))
1126 return fp32_zero(a_sgn);
1127
1128 x_sgn = a_sgn;
1129 x_exp = a_exp + 13;
1130 x_mnt = (uint64_t)a_mnt << 27;
1131
1132 // Multiply:
1133 y_sgn = b_sgn ^ c_sgn;
1134 y_exp = b_exp + c_exp - 113;
1135 y_mnt = (uint64_t)b_mnt * c_mnt << 3;
1136 if (!y_mnt) {
1137 y_exp = x_exp;
1138 }
1139
1140 // Add:
1141 if (x_exp >= y_exp) {
1142 y_mnt = (lsr64(y_mnt, x_exp - y_exp) |
1143 !!(y_mnt & (lsl64(1, x_exp - y_exp) - 1)));
1144 y_exp = x_exp;
1145 } else {
1146 x_mnt = (lsr64(x_mnt, y_exp - x_exp) |
1147 !!(x_mnt & (lsl64(1, y_exp - x_exp) - 1)));
1148 x_exp = y_exp;
1149 }
1150 if (x_sgn == y_sgn) {
1151 x_mnt = x_mnt + y_mnt;
1152 } else if (x_mnt >= y_mnt) {
1153 x_mnt = x_mnt - y_mnt;
1154 } else {
1155 x_sgn ^= 1;
1156 x_mnt = y_mnt - x_mnt;
1157 }
1158
1159 if (!x_mnt) {
1160 // Sign of exact zero result depends on rounding mode
1161 return fp32_zero((mode & 3) == 2);
1162 }
1163
1164 // Normalise and convert to 32 bits, collapsing error into bottom bit:
1165 x_mnt = fp64_normalise(x_mnt, &x_exp);
1166 x_mnt = x_mnt >> 31 | !!(uint32_t)(x_mnt << 1);
1167
1168 return fp32_round(x_sgn, x_exp + scale, x_mnt, mode, flags);
1169}
1170
1171static uint64_t
1172fp64_muladd(uint64_t a, uint64_t b, uint64_t c, int scale,
1173 int mode, int *flags)
1174{
1175 int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp;
1176 uint64_t a_mnt, b_mnt, c_mnt, x;
1177 uint64_t x0_mnt, x1_mnt, y0_mnt, y1_mnt;
1178
1179 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1180 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1181 fp64_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags);
1182
1183 x = fp64_process_NaNs3(a, b, c, mode, flags);
1184
1185 // Quiet NaN added to product of zero and infinity:
1186 if (a_exp == 2047 && (a_mnt >> 51 & 1) &&
1187 ((!b_mnt && c_exp == 2047 && !(uint64_t)(c_mnt << 12)) ||
1188 (!c_mnt && b_exp == 2047 && !(uint64_t)(b_mnt << 12)))) {
1189 x = fp64_defaultNaN();
1190 *flags |= FPLIB_IOC;
1191 }
1192
1193 if (x) {
1194 return x;
1195 }
1196
1197 // Handle infinities and zeroes:
1198 if ((b_exp == 2047 && !c_mnt) ||
1199 (c_exp == 2047 && !b_mnt) ||
1200 (a_exp == 2047 && (b_exp == 2047 || c_exp == 2047) &&
1201 (a_sgn != (b_sgn ^ c_sgn)))) {
1202 *flags |= FPLIB_IOC;
1203 return fp64_defaultNaN();
1204 }
1205 if (a_exp == 2047)
1206 return fp64_infinity(a_sgn);
1207 if (b_exp == 2047 || c_exp == 2047)
1208 return fp64_infinity(b_sgn ^ c_sgn);
1209 if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn))
1210 return fp64_zero(a_sgn);
1211
1212 x_sgn = a_sgn;
1213 x_exp = a_exp + 11;
1214 x0_mnt = 0;
1215 x1_mnt = a_mnt;
1216
1217 // Multiply:
1218 y_sgn = b_sgn ^ c_sgn;
1219 y_exp = b_exp + c_exp - 1003;
1220 mul62x62(&y0_mnt, &y1_mnt, b_mnt, c_mnt << 3);
1221 if (!y0_mnt && !y1_mnt) {
1222 y_exp = x_exp;
1223 }
1224
1225 // Add:
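    // Same alignment as the 32-bit case, but on a 128-bit mantissa held in
    // two 64-bit halves; the lsl128/lsr128 pair recovers the shifted-out
    // bits so they can be folded into the sticky bit.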
1226 if (x_exp >= y_exp) {
1227 uint64_t t0, t1;
1228 lsl128(&t0, &t1, y0_mnt, y1_mnt,
1229 x_exp - y_exp < 128 ? 128 - (x_exp - y_exp) : 0);
1230 lsr128(&y0_mnt, &y1_mnt, y0_mnt, y1_mnt, x_exp - y_exp);
1231 y0_mnt |= !!(t0 | t1);
1232 y_exp = x_exp;
1233 } else {
1234 uint64_t t0, t1;
1235 lsl128(&t0, &t1, x0_mnt, x1_mnt,
1236 y_exp - x_exp < 128 ? 128 - (y_exp - x_exp) : 0);
1237 lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y_exp - x_exp);
1238 x0_mnt |= !!(t0 | t1);
1239 x_exp = y_exp;
1240 }
1241 if (x_sgn == y_sgn) {
1242 add128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y0_mnt, y1_mnt);
1243 } else if (cmp128(x0_mnt, x1_mnt, y0_mnt, y1_mnt) >= 0) {
1244 sub128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y0_mnt, y1_mnt);
1245 } else {
1246 x_sgn ^= 1;
1247 sub128(&x0_mnt, &x1_mnt, y0_mnt, y1_mnt, x0_mnt, x1_mnt);
1248 }
1249
1250 if (!x0_mnt && !x1_mnt) {
1251 // Sign of exact zero result depends on rounding mode
1252 return fp64_zero((mode & 3) == 2);
1253 }
1254
1255 // Normalise and convert to 64 bits, collapsing error into bottom bit:
1256 fp128_normalise(&x0_mnt, &x1_mnt, &x_exp);
1257 x0_mnt = x1_mnt << 1 | !!x0_mnt;
1258
1259 return fp64_round(x_sgn, x_exp + scale, x0_mnt, mode, flags);
1260}
1261
1262static uint32_t
1263fp32_div(uint32_t a, uint32_t b, int mode, int *flags)
1264{
1265 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1266 uint32_t a_mnt, b_mnt, x;
1267 uint64_t x_mnt;
1268
1269 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1270 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1271
1272 if ((x = fp32_process_NaNs(a, b, mode, flags)))
1273 return x;
1274
1275 // Handle infinities and zeroes:
1276 if ((a_exp == 255 && b_exp == 255) || (!a_mnt && !b_mnt)) {
1277 *flags |= FPLIB_IOC;
1278 return fp32_defaultNaN();
1279 }
1280 if (a_exp == 255 || !b_mnt) {
1281 if (a_exp != 255)
1282 *flags |= FPLIB_DZC;
1283 return fp32_infinity(a_sgn ^ b_sgn);
1284 }
1285 if (!a_mnt || b_exp == 255)
1286 return fp32_zero(a_sgn ^ b_sgn);
1287
1288 // Divide, setting bottom bit if inexact:
1289 a_mnt = fp32_normalise(a_mnt, &a_exp);
1290 x_sgn = a_sgn ^ b_sgn;
1291 x_exp = a_exp - b_exp + 172;
1292 x_mnt = ((uint64_t)a_mnt << 18) / b_mnt;
1293 x_mnt |= (x_mnt * b_mnt != (uint64_t)a_mnt << 18);
1294
1295 // Normalise and convert to 32 bits, collapsing error into bottom bit:
1296 x_mnt = fp64_normalise(x_mnt, &x_exp);
1297 x_mnt = x_mnt >> 31 | !!(uint32_t)(x_mnt << 1);
1298
1299 return fp32_round(x_sgn, x_exp, x_mnt, mode, flags);
1300}
1301
1302static uint64_t
1303fp64_div(uint64_t a, uint64_t b, int mode, int *flags)
1304{
1305 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp, c;
1306 uint64_t a_mnt, b_mnt, x, x_mnt, x0_mnt, x1_mnt;
1307
1308 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1309 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1310
1311 if ((x = fp64_process_NaNs(a, b, mode, flags)))
1312 return x;
1313
1314 // Handle infinities and zeroes:
1315 if ((a_exp == 2047 && b_exp == 2047) || (!a_mnt && !b_mnt)) {
1316 *flags |= FPLIB_IOC;
1317 return fp64_defaultNaN();
1318 }
1319 if (a_exp == 2047 || !b_mnt) {
1320 if (a_exp != 2047)
1321 *flags |= FPLIB_DZC;
1322 return fp64_infinity(a_sgn ^ b_sgn);
1323 }
1324 if (!a_mnt || b_exp == 2047)
1325 return fp64_zero(a_sgn ^ b_sgn);
1326
1327 // Find reciprocal of divisor with Newton-Raphson:
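    // Sketch of the steps below: start from a ~32-bit reciprocal estimate
    // obtained by integer division, refine it with one Newton-Raphson
    // iteration (x' = x * (2 - b * x)), multiply by the dividend, and then
    // correct the quotient by at most one ulp using the comparison below.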
1328 a_mnt = fp64_normalise(a_mnt, &a_exp);
1329 b_mnt = fp64_normalise(b_mnt, &b_exp);
1330 x_mnt = ~(uint64_t)0 / (b_mnt >> 31);
1331 mul64x32(&x0_mnt, &x1_mnt, b_mnt, x_mnt);
1332 sub128(&x0_mnt, &x1_mnt, 0, (uint64_t)1 << 32, x0_mnt, x1_mnt);
1333 lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 32);
1334 mul64x32(&x0_mnt, &x1_mnt, x0_mnt, x_mnt);
1335 lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 33);
1336
1337 // Multiply by dividend:
1338 x_sgn = a_sgn ^ b_sgn;
1339 x_exp = a_exp - b_exp + 1031;
1340 mul62x62(&x0_mnt, &x1_mnt, x0_mnt, a_mnt >> 2); // a 62x62-bit multiply is sufficient here
1341 lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 4);
1342 x_mnt = x1_mnt;
1343
1344 // This is an underestimate, so try adding one:
1345 mul62x62(&x0_mnt, &x1_mnt, b_mnt >> 2, x_mnt + 1); // a 62x62-bit multiply is sufficient here
1346 c = cmp128(x0_mnt, x1_mnt, 0, a_mnt >> 11);
1347 if (c <= 0) {
1348 ++x_mnt;
1349 }
1350
1351 x_mnt = fp64_normalise(x_mnt, &x_exp);
1352
1353 return fp64_round(x_sgn, x_exp, x_mnt << 1 | !!c, mode, flags);
1354}
1355
1356static void
1357set_fpscr0(FPSCR &fpscr, int flags)
1358{
1359 if (flags & FPLIB_IDC) {
1360 fpscr.idc = 1;
1361 }
1362 if (flags & FPLIB_IOC) {
1363 fpscr.ioc = 1;
1364 }
1365 if (flags & FPLIB_DZC) {
1366 fpscr.dzc = 1;
1367 }
1368 if (flags & FPLIB_OFC) {
1369 fpscr.ofc = 1;
1370 }
1371 if (flags & FPLIB_UFC) {
1372 fpscr.ufc = 1;
1373 }
1374 if (flags & FPLIB_IXC) {
1375 fpscr.ixc = 1;
1376 }
1377}
1378
1379static uint32_t
1380fp32_sqrt(uint32_t a, int mode, int *flags)
1381{
1382 int a_sgn, a_exp, x_sgn, x_exp;
1383 uint32_t a_mnt, x, x_mnt;
1384 uint64_t t0, t1;
1385
1386 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1387
1388 // Handle NaNs:
1389 if (a_exp == 255 && (uint32_t)(a_mnt << 9))
1390 return fp32_process_NaN(a, mode, flags);
1391
1392 // Handle infinities and zeroes:
1393 if (!a_mnt) {
1394 return fp32_zero(a_sgn);
1395 }
1396 if (a_exp == 255 && !a_sgn) {
1397 return fp32_infinity(a_sgn);
1398 }
1399 if (a_sgn) {
1400 *flags |= FPLIB_IOC;
1401 return fp32_defaultNaN();
1402 }
1403
1404 a_mnt = fp32_normalise(a_mnt, &a_exp);
1405 if (!(a_exp & 1)) {
1406 ++a_exp;
1407 a_mnt >>= 1;
1408 }
1409
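    // The exponent parity adjustment above ensures that the result exponent
    // formed below ((a_exp + 147) >> 1) is an exact halving; what follows is
    // a fixed-point Newton iteration for the square root of the mantissa.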
1410 // x = (a * 3 + 5) / 8
1411 x = (a_mnt >> 2) + (a_mnt >> 3) + (5 << 28);
1412
1413 // x = (a / x + x) / 2; // 16-bit accuracy
1414 x = (a_mnt / (x >> 15) + (x >> 16)) << 15;
1415
1416 // x = (a / x + x) / 2; // 16-bit accuracy
1417 x = (a_mnt / (x >> 15) + (x >> 16)) << 15;
1418
1419 // x = (a / x + x) / 2; // 32-bit accuracy
1420 x = ((((uint64_t)a_mnt << 32) / x) >> 2) + (x >> 1);
1421
1422 x_sgn = 0;
1423 x_exp = (a_exp + 147) >> 1;
1424 x_mnt = ((x - (1 << 5)) >> 6) + 1;
1425 t1 = (uint64_t)x_mnt * x_mnt;
1426 t0 = (uint64_t)a_mnt << 19;
1427 if (t1 > t0) {
1428 --x_mnt;
1429 }
1430
1431 x_mnt = fp32_normalise(x_mnt, &x_exp);
1432
1433 return fp32_round(x_sgn, x_exp, x_mnt << 1 | (t1 != t0), mode, flags);
1434}
1435
1436static uint64_t
1437fp64_sqrt(uint64_t a, int mode, int *flags)
1438{
1439 int a_sgn, a_exp, x_sgn, x_exp, c;
1440 uint64_t a_mnt, x_mnt, r, x0, x1;
1441 uint32_t x;
1442
1443 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1444
1445 // Handle NaNs:
1446 if (a_exp == 2047 && (uint64_t)(a_mnt << 12)) {
1447 return fp64_process_NaN(a, mode, flags);
1448 }
1449
1450 // Handle infinities and zeroes:
1451 if (!a_mnt)
1452 return fp64_zero(a_sgn);
1453 if (a_exp == 2047 && !a_sgn)
1454 return fp64_infinity(a_sgn);
1455 if (a_sgn) {
1456 *flags |= FPLIB_IOC;
1457 return fp64_defaultNaN();
1458 }
1459
1460 a_mnt = fp64_normalise(a_mnt, &a_exp);
1461 if (a_exp & 1) {
1462 ++a_exp;
1463 a_mnt >>= 1;
1464 }
1465
1466 // x = (a * 3 + 5) / 8
1467 x = (a_mnt >> 34) + (a_mnt >> 35) + (5 << 28);
1468
1469 // x = (a / x + x) / 2; // 16-bit accuracy
1470 x = ((a_mnt >> 32) / (x >> 15) + (x >> 16)) << 15;
1471
1472 // x = (a / x + x) / 2; // 16-bit accuracy
1473 x = ((a_mnt >> 32) / (x >> 15) + (x >> 16)) << 15;
1474
1475 // x = (a / x + x) / 2; // 32-bit accuracy
1476 x = ((a_mnt / x) >> 2) + (x >> 1);
1477
1478 // r = 1 / x; // 32-bit accuracy
1479 r = ((uint64_t)1 << 62) / x;
1480
1481 // r = r * (2 - x * r); // 64-bit accuracy
1482 mul64x32(&x0, &x1, -(uint64_t)x * r << 1, r);
1483 lsr128(&x0, &x1, x0, x1, 31);
1484
1485 // x = (x + a * r) / 2; // 64-bit accuracy
1486 mul62x62(&x0, &x1, a_mnt >> 10, x0 >> 2);
1487 lsl128(&x0, &x1, x0, x1, 5);
1488 lsr128(&x0, &x1, x0, x1, 56);
1489
1490 x0 = ((uint64_t)x << 31) + (x0 >> 1);
1491
1492 x_sgn = 0;
1493 x_exp = (a_exp + 1053) >> 1;
1494 x_mnt = x0;
1495 x_mnt = ((x_mnt - (1 << 8)) >> 9) + 1;
1496 mul62x62(&x0, &x1, x_mnt, x_mnt);
1497 lsl128(&x0, &x1, x0, x1, 19);
1498 c = cmp128(x0, x1, 0, a_mnt);
1499 if (c > 0)
1500 --x_mnt;
1501
1502 x_mnt = fp64_normalise(x_mnt, &x_exp);
1503
1504 return fp64_round(x_sgn, x_exp, x_mnt << 1 | !!c, mode, flags);
1505}
1506
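// Extract the fplib mode bits from the FPSCR: bits 22-25 hold the rounding
// mode (RMode), FZ and DN in the same encoding as the FPLIB_RN..FPLIB_DN
// constants; AHP is handled separately where half-precision is involved.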
1507static int
1508modeConv(FPSCR fpscr)
1509{
1510 return (((int) fpscr) >> 22) & 0xF;
1511}
1512
1513static void
1514set_fpscr(FPSCR &fpscr, int flags)
1515{
1516 // translate back to FPSCR
1517 bool underflow = false;
1518 if (flags & FPLIB_IDC) {
1519 fpscr.idc = 1;
1520 }
1521 if (flags & FPLIB_IOC) {
1522 fpscr.ioc = 1;
1523 }
1524 if (flags & FPLIB_DZC) {
1525 fpscr.dzc = 1;
1526 }
1527 if (flags & FPLIB_OFC) {
1528 fpscr.ofc = 1;
1529 }
1530 if (flags & FPLIB_UFC) {
1531 underflow = true; // remember underflow so inexact is not set below when flushing to zero
1532 fpscr.ufc = 1;
1533 }
1534 if ((flags & FPLIB_IXC) && !(underflow && fpscr.fz)) {
1535 fpscr.ixc = 1;
1536 }
1537}
1538
1539template <>
1540bool
1541fplibCompareEQ(uint32_t a, uint32_t b, FPSCR &fpscr)
1542{
1543 int flags = 0;
1544 int x = fp32_compare_eq(a, b, modeConv(fpscr), &flags);
1545 set_fpscr(fpscr, flags);
1546 return x;
1547}
1548
1549template <>
1550bool
1551fplibCompareGE(uint32_t a, uint32_t b, FPSCR &fpscr)
1552{
1553 int flags = 0;
1554 int x = fp32_compare_ge(a, b, modeConv(fpscr), &flags);
1555 set_fpscr(fpscr, flags);
1556 return x;
1557}
1558
1559template <>
1560bool
1561fplibCompareGT(uint32_t a, uint32_t b, FPSCR &fpscr)
1562{
1563 int flags = 0;
1564 int x = fp32_compare_gt(a, b, modeConv(fpscr), &flags);
1565 set_fpscr(fpscr, flags);
1566 return x;
1567}
1568
1569template <>
1570bool
1571fplibCompareEQ(uint64_t a, uint64_t b, FPSCR &fpscr)
1572{
1573 int flags = 0;
1574 int x = fp64_compare_eq(a, b, modeConv(fpscr), &flags);
1575 set_fpscr(fpscr, flags);
1576 return x;
1577}
1578
1579template <>
1580bool
1581fplibCompareGE(uint64_t a, uint64_t b, FPSCR &fpscr)
1582{
1583 int flags = 0;
1584 int x = fp64_compare_ge(a, b, modeConv(fpscr), &flags);
1585 set_fpscr(fpscr, flags);
1586 return x;
1587}
1588
1589template <>
1590bool
1591fplibCompareGT(uint64_t a, uint64_t b, FPSCR &fpscr)
1592{
1593 int flags = 0;
1594 int x = fp64_compare_gt(a, b, modeConv(fpscr), &flags);
1595 set_fpscr(fpscr, flags);
1596 return x;
1597}
1598
1599template <>
1600uint32_t
1601fplibAbs(uint32_t op)
1602{
1603 return op & ~((uint32_t)1 << 31);
1604}
1605
1606template <>
1607uint64_t
1608fplibAbs(uint64_t op)
1609{
1610 return op & ~((uint64_t)1 << 63);
1611}
1612
1613template <>
1614uint32_t
1615fplibAdd(uint32_t op1, uint32_t op2, FPSCR &fpscr)
1616{
1617 int flags = 0;
1618 uint32_t result = fp32_add(op1, op2, 0, modeConv(fpscr), &flags);
1619 set_fpscr0(fpscr, flags);
1620 return result;
1621}
1622
1623template <>
1624uint64_t
1625fplibAdd(uint64_t op1, uint64_t op2, FPSCR &fpscr)
1626{
1627 int flags = 0;
1628 uint64_t result = fp64_add(op1, op2, 0, modeConv(fpscr), &flags);
1629 set_fpscr0(fpscr, flags);
1630 return result;
1631}
1632
1633template <>
1634int
1635fplibCompare(uint32_t op1, uint32_t op2, bool signal_nans, FPSCR &fpscr)
1636{
1637 int mode = modeConv(fpscr);
1638 int flags = 0;
1639 int sgn1, exp1, sgn2, exp2, result;
1640 uint32_t mnt1, mnt2;
1641
1642 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
1643 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
1644
1645 if ((exp1 == 255 && (uint32_t)(mnt1 << 9)) ||
1646 (exp2 == 255 && (uint32_t)(mnt2 << 9))) {
1647 result = 3;
1648 if ((exp1 == 255 && (uint32_t)(mnt1 << 9) && !(mnt1 >> 22 & 1)) ||
1649 (exp2 == 255 && (uint32_t)(mnt2 << 9) && !(mnt2 >> 22 & 1)) ||
1650 signal_nans)
1651 flags |= FPLIB_IOC;
1652 } else {
1653 if (op1 == op2 || (!mnt1 && !mnt2)) {
1654 result = 6;
1655 } else if (sgn1 != sgn2) {
1656 result = sgn1 ? 8 : 2;
1657 } else if (exp1 != exp2) {
1658 result = sgn1 ^ (exp1 < exp2) ? 8 : 2;
1659 } else {
1660 result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2;
1661 }
1662 }
1663
1664 set_fpscr0(fpscr, flags);
1665
1666 return result;
1667}
1668
1669template <>
1670int
1671fplibCompare(uint64_t op1, uint64_t op2, bool signal_nans, FPSCR &fpscr)
1672{
1673 int mode = modeConv(fpscr);
1674 int flags = 0;
1675 int sgn1, exp1, sgn2, exp2, result;
1676 uint64_t mnt1, mnt2;
1677
1678 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
1679 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
1680
1681 if ((exp1 == 2047 && (uint64_t)(mnt1 << 12)) ||
1682 (exp2 == 2047 && (uint64_t)(mnt2 << 12))) {
1683 result = 3;
1684 if ((exp1 == 2047 && (uint64_t)(mnt1 << 12) && !(mnt1 >> 51 & 1)) ||
1685 (exp2 == 2047 && (uint64_t)(mnt2 << 12) && !(mnt2 >> 51 & 1)) ||
1686 signal_nans)
1687 flags |= FPLIB_IOC;
1688 } else {
1689 if (op1 == op2 || (!mnt1 && !mnt2)) {
1690 result = 6;
1691 } else if (sgn1 != sgn2) {
1692 result = sgn1 ? 8 : 2;
1693 } else if (exp1 != exp2) {
1694 result = sgn1 ^ (exp1 < exp2) ? 8 : 2;
1695 } else {
1696 result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2;
1697 }
1698 }
1699
1700 set_fpscr0(fpscr, flags);
1701
1702 return result;
1703}
1704
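// Helpers for NaN conversion between precisions: keep the sign and the top
// fraction bits and force the quiet bit, so a signalling NaN payload survives
// conversion as a quiet NaN. The FPOnePointFive, FPThree and FPTwo helpers
// below build the constants 1.5, 3.0 and 2.0 used by the reciprocal and
// reciprocal-square-root step operations.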
1705static uint16_t
1706fp16_FPConvertNaN_32(uint32_t op)
1707{
1708 return fp16_pack(op >> 31, 31, (uint16_t)1 << 9 | op >> 13);
1709}
1710
1711static uint16_t
1712fp16_FPConvertNaN_64(uint64_t op)
1713{
1714 return fp16_pack(op >> 63, 31, (uint16_t)1 << 9 | op >> 42);
1715}
1716
1717static uint32_t
1718fp32_FPConvertNaN_16(uint16_t op)
1719{
1720 return fp32_pack(op >> 15, 255, (uint32_t)1 << 22 | (uint32_t)op << 13);
1721}
1722
1723static uint32_t
1724fp32_FPConvertNaN_64(uint64_t op)
1725{
1726 return fp32_pack(op >> 63, 255, (uint32_t)1 << 22 | op >> 29);
1727}
1728
1729static uint64_t
1730fp64_FPConvertNaN_16(uint16_t op)
1731{
1732 return fp64_pack(op >> 15, 2047, (uint64_t)1 << 51 | (uint64_t)op << 42);
1733}
1734
1735static uint64_t
1736fp64_FPConvertNaN_32(uint32_t op)
1737{
1738 return fp64_pack(op >> 31, 2047, (uint64_t)1 << 51 | (uint64_t)op << 29);
1739}
1740
1741static uint32_t
1742fp32_FPOnePointFive(int sgn)
1743{
1744 return fp32_pack(sgn, 127, (uint64_t)1 << 22);
1745}
1746
1747static uint64_t
1748fp64_FPOnePointFive(int sgn)
1749{
1750 return fp64_pack(sgn, 1023, (uint64_t)1 << 51);
1751}
1752
1753static uint32_t
1754fp32_FPThree(int sgn)
1755{
1756 return fp32_pack(sgn, 128, (uint64_t)1 << 22);
1757}
1758
1759static uint64_t
1760fp64_FPThree(int sgn)
1761{
1762 return fp64_pack(sgn, 1024, (uint64_t)1 << 51);
1763}
1764
1765static uint32_t
1766fp32_FPTwo(int sgn)
1767{
1768 return fp32_pack(sgn, 128, 0);
1769}
1770
1771static uint64_t
1772fp64_FPTwo(int sgn)
1773{
1774 return fp64_pack(sgn, 1024, 0);
1775}
1776
1777template <>
1778uint16_t
1779fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr)
1780{
1781 int mode = modeConv(fpscr);
1782 int flags = 0;
1783 int sgn, exp;
1784 uint32_t mnt;
1785 uint16_t result;
1786
1787 // Unpack floating-point operand optionally with flush-to-zero:
1788 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
1789
1790 bool alt_hp = fpscr.ahp;
1791
1792 if (exp == 255 && (uint32_t)(mnt << 9)) {
1793 if (alt_hp) {
1794 result = fp16_zero(sgn);
1795 } else if (fpscr.dn) {
1796 result = fp16_defaultNaN();
1797 } else {
1798 result = fp16_FPConvertNaN_32(op);
1799 }
1800 if (!(mnt >> 22 & 1) || alt_hp) {
1801 flags |= FPLIB_IOC;
1802 }
1803 } else if (exp == 255) {
1804 if (alt_hp) {
1805 result = sgn << 15 | (uint16_t)0x7fff;
1806 flags |= FPLIB_IOC;
1807 } else {
1808 result = fp16_infinity(sgn);
1809 }
1810 } else if (!mnt) {
1811 result = fp16_zero(sgn);
1812 } else {
1813 result = fp16_round_(sgn, exp - 127 + 15,
1814 mnt >> 7 | !!(uint32_t)(mnt << 25),
1815 rounding, mode | alt_hp << 4, &flags);
1816 }
1817
1818 set_fpscr0(fpscr, flags);
1819
1820 return result;
1821}
1822
1823template <>
1824uint16_t
1825fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr)
1826{
1827 int mode = modeConv(fpscr);
1828 int flags = 0;
1829 int sgn, exp;
1830 uint64_t mnt;
1831 uint16_t result;
1832
1833 // Unpack floating-point operand optionally with flush-to-zero:
1834 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
1835
1836 bool alt_hp = fpscr.ahp;
1837
1838 if (exp == 2047 && (uint64_t)(mnt << 12)) {
1839 if (alt_hp) {
1840 result = fp16_zero(sgn);
1841 } else if (fpscr.dn) {
1842 result = fp16_defaultNaN();
1843 } else {
1844 result = fp16_FPConvertNaN_64(op);
1845 }
1846 if (!(mnt >> 51 & 1) || alt_hp) {
1847 flags |= FPLIB_IOC;
1848 }
1849 } else if (exp == 2047) {
1850 if (alt_hp) {
1851 result = sgn << 15 | (uint16_t)0x7fff;
1852 flags |= FPLIB_IOC;
1853 } else {
1854 result = fp16_infinity(sgn);
1855 }
1856 } else if (!mnt) {
1857 result = fp16_zero(sgn);
1858 } else {
1859 result = fp16_round_(sgn, exp - 1023 + 15,
1860 mnt >> 36 | !!(uint64_t)(mnt << 28),
1861 rounding, mode | alt_hp << 4, &flags);
1862 }
1863
1864 set_fpscr0(fpscr, flags);
1865
1866 return result;
1867}
1868
1869template <>
1870uint32_t
1871fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr)
1872{
1873 int mode = modeConv(fpscr);
1874 int flags = 0;
1875 int sgn, exp;
1876 uint16_t mnt;
1877 uint32_t result;
1878
1879 // Unpack floating-point operand optionally with flush-to-zero:
1880 fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags);
1881
1882 if (exp == 31 && !fpscr.ahp && (uint16_t)(mnt << 6)) {
1883 if (fpscr.dn) {
1884 result = fp32_defaultNaN();
1885 } else {
1886 result = fp32_FPConvertNaN_16(op);
1887 }
1888 if (!(mnt >> 9 & 1)) {
1889 flags |= FPLIB_IOC;
1890 }
1891 } else if (exp == 31 && !fpscr.ahp) {
1892 result = fp32_infinity(sgn);
1893 } else if (!mnt) {
1894 result = fp32_zero(sgn);
1895 } else {
1896 mnt = fp16_normalise(mnt, &exp);
1897 result = fp32_pack(sgn, exp - 15 + 127 + 5, (uint32_t)mnt << 8);
1898 }
1899
1900 set_fpscr0(fpscr, flags);
1901
1902 return result;
1903}
1904
1905template <>
1906uint32_t
1907fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr)
1908{
1909 int mode = modeConv(fpscr);
1910 int flags = 0;
1911 int sgn, exp;
1912 uint64_t mnt;
1913 uint32_t result;
1914
1915 // Unpack floating-point operand optionally with flush-to-zero:
1916 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
1917
1918 if (exp == 2047 && (uint64_t)(mnt << 12)) {
1919 if (fpscr.dn) {
1920 result = fp32_defaultNaN();
1921 } else {
1922 result = fp32_FPConvertNaN_64(op);
1923 }
1924 if (!(mnt >> 51 & 1)) {
1925 flags |= FPLIB_IOC;
1926 }
1927 } else if (exp == 2047) {
1928 result = fp32_infinity(sgn);
1929 } else if (!mnt) {
1930 result = fp32_zero(sgn);
1931 } else {
1932 result = fp32_round_(sgn, exp - 1023 + 127,
1933 mnt >> 20 | !!(uint64_t)(mnt << 44),
1934 rounding, mode, &flags);
1935 }
1936
1937 set_fpscr0(fpscr, flags);
1938
1939 return result;
1940}
1941
1942template <>
1943uint64_t
1944fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr)
1945{
1946 int mode = modeConv(fpscr);
1947 int flags = 0;
1948 int sgn, exp;
1949 uint16_t mnt;
1950 uint64_t result;
1951
1952 // Unpack floating-point operand optionally with flush-to-zero:
1953 fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags);
1954
1955 if (exp == 31 && !fpscr.ahp && (uint16_t)(mnt << 6)) {
1956 if (fpscr.dn) {
1957 result = fp64_defaultNaN();
1958 } else {
1959 result = fp64_FPConvertNaN_16(op);
1960 }
1961 if (!(mnt >> 9 & 1)) {
1962 flags |= FPLIB_IOC;
1963 }
1964 } else if (exp == 31 && !fpscr.ahp) {
1965 result = fp64_infinity(sgn);
1966 } else if (!mnt) {
1967 result = fp64_zero(sgn);
1968 } else {
1969 mnt = fp16_normalise(mnt, &exp);
1970 result = fp64_pack(sgn, exp - 15 + 1023 + 5, (uint64_t)mnt << 37);
1971 }
1972
1973 set_fpscr0(fpscr, flags);
1974
1975 return result;
1976}
1977
1978template <>
1979uint64_t
1980fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr)
1981{
1982 int mode = modeConv(fpscr);
1983 int flags = 0;
1984 int sgn, exp;
1985 uint32_t mnt;
1986 uint64_t result;
1987
1988 // Unpack floating-point operand optionally with flush-to-zero:
1989 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
1990
1991 if (exp == 255 && (uint32_t)(mnt << 9)) {
1992 if (fpscr.dn) {
1993 result = fp64_defaultNaN();
1994 } else {
1995 result = fp64_FPConvertNaN_32(op);
1996 }
1997 if (!(mnt >> 22 & 1)) {
1998 flags |= FPLIB_IOC;
1999 }
2000 } else if (exp == 255) {
2001 result = fp64_infinity(sgn);
2002 } else if (!mnt) {
2003 result = fp64_zero(sgn);
2004 } else {
2005 mnt = fp32_normalise(mnt, &exp);
2006 result = fp64_pack(sgn, exp - 127 + 1023 + 8, (uint64_t)mnt << 21);
2007 }
2008
2009 set_fpscr0(fpscr, flags);
2010
2011 return result;
2012}
2013
2014template <>
2015uint32_t
2016fplibMulAdd(uint32_t addend, uint32_t op1, uint32_t op2, FPSCR &fpscr)
2017{
2018 int flags = 0;
2019 uint32_t result = fp32_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags);
2020 set_fpscr0(fpscr, flags);
2021 return result;
2022}
2023
2024template <>
2025uint64_t
2026fplibMulAdd(uint64_t addend, uint64_t op1, uint64_t op2, FPSCR &fpscr)
2027{
2028 int flags = 0;
2029 uint64_t result = fp64_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags);
2030 set_fpscr0(fpscr, flags);
2031 return result;
2032}
2033
2034template <>
2035uint32_t
2036fplibDiv(uint32_t op1, uint32_t op2, FPSCR &fpscr)
2037{
2038 int flags = 0;
2039 uint32_t result = fp32_div(op1, op2, modeConv(fpscr), &flags);
2040 set_fpscr0(fpscr, flags);
2041 return result;
2042}
2043
2044template <>
2045uint64_t
2046fplibDiv(uint64_t op1, uint64_t op2, FPSCR &fpscr)
2047{
2048 int flags = 0;
2049 uint64_t result = fp64_div(op1, op2, modeConv(fpscr), &flags);
2050 set_fpscr0(fpscr, flags);
2051 return result;
2052}
2053
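// Re-pack an unpacked (sign, exponent, mantissa) triple for Max/Min: if the
// mantissa lacks the implicit leading bit (zero, or a subnormal flushed by
// FZ), the exponent field is forced to zero.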
2054static uint32_t
2055fp32_repack(int sgn, int exp, uint32_t mnt)
2056{
2057 return fp32_pack(sgn, mnt >> 23 ? exp : 0, mnt);
2058}
2059
2060static uint64_t
2061fp64_repack(int sgn, int exp, uint64_t mnt)
2062{
2063 return fp64_pack(sgn, mnt >> 52 ? exp : 0, mnt);
2064}
2065
2066static void
2067fp32_minmaxnum(uint32_t *op1, uint32_t *op2, int sgn)
2068{
2069 // Treat a single quiet-NaN as +Infinity/-Infinity
2070 if (!((uint32_t)~(*op1 << 1) >> 23) && (uint32_t)~(*op2 << 1) >> 23)
2071 *op1 = fp32_infinity(sgn);
2072 if (!((uint32_t)~(*op2 << 1) >> 23) && (uint32_t)~(*op1 << 1) >> 23)
2073 *op2 = fp32_infinity(sgn);
2074}
2075
2076static void
2077fp64_minmaxnum(uint64_t *op1, uint64_t *op2, int sgn)
2078{
2079 // Treat a single quiet-NaN as +Infinity/-Infinity
2080 if (!((uint64_t)~(*op1 << 1) >> 52) && (uint64_t)~(*op2 << 1) >> 52)
2081 *op1 = fp64_infinity(sgn);
2082 if (!((uint64_t)~(*op2 << 1) >> 52) && (uint64_t)~(*op1 << 1) >> 52)
2083 *op2 = fp64_infinity(sgn);
2084}
2085
2086template <>
2087uint32_t
2088fplibMax(uint32_t op1, uint32_t op2, FPSCR &fpscr)
2089{
2090 int mode = modeConv(fpscr);
2091 int flags = 0;
2092 int sgn1, exp1, sgn2, exp2;
2093 uint32_t mnt1, mnt2, x, result;
2094
2095 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2096 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2097
2098 if ((x = fp32_process_NaNs(op1, op2, mode, &flags))) {
2099 result = x;
2100 } else {
2101 result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ?
2102 fp32_repack(sgn1, exp1, mnt1) :
2103 fp32_repack(sgn2, exp2, mnt2));
2104 }
2105 set_fpscr0(fpscr, flags);
2106 return result;
2107}
2108
2109template <>
2110uint64_t
2111fplibMax(uint64_t op1, uint64_t op2, FPSCR &fpscr)
2112{
2113 int mode = modeConv(fpscr);
2114 int flags = 0;
2115 int sgn1, exp1, sgn2, exp2;
2116 uint64_t mnt1, mnt2, x, result;
2117
2118 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2119 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2120
2121 if ((x = fp64_process_NaNs(op1, op2, mode, &flags))) {
2122 result = x;
2123 } else {
2124 result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ?
2125 fp64_repack(sgn1, exp1, mnt1) :
2126 fp64_repack(sgn2, exp2, mnt2));
2127 }
2128 set_fpscr0(fpscr, flags);
2129 return result;
2130}
2131
2132template <>
2133uint32_t
2134fplibMaxNum(uint32_t op1, uint32_t op2, FPSCR &fpscr)
2135{
2136 fp32_minmaxnum(&op1, &op2, 1);
2137 return fplibMax<uint32_t>(op1, op2, fpscr);
2138}
2139
2140template <>
2141uint64_t
2142fplibMaxNum(uint64_t op1, uint64_t op2, FPSCR &fpscr)
2143{
2144 fp64_minmaxnum(&op1, &op2, 1);
2145 return fplibMax<uint64_t>(op1, op2, fpscr);
2146}
2147
2148template <>
2149uint32_t
2150fplibMin(uint32_t op1, uint32_t op2, FPSCR &fpscr)
2151{
2152 int mode = modeConv(fpscr);
2153 int flags = 0;
2154 int sgn1, exp1, sgn2, exp2;
2155 uint32_t mnt1, mnt2, x, result;
2156
2157 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2158 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2159
2160 if ((x = fp32_process_NaNs(op1, op2, mode, &flags))) {
2161 result = x;
2162 } else {
2163 result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ?
2164 fp32_repack(sgn1, exp1, mnt1) :
2165 fp32_repack(sgn2, exp2, mnt2));
2166 }
2167 set_fpscr0(fpscr, flags);
2168 return result;
2169}
2170
2171template <>
2172uint64_t
2173fplibMin(uint64_t op1, uint64_t op2, FPSCR &fpscr)
2174{
2175 int mode = modeConv(fpscr);
2176 int flags = 0;
2177 int sgn1, exp1, sgn2, exp2;
2178 uint64_t mnt1, mnt2, x, result;
2179
2180 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2181 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2182
2183 if ((x = fp64_process_NaNs(op1, op2, mode, &flags))) {
2184 result = x;
2185 } else {
2186 result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ?
2187 fp64_repack(sgn1, exp1, mnt1) :
2188 fp64_repack(sgn2, exp2, mnt2));
2189 }
2190 set_fpscr0(fpscr, flags);
2191 return result;
2192}
2193
2194template <>
2195uint32_t
2196fplibMinNum(uint32_t op1, uint32_t op2, FPSCR &fpscr)
2197{
2198 fp32_minmaxnum(&op1, &op2, 0);
2199 return fplibMin<uint32_t>(op1, op2, fpscr);
2200}
2201
2202template <>
2203uint64_t
2204fplibMinNum(uint64_t op1, uint64_t op2, FPSCR &fpscr)
2205{
2206 fp64_minmaxnum(&op1, &op2, 0);
2207 return fplibMin<uint64_t>(op1, op2, fpscr);
2208}
2209
2210template <>
2211uint32_t
2212fplibMul(uint32_t op1, uint32_t op2, FPSCR &fpscr)
2213{
2214 int flags = 0;
2215 uint32_t result = fp32_mul(op1, op2, modeConv(fpscr), &flags);
2216 set_fpscr0(fpscr, flags);
2217 return result;
2218}
2219
2220template <>
2221uint64_t
2222fplibMul(uint64_t op1, uint64_t op2, FPSCR &fpscr)
2223{
2224 int flags = 0;
2225 uint64_t result = fp64_mul(op1, op2, modeConv(fpscr), &flags);
2226 set_fpscr0(fpscr, flags);
2227 return result;
2228}
2229
2230template <>
2231uint32_t
2232fplibMulX(uint32_t op1, uint32_t op2, FPSCR &fpscr)
2233{
2234 int mode = modeConv(fpscr);
2235 int flags = 0;
2236 int sgn1, exp1, sgn2, exp2;
2237 uint32_t mnt1, mnt2, result;
2238
2239 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2240 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2241
2242 result = fp32_process_NaNs(op1, op2, mode, &flags);
2243 if (!result) {
2244 if ((exp1 == 255 && !mnt2) || (exp2 == 255 && !mnt1)) {
2245 result = fp32_FPTwo(sgn1 ^ sgn2);
2246 } else if (exp1 == 255 || exp2 == 255) {
2247 result = fp32_infinity(sgn1 ^ sgn2);
2248 } else if (!mnt1 || !mnt2) {
2249 result = fp32_zero(sgn1 ^ sgn2);
2250 } else {
2251 result = fp32_mul(op1, op2, mode, &flags);
2252 }
2253 }
2254
2255 set_fpscr0(fpscr, flags);
2256
2257 return result;
2258}
2259
2260template <>
2261uint64_t
2262fplibMulX(uint64_t op1, uint64_t op2, FPSCR &fpscr)
2263{
2264 int mode = modeConv(fpscr);
2265 int flags = 0;
2266 int sgn1, exp1, sgn2, exp2;
2267 uint64_t mnt1, mnt2, result;
2268
2269 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2270 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2271
2272 result = fp64_process_NaNs(op1, op2, mode, &flags);
2273 if (!result) {
2274 if ((exp1 == 2047 && !mnt2) || (exp2 == 2047 && !mnt1)) {
2275 result = fp64_FPTwo(sgn1 ^ sgn2);
2276 } else if (exp1 == 2047 || exp2 == 2047) {
2277 result = fp64_infinity(sgn1 ^ sgn2);
2278 } else if (!mnt1 || !mnt2) {
2279 result = fp64_zero(sgn1 ^ sgn2);
2280 } else {
2281 result = fp64_mul(op1, op2, mode, &flags);
2282 }
2283 }
2284
2285 set_fpscr0(fpscr, flags);
2286
2287 return result;
2288}
2289
2290template <>
2291uint32_t
2292fplibNeg(uint32_t op)
2293{
2294 return op ^ (uint32_t)1 << 31;
2295}
2296
2297template <>
2298uint64_t
2299fplibNeg(uint64_t op)
2300{
2301 return op ^ (uint64_t)1 << 63;
2302}
2303
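// 256-entry, 8-bit lookup table used by fplibRSqrtEstimate below. It is
// indexed by the exponent's parity (top index bit) and the leading mantissa
// bits, and appears to correspond to the recip_sqrt_estimate table in the
// ARM ARM pseudocode.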
2304static const uint8_t recip_sqrt_estimate[256] = {
2305 255, 253, 251, 249, 247, 245, 243, 242, 240, 238, 236, 234, 233, 231, 229, 228,
2306 226, 224, 223, 221, 219, 218, 216, 215, 213, 212, 210, 209, 207, 206, 204, 203,
2307 201, 200, 198, 197, 196, 194, 193, 192, 190, 189, 188, 186, 185, 184, 183, 181,
2308 180, 179, 178, 176, 175, 174, 173, 172, 170, 169, 168, 167, 166, 165, 164, 163,
2309 162, 160, 159, 158, 157, 156, 155, 154, 153, 152, 151, 150, 149, 148, 147, 146,
2310 145, 144, 143, 142, 141, 140, 140, 139, 138, 137, 136, 135, 134, 133, 132, 131,
2311 131, 130, 129, 128, 127, 126, 126, 125, 124, 123, 122, 121, 121, 120, 119, 118,
2312 118, 117, 116, 115, 114, 114, 113, 112, 111, 111, 110, 109, 109, 108, 107, 106,
2313 105, 104, 103, 101, 100, 99, 97, 96, 95, 93, 92, 91, 90, 88, 87, 86,
2314 85, 84, 82, 81, 80, 79, 78, 77, 76, 75, 74, 72, 71, 70, 69, 68,
2315 67, 66, 65, 64, 63, 62, 61, 60, 60, 59, 58, 57, 56, 55, 54, 53,
2316 52, 51, 51, 50, 49, 48, 47, 46, 46, 45, 44, 43, 42, 42, 41, 40,
2317 39, 38, 38, 37, 36, 35, 35, 34, 33, 33, 32, 31, 30, 30, 29, 28,
2318 28, 27, 26, 26, 25, 24, 24, 23, 22, 22, 21, 20, 20, 19, 19, 18,
2319 17, 17, 16, 16, 15, 14, 14, 13, 13, 12, 11, 11, 10, 10, 9, 9,
2320 8, 8, 7, 6, 6, 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 0
2321};
2322
2323template <>
2324uint32_t
2325fplibRSqrtEstimate(uint32_t op, FPSCR &fpscr)
2326{
2327 int mode = modeConv(fpscr);
2328 int flags = 0;
2329 int sgn, exp;
2330 uint32_t mnt, result;
2331
2332 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2333
2334 if (exp == 255 && (uint32_t)(mnt << 9)) {
2335 result = fp32_process_NaN(op, mode, &flags);
2336 } else if (!mnt) {
2337 result = fp32_infinity(sgn);
2338 flags |= FPLIB_DZC;
2339 } else if (sgn) {
2340 result = fp32_defaultNaN();
2341 flags |= FPLIB_IOC;
2342 } else if (exp == 255) {
2343 result = fp32_zero(0);
2344 } else {
2345 exp += 8;
2346 mnt = fp32_normalise(mnt, &exp);
2347 mnt = recip_sqrt_estimate[(~exp & 1) << 7 | (mnt >> 24 & 127)];
2348 result = fp32_pack(0, (380 - exp) >> 1, mnt << 15);
2349 }
2350
2351 set_fpscr0(fpscr, flags);
2352
2353 return result;
2354}
2355
2356template <>
2357uint64_t
2358fplibRSqrtEstimate(uint64_t op, FPSCR &fpscr)
2359{
2360 int mode = modeConv(fpscr);
2361 int flags = 0;
2362 int sgn, exp;
2363 uint64_t mnt, result;
2364
2365 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2366
2367 if (exp == 2047 && (uint64_t)(mnt << 12)) {
2368 result = fp64_process_NaN(op, mode, &flags);
2369 } else if (!mnt) {
2370 result = fp64_infinity(sgn);
2371 flags |= FPLIB_DZC;
2372 } else if (sgn) {
2373 result = fp64_defaultNaN();
2374 flags |= FPLIB_IOC;
2375 } else if (exp == 2047) {
2376 result = fp64_zero(0);
2377 } else {
2378 exp += 11;
2379 mnt = fp64_normalise(mnt, &exp);
2380 mnt = recip_sqrt_estimate[(~exp & 1) << 7 | (mnt >> 56 & 127)];
2381 result = fp64_pack(0, (3068 - exp) >> 1, mnt << 44);
2382 }
2383
2384 set_fpscr0(fpscr, flags);
2385
2386 return result;
2387}
2388
2389template <>
2390uint32_t
2391fplibRSqrtStepFused(uint32_t op1, uint32_t op2, FPSCR &fpscr)
2392{
2393 int mode = modeConv(fpscr);
2394 int flags = 0;
2395 int sgn1, exp1, sgn2, exp2;
2396 uint32_t mnt1, mnt2, result;
2397
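    // Computes (3 - op1 * op2) / 2: op1 is negated, the fused multiply-add
    // below adds the product to 3.0, and the scale argument of -1 halves the
    // result by decrementing its exponent.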
2398 op1 = fplibNeg<uint32_t>(op1);
2399 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2400 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2401
2402 result = fp32_process_NaNs(op1, op2, mode, &flags);
2403 if (!result) {
2404 if ((exp1 == 255 && !mnt2) || (exp2 == 255 && !mnt1)) {
2405 result = fp32_FPOnePointFive(0);
2406 } else if (exp1 == 255 || exp2 == 255) {
2407 result = fp32_infinity(sgn1 ^ sgn2);
2408 } else {
2409 result = fp32_muladd(fp32_FPThree(0), op1, op2, -1, mode, &flags);
2410 }
2411 }
2412
2413 set_fpscr0(fpscr, flags);
2414
2415 return result;
2416}
2417
2418template <>
2419uint64_t
2420fplibRSqrtStepFused(uint64_t op1, uint64_t op2, FPSCR &fpscr)
2421{
2422 int mode = modeConv(fpscr);
2423 int flags = 0;
2424 int sgn1, exp1, sgn2, exp2;
2425 uint64_t mnt1, mnt2, result;
2426
2427 op1 = fplibNeg<uint64_t>(op1);
2428 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2429 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2430
2431 result = fp64_process_NaNs(op1, op2, mode, &flags);
2432 if (!result) {
2433 if ((exp1 == 2047 && !mnt2) || (exp2 == 2047 && !mnt1)) {
2434 result = fp64_FPOnePointFive(0);
2435 } else if (exp1 == 2047 || exp2 == 2047) {
2436 result = fp64_infinity(sgn1 ^ sgn2);
2437 } else {
2438 result = fp64_muladd(fp64_FPThree(0), op1, op2, -1, mode, &flags);
2439 }
2440 }
2441
2442 set_fpscr0(fpscr, flags);
2443
2444 return result;
2445}
2446
2447template <>
2448uint32_t
2449fplibRecipStepFused(uint32_t op1, uint32_t op2, FPSCR &fpscr)
2450{
2451 int mode = modeConv(fpscr);
2452 int flags = 0;
2453 int sgn1, exp1, sgn2, exp2;
2454 uint32_t mnt1, mnt2, result;
2455
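    // Computes 2 - op1 * op2: op1 is negated and fused-multiply-added to 2.0
    // with no scaling.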
2456 op1 = fplibNeg<uint32_t>(op1);
2457 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2458 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2459
2460 result = fp32_process_NaNs(op1, op2, mode, &flags);
2461 if (!result) {
2462 if ((exp1 == 255 && !mnt2) || (exp2 == 255 && !mnt1)) {
2463 result = fp32_FPTwo(0);
2464 } else if (exp1 == 255 || exp2 == 255) {
2465 result = fp32_infinity(sgn1 ^ sgn2);
2466 } else {
2467 result = fp32_muladd(fp32_FPTwo(0), op1, op2, 0, mode, &flags);
2468 }
2469 }
2470
2471 set_fpscr0(fpscr, flags);
2472
2473 return result;
2474}
2475
2476template <>
2477uint32_t
2478fplibRecipEstimate(uint32_t op, FPSCR &fpscr)
2479{
2480 int mode = modeConv(fpscr);
2481 int flags = 0;
2482 int sgn, exp;
2483 uint32_t mnt, result;
2484
2485 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2486
2487 if (exp == 255 && (uint32_t)(mnt << 9)) {
2488 result = fp32_process_NaN(op, mode, &flags);
2489 } else if (exp == 255) {
2490 result = fp32_zero(sgn);
2491 } else if (!mnt) {
2492 result = fp32_infinity(sgn);
2493 flags |= FPLIB_DZC;
2494 } else if (!((uint32_t)(op << 1) >> 22)) {
2495 bool overflow_to_inf = false;
2496 switch (FPCRRounding(fpscr)) {
2497 case FPRounding_TIEEVEN:
2498 overflow_to_inf = true;
2499 break;
2500 case FPRounding_POSINF:
2501 overflow_to_inf = !sgn;
2502 break;
2503 case FPRounding_NEGINF:
2504 overflow_to_inf = sgn;
2505 break;
2506 case FPRounding_ZERO:
2507 overflow_to_inf = false;
2508 break;
2509 default:
2510 assert(0);
2511 }
2512 result = overflow_to_inf ? fp32_infinity(sgn) : fp32_max_normal(sgn);
2513 flags |= FPLIB_OFC | FPLIB_IXC;
2514 } else if (fpscr.fz && exp >= 253) {
2515 result = fp32_zero(sgn);
2516 flags |= FPLIB_UFC;
2517 } else {
2518 exp += 8;
2519 mnt = fp32_normalise(mnt, &exp);
2520 int result_exp = 253 - exp;
2521 uint32_t fraction = (((uint32_t)1 << 19) / (mnt >> 22 | 1) + 1) >> 1;
2522 fraction <<= 15;
2523 if (result_exp == 0) {
2524 fraction >>= 1;
2525 } else if (result_exp == -1) {
2526 fraction >>= 2;
2527 result_exp = 0;
2528 }
2529 result = fp32_pack(sgn, result_exp, fraction);
2530 }
2531
2532 set_fpscr0(fpscr, flags);
2533
2534 return result;
2535}
2536
2537template <>
2538uint64_t
2539fplibRecipEstimate(uint64_t op, FPSCR &fpscr)
2540{
2541 int mode = modeConv(fpscr);
2542 int flags = 0;
2543 int sgn, exp;
2544 uint64_t mnt, result;
2545
2546 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2547
2548 if (exp == 2047 && (uint64_t)(mnt << 12)) {
2549 result = fp64_process_NaN(op, mode, &flags);
2550 } else if (exp == 2047) {
2551 result = fp64_zero(sgn);
2552 } else if (!mnt) {
2553 result = fp64_infinity(sgn);
2554 flags |= FPLIB_DZC;
2555 } else if (!((uint64_t)(op << 1) >> 51)) {
2556 bool overflow_to_inf = false;
2557 switch (FPCRRounding(fpscr)) {
2558 case FPRounding_TIEEVEN:
2559 overflow_to_inf = true;
2560 break;
2561 case FPRounding_POSINF:
2562 overflow_to_inf = !sgn;
2563 break;
2564 case FPRounding_NEGINF:
2565 overflow_to_inf = sgn;
2566 break;
2567 case FPRounding_ZERO:
2568 overflow_to_inf = false;
2569 break;
2570 default:
2571 assert(0);
2572 }
2573 result = overflow_to_inf ? fp64_infinity(sgn) : fp64_max_normal(sgn);
2574 flags |= FPLIB_OFC | FPLIB_IXC;
2575 } else if (fpscr.fz && exp >= 2045) {
2576 result = fp64_zero(sgn);
2577 flags |= FPLIB_UFC;
2578 } else {
2579 exp += 11;
2580 mnt = fp64_normalise(mnt, &exp);
2581 int result_exp = 2045 - exp;
2582 uint64_t fraction = (((uint32_t)1 << 19) / (mnt >> 54 | 1) + 1) >> 1;
2583 fraction <<= 44;
2584 if (result_exp == 0) {
2585 fraction >>= 1;
2586 } else if (result_exp == -1) {
2587 fraction >>= 2;
2588 result_exp = 0;
2589 }
2590 result = fp64_pack(sgn, result_exp, fraction);
2591 }
2592
2593 set_fpscr0(fpscr, flags);
2594
2595 return result;
2596}
2597
2598template <>
2599uint64_t
2600fplibRecipStepFused(uint64_t op1, uint64_t op2, FPSCR &fpscr)
2601{
2602 int mode = modeConv(fpscr);
2603 int flags = 0;
2604 int sgn1, exp1, sgn2, exp2;
2605 uint64_t mnt1, mnt2, result;
2606
2607 op1 = fplibNeg<uint64_t>(op1);
2608 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2609 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2610
2611 result = fp64_process_NaNs(op1, op2, mode, &flags);
2612 if (!result) {
2613 if ((exp1 == 2047 && !mnt2) || (exp2 == 2047 && !mnt1)) {
2614 result = fp64_FPTwo(0);
2615 } else if (exp1 == 2047 || exp2 == 2047) {
2616 result = fp64_infinity(sgn1 ^ sgn2);
2617 } else {
2618 result = fp64_muladd(fp64_FPTwo(0), op1, op2, 0, mode, &flags);
2619 }
2620 }
2621
2622 set_fpscr0(fpscr, flags);
2623
2624 return result;
2625}
2626
2627template <>
2628uint32_t
2629fplibRecpX(uint32_t op, FPSCR &fpscr)
2630{
2631 int mode = modeConv(fpscr);
2632 int flags = 0;
2633 int sgn, exp;
2634 uint32_t mnt, result;
2635
2636 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2637
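    // FRECPX-style exponent reciprocal: the result keeps the operand's sign,
    // complements its exponent field and zeroes the fraction, giving roughly
    // a power-of-two approximation of 1/op; zeroes and flushed denormals get
    // the maximum finite exponent instead.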
2638 if (exp == 255 && (uint32_t)(mnt << 9)) {
2639 result = fp32_process_NaN(op, mode, &flags);
2640 }
2641 else {
2642 if (!mnt) { // Zero and denormals
2643 result = fp32_pack(sgn, 254, 0);
2644 } else { // Infinities and normals
2645 result = fp32_pack(sgn, exp ^ 255, 0);
2646 }
2647 }
2648
2649 set_fpscr0(fpscr, flags);
2650
2651 return result;
2652}
2653
2654template <>
2655uint64_t
2656fplibRecpX(uint64_t op, FPSCR &fpscr)
2657{
2658 int mode = modeConv(fpscr);
2659 int flags = 0;
2660 int sgn, exp;
2661 uint64_t mnt, result;
2662
2663 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2664
2665 if (exp == 2047 && (uint64_t)(mnt << 12)) {
2666 result = fp64_process_NaN(op, mode, &flags);
2667 }
2668 else {
2669 if (!mnt) { // Zero and denormals
2670 result = fp64_pack(sgn, 2046, 0);
2671 } else { // Infinities and normals
2672 result = fp64_pack(sgn, exp ^ 2047, 0);
2673 }
2674 }
2675
2676 set_fpscr0(fpscr, flags);
2677
2678 return result;
2679}
2680
2681template <>
2682uint32_t
2683fplibRoundInt(uint32_t op, FPRounding rounding, bool exact, FPSCR &fpscr)
2684{
2685 int mode = modeConv(fpscr);
2686 int flags = 0;
2687 int sgn, exp;
2688 uint32_t mnt, result;
2689
2690 // Unpack using FPCR to determine if subnormals are flushed-to-zero:
2691 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2692
2693 // Handle NaNs, infinities and zeroes:
2694 if (exp == 255 && (uint32_t)(mnt << 9)) {
2695 result = fp32_process_NaN(op, mode, &flags);
2696 } else if (exp == 255) {
2697 result = fp32_infinity(sgn);
2698 } else if (!mnt) {
2699 result = fp32_zero(sgn);
2700 } else if (exp >= 150) {
2701 // There are no fractional bits
2702 result = op;
2703 } else {
2704 // Truncate towards zero:
2705 uint32_t x = 150 - exp >= 32 ? 0 : mnt >> (150 - exp);
2706 int err = exp < 118 ? 1 :
2707 (mnt << 1 >> (149 - exp) & 3) | (mnt << 2 << (exp - 118) != 0);
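        // err encodes the discarded fraction: 0 = exact, 1 = below a half,
        // 2 = exactly a half, 3 = above a half; the rounding cases below
        // key off this.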
2708 switch (rounding) {
2709 case FPRounding_TIEEVEN:
2710 x += (err == 3 || (err == 2 && (x & 1)));
2711 break;
2712 case FPRounding_POSINF:
2713 x += err && !sgn;
2714 break;
2715 case FPRounding_NEGINF:
2716 x += err && sgn;
2717 break;
2718 case FPRounding_ZERO:
2719 break;
2720 case FPRounding_TIEAWAY:
2721 x += err >> 1;
2722 break;
2723 default:
2724 assert(0);
2725 }
2726
2727 if (x == 0) {
2728 result = fp32_zero(sgn);
2729 } else {
2730 exp = 150;
2731 mnt = fp32_normalise(x, &exp);
2732 result = fp32_pack(sgn, exp + 8, mnt >> 8);
2733 }
2734
2735 if (err && exact)
2736 flags |= FPLIB_IXC;
2737 }
2738
2739 set_fpscr0(fpscr, flags);
2740
2741 return result;
2742}
2743
2744template <>
2745uint64_t
2746fplibRoundInt(uint64_t op, FPRounding rounding, bool exact, FPSCR &fpscr)
2747{
2748 int mode = modeConv(fpscr);
2749 int flags = 0;
2750 int sgn, exp;
2751 uint64_t mnt, result;
2752
2753 // Unpack using FPCR to determine if subnormals are flushed-to-zero:
2754 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2755
2756 // Handle NaNs, infinities and zeroes:
2757 if (exp == 2047 && (uint64_t)(mnt << 12)) {
2758 result = fp64_process_NaN(op, mode, &flags);
2759 } else if (exp == 2047) {
2760 result = fp64_infinity(sgn);
2761 } else if (!mnt) {
2762 result = fp64_zero(sgn);
2763 } else if (exp >= 1075) {
2764 // There are no fractional bits
2765 result = op;
2766 } else {
2767 // Truncate towards zero:
2768 uint64_t x = 1075 - exp >= 64 ? 0 : mnt >> (1075 - exp);
2769 int err = exp < 1011 ? 1 :
2770 (mnt << 1 >> (1074 - exp) & 3) | (mnt << 2 << (exp - 1011) != 0);
2771 switch (rounding) {
2772 case FPRounding_TIEEVEN:
2773 x += (err == 3 || (err == 2 && (x & 1)));
2774 break;
2775 case FPRounding_POSINF:
2776 x += err && !sgn;
2777 break;
2778 case FPRounding_NEGINF:
2779 x += err && sgn;
2780 break;
2781 case FPRounding_ZERO:
2782 break;
2783 case FPRounding_TIEAWAY:
2784 x += err >> 1;
2785 break;
2786 default:
2787 assert(0);
2788 }
2789
2790 if (x == 0) {
2791 result = fp64_zero(sgn);
2792 } else {
2793 exp = 1075;
2794 mnt = fp64_normalise(x, &exp);
2795 result = fp64_pack(sgn, exp + 11, mnt >> 11);
2796 }
2797
2798 if (err && exact)
2799 flags |= FPLIB_IXC;
2800 }
2801
2802 set_fpscr0(fpscr, flags);
2803
2804 return result;
2805}
2806
2807template <>
2808uint32_t
2809fplibSqrt(uint32_t op, FPSCR &fpscr)
2810{
2811 int flags = 0;
2812 uint32_t result = fp32_sqrt(op, modeConv(fpscr), &flags);
2813 set_fpscr0(fpscr, flags);
2814 return result;
2815}
2816
2817template <>
2818uint64_t
2819fplibSqrt(uint64_t op, FPSCR &fpscr)
2820{
2821 int flags = 0;
2822 uint64_t result = fp64_sqrt(op, modeConv(fpscr), &flags);
2823 set_fpscr0(fpscr, flags);
2824 return result;
2825}
2826
2827template <>
2828uint32_t
2829fplibSub(uint32_t op1, uint32_t op2, FPSCR &fpscr)
2830{
2831 int flags = 0;
2832 uint32_t result = fp32_add(op1, op2, 1, modeConv(fpscr), &flags);
2833 set_fpscr0(fpscr, flags);
2834 return result;
2835}
2836
2837template <>
2838uint64_t
2839fplibSub(uint64_t op1, uint64_t op2, FPSCR &fpscr)
2840{
2841 int flags = 0;
2842 uint64_t result = fp64_add(op1, op2, 1, modeConv(fpscr), &flags);
2843 set_fpscr0(fpscr, flags);
2844 return result;
2845}
2846
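// Convert an unpacked floating-point value to a 64-bit signed or unsigned
// fixed-point integer with the requested rounding, saturating and raising
// Invalid Operation on overflow, and raising Inexact when fraction bits are
// lost. FPToFixed_32 below reuses this and then saturates the result to the
// 32-bit range.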
2847static uint64_t
2848FPToFixed_64(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding,
2849 int *flags)
2850{
2851 uint64_t x;
2852 int err;
2853
2854 if (exp > 1023 + 63) {
2855 *flags = FPLIB_IOC;
2856 return ((uint64_t)!u << 63) - !sgn;
2857 }
2858
2859 x = lsr64(mnt << 11, 1023 + 63 - exp);
2860 err = (exp > 1023 + 63 - 2 ? 0 :
2861 (lsr64(mnt << 11, 1023 + 63 - 2 - exp) & 3) |
2862 !!(mnt << 11 & (lsl64(1, 1023 + 63 - 2 - exp) - 1)));
2863
2864 switch (rounding) {
2865 case FPRounding_TIEEVEN:
2866 x += (err == 3 || (err == 2 && (x & 1)));
2867 break;
2868 case FPRounding_POSINF:
2869 x += err && !sgn;
2870 break;
2871 case FPRounding_NEGINF:
2872 x += err && sgn;
2873 break;
2874 case FPRounding_ZERO:
2875 break;
2876 case FPRounding_TIEAWAY:
2877 x += err >> 1;
2878 break;
2879 default:
2880 assert(0);
2881 }
2882
2883 if (u ? sgn && x : x > ((uint64_t)1 << 63) - !sgn) {
2884 *flags = FPLIB_IOC;
2885 return ((uint64_t)!u << 63) - !sgn;
2886 }
2887
2888 if (err) {
2889 *flags = FPLIB_IXC;
2890 }
2891
2892 return sgn ? -x : x;
2893}
2894
2895static uint32_t
2896FPToFixed_32(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding,
2897 int *flags)
2898{
2899 uint64_t x = FPToFixed_64(sgn, exp, mnt, u, rounding, flags);
2900 if (u ? x >= (uint64_t)1 << 32 :
2901 !(x < (uint64_t)1 << 31 ||
2902 (uint64_t)-x <= (uint64_t)1 << 31)) {
2903 *flags = FPLIB_IOC;
2904 x = ((uint32_t)!u << 31) - !sgn;
2905 }
2906 return x;
2907}
2908
2909template <>
2910uint32_t
2911fplibFPToFixed(uint32_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
2912{
2913 int flags = 0;
2914 int sgn, exp;
2915 uint32_t mnt, result;
2916
2917 // Unpack using FPCR to determine if subnormals are flushed-to-zero:
2918 fp32_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);
2919
2920 // If NaN, set cumulative flag or take exception:
2921 if (exp == 255 && (uint32_t)(mnt << 9)) {
2922 flags = FPLIB_IOC;
2923 result = 0;
2924 } else {
2925 result = FPToFixed_32(sgn, exp + 1023 - 127 + fbits,
2926 (uint64_t)mnt << (52 - 23), u, rounding, &flags);
2927 }
2928
2929 set_fpscr0(fpscr, flags);
2930
2931 return result;
2932}
2933
2934template <>
2935uint32_t
2936fplibFPToFixed(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
2937{
2938 int flags = 0;
2939 int sgn, exp;
2940 uint64_t mnt;
2941 uint32_t result;
2942
2943 // Unpack using FPCR to determine if subnormals are flushed-to-zero:
2944 fp64_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);
2945
2946 // If NaN, set cumulative flag or take exception:
2947 if (exp == 2047 && (uint64_t)(mnt << 12)) {
2948 flags = FPLIB_IOC;
2949 result = 0;
2950 } else {
2951 result = FPToFixed_32(sgn, exp + fbits, mnt, u, rounding, &flags);
2952 }
2953
2954 set_fpscr0(fpscr, flags);
2955
2956 return result;
2957}
2958
2959template <>
2960uint64_t
2961fplibFPToFixed(uint32_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
2962{
2963 int flags = 0;
2964 int sgn, exp;
2965 uint32_t mnt;
2966 uint64_t result;
2967
2968 // Unpack using FPCR to determine if subnormals are flushed-to-zero:
2969 fp32_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);
2970
2971 // If NaN, set cumulative flag or take exception:
2972 if (exp == 255 && (uint32_t)(mnt << 9)) {
2973 flags = FPLIB_IOC;
2974 result = 0;
2975 } else {
2976 result = FPToFixed_64(sgn, exp + 1023 - 127 + fbits,
2977 (uint64_t)mnt << (52 - 23), u, rounding, &flags);
2978 }
2979
2980 set_fpscr0(fpscr, flags);
2981
2982 return result;
2983}
2984
2985template <>
2986uint64_t
2987fplibFPToFixed(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
2988{
2989 int flags = 0;
2990 int sgn, exp;
2991 uint64_t mnt, result;
2992
2993 // Unpack using FPCR to determine if subnormals are flushed-to-zero:
2994 fp64_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);
2995
2996 // If NaN, set cumulative flag or take exception:
2997 if (exp == 2047 && (uint64_t)(mnt << 12)) {
2998 flags = FPLIB_IOC;
2999 result = 0;
3000 } else {
3001 result = FPToFixed_64(sgn, exp + fbits, mnt, u, rounding, &flags);
3002 }
3003
3004 set_fpscr0(fpscr, flags);
3005
3006 return result;
3007}
3008
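// fp32_cvtf/fp64_cvtf convert a 64-bit fixed-point value (signed when u is
// false) with fbits fractional bits into a floating-point value, normalising
// the magnitude and rounding in the caller-supplied mode.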
3009static uint32_t
3010fp32_cvtf(uint64_t a, int fbits, int u, int mode, int *flags)
3011{
3012 int x_sgn = !u && a >> 63;
3013 int x_exp = 190 - fbits;
3014 uint64_t x_mnt = x_sgn ? -a : a;
3015
3016 // Handle zero:
3017 if (!x_mnt) {
3018 return fp32_zero(0);
3019 }
3020
3021 // Normalise and convert to 32 bits, collapsing error into bottom bit:
3022 x_mnt = fp64_normalise(x_mnt, &x_exp);
3023 x_mnt = x_mnt >> 31 | !!(uint32_t)(x_mnt << 1);
3024
3025 return fp32_round(x_sgn, x_exp, x_mnt, mode, flags);
3026}
3027
3028static uint64_t
3029fp64_cvtf(uint64_t a, int fbits, int u, int mode, int *flags)
3030{
3031 int x_sgn = !u && a >> 63;
3032 int x_exp = 1024 + 62 - fbits;
3033 uint64_t x_mnt = x_sgn ? -a : a;
3034
3035 // Handle zero:
3036 if (!x_mnt) {
3037 return fp64_zero(0);
3038 }
3039
3040 x_mnt = fp64_normalise(x_mnt, &x_exp);
3041
3042 return fp64_round(x_sgn, x_exp, x_mnt << 1, mode, flags);
3043}
3044
3045template <>
3046uint32_t
3047fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
3048{
3049 int flags = 0;
3050 uint32_t res = fp32_cvtf(op, fbits, u,
3051 (int)rounding | ((uint32_t)fpscr >> 22 & 12),
3052 &flags);
3053 set_fpscr0(fpscr, flags);
3054 return res;
3055}
3056
3057template <>
3058uint64_t
3059fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
3060{
3061 int flags = 0;
3062 uint64_t res = fp64_cvtf(op, fbits, u,
3063 (int)rounding | ((uint32_t)fpscr >> 22 & 12),
3064 &flags);
3065 set_fpscr0(fpscr, flags);
3066 return res;
3067}
3068
3069} // namespace ArmISA