/*
 * Copyright (c) 2012-2013, 2017-2018 ARM Limited
 * All rights reserved
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder. You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Edmund Grimley Evans
 *          Thomas Grocutt
 */

#include <stdint.h>

#include <cassert>

#include "base/logging.hh"
#include "fplib.hh"

namespace ArmISA
{

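// The "mode" argument used throughout this file packs the FPSCR control
// state: bits 1:0 select the rounding mode (RN/RP/RM/RZ below) and the
// remaining bits are individual controls (flush-to-zero, default NaN,
// alternative half-precision, half-precision flush-to-zero). The FPLIB_*
// exception values mirror the FPSCR cumulative exception flag bits and are
// ORed into the "*flags" out-parameter that most functions here take.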
#define FPLIB_RN 0
#define FPLIB_RP 1
#define FPLIB_RM 2
#define FPLIB_RZ 3
#define FPLIB_FZ 4
#define FPLIB_DN 8
#define FPLIB_AHP 16
#define FPLIB_FZ16 32

#define FPLIB_IDC 128 // Input Denormal
#define FPLIB_IXC 16  // Inexact
#define FPLIB_UFC 8   // Underflow
#define FPLIB_OFC 4   // Overflow
#define FPLIB_DZC 2   // Division by Zero
#define FPLIB_IOC 1   // Invalid Operation

#define FP16_BITS 16
#define FP32_BITS 32
#define FP64_BITS 64

#define FP16_EXP_BITS 5
#define FP32_EXP_BITS 8
#define FP64_EXP_BITS 11

#define FP16_EXP_BIAS 15
#define FP32_EXP_BIAS 127
#define FP64_EXP_BIAS 1023

#define FP16_EXP_INF ((1ULL << FP16_EXP_BITS) - 1)
#define FP32_EXP_INF ((1ULL << FP32_EXP_BITS) - 1)
#define FP64_EXP_INF ((1ULL << FP64_EXP_BITS) - 1)

#define FP16_MANT_BITS (FP16_BITS - FP16_EXP_BITS - 1)
#define FP32_MANT_BITS (FP32_BITS - FP32_EXP_BITS - 1)
#define FP64_MANT_BITS (FP64_BITS - FP64_EXP_BITS - 1)

#define FP16_EXP(x) ((x) >> FP16_MANT_BITS & ((1ULL << FP16_EXP_BITS) - 1))
#define FP32_EXP(x) ((x) >> FP32_MANT_BITS & ((1ULL << FP32_EXP_BITS) - 1))
#define FP64_EXP(x) ((x) >> FP64_MANT_BITS & ((1ULL << FP64_EXP_BITS) - 1))

#define FP16_MANT(x) ((x) & ((1ULL << FP16_MANT_BITS) - 1))
#define FP32_MANT(x) ((x) & ((1ULL << FP32_MANT_BITS) - 1))
#define FP64_MANT(x) ((x) & ((1ULL << FP64_MANT_BITS) - 1))

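// Shift helpers that are well defined for any shift amount: a C++ shift by
// the full width (or more) of the operand is undefined behaviour, so these
// return 0 in that case instead.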
static inline uint16_t
lsl16(uint16_t x, uint32_t shift)
{
    return shift < 16 ? x << shift : 0;
}

static inline uint16_t
lsr16(uint16_t x, uint32_t shift)
{
    return shift < 16 ? x >> shift : 0;
}

static inline uint32_t
lsl32(uint32_t x, uint32_t shift)
{
    return shift < 32 ? x << shift : 0;
}

static inline uint32_t
lsr32(uint32_t x, uint32_t shift)
{
    return shift < 32 ? x >> shift : 0;
}

static inline uint64_t
lsl64(uint64_t x, uint32_t shift)
{
    return shift < 64 ? x << shift : 0;
}

static inline uint64_t
lsr64(uint64_t x, uint32_t shift)
{
    return shift < 64 ? x >> shift : 0;
}

static inline void
lsl128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift)
{
    if (shift == 0) {
        *r1 = x1;
        *r0 = x0;
    } else if (shift < 64) {
        *r1 = x1 << shift | x0 >> (64 - shift);
        *r0 = x0 << shift;
    } else if (shift < 128) {
        *r1 = x0 << (shift - 64);
        *r0 = 0;
    } else {
        *r1 = 0;
        *r0 = 0;
    }
}

static inline void
lsr128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift)
{
    if (shift == 0) {
        *r1 = x1;
        *r0 = x0;
    } else if (shift < 64) {
        *r0 = x0 >> shift | x1 << (64 - shift);
        *r1 = x1 >> shift;
    } else if (shift < 128) {
        *r0 = x1 >> (shift - 64);
        *r1 = 0;
    } else {
        *r0 = 0;
        *r1 = 0;
    }
}

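// Full multiply of two values of up to 62 bits, returning the product in
// two 64-bit halves (*x1:*x0). The operands are split into 31-bit limbs and
// combined Karatsuba-style, so only three multiplications are needed; the
// 62-bit limit keeps the partial sums from overflowing 64 bits.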
static inline void
mul62x62(uint64_t *x0, uint64_t *x1, uint64_t a, uint64_t b)
{
    uint32_t mask = ((uint32_t)1 << 31) - 1;
    uint64_t a0 = a & mask;
    uint64_t a1 = a >> 31 & mask;
    uint64_t b0 = b & mask;
    uint64_t b1 = b >> 31 & mask;
    uint64_t p0 = a0 * b0;
    uint64_t p2 = a1 * b1;
    uint64_t p1 = (a0 + a1) * (b0 + b1) - p0 - p2;
    uint64_t s0 = p0;
    uint64_t s1 = (s0 >> 31) + p1;
    uint64_t s2 = (s1 >> 31) + p2;
    *x0 = (s0 & mask) | (s1 & mask) << 31 | s2 << 62;
    *x1 = s2 >> 2;
}

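// Full 64x32-bit multiply, returning the 96-bit product in two 64-bit
// halves (*x1:*x0).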
static inline void
mul64x32(uint64_t *x0, uint64_t *x1, uint64_t a, uint32_t b)
{
    uint64_t t0 = (uint64_t)(uint32_t)a * b;
    uint64_t t1 = (t0 >> 32) + (a >> 32) * b;
    *x0 = t1 << 32 | (uint32_t)t0;
    *x1 = t1 >> 32;
}

static inline void
add128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0,
       uint64_t b1)
{
    *x0 = a0 + b0;
    *x1 = a1 + b1 + (*x0 < a0);
}

static inline void
sub128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0,
       uint64_t b1)
{
    *x0 = a0 - b0;
    *x1 = a1 - b1 - (*x0 > a0);
}

static inline int
cmp128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1)
{
    return (a1 < b1 ? -1 : a1 > b1 ? 1 : a0 < b0 ? -1 : a0 > b0 ? 1 : 0);
}

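// Normalisation helpers: shift the mantissa left until its leading 1 is in
// the top bit of the word, decrementing *exp by the shift amount. The loop
// is a binary search over the shift amount (e.g. 8, 4, 2, 1 for 16 bits),
// so it runs in a fixed number of steps rather than one per bit.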
static inline uint16_t
fp16_normalise(uint16_t mnt, int *exp)
{
    int shift;

    if (!mnt) {
        return 0;
    }

    for (shift = 8; shift; shift >>= 1) {
        if (!(mnt >> (16 - shift))) {
            mnt <<= shift;
            *exp -= shift;
        }
    }
    return mnt;
}

static inline uint32_t
fp32_normalise(uint32_t mnt, int *exp)
{
    int shift;

    if (!mnt) {
        return 0;
    }

    for (shift = 16; shift; shift >>= 1) {
        if (!(mnt >> (32 - shift))) {
            mnt <<= shift;
            *exp -= shift;
        }
    }
    return mnt;
}

static inline uint64_t
fp64_normalise(uint64_t mnt, int *exp)
{
    int shift;

    if (!mnt) {
        return 0;
    }

    for (shift = 32; shift; shift >>= 1) {
        if (!(mnt >> (64 - shift))) {
            mnt <<= shift;
            *exp -= shift;
        }
    }
    return mnt;
}

static inline void
fp128_normalise(uint64_t *mnt0, uint64_t *mnt1, int *exp)
{
    uint64_t x0 = *mnt0;
    uint64_t x1 = *mnt1;
    int shift;

    if (!x0 && !x1) {
        return;
    }

    if (!x1) {
        x1 = x0;
        x0 = 0;
        *exp -= 64;
    }

    for (shift = 32; shift; shift >>= 1) {
        if (!(x1 >> (64 - shift))) {
            x1 = x1 << shift | x0 >> (64 - shift);
            x0 <<= shift;
            *exp -= shift;
        }
    }

    *mnt0 = x0;
    *mnt1 = x1;
}

static inline uint16_t
fp16_pack(uint16_t sgn, uint16_t exp, uint16_t mnt)
{
    return sgn << (FP16_BITS - 1) | exp << FP16_MANT_BITS | FP16_MANT(mnt);
}

static inline uint32_t
fp32_pack(uint32_t sgn, uint32_t exp, uint32_t mnt)
{
    return sgn << (FP32_BITS - 1) | exp << FP32_MANT_BITS | FP32_MANT(mnt);
}

static inline uint64_t
fp64_pack(uint64_t sgn, uint64_t exp, uint64_t mnt)
{
    return sgn << (FP64_BITS - 1) | exp << FP64_MANT_BITS | FP64_MANT(mnt);
}

static inline uint16_t
fp16_zero(int sgn)
{
    return fp16_pack(sgn, 0, 0);
}

static inline uint32_t
fp32_zero(int sgn)
{
    return fp32_pack(sgn, 0, 0);
}

static inline uint64_t
fp64_zero(int sgn)
{
    return fp64_pack(sgn, 0, 0);
}

static inline uint16_t
fp16_max_normal(int sgn)
{
    return fp16_pack(sgn, FP16_EXP_INF - 1, -1);
}

static inline uint32_t
fp32_max_normal(int sgn)
{
    return fp32_pack(sgn, FP32_EXP_INF - 1, -1);
}

static inline uint64_t
fp64_max_normal(int sgn)
{
    return fp64_pack(sgn, FP64_EXP_INF - 1, -1);
}

static inline uint16_t
fp16_infinity(int sgn)
{
    return fp16_pack(sgn, FP16_EXP_INF, 0);
}

static inline uint32_t
fp32_infinity(int sgn)
{
    return fp32_pack(sgn, FP32_EXP_INF, 0);
}

static inline uint64_t
fp64_infinity(int sgn)
{
    return fp64_pack(sgn, FP64_EXP_INF, 0);
}

static inline uint16_t
fp16_defaultNaN()
{
    return fp16_pack(0, FP16_EXP_INF, 1ULL << (FP16_MANT_BITS - 1));
}

static inline uint32_t
fp32_defaultNaN()
{
    return fp32_pack(0, FP32_EXP_INF, 1ULL << (FP32_MANT_BITS - 1));
}

static inline uint64_t
fp64_defaultNaN()
{
    return fp64_pack(0, FP64_EXP_INF, 1ULL << (FP64_MANT_BITS - 1));
}

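// Unpack a raw encoding into sign, biased exponent and mantissa. For normal
// numbers the implicit leading 1 is made explicit in the mantissa; for
// subnormals the exponent is bumped to 1 so that all finite values share a
// common (exponent, mantissa) scale. Flush-to-zero modes squash a subnormal
// mantissa to zero here, before any arithmetic sees it.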
static inline void
fp16_unpack(int *sgn, int *exp, uint16_t *mnt, uint16_t x, int mode,
            int *flags)
{
    *sgn = x >> (FP16_BITS - 1);
    *exp = FP16_EXP(x);
    *mnt = FP16_MANT(x);

    // Handle subnormals:
    if (*exp) {
        *mnt |= 1ULL << FP16_MANT_BITS;
    } else {
        ++*exp;
        // IDC (Input Denormal) is not set in this case.
        if (mode & FPLIB_FZ16)
            *mnt = 0;
    }
}

static inline void
fp32_unpack(int *sgn, int *exp, uint32_t *mnt, uint32_t x, int mode,
            int *flags)
{
    *sgn = x >> (FP32_BITS - 1);
    *exp = FP32_EXP(x);
    *mnt = FP32_MANT(x);

    // Handle subnormals:
    if (*exp) {
        *mnt |= 1ULL << FP32_MANT_BITS;
    } else {
        ++*exp;
        if ((mode & FPLIB_FZ) && *mnt) {
            *flags |= FPLIB_IDC;
            *mnt = 0;
        }
    }
}

static inline void
fp64_unpack(int *sgn, int *exp, uint64_t *mnt, uint64_t x, int mode,
            int *flags)
{
    *sgn = x >> (FP64_BITS - 1);
    *exp = FP64_EXP(x);
    *mnt = FP64_MANT(x);

    // Handle subnormals:
    if (*exp) {
        *mnt |= 1ULL << FP64_MANT_BITS;
    } else {
        ++*exp;
        if ((mode & FPLIB_FZ) && *mnt) {
            *flags |= FPLIB_IDC;
            *mnt = 0;
        }
    }
}

static inline int
fp16_is_NaN(int exp, uint16_t mnt)
{
    return exp == FP16_EXP_INF && FP16_MANT(mnt);
}

static inline int
fp32_is_NaN(int exp, uint32_t mnt)
{
    return exp == FP32_EXP_INF && FP32_MANT(mnt);
}

static inline int
fp64_is_NaN(int exp, uint64_t mnt)
{
    return exp == FP64_EXP_INF && FP64_MANT(mnt);
}

static inline int
fp16_is_signalling_NaN(int exp, uint16_t mnt)
{
    return fp16_is_NaN(exp, mnt) && !(mnt >> (FP16_MANT_BITS - 1) & 1);
}

static inline int
fp32_is_signalling_NaN(int exp, uint32_t mnt)
{
    return fp32_is_NaN(exp, mnt) && !(mnt >> (FP32_MANT_BITS - 1) & 1);
}

static inline int
fp64_is_signalling_NaN(int exp, uint64_t mnt)
{
    return fp64_is_NaN(exp, mnt) && !(mnt >> (FP64_MANT_BITS - 1) & 1);
}

static inline int
fp16_is_quiet_NaN(int exp, uint16_t mnt)
{
    return exp == FP16_EXP_INF && (mnt >> (FP16_MANT_BITS - 1) & 1);
}

static inline int
fp32_is_quiet_NaN(int exp, uint32_t mnt)
{
    return exp == FP32_EXP_INF && (mnt >> (FP32_MANT_BITS - 1) & 1);
}

static inline int
fp64_is_quiet_NaN(int exp, uint64_t mnt)
{
    return exp == FP64_EXP_INF && (mnt >> (FP64_MANT_BITS - 1) & 1);
}

static inline int
fp16_is_infinity(int exp, uint16_t mnt)
{
    return exp == FP16_EXP_INF && !FP16_MANT(mnt);
}

static inline int
fp32_is_infinity(int exp, uint32_t mnt)
{
    return exp == FP32_EXP_INF && !FP32_MANT(mnt);
}

static inline int
fp64_is_infinity(int exp, uint64_t mnt)
{
    return exp == FP64_EXP_INF && !FP64_MANT(mnt);
}

static inline uint16_t
fp16_process_NaN(uint16_t a, int mode, int *flags)
{
    if (!(a >> (FP16_MANT_BITS - 1) & 1)) {
        *flags |= FPLIB_IOC;
        a |= 1ULL << (FP16_MANT_BITS - 1);
    }
    return mode & FPLIB_DN ? fp16_defaultNaN() : a;
}

static inline uint32_t
fp32_process_NaN(uint32_t a, int mode, int *flags)
{
    if (!(a >> (FP32_MANT_BITS - 1) & 1)) {
        *flags |= FPLIB_IOC;
        a |= 1ULL << (FP32_MANT_BITS - 1);
    }
    return mode & FPLIB_DN ? fp32_defaultNaN() : a;
}

static inline uint64_t
fp64_process_NaN(uint64_t a, int mode, int *flags)
{
    if (!(a >> (FP64_MANT_BITS - 1) & 1)) {
        *flags |= FPLIB_IOC;
        a |= 1ULL << (FP64_MANT_BITS - 1);
    }
    return mode & FPLIB_DN ? fp64_defaultNaN() : a;
}

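// Propagate NaNs for two- and three-operand operations with the usual Arm
// priority: signalling NaNs are handled before quiet NaNs, and earlier
// operands before later ones. Returns 0 (which is not a NaN encoding) when
// no operand is a NaN, so callers can test the result with a plain "if".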
static uint16_t
fp16_process_NaNs(uint16_t a, uint16_t b, int mode, int *flags)
{
    int a_exp = FP16_EXP(a);
    uint16_t a_mnt = FP16_MANT(a);
    int b_exp = FP16_EXP(b);
    uint16_t b_mnt = FP16_MANT(b);

    // Handle signalling NaNs:
    if (fp16_is_signalling_NaN(a_exp, a_mnt))
        return fp16_process_NaN(a, mode, flags);
    if (fp16_is_signalling_NaN(b_exp, b_mnt))
        return fp16_process_NaN(b, mode, flags);

    // Handle quiet NaNs:
    if (fp16_is_NaN(a_exp, a_mnt))
        return fp16_process_NaN(a, mode, flags);
    if (fp16_is_NaN(b_exp, b_mnt))
        return fp16_process_NaN(b, mode, flags);

    return 0;
}

static uint32_t
fp32_process_NaNs(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_exp = FP32_EXP(a);
    uint32_t a_mnt = FP32_MANT(a);
    int b_exp = FP32_EXP(b);
    uint32_t b_mnt = FP32_MANT(b);

    // Handle signalling NaNs:
    if (fp32_is_signalling_NaN(a_exp, a_mnt))
        return fp32_process_NaN(a, mode, flags);
    if (fp32_is_signalling_NaN(b_exp, b_mnt))
        return fp32_process_NaN(b, mode, flags);

    // Handle quiet NaNs:
    if (fp32_is_NaN(a_exp, a_mnt))
        return fp32_process_NaN(a, mode, flags);
    if (fp32_is_NaN(b_exp, b_mnt))
        return fp32_process_NaN(b, mode, flags);

    return 0;
}

static uint64_t
fp64_process_NaNs(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_exp = FP64_EXP(a);
    uint64_t a_mnt = FP64_MANT(a);
    int b_exp = FP64_EXP(b);
    uint64_t b_mnt = FP64_MANT(b);

    // Handle signalling NaNs:
    if (fp64_is_signalling_NaN(a_exp, a_mnt))
        return fp64_process_NaN(a, mode, flags);
    if (fp64_is_signalling_NaN(b_exp, b_mnt))
        return fp64_process_NaN(b, mode, flags);

    // Handle quiet NaNs:
    if (fp64_is_NaN(a_exp, a_mnt))
        return fp64_process_NaN(a, mode, flags);
    if (fp64_is_NaN(b_exp, b_mnt))
        return fp64_process_NaN(b, mode, flags);

    return 0;
}

static uint16_t
fp16_process_NaNs3(uint16_t a, uint16_t b, uint16_t c, int mode, int *flags)
{
    int a_exp = FP16_EXP(a);
    uint16_t a_mnt = FP16_MANT(a);
    int b_exp = FP16_EXP(b);
    uint16_t b_mnt = FP16_MANT(b);
    int c_exp = FP16_EXP(c);
    uint16_t c_mnt = FP16_MANT(c);

    // Handle signalling NaNs:
    if (fp16_is_signalling_NaN(a_exp, a_mnt))
        return fp16_process_NaN(a, mode, flags);
    if (fp16_is_signalling_NaN(b_exp, b_mnt))
        return fp16_process_NaN(b, mode, flags);
    if (fp16_is_signalling_NaN(c_exp, c_mnt))
        return fp16_process_NaN(c, mode, flags);

    // Handle quiet NaNs:
    if (fp16_is_NaN(a_exp, a_mnt))
        return fp16_process_NaN(a, mode, flags);
    if (fp16_is_NaN(b_exp, b_mnt))
        return fp16_process_NaN(b, mode, flags);
    if (fp16_is_NaN(c_exp, c_mnt))
        return fp16_process_NaN(c, mode, flags);

    return 0;
}

static uint32_t
fp32_process_NaNs3(uint32_t a, uint32_t b, uint32_t c, int mode, int *flags)
{
    int a_exp = FP32_EXP(a);
    uint32_t a_mnt = FP32_MANT(a);
    int b_exp = FP32_EXP(b);
    uint32_t b_mnt = FP32_MANT(b);
    int c_exp = FP32_EXP(c);
    uint32_t c_mnt = FP32_MANT(c);

    // Handle signalling NaNs:
    if (fp32_is_signalling_NaN(a_exp, a_mnt))
        return fp32_process_NaN(a, mode, flags);
    if (fp32_is_signalling_NaN(b_exp, b_mnt))
        return fp32_process_NaN(b, mode, flags);
    if (fp32_is_signalling_NaN(c_exp, c_mnt))
        return fp32_process_NaN(c, mode, flags);

    // Handle quiet NaNs:
    if (fp32_is_NaN(a_exp, a_mnt))
        return fp32_process_NaN(a, mode, flags);
    if (fp32_is_NaN(b_exp, b_mnt))
        return fp32_process_NaN(b, mode, flags);
    if (fp32_is_NaN(c_exp, c_mnt))
        return fp32_process_NaN(c, mode, flags);

    return 0;
}

static uint64_t
fp64_process_NaNs3(uint64_t a, uint64_t b, uint64_t c, int mode, int *flags)
{
    int a_exp = FP64_EXP(a);
    uint64_t a_mnt = FP64_MANT(a);
    int b_exp = FP64_EXP(b);
    uint64_t b_mnt = FP64_MANT(b);
    int c_exp = FP64_EXP(c);
    uint64_t c_mnt = FP64_MANT(c);

    // Handle signalling NaNs:
    if (fp64_is_signalling_NaN(a_exp, a_mnt))
        return fp64_process_NaN(a, mode, flags);
    if (fp64_is_signalling_NaN(b_exp, b_mnt))
        return fp64_process_NaN(b, mode, flags);
    if (fp64_is_signalling_NaN(c_exp, c_mnt))
        return fp64_process_NaN(c, mode, flags);

    // Handle quiet NaNs:
    if (fp64_is_NaN(a_exp, a_mnt))
        return fp64_process_NaN(a, mode, flags);
    if (fp64_is_NaN(b_exp, b_mnt))
        return fp64_process_NaN(b, mode, flags);
    if (fp64_is_NaN(c_exp, c_mnt))
        return fp64_process_NaN(c, mode, flags);

    return 0;
}

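// Round a normalised (sign, exponent, mantissa) triple and pack the result.
// On entry the mantissa carries extra low-order precision beyond the
// destination format; those bits are first collapsed into two rounding bits
// plus a sticky bit, so "error" below is 0 (exact), 1 (inexact, below half
// way), 2 (exactly half way) or 3 (above half way). The fp32/fp64 variants
// that follow are identical apart from the widths; only the half-precision
// variant has the AHP (alternative half-precision) overflow path.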
static uint16_t
fp16_round_(int sgn, int exp, uint16_t mnt, int rm, int mode, int *flags)
{
    int biased_exp; // non-negative exponent value for result
    uint16_t int_mant; // mantissa for result, less than (2 << FP16_MANT_BITS)
    int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5

    assert(rm != FPRounding_TIEAWAY);

    // Flush to zero:
    if ((mode & FPLIB_FZ16) && exp < 1) {
        *flags |= FPLIB_UFC;
        return fp16_zero(sgn);
    }

    // The bottom FP16_EXP_BITS bits of mnt are orred together:
    mnt = (4ULL << FP16_MANT_BITS | mnt >> (FP16_EXP_BITS - 1) |
           ((mnt & ((1ULL << FP16_EXP_BITS) - 1)) != 0));

    if (exp > 0) {
        biased_exp = exp;
        int_mant = mnt >> 2;
        error = mnt & 3;
    } else {
        biased_exp = 0;
        int_mant = lsr16(mnt, 3 - exp);
        error = (lsr16(mnt, 1 - exp) & 3) | !!(mnt & (lsl16(1, 1 - exp) - 1));
    }

    if (!biased_exp && error) { // xx should also check fpscr_val<11>
        *flags |= FPLIB_UFC;
    }

    // Round up:
    if ((rm == FPLIB_RN && (error == 3 ||
                            (error == 2 && (int_mant & 1)))) ||
        (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) {
        ++int_mant;
        if (int_mant == 1ULL << FP16_MANT_BITS) {
            // Rounded up from denormalized to normalized
            biased_exp = 1;
        }
        if (int_mant == 2ULL << FP16_MANT_BITS) {
            // Rounded up to next exponent
            ++biased_exp;
            int_mant >>= 1;
        }
    }

    // Handle rounding to odd aka Von Neumann rounding:
    if (error && rm == FPRounding_ODD)
        int_mant |= 1;

    // Handle overflow:
    if (!(mode & FPLIB_AHP)) {
        if (biased_exp >= (int)FP16_EXP_INF) {
            *flags |= FPLIB_OFC | FPLIB_IXC;
            if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) ||
                (rm == FPLIB_RM && sgn)) {
                return fp16_infinity(sgn);
            } else {
                return fp16_max_normal(sgn);
            }
        }
    } else {
        if (biased_exp >= (int)FP16_EXP_INF + 1) {
            *flags |= FPLIB_IOC;
            return fp16_pack(sgn, FP16_EXP_INF, -1);
        }
    }

    if (error) {
        *flags |= FPLIB_IXC;
    }

    return fp16_pack(sgn, biased_exp, int_mant);
}

static uint16_t
fp16_round(int sgn, int exp, uint16_t mnt, int mode, int *flags)
{
    return fp16_round_(sgn, exp, mnt, mode & 3, mode, flags);
}

static uint32_t
fp32_round_(int sgn, int exp, uint32_t mnt, int rm, int mode, int *flags)
{
    int biased_exp; // non-negative exponent value for result
    uint32_t int_mant; // mantissa for result, less than (2 << FP32_MANT_BITS)
    int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5

    assert(rm != FPRounding_TIEAWAY);

    // Flush to zero:
    if ((mode & FPLIB_FZ) && exp < 1) {
        *flags |= FPLIB_UFC;
        return fp32_zero(sgn);
    }

    // The bottom FP32_EXP_BITS bits of mnt are orred together:
    mnt = (4ULL << FP32_MANT_BITS | mnt >> (FP32_EXP_BITS - 1) |
           ((mnt & ((1ULL << FP32_EXP_BITS) - 1)) != 0));

    if (exp > 0) {
        biased_exp = exp;
        int_mant = mnt >> 2;
        error = mnt & 3;
    } else {
        biased_exp = 0;
        int_mant = lsr32(mnt, 3 - exp);
        error = (lsr32(mnt, 1 - exp) & 3) | !!(mnt & (lsl32(1, 1 - exp) - 1));
    }

    if (!biased_exp && error) { // xx should also check fpscr_val<11>
        *flags |= FPLIB_UFC;
    }

    // Round up:
    if ((rm == FPLIB_RN && (error == 3 ||
                            (error == 2 && (int_mant & 1)))) ||
        (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) {
        ++int_mant;
        if (int_mant == 1ULL << FP32_MANT_BITS) {
            // Rounded up from denormalized to normalized
            biased_exp = 1;
        }
        if (int_mant == 2ULL << FP32_MANT_BITS) {
            // Rounded up to next exponent
            ++biased_exp;
            int_mant >>= 1;
        }
    }

    // Handle rounding to odd aka Von Neumann rounding:
    if (error && rm == FPRounding_ODD)
        int_mant |= 1;

    // Handle overflow:
    if (biased_exp >= (int)FP32_EXP_INF) {
        *flags |= FPLIB_OFC | FPLIB_IXC;
        if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) ||
            (rm == FPLIB_RM && sgn)) {
            return fp32_infinity(sgn);
        } else {
            return fp32_max_normal(sgn);
        }
    }

    if (error) {
        *flags |= FPLIB_IXC;
    }

    return fp32_pack(sgn, biased_exp, int_mant);
}

static uint32_t
fp32_round(int sgn, int exp, uint32_t mnt, int mode, int *flags)
{
    return fp32_round_(sgn, exp, mnt, mode & 3, mode, flags);
}

static uint64_t
fp64_round_(int sgn, int exp, uint64_t mnt, int rm, int mode, int *flags)
{
    int biased_exp; // non-negative exponent value for result
    uint64_t int_mant; // mantissa for result, less than (2 << FP64_MANT_BITS)
    int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5

    assert(rm != FPRounding_TIEAWAY);

    // Flush to zero:
    if ((mode & FPLIB_FZ) && exp < 1) {
        *flags |= FPLIB_UFC;
        return fp64_zero(sgn);
    }

    // The bottom FP64_EXP_BITS bits of mnt are orred together:
    mnt = (4ULL << FP64_MANT_BITS | mnt >> (FP64_EXP_BITS - 1) |
           ((mnt & ((1ULL << FP64_EXP_BITS) - 1)) != 0));

    if (exp > 0) {
        biased_exp = exp;
        int_mant = mnt >> 2;
        error = mnt & 3;
    } else {
        biased_exp = 0;
        int_mant = lsr64(mnt, 3 - exp);
        error = (lsr64(mnt, 1 - exp) & 3) | !!(mnt & (lsl64(1, 1 - exp) - 1));
    }

    if (!biased_exp && error) { // xx should also check fpscr_val<11>
        *flags |= FPLIB_UFC;
    }

    // Round up:
    if ((rm == FPLIB_RN && (error == 3 ||
                            (error == 2 && (int_mant & 1)))) ||
        (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) {
        ++int_mant;
        if (int_mant == 1ULL << FP64_MANT_BITS) {
            // Rounded up from denormalized to normalized
            biased_exp = 1;
        }
        if (int_mant == 2ULL << FP64_MANT_BITS) {
            // Rounded up to next exponent
            ++biased_exp;
            int_mant >>= 1;
        }
    }

    // Handle rounding to odd aka Von Neumann rounding:
    if (error && rm == FPRounding_ODD)
        int_mant |= 1;

    // Handle overflow:
    if (biased_exp >= (int)FP64_EXP_INF) {
        *flags |= FPLIB_OFC | FPLIB_IXC;
        if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) ||
            (rm == FPLIB_RM && sgn)) {
            return fp64_infinity(sgn);
        } else {
            return fp64_max_normal(sgn);
        }
    }

    if (error) {
        *flags |= FPLIB_IXC;
    }

    return fp64_pack(sgn, biased_exp, int_mant);
}

static uint64_t
fp64_round(int sgn, int exp, uint64_t mnt, int mode, int *flags)
{
    return fp64_round_(sgn, exp, mnt, mode & 3, mode, flags);
}

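// Comparisons. +0 and -0 compare equal (the "!a_mnt && !b_mnt" test), and
// any NaN operand makes the ordered comparisons (ge, gt) return false.
// Signalling NaNs always raise Invalid Operation; quiet NaNs raise it only
// for the ordered comparisons, not for eq/un.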
static int
fp16_compare_eq(uint16_t a, uint16_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint16_t a_mnt, b_mnt;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp16_is_NaN(a_exp, a_mnt) ||
        fp16_is_NaN(b_exp, b_mnt)) {
        if (fp16_is_signalling_NaN(a_exp, a_mnt) ||
            fp16_is_signalling_NaN(b_exp, b_mnt))
            *flags |= FPLIB_IOC;
        return 0;
    }
    return a == b || (!a_mnt && !b_mnt);
}

static int
fp16_compare_ge(uint16_t a, uint16_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint16_t a_mnt, b_mnt;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp16_is_NaN(a_exp, a_mnt) ||
        fp16_is_NaN(b_exp, b_mnt)) {
        *flags |= FPLIB_IOC;
        return 0;
    }
    if (!a_mnt && !b_mnt)
        return 1;
    if (a_sgn != b_sgn)
        return b_sgn;
    if (a_exp != b_exp)
        return a_sgn ^ (a_exp > b_exp);
    if (a_mnt != b_mnt)
        return a_sgn ^ (a_mnt > b_mnt);
    return 1;
}

static int
fp16_compare_gt(uint16_t a, uint16_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint16_t a_mnt, b_mnt;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp16_is_NaN(a_exp, a_mnt) ||
        fp16_is_NaN(b_exp, b_mnt)) {
        *flags |= FPLIB_IOC;
        return 0;
    }
    if (!a_mnt && !b_mnt)
        return 0;
    if (a_sgn != b_sgn)
        return b_sgn;
    if (a_exp != b_exp)
        return a_sgn ^ (a_exp > b_exp);
    if (a_mnt != b_mnt)
        return a_sgn ^ (a_mnt > b_mnt);
    return 0;
}

static int
fp16_compare_un(uint16_t a, uint16_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint16_t a_mnt, b_mnt;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp16_is_NaN(a_exp, a_mnt) ||
        fp16_is_NaN(b_exp, b_mnt)) {
        if (fp16_is_signalling_NaN(a_exp, a_mnt) ||
            fp16_is_signalling_NaN(b_exp, b_mnt))
            *flags |= FPLIB_IOC;
        return 1;
    }
    return 0;
}

static int
fp32_compare_eq(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint32_t a_mnt, b_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp32_is_NaN(a_exp, a_mnt) ||
        fp32_is_NaN(b_exp, b_mnt)) {
        if (fp32_is_signalling_NaN(a_exp, a_mnt) ||
            fp32_is_signalling_NaN(b_exp, b_mnt))
            *flags |= FPLIB_IOC;
        return 0;
    }
    return a == b || (!a_mnt && !b_mnt);
}

static int
fp32_compare_ge(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint32_t a_mnt, b_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp32_is_NaN(a_exp, a_mnt) ||
        fp32_is_NaN(b_exp, b_mnt)) {
        *flags |= FPLIB_IOC;
        return 0;
    }
    if (!a_mnt && !b_mnt)
        return 1;
    if (a_sgn != b_sgn)
        return b_sgn;
    if (a_exp != b_exp)
        return a_sgn ^ (a_exp > b_exp);
    if (a_mnt != b_mnt)
        return a_sgn ^ (a_mnt > b_mnt);
    return 1;
}

static int
fp32_compare_gt(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint32_t a_mnt, b_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp32_is_NaN(a_exp, a_mnt) ||
        fp32_is_NaN(b_exp, b_mnt)) {
        *flags |= FPLIB_IOC;
        return 0;
    }
    if (!a_mnt && !b_mnt)
        return 0;
    if (a_sgn != b_sgn)
        return b_sgn;
    if (a_exp != b_exp)
        return a_sgn ^ (a_exp > b_exp);
    if (a_mnt != b_mnt)
        return a_sgn ^ (a_mnt > b_mnt);
    return 0;
}

static int
fp32_compare_un(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint32_t a_mnt, b_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp32_is_NaN(a_exp, a_mnt) ||
        fp32_is_NaN(b_exp, b_mnt)) {
        if (fp32_is_signalling_NaN(a_exp, a_mnt) ||
            fp32_is_signalling_NaN(b_exp, b_mnt))
            *flags |= FPLIB_IOC;
        return 1;
    }
    return 0;
}

static int
fp64_compare_eq(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint64_t a_mnt, b_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp64_is_NaN(a_exp, a_mnt) ||
        fp64_is_NaN(b_exp, b_mnt)) {
        if (fp64_is_signalling_NaN(a_exp, a_mnt) ||
            fp64_is_signalling_NaN(b_exp, b_mnt))
            *flags |= FPLIB_IOC;
        return 0;
    }
    return a == b || (!a_mnt && !b_mnt);
}

static int
fp64_compare_ge(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint64_t a_mnt, b_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp64_is_NaN(a_exp, a_mnt) ||
        fp64_is_NaN(b_exp, b_mnt)) {
        *flags |= FPLIB_IOC;
        return 0;
    }
    if (!a_mnt && !b_mnt)
        return 1;
    if (a_sgn != b_sgn)
        return b_sgn;
    if (a_exp != b_exp)
        return a_sgn ^ (a_exp > b_exp);
    if (a_mnt != b_mnt)
        return a_sgn ^ (a_mnt > b_mnt);
    return 1;
}

static int
fp64_compare_gt(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint64_t a_mnt, b_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp64_is_NaN(a_exp, a_mnt) ||
        fp64_is_NaN(b_exp, b_mnt)) {
        *flags |= FPLIB_IOC;
        return 0;
    }
    if (!a_mnt && !b_mnt)
        return 0;
    if (a_sgn != b_sgn)
        return b_sgn;
    if (a_exp != b_exp)
        return a_sgn ^ (a_exp > b_exp);
    if (a_mnt != b_mnt)
        return a_sgn ^ (a_mnt > b_mnt);
    return 0;
}

static int
fp64_compare_un(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint64_t a_mnt, b_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp64_is_NaN(a_exp, a_mnt) ||
        fp64_is_NaN(b_exp, b_mnt)) {
        if (fp64_is_signalling_NaN(a_exp, a_mnt) ||
            fp64_is_signalling_NaN(b_exp, b_mnt))
            *flags |= FPLIB_IOC;
        return 1;
    }
    return 0;
}

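// Addition/subtraction ("neg" flips the sign of b, giving a - b). The
// mantissas are pre-shifted left by 3 so that the smaller operand can be
// aligned with a right shift while retaining guard bits; any bits shifted
// out are ORed into the bottom bit as a sticky bit so rounding still sees
// them.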
static uint16_t
fp16_add(uint16_t a, uint16_t b, int neg, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint16_t a_mnt, b_mnt, x, x_mnt;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp16_process_NaNs(a, b, mode, flags))) {
        return x;
    }

    b_sgn ^= neg;

    // Handle infinities and zeroes:
    if (a_exp == FP16_EXP_INF && b_exp == FP16_EXP_INF && a_sgn != b_sgn) {
        *flags |= FPLIB_IOC;
        return fp16_defaultNaN();
    } else if (a_exp == FP16_EXP_INF) {
        return fp16_infinity(a_sgn);
    } else if (b_exp == FP16_EXP_INF) {
        return fp16_infinity(b_sgn);
    } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) {
        return fp16_zero(a_sgn);
    }

    a_mnt <<= 3;
    b_mnt <<= 3;
    if (a_exp >= b_exp) {
        b_mnt = (lsr16(b_mnt, a_exp - b_exp) |
                 !!(b_mnt & (lsl16(1, a_exp - b_exp) - 1)));
        b_exp = a_exp;
    } else {
        a_mnt = (lsr16(a_mnt, b_exp - a_exp) |
                 !!(a_mnt & (lsl16(1, b_exp - a_exp) - 1)));
        a_exp = b_exp;
    }
    x_sgn = a_sgn;
    x_exp = a_exp;
    if (a_sgn == b_sgn) {
        x_mnt = a_mnt + b_mnt;
    } else if (a_mnt >= b_mnt) {
        x_mnt = a_mnt - b_mnt;
    } else {
        x_sgn ^= 1;
        x_mnt = b_mnt - a_mnt;
    }

    if (!x_mnt) {
        // Sign of exact zero result depends on rounding mode
        return fp16_zero((mode & 3) == 2);
    }

    x_mnt = fp16_normalise(x_mnt, &x_exp);

    return fp16_round(x_sgn, x_exp + FP16_EXP_BITS - 3, x_mnt << 1,
                      mode, flags);
}

static uint32_t
fp32_add(uint32_t a, uint32_t b, int neg, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint32_t a_mnt, b_mnt, x, x_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp32_process_NaNs(a, b, mode, flags))) {
        return x;
    }

    b_sgn ^= neg;

    // Handle infinities and zeroes:
    if (a_exp == FP32_EXP_INF && b_exp == FP32_EXP_INF && a_sgn != b_sgn) {
        *flags |= FPLIB_IOC;
        return fp32_defaultNaN();
    } else if (a_exp == FP32_EXP_INF) {
        return fp32_infinity(a_sgn);
    } else if (b_exp == FP32_EXP_INF) {
        return fp32_infinity(b_sgn);
    } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) {
        return fp32_zero(a_sgn);
    }

    a_mnt <<= 3;
    b_mnt <<= 3;
    if (a_exp >= b_exp) {
        b_mnt = (lsr32(b_mnt, a_exp - b_exp) |
                 !!(b_mnt & (lsl32(1, a_exp - b_exp) - 1)));
        b_exp = a_exp;
    } else {
        a_mnt = (lsr32(a_mnt, b_exp - a_exp) |
                 !!(a_mnt & (lsl32(1, b_exp - a_exp) - 1)));
        a_exp = b_exp;
    }
    x_sgn = a_sgn;
    x_exp = a_exp;
    if (a_sgn == b_sgn) {
        x_mnt = a_mnt + b_mnt;
    } else if (a_mnt >= b_mnt) {
        x_mnt = a_mnt - b_mnt;
    } else {
        x_sgn ^= 1;
        x_mnt = b_mnt - a_mnt;
    }

    if (!x_mnt) {
        // Sign of exact zero result depends on rounding mode
        return fp32_zero((mode & 3) == 2);
    }

    x_mnt = fp32_normalise(x_mnt, &x_exp);

    return fp32_round(x_sgn, x_exp + FP32_EXP_BITS - 3, x_mnt << 1,
                      mode, flags);
}

static uint64_t
fp64_add(uint64_t a, uint64_t b, int neg, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint64_t a_mnt, b_mnt, x, x_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp64_process_NaNs(a, b, mode, flags))) {
        return x;
    }

    b_sgn ^= neg;

    // Handle infinities and zeroes:
    if (a_exp == FP64_EXP_INF && b_exp == FP64_EXP_INF && a_sgn != b_sgn) {
        *flags |= FPLIB_IOC;
        return fp64_defaultNaN();
    } else if (a_exp == FP64_EXP_INF) {
        return fp64_infinity(a_sgn);
    } else if (b_exp == FP64_EXP_INF) {
        return fp64_infinity(b_sgn);
    } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) {
        return fp64_zero(a_sgn);
    }

    a_mnt <<= 3;
    b_mnt <<= 3;
    if (a_exp >= b_exp) {
        b_mnt = (lsr64(b_mnt, a_exp - b_exp) |
                 !!(b_mnt & (lsl64(1, a_exp - b_exp) - 1)));
        b_exp = a_exp;
    } else {
        a_mnt = (lsr64(a_mnt, b_exp - a_exp) |
                 !!(a_mnt & (lsl64(1, b_exp - a_exp) - 1)));
        a_exp = b_exp;
    }
    x_sgn = a_sgn;
    x_exp = a_exp;
    if (a_sgn == b_sgn) {
        x_mnt = a_mnt + b_mnt;
    } else if (a_mnt >= b_mnt) {
        x_mnt = a_mnt - b_mnt;
    } else {
        x_sgn ^= 1;
        x_mnt = b_mnt - a_mnt;
    }

    if (!x_mnt) {
        // Sign of exact zero result depends on rounding mode
        return fp64_zero((mode & 3) == 2);
    }

    x_mnt = fp64_normalise(x_mnt, &x_exp);

    return fp64_round(x_sgn, x_exp + FP64_EXP_BITS - 3, x_mnt << 1,
                      mode, flags);
}

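// Multiplication. The full double-width product is formed, normalised, and
// then narrowed back to the working width with the discarded low bits
// collapsed into a sticky bit for rounding; the fp64 variant uses the
// 128-bit helpers above for the same purpose.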
static uint16_t
fp16_mul(uint16_t a, uint16_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint16_t a_mnt, b_mnt, x;
    uint32_t x_mnt;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp16_process_NaNs(a, b, mode, flags))) {
        return x;
    }

    // Handle infinities and zeroes:
    if ((a_exp == FP16_EXP_INF && !b_mnt) ||
        (b_exp == FP16_EXP_INF && !a_mnt)) {
        *flags |= FPLIB_IOC;
        return fp16_defaultNaN();
    } else if (a_exp == FP16_EXP_INF || b_exp == FP16_EXP_INF) {
        return fp16_infinity(a_sgn ^ b_sgn);
    } else if (!a_mnt || !b_mnt) {
        return fp16_zero(a_sgn ^ b_sgn);
    }

    // Multiply and normalise:
    x_sgn = a_sgn ^ b_sgn;
    x_exp = a_exp + b_exp - FP16_EXP_BIAS + 2 * FP16_EXP_BITS + 1;
    x_mnt = (uint32_t)a_mnt * b_mnt;
    x_mnt = fp32_normalise(x_mnt, &x_exp);

    // Convert to FP16_BITS bits, collapsing error into bottom bit:
    x_mnt = lsr32(x_mnt, FP16_BITS - 1) | !!lsl32(x_mnt, FP16_BITS + 1);

    return fp16_round(x_sgn, x_exp, x_mnt, mode, flags);
}

static uint32_t
fp32_mul(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint32_t a_mnt, b_mnt, x;
    uint64_t x_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp32_process_NaNs(a, b, mode, flags))) {
        return x;
    }

    // Handle infinities and zeroes:
    if ((a_exp == FP32_EXP_INF && !b_mnt) ||
        (b_exp == FP32_EXP_INF && !a_mnt)) {
        *flags |= FPLIB_IOC;
        return fp32_defaultNaN();
    } else if (a_exp == FP32_EXP_INF || b_exp == FP32_EXP_INF) {
        return fp32_infinity(a_sgn ^ b_sgn);
    } else if (!a_mnt || !b_mnt) {
        return fp32_zero(a_sgn ^ b_sgn);
    }

    // Multiply and normalise:
    x_sgn = a_sgn ^ b_sgn;
    x_exp = a_exp + b_exp - FP32_EXP_BIAS + 2 * FP32_EXP_BITS + 1;
    x_mnt = (uint64_t)a_mnt * b_mnt;
    x_mnt = fp64_normalise(x_mnt, &x_exp);

    // Convert to FP32_BITS bits, collapsing error into bottom bit:
    x_mnt = lsr64(x_mnt, FP32_BITS - 1) | !!lsl64(x_mnt, FP32_BITS + 1);

    return fp32_round(x_sgn, x_exp, x_mnt, mode, flags);
}

static uint64_t
fp64_mul(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint64_t a_mnt, b_mnt, x;
    uint64_t x0_mnt, x1_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp64_process_NaNs(a, b, mode, flags))) {
        return x;
    }

    // Handle infinities and zeroes:
    if ((a_exp == FP64_EXP_INF && !b_mnt) ||
        (b_exp == FP64_EXP_INF && !a_mnt)) {
        *flags |= FPLIB_IOC;
        return fp64_defaultNaN();
    } else if (a_exp == FP64_EXP_INF || b_exp == FP64_EXP_INF) {
        return fp64_infinity(a_sgn ^ b_sgn);
    } else if (!a_mnt || !b_mnt) {
        return fp64_zero(a_sgn ^ b_sgn);
    }

    // Multiply and normalise:
    x_sgn = a_sgn ^ b_sgn;
    x_exp = a_exp + b_exp - FP64_EXP_BIAS + 2 * FP64_EXP_BITS + 1;
    mul62x62(&x0_mnt, &x1_mnt, a_mnt, b_mnt);
    fp128_normalise(&x0_mnt, &x1_mnt, &x_exp);

    // Convert to FP64_BITS bits, collapsing error into bottom bit:
    x0_mnt = x1_mnt << 1 | !!x0_mnt;

    return fp64_round(x_sgn, x_exp, x0_mnt, mode, flags);
}

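// Fused multiply-add: computes a + b*c with a single rounding at the end.
// The "scale" argument is added to the result exponent before rounding,
// letting callers fold in a final multiplication by a power of two.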
static uint16_t
fp16_muladd(uint16_t a, uint16_t b, uint16_t c, int scale,
            int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp;
    uint16_t a_mnt, b_mnt, c_mnt, x;
    uint32_t x_mnt, y_mnt;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
    fp16_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags);

    x = fp16_process_NaNs3(a, b, c, mode, flags);

    // Quiet NaN added to product of zero and infinity:
    if (fp16_is_quiet_NaN(a_exp, a_mnt) &&
        ((!b_mnt && fp16_is_infinity(c_exp, c_mnt)) ||
         (!c_mnt && fp16_is_infinity(b_exp, b_mnt)))) {
        x = fp16_defaultNaN();
        *flags |= FPLIB_IOC;
    }

    if (x) {
        return x;
    }

    // Handle infinities and zeroes:
    if ((b_exp == FP16_EXP_INF && !c_mnt) ||
        (c_exp == FP16_EXP_INF && !b_mnt) ||
        (a_exp == FP16_EXP_INF &&
         (b_exp == FP16_EXP_INF || c_exp == FP16_EXP_INF) &&
         (a_sgn != (b_sgn ^ c_sgn)))) {
        *flags |= FPLIB_IOC;
        return fp16_defaultNaN();
    }
    if (a_exp == FP16_EXP_INF)
        return fp16_infinity(a_sgn);
    if (b_exp == FP16_EXP_INF || c_exp == FP16_EXP_INF)
        return fp16_infinity(b_sgn ^ c_sgn);
    if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn))
        return fp16_zero(a_sgn);

    x_sgn = a_sgn;
    x_exp = a_exp + 2 * FP16_EXP_BITS - 3;
    x_mnt = (uint32_t)a_mnt << (FP16_MANT_BITS + 4);

    // Multiply:
    y_sgn = b_sgn ^ c_sgn;
    y_exp = b_exp + c_exp - FP16_EXP_BIAS + 2 * FP16_EXP_BITS + 1 - 3;
    y_mnt = (uint32_t)b_mnt * c_mnt << 3;
    if (!y_mnt) {
        y_exp = x_exp;
    }

    // Add:
    if (x_exp >= y_exp) {
        y_mnt = (lsr32(y_mnt, x_exp - y_exp) |
                 !!(y_mnt & (lsl32(1, x_exp - y_exp) - 1)));
        y_exp = x_exp;
    } else {
        x_mnt = (lsr32(x_mnt, y_exp - x_exp) |
                 !!(x_mnt & (lsl32(1, y_exp - x_exp) - 1)));
        x_exp = y_exp;
    }
    if (x_sgn == y_sgn) {
        x_mnt = x_mnt + y_mnt;
    } else if (x_mnt >= y_mnt) {
        x_mnt = x_mnt - y_mnt;
    } else {
        x_sgn ^= 1;
        x_mnt = y_mnt - x_mnt;
    }

    if (!x_mnt) {
        // Sign of exact zero result depends on rounding mode
        return fp16_zero((mode & 3) == 2);
    }

    // Normalise into FP16_BITS bits, collapsing error into bottom bit:
    x_mnt = fp32_normalise(x_mnt, &x_exp);
    x_mnt = x_mnt >> (FP16_BITS - 1) | !!(uint16_t)(x_mnt << 1);

    return fp16_round(x_sgn, x_exp + scale, x_mnt, mode, flags);
}

static uint32_t
fp32_muladd(uint32_t a, uint32_t b, uint32_t c, int scale,
            int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp;
    uint32_t a_mnt, b_mnt, c_mnt, x;
    uint64_t x_mnt, y_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
    fp32_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags);

    x = fp32_process_NaNs3(a, b, c, mode, flags);

    // Quiet NaN added to product of zero and infinity:
    if (fp32_is_quiet_NaN(a_exp, a_mnt) &&
        ((!b_mnt && fp32_is_infinity(c_exp, c_mnt)) ||
         (!c_mnt && fp32_is_infinity(b_exp, b_mnt)))) {
        x = fp32_defaultNaN();
        *flags |= FPLIB_IOC;
    }

    if (x) {
        return x;
    }

    // Handle infinities and zeroes:
    if ((b_exp == FP32_EXP_INF && !c_mnt) ||
        (c_exp == FP32_EXP_INF && !b_mnt) ||
        (a_exp == FP32_EXP_INF &&
         (b_exp == FP32_EXP_INF || c_exp == FP32_EXP_INF) &&
         (a_sgn != (b_sgn ^ c_sgn)))) {
        *flags |= FPLIB_IOC;
        return fp32_defaultNaN();
    }
    if (a_exp == FP32_EXP_INF)
        return fp32_infinity(a_sgn);
    if (b_exp == FP32_EXP_INF || c_exp == FP32_EXP_INF)
        return fp32_infinity(b_sgn ^ c_sgn);
    if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn))
        return fp32_zero(a_sgn);

    x_sgn = a_sgn;
    x_exp = a_exp + 2 * FP32_EXP_BITS - 3;
    x_mnt = (uint64_t)a_mnt << (FP32_MANT_BITS + 4);

    // Multiply:
    y_sgn = b_sgn ^ c_sgn;
    y_exp = b_exp + c_exp - FP32_EXP_BIAS + 2 * FP32_EXP_BITS + 1 - 3;
    y_mnt = (uint64_t)b_mnt * c_mnt << 3;
    if (!y_mnt) {
        y_exp = x_exp;
    }

    // Add:
    if (x_exp >= y_exp) {
        y_mnt = (lsr64(y_mnt, x_exp - y_exp) |
                 !!(y_mnt & (lsl64(1, x_exp - y_exp) - 1)));
        y_exp = x_exp;
    } else {
        x_mnt = (lsr64(x_mnt, y_exp - x_exp) |
                 !!(x_mnt & (lsl64(1, y_exp - x_exp) - 1)));
        x_exp = y_exp;
    }
    if (x_sgn == y_sgn) {
        x_mnt = x_mnt + y_mnt;
    } else if (x_mnt >= y_mnt) {
        x_mnt = x_mnt - y_mnt;
    } else {
        x_sgn ^= 1;
        x_mnt = y_mnt - x_mnt;
    }

    if (!x_mnt) {
        // Sign of exact zero result depends on rounding mode
        return fp32_zero((mode & 3) == 2);
    }

    // Normalise into FP32_BITS bits, collapsing error into bottom bit:
    x_mnt = fp64_normalise(x_mnt, &x_exp);
    x_mnt = x_mnt >> (FP32_BITS - 1) | !!(uint32_t)(x_mnt << 1);

    return fp32_round(x_sgn, x_exp + scale, x_mnt, mode, flags);
}

static uint64_t
fp64_muladd(uint64_t a, uint64_t b, uint64_t c, int scale,
            int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp;
    uint64_t a_mnt, b_mnt, c_mnt, x;
    uint64_t x0_mnt, x1_mnt, y0_mnt, y1_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
    fp64_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags);

    x = fp64_process_NaNs3(a, b, c, mode, flags);

    // Quiet NaN added to product of zero and infinity:
    if (fp64_is_quiet_NaN(a_exp, a_mnt) &&
        ((!b_mnt && fp64_is_infinity(c_exp, c_mnt)) ||
         (!c_mnt && fp64_is_infinity(b_exp, b_mnt)))) {
        x = fp64_defaultNaN();
        *flags |= FPLIB_IOC;
    }

    if (x) {
        return x;
    }

    // Handle infinities and zeroes:
    if ((b_exp == FP64_EXP_INF && !c_mnt) ||
        (c_exp == FP64_EXP_INF && !b_mnt) ||
        (a_exp == FP64_EXP_INF &&
         (b_exp == FP64_EXP_INF || c_exp == FP64_EXP_INF) &&
         (a_sgn != (b_sgn ^ c_sgn)))) {
        *flags |= FPLIB_IOC;
        return fp64_defaultNaN();
    }
    if (a_exp == FP64_EXP_INF)
        return fp64_infinity(a_sgn);
    if (b_exp == FP64_EXP_INF || c_exp == FP64_EXP_INF)
        return fp64_infinity(b_sgn ^ c_sgn);
    if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn))
        return fp64_zero(a_sgn);

    x_sgn = a_sgn;
    x_exp = a_exp + FP64_EXP_BITS;
    x0_mnt = 0;
    x1_mnt = a_mnt;

    // Multiply:
    y_sgn = b_sgn ^ c_sgn;
    y_exp = b_exp + c_exp - FP64_EXP_BIAS + 2 * FP64_EXP_BITS + 1 - 3;
    mul62x62(&y0_mnt, &y1_mnt, b_mnt, c_mnt << 3);
    if (!y0_mnt && !y1_mnt) {
        y_exp = x_exp;
    }

    // Add:
    if (x_exp >= y_exp) {
        uint64_t t0, t1;
        lsl128(&t0, &t1, y0_mnt, y1_mnt,
               x_exp - y_exp < 128 ? 128 - (x_exp - y_exp) : 0);
        lsr128(&y0_mnt, &y1_mnt, y0_mnt, y1_mnt, x_exp - y_exp);
        y0_mnt |= !!(t0 | t1);
        y_exp = x_exp;
    } else {
        uint64_t t0, t1;
        lsl128(&t0, &t1, x0_mnt, x1_mnt,
               y_exp - x_exp < 128 ? 128 - (y_exp - x_exp) : 0);
        lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y_exp - x_exp);
        x0_mnt |= !!(t0 | t1);
        x_exp = y_exp;
    }
    if (x_sgn == y_sgn) {
        add128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y0_mnt, y1_mnt);
    } else if (cmp128(x0_mnt, x1_mnt, y0_mnt, y1_mnt) >= 0) {
        sub128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y0_mnt, y1_mnt);
    } else {
        x_sgn ^= 1;
        sub128(&x0_mnt, &x1_mnt, y0_mnt, y1_mnt, x0_mnt, x1_mnt);
    }

    if (!x0_mnt && !x1_mnt) {
        // Sign of exact zero result depends on rounding mode
        return fp64_zero((mode & 3) == 2);
    }

    // Normalise into FP64_BITS bits, collapsing error into bottom bit:
    fp128_normalise(&x0_mnt, &x1_mnt, &x_exp);
    x0_mnt = x1_mnt << 1 | !!x0_mnt;

    return fp64_round(x_sgn, x_exp + scale, x0_mnt, mode, flags);
}

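// Division. For half and single precision the quotient mantissa fits an
// ordinary integer division wide enough to leave guard bits; the remainder
// check afterwards sets the bottom (sticky) bit whenever the division was
// inexact. Double precision needs the Newton-Raphson scheme further below.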
static uint16_t
fp16_div(uint16_t a, uint16_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint16_t a_mnt, b_mnt, x;
    uint32_t x_mnt;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp16_process_NaNs(a, b, mode, flags)))
        return x;

    // Handle infinities and zeroes:
    if ((a_exp == FP16_EXP_INF && b_exp == FP16_EXP_INF) ||
        (!a_mnt && !b_mnt)) {
        *flags |= FPLIB_IOC;
        return fp16_defaultNaN();
    }
    if (a_exp == FP16_EXP_INF || !b_mnt) {
        if (a_exp != FP16_EXP_INF)
            *flags |= FPLIB_DZC;
        return fp16_infinity(a_sgn ^ b_sgn);
    }
    if (!a_mnt || b_exp == FP16_EXP_INF)
        return fp16_zero(a_sgn ^ b_sgn);

    // Divide, setting bottom bit if inexact:
    a_mnt = fp16_normalise(a_mnt, &a_exp);
    x_sgn = a_sgn ^ b_sgn;
    x_exp = a_exp - b_exp + (FP16_EXP_BIAS + FP16_BITS + 2 * FP16_EXP_BITS - 3);
    x_mnt = ((uint32_t)a_mnt << (FP16_MANT_BITS - FP16_EXP_BITS + 3)) / b_mnt;
    x_mnt |= (x_mnt * b_mnt !=
              (uint32_t)a_mnt << (FP16_MANT_BITS - FP16_EXP_BITS + 3));

    // Normalise into FP16_BITS bits, collapsing error into bottom bit:
    x_mnt = fp32_normalise(x_mnt, &x_exp);
    x_mnt = x_mnt >> (FP16_BITS - 1) | !!(uint16_t)(x_mnt << 1);

    return fp16_round(x_sgn, x_exp, x_mnt, mode, flags);
}

static uint32_t
fp32_div(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint32_t a_mnt, b_mnt, x;
    uint64_t x_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp32_process_NaNs(a, b, mode, flags)))
        return x;

    // Handle infinities and zeroes:
    if ((a_exp == FP32_EXP_INF && b_exp == FP32_EXP_INF) ||
        (!a_mnt && !b_mnt)) {
        *flags |= FPLIB_IOC;
        return fp32_defaultNaN();
    }
    if (a_exp == FP32_EXP_INF || !b_mnt) {
        if (a_exp != FP32_EXP_INF)
            *flags |= FPLIB_DZC;
        return fp32_infinity(a_sgn ^ b_sgn);
    }
    if (!a_mnt || b_exp == FP32_EXP_INF)
        return fp32_zero(a_sgn ^ b_sgn);

    // Divide, setting bottom bit if inexact:
    a_mnt = fp32_normalise(a_mnt, &a_exp);
    x_sgn = a_sgn ^ b_sgn;
    x_exp = a_exp - b_exp + (FP32_EXP_BIAS + FP32_BITS + 2 * FP32_EXP_BITS - 3);
    x_mnt = ((uint64_t)a_mnt << (FP32_MANT_BITS - FP32_EXP_BITS + 3)) / b_mnt;
    x_mnt |= (x_mnt * b_mnt !=
              (uint64_t)a_mnt << (FP32_MANT_BITS - FP32_EXP_BITS + 3));

    // Normalise into FP32_BITS bits, collapsing error into bottom bit:
    x_mnt = fp64_normalise(x_mnt, &x_exp);
    x_mnt = x_mnt >> (FP32_BITS - 1) | !!(uint32_t)(x_mnt << 1);

    return fp32_round(x_sgn, x_exp, x_mnt, mode, flags);
}

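// Double-precision division cannot use a single wide integer divide, so the
// reciprocal of the divisor is built up by Newton-Raphson refinement of a
// 32-bit estimate and then multiplied by the dividend; a final trial
// multiplication corrects the deliberate underestimate by at most one and
// doubles as the inexactness (sticky bit) test.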
static uint64_t
fp64_div(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp, c;
    uint64_t a_mnt, b_mnt, x, x_mnt, x0_mnt, x1_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp64_process_NaNs(a, b, mode, flags)))
        return x;

    // Handle infinities and zeroes:
    if ((a_exp == FP64_EXP_INF && b_exp == FP64_EXP_INF) ||
        (!a_mnt && !b_mnt)) {
        *flags |= FPLIB_IOC;
        return fp64_defaultNaN();
    }
    if (a_exp == FP64_EXP_INF || !b_mnt) {
        if (a_exp != FP64_EXP_INF)
            *flags |= FPLIB_DZC;
        return fp64_infinity(a_sgn ^ b_sgn);
    }
    if (!a_mnt || b_exp == FP64_EXP_INF)
        return fp64_zero(a_sgn ^ b_sgn);

    // Find reciprocal of divisor with Newton-Raphson:
    a_mnt = fp64_normalise(a_mnt, &a_exp);
    b_mnt = fp64_normalise(b_mnt, &b_exp);
    x_mnt = ~(uint64_t)0 / (b_mnt >> 31);
    mul64x32(&x0_mnt, &x1_mnt, b_mnt, x_mnt);
    sub128(&x0_mnt, &x1_mnt, 0, (uint64_t)1 << 32, x0_mnt, x1_mnt);
    lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 32);
    mul64x32(&x0_mnt, &x1_mnt, x0_mnt, x_mnt);
    lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 33);

    // Multiply by dividend:
    x_sgn = a_sgn ^ b_sgn;
    x_exp = a_exp - b_exp + FP64_EXP_BIAS + 8;
    mul62x62(&x0_mnt, &x1_mnt, x0_mnt, a_mnt >> 2);
    lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 4);
    x_mnt = x1_mnt;

    // This is an underestimate, so try adding one:
    mul62x62(&x0_mnt, &x1_mnt, b_mnt >> 2, x_mnt + 1);
    c = cmp128(x0_mnt, x1_mnt, 0, a_mnt >> 11);
    if (c <= 0) {
        ++x_mnt;
    }

    x_mnt = fp64_normalise(x_mnt, &x_exp);

    return fp64_round(x_sgn, x_exp, x_mnt << 1 | !!c, mode, flags);
}

static void
set_fpscr0(FPSCR &fpscr, int flags)
{
    if (flags & FPLIB_IDC) {
        fpscr.idc = 1;
    }
    if (flags & FPLIB_IOC) {
        fpscr.ioc = 1;
    }
    if (flags & FPLIB_DZC) {
        fpscr.dzc = 1;
    }
    if (flags & FPLIB_OFC) {
        fpscr.ofc = 1;
    }
    if (flags & FPLIB_UFC) {
        fpscr.ufc = 1;
    }
    if (flags & FPLIB_IXC) {
        fpscr.ixc = 1;
    }
}

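// Scale a by 2^b. The clamp on b (to +/-300 here, +/-3000 in the fp64
// variant) is presumably safe because those magnitudes already guarantee
// overflow or underflow for every finite input, so larger values of b
// behave identically while keeping the exponent arithmetic within int
// range.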
static uint16_t
fp16_scale(uint16_t a, int16_t b, int mode, int *flags)
{
    int a_sgn, a_exp;
    uint16_t a_mnt;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);

    // Handle NaNs:
    if (fp16_is_NaN(a_exp, a_mnt)) {
        return fp16_process_NaN(a, mode, flags);
    }

    // Handle zeroes:
    if (!a_mnt) {
        return fp16_zero(a_sgn);
    }

    // Handle infinities:
    if (a_exp == FP16_EXP_INF) {
        return fp16_infinity(a_sgn);
    }

    b = b < -300 ? -300 : b;
    b = b > 300 ? 300 : b;
    a_exp += b;
    a_mnt <<= 3;

    a_mnt = fp16_normalise(a_mnt, &a_exp);

    return fp16_round(a_sgn, a_exp + FP16_EXP_BITS - 3, a_mnt << 1,
                      mode, flags);
}

static uint32_t
fp32_scale(uint32_t a, int32_t b, int mode, int *flags)
{
    int a_sgn, a_exp;
    uint32_t a_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);

    // Handle NaNs:
    if (fp32_is_NaN(a_exp, a_mnt)) {
        return fp32_process_NaN(a, mode, flags);
    }

    // Handle zeroes:
    if (!a_mnt) {
        return fp32_zero(a_sgn);
    }

    // Handle infinities:
    if (a_exp == FP32_EXP_INF) {
        return fp32_infinity(a_sgn);
    }

    b = b < -300 ? -300 : b;
    b = b > 300 ? 300 : b;
    a_exp += b;
    a_mnt <<= 3;

    a_mnt = fp32_normalise(a_mnt, &a_exp);

    return fp32_round(a_sgn, a_exp + FP32_EXP_BITS - 3, a_mnt << 1,
                      mode, flags);
}

static uint64_t
fp64_scale(uint64_t a, int64_t b, int mode, int *flags)
{
    int a_sgn, a_exp;
    uint64_t a_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);

    // Handle NaNs:
    if (fp64_is_NaN(a_exp, a_mnt)) {
        return fp64_process_NaN(a, mode, flags);
    }

    // Handle zeroes:
    if (!a_mnt) {
        return fp64_zero(a_sgn);
    }

    // Handle infinities:
    if (a_exp == FP64_EXP_INF) {
        return fp64_infinity(a_sgn);
    }

    b = b < -3000 ? -3000 : b;
    b = b > 3000 ? 3000 : b;
    a_exp += b;
    a_mnt <<= 3;

    a_mnt = fp64_normalise(a_mnt, &a_exp);

    return fp64_round(a_sgn, a_exp + FP64_EXP_BITS - 3, a_mnt << 1,
                      mode, flags);
}

2024static uint16_t
2025fp16_sqrt(uint16_t a, int mode, int *flags)
2026{
2027 int a_sgn, a_exp, x_sgn, x_exp;
2028 uint16_t a_mnt, x_mnt;
2029 uint32_t x, t0, t1;
2030
2031 fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
2032
2033 // Handle NaNs:
2034 if (fp16_is_NaN(a_exp, a_mnt))
2035 return fp16_process_NaN(a, mode, flags);
2036
2037 // Handle infinities and zeroes:
2038 if (!a_mnt)
2039 return fp16_zero(a_sgn);
2040 if (a_exp == FP16_EXP_INF && !a_sgn)
2041 return fp16_infinity(a_sgn);
2042 if (a_sgn) {
2043 *flags |= FPLIB_IOC;
2044 return fp16_defaultNaN();
2045 }
2046
2047 a_mnt = fp16_normalise(a_mnt, &a_exp);
2048 if (a_exp & 1) {
2049 ++a_exp;
2050 a_mnt >>= 1;
2051 }
2052
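    // Fixed-point Heron (Babylonian) iterations; the comment on each
    // step gives the real-valued recurrence being approximated. Each
    // iteration roughly doubles the number of correct bits, and the
    // multiply-back below (t1 versus t0) trims the estimate to a
    // truncated mantissa plus a sticky bit for rounding.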
2053 // x = (a * 3 + 5) / 8
2054 x = ((uint32_t)a_mnt << 14) + ((uint32_t)a_mnt << 13) + ((uint32_t)5 << 28);
2055
2056 // x = (a / x + x) / 2; // 8-bit accuracy
2057 x = (((uint32_t)a_mnt << 16) / (x >> 15) + (x >> 16)) << 15;
2058
2059 // x = (a / x + x) / 2; // 16-bit accuracy
2060 x = (((uint32_t)a_mnt << 16) / (x >> 15) + (x >> 16)) << 15;
2061
2062 x_sgn = 0;
2063 x_exp = (a_exp + 27) >> 1;
2064 x_mnt = ((x - (1 << 18)) >> 19) + 1;
2065 t1 = (uint32_t)x_mnt * x_mnt;
2066 t0 = (uint32_t)a_mnt << 9;
2067 if (t1 > t0) {
2068 --x_mnt;
2069 }
2070
2071 x_mnt = fp16_normalise(x_mnt, &x_exp);
2072
2073 return fp16_round(x_sgn, x_exp, x_mnt << 1 | (t1 != t0), mode, flags);
2074}
2075
2076static uint32_t
2077fp32_sqrt(uint32_t a, int mode, int *flags)
2078{
2079 int a_sgn, a_exp, x_sgn, x_exp;
2080 uint32_t a_mnt, x, x_mnt;
2081 uint64_t t0, t1;
2082
2083 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
2084
2085 // Handle NaNs:
2086 if (fp32_is_NaN(a_exp, a_mnt))
2087 return fp32_process_NaN(a, mode, flags);
2088
2089 // Handle infinities and zeroes:
2090 if (!a_mnt)
2091 return fp32_zero(a_sgn);
2092 if (a_exp == FP32_EXP_INF && !a_sgn)
2093 return fp32_infinity(a_sgn);
2094 if (a_sgn) {
2095 *flags |= FPLIB_IOC;
2096 return fp32_defaultNaN();
2097 }
2098
2099 a_mnt = fp32_normalise(a_mnt, &a_exp);
2100 if (!(a_exp & 1)) {
2101 ++a_exp;
2102 a_mnt >>= 1;
2103 }
2104
2105 // x = (a * 3 + 5) / 8
2106 x = (a_mnt >> 2) + (a_mnt >> 3) + ((uint32_t)5 << 28);
2107
2108 // x = (a / x + x) / 2; // 8-bit accuracy
2109 x = (a_mnt / (x >> 15) + (x >> 16)) << 15;
2110
2111 // x = (a / x + x) / 2; // 16-bit accuracy
2112 x = (a_mnt / (x >> 15) + (x >> 16)) << 15;
2113
2114 // x = (a / x + x) / 2; // 32-bit accuracy
2115 x = ((((uint64_t)a_mnt << 32) / x) >> 2) + (x >> 1);
2116
2117 x_sgn = 0;
2118 x_exp = (a_exp + 147) >> 1;
2119 x_mnt = ((x - (1 << 5)) >> 6) + 1;
2120 t1 = (uint64_t)x_mnt * x_mnt;
2121 t0 = (uint64_t)a_mnt << 19;
2122 if (t1 > t0) {
2123 --x_mnt;
2124 }
2125
2126 x_mnt = fp32_normalise(x_mnt, &x_exp);
2127
2128 return fp32_round(x_sgn, x_exp, x_mnt << 1 | (t1 != t0), mode, flags);
2129}
2130
2131static uint64_t
2132fp64_sqrt(uint64_t a, int mode, int *flags)
2133{
2134 int a_sgn, a_exp, x_sgn, x_exp, c;
2135 uint64_t a_mnt, x_mnt, r, x0, x1;
2136 uint32_t x;
2137
2138 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
2139
2140 // Handle NaNs:
2141 if (fp64_is_NaN(a_exp, a_mnt))
2142 return fp64_process_NaN(a, mode, flags);
2143
2144 // Handle infinities and zeroes:
2145 if (!a_mnt)
2146 return fp64_zero(a_sgn);
2147 if (a_exp == FP64_EXP_INF && !a_sgn)
2148 return fp64_infinity(a_sgn);
2149 if (a_sgn) {
2150 *flags |= FPLIB_IOC;
2151 return fp64_defaultNaN();
2152 }
2153
2154 a_mnt = fp64_normalise(a_mnt, &a_exp);
2155 if (a_exp & 1) {
2156 ++a_exp;
2157 a_mnt >>= 1;
2158 }
2159
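    // Same scheme as fp16/fp32_sqrt, except that the final Heron step
    // is carried out with a Newton-refined reciprocal r ~= 1/x so that
    // no 128-by-64-bit division is needed; the closing multiply-back
    // comparison against a_mnt then fixes the last ulp and supplies
    // the sticky bit.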
2160 // x = (a * 3 + 5) / 8
2161 x = (a_mnt >> 34) + (a_mnt >> 35) + ((uint32_t)5 << 28);
2162
2163 // x = (a / x + x) / 2; // 8-bit accuracy
2164 x = ((a_mnt >> 32) / (x >> 15) + (x >> 16)) << 15;
2165
2166 // x = (a / x + x) / 2; // 16-bit accuracy
2167 x = ((a_mnt >> 32) / (x >> 15) + (x >> 16)) << 15;
2168
2169 // x = (a / x + x) / 2; // 32-bit accuracy
2170 x = ((a_mnt / x) >> 2) + (x >> 1);
2171
2172 // r = 1 / x; // 32-bit accuracy
2173 r = ((uint64_t)1 << 62) / x;
2174
2175 // r = r * (2 - x * r); // 64-bit accuracy
2176 mul64x32(&x0, &x1, -(uint64_t)x * r << 1, r);
2177 lsr128(&x0, &x1, x0, x1, 31);
2178
2179 // x = (x + a * r) / 2; // 64-bit accuracy
2180 mul62x62(&x0, &x1, a_mnt >> 10, x0 >> 2);
2181 lsl128(&x0, &x1, x0, x1, 5);
2182 lsr128(&x0, &x1, x0, x1, 56);
2183
2184 x0 = ((uint64_t)x << 31) + (x0 >> 1);
2185
2186 x_sgn = 0;
2187 x_exp = (a_exp + 1053) >> 1;
2188 x_mnt = x0;
2189 x_mnt = ((x_mnt - (1 << 8)) >> 9) + 1;
2190 mul62x62(&x0, &x1, x_mnt, x_mnt);
2191 lsl128(&x0, &x1, x0, x1, 19);
2192 c = cmp128(x0, x1, 0, a_mnt);
2193 if (c > 0)
2194 --x_mnt;
2195
2196 x_mnt = fp64_normalise(x_mnt, &x_exp);
2197
2198 return fp64_round(x_sgn, x_exp, x_mnt << 1 | !!c, mode, flags);
2199}
2200
2201static int
2202modeConv(FPSCR fpscr)
2203{
2204 uint32_t x = (uint32_t)fpscr;
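    // FPSCR.RMode sits at bits [23:22], FZ at bit 24 and DN at bit 25,
    // so "x >> 22 & 0xf" maps them directly onto FPLIB_RN/RP/RM/RZ,
    // FPLIB_FZ and FPLIB_DN; FZ16 (bit 19) is translated separately.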
2205 return (x >> 22 & 0xf) | (x >> 19 & 1 ? FPLIB_FZ16 : 0);
2206 // AHP bit is ignored. Only fplibConvert uses AHP.
2207}
2208
2209static void
2210set_fpscr(FPSCR &fpscr, int flags)
2211{
2212 // translate back to FPSCR
2213 bool underflow = false;
2214 if (flags & FPLIB_IDC) {
2215 fpscr.idc = 1;
2216 }
2217 if (flags & FPLIB_IOC) {
2218 fpscr.ioc = 1;
2219 }
2220 if (flags & FPLIB_DZC) {
2221 fpscr.dzc = 1;
2222 }
2223 if (flags & FPLIB_OFC) {
2224 fpscr.ofc = 1;
2225 }
2226 if (flags & FPLIB_UFC) {
2227 underflow = true; // so a flushed-to-zero underflow does not also raise inexact below
2228 fpscr.ufc = 1;
2229 }
2230 if ((flags & FPLIB_IXC) && !(underflow && fpscr.fz)) {
2231 fpscr.ixc = 1;
2232 }
2233}
2234
2235template <>
2236bool
2237fplibCompareEQ(uint16_t a, uint16_t b, FPSCR &fpscr)
2238{
2239 int flags = 0;
2240 int x = fp16_compare_eq(a, b, modeConv(fpscr), &flags);
2241 set_fpscr(fpscr, flags);
2242 return x;
2243}
2244
2245template <>
2246bool
2247fplibCompareGE(uint16_t a, uint16_t b, FPSCR &fpscr)
2248{
2249 int flags = 0;
2250 int x = fp16_compare_ge(a, b, modeConv(fpscr), &flags);
2251 set_fpscr(fpscr, flags);
2252 return x;
2253}
2254
2255template <>
2256bool
2257fplibCompareGT(uint16_t a, uint16_t b, FPSCR &fpscr)
2258{
2259 int flags = 0;
2260 int x = fp16_compare_gt(a, b, modeConv(fpscr), &flags);
2261 set_fpscr(fpscr, flags);
2262 return x;
2263}
2264
2265template <>
2266bool
2267fplibCompareUN(uint16_t a, uint16_t b, FPSCR &fpscr)
2268{
2269 int flags = 0;
2270 int x = fp16_compare_un(a, b, modeConv(fpscr), &flags);
2271 set_fpscr(fpscr, flags);
2272 return x;
2273}
2274
2275template <>
2276bool
2277fplibCompareEQ(uint32_t a, uint32_t b, FPSCR &fpscr)
2278{
2279 int flags = 0;
2280 int x = fp32_compare_eq(a, b, modeConv(fpscr), &flags);
2281 set_fpscr(fpscr, flags);
2282 return x;
2283}
2284
2285template <>
2286bool
2287fplibCompareGE(uint32_t a, uint32_t b, FPSCR &fpscr)
2288{
2289 int flags = 0;
2290 int x = fp32_compare_ge(a, b, modeConv(fpscr), &flags);
2291 set_fpscr(fpscr, flags);
2292 return x;
2293}
2294
2295template <>
2296bool
2297fplibCompareGT(uint32_t a, uint32_t b, FPSCR &fpscr)
2298{
2299 int flags = 0;
2300 int x = fp32_compare_gt(a, b, modeConv(fpscr), &flags);
2301 set_fpscr(fpscr, flags);
2302 return x;
2303}
2304
2305template <>
2306bool
2307fplibCompareUN(uint32_t a, uint32_t b, FPSCR &fpscr)
2308{
2309 int flags = 0;
2310 int x = fp32_compare_un(a, b, modeConv(fpscr), &flags);
2311 set_fpscr(fpscr, flags);
2312 return x;
2313}
2314
2315template <>
2316bool
2317fplibCompareEQ(uint64_t a, uint64_t b, FPSCR &fpscr)
2318{
2319 int flags = 0;
2320 int x = fp64_compare_eq(a, b, modeConv(fpscr), &flags);
2321 set_fpscr(fpscr, flags);
2322 return x;
2323}
2324
2325template <>
2326bool
2327fplibCompareGE(uint64_t a, uint64_t b, FPSCR &fpscr)
2328{
2329 int flags = 0;
2330 int x = fp64_compare_ge(a, b, modeConv(fpscr), &flags);
2331 set_fpscr(fpscr, flags);
2332 return x;
2333}
2334
2335template <>
2336bool
2337fplibCompareGT(uint64_t a, uint64_t b, FPSCR &fpscr)
2338{
2339 int flags = 0;
2340 int x = fp64_compare_gt(a, b, modeConv(fpscr), &flags);
2341 set_fpscr(fpscr, flags);
2342 return x;
2343}
2344
2345template <>
2346bool
2347fplibCompareUN(uint64_t a, uint64_t b, FPSCR &fpscr)
2348{
2349 int flags = 0;
2350 int x = fp64_compare_un(a, b, modeConv(fpscr), &flags);
2351 set_fpscr(fpscr, flags);
2352 return x;
2353}
2354
2355template <>
2356uint16_t
2357fplibAbs(uint16_t op)
2358{
2359 return op & ~(1ULL << (FP16_BITS - 1));
2360}
2361
2362template <>
2363uint32_t
2364fplibAbs(uint32_t op)
2365{
2366 return op & ~(1ULL << (FP32_BITS - 1));
2367}
2368
2369template <>
2370uint64_t
2371fplibAbs(uint64_t op)
2372{
2373 return op & ~(1ULL << (FP64_BITS - 1));
2374}
2375
2376template <>
2377uint16_t
2378fplibAdd(uint16_t op1, uint16_t op2, FPSCR &fpscr)
2379{
2380 int flags = 0;
2381 uint16_t result = fp16_add(op1, op2, 0, modeConv(fpscr), &flags);
2382 set_fpscr0(fpscr, flags);
2383 return result;
2384}
2385
2386template <>
2387uint32_t
2388fplibAdd(uint32_t op1, uint32_t op2, FPSCR &fpscr)
2389{
2390 int flags = 0;
2391 uint32_t result = fp32_add(op1, op2, 0, modeConv(fpscr), &flags);
2392 set_fpscr0(fpscr, flags);
2393 return result;
2394}
2395
2396template <>
2397uint64_t
2398fplibAdd(uint64_t op1, uint64_t op2, FPSCR &fpscr)
2399{
2400 int flags = 0;
2401 uint64_t result = fp64_add(op1, op2, 0, modeConv(fpscr), &flags);
2402 set_fpscr0(fpscr, flags);
2403 return result;
2404}
2405
2406template <>
2407int
2408fplibCompare(uint16_t op1, uint16_t op2, bool signal_nans, FPSCR &fpscr)
2409{
2410 int mode = modeConv(fpscr);
2411 int flags = 0;
2412 int sgn1, exp1, sgn2, exp2, result;
2413 uint16_t mnt1, mnt2;
2414
2415 fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2416 fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2417
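    // The result is a PSTATE.NZCV nibble: 8 (N) for less than, 2 (C)
    // for greater than, 6 (Z,C) for equal and 3 (C,V) for unordered.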
2418 if (fp16_is_NaN(exp1, mnt1) || fp16_is_NaN(exp2, mnt2)) {
2419 result = 3;
2420 if (fp16_is_signalling_NaN(exp1, mnt1) ||
2421 fp16_is_signalling_NaN(exp2, mnt2) || signal_nans)
2422 flags |= FPLIB_IOC;
2423 } else {
2424 if (op1 == op2 || (!mnt1 && !mnt2)) {
2425 result = 6;
2426 } else if (sgn1 != sgn2) {
2427 result = sgn1 ? 8 : 2;
2428 } else if (exp1 != exp2) {
2429 result = sgn1 ^ (exp1 < exp2) ? 8 : 2;
2430 } else {
2431 result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2;
2432 }
2433 }
2434
2435 set_fpscr0(fpscr, flags);
2436
2437 return result;
2438}
2439
2440template <>
2441int
2442fplibCompare(uint32_t op1, uint32_t op2, bool signal_nans, FPSCR &fpscr)
2443{
2444 int mode = modeConv(fpscr);
2445 int flags = 0;
2446 int sgn1, exp1, sgn2, exp2, result;
2447 uint32_t mnt1, mnt2;
2448
2449 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2450 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2451
2452 if (fp32_is_NaN(exp1, mnt1) || fp32_is_NaN(exp2, mnt2)) {
2453 result = 3;
2454 if (fp32_is_signalling_NaN(exp1, mnt1) ||
2455 fp32_is_signalling_NaN(exp2, mnt2) || signal_nans)
2456 flags |= FPLIB_IOC;
2457 } else {
2458 if (op1 == op2 || (!mnt1 && !mnt2)) {
2459 result = 6;
2460 } else if (sgn1 != sgn2) {
2461 result = sgn1 ? 8 : 2;
2462 } else if (exp1 != exp2) {
2463 result = sgn1 ^ (exp1 < exp2) ? 8 : 2;
2464 } else {
2465 result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2;
2466 }
2467 }
2468
2469 set_fpscr0(fpscr, flags);
2470
2471 return result;
2472}
2473
2474template <>
2475int
2476fplibCompare(uint64_t op1, uint64_t op2, bool signal_nans, FPSCR &fpscr)
2477{
2478 int mode = modeConv(fpscr);
2479 int flags = 0;
2480 int sgn1, exp1, sgn2, exp2, result;
2481 uint64_t mnt1, mnt2;
2482
2483 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2484 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2485
2486 if (fp64_is_NaN(exp1, mnt1) || fp64_is_NaN(exp2, mnt2)) {
2487 result = 3;
2488 if (fp64_is_signalling_NaN(exp1, mnt1) ||
2489 fp64_is_signalling_NaN(exp2, mnt2) || signal_nans)
2490 flags |= FPLIB_IOC;
2491 } else {
2492 if (op1 == op2 || (!mnt1 && !mnt2)) {
2493 result = 6;
2494 } else if (sgn1 != sgn2) {
2495 result = sgn1 ? 8 : 2;
2496 } else if (exp1 != exp2) {
2497 result = sgn1 ^ (exp1 < exp2) ? 8 : 2;
2498 } else {
2499 result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2;
2500 }
2501 }
2502
2503 set_fpscr0(fpscr, flags);
2504
2505 return result;
2506}
2507
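// The FPConvertNaN_* helpers implement the ARM ARM's FPConvertNaN():
// the sign is preserved, the quiet bit is forced, and the most
// significant payload bits are carried across into the target format's
// mantissa.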
2508static uint16_t
2509fp16_FPConvertNaN_32(uint32_t op)
2510{
2511 return fp16_pack(op >> (FP32_BITS - 1), FP16_EXP_INF,
2512 1ULL << (FP16_MANT_BITS - 1) |
2513 op >> (FP32_MANT_BITS - FP16_MANT_BITS));
2514}
2515
2516static uint16_t
2517fp16_FPConvertNaN_64(uint64_t op)
2518{
2519 return fp16_pack(op >> (FP64_BITS - 1), FP16_EXP_INF,
2520 1ULL << (FP16_MANT_BITS - 1) |
2521 op >> (FP64_MANT_BITS - FP16_MANT_BITS));
2522}
2523
2524static uint32_t
2525fp32_FPConvertNaN_16(uint16_t op)
2526{
2527 return fp32_pack(op >> (FP16_BITS - 1), FP32_EXP_INF,
2528 1ULL << (FP32_MANT_BITS - 1) |
2529 (uint32_t)op << (FP32_MANT_BITS - FP16_MANT_BITS));
2530}
2531
2532static uint32_t
2533fp32_FPConvertNaN_64(uint64_t op)
2534{
2535 return fp32_pack(op >> (FP64_BITS - 1), FP32_EXP_INF,
2536 1ULL << (FP32_MANT_BITS - 1) |
2537 op >> (FP64_MANT_BITS - FP32_MANT_BITS));
2538}
2539
2540static uint64_t
2541fp64_FPConvertNaN_16(uint16_t op)
2542{
2543 return fp64_pack(op >> (FP16_BITS - 1), FP64_EXP_INF,
2544 1ULL << (FP64_MANT_BITS - 1) |
2545 (uint64_t)op << (FP64_MANT_BITS - FP16_MANT_BITS));
2546}
2547
2548static uint64_t
2549fp64_FPConvertNaN_32(uint32_t op)
2550{
2551 return fp64_pack(op >> (FP32_BITS - 1), FP64_EXP_INF,
2552 1ULL << (FP64_MANT_BITS - 1) |
2553 (uint64_t)op << (FP64_MANT_BITS - FP32_MANT_BITS));
2554}
2555
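// Exact constants 1.5, 3.0 and 2.0 in each format, used by the
// special-case paths of the fused step operations (e.g.
// fplibRSqrtStepFused) and fplibMulX below.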
2556static uint16_t
2557fp16_FPOnePointFive(int sgn)
2558{
2559 return fp16_pack(sgn, FP16_EXP_BIAS, 1ULL << (FP16_MANT_BITS - 1));
2560}
2561
2562static uint32_t
2563fp32_FPOnePointFive(int sgn)
2564{
2565 return fp32_pack(sgn, FP32_EXP_BIAS, 1ULL << (FP32_MANT_BITS - 1));
2566}
2567
2568static uint64_t
2569fp64_FPOnePointFive(int sgn)
2570{
2571 return fp64_pack(sgn, FP64_EXP_BIAS, 1ULL << (FP64_MANT_BITS - 1));
2572}
2573
2574static uint16_t
2575fp16_FPThree(int sgn)
2576{
2577 return fp16_pack(sgn, FP16_EXP_BIAS + 1, 1ULL << (FP16_MANT_BITS - 1));
2578}
2579
2580static uint32_t
2581fp32_FPThree(int sgn)
2582{
2583 return fp32_pack(sgn, FP32_EXP_BIAS + 1, 1ULL << (FP32_MANT_BITS - 1));
2584}
2585
2586static uint64_t
2587fp64_FPThree(int sgn)
2588{
2589 return fp64_pack(sgn, FP64_EXP_BIAS + 1, 1ULL << (FP64_MANT_BITS - 1));
2590}
2591
2592static uint16_t
2593fp16_FPTwo(int sgn)
2594{
2595 return fp16_pack(sgn, FP16_EXP_BIAS + 1, 0);
2596}
2597
2598static uint32_t
2599fp32_FPTwo(int sgn)
2600{
2601 return fp32_pack(sgn, FP32_EXP_BIAS + 1, 0);
2602}
2603
2604static uint64_t
2605fp64_FPTwo(int sgn)
2606{
2607 return fp64_pack(sgn, FP64_EXP_BIAS + 1, 0);
2608}
2609
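// Conversions to half precision honour FPSCR.AHP: the alternative
// half-precision format has no infinities or NaNs, so a NaN input
// converts to zero and an infinity to the maximum magnitude, both
// raising Invalid Operation.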
2610template <>
2611uint16_t
2612fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr)
2613{
2614 int mode = modeConv(fpscr);
2615 int flags = 0;
2616 int sgn, exp;
2617 uint32_t mnt;
2618 uint16_t result;
2619
2620 // Unpack floating-point operand optionally with flush-to-zero:
2621 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2622
2623 bool alt_hp = fpscr.ahp;
2624
2625 if (fp32_is_NaN(exp, mnt)) {
2626 if (alt_hp) {
2627 result = fp16_zero(sgn);
2628 } else if (fpscr.dn) {
2629 result = fp16_defaultNaN();
2630 } else {
2631 result = fp16_FPConvertNaN_32(op);
2632 }
2633 if (!(mnt >> (FP32_MANT_BITS - 1) & 1) || alt_hp) {
2634 flags |= FPLIB_IOC;
2635 }
2636 } else if (exp == FP32_EXP_INF) {
2637 if (alt_hp) {
2638 result = ((uint16_t)sgn << (FP16_BITS - 1) |
2639 ((1ULL << (FP16_BITS - 1)) - 1));
2640 flags |= FPLIB_IOC;
2641 } else {
2642 result = fp16_infinity(sgn);
2643 }
2644 } else if (!mnt) {
2645 result = fp16_zero(sgn);
2646 } else {
2647 result =
2648 fp16_round_(sgn, exp - FP32_EXP_BIAS + FP16_EXP_BIAS,
2649 mnt >> (FP32_MANT_BITS - FP16_BITS) |
2650 !!(mnt & ((1ULL << (FP32_MANT_BITS - FP16_BITS)) - 1)),
2651 rounding, (mode & 0xf) | alt_hp << 4, &flags);
2652 }
2653
2654 set_fpscr0(fpscr, flags);
2655
2656 return result;
2657}
2658
2659template <>
2660uint16_t
2661fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr)
2662{
2663 int mode = modeConv(fpscr);
2664 int flags = 0;
2665 int sgn, exp;
2666 uint64_t mnt;
2667 uint16_t result;
2668
2669 // Unpack floating-point operand optionally with flush-to-zero:
2670 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2671
2672 bool alt_hp = fpscr.ahp;
2673
2674 if (fp64_is_NaN(exp, mnt)) {
2675 if (alt_hp) {
2676 result = fp16_zero(sgn);
2677 } else if (fpscr.dn) {
2678 result = fp16_defaultNaN();
2679 } else {
2680 result = fp16_FPConvertNaN_64(op);
2681 }
2682 if (!(mnt >> (FP64_MANT_BITS - 1) & 1) || alt_hp) {
2683 flags |= FPLIB_IOC;
2684 }
2685 } else if (exp == FP64_EXP_INF) {
2686 if (alt_hp) {
2687 result = ((uint16_t)sgn << (FP16_BITS - 1) |
2688 ((1ULL << (FP16_BITS - 1)) - 1));
2689 flags |= FPLIB_IOC;
2690 } else {
2691 result = fp16_infinity(sgn);
2692 }
2693 } else if (!mnt) {
2694 result = fp16_zero(sgn);
2695 } else {
2696 result =
2697 fp16_round_(sgn, exp - FP64_EXP_BIAS + FP16_EXP_BIAS,
2698 mnt >> (FP64_MANT_BITS - FP16_BITS) |
2699 !!(mnt & ((1ULL << (FP64_MANT_BITS - FP16_BITS)) - 1)),
2700 rounding, (mode & 0xf) | alt_hp << 4, &flags);
2701 }
2702
2703 set_fpscr0(fpscr, flags);
2704
2705 return result;
2706}
2707
2708template <>
2709uint32_t
2710fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr)
2711{
2712 int mode = modeConv(fpscr);
2713 int flags = 0;
2714 int sgn, exp;
2715 uint16_t mnt;
2716 uint32_t result;
2717
2718 // Unpack floating-point operand optionally with flush-to-zero:
2719 fp16_unpack(&sgn, &exp, &mnt, op, mode & 0xf, &flags);
2720
2721 if (fp16_is_NaN(exp, mnt) && !fpscr.ahp) {
2722 if (fpscr.dn) {
2723 result = fp32_defaultNaN();
2724 } else {
2725 result = fp32_FPConvertNaN_16(op);
2726 }
2727 if (!(mnt >> (FP16_MANT_BITS - 1) & 1)) {
2728 flags |= FPLIB_IOC;
2729 }
2730 } else if (exp == FP16_EXP_INF && !fpscr.ahp) {
2731 result = fp32_infinity(sgn);
2732 } else if (!mnt) {
2733 result = fp32_zero(sgn);
2734 } else {
2735 mnt = fp16_normalise(mnt, &exp);
2736 result = fp32_pack(sgn, (exp - FP16_EXP_BIAS +
2737 FP32_EXP_BIAS + FP16_EXP_BITS),
2738 (uint32_t)mnt << (FP32_MANT_BITS - FP16_BITS + 1));
2739 }
2740
2741 set_fpscr0(fpscr, flags);
2742
2743 return result;
2744}
2745
2746template <>
2747uint32_t
2748fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr)
2749{
2750 int mode = modeConv(fpscr);
2751 int flags = 0;
2752 int sgn, exp;
2753 uint64_t mnt;
2754 uint32_t result;
2755
2756 // Unpack floating-point operand optionally with flush-to-zero:
2757 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2758
2759 if (fp64_is_NaN(exp, mnt)) {
2760 if (fpscr.dn) {
2761 result = fp32_defaultNaN();
2762 } else {
2763 result = fp32_FPConvertNaN_64(op);
2764 }
2765 if (!(mnt >> (FP64_MANT_BITS - 1) & 1)) {
2766 flags |= FPLIB_IOC;
2767 }
2768 } else if (exp == FP64_EXP_INF) {
2769 result = fp32_infinity(sgn);
2770 } else if (!mnt) {
2771 result = fp32_zero(sgn);
2772 } else {
2773 result =
2774 fp32_round_(sgn, exp - FP64_EXP_BIAS + FP32_EXP_BIAS,
2775 mnt >> (FP64_MANT_BITS - FP32_BITS) |
2776 !!(mnt & ((1ULL << (FP64_MANT_BITS - FP32_BITS)) - 1)),
2777 rounding, mode, &flags);
2778 }
2779
2780 set_fpscr0(fpscr, flags);
2781
2782 return result;
2783}
2784
2785template <>
2786uint64_t
2787fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr)
2788{
2789 int mode = modeConv(fpscr);
2790 int flags = 0;
2791 int sgn, exp;
2792 uint16_t mnt;
2793 uint64_t result;
2794
2795 // Unpack floating-point operand optionally with flush-to-zero:
2796 fp16_unpack(&sgn, &exp, &mnt, op, mode & 0xf, &flags);
2797
2798 if (fp16_is_NaN(exp, mnt) && !fpscr.ahp) {
2799 if (fpscr.dn) {
2800 result = fp64_defaultNaN();
2801 } else {
2802 result = fp64_FPConvertNaN_16(op);
2803 }
2804 if (!(mnt >> (FP16_MANT_BITS - 1) & 1)) {
2805 flags |= FPLIB_IOC;
2806 }
2807 } else if (exp == FP16_EXP_INF && !fpscr.ahp) {
2808 result = fp64_infinity(sgn);
2809 } else if (!mnt) {
2810 result = fp64_zero(sgn);
2811 } else {
2812 mnt = fp16_normalise(mnt, &exp);
2813 result = fp64_pack(sgn, (exp - FP16_EXP_BIAS +
2814 FP64_EXP_BIAS + FP16_EXP_BITS),
2815 (uint64_t)mnt << (FP64_MANT_BITS - FP16_BITS + 1));
2816 }
2817
2818 set_fpscr0(fpscr, flags);
2819
2820 return result;
2821}
2822
2823template <>
2824uint64_t
2825fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr)
2826{
2827 int mode = modeConv(fpscr);
2828 int flags = 0;
2829 int sgn, exp;
2830 uint32_t mnt;
2831 uint64_t result;
2832
2833 // Unpack floating-point operand optionally with flush-to-zero:
2834 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2835
2836 if (fp32_is_NaN(exp, mnt)) {
2837 if (fpscr.dn) {
2838 result = fp64_defaultNaN();
2839 } else {
2840 result = fp64_FPConvertNaN_32(op);
2841 }
2842 if (!(mnt >> (FP32_MANT_BITS - 1) & 1)) {
2843 flags |= FPLIB_IOC;
2844 }
2845 } else if (exp == FP32_EXP_INF) {
2846 result = fp64_infinity(sgn);
2847 } else if (!mnt) {
2848 result = fp64_zero(sgn);
2849 } else {
2850 mnt = fp32_normalise(mnt, &exp);
2851 result = fp64_pack(sgn, (exp - FP32_EXP_BIAS +
2852 FP64_EXP_BIAS + FP32_EXP_BITS),
2853 (uint64_t)mnt << (FP64_MANT_BITS - FP32_BITS + 1));
2854 }
2855
2856 set_fpscr0(fpscr, flags);
2857
2858 return result;
2859}
2860
2861template <>
2862uint16_t
2863fplibMulAdd(uint16_t addend, uint16_t op1, uint16_t op2, FPSCR &fpscr)
2864{
2865 int flags = 0;
2866 uint16_t result = fp16_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags);
2867 set_fpscr0(fpscr, flags);
2868 return result;
2869}
2870
2871template <>
2872uint32_t
2873fplibMulAdd(uint32_t addend, uint32_t op1, uint32_t op2, FPSCR &fpscr)
2874{
2875 int flags = 0;
2876 uint32_t result = fp32_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags);
2877 set_fpscr0(fpscr, flags);
2878 return result;
2879}
2880
2881template <>
2882uint64_t
2883fplibMulAdd(uint64_t addend, uint64_t op1, uint64_t op2, FPSCR &fpscr)
2884{
2885 int flags = 0;
2886 uint64_t result = fp64_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags);
2887 set_fpscr0(fpscr, flags);
2888 return result;
2889}
2890
2891template <>
2892uint16_t
2893fplibDiv(uint16_t op1, uint16_t op2, FPSCR &fpscr)
2894{
2895 int flags = 0;
2896 uint16_t result = fp16_div(op1, op2, modeConv(fpscr), &flags);
2897 set_fpscr0(fpscr, flags);
2898 return result;
2899}
2900
2901template <>
2902uint32_t
2903fplibDiv(uint32_t op1, uint32_t op2, FPSCR &fpscr)
2904{
2905 int flags = 0;
2906 uint32_t result = fp32_div(op1, op2, modeConv(fpscr), &flags);
2907 set_fpscr0(fpscr, flags);
2908 return result;
2909}
2910
2911template <>
2912uint64_t
2913fplibDiv(uint64_t op1, uint64_t op2, FPSCR &fpscr)
2914{
2915 int flags = 0;
2916 uint64_t result = fp64_div(op1, op2, modeConv(fpscr), &flags);
2917 set_fpscr0(fpscr, flags);
2918 return result;
2919}
2920
2921template <>
2922uint16_t
2923fplibExpA(uint16_t op)
2924{
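    // FEXPA coefficient table: coeff[i] holds the fraction bits of
    // 2^(i/32), i.e. roughly (2^(i/32) - 1) * 2^FP16_MANT_BITS, and
    // the upper operand bits are placed directly in the exponent
    // field. The 32- and 64-bit tables below follow the same pattern
    // with 64 entries of 2^(i/64).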
2925 static uint16_t coeff[32] = {
2926 0x0000,
2927 0x0016,
2928 0x002d,
2929 0x0045,
2930 0x005d,
2931 0x0075,
2932 0x008e,
2933 0x00a8,
2934 0x00c2,
2935 0x00dc,
2936 0x00f8,
2937 0x0114,
2938 0x0130,
2939 0x014d,
2940 0x016b,
2941 0x0189,
2942 0x01a8,
2943 0x01c8,
2944 0x01e8,
2945 0x0209,
2946 0x022b,
2947 0x024e,
2948 0x0271,
2949 0x0295,
2950 0x02ba,
2951 0x02e0,
2952 0x0306,
2953 0x032e,
2954 0x0356,
2955 0x037f,
2956 0x03a9,
2957 0x03d4
2958 };
2959 return ((((op >> 5) & ((1 << FP16_EXP_BITS) - 1)) << FP16_MANT_BITS) |
2960 coeff[op & ((1 << 5) - 1)]);
2961}
2962
2963template <>
2964uint32_t
2965fplibExpA(uint32_t op)
2966{
2967 static uint32_t coeff[64] = {
2968 0x000000,
2969 0x0164d2,
2970 0x02cd87,
2971 0x043a29,
2972 0x05aac3,
2973 0x071f62,
2974 0x08980f,
2975 0x0a14d5,
2976 0x0b95c2,
2977 0x0d1adf,
2978 0x0ea43a,
2979 0x1031dc,
2980 0x11c3d3,
2981 0x135a2b,
2982 0x14f4f0,
2983 0x16942d,
2984 0x1837f0,
2985 0x19e046,
2986 0x1b8d3a,
2987 0x1d3eda,
2988 0x1ef532,
2989 0x20b051,
2990 0x227043,
2991 0x243516,
2992 0x25fed7,
2993 0x27cd94,
2994 0x29a15b,
2995 0x2b7a3a,
2996 0x2d583f,
2997 0x2f3b79,
2998 0x3123f6,
2999 0x3311c4,
3000 0x3504f3,
3001 0x36fd92,
3002 0x38fbaf,
3003 0x3aff5b,
3004 0x3d08a4,
3005 0x3f179a,
3006 0x412c4d,
3007 0x4346cd,
3008 0x45672a,
3009 0x478d75,
3010 0x49b9be,
3011 0x4bec15,
3012 0x4e248c,
3013 0x506334,
3014 0x52a81e,
3015 0x54f35b,
3016 0x5744fd,
3017 0x599d16,
3018 0x5bfbb8,
3019 0x5e60f5,
3020 0x60ccdf,
3021 0x633f89,
3022 0x65b907,
3023 0x68396a,
3024 0x6ac0c7,
3025 0x6d4f30,
3026 0x6fe4ba,
3027 0x728177,
3028 0x75257d,
3029 0x77d0df,
3030 0x7a83b3,
3031 0x7d3e0c
3032 };
3033 return ((((op >> 6) & ((1 << FP32_EXP_BITS) - 1)) << FP32_MANT_BITS) |
3034 coeff[op & ((1 << 6) - 1)]);
3035}
3036
3037template <>
3038uint64_t
3039fplibExpA(uint64_t op)
3040{
3041 static uint64_t coeff[64] = {
3042 0x0000000000000ULL,
3043 0x02c9a3e778061ULL,
3044 0x059b0d3158574ULL,
3045 0x0874518759bc8ULL,
3046 0x0b5586cf9890fULL,
3047 0x0e3ec32d3d1a2ULL,
3048 0x11301d0125b51ULL,
3049 0x1429aaea92de0ULL,
3050 0x172b83c7d517bULL,
3051 0x1a35beb6fcb75ULL,
3052 0x1d4873168b9aaULL,
3053 0x2063b88628cd6ULL,
3054 0x2387a6e756238ULL,
3055 0x26b4565e27cddULL,
3056 0x29e9df51fdee1ULL,
3057 0x2d285a6e4030bULL,
3058 0x306fe0a31b715ULL,
3059 0x33c08b26416ffULL,
3060 0x371a7373aa9cbULL,
3061 0x3a7db34e59ff7ULL,
3062 0x3dea64c123422ULL,
3063 0x4160a21f72e2aULL,
3064 0x44e086061892dULL,
3065 0x486a2b5c13cd0ULL,
3066 0x4bfdad5362a27ULL,
3067 0x4f9b2769d2ca7ULL,
3068 0x5342b569d4f82ULL,
3069 0x56f4736b527daULL,
3070 0x5ab07dd485429ULL,
3071 0x5e76f15ad2148ULL,
3072 0x6247eb03a5585ULL,
3073 0x6623882552225ULL,
3074 0x6a09e667f3bcdULL,
3075 0x6dfb23c651a2fULL,
3076 0x71f75e8ec5f74ULL,
3077 0x75feb564267c9ULL,
3078 0x7a11473eb0187ULL,
3079 0x7e2f336cf4e62ULL,
3080 0x82589994cce13ULL,
3081 0x868d99b4492edULL,
3082 0x8ace5422aa0dbULL,
3083 0x8f1ae99157736ULL,
3084 0x93737b0cdc5e5ULL,
3085 0x97d829fde4e50ULL,
3086 0x9c49182a3f090ULL,
3087 0xa0c667b5de565ULL,
3088 0xa5503b23e255dULL,
3089 0xa9e6b5579fdbfULL,
3090 0xae89f995ad3adULL,
3091 0xb33a2b84f15fbULL,
3092 0xb7f76f2fb5e47ULL,
3093 0xbcc1e904bc1d2ULL,
3094 0xc199bdd85529cULL,
3095 0xc67f12e57d14bULL,
3096 0xcb720dcef9069ULL,
3097 0xd072d4a07897cULL,
3098 0xd5818dcfba487ULL,
3099 0xda9e603db3285ULL,
3100 0xdfc97337b9b5fULL,
3101 0xe502ee78b3ff6ULL,
3102 0xea4afa2a490daULL,
3103 0xefa1bee615a27ULL,
3104 0xf50765b6e4540ULL,
3105 0xfa7c1819e90d8ULL
3106 };
3107 return ((((op >> 6) & ((1 << FP64_EXP_BITS) - 1)) << FP64_MANT_BITS) |
3108 coeff[op & ((1 << 6) - 1)]);
3109}
3110
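// Repack a value taken apart by fp16/fp32/fp64_unpack: if the integer
// bit of the mantissa is clear the value is subnormal and the stored
// exponent field must be zero again, undoing the "++*exp" performed
// during unpacking.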
3111static uint16_t
3112fp16_repack(int sgn, int exp, uint16_t mnt)
3113{
3114 return fp16_pack(sgn, mnt >> FP16_MANT_BITS ? exp : 0, mnt);
3115}
3116
3117static uint32_t
3118fp32_repack(int sgn, int exp, uint32_t mnt)
3119{
3120 return fp32_pack(sgn, mnt >> FP32_MANT_BITS ? exp : 0, mnt);
3121}
3122
3123static uint64_t
3124fp64_repack(int sgn, int exp, uint64_t mnt)
3125{
3126 return fp64_pack(sgn, mnt >> FP64_MANT_BITS ? exp : 0, mnt);
3127}
3128
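// IEEE 754-2008 maxNum/minNum semantics: when exactly one operand is a
// quiet NaN it is replaced by an infinity chosen to lose the comparison
// (sgn = 1 gives -Inf for max, sgn = 0 gives +Inf for min), so the
// ordinary max/min then returns the numeric operand. Signalling NaNs
// still propagate through the usual NaN processing.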
3129static void
3130fp16_minmaxnum(uint16_t *op1, uint16_t *op2, int sgn)
3131{
3132 // Treat a single quiet-NaN as +Infinity/-Infinity
3133 if (!((uint16_t)~(*op1 << 1) >> FP16_MANT_BITS) &&
3134 (uint16_t)~(*op2 << 1) >> FP16_MANT_BITS)
3135 *op1 = fp16_infinity(sgn);
3136 if (!((uint16_t)~(*op2 << 1) >> FP16_MANT_BITS) &&
3137 (uint16_t)~(*op1 << 1) >> FP16_MANT_BITS)
3138 *op2 = fp16_infinity(sgn);
3139}
3140
3141static void
3142fp32_minmaxnum(uint32_t *op1, uint32_t *op2, int sgn)
3143{
3144 // Treat a single quiet-NaN as +Infinity/-Infinity
3145 if (!((uint32_t)~(*op1 << 1) >> FP32_MANT_BITS) &&
3146 (uint32_t)~(*op2 << 1) >> FP32_MANT_BITS)
3147 *op1 = fp32_infinity(sgn);
3148 if (!((uint32_t)~(*op2 << 1) >> FP32_MANT_BITS) &&
3149 (uint32_t)~(*op1 << 1) >> FP32_MANT_BITS)
3150 *op2 = fp32_infinity(sgn);
3151}
3152
3153static void
3154fp64_minmaxnum(uint64_t *op1, uint64_t *op2, int sgn)
3155{
3156 // Treat a single quiet-NaN as +Infinity/-Infinity
3157 if (!((uint64_t)~(*op1 << 1) >> FP64_MANT_BITS) &&
3158 (uint64_t)~(*op2 << 1) >> FP64_MANT_BITS)
3159 *op1 = fp64_infinity(sgn);
3160 if (!((uint64_t)~(*op2 << 1) >> FP64_MANT_BITS) &&
3161 (uint64_t)~(*op1 << 1) >> FP64_MANT_BITS)
3162 *op2 = fp64_infinity(sgn);
3163}
3164
3165template <>
3166uint16_t
3167fplibMax(uint16_t op1, uint16_t op2, FPSCR &fpscr)
3168{
3169 int mode = modeConv(fpscr);
3170 int flags = 0;
3171 int sgn1, exp1, sgn2, exp2;
3172 uint16_t mnt1, mnt2, x, result;
3173
3174 fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3175 fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3176
3177 if ((x = fp16_process_NaNs(op1, op2, mode, &flags))) {
3178 result = x;
3179 } else {
3180 result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ?
3181 fp16_repack(sgn1, exp1, mnt1) :
3182 fp16_repack(sgn2, exp2, mnt2));
3183 }
3184 set_fpscr0(fpscr, flags);
3185 return result;
3186}
3187
3188template <>
3189uint32_t
3190fplibMax(uint32_t op1, uint32_t op2, FPSCR &fpscr)
3191{
3192 int mode = modeConv(fpscr);
3193 int flags = 0;
3194 int sgn1, exp1, sgn2, exp2;
3195 uint32_t mnt1, mnt2, x, result;
3196
3197 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3198 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3199
3200 if ((x = fp32_process_NaNs(op1, op2, mode, &flags))) {
3201 result = x;
3202 } else {
3203 result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ?
3204 fp32_repack(sgn1, exp1, mnt1) :
3205 fp32_repack(sgn2, exp2, mnt2));
3206 }
3207 set_fpscr0(fpscr, flags);
3208 return result;
3209}
3210
3211template <>
3212uint64_t
3213fplibMax(uint64_t op1, uint64_t op2, FPSCR &fpscr)
3214{
3215 int mode = modeConv(fpscr);
3216 int flags = 0;
3217 int sgn1, exp1, sgn2, exp2;
3218 uint64_t mnt1, mnt2, x, result;
3219
3220 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3221 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3222
3223 if ((x = fp64_process_NaNs(op1, op2, mode, &flags))) {
3224 result = x;
3225 } else {
3226 result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ?
3227 fp64_repack(sgn1, exp1, mnt1) :
3228 fp64_repack(sgn2, exp2, mnt2));
3229 }
3230 set_fpscr0(fpscr, flags);
3231 return result;
3232}
3233
3234template <>
3235uint16_t
3236fplibMaxNum(uint16_t op1, uint16_t op2, FPSCR &fpscr)
3237{
3238 fp16_minmaxnum(&op1, &op2, 1);
3239 return fplibMax<uint16_t>(op1, op2, fpscr);
3240}
3241
3242template <>
3243uint32_t
3244fplibMaxNum(uint32_t op1, uint32_t op2, FPSCR &fpscr)
3245{
3246 fp32_minmaxnum(&op1, &op2, 1);
3247 return fplibMax<uint32_t>(op1, op2, fpscr);
3248}
3249
3250template <>
3251uint64_t
3252fplibMaxNum(uint64_t op1, uint64_t op2, FPSCR &fpscr)
3253{
3254 fp64_minmaxnum(&op1, &op2, 1);
3255 return fplibMax<uint64_t>(op1, op2, fpscr);
3256}
3257
3258template <>
3259uint16_t
3260fplibMin(uint16_t op1, uint16_t op2, FPSCR &fpscr)
3261{
3262 int mode = modeConv(fpscr);
3263 int flags = 0;
3264 int sgn1, exp1, sgn2, exp2;
3265 uint16_t mnt1, mnt2, x, result;
3266
3267 fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3268 fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3269
3270 if ((x = fp16_process_NaNs(op1, op2, mode, &flags))) {
3271 result = x;
3272 } else {
3273 result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ?
3274 fp16_repack(sgn1, exp1, mnt1) :
3275 fp16_repack(sgn2, exp2, mnt2));
3276 }
3277 set_fpscr0(fpscr, flags);
3278 return result;
3279}
3280
3281template <>
3282uint32_t
3283fplibMin(uint32_t op1, uint32_t op2, FPSCR &fpscr)
3284{
3285 int mode = modeConv(fpscr);
3286 int flags = 0;
3287 int sgn1, exp1, sgn2, exp2;
3288 uint32_t mnt1, mnt2, x, result;
3289
3290 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3291 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3292
3293 if ((x = fp32_process_NaNs(op1, op2, mode, &flags))) {
3294 result = x;
3295 } else {
3296 result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ?
3297 fp32_repack(sgn1, exp1, mnt1) :
3298 fp32_repack(sgn2, exp2, mnt2));
3299 }
3300 set_fpscr0(fpscr, flags);
3301 return result;
3302}
3303
3304template <>
3305uint64_t
3306fplibMin(uint64_t op1, uint64_t op2, FPSCR &fpscr)
3307{
3308 int mode = modeConv(fpscr);
3309 int flags = 0;
3310 int sgn1, exp1, sgn2, exp2;
3311 uint64_t mnt1, mnt2, x, result;
3312
3313 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3314 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3315
3316 if ((x = fp64_process_NaNs(op1, op2, mode, &flags))) {
3317 result = x;
3318 } else {
3319 result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ?
3320 fp64_repack(sgn1, exp1, mnt1) :
3321 fp64_repack(sgn2, exp2, mnt2));
3322 }
3323 set_fpscr0(fpscr, flags);
3324 return result;
3325}
3326
3327template <>
3328uint16_t
3329fplibMinNum(uint16_t op1, uint16_t op2, FPSCR &fpscr)
3330{
3331 fp16_minmaxnum(&op1, &op2, 0);
3332 return fplibMin<uint16_t>(op1, op2, fpscr);
3333}
3334
3335template <>
3336uint32_t
3337fplibMinNum(uint32_t op1, uint32_t op2, FPSCR &fpscr)
3338{
3339 fp32_minmaxnum(&op1, &op2, 0);
3340 return fplibMin<uint32_t>(op1, op2, fpscr);
3341}
3342
3343template <>
3344uint64_t
3345fplibMinNum(uint64_t op1, uint64_t op2, FPSCR &fpscr)
3346{
3347 fp64_minmaxnum(&op1, &op2, 0);
3348 return fplibMin<uint64_t>(op1, op2, fpscr);
3349}
3350
3351template <>
3352uint16_t
3353fplibMul(uint16_t op1, uint16_t op2, FPSCR &fpscr)
3354{
3355 int flags = 0;
3356 uint16_t result = fp16_mul(op1, op2, modeConv(fpscr), &flags);
3357 set_fpscr0(fpscr, flags);
3358 return result;
3359}
3360
3361template <>
3362uint32_t
3363fplibMul(uint32_t op1, uint32_t op2, FPSCR &fpscr)
3364{
3365 int flags = 0;
3366 uint32_t result = fp32_mul(op1, op2, modeConv(fpscr), &flags);
3367 set_fpscr0(fpscr, flags);
3368 return result;
3369}
3370
3371template <>
3372uint64_t
3373fplibMul(uint64_t op1, uint64_t op2, FPSCR &fpscr)
3374{
3375 int flags = 0;
3376 uint64_t result = fp64_mul(op1, op2, modeConv(fpscr), &flags);
3377 set_fpscr0(fpscr, flags);
3378 return result;
3379}
3380
3381template <>
3382uint16_t
3383fplibMulX(uint16_t op1, uint16_t op2, FPSCR &fpscr)
3384{
3385 int mode = modeConv(fpscr);
3386 int flags = 0;
3387 int sgn1, exp1, sgn2, exp2;
3388 uint16_t mnt1, mnt2, result;
3389
3390 fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3391 fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3392
3393 result = fp16_process_NaNs(op1, op2, mode, &flags);
3394 if (!result) {
3395 if ((exp1 == FP16_EXP_INF && !mnt2) ||
3396 (exp2 == FP16_EXP_INF && !mnt1)) {
3397 result = fp16_FPTwo(sgn1 ^ sgn2);
3398 } else if (exp1 == FP16_EXP_INF || exp2 == FP16_EXP_INF) {
3399 result = fp16_infinity(sgn1 ^ sgn2);
3400 } else if (!mnt1 || !mnt2) {
3401 result = fp16_zero(sgn1 ^ sgn2);
3402 } else {
3403 result = fp16_mul(op1, op2, mode, &flags);
3404 }
3405 }
3406
3407 set_fpscr0(fpscr, flags);
3408
3409 return result;
3410}
3411
3412template <>
3413uint32_t
3414fplibMulX(uint32_t op1, uint32_t op2, FPSCR &fpscr)
3415{
3416 int mode = modeConv(fpscr);
3417 int flags = 0;
3418 int sgn1, exp1, sgn2, exp2;
3419 uint32_t mnt1, mnt2, result;
3420
3421 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3422 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3423
3424 result = fp32_process_NaNs(op1, op2, mode, &flags);
3425 if (!result) {
3426 if ((exp1 == FP32_EXP_INF && !mnt2) ||
3427 (exp2 == FP32_EXP_INF && !mnt1)) {
3428 result = fp32_FPTwo(sgn1 ^ sgn2);
3429 } else if (exp1 == FP32_EXP_INF || exp2 == FP32_EXP_INF) {
3430 result = fp32_infinity(sgn1 ^ sgn2);
3431 } else if (!mnt1 || !mnt2) {
3432 result = fp32_zero(sgn1 ^ sgn2);
3433 } else {
3434 result = fp32_mul(op1, op2, mode, &flags);
3435 }
3436 }
3437
3438 set_fpscr0(fpscr, flags);
3439
3440 return result;
3441}
3442
3443template <>
3444uint64_t
3445fplibMulX(uint64_t op1, uint64_t op2, FPSCR &fpscr)
3446{
3447 int mode = modeConv(fpscr);
3448 int flags = 0;
3449 int sgn1, exp1, sgn2, exp2;
3450 uint64_t mnt1, mnt2, result;
3451
3452 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3453 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3454
3455 result = fp64_process_NaNs(op1, op2, mode, &flags);
3456 if (!result) {
3457 if ((exp1 == FP64_EXP_INF && !mnt2) ||
3458 (exp2 == FP64_EXP_INF && !mnt1)) {
3459 result = fp64_FPTwo(sgn1 ^ sgn2);
3460 } else if (exp1 == FP64_EXP_INF || exp2 == FP64_EXP_INF) {
3461 result = fp64_infinity(sgn1 ^ sgn2);
3462 } else if (!mnt1 || !mnt2) {
3463 result = fp64_zero(sgn1 ^ sgn2);
3464 } else {
3465 result = fp64_mul(op1, op2, mode, &flags);
3466 }
3467 }
3468
3469 set_fpscr0(fpscr, flags);
3470
3471 return result;
3472}
3473
3474template <>
3475uint16_t
3476fplibNeg(uint16_t op)
3477{
3478 return op ^ 1ULL << (FP16_BITS - 1);
3479}
3480
3481template <>
3482uint32_t
3483fplibNeg(uint32_t op)
3484{
3485 return op ^ 1ULL << (FP32_BITS - 1);
3486}
3487
3488template <>
3489uint64_t
3490fplibNeg(uint64_t op)
3491{
3492 return op ^ 1ULL << (FP64_BITS - 1);
3493}
3494
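// 256-entry estimate table matching the ARM ARM's RecipSqrtEstimate():
// bit 7 of the index is the (inverted) exponent parity and bits [6:0]
// are the top mantissa bits, covering inputs in [0.25, 1.0); each
// entry holds the top eight fraction bits of 1/sqrt(x).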
3495static const uint8_t recip_sqrt_estimate[256] = {
3496 255, 253, 251, 249, 247, 245, 243, 242, 240, 238, 236, 234, 233, 231, 229, 228,
3497 226, 224, 223, 221, 219, 218, 216, 215, 213, 212, 210, 209, 207, 206, 204, 203,
3498 201, 200, 198, 197, 196, 194, 193, 192, 190, 189, 188, 186, 185, 184, 183, 181,
3499 180, 179, 178, 176, 175, 174, 173, 172, 170, 169, 168, 167, 166, 165, 164, 163,
3500 162, 160, 159, 158, 157, 156, 155, 154, 153, 152, 151, 150, 149, 148, 147, 146,
3501 145, 144, 143, 142, 141, 140, 140, 139, 138, 137, 136, 135, 134, 133, 132, 131,
3502 131, 130, 129, 128, 127, 126, 126, 125, 124, 123, 122, 121, 121, 120, 119, 118,
3503 118, 117, 116, 115, 114, 114, 113, 112, 111, 111, 110, 109, 109, 108, 107, 106,
3504 105, 104, 103, 101, 100, 99, 97, 96, 95, 93, 92, 91, 90, 88, 87, 86,
3505 85, 84, 82, 81, 80, 79, 78, 77, 76, 75, 74, 72, 71, 70, 69, 68,
3506 67, 66, 65, 64, 63, 62, 61, 60, 60, 59, 58, 57, 56, 55, 54, 53,
3507 52, 51, 51, 50, 49, 48, 47, 46, 46, 45, 44, 43, 42, 42, 41, 40,
3508 39, 38, 38, 37, 36, 35, 35, 34, 33, 33, 32, 31, 30, 30, 29, 28,
3509 28, 27, 26, 26, 25, 24, 24, 23, 22, 22, 21, 20, 20, 19, 19, 18,
3510 17, 17, 16, 16, 15, 14, 14, 13, 13, 12, 11, 11, 10, 10, 9, 9,
3511 8, 8, 7, 6, 6, 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 0
3512};
3513
3514template <>
3515uint16_t
3516fplibRSqrtEstimate(uint16_t op, FPSCR &fpscr)
3517{
3518 int mode = modeConv(fpscr);
3519 int flags = 0;
3520 int sgn, exp;
3521 uint16_t mnt, result;
3522
3523 fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags);
3524
3525 if (fp16_is_NaN(exp, mnt)) {
3526 result = fp16_process_NaN(op, mode, &flags);
3527 } else if (!mnt) {
3528 result = fp16_infinity(sgn);
3529 flags |= FPLIB_DZC;
3530 } else if (sgn) {
3531 result = fp16_defaultNaN();
3532 flags |= FPLIB_IOC;
3533 } else if (exp == FP16_EXP_INF) {
3534 result = fp16_zero(0);
3535 } else {
3536 exp += FP16_EXP_BITS;
3537 mnt = fp16_normalise(mnt, &exp);
3538 mnt = recip_sqrt_estimate[(~exp & 1) << 7 |
3539 (mnt >> (FP16_BITS - 8) & 127)];
3540 result = fp16_pack(0, (3 * FP16_EXP_BIAS - exp - 1) >> 1,
3541 mnt << (FP16_MANT_BITS - 8));
3542 }
3543
3544 set_fpscr0(fpscr, flags);
3545
3546 return result;
3547}
3548
3549template <>
3550uint32_t
3551fplibRSqrtEstimate(uint32_t op, FPSCR &fpscr)
3552{
3553 int mode = modeConv(fpscr);
3554 int flags = 0;
3555 int sgn, exp;
3556 uint32_t mnt, result;
3557
3558 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
3559
3560 if (fp32_is_NaN(exp, mnt)) {
3561 result = fp32_process_NaN(op, mode, &flags);
3562 } else if (!mnt) {
3563 result = fp32_infinity(sgn);
3564 flags |= FPLIB_DZC;
3565 } else if (sgn) {
3566 result = fp32_defaultNaN();
3567 flags |= FPLIB_IOC;
3568 } else if (exp == FP32_EXP_INF) {
3569 result = fp32_zero(0);
3570 } else {
3571 exp += FP32_EXP_BITS;
3572 mnt = fp32_normalise(mnt, &exp);
3573 mnt = recip_sqrt_estimate[(~exp & 1) << 7 |
3574 (mnt >> (FP32_BITS - 8) & 127)];
3575 result = fp32_pack(0, (3 * FP32_EXP_BIAS - exp - 1) >> 1,
3576 mnt << (FP32_MANT_BITS - 8));
3577 }
3578
3579 set_fpscr0(fpscr, flags);
3580
3581 return result;
3582}
3583
3584template <>
3585uint64_t
3586fplibRSqrtEstimate(uint64_t op, FPSCR &fpscr)
3587{
3588 int mode = modeConv(fpscr);
3589 int flags = 0;
3590 int sgn, exp;
3591 uint64_t mnt, result;
3592
3593 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
3594
3595 if (fp64_is_NaN(exp, mnt)) {
3596 result = fp64_process_NaN(op, mode, &flags);
3597 } else if (!mnt) {
3598 result = fp64_infinity(sgn);
3599 flags |= FPLIB_DZC;
3600 } else if (sgn) {
3601 result = fp64_defaultNaN();
3602 flags |= FPLIB_IOC;
3603 } else if (exp == FP64_EXP_INF) {
3604 result = fp64_zero(0);
3605 } else {
3606 exp += FP64_EXP_BITS;
3607 mnt = fp64_normalise(mnt, &exp);
3608 mnt = recip_sqrt_estimate[(~exp & 1) << 7 |
3609 (mnt >> (FP64_BITS - 8) & 127)];
3610 result = fp64_pack(0, (3 * FP64_EXP_BIAS - exp - 1) >> 1,
3611 mnt << (FP64_MANT_BITS - 8));
3612 }
3613
3614 set_fpscr0(fpscr, flags);
3615
3616 return result;
3617}
3618
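// FRSQRTS step: computes (3 - op1 * op2) / 2. op1 is negated up front,
// the fused multiply-add evaluates 3 + (-op1) * op2 without any
// intermediate rounding, and the -1 scale argument (0 in plain
// fplibMulAdd) supplies the final halving.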
3619template <>
3620uint16_t
3621fplibRSqrtStepFused(uint16_t op1, uint16_t op2, FPSCR &fpscr)
3622{
3623 int mode = modeConv(fpscr);
3624 int flags = 0;
3625 int sgn1, exp1, sgn2, exp2;
3626 uint16_t mnt1, mnt2, result;
3627
3628 op1 = fplibNeg<uint16_t>(op1);
3629 fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3630 fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3631
3632 result = fp16_process_NaNs(op1, op2, mode, &flags);
3633 if (!result) {
3634 if ((exp1 == FP16_EXP_INF && !mnt2) ||
3635 (exp2 == FP16_EXP_INF && !mnt1)) {
3636 result = fp16_FPOnePointFive(0);
3637 } else if (exp1 == FP16_EXP_INF || exp2 == FP16_EXP_INF) {
3638 result = fp16_infinity(sgn1 ^ sgn2);
3639 } else {
3640 result = fp16_muladd(fp16_FPThree(0), op1, op2, -1, mode, &flags);
3641 }
3642 }
3643
3644 set_fpscr0(fpscr, flags);
3645
3646 return result;
3647}
3648
3649template <>
3650uint32_t
3651fplibRSqrtStepFused(uint32_t op1, uint32_t op2, FPSCR &fpscr)
3652{
3653 int mode = modeConv(fpscr);
3654 int flags = 0;
3655 int sgn1, exp1, sgn2, exp2;
3656 uint32_t mnt1, mnt2, result;
3657
3658 op1 = fplibNeg<uint32_t>(op1);
3659 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3660 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3661
3662 result = fp32_process_NaNs(op1, op2, mode, &flags);
3663 if (!result) {
3664 if ((exp1 == FP32_EXP_INF && !mnt2) ||
3665 (exp2 == FP32_EXP_INF && !mnt1)) {
3666 result = fp32_FPOnePointFive(0);
3667 } else if (exp1 == FP32_EXP_INF || exp2 == FP32_EXP_INF) {
3668 result = fp32_infinity(sgn1 ^ sgn2);
3669 } else {
3670 result = fp32_muladd(fp32_FPThree(0), op1, op2, -1, mode, &flags);
3671 }
3672 }
3673
3674 set_fpscr0(fpscr, flags);
3675
3676 return result;
3677}
3678
3679template <>
3680uint64_t
3681fplibRSqrtStepFused(uint64_t op1, uint64_t op2, FPSCR &fpscr)
3682{
3683 int mode = modeConv(fpscr);
3684 int flags = 0;
3685 int sgn1, exp1, sgn2, exp2;
3686 uint64_t mnt1, mnt2, result;
3687
3688 op1 = fplibNeg<uint64_t>(op1);
3689 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3690 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3691
3692 result = fp64_process_NaNs(op1, op2, mode, &flags);
3693 if (!result) {
3694 if ((exp1 == FP64_EXP_INF && !mnt2) ||
3695 (exp2 == FP64_EXP_INF && !mnt1)) {
3696 result = fp64_FPOnePointFive(0);
3697 } else if (exp1 == FP64_EXP_INF || exp2 == FP64_EXP_INF) {
3698 result = fp64_infinity(sgn1 ^ sgn2);
3699 } else {
3700 result = fp64_muladd(fp64_FPThree(0), op1, op2, -1, mode, &flags);
3701 }
3702 }
3703
3704 set_fpscr0(fpscr, flags);
3705
3706 return result;
3707}
3708
3709template <>
3710uint16_t
3711fplibRecipEstimate(uint16_t op, FPSCR &fpscr)
3712{
3713 int mode = modeConv(fpscr);
3714 int flags = 0;
3715 int sgn, exp;
3716 uint16_t mnt, result;
3717
3718 fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags);
3719
3720 if (fp16_is_NaN(exp, mnt)) {
3721 result = fp16_process_NaN(op, mode, &flags);
3722 } else if (exp == FP16_EXP_INF) {
3723 result = fp16_zero(sgn);
3724 } else if (!mnt) {
3725 result = fp16_infinity(sgn);
3726 flags |= FPLIB_DZC;
3727 } else if (!((uint16_t)(op << 1) >> (FP16_MANT_BITS - 1))) {
3728 bool overflow_to_inf = false;
3729 switch (FPCRRounding(fpscr)) {
3730 case FPRounding_TIEEVEN:
3731 overflow_to_inf = true;
3732 break;
3733 case FPRounding_POSINF:
3734 overflow_to_inf = !sgn;
3735 break;
3736 case FPRounding_NEGINF:
3737 overflow_to_inf = sgn;
3738 break;
3739 case FPRounding_ZERO:
3740 overflow_to_inf = false;
3741 break;
3742 default:
46#include "fplib.hh"
47
48namespace ArmISA
49{
50
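// Mode word layout: rounding mode in bits [1:0] (RN/RP/RM/RZ), then
// independent flag bits for flush-to-zero, default-NaN, alternative
// half-precision and half-precision flush-to-zero.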
51#define FPLIB_RN 0
52#define FPLIB_RP 1
53#define FPLIB_RM 2
54#define FPLIB_RZ 3
55#define FPLIB_FZ 4
56#define FPLIB_DN 8
57#define FPLIB_AHP 16
58#define FPLIB_FZ16 32
59
60#define FPLIB_IDC 128 // Input Denormal
61#define FPLIB_IXC 16 // Inexact
62#define FPLIB_UFC 8 // Underflow
63#define FPLIB_OFC 4 // Overflow
64#define FPLIB_DZC 2 // Division by Zero
65#define FPLIB_IOC 1 // Invalid Operation
66
67#define FP16_BITS 16
68#define FP32_BITS 32
69#define FP64_BITS 64
70
71#define FP16_EXP_BITS 5
72#define FP32_EXP_BITS 8
73#define FP64_EXP_BITS 11
74
75#define FP16_EXP_BIAS 15
76#define FP32_EXP_BIAS 127
77#define FP64_EXP_BIAS 1023
78
79#define FP16_EXP_INF ((1ULL << FP16_EXP_BITS) - 1)
80#define FP32_EXP_INF ((1ULL << FP32_EXP_BITS) - 1)
81#define FP64_EXP_INF ((1ULL << FP64_EXP_BITS) - 1)
82
83#define FP16_MANT_BITS (FP16_BITS - FP16_EXP_BITS - 1)
84#define FP32_MANT_BITS (FP32_BITS - FP32_EXP_BITS - 1)
85#define FP64_MANT_BITS (FP64_BITS - FP64_EXP_BITS - 1)
86
87#define FP16_EXP(x) ((x) >> FP16_MANT_BITS & ((1ULL << FP16_EXP_BITS) - 1))
88#define FP32_EXP(x) ((x) >> FP32_MANT_BITS & ((1ULL << FP32_EXP_BITS) - 1))
89#define FP64_EXP(x) ((x) >> FP64_MANT_BITS & ((1ULL << FP64_EXP_BITS) - 1))
90
91#define FP16_MANT(x) ((x) & ((1ULL << FP16_MANT_BITS) - 1))
92#define FP32_MANT(x) ((x) & ((1ULL << FP32_MANT_BITS) - 1))
93#define FP64_MANT(x) ((x) & ((1ULL << FP64_MANT_BITS) - 1))
94
95static inline uint16_t
96lsl16(uint16_t x, uint32_t shift)
97{
98 return shift < 16 ? x << shift : 0;
99}
100
101static inline uint16_t
102lsr16(uint16_t x, uint32_t shift)
103{
104 return shift < 16 ? x >> shift : 0;
105}
106
107static inline uint32_t
108lsl32(uint32_t x, uint32_t shift)
109{
110 return shift < 32 ? x << shift : 0;
111}
112
113static inline uint32_t
114lsr32(uint32_t x, uint32_t shift)
115{
116 return shift < 32 ? x >> shift : 0;
117}
118
119static inline uint64_t
120lsl64(uint64_t x, uint32_t shift)
121{
122 return shift < 64 ? x << shift : 0;
123}
124
125static inline uint64_t
126lsr64(uint64_t x, uint32_t shift)
127{
128 return shift < 64 ? x >> shift : 0;
129}
130
131static inline void
132lsl128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift)
133{
134 if (shift == 0) {
135 *r1 = x1;
136 *r0 = x0;
137 } else if (shift < 64) {
138 *r1 = x1 << shift | x0 >> (64 - shift);
139 *r0 = x0 << shift;
140 } else if (shift < 128) {
141 *r1 = x0 << (shift - 64);
142 *r0 = 0;
143 } else {
144 *r1 = 0;
145 *r0 = 0;
146 }
147}
148
149static inline void
150lsr128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift)
151{
152 if (shift == 0) {
153 *r1 = x1;
154 *r0 = x0;
155 } else if (shift < 64) {
156 *r0 = x0 >> shift | x1 << (64 - shift);
157 *r1 = x1 >> shift;
158 } else if (shift < 128) {
159 *r0 = x1 >> (shift - 64);
160 *r1 = 0;
161 } else {
162 *r0 = 0;
163 *r1 = 0;
164 }
165}
166
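// Full 124-bit product of two operands below 2^62, built from 31-bit
// limbs; the middle term is formed Karatsuba-style as
// (a0 + a1) * (b0 + b1) - p0 - p2 so every partial product and carry
// chain fits in 64 bits.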
167static inline void
168mul62x62(uint64_t *x0, uint64_t *x1, uint64_t a, uint64_t b)
169{
170 uint32_t mask = ((uint32_t)1 << 31) - 1;
171 uint64_t a0 = a & mask;
172 uint64_t a1 = a >> 31 & mask;
173 uint64_t b0 = b & mask;
174 uint64_t b1 = b >> 31 & mask;
175 uint64_t p0 = a0 * b0;
176 uint64_t p2 = a1 * b1;
177 uint64_t p1 = (a0 + a1) * (b0 + b1) - p0 - p2;
178 uint64_t s0 = p0;
179 uint64_t s1 = (s0 >> 31) + p1;
180 uint64_t s2 = (s1 >> 31) + p2;
181 *x0 = (s0 & mask) | (s1 & mask) << 31 | s2 << 62;
182 *x1 = s2 >> 2;
183}
184
185static inline
186void mul64x32(uint64_t *x0, uint64_t *x1, uint64_t a, uint32_t b)
187{
188 uint64_t t0 = (uint64_t)(uint32_t)a * b;
189 uint64_t t1 = (t0 >> 32) + (a >> 32) * b;
190 *x0 = t1 << 32 | (uint32_t)t0;
191 *x1 = t1 >> 32;
192}
193
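// 128-bit add/subtract: the carry (respectively borrow) out of the low
// word is detected by unsigned wrap-around, *x0 < a0 (*x0 > a0).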
194static inline void
195add128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0,
196 uint64_t b1)
197{
198 *x0 = a0 + b0;
199 *x1 = a1 + b1 + (*x0 < a0);
200}
201
202static inline void
203sub128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0,
204 uint64_t b1)
205{
206 *x0 = a0 - b0;
207 *x1 = a1 - b1 - (*x0 > a0);
208}
209
210static inline int
211cmp128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1)
212{
213 return (a1 < b1 ? -1 : a1 > b1 ? 1 : a0 < b0 ? -1 : a0 > b0 ? 1 : 0);
214}
215
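// Left-justify the mantissa with a shift-by-8/4/2/1 binary search for
// the leading one, decrementing *exp by the total shift; the wider
// variants below follow the same pattern with larger first steps.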
216static inline uint16_t
217fp16_normalise(uint16_t mnt, int *exp)
218{
219 int shift;
220
221 if (!mnt) {
222 return 0;
223 }
224
225 for (shift = 8; shift; shift >>= 1) {
226 if (!(mnt >> (16 - shift))) {
227 mnt <<= shift;
228 *exp -= shift;
229 }
230 }
231 return mnt;
232}
233
234static inline uint32_t
235fp32_normalise(uint32_t mnt, int *exp)
236{
237 int shift;
238
239 if (!mnt) {
240 return 0;
241 }
242
243 for (shift = 16; shift; shift >>= 1) {
244 if (!(mnt >> (32 - shift))) {
245 mnt <<= shift;
246 *exp -= shift;
247 }
248 }
249 return mnt;
250}
251
252static inline uint64_t
253fp64_normalise(uint64_t mnt, int *exp)
254{
255 int shift;
256
257 if (!mnt) {
258 return 0;
259 }
260
261 for (shift = 32; shift; shift >>= 1) {
262 if (!(mnt >> (64 - shift))) {
263 mnt <<= shift;
264 *exp -= shift;
265 }
266 }
267 return mnt;
268}
269
270static inline void
271fp128_normalise(uint64_t *mnt0, uint64_t *mnt1, int *exp)
272{
273 uint64_t x0 = *mnt0;
274 uint64_t x1 = *mnt1;
275 int shift;
276
277 if (!x0 && !x1) {
278 return;
279 }
280
281 if (!x1) {
282 x1 = x0;
283 x0 = 0;
284 *exp -= 64;
285 }
286
287 for (shift = 32; shift; shift >>= 1) {
288 if (!(x1 >> (64 - shift))) {
289 x1 = x1 << shift | x0 >> (64 - shift);
290 x0 <<= shift;
291 *exp -= shift;
292 }
293 }
294
295 *mnt0 = x0;
296 *mnt1 = x1;
297}
298
299static inline uint16_t
300fp16_pack(uint16_t sgn, uint16_t exp, uint16_t mnt)
301{
302 return sgn << (FP16_BITS - 1) | exp << FP16_MANT_BITS | FP16_MANT(mnt);
303}
304
305static inline uint32_t
306fp32_pack(uint32_t sgn, uint32_t exp, uint32_t mnt)
307{
308 return sgn << (FP32_BITS - 1) | exp << FP32_MANT_BITS | FP32_MANT(mnt);
309}
310
311static inline uint64_t
312fp64_pack(uint64_t sgn, uint64_t exp, uint64_t mnt)
313{
314 return sgn << (FP64_BITS - 1) | exp << FP64_MANT_BITS | FP64_MANT(mnt);
315}
316
317static inline uint16_t
318fp16_zero(int sgn)
319{
320 return fp16_pack(sgn, 0, 0);
321}
322
323static inline uint32_t
324fp32_zero(int sgn)
325{
326 return fp32_pack(sgn, 0, 0);
327}
328
329static inline uint64_t
330fp64_zero(int sgn)
331{
332 return fp64_pack(sgn, 0, 0);
333}
334
335static inline uint16_t
336fp16_max_normal(int sgn)
337{
338 return fp16_pack(sgn, FP16_EXP_INF - 1, -1);
339}
340
341static inline uint32_t
342fp32_max_normal(int sgn)
343{
344 return fp32_pack(sgn, FP32_EXP_INF - 1, -1);
345}
346
347static inline uint64_t
348fp64_max_normal(int sgn)
349{
350 return fp64_pack(sgn, FP64_EXP_INF - 1, -1);
351}
352
353static inline uint16_t
354fp16_infinity(int sgn)
355{
356 return fp16_pack(sgn, FP16_EXP_INF, 0);
357}
358
359static inline uint32_t
360fp32_infinity(int sgn)
361{
362 return fp32_pack(sgn, FP32_EXP_INF, 0);
363}
364
365static inline uint64_t
366fp64_infinity(int sgn)
367{
368 return fp64_pack(sgn, FP64_EXP_INF, 0);
369}
370
371static inline uint16_t
372fp16_defaultNaN()
373{
374 return fp16_pack(0, FP16_EXP_INF, 1ULL << (FP16_MANT_BITS - 1));
375}
376
377static inline uint32_t
378fp32_defaultNaN()
379{
380 return fp32_pack(0, FP32_EXP_INF, 1ULL << (FP32_MANT_BITS - 1));
381}
382
383static inline uint64_t
384fp64_defaultNaN()
385{
386 return fp64_pack(0, FP64_EXP_INF, 1ULL << (FP64_MANT_BITS - 1));
387}
388
389static inline void
390fp16_unpack(int *sgn, int *exp, uint16_t *mnt, uint16_t x, int mode,
391 int *flags)
392{
393 *sgn = x >> (FP16_BITS - 1);
394 *exp = FP16_EXP(x);
395 *mnt = FP16_MANT(x);
396
397 // Handle subnormals:
398 if (*exp) {
399 *mnt |= 1ULL << FP16_MANT_BITS;
400 } else {
401 ++*exp;
402 // IDC (Input Denormal) is not set in this case.
403 if (mode & FPLIB_FZ16)
404 *mnt = 0;
405 }
406}
407
408static inline void
409fp32_unpack(int *sgn, int *exp, uint32_t *mnt, uint32_t x, int mode,
410 int *flags)
411{
412 *sgn = x >> (FP32_BITS - 1);
413 *exp = FP32_EXP(x);
414 *mnt = FP32_MANT(x);
415
416 // Handle subnormals:
417 if (*exp) {
418 *mnt |= 1ULL << FP32_MANT_BITS;
419 } else {
420 ++*exp;
421 if ((mode & FPLIB_FZ) && *mnt) {
422 *flags |= FPLIB_IDC;
423 *mnt = 0;
424 }
425 }
426}
427
428static inline void
429fp64_unpack(int *sgn, int *exp, uint64_t *mnt, uint64_t x, int mode,
430 int *flags)
431{
432 *sgn = x >> (FP64_BITS - 1);
433 *exp = FP64_EXP(x);
434 *mnt = FP64_MANT(x);
435
436 // Handle subnormals:
437 if (*exp) {
438 *mnt |= 1ULL << FP64_MANT_BITS;
439 } else {
440 ++*exp;
441 if ((mode & FPLIB_FZ) && *mnt) {
442 *flags |= FPLIB_IDC;
443 *mnt = 0;
444 }
445 }
446}
447
448static inline int
449fp16_is_NaN(int exp, uint16_t mnt)
450{
451 return exp == FP16_EXP_INF && FP16_MANT(mnt);
452}
453
454static inline int
455fp32_is_NaN(int exp, uint32_t mnt)
456{
457 return exp == FP32_EXP_INF && FP32_MANT(mnt);
458}
459
460static inline int
461fp64_is_NaN(int exp, uint64_t mnt)
462{
463 return exp == FP64_EXP_INF && FP64_MANT(mnt);
464}
465
466static inline int
467fp16_is_signalling_NaN(int exp, uint16_t mnt)
468{
469 return fp16_is_NaN(exp, mnt) && !(mnt >> (FP16_MANT_BITS - 1) & 1);
470}
471
472static inline int
473fp32_is_signalling_NaN(int exp, uint32_t mnt)
474{
475 return fp32_is_NaN(exp, mnt) && !(mnt >> (FP32_MANT_BITS - 1) & 1);
476}
477
478static inline int
479fp64_is_signalling_NaN(int exp, uint64_t mnt)
480{
481 return fp64_is_NaN(exp, mnt) && !(mnt >> (FP64_MANT_BITS - 1) & 1);
482}
483
484static inline int
485fp16_is_quiet_NaN(int exp, uint16_t mnt)
486{
487 return exp == FP16_EXP_INF && (mnt >> (FP16_MANT_BITS - 1) & 1);
488}
489
490static inline int
491fp32_is_quiet_NaN(int exp, uint32_t mnt)
492{
493 return exp == FP32_EXP_INF && (mnt >> (FP32_MANT_BITS - 1) & 1);
494}
495
496static inline int
497fp64_is_quiet_NaN(int exp, uint64_t mnt)
498{
499 return exp == FP64_EXP_INF && (mnt >> (FP64_MANT_BITS - 1) & 1);
500}
501
502static inline int
503fp16_is_infinity(int exp, uint16_t mnt)
504{
505 return exp == FP16_EXP_INF && !FP16_MANT(mnt);
506}
507
508static inline int
509fp32_is_infinity(int exp, uint32_t mnt)
510{
511 return exp == FP32_EXP_INF && !FP32_MANT(mnt);
512}
513
514static inline int
515fp64_is_infinity(int exp, uint64_t mnt)
516{
517 return exp == FP64_EXP_INF && !FP64_MANT(mnt);
518}
519
520static inline uint16_t
521fp16_process_NaN(uint16_t a, int mode, int *flags)
522{
523 if (!(a >> (FP16_MANT_BITS - 1) & 1)) {
524 *flags |= FPLIB_IOC;
525 a |= 1ULL << (FP16_MANT_BITS - 1);
526 }
527 return mode & FPLIB_DN ? fp16_defaultNaN() : a;
528}
529
530static inline uint32_t
531fp32_process_NaN(uint32_t a, int mode, int *flags)
532{
533 if (!(a >> (FP32_MANT_BITS - 1) & 1)) {
534 *flags |= FPLIB_IOC;
535 a |= 1ULL << (FP32_MANT_BITS - 1);
536 }
537 return mode & FPLIB_DN ? fp32_defaultNaN() : a;
538}
539
540static inline uint64_t
541fp64_process_NaN(uint64_t a, int mode, int *flags)
542{
543 if (!(a >> (FP64_MANT_BITS - 1) & 1)) {
544 *flags |= FPLIB_IOC;
545 a |= 1ULL << (FP64_MANT_BITS - 1);
546 }
547 return mode & FPLIB_DN ? fp64_defaultNaN() : a;
548}
549
550static uint16_t
551fp16_process_NaNs(uint16_t a, uint16_t b, int mode, int *flags)
552{
553 int a_exp = FP16_EXP(a);
554 uint16_t a_mnt = FP16_MANT(a);
555 int b_exp = FP16_EXP(b);
556 uint16_t b_mnt = FP16_MANT(b);
557
558 // Handle signalling NaNs:
559 if (fp16_is_signalling_NaN(a_exp, a_mnt))
560 return fp16_process_NaN(a, mode, flags);
561 if (fp16_is_signalling_NaN(b_exp, b_mnt))
562 return fp16_process_NaN(b, mode, flags);
563
564 // Handle quiet NaNs:
565 if (fp16_is_NaN(a_exp, a_mnt))
566 return fp16_process_NaN(a, mode, flags);
567 if (fp16_is_NaN(b_exp, b_mnt))
568 return fp16_process_NaN(b, mode, flags);
569
570 return 0;
571}
572
573static uint32_t
574fp32_process_NaNs(uint32_t a, uint32_t b, int mode, int *flags)
575{
576 int a_exp = FP32_EXP(a);
577 uint32_t a_mnt = FP32_MANT(a);
578 int b_exp = FP32_EXP(b);
579 uint32_t b_mnt = FP32_MANT(b);
580
581 // Handle signalling NaNs:
582 if (fp32_is_signalling_NaN(a_exp, a_mnt))
583 return fp32_process_NaN(a, mode, flags);
584 if (fp32_is_signalling_NaN(b_exp, b_mnt))
585 return fp32_process_NaN(b, mode, flags);
586
587 // Handle quiet NaNs:
588 if (fp32_is_NaN(a_exp, a_mnt))
589 return fp32_process_NaN(a, mode, flags);
590 if (fp32_is_NaN(b_exp, b_mnt))
591 return fp32_process_NaN(b, mode, flags);
592
593 return 0;
594}
595
596static uint64_t
597fp64_process_NaNs(uint64_t a, uint64_t b, int mode, int *flags)
598{
599 int a_exp = FP64_EXP(a);
600 uint64_t a_mnt = FP64_MANT(a);
601 int b_exp = FP64_EXP(b);
602 uint64_t b_mnt = FP64_MANT(b);
603
604 // Handle signalling NaNs:
605 if (fp64_is_signalling_NaN(a_exp, a_mnt))
606 return fp64_process_NaN(a, mode, flags);
607 if (fp64_is_signalling_NaN(b_exp, b_mnt))
608 return fp64_process_NaN(b, mode, flags);
609
610 // Handle quiet NaNs:
611 if (fp64_is_NaN(a_exp, a_mnt))
612 return fp64_process_NaN(a, mode, flags);
613 if (fp64_is_NaN(b_exp, b_mnt))
614 return fp64_process_NaN(b, mode, flags);
615
616 return 0;
617}
618
619static uint16_t
620fp16_process_NaNs3(uint16_t a, uint16_t b, uint16_t c, int mode, int *flags)
621{
622 int a_exp = FP16_EXP(a);
623 uint16_t a_mnt = FP16_MANT(a);
624 int b_exp = FP16_EXP(b);
625 uint16_t b_mnt = FP16_MANT(b);
626 int c_exp = FP16_EXP(c);
627 uint16_t c_mnt = FP16_MANT(c);
628
629 // Handle signalling NaNs:
630 if (fp16_is_signalling_NaN(a_exp, a_mnt))
631 return fp16_process_NaN(a, mode, flags);
632 if (fp16_is_signalling_NaN(b_exp, b_mnt))
633 return fp16_process_NaN(b, mode, flags);
634 if (fp16_is_signalling_NaN(c_exp, c_mnt))
635 return fp16_process_NaN(c, mode, flags);
636
637 // Handle quiet NaNs:
638 if (fp16_is_NaN(a_exp, a_mnt))
639 return fp16_process_NaN(a, mode, flags);
640 if (fp16_is_NaN(b_exp, b_mnt))
641 return fp16_process_NaN(b, mode, flags);
642 if (fp16_is_NaN(c_exp, c_mnt))
643 return fp16_process_NaN(c, mode, flags);
644
645 return 0;
646}
647
648static uint32_t
649fp32_process_NaNs3(uint32_t a, uint32_t b, uint32_t c, int mode, int *flags)
650{
651 int a_exp = FP32_EXP(a);
652 uint32_t a_mnt = FP32_MANT(a);
653 int b_exp = FP32_EXP(b);
654 uint32_t b_mnt = FP32_MANT(b);
655 int c_exp = FP32_EXP(c);
656 uint32_t c_mnt = FP32_MANT(c);
657
658 // Handle signalling NaNs:
659 if (fp32_is_signalling_NaN(a_exp, a_mnt))
660 return fp32_process_NaN(a, mode, flags);
661 if (fp32_is_signalling_NaN(b_exp, b_mnt))
662 return fp32_process_NaN(b, mode, flags);
663 if (fp32_is_signalling_NaN(c_exp, c_mnt))
664 return fp32_process_NaN(c, mode, flags);
665
666 // Handle quiet NaNs:
667 if (fp32_is_NaN(a_exp, a_mnt))
668 return fp32_process_NaN(a, mode, flags);
669 if (fp32_is_NaN(b_exp, b_mnt))
670 return fp32_process_NaN(b, mode, flags);
671 if (fp32_is_NaN(c_exp, c_mnt))
672 return fp32_process_NaN(c, mode, flags);
673
674 return 0;
675}
676
677static uint64_t
678fp64_process_NaNs3(uint64_t a, uint64_t b, uint64_t c, int mode, int *flags)
679{
680 int a_exp = FP64_EXP(a);
681 uint64_t a_mnt = FP64_MANT(a);
682 int b_exp = FP64_EXP(b);
683 uint64_t b_mnt = FP64_MANT(b);
684 int c_exp = FP64_EXP(c);
685 uint64_t c_mnt = FP64_MANT(c);
686
687 // Handle signalling NaNs:
688 if (fp64_is_signalling_NaN(a_exp, a_mnt))
689 return fp64_process_NaN(a, mode, flags);
690 if (fp64_is_signalling_NaN(b_exp, b_mnt))
691 return fp64_process_NaN(b, mode, flags);
692 if (fp64_is_signalling_NaN(c_exp, c_mnt))
693 return fp64_process_NaN(c, mode, flags);
694
695 // Handle quiet NaNs:
696 if (fp64_is_NaN(a_exp, a_mnt))
697 return fp64_process_NaN(a, mode, flags);
698 if (fp64_is_NaN(b_exp, b_mnt))
699 return fp64_process_NaN(b, mode, flags);
700 if (fp64_is_NaN(c_exp, c_mnt))
701 return fp64_process_NaN(c, mode, flags);
702
703 return 0;
704}
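// Propagation order in the process_NaNs helpers: a signalling NaN in the
// first operand wins, then the second, then the third, and only then
// quiet NaNs in the same order. The chosen NaN is quietened by
// fp*_process_NaN, which also raises IOC for signalling inputs. A return
// value of zero means "no NaN present", which is unambiguous because
// zero is not a NaN encoding.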
705
706static uint16_t
707fp16_round_(int sgn, int exp, uint16_t mnt, int rm, int mode, int *flags)
708{
709 int biased_exp; // non-negative exponent value for result
710 uint16_t int_mant; // mantissa for result, less than (2 << FP16_MANT_BITS)
711 int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5
712
713 assert(rm != FPRounding_TIEAWAY);
714
715 // Flush to zero:
716 if ((mode & FPLIB_FZ16) && exp < 1) {
717 *flags |= FPLIB_UFC;
718 return fp16_zero(sgn);
719 }
720
721 // The bottom FP16_EXP_BITS bits of mnt are ORed together into a sticky bit:
722 mnt = (4ULL << FP16_MANT_BITS | mnt >> (FP16_EXP_BITS - 1) |
723 ((mnt & ((1ULL << FP16_EXP_BITS) - 1)) != 0));
724
725 if (exp > 0) {
726 biased_exp = exp;
727 int_mant = mnt >> 2;
728 error = mnt & 3;
729 } else {
730 biased_exp = 0;
731 int_mant = lsr16(mnt, 3 - exp);
732 error = (lsr16(mnt, 1 - exp) & 3) | !!(mnt & (lsl16(1, 1 - exp) - 1));
733 }
734
735 if (!biased_exp && error) { // xx should also check fpscr_val<11> (UFE, the underflow trap enable)
736 *flags |= FPLIB_UFC;
737 }
738
739 // Round up:
740 if ((rm == FPLIB_RN && (error == 3 ||
741 (error == 2 && (int_mant & 1)))) ||
742 (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) {
743 ++int_mant;
744 if (int_mant == 1ULL << FP16_MANT_BITS) {
745 // Rounded up from denormalized to normalized
746 biased_exp = 1;
747 }
748 if (int_mant == 2ULL << FP16_MANT_BITS) {
749 // Rounded up to next exponent
750 ++biased_exp;
751 int_mant >>= 1;
752 }
753 }
754
755 // Handle rounding to odd aka Von Neumann rounding:
756 if (error && rm == FPRounding_ODD)
757 int_mant |= 1;
758
759 // Handle overflow:
760 if (!(mode & FPLIB_AHP)) {
761 if (biased_exp >= (int)FP16_EXP_INF) {
762 *flags |= FPLIB_OFC | FPLIB_IXC;
763 if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) ||
764 (rm == FPLIB_RM && sgn)) {
765 return fp16_infinity(sgn);
766 } else {
767 return fp16_max_normal(sgn);
768 }
769 }
770 } else {
771 if (biased_exp >= (int)FP16_EXP_INF + 1) {
772 *flags |= FPLIB_IOC;
773 return fp16_pack(sgn, FP16_EXP_INF, -1); // AHP has no infinities: saturate to max magnitude
774 }
775 }
776
777 if (error) {
778 *flags |= FPLIB_IXC;
779 }
780
781 return fp16_pack(sgn, biased_exp, int_mant);
782}
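// Rounding example, assuming the FPLIB_RN/RP/RM encodings implied by the
// code above. With sgn = 0 the two error bits act as guard and sticky:
//   error == 1 (below the halfway point) -> truncate,
//   error == 2 (exactly halfway)         -> round to even under RN,
//   error == 3 (above the halfway point) -> round up.
// So int_mant = 0x401 with error = 2 becomes 0x402 under RN (ties to
// even), while int_mant = 0x402 with error = 2 is left alone and only
// raises the inexact flag (IXC).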
783
784static uint16_t
785fp16_round(int sgn, int exp, uint16_t mnt, int mode, int *flags)
786{
787 return fp16_round_(sgn, exp, mnt, mode & 3, mode, flags);
788}
789
790static uint32_t
791fp32_round_(int sgn, int exp, uint32_t mnt, int rm, int mode, int *flags)
792{
793 int biased_exp; // non-negative exponent value for result
794 uint32_t int_mant; // mantissa for result, less than (2 << FP32_MANT_BITS)
795 int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5
796
797 assert(rm != FPRounding_TIEAWAY);
798
799 // Flush to zero:
800 if ((mode & FPLIB_FZ) && exp < 1) {
801 *flags |= FPLIB_UFC;
802 return fp32_zero(sgn);
803 }
804
805 // The bottom FP32_EXP_BITS bits of mnt are ORed together into a sticky bit:
806 mnt = (4ULL << FP32_MANT_BITS | mnt >> (FP32_EXP_BITS - 1) |
807 ((mnt & ((1ULL << FP32_EXP_BITS) - 1)) != 0));
808
809 if (exp > 0) {
810 biased_exp = exp;
811 int_mant = mnt >> 2;
812 error = mnt & 3;
813 } else {
814 biased_exp = 0;
815 int_mant = lsr32(mnt, 3 - exp);
816 error = (lsr32(mnt, 1 - exp) & 3) | !!(mnt & (lsl32(1, 1 - exp) - 1));
817 }
818
819 if (!biased_exp && error) { // xx should also check fpscr_val<11> (UFE, the underflow trap enable)
820 *flags |= FPLIB_UFC;
821 }
822
823 // Round up:
824 if ((rm == FPLIB_RN && (error == 3 ||
825 (error == 2 && (int_mant & 1)))) ||
826 (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) {
827 ++int_mant;
828 if (int_mant == 1ULL << FP32_MANT_BITS) {
829 // Rounded up from denormalized to normalized
830 biased_exp = 1;
831 }
832 if (int_mant == 2ULL << FP32_MANT_BITS) {
833 // Rounded up to next exponent
834 ++biased_exp;
835 int_mant >>= 1;
836 }
837 }
838
839 // Handle rounding to odd aka Von Neumann rounding:
840 if (error && rm == FPRounding_ODD)
841 int_mant |= 1;
842
843 // Handle overflow:
844 if (biased_exp >= (int)FP32_EXP_INF) {
845 *flags |= FPLIB_OFC | FPLIB_IXC;
846 if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) ||
847 (rm == FPLIB_RM && sgn)) {
848 return fp32_infinity(sgn);
849 } else {
850 return fp32_max_normal(sgn);
851 }
852 }
853
854 if (error) {
855 *flags |= FPLIB_IXC;
856 }
857
858 return fp32_pack(sgn, biased_exp, int_mant);
859}
860
861static uint32_t
862fp32_round(int sgn, int exp, uint32_t mnt, int mode, int *flags)
863{
864 return fp32_round_(sgn, exp, mnt, mode & 3, mode, flags);
865}
866
867static uint64_t
868fp64_round_(int sgn, int exp, uint64_t mnt, int rm, int mode, int *flags)
869{
870 int biased_exp; // non-negative exponent value for result
871 uint64_t int_mant; // mantissa for result, less than (2 << FP64_MANT_BITS)
872 int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5
873
874 assert(rm != FPRounding_TIEAWAY);
875
876 // Flush to zero:
877 if ((mode & FPLIB_FZ) && exp < 1) {
878 *flags |= FPLIB_UFC;
879 return fp64_zero(sgn);
880 }
881
882 // The bottom FP64_EXP_BITS bits of mnt are ORed together into a sticky bit:
883 mnt = (4ULL << FP64_MANT_BITS | mnt >> (FP64_EXP_BITS - 1) |
884 ((mnt & ((1ULL << FP64_EXP_BITS) - 1)) != 0));
885
886 if (exp > 0) {
887 biased_exp = exp;
888 int_mant = mnt >> 2;
889 error = mnt & 3;
890 } else {
891 biased_exp = 0;
892 int_mant = lsr64(mnt, 3 - exp);
893 error = (lsr64(mnt, 1 - exp) & 3) | !!(mnt & (lsl64(1, 1 - exp) - 1));
894 }
895
896 if (!biased_exp && error) { // xx should also check fpscr_val<11> (UFE, the underflow trap enable)
897 *flags |= FPLIB_UFC;
898 }
899
900 // Round up:
901 if ((rm == FPLIB_RN && (error == 3 ||
902 (error == 2 && (int_mant & 1)))) ||
903 (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) {
904 ++int_mant;
905 if (int_mant == 1ULL << FP64_MANT_BITS) {
906 // Rounded up from denormalized to normalized
907 biased_exp = 1;
908 }
909 if (int_mant == 2ULL << FP64_MANT_BITS) {
910 // Rounded up to next exponent
911 ++biased_exp;
912 int_mant >>= 1;
913 }
914 }
915
916 // Handle rounding to odd aka Von Neumann rounding:
917 if (error && rm == FPRounding_ODD)
918 int_mant |= 1;
919
920 // Handle overflow:
921 if (biased_exp >= (int)FP64_EXP_INF) {
922 *flags |= FPLIB_OFC | FPLIB_IXC;
923 if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) ||
924 (rm == FPLIB_RM && sgn)) {
925 return fp64_infinity(sgn);
926 } else {
927 return fp64_max_normal(sgn);
928 }
929 }
930
931 if (error) {
932 *flags |= FPLIB_IXC;
933 }
934
935 return fp64_pack(sgn, biased_exp, int_mant);
936}
937
938static uint64_t
939fp64_round(int sgn, int exp, uint64_t mnt, int mode, int *flags)
940{
941 return fp64_round_(sgn, exp, mnt, mode & 3, mode, flags);
942}
943
944static int
945fp16_compare_eq(uint16_t a, uint16_t b, int mode, int *flags)
946{
947 int a_sgn, a_exp, b_sgn, b_exp;
948 uint16_t a_mnt, b_mnt;
949
950 fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
951 fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
952
953 if (fp16_is_NaN(a_exp, a_mnt) ||
954 fp16_is_NaN(b_exp, b_mnt)) {
955 if (fp16_is_signalling_NaN(a_exp, a_mnt) ||
956 fp16_is_signalling_NaN(b_exp, b_mnt))
957 *flags |= FPLIB_IOC;
958 return 0;
959 }
960 return a == b || (!a_mnt && !b_mnt);
961}
962
963static int
964fp16_compare_ge(uint16_t a, uint16_t b, int mode, int *flags)
965{
966 int a_sgn, a_exp, b_sgn, b_exp;
967 uint16_t a_mnt, b_mnt;
968
969 fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
970 fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
971
972 if (fp16_is_NaN(a_exp, a_mnt) ||
973 fp16_is_NaN(b_exp, b_mnt)) {
974 *flags |= FPLIB_IOC;
975 return 0;
976 }
977 if (!a_mnt && !b_mnt)
978 return 1;
979 if (a_sgn != b_sgn)
980 return b_sgn;
981 if (a_exp != b_exp)
982 return a_sgn ^ (a_exp > b_exp);
983 if (a_mnt != b_mnt)
984 return a_sgn ^ (a_mnt > b_mnt);
985 return 1;
986}
987
988static int
989fp16_compare_gt(uint16_t a, uint16_t b, int mode, int *flags)
990{
991 int a_sgn, a_exp, b_sgn, b_exp;
992 uint16_t a_mnt, b_mnt;
993
994 fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
995 fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
996
997 if (fp16_is_NaN(a_exp, a_mnt) ||
998 fp16_is_NaN(b_exp, b_mnt)) {
999 *flags |= FPLIB_IOC;
1000 return 0;
1001 }
1002 if (!a_mnt && !b_mnt)
1003 return 0;
1004 if (a_sgn != b_sgn)
1005 return b_sgn;
1006 if (a_exp != b_exp)
1007 return a_sgn ^ (a_exp > b_exp);
1008 if (a_mnt != b_mnt)
1009 return a_sgn ^ (a_mnt > b_mnt);
1010 return 0;
1011}
1012
1013static int
1014fp16_compare_un(uint16_t a, uint16_t b, int mode, int *flags)
1015{
1016 int a_sgn, a_exp, b_sgn, b_exp;
1017 uint16_t a_mnt, b_mnt;
1018
1019 fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1020 fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1021
1022 if (fp16_is_NaN(a_exp, a_mnt) ||
1023 fp16_is_NaN(b_exp, b_mnt)) {
1024 if (fp16_is_signalling_NaN(a_exp, a_mnt) ||
1025 fp16_is_signalling_NaN(b_exp, b_mnt))
1026 *flags |= FPLIB_IOC;
1027 return 1;
1028 }
1029 return 0;
1030}
1031
1032static int
1033fp32_compare_eq(uint32_t a, uint32_t b, int mode, int *flags)
1034{
1035 int a_sgn, a_exp, b_sgn, b_exp;
1036 uint32_t a_mnt, b_mnt;
1037
1038 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1039 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1040
1041 if (fp32_is_NaN(a_exp, a_mnt) ||
1042 fp32_is_NaN(b_exp, b_mnt)) {
1043 if (fp32_is_signalling_NaN(a_exp, a_mnt) ||
1044 fp32_is_signalling_NaN(b_exp, b_mnt))
1045 *flags |= FPLIB_IOC;
1046 return 0;
1047 }
1048 return a == b || (!a_mnt && !b_mnt);
1049}
1050
1051static int
1052fp32_compare_ge(uint32_t a, uint32_t b, int mode, int *flags)
1053{
1054 int a_sgn, a_exp, b_sgn, b_exp;
1055 uint32_t a_mnt, b_mnt;
1056
1057 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1058 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1059
1060 if (fp32_is_NaN(a_exp, a_mnt) ||
1061 fp32_is_NaN(b_exp, b_mnt)) {
1062 *flags |= FPLIB_IOC;
1063 return 0;
1064 }
1065 if (!a_mnt && !b_mnt)
1066 return 1;
1067 if (a_sgn != b_sgn)
1068 return b_sgn;
1069 if (a_exp != b_exp)
1070 return a_sgn ^ (a_exp > b_exp);
1071 if (a_mnt != b_mnt)
1072 return a_sgn ^ (a_mnt > b_mnt);
1073 return 1;
1074}
1075
1076static int
1077fp32_compare_gt(uint32_t a, uint32_t b, int mode, int *flags)
1078{
1079 int a_sgn, a_exp, b_sgn, b_exp;
1080 uint32_t a_mnt, b_mnt;
1081
1082 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1083 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1084
1085 if (fp32_is_NaN(a_exp, a_mnt) ||
1086 fp32_is_NaN(b_exp, b_mnt)) {
1087 *flags |= FPLIB_IOC;
1088 return 0;
1089 }
1090 if (!a_mnt && !b_mnt)
1091 return 0;
1092 if (a_sgn != b_sgn)
1093 return b_sgn;
1094 if (a_exp != b_exp)
1095 return a_sgn ^ (a_exp > b_exp);
1096 if (a_mnt != b_mnt)
1097 return a_sgn ^ (a_mnt > b_mnt);
1098 return 0;
1099}
1100
1101static int
1102fp32_compare_un(uint32_t a, uint32_t b, int mode, int *flags)
1103{
1104 int a_sgn, a_exp, b_sgn, b_exp;
1105 uint32_t a_mnt, b_mnt;
1106
1107 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1108 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1109
1110 if (fp32_is_NaN(a_exp, a_mnt) ||
1111 fp32_is_NaN(b_exp, b_mnt)) {
1112 if (fp32_is_signalling_NaN(a_exp, a_mnt) ||
1113 fp32_is_signalling_NaN(b_exp, b_mnt))
1114 *flags |= FPLIB_IOC;
1115 return 1;
1116 }
1117 return 0;
1118}
1119
1120static int
1121fp64_compare_eq(uint64_t a, uint64_t b, int mode, int *flags)
1122{
1123 int a_sgn, a_exp, b_sgn, b_exp;
1124 uint64_t a_mnt, b_mnt;
1125
1126 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1127 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1128
1129 if (fp64_is_NaN(a_exp, a_mnt) ||
1130 fp64_is_NaN(b_exp, b_mnt)) {
1131 if (fp64_is_signalling_NaN(a_exp, a_mnt) ||
1132 fp64_is_signalling_NaN(b_exp, b_mnt))
1133 *flags |= FPLIB_IOC;
1134 return 0;
1135 }
1136 return a == b || (!a_mnt && !b_mnt);
1137}
1138
1139static int
1140fp64_compare_ge(uint64_t a, uint64_t b, int mode, int *flags)
1141{
1142 int a_sgn, a_exp, b_sgn, b_exp;
1143 uint64_t a_mnt, b_mnt;
1144
1145 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1146 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1147
1148 if (fp64_is_NaN(a_exp, a_mnt) ||
1149 fp64_is_NaN(b_exp, b_mnt)) {
1150 *flags |= FPLIB_IOC;
1151 return 0;
1152 }
1153 if (!a_mnt && !b_mnt)
1154 return 1;
1155 if (a_sgn != b_sgn)
1156 return b_sgn;
1157 if (a_exp != b_exp)
1158 return a_sgn ^ (a_exp > b_exp);
1159 if (a_mnt != b_mnt)
1160 return a_sgn ^ (a_mnt > b_mnt);
1161 return 1;
1162}
1163
1164static int
1165fp64_compare_gt(uint64_t a, uint64_t b, int mode, int *flags)
1166{
1167 int a_sgn, a_exp, b_sgn, b_exp;
1168 uint64_t a_mnt, b_mnt;
1169
1170 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1171 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1172
1173 if (fp64_is_NaN(a_exp, a_mnt) ||
1174 fp64_is_NaN(b_exp, b_mnt)) {
1175 *flags |= FPLIB_IOC;
1176 return 0;
1177 }
1178 if (!a_mnt && !b_mnt)
1179 return 0;
1180 if (a_sgn != b_sgn)
1181 return b_sgn;
1182 if (a_exp != b_exp)
1183 return a_sgn ^ (a_exp > b_exp);
1184 if (a_mnt != b_mnt)
1185 return a_sgn ^ (a_mnt > b_mnt);
1186 return 0;
1187}
1188
1189static int
1190fp64_compare_un(uint64_t a, uint64_t b, int mode, int *flags)
1191{
1192 int a_sgn, a_exp, b_sgn, b_exp;
1193 uint64_t a_mnt, b_mnt;
1194
1195 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1196 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1197
1198 if (fp64_is_NaN(a_exp, a_mnt) ||
1199 fp64_is_NaN(b_exp, b_mnt)) {
1200 if (fp64_is_signalling_NaN(a_exp, a_mnt) ||
1201 fp64_is_signalling_NaN(b_exp, b_mnt))
1202 *flags |= FPLIB_IOC;
1203 return 1;
1204 }
1205 return 0;
1206}
1207
1208static uint16_t
1209fp16_add(uint16_t a, uint16_t b, int neg, int mode, int *flags)
1210{
1211 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1212 uint16_t a_mnt, b_mnt, x, x_mnt;
1213
1214 fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1215 fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1216
1217 if ((x = fp16_process_NaNs(a, b, mode, flags))) {
1218 return x;
1219 }
1220
1221 b_sgn ^= neg;
1222
1223 // Handle infinities and zeroes:
1224 if (a_exp == FP16_EXP_INF && b_exp == FP16_EXP_INF && a_sgn != b_sgn) {
1225 *flags |= FPLIB_IOC;
1226 return fp16_defaultNaN();
1227 } else if (a_exp == FP16_EXP_INF) {
1228 return fp16_infinity(a_sgn);
1229 } else if (b_exp == FP16_EXP_INF) {
1230 return fp16_infinity(b_sgn);
1231 } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) {
1232 return fp16_zero(a_sgn);
1233 }
1234
1235 a_mnt <<= 3;
1236 b_mnt <<= 3;
1237 if (a_exp >= b_exp) {
1238 b_mnt = (lsr16(b_mnt, a_exp - b_exp) |
1239 !!(b_mnt & (lsl16(1, a_exp - b_exp) - 1)));
1240 b_exp = a_exp;
1241 } else {
1242 a_mnt = (lsr16(a_mnt, b_exp - a_exp) |
1243 !!(a_mnt & (lsl16(1, b_exp - a_exp) - 1)));
1244 a_exp = b_exp;
1245 }
1246 x_sgn = a_sgn;
1247 x_exp = a_exp;
1248 if (a_sgn == b_sgn) {
1249 x_mnt = a_mnt + b_mnt;
1250 } else if (a_mnt >= b_mnt) {
1251 x_mnt = a_mnt - b_mnt;
1252 } else {
1253 x_sgn ^= 1;
1254 x_mnt = b_mnt - a_mnt;
1255 }
1256
1257 if (!x_mnt) {
1258 // Sign of exact zero result depends on rounding mode
1259 return fp16_zero((mode & 3) == 2);
1260 }
1261
1262 x_mnt = fp16_normalise(x_mnt, &x_exp);
1263
1264 return fp16_round(x_sgn, x_exp + FP16_EXP_BITS - 3, x_mnt << 1,
1265 mode, flags);
1266}
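// Two details of the adder above are worth spelling out. First, the
// alignment shift ORs every discarded bit into the least significant bit
// of the shifted operand (the classic sticky bit): aligning 0b10110 by 3
// gives lsr(0b10110, 3) | !!(0b110) = 0b11, so rounding can still see
// that the sum is inexact. Second, an exactly-cancelling sum returns +0
// except when (mode & 3) == 2, round towards minus infinity, where
// IEEE 754 requires -0.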
1267
1268static uint32_t
1269fp32_add(uint32_t a, uint32_t b, int neg, int mode, int *flags)
1270{
1271 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1272 uint32_t a_mnt, b_mnt, x, x_mnt;
1273
1274 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1275 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1276
1277 if ((x = fp32_process_NaNs(a, b, mode, flags))) {
1278 return x;
1279 }
1280
1281 b_sgn ^= neg;
1282
1283 // Handle infinities and zeroes:
1284 if (a_exp == FP32_EXP_INF && b_exp == FP32_EXP_INF && a_sgn != b_sgn) {
1285 *flags |= FPLIB_IOC;
1286 return fp32_defaultNaN();
1287 } else if (a_exp == FP32_EXP_INF) {
1288 return fp32_infinity(a_sgn);
1289 } else if (b_exp == FP32_EXP_INF) {
1290 return fp32_infinity(b_sgn);
1291 } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) {
1292 return fp32_zero(a_sgn);
1293 }
1294
1295 a_mnt <<= 3;
1296 b_mnt <<= 3;
1297 if (a_exp >= b_exp) {
1298 b_mnt = (lsr32(b_mnt, a_exp - b_exp) |
1299 !!(b_mnt & (lsl32(1, a_exp - b_exp) - 1)));
1300 b_exp = a_exp;
1301 } else {
1302 a_mnt = (lsr32(a_mnt, b_exp - a_exp) |
1303 !!(a_mnt & (lsl32(1, b_exp - a_exp) - 1)));
1304 a_exp = b_exp;
1305 }
1306 x_sgn = a_sgn;
1307 x_exp = a_exp;
1308 if (a_sgn == b_sgn) {
1309 x_mnt = a_mnt + b_mnt;
1310 } else if (a_mnt >= b_mnt) {
1311 x_mnt = a_mnt - b_mnt;
1312 } else {
1313 x_sgn ^= 1;
1314 x_mnt = b_mnt - a_mnt;
1315 }
1316
1317 if (!x_mnt) {
1318 // Sign of exact zero result depends on rounding mode
1319 return fp32_zero((mode & 3) == 2);
1320 }
1321
1322 x_mnt = fp32_normalise(x_mnt, &x_exp);
1323
1324 return fp32_round(x_sgn, x_exp + FP32_EXP_BITS - 3, x_mnt << 1,
1325 mode, flags);
1326}
1327
1328static uint64_t
1329fp64_add(uint64_t a, uint64_t b, int neg, int mode, int *flags)
1330{
1331 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1332 uint64_t a_mnt, b_mnt, x, x_mnt;
1333
1334 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1335 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1336
1337 if ((x = fp64_process_NaNs(a, b, mode, flags))) {
1338 return x;
1339 }
1340
1341 b_sgn ^= neg;
1342
1343 // Handle infinities and zeroes:
1344 if (a_exp == FP64_EXP_INF && b_exp == FP64_EXP_INF && a_sgn != b_sgn) {
1345 *flags |= FPLIB_IOC;
1346 return fp64_defaultNaN();
1347 } else if (a_exp == FP64_EXP_INF) {
1348 return fp64_infinity(a_sgn);
1349 } else if (b_exp == FP64_EXP_INF) {
1350 return fp64_infinity(b_sgn);
1351 } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) {
1352 return fp64_zero(a_sgn);
1353 }
1354
1355 a_mnt <<= 3;
1356 b_mnt <<= 3;
1357 if (a_exp >= b_exp) {
1358 b_mnt = (lsr64(b_mnt, a_exp - b_exp) |
1359 !!(b_mnt & (lsl64(1, a_exp - b_exp) - 1)));
1360 b_exp = a_exp;
1361 } else {
1362 a_mnt = (lsr64(a_mnt, b_exp - a_exp) |
1363 !!(a_mnt & (lsl64(1, b_exp - a_exp) - 1)));
1364 a_exp = b_exp;
1365 }
1366 x_sgn = a_sgn;
1367 x_exp = a_exp;
1368 if (a_sgn == b_sgn) {
1369 x_mnt = a_mnt + b_mnt;
1370 } else if (a_mnt >= b_mnt) {
1371 x_mnt = a_mnt - b_mnt;
1372 } else {
1373 x_sgn ^= 1;
1374 x_mnt = b_mnt - a_mnt;
1375 }
1376
1377 if (!x_mnt) {
1378 // Sign of exact zero result depends on rounding mode
1379 return fp64_zero((mode & 3) == 2);
1380 }
1381
1382 x_mnt = fp64_normalise(x_mnt, &x_exp);
1383
1384 return fp64_round(x_sgn, x_exp + FP64_EXP_BITS - 3, x_mnt << 1,
1385 mode, flags);
1386}
1387
1388static uint16_t
1389fp16_mul(uint16_t a, uint16_t b, int mode, int *flags)
1390{
1391 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1392 uint16_t a_mnt, b_mnt, x;
1393 uint32_t x_mnt;
1394
1395 fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1396 fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1397
1398 if ((x = fp16_process_NaNs(a, b, mode, flags))) {
1399 return x;
1400 }
1401
1402 // Handle infinities and zeroes:
1403 if ((a_exp == FP16_EXP_INF && !b_mnt) ||
1404 (b_exp == FP16_EXP_INF && !a_mnt)) {
1405 *flags |= FPLIB_IOC;
1406 return fp16_defaultNaN();
1407 } else if (a_exp == FP16_EXP_INF || b_exp == FP16_EXP_INF) {
1408 return fp16_infinity(a_sgn ^ b_sgn);
1409 } else if (!a_mnt || !b_mnt) {
1410 return fp16_zero(a_sgn ^ b_sgn);
1411 }
1412
1413 // Multiply and normalise:
1414 x_sgn = a_sgn ^ b_sgn;
1415 x_exp = a_exp + b_exp - FP16_EXP_BIAS + 2 * FP16_EXP_BITS + 1;
1416 x_mnt = (uint32_t)a_mnt * b_mnt;
1417 x_mnt = fp32_normalise(x_mnt, &x_exp);
1418
1419 // Convert to FP16_BITS bits, collapsing error into bottom bit:
1420 x_mnt = lsr32(x_mnt, FP16_BITS - 1) | !!lsl32(x_mnt, FP16_BITS + 1);
1421
1422 return fp16_round(x_sgn, x_exp, x_mnt, mode, flags);
1423}
1424
1425static uint32_t
1426fp32_mul(uint32_t a, uint32_t b, int mode, int *flags)
1427{
1428 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1429 uint32_t a_mnt, b_mnt, x;
1430 uint64_t x_mnt;
1431
1432 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1433 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1434
1435 if ((x = fp32_process_NaNs(a, b, mode, flags))) {
1436 return x;
1437 }
1438
1439 // Handle infinities and zeroes:
1440 if ((a_exp == FP32_EXP_INF && !b_mnt) ||
1441 (b_exp == FP32_EXP_INF && !a_mnt)) {
1442 *flags |= FPLIB_IOC;
1443 return fp32_defaultNaN();
1444 } else if (a_exp == FP32_EXP_INF || b_exp == FP32_EXP_INF) {
1445 return fp32_infinity(a_sgn ^ b_sgn);
1446 } else if (!a_mnt || !b_mnt) {
1447 return fp32_zero(a_sgn ^ b_sgn);
1448 }
1449
1450 // Multiply and normalise:
1451 x_sgn = a_sgn ^ b_sgn;
1452 x_exp = a_exp + b_exp - FP32_EXP_BIAS + 2 * FP32_EXP_BITS + 1;
1453 x_mnt = (uint64_t)a_mnt * b_mnt;
1454 x_mnt = fp64_normalise(x_mnt, &x_exp);
1455
1456 // Convert to FP32_BITS bits, collapsing error into bottom bit:
1457 x_mnt = lsr64(x_mnt, FP32_BITS - 1) | !!lsl64(x_mnt, FP32_BITS + 1);
1458
1459 return fp32_round(x_sgn, x_exp, x_mnt, mode, flags);
1460}
1461
1462static uint64_t
1463fp64_mul(uint64_t a, uint64_t b, int mode, int *flags)
1464{
1465 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1466 uint64_t a_mnt, b_mnt, x;
1467 uint64_t x0_mnt, x1_mnt;
1468
1469 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1470 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1471
1472 if ((x = fp64_process_NaNs(a, b, mode, flags))) {
1473 return x;
1474 }
1475
1476 // Handle infinities and zeroes:
1477 if ((a_exp == FP64_EXP_INF && !b_mnt) ||
1478 (b_exp == FP64_EXP_INF && !a_mnt)) {
1479 *flags |= FPLIB_IOC;
1480 return fp64_defaultNaN();
1481 } else if (a_exp == FP64_EXP_INF || b_exp == FP64_EXP_INF) {
1482 return fp64_infinity(a_sgn ^ b_sgn);
1483 } else if (!a_mnt || !b_mnt) {
1484 return fp64_zero(a_sgn ^ b_sgn);
1485 }
1486
1487 // Multiply and normalise:
1488 x_sgn = a_sgn ^ b_sgn;
1489 x_exp = a_exp + b_exp - FP64_EXP_BIAS + 2 * FP64_EXP_BITS + 1;
1490 mul62x62(&x0_mnt, &x1_mnt, a_mnt, b_mnt);
1491 fp128_normalise(&x0_mnt, &x1_mnt, &x_exp);
1492
1493 // Convert to FP64_BITS bits, collapsing error into bottom bit:
1494 x0_mnt = x1_mnt << 1 | !!x0_mnt;
1495
1496 return fp64_round(x_sgn, x_exp, x0_mnt, mode, flags);
1497}
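// fp64_mul needs the full 106-bit product of two 53-bit significands, so
// it forms a 128-bit result with mul62x62 and then folds the low 64 bits
// into a single sticky bit (x1_mnt << 1 | !!x0_mnt) before rounding:
// correct IEEE rounding only needs to know whether anything non-zero was
// dropped, not the exact value of the dropped bits.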
1498
1499static uint16_t
1500fp16_muladd(uint16_t a, uint16_t b, uint16_t c, int scale,
1501 int mode, int *flags)
1502{
1503 int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp;
1504 uint16_t a_mnt, b_mnt, c_mnt, x;
1505 uint32_t x_mnt, y_mnt;
1506
1507 fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1508 fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1509 fp16_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags);
1510
1511 x = fp16_process_NaNs3(a, b, c, mode, flags);
1512
1513 // Quiet NaN added to product of zero and infinity:
1514 if (fp16_is_quiet_NaN(a_exp, a_mnt) &&
1515 ((!b_mnt && fp16_is_infinity(c_exp, c_mnt)) ||
1516 (!c_mnt && fp16_is_infinity(b_exp, b_mnt)))) {
1517 x = fp16_defaultNaN();
1518 *flags |= FPLIB_IOC;
1519 }
1520
1521 if (x) {
1522 return x;
1523 }
1524
1525 // Handle infinities and zeroes:
1526 if ((b_exp == FP16_EXP_INF && !c_mnt) ||
1527 (c_exp == FP16_EXP_INF && !b_mnt) ||
1528 (a_exp == FP16_EXP_INF &&
1529 (b_exp == FP16_EXP_INF || c_exp == FP16_EXP_INF) &&
1530 (a_sgn != (b_sgn ^ c_sgn)))) {
1531 *flags |= FPLIB_IOC;
1532 return fp16_defaultNaN();
1533 }
1534 if (a_exp == FP16_EXP_INF)
1535 return fp16_infinity(a_sgn);
1536 if (b_exp == FP16_EXP_INF || c_exp == FP16_EXP_INF)
1537 return fp16_infinity(b_sgn ^ c_sgn);
1538 if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn))
1539 return fp16_zero(a_sgn);
1540
1541 x_sgn = a_sgn;
1542 x_exp = a_exp + 2 * FP16_EXP_BITS - 3;
1543 x_mnt = (uint32_t)a_mnt << (FP16_MANT_BITS + 4);
1544
1545 // Multiply:
1546 y_sgn = b_sgn ^ c_sgn;
1547 y_exp = b_exp + c_exp - FP16_EXP_BIAS + 2 * FP16_EXP_BITS + 1 - 3;
1548 y_mnt = (uint32_t)b_mnt * c_mnt << 3;
1549 if (!y_mnt) {
1550 y_exp = x_exp;
1551 }
1552
1553 // Add:
1554 if (x_exp >= y_exp) {
1555 y_mnt = (lsr32(y_mnt, x_exp - y_exp) |
1556 !!(y_mnt & (lsl32(1, x_exp - y_exp) - 1)));
1557 y_exp = x_exp;
1558 } else {
1559 x_mnt = (lsr32(x_mnt, y_exp - x_exp) |
1560 !!(x_mnt & (lsl32(1, y_exp - x_exp) - 1)));
1561 x_exp = y_exp;
1562 }
1563 if (x_sgn == y_sgn) {
1564 x_mnt = x_mnt + y_mnt;
1565 } else if (x_mnt >= y_mnt) {
1566 x_mnt = x_mnt - y_mnt;
1567 } else {
1568 x_sgn ^= 1;
1569 x_mnt = y_mnt - x_mnt;
1570 }
1571
1572 if (!x_mnt) {
1573 // Sign of exact zero result depends on rounding mode
1574 return fp16_zero((mode & 3) == 2);
1575 }
1576
1577 // Normalise into FP16_BITS bits, collapsing error into bottom bit:
1578 x_mnt = fp32_normalise(x_mnt, &x_exp);
1579 x_mnt = x_mnt >> (FP16_BITS - 1) | !!(uint16_t)(x_mnt << 1);
1580
1581 return fp16_round(x_sgn, x_exp + scale, x_mnt, mode, flags);
1582}
1583
1584static uint32_t
1585fp32_muladd(uint32_t a, uint32_t b, uint32_t c, int scale,
1586 int mode, int *flags)
1587{
1588 int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp;
1589 uint32_t a_mnt, b_mnt, c_mnt, x;
1590 uint64_t x_mnt, y_mnt;
1591
1592 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1593 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1594 fp32_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags);
1595
1596 x = fp32_process_NaNs3(a, b, c, mode, flags);
1597
1598 // Quiet NaN added to product of zero and infinity:
1599 if (fp32_is_quiet_NaN(a_exp, a_mnt) &&
1600 ((!b_mnt && fp32_is_infinity(c_exp, c_mnt)) ||
1601 (!c_mnt && fp32_is_infinity(b_exp, b_mnt)))) {
1602 x = fp32_defaultNaN();
1603 *flags |= FPLIB_IOC;
1604 }
1605
1606 if (x) {
1607 return x;
1608 }
1609
1610 // Handle infinities and zeroes:
1611 if ((b_exp == FP32_EXP_INF && !c_mnt) ||
1612 (c_exp == FP32_EXP_INF && !b_mnt) ||
1613 (a_exp == FP32_EXP_INF &&
1614 (b_exp == FP32_EXP_INF || c_exp == FP32_EXP_INF) &&
1615 (a_sgn != (b_sgn ^ c_sgn)))) {
1616 *flags |= FPLIB_IOC;
1617 return fp32_defaultNaN();
1618 }
1619 if (a_exp == FP32_EXP_INF)
1620 return fp32_infinity(a_sgn);
1621 if (b_exp == FP32_EXP_INF || c_exp == FP32_EXP_INF)
1622 return fp32_infinity(b_sgn ^ c_sgn);
1623 if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn))
1624 return fp32_zero(a_sgn);
1625
1626 x_sgn = a_sgn;
1627 x_exp = a_exp + 2 * FP32_EXP_BITS - 3;
1628 x_mnt = (uint64_t)a_mnt << (FP32_MANT_BITS + 4);
1629
1630 // Multiply:
1631 y_sgn = b_sgn ^ c_sgn;
1632 y_exp = b_exp + c_exp - FP32_EXP_BIAS + 2 * FP32_EXP_BITS + 1 - 3;
1633 y_mnt = (uint64_t)b_mnt * c_mnt << 3;
1634 if (!y_mnt) {
1635 y_exp = x_exp;
1636 }
1637
1638 // Add:
1639 if (x_exp >= y_exp) {
1640 y_mnt = (lsr64(y_mnt, x_exp - y_exp) |
1641 !!(y_mnt & (lsl64(1, x_exp - y_exp) - 1)));
1642 y_exp = x_exp;
1643 } else {
1644 x_mnt = (lsr64(x_mnt, y_exp - x_exp) |
1645 !!(x_mnt & (lsl64(1, y_exp - x_exp) - 1)));
1646 x_exp = y_exp;
1647 }
1648 if (x_sgn == y_sgn) {
1649 x_mnt = x_mnt + y_mnt;
1650 } else if (x_mnt >= y_mnt) {
1651 x_mnt = x_mnt - y_mnt;
1652 } else {
1653 x_sgn ^= 1;
1654 x_mnt = y_mnt - x_mnt;
1655 }
1656
1657 if (!x_mnt) {
1658 // Sign of exact zero result depends on rounding mode
1659 return fp32_zero((mode & 3) == 2);
1660 }
1661
1662 // Normalise into FP32_BITS bits, collapsing error into bottom bit:
1663 x_mnt = fp64_normalise(x_mnt, &x_exp);
1664 x_mnt = x_mnt >> (FP32_BITS - 1) | !!(uint32_t)(x_mnt << 1);
1665
1666 return fp32_round(x_sgn, x_exp + scale, x_mnt, mode, flags);
1667}
1668
1669static uint64_t
1670fp64_muladd(uint64_t a, uint64_t b, uint64_t c, int scale,
1671 int mode, int *flags)
1672{
1673 int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp;
1674 uint64_t a_mnt, b_mnt, c_mnt, x;
1675 uint64_t x0_mnt, x1_mnt, y0_mnt, y1_mnt;
1676
1677 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1678 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1679 fp64_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags);
1680
1681 x = fp64_process_NaNs3(a, b, c, mode, flags);
1682
1683 // Quiet NaN added to product of zero and infinity:
1684 if (fp64_is_quiet_NaN(a_exp, a_mnt) &&
1685 ((!b_mnt && fp64_is_infinity(c_exp, c_mnt)) ||
1686 (!c_mnt && fp64_is_infinity(b_exp, b_mnt)))) {
1687 x = fp64_defaultNaN();
1688 *flags |= FPLIB_IOC;
1689 }
1690
1691 if (x) {
1692 return x;
1693 }
1694
1695 // Handle infinities and zeroes:
1696 if ((b_exp == FP64_EXP_INF && !c_mnt) ||
1697 (c_exp == FP64_EXP_INF && !b_mnt) ||
1698 (a_exp == FP64_EXP_INF &&
1699 (b_exp == FP64_EXP_INF || c_exp == FP64_EXP_INF) &&
1700 (a_sgn != (b_sgn ^ c_sgn)))) {
1701 *flags |= FPLIB_IOC;
1702 return fp64_defaultNaN();
1703 }
1704 if (a_exp == FP64_EXP_INF)
1705 return fp64_infinity(a_sgn);
1706 if (b_exp == FP64_EXP_INF || c_exp == FP64_EXP_INF)
1707 return fp64_infinity(b_sgn ^ c_sgn);
1708 if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn))
1709 return fp64_zero(a_sgn);
1710
1711 x_sgn = a_sgn;
1712 x_exp = a_exp + FP64_EXP_BITS;
1713 x0_mnt = 0;
1714 x1_mnt = a_mnt;
1715
1716 // Multiply:
1717 y_sgn = b_sgn ^ c_sgn;
1718 y_exp = b_exp + c_exp - FP64_EXP_BIAS + 2 * FP64_EXP_BITS + 1 - 3;
1719 mul62x62(&y0_mnt, &y1_mnt, b_mnt, c_mnt << 3);
1720 if (!y0_mnt && !y1_mnt) {
1721 y_exp = x_exp;
1722 }
1723
1724 // Add:
1725 if (x_exp >= y_exp) {
1726 uint64_t t0, t1;
1727 lsl128(&t0, &t1, y0_mnt, y1_mnt,
1728 x_exp - y_exp < 128 ? 128 - (x_exp - y_exp) : 0);
1729 lsr128(&y0_mnt, &y1_mnt, y0_mnt, y1_mnt, x_exp - y_exp);
1730 y0_mnt |= !!(t0 | t1);
1731 y_exp = x_exp;
1732 } else {
1733 uint64_t t0, t1;
1734 lsl128(&t0, &t1, x0_mnt, x1_mnt,
1735 y_exp - x_exp < 128 ? 128 - (y_exp - x_exp) : 0);
1736 lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y_exp - x_exp);
1737 x0_mnt |= !!(t0 | t1);
1738 x_exp = y_exp;
1739 }
1740 if (x_sgn == y_sgn) {
1741 add128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y0_mnt, y1_mnt);
1742 } else if (cmp128(x0_mnt, x1_mnt, y0_mnt, y1_mnt) >= 0) {
1743 sub128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y0_mnt, y1_mnt);
1744 } else {
1745 x_sgn ^= 1;
1746 sub128(&x0_mnt, &x1_mnt, y0_mnt, y1_mnt, x0_mnt, x1_mnt);
1747 }
1748
1749 if (!x0_mnt && !x1_mnt) {
1750 // Sign of exact zero result depends on rounding mode
1751 return fp64_zero((mode & 3) == 2);
1752 }
1753
1754 // Normalise into FP64_BITS bits, collapsing error into bottom bit:
1755 fp128_normalise(&x0_mnt, &x1_mnt, &x_exp);
1756 x0_mnt = x1_mnt << 1 | !!x0_mnt;
1757
1758 return fp64_round(x_sgn, x_exp + scale, x0_mnt, mode, flags);
1759}
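// Note that fp64_muladd accumulates a + b * c at full precision in a
// 128-bit significand and rounds exactly once at the end; this single
// rounding is what distinguishes a fused multiply-add from a multiply
// followed by an add, each of which would round separately.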
1760
1761static uint16_t
1762fp16_div(uint16_t a, uint16_t b, int mode, int *flags)
1763{
1764 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1765 uint16_t a_mnt, b_mnt, x;
1766 uint32_t x_mnt;
1767
1768 fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1769 fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1770
1771 if ((x = fp16_process_NaNs(a, b, mode, flags)))
1772 return x;
1773
1774 // Handle infinities and zeroes:
1775 if ((a_exp == FP16_EXP_INF && b_exp == FP16_EXP_INF) ||
1776 (!a_mnt && !b_mnt)) {
1777 *flags |= FPLIB_IOC;
1778 return fp16_defaultNaN();
1779 }
1780 if (a_exp == FP16_EXP_INF || !b_mnt) {
1781 if (a_exp != FP16_EXP_INF)
1782 *flags |= FPLIB_DZC;
1783 return fp16_infinity(a_sgn ^ b_sgn);
1784 }
1785 if (!a_mnt || b_exp == FP16_EXP_INF)
1786 return fp16_zero(a_sgn ^ b_sgn);
1787
1788 // Divide, setting bottom bit if inexact:
1789 a_mnt = fp16_normalise(a_mnt, &a_exp);
1790 x_sgn = a_sgn ^ b_sgn;
1791 x_exp = a_exp - b_exp + (FP16_EXP_BIAS + FP16_BITS + 2 * FP16_EXP_BITS - 3);
1792 x_mnt = ((uint32_t)a_mnt << (FP16_MANT_BITS - FP16_EXP_BITS + 3)) / b_mnt;
1793 x_mnt |= (x_mnt * b_mnt !=
1794 (uint32_t)a_mnt << (FP16_MANT_BITS - FP16_EXP_BITS + 3));
1795
1796 // Normalise into FP16_BITS bits, collapsing error into bottom bit:
1797 x_mnt = fp32_normalise(x_mnt, &x_exp);
1798 x_mnt = x_mnt >> (FP16_BITS - 1) | !!(uint16_t)(x_mnt << 1);
1799
1800 return fp16_round(x_sgn, x_exp, x_mnt, mode, flags);
1801}
1802
1803static uint32_t
1804fp32_div(uint32_t a, uint32_t b, int mode, int *flags)
1805{
1806 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1807 uint32_t a_mnt, b_mnt, x;
1808 uint64_t x_mnt;
1809
1810 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1811 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1812
1813 if ((x = fp32_process_NaNs(a, b, mode, flags)))
1814 return x;
1815
1816 // Handle infinities and zeroes:
1817 if ((a_exp == FP32_EXP_INF && b_exp == FP32_EXP_INF) ||
1818 (!a_mnt && !b_mnt)) {
1819 *flags |= FPLIB_IOC;
1820 return fp32_defaultNaN();
1821 }
1822 if (a_exp == FP32_EXP_INF || !b_mnt) {
1823 if (a_exp != FP32_EXP_INF)
1824 *flags |= FPLIB_DZC;
1825 return fp32_infinity(a_sgn ^ b_sgn);
1826 }
1827 if (!a_mnt || b_exp == FP32_EXP_INF)
1828 return fp32_zero(a_sgn ^ b_sgn);
1829
1830 // Divide, setting bottom bit if inexact:
1831 a_mnt = fp32_normalise(a_mnt, &a_exp);
1832 x_sgn = a_sgn ^ b_sgn;
1833 x_exp = a_exp - b_exp + (FP32_EXP_BIAS + FP32_BITS + 2 * FP32_EXP_BITS - 3);
1834 x_mnt = ((uint64_t)a_mnt << (FP32_MANT_BITS - FP32_EXP_BITS + 3)) / b_mnt;
1835 x_mnt |= (x_mnt * b_mnt !=
1836 (uint64_t)a_mnt << (FP32_MANT_BITS - FP32_EXP_BITS + 3));
1837
1838 // Normalise into FP32_BITS bits, collapsing error into bottom bit:
1839 x_mnt = fp64_normalise(x_mnt, &x_exp);
1840 x_mnt = x_mnt >> (FP32_BITS - 1) | !!(uint32_t)(x_mnt << 1);
1841
1842 return fp32_round(x_sgn, x_exp, x_mnt, mode, flags);
1843}
1844
1845static uint64_t
1846fp64_div(uint64_t a, uint64_t b, int mode, int *flags)
1847{
1848 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp, c;
1849 uint64_t a_mnt, b_mnt, x, x_mnt, x0_mnt, x1_mnt;
1850
1851 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1852 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1853
1854 if ((x = fp64_process_NaNs(a, b, mode, flags)))
1855 return x;
1856
1857 // Handle infinities and zeroes:
1858 if ((a_exp == FP64_EXP_INF && b_exp == FP64_EXP_INF) ||
1859 (!a_mnt && !b_mnt)) {
1860 *flags |= FPLIB_IOC;
1861 return fp64_defaultNaN();
1862 }
1863 if (a_exp == FP64_EXP_INF || !b_mnt) {
1864 if (a_exp != FP64_EXP_INF)
1865 *flags |= FPLIB_DZC;
1866 return fp64_infinity(a_sgn ^ b_sgn);
1867 }
1868 if (!a_mnt || b_exp == FP64_EXP_INF)
1869 return fp64_zero(a_sgn ^ b_sgn);
1870
1871 // Find reciprocal of divisor with Newton-Raphson:
1872 a_mnt = fp64_normalise(a_mnt, &a_exp);
1873 b_mnt = fp64_normalise(b_mnt, &b_exp);
1874 x_mnt = ~(uint64_t)0 / (b_mnt >> 31);
1875 mul64x32(&x0_mnt, &x1_mnt, b_mnt, x_mnt);
1876 sub128(&x0_mnt, &x1_mnt, 0, (uint64_t)1 << 32, x0_mnt, x1_mnt);
1877 lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 32);
1878 mul64x32(&x0_mnt, &x1_mnt, x0_mnt, x_mnt);
1879 lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 33);
1880
1881 // Multiply by dividend:
1882 x_sgn = a_sgn ^ b_sgn;
1883 x_exp = a_exp - b_exp + FP64_EXP_BIAS + 8;
1884 mul62x62(&x0_mnt, &x1_mnt, x0_mnt, a_mnt >> 2);
1885 lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 4);
1886 x_mnt = x1_mnt;
1887
1888 // This is an underestimate, so try adding one:
1889 mul62x62(&x0_mnt, &x1_mnt, b_mnt >> 2, x_mnt + 1);
1890 c = cmp128(x0_mnt, x1_mnt, 0, a_mnt >> 11);
1891 if (c <= 0) {
1892 ++x_mnt;
1893 }
1894
1895 x_mnt = fp64_normalise(x_mnt, &x_exp);
1896
1897 return fp64_round(x_sgn, x_exp, x_mnt << 1 | !!c, mode, flags);
1898}
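// The divider above in conventional notation: starting from an estimate
// x0 ~= 1/b produced by an integer divide, one Newton-Raphson step
//   x1 = x0 * (2 - b * x0)
// roughly doubles the number of correct bits. The quotient estimate
// q = a * x1 is then an underestimate by at most one unit, so the code
// tries q + 1 and keeps it when (q + 1) * b <= a; the comparison result
// c doubles as the sticky bit, since c != 0 means the division was
// inexact.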
1899
1900static void
1901set_fpscr0(FPSCR &fpscr, int flags)
1902{
1903 if (flags & FPLIB_IDC) {
1904 fpscr.idc = 1;
1905 }
1906 if (flags & FPLIB_IOC) {
1907 fpscr.ioc = 1;
1908 }
1909 if (flags & FPLIB_DZC) {
1910 fpscr.dzc = 1;
1911 }
1912 if (flags & FPLIB_OFC) {
1913 fpscr.ofc = 1;
1914 }
1915 if (flags & FPLIB_UFC) {
1916 fpscr.ufc = 1;
1917 }
1918 if (flags & FPLIB_IXC) {
1919 fpscr.ixc = 1;
1920 }
1921}
1922
1923static uint16_t
1924fp16_scale(uint16_t a, int16_t b, int mode, int *flags)
1925{
1926 int a_sgn, a_exp;
1927 uint16_t a_mnt;
1928
1929 fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1930
1931 // Handle NaNs:
1932 if (fp16_is_NaN(a_exp, a_mnt)) {
1933 return fp16_process_NaN(a, mode, flags);
1934 }
1935
1936 // Handle zeroes:
1937 if (!a_mnt) {
1938 return fp16_zero(a_sgn);
1939 }
1940
1941 // Handle infinities:
1942 if (a_exp == FP16_EXP_INF) {
1943 return fp16_infinity(a_sgn);
1944 }
1945
1946 b = b < -300 ? -300 : b;
1947 b = b > 300 ? 300 : b;
1948 a_exp += b;
1949 a_mnt <<= 3;
1950
1951 a_mnt = fp16_normalise(a_mnt, &a_exp);
1952
1953 return fp16_round(a_sgn, a_exp + FP16_EXP_BITS - 3, a_mnt << 1,
1954 mode, flags);
1955}
1956
1957static uint32_t
1958fp32_scale(uint32_t a, int32_t b, int mode, int *flags)
1959{
1960 int a_sgn, a_exp;
1961 uint32_t a_mnt;
1962
1963 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1964
1965 // Handle NaNs:
1966 if (fp32_is_NaN(a_exp, a_mnt)) {
1967 return fp32_process_NaN(a, mode, flags);
1968 }
1969
1970 // Handle zeroes:
1971 if (!a_mnt) {
1972 return fp32_zero(a_sgn);
1973 }
1974
1975 // Handle infinities:
1976 if (a_exp == FP32_EXP_INF) {
1977 return fp32_infinity(a_sgn);
1978 }
1979
1980 b = b < -300 ? -300 : b;
1981 b = b > 300 ? 300 : b;
1982 a_exp += b;
1983 a_mnt <<= 3;
1984
1985 a_mnt = fp32_normalise(a_mnt, &a_exp);
1986
1987 return fp32_round(a_sgn, a_exp + FP32_EXP_BITS - 3, a_mnt << 1,
1988 mode, flags);
1989}
1990
1991static uint64_t
1992fp64_scale(uint64_t a, int64_t b, int mode, int *flags)
1993{
1994 int a_sgn, a_exp;
1995 uint64_t a_mnt;
1996
1997 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1998
1999 // Handle NaNs:
2000 if (fp64_is_NaN(a_exp, a_mnt)) {
2001 return fp64_process_NaN(a, mode, flags);
2002 }
2003
2004 // Handle zeroes:
2005 if (!a_mnt) {
2006 return fp64_zero(a_sgn);
2007 }
2008
2009 // Handle infinities:
2010 if (a_exp == FP64_EXP_INF) {
2011 return fp64_infinity(a_sgn);
2012 }
2013
2014 b = b < -3000 ? -3000 : b;
2015 b = b > 3000 ? 3000 : b;
2016 a_exp += b;
2017 a_mnt <<= 3;
2018
2019 a_mnt = fp64_normalise(a_mnt, &a_exp);
2020
2021 return fp64_round(a_sgn, a_exp + FP64_EXP_BITS - 3, a_mnt << 1,
2022 mode, flags);
2023}
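// Clamping b is safe because +/-300 (or +/-3000 for fp64) already
// exceeds the exponent span of each format (about 40 values for fp16,
// 277 for fp32, 2098 for fp64), so any larger magnitude produces the
// same overflow or underflow after rounding; the clamp merely keeps
// a_exp within comfortable int range.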
2024
2025static uint16_t
2026fp16_sqrt(uint16_t a, int mode, int *flags)
2027{
2028 int a_sgn, a_exp, x_sgn, x_exp;
2029 uint16_t a_mnt, x_mnt;
2030 uint32_t x, t0, t1;
2031
2032 fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
2033
2034 // Handle NaNs:
2035 if (fp16_is_NaN(a_exp, a_mnt))
2036 return fp16_process_NaN(a, mode, flags);
2037
2038 // Handle infinities and zeroes:
2039 if (!a_mnt)
2040 return fp16_zero(a_sgn);
2041 if (a_exp == FP16_EXP_INF && !a_sgn)
2042 return fp16_infinity(a_sgn);
2043 if (a_sgn) {
2044 *flags |= FPLIB_IOC;
2045 return fp16_defaultNaN();
2046 }
2047
2048 a_mnt = fp16_normalise(a_mnt, &a_exp);
2049 if (a_exp & 1) {
2050 ++a_exp;
2051 a_mnt >>= 1;
2052 }
2053
2054 // x = (a * 3 + 5) / 8
2055 x = ((uint32_t)a_mnt << 14) + ((uint32_t)a_mnt << 13) + ((uint32_t)5 << 28);
2056
2057 // x = (a / x + x) / 2; // 8-bit accuracy
2058 x = (((uint32_t)a_mnt << 16) / (x >> 15) + (x >> 16)) << 15;
2059
2060 // x = (a / x + x) / 2; // 16-bit accuracy
2061 x = (((uint32_t)a_mnt << 16) / (x >> 15) + (x >> 16)) << 15;
2062
2063 x_sgn = 0;
2064 x_exp = (a_exp + 27) >> 1;
2065 x_mnt = ((x - (1 << 18)) >> 19) + 1;
2066 t1 = (uint32_t)x_mnt * x_mnt;
2067 t0 = (uint32_t)a_mnt << 9;
2068 if (t1 > t0) {
2069 --x_mnt;
2070 }
2071
2072 x_mnt = fp16_normalise(x_mnt, &x_exp);
2073
2074 return fp16_round(x_sgn, x_exp, x_mnt << 1 | (t1 != t0), mode, flags);
2075}
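// The square roots use Heron's (Newton's) iteration
//   x_{n+1} = (a / x_n + x_n) / 2,
// which converges quadratically: here two fixed-point steps take the
// initial estimate (3a + 5) / 8 to roughly 16 correct bits, comfortably
// more than the 11-bit fp16 significand. The final test against
// t0 = a_mnt << 9 decrements the candidate if its square overshoots, and
// t1 != t0 marks the result inexact.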
2076
2077static uint32_t
2078fp32_sqrt(uint32_t a, int mode, int *flags)
2079{
2080 int a_sgn, a_exp, x_sgn, x_exp;
2081 uint32_t a_mnt, x, x_mnt;
2082 uint64_t t0, t1;
2083
2084 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
2085
2086 // Handle NaNs:
2087 if (fp32_is_NaN(a_exp, a_mnt))
2088 return fp32_process_NaN(a, mode, flags);
2089
2090 // Handle infinities and zeroes:
2091 if (!a_mnt)
2092 return fp32_zero(a_sgn);
2093 if (a_exp == FP32_EXP_INF && !a_sgn)
2094 return fp32_infinity(a_sgn);
2095 if (a_sgn) {
2096 *flags |= FPLIB_IOC;
2097 return fp32_defaultNaN();
2098 }
2099
2100 a_mnt = fp32_normalise(a_mnt, &a_exp);
2101 if (!(a_exp & 1)) {
2102 ++a_exp;
2103 a_mnt >>= 1;
2104 }
2105
2106 // x = (a * 3 + 5) / 8
2107 x = (a_mnt >> 2) + (a_mnt >> 3) + ((uint32_t)5 << 28);
2108
2109 // x = (a / x + x) / 2; // 8-bit accuracy
2110 x = (a_mnt / (x >> 15) + (x >> 16)) << 15;
2111
2112 // x = (a / x + x) / 2; // 16-bit accuracy
2113 x = (a_mnt / (x >> 15) + (x >> 16)) << 15;
2114
2115 // x = (a / x + x) / 2; // 32-bit accuracy
2116 x = ((((uint64_t)a_mnt << 32) / x) >> 2) + (x >> 1);
2117
2118 x_sgn = 0;
2119 x_exp = (a_exp + 147) >> 1;
2120 x_mnt = ((x - (1 << 5)) >> 6) + 1;
2121 t1 = (uint64_t)x_mnt * x_mnt;
2122 t0 = (uint64_t)a_mnt << 19;
2123 if (t1 > t0) {
2124 --x_mnt;
2125 }
2126
2127 x_mnt = fp32_normalise(x_mnt, &x_exp);
2128
2129 return fp32_round(x_sgn, x_exp, x_mnt << 1 | (t1 != t0), mode, flags);
2130}
2131
2132static uint64_t
2133fp64_sqrt(uint64_t a, int mode, int *flags)
2134{
2135 int a_sgn, a_exp, x_sgn, x_exp, c;
2136 uint64_t a_mnt, x_mnt, r, x0, x1;
2137 uint32_t x;
2138
2139 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
2140
2141 // Handle NaNs:
2142 if (fp64_is_NaN(a_exp, a_mnt))
2143 return fp64_process_NaN(a, mode, flags);
2144
2145 // Handle infinities and zeroes:
2146 if (!a_mnt)
2147 return fp64_zero(a_sgn);
2148 if (a_exp == FP64_EXP_INF && !a_sgn)
2149 return fp64_infinity(a_sgn);
2150 if (a_sgn) {
2151 *flags |= FPLIB_IOC;
2152 return fp64_defaultNaN();
2153 }
2154
2155 a_mnt = fp64_normalise(a_mnt, &a_exp);
2156 if (a_exp & 1) {
2157 ++a_exp;
2158 a_mnt >>= 1;
2159 }
2160
2161 // x = (a * 3 + 5) / 8
2162 x = (a_mnt >> 34) + (a_mnt >> 35) + ((uint32_t)5 << 28);
2163
2164 // x = (a / x + x) / 2; // 8-bit accuracy
2165 x = ((a_mnt >> 32) / (x >> 15) + (x >> 16)) << 15;
2166
2167 // x = (a / x + x) / 2; // 16-bit accuracy
2168 x = ((a_mnt >> 32) / (x >> 15) + (x >> 16)) << 15;
2169
2170 // x = (a / x + x) / 2; // 32-bit accuracy
2171 x = ((a_mnt / x) >> 2) + (x >> 1);
2172
2173 // r = 1 / x; // 32-bit accuracy
2174 r = ((uint64_t)1 << 62) / x;
2175
2176 // r = r * (2 - x * r); // 64-bit accuracy
2177 mul64x32(&x0, &x1, -(uint64_t)x * r << 1, r);
2178 lsr128(&x0, &x1, x0, x1, 31);
2179
2180 // x = (x + a * r) / 2; // 64-bit accuracy
2181 mul62x62(&x0, &x1, a_mnt >> 10, x0 >> 2);
2182 lsl128(&x0, &x1, x0, x1, 5);
2183 lsr128(&x0, &x1, x0, x1, 56);
2184
2185 x0 = ((uint64_t)x << 31) + (x0 >> 1);
2186
2187 x_sgn = 0;
2188 x_exp = (a_exp + 1053) >> 1;
2189 x_mnt = x0;
2190 x_mnt = ((x_mnt - (1 << 8)) >> 9) + 1;
2191 mul62x62(&x0, &x1, x_mnt, x_mnt);
2192 lsl128(&x0, &x1, x0, x1, 19);
2193 c = cmp128(x0, x1, 0, a_mnt);
2194 if (c > 0)
2195 --x_mnt;
2196
2197 x_mnt = fp64_normalise(x_mnt, &x_exp);
2198
2199 return fp64_round(x_sgn, x_exp, x_mnt << 1 | !!c, mode, flags);
2200}
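// fp64_sqrt extends the same scheme: three iterations give a root x good
// to about 32 bits, and the last step x' = (x + a / x) / 2 would need an
// expensive 128-by-64-bit divide, so the code instead refines a
// reciprocal r ~= 1/x with r' = r * (2 - x * r) and replaces a / x by
// the multiplication a * r. Exactness is again decided by a full-width
// comparison of x_mnt^2 (via mul62x62) against the input significand.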
2201
2202static int
2203modeConv(FPSCR fpscr)
2204{
2205 uint32_t x = (uint32_t)fpscr;
2206 // AHP bit is ignored. Only fplibConvert uses AHP.
2207 return (x >> 22 & 0xf) | (x >> 19 & 1 ? FPLIB_FZ16 : 0);
2208}
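// Example: an FPSCR with RMode = 0b10 (round towards minus infinity),
// FZ = 1 and DN = 1 yields mode = 0b1110. Bits [1:0] are the rounding
// mode consumed by the *_round_ helpers, bit 2 is FPLIB_FZ, bit 3 is
// FPLIB_DN; FZ16 (FPSCR bit 19) travels as the separate FPLIB_FZ16 flag,
// and the AHP flag (FPLIB_AHP, bit 4 of the mode word) is only ever set
// locally by fplibConvert.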
2209
2210static void
2211set_fpscr(FPSCR &fpscr, int flags)
2212{
2213 // translate back to FPSCR
2214 bool underflow = false;
2215 if (flags & FPLIB_IDC) {
2216 fpscr.idc = 1;
2217 }
2218 if (flags & FPLIB_IOC) {
2219 fpscr.ioc = 1;
2220 }
2221 if (flags & FPLIB_DZC) {
2222 fpscr.dzc = 1;
2223 }
2224 if (flags & FPLIB_OFC) {
2225 fpscr.ofc = 1;
2226 }
2227 if (flags & FPLIB_UFC) {
2228 underflow = true; //xx Why is this required?
2229 fpscr.ufc = 1;
2230 }
2231 if ((flags & FPLIB_IXC) && !(underflow && fpscr.fz)) {
2232 fpscr.ixc = 1;
2233 }
2234}
2235
2236template <>
2237bool
2238fplibCompareEQ(uint16_t a, uint16_t b, FPSCR &fpscr)
2239{
2240 int flags = 0;
2241 int x = fp16_compare_eq(a, b, modeConv(fpscr), &flags);
2242 set_fpscr(fpscr, flags);
2243 return x;
2244}
2245
2246template <>
2247bool
2248fplibCompareGE(uint16_t a, uint16_t b, FPSCR &fpscr)
2249{
2250 int flags = 0;
2251 int x = fp16_compare_ge(a, b, modeConv(fpscr), &flags);
2252 set_fpscr(fpscr, flags);
2253 return x;
2254}
2255
2256template <>
2257bool
2258fplibCompareGT(uint16_t a, uint16_t b, FPSCR &fpscr)
2259{
2260 int flags = 0;
2261 int x = fp16_compare_gt(a, b, modeConv(fpscr), &flags);
2262 set_fpscr(fpscr, flags);
2263 return x;
2264}
2265
2266template <>
2267bool
2268fplibCompareUN(uint16_t a, uint16_t b, FPSCR &fpscr)
2269{
2270 int flags = 0;
2271 int x = fp16_compare_un(a, b, modeConv(fpscr), &flags);
2272 set_fpscr(fpscr, flags);
2273 return x;
2274}
2275
2276template <>
2277bool
2278fplibCompareEQ(uint32_t a, uint32_t b, FPSCR &fpscr)
2279{
2280 int flags = 0;
2281 int x = fp32_compare_eq(a, b, modeConv(fpscr), &flags);
2282 set_fpscr(fpscr, flags);
2283 return x;
2284}
2285
2286template <>
2287bool
2288fplibCompareGE(uint32_t a, uint32_t b, FPSCR &fpscr)
2289{
2290 int flags = 0;
2291 int x = fp32_compare_ge(a, b, modeConv(fpscr), &flags);
2292 set_fpscr(fpscr, flags);
2293 return x;
2294}
2295
2296template <>
2297bool
2298fplibCompareGT(uint32_t a, uint32_t b, FPSCR &fpscr)
2299{
2300 int flags = 0;
2301 int x = fp32_compare_gt(a, b, modeConv(fpscr), &flags);
2302 set_fpscr(fpscr, flags);
2303 return x;
2304}
2305
2306template <>
2307bool
2308fplibCompareUN(uint32_t a, uint32_t b, FPSCR &fpscr)
2309{
2310 int flags = 0;
2311 int x = fp32_compare_un(a, b, modeConv(fpscr), &flags);
2312 set_fpscr(fpscr, flags);
2313 return x;
2314}
2315
2316template <>
2317bool
2318fplibCompareEQ(uint64_t a, uint64_t b, FPSCR &fpscr)
2319{
2320 int flags = 0;
2321 int x = fp64_compare_eq(a, b, modeConv(fpscr), &flags);
2322 set_fpscr(fpscr, flags);
2323 return x;
2324}
2325
2326template <>
2327bool
2328fplibCompareGE(uint64_t a, uint64_t b, FPSCR &fpscr)
2329{
2330 int flags = 0;
2331 int x = fp64_compare_ge(a, b, modeConv(fpscr), &flags);
2332 set_fpscr(fpscr, flags);
2333 return x;
2334}
2335
2336template <>
2337bool
2338fplibCompareGT(uint64_t a, uint64_t b, FPSCR &fpscr)
2339{
2340 int flags = 0;
2341 int x = fp64_compare_gt(a, b, modeConv(fpscr), &flags);
2342 set_fpscr(fpscr, flags);
2343 return x;
2344}
2345
2346template <>
2347bool
2348fplibCompareUN(uint64_t a, uint64_t b, FPSCR &fpscr)
2349{
2350 int flags = 0;
2351 int x = fp64_compare_un(a, b, modeConv(fpscr), &flags);
2352 set_fpscr(fpscr, flags);
2353 return x;
2354}
2355
2356template <>
2357uint16_t
2358fplibAbs(uint16_t op)
2359{
2360 return op & ~(1ULL << (FP16_BITS - 1));
2361}
2362
2363template <>
2364uint32_t
2365fplibAbs(uint32_t op)
2366{
2367 return op & ~(1ULL << (FP32_BITS - 1));
2368}
2369
2370template <>
2371uint64_t
2372fplibAbs(uint64_t op)
2373{
2374 return op & ~(1ULL << (FP64_BITS - 1));
2375}
2376
2377template <>
2378uint16_t
2379fplibAdd(uint16_t op1, uint16_t op2, FPSCR &fpscr)
2380{
2381 int flags = 0;
2382 uint16_t result = fp16_add(op1, op2, 0, modeConv(fpscr), &flags);
2383 set_fpscr0(fpscr, flags);
2384 return result;
2385}
2386
2387template <>
2388uint32_t
2389fplibAdd(uint32_t op1, uint32_t op2, FPSCR &fpscr)
2390{
2391 int flags = 0;
2392 uint32_t result = fp32_add(op1, op2, 0, modeConv(fpscr), &flags);
2393 set_fpscr0(fpscr, flags);
2394 return result;
2395}
2396
2397template <>
2398uint64_t
2399fplibAdd(uint64_t op1, uint64_t op2, FPSCR &fpscr)
2400{
2401 int flags = 0;
2402 uint64_t result = fp64_add(op1, op2, 0, modeConv(fpscr), &flags);
2403 set_fpscr0(fpscr, flags);
2404 return result;
2405}
2406
2407template <>
2408int
2409fplibCompare(uint16_t op1, uint16_t op2, bool signal_nans, FPSCR &fpscr)
2410{
2411 int mode = modeConv(fpscr);
2412 int flags = 0;
2413 int sgn1, exp1, sgn2, exp2, result;
2414 uint16_t mnt1, mnt2;
2415
2416 fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2417 fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2418
2419 if (fp16_is_NaN(exp1, mnt1) || fp16_is_NaN(exp2, mnt2)) {
2420 result = 3;
2421 if (fp16_is_signalling_NaN(exp1, mnt1) ||
2422 fp16_is_signalling_NaN(exp2, mnt2) || signal_nans)
2423 flags |= FPLIB_IOC;
2424 } else {
2425 if (op1 == op2 || (!mnt1 && !mnt2)) {
2426 result = 6;
2427 } else if (sgn1 != sgn2) {
2428 result = sgn1 ? 8 : 2;
2429 } else if (exp1 != exp2) {
2430 result = sgn1 ^ (exp1 < exp2) ? 8 : 2;
2431 } else {
2432 result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2;
2433 }
2434 }
2435
2436 set_fpscr0(fpscr, flags);
2437
2438 return result;
2439}
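// The return value packs the Arm NZCV condition flags produced by a
// floating-point compare: 8 (0b1000, N) for less than, 6 (0b0110, ZC)
// for equal, 2 (0b0010, C) for greater than, and 3 (0b0011, CV) for
// unordered.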
2440
2441template <>
2442int
2443fplibCompare(uint32_t op1, uint32_t op2, bool signal_nans, FPSCR &fpscr)
2444{
2445 int mode = modeConv(fpscr);
2446 int flags = 0;
2447 int sgn1, exp1, sgn2, exp2, result;
2448 uint32_t mnt1, mnt2;
2449
2450 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2451 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2452
2453 if (fp32_is_NaN(exp1, mnt1) || fp32_is_NaN(exp2, mnt2)) {
2454 result = 3;
2455 if (fp32_is_signalling_NaN(exp1, mnt1) ||
2456 fp32_is_signalling_NaN(exp2, mnt2) || signal_nans)
2457 flags |= FPLIB_IOC;
2458 } else {
2459 if (op1 == op2 || (!mnt1 && !mnt2)) {
2460 result = 6;
2461 } else if (sgn1 != sgn2) {
2462 result = sgn1 ? 8 : 2;
2463 } else if (exp1 != exp2) {
2464 result = sgn1 ^ (exp1 < exp2) ? 8 : 2;
2465 } else {
2466 result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2;
2467 }
2468 }
2469
2470 set_fpscr0(fpscr, flags);
2471
2472 return result;
2473}
2474
2475template <>
2476int
2477fplibCompare(uint64_t op1, uint64_t op2, bool signal_nans, FPSCR &fpscr)
2478{
2479 int mode = modeConv(fpscr);
2480 int flags = 0;
2481 int sgn1, exp1, sgn2, exp2, result;
2482 uint64_t mnt1, mnt2;
2483
2484 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2485 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2486
2487 if (fp64_is_NaN(exp1, mnt1) || fp64_is_NaN(exp2, mnt2)) {
2488 result = 3;
2489 if (fp64_is_signalling_NaN(exp1, mnt1) ||
2490 fp64_is_signalling_NaN(exp2, mnt2) || signal_nans)
2491 flags |= FPLIB_IOC;
2492 } else {
2493 if (op1 == op2 || (!mnt1 && !mnt2)) {
2494 result = 6;
2495 } else if (sgn1 != sgn2) {
2496 result = sgn1 ? 8 : 2;
2497 } else if (exp1 != exp2) {
2498 result = sgn1 ^ (exp1 < exp2) ? 8 : 2;
2499 } else {
2500 result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2;
2501 }
2502 }
2503
2504 set_fpscr0(fpscr, flags);
2505
2506 return result;
2507}
2508
2509static uint16_t
2510fp16_FPConvertNaN_32(uint32_t op)
2511{
2512 return fp16_pack(op >> (FP32_BITS - 1), FP16_EXP_INF,
2513 1ULL << (FP16_MANT_BITS - 1) |
2514 op >> (FP32_MANT_BITS - FP16_MANT_BITS));
2515}
2516
2517static uint16_t
2518fp16_FPConvertNaN_64(uint64_t op)
2519{
2520 return fp16_pack(op >> (FP64_BITS - 1), FP16_EXP_INF,
2521 1ULL << (FP16_MANT_BITS - 1) |
2522 op >> (FP64_MANT_BITS - FP16_MANT_BITS));
2523}
2524
2525static uint32_t
2526fp32_FPConvertNaN_16(uint16_t op)
2527{
2528 return fp32_pack(op >> (FP16_BITS - 1), FP32_EXP_INF,
2529 1ULL << (FP32_MANT_BITS - 1) |
2530 (uint32_t)op << (FP32_MANT_BITS - FP16_MANT_BITS));
2531}
2532
2533static uint32_t
2534fp32_FPConvertNaN_64(uint64_t op)
2535{
2536 return fp32_pack(op >> (FP64_BITS - 1), FP32_EXP_INF,
2537 1ULL << (FP32_MANT_BITS - 1) |
2538 op >> (FP64_MANT_BITS - FP32_MANT_BITS));
2539}
2540
2541static uint64_t
2542fp64_FPConvertNaN_16(uint16_t op)
2543{
2544 return fp64_pack(op >> (FP16_BITS - 1), FP64_EXP_INF,
2545 1ULL << (FP64_MANT_BITS - 1) |
2546 (uint64_t)op << (FP64_MANT_BITS - FP16_MANT_BITS));
2547}
2548
2549static uint64_t
2550fp64_FPConvertNaN_32(uint32_t op)
2551{
2552 return fp64_pack(op >> (FP32_BITS - 1), FP64_EXP_INF,
2553 1ULL << (FP64_MANT_BITS - 1) |
2554 (uint64_t)op << (FP64_MANT_BITS - FP32_MANT_BITS));
2555}
2556
2557static uint16_t
2558fp16_FPOnePointFive(int sgn)
2559{
2560 return fp16_pack(sgn, FP16_EXP_BIAS, 1ULL << (FP16_MANT_BITS - 1));
2561}
2562
2563static uint32_t
2564fp32_FPOnePointFive(int sgn)
2565{
2566 return fp32_pack(sgn, FP32_EXP_BIAS, 1ULL << (FP32_MANT_BITS - 1));
2567}
2568
2569static uint64_t
2570fp64_FPOnePointFive(int sgn)
2571{
2572 return fp64_pack(sgn, FP64_EXP_BIAS, 1ULL << (FP64_MANT_BITS - 1));
2573}
2574
2575static uint16_t
2576fp16_FPThree(int sgn)
2577{
2578 return fp16_pack(sgn, FP16_EXP_BIAS + 1, 1ULL << (FP16_MANT_BITS - 1));
2579}
2580
2581static uint32_t
2582fp32_FPThree(int sgn)
2583{
2584 return fp32_pack(sgn, FP32_EXP_BIAS + 1, 1ULL << (FP32_MANT_BITS - 1));
2585}
2586
2587static uint64_t
2588fp64_FPThree(int sgn)
2589{
2590 return fp64_pack(sgn, FP64_EXP_BIAS + 1, 1ULL << (FP64_MANT_BITS - 1));
2591}
2592
2593static uint16_t
2594fp16_FPTwo(int sgn)
2595{
2596 return fp16_pack(sgn, FP16_EXP_BIAS + 1, 0);
2597}
2598
2599static uint32_t
2600fp32_FPTwo(int sgn)
2601{
2602 return fp32_pack(sgn, FP32_EXP_BIAS + 1, 0);
2603}
2604
2605static uint64_t
2606fp64_FPTwo(int sgn)
2607{
2608 return fp64_pack(sgn, FP64_EXP_BIAS + 1, 0);
2609}
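// Sanity check on the constant builders: fp32_FPOnePointFive(0) packs
// exponent FP32_EXP_BIAS with the top fraction bit set, i.e.
// 1.1b * 2^0 = 1.5 = 0x3fc00000; fp32_FPThree(0) is 1.1b * 2^1 = 3.0 =
// 0x40400000; and fp32_FPTwo(0) is 1.0b * 2^1 = 2.0 = 0x40000000. These
// constants are presumably consumed by the reciprocal and reciprocal
// square root step operations elsewhere in the library.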
2610
2611template <>
2612uint16_t
2613fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr)
2614{
2615 int mode = modeConv(fpscr);
2616 int flags = 0;
2617 int sgn, exp;
2618 uint32_t mnt;
2619 uint16_t result;
2620
2621 // Unpack floating-point operand optionally with flush-to-zero:
2622 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2623
2624 bool alt_hp = fpscr.ahp;
2625
2626 if (fp32_is_NaN(exp, mnt)) {
2627 if (alt_hp) {
2628 result = fp16_zero(sgn);
2629 } else if (fpscr.dn) {
2630 result = fp16_defaultNaN();
2631 } else {
2632 result = fp16_FPConvertNaN_32(op);
2633 }
2634 if (!(mnt >> (FP32_MANT_BITS - 1) & 1) || alt_hp) {
2635 flags |= FPLIB_IOC;
2636 }
2637 } else if (exp == FP32_EXP_INF) {
2638 if (alt_hp) {
2639 result = ((uint16_t)sgn << (FP16_BITS - 1) |
2640 ((1ULL << (FP16_BITS - 1)) - 1));
2641 flags |= FPLIB_IOC;
2642 } else {
2643 result = fp16_infinity(sgn);
2644 }
2645 } else if (!mnt) {
2646 result = fp16_zero(sgn);
2647 } else {
2648 result =
2649 fp16_round_(sgn, exp - FP32_EXP_BIAS + FP16_EXP_BIAS,
2650 mnt >> (FP32_MANT_BITS - FP16_BITS) |
2651 !!(mnt & ((1ULL << (FP32_MANT_BITS - FP16_BITS)) - 1)),
2652 rounding, (mode & 0xf) | alt_hp << 4, &flags);
2653 }
2654
2655 set_fpscr0(fpscr, flags);
2656
2657 return result;
2658}
2659
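// Editorial sketch of the narrowing step above: the unpacked 32-bit
// significand is reduced to FP16_BITS bits, and every discarded low bit is
// ORed into bit 0 (the "sticky" bit) so fp16_round_ still sees any
// inexactness:
//
//     uint32_t kept   = mnt >> (FP32_MANT_BITS - FP16_BITS);
//     uint32_t sticky = (mnt & ((1ULL << (FP32_MANT_BITS - FP16_BITS)) - 1)) != 0;
//     // fp16_round_ then rounds (kept | sticky) at half precision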
2660template <>
2661uint16_t
2662fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr)
2663{
2664 int mode = modeConv(fpscr);
2665 int flags = 0;
2666 int sgn, exp;
2667 uint64_t mnt;
2668 uint16_t result;
2669
2670 // Unpack floating-point operand optionally with flush-to-zero:
2671 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2672
2673 bool alt_hp = fpscr.ahp;
2674
2675 if (fp64_is_NaN(exp, mnt)) {
2676 if (alt_hp) {
2677 result = fp16_zero(sgn);
2678 } else if (fpscr.dn) {
2679 result = fp16_defaultNaN();
2680 } else {
2681 result = fp16_FPConvertNaN_64(op);
2682 }
2683 if (!(mnt >> (FP64_MANT_BITS - 1) & 1) || alt_hp) {
2684 flags |= FPLIB_IOC;
2685 }
2686 } else if (exp == FP64_EXP_INF) {
2687 if (alt_hp) {
2688 result = ((uint16_t)sgn << (FP16_BITS - 1) |
2689 ((1ULL << (FP16_BITS - 1)) - 1));
2690 flags |= FPLIB_IOC;
2691 } else {
2692 result = fp16_infinity(sgn);
2693 }
2694 } else if (!mnt) {
2695 result = fp16_zero(sgn);
2696 } else {
2697 result =
2698 fp16_round_(sgn, exp - FP64_EXP_BIAS + FP16_EXP_BIAS,
2699 mnt >> (FP64_MANT_BITS - FP16_BITS) |
2700 !!(mnt & ((1ULL << (FP64_MANT_BITS - FP16_BITS)) - 1)),
2701 rounding, (mode & 0xf) | alt_hp << 4, &flags);
2702 }
2703
2704 set_fpscr0(fpscr, flags);
2705
2706 return result;
2707}
2708
2709template <>
2710uint32_t
2711fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr)
2712{
2713 int mode = modeConv(fpscr);
2714 int flags = 0;
2715 int sgn, exp;
2716 uint16_t mnt;
2717 uint32_t result;
2718
2719 // Unpack floating-point operand optionally with flush-to-zero:
2720 fp16_unpack(&sgn, &exp, &mnt, op, mode & 0xf, &flags);
2721
2722 if (fp16_is_NaN(exp, mnt) && !fpscr.ahp) {
2723 if (fpscr.dn) {
2724 result = fp32_defaultNaN();
2725 } else {
2726 result = fp32_FPConvertNaN_16(op);
2727 }
2728 if (!(mnt >> (FP16_MANT_BITS - 1) & 1)) {
2729 flags |= FPLIB_IOC;
2730 }
2731 } else if (exp == FP16_EXP_INF && !fpscr.ahp) {
2732 result = fp32_infinity(sgn);
2733 } else if (!mnt) {
2734 result = fp32_zero(sgn);
2735 } else {
2736 mnt = fp16_normalise(mnt, &exp);
2737 result = fp32_pack(sgn, (exp - FP16_EXP_BIAS +
2738 FP32_EXP_BIAS + FP16_EXP_BITS),
2739 (uint32_t)mnt << (FP32_MANT_BITS - FP16_BITS + 1));
2740 }
2741
2742 set_fpscr0(fpscr, flags);
2743
2744 return result;
2745}
2746
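// Editorial note: widening from half to single is exact, so no rounding
// call is needed -- normalise and repack suffice. Worked example for
// op = 0x3c00 (1.0 in half precision), following the code above:
//
//     fp16_unpack    -> sgn 0, exp 15, mnt 0x0400 (hidden bit set)
//     fp16_normalise -> mnt 0x8000, exp 10 (shifted left by 5)
//     packed exp     =  10 - FP16_EXP_BIAS + FP32_EXP_BIAS + FP16_EXP_BITS
//                    =  10 - 15 + 127 + 5 = 127
//     result         =  0x3f800000 (1.0f)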
2747template <>
2748uint32_t
2749fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr)
2750{
2751 int mode = modeConv(fpscr);
2752 int flags = 0;
2753 int sgn, exp;
2754 uint64_t mnt;
2755 uint32_t result;
2756
2757 // Unpack floating-point operand optionally with flush-to-zero:
2758 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2759
2760 if (fp64_is_NaN(exp, mnt)) {
2761 if (fpscr.dn) {
2762 result = fp32_defaultNaN();
2763 } else {
2764 result = fp32_FPConvertNaN_64(op);
2765 }
2766 if (!(mnt >> (FP64_MANT_BITS - 1) & 1)) {
2767 flags |= FPLIB_IOC;
2768 }
2769 } else if (exp == FP64_EXP_INF) {
2770 result = fp32_infinity(sgn);
2771 } else if (!mnt) {
2772 result = fp32_zero(sgn);
2773 } else {
2774 result =
2775 fp32_round_(sgn, exp - FP64_EXP_BIAS + FP32_EXP_BIAS,
2776 mnt >> (FP64_MANT_BITS - FP32_BITS) |
2777 !!(mnt & ((1ULL << (FP64_MANT_BITS - FP32_BITS)) - 1)),
2778 rounding, mode, &flags);
2779 }
2780
2781 set_fpscr0(fpscr, flags);
2782
2783 return result;
2784}
2785
2786template <>
2787uint64_t
2788fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr)
2789{
2790 int mode = modeConv(fpscr);
2791 int flags = 0;
2792 int sgn, exp;
2793 uint16_t mnt;
2794 uint64_t result;
2795
2796 // Unpack floating-point operand optionally with flush-to-zero:
2797 fp16_unpack(&sgn, &exp, &mnt, op, mode & 0xf, &flags);
2798
2799 if (fp16_is_NaN(exp, mnt) && !fpscr.ahp) {
2800 if (fpscr.dn) {
2801 result = fp64_defaultNaN();
2802 } else {
2803 result = fp64_FPConvertNaN_16(op);
2804 }
2805 if (!(mnt >> (FP16_MANT_BITS - 1) & 1)) {
2806 flags |= FPLIB_IOC;
2807 }
2808 } else if (exp == FP16_EXP_INF && !fpscr.ahp) {
2809 result = fp64_infinity(sgn);
2810 } else if (!mnt) {
2811 result = fp64_zero(sgn);
2812 } else {
2813 mnt = fp16_normalise(mnt, &exp);
2814 result = fp64_pack(sgn, (exp - FP16_EXP_BIAS +
2815 FP64_EXP_BIAS + FP16_EXP_BITS),
2816 (uint64_t)mnt << (FP64_MANT_BITS - FP16_BITS + 1));
2817 }
2818
2819 set_fpscr0(fpscr, flags);
2820
2821 return result;
2822}
2823
2824template <>
2825uint64_t
2826fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr)
2827{
2828 int mode = modeConv(fpscr);
2829 int flags = 0;
2830 int sgn, exp;
2831 uint32_t mnt;
2832 uint64_t result;
2833
2834 // Unpack floating-point operand optionally with flush-to-zero:
2835 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2836
2837 if (fp32_is_NaN(exp, mnt)) {
2838 if (fpscr.dn) {
2839 result = fp64_defaultNaN();
2840 } else {
2841 result = fp64_FPConvertNaN_32(op);
2842 }
2843 if (!(mnt >> (FP32_MANT_BITS - 1) & 1)) {
2844 flags |= FPLIB_IOC;
2845 }
2846 } else if (exp == FP32_EXP_INF) {
2847 result = fp64_infinity(sgn);
2848 } else if (!mnt) {
2849 result = fp64_zero(sgn);
2850 } else {
2851 mnt = fp32_normalise(mnt, &exp);
2852 result = fp64_pack(sgn, (exp - FP32_EXP_BIAS +
2853 FP64_EXP_BIAS + FP32_EXP_BITS),
2854 (uint64_t)mnt << (FP64_MANT_BITS - FP32_BITS + 1));
2855 }
2856
2857 set_fpscr0(fpscr, flags);
2858
2859 return result;
2860}
2861
2862template <>
2863uint16_t
2864fplibMulAdd(uint16_t addend, uint16_t op1, uint16_t op2, FPSCR &fpscr)
2865{
2866 int flags = 0;
2867 uint16_t result = fp16_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags);
2868 set_fpscr0(fpscr, flags);
2869 return result;
2870}
2871
2872template <>
2873uint32_t
2874fplibMulAdd(uint32_t addend, uint32_t op1, uint32_t op2, FPSCR &fpscr)
2875{
2876 int flags = 0;
2877 uint32_t result = fp32_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags);
2878 set_fpscr0(fpscr, flags);
2879 return result;
2880}
2881
2882template <>
2883uint64_t
2884fplibMulAdd(uint64_t addend, uint64_t op1, uint64_t op2, FPSCR &fpscr)
2885{
2886 int flags = 0;
2887 uint64_t result = fp64_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags);
2888 set_fpscr0(fpscr, flags);
2889 return result;
2890}
2891
2892template <>
2893uint16_t
2894fplibDiv(uint16_t op1, uint16_t op2, FPSCR &fpscr)
2895{
2896 int flags = 0;
2897 uint16_t result = fp16_div(op1, op2, modeConv(fpscr), &flags);
2898 set_fpscr0(fpscr, flags);
2899 return result;
2900}
2901
2902template <>
2903uint32_t
2904fplibDiv(uint32_t op1, uint32_t op2, FPSCR &fpscr)
2905{
2906 int flags = 0;
2907 uint32_t result = fp32_div(op1, op2, modeConv(fpscr), &flags);
2908 set_fpscr0(fpscr, flags);
2909 return result;
2910}
2911
2912template <>
2913uint64_t
2914fplibDiv(uint64_t op1, uint64_t op2, FPSCR &fpscr)
2915{
2916 int flags = 0;
2917 uint64_t result = fp64_div(op1, op2, modeConv(fpscr), &flags);
2918 set_fpscr0(fpscr, flags);
2919 return result;
2920}
2921
2922template <>
2923uint16_t
2924fplibExpA(uint16_t op)
2925{
2926 static uint16_t coeff[32] = {
2927 0x0000,
2928 0x0016,
2929 0x002d,
2930 0x0045,
2931 0x005d,
2932 0x0075,
2933 0x008e,
2934 0x00a8,
2935 0x00c2,
2936 0x00dc,
2937 0x00f8,
2938 0x0114,
2939 0x0130,
2940 0x014d,
2941 0x016b,
2942 0x0189,
2943 0x01a8,
2944 0x01c8,
2945 0x01e8,
2946 0x0209,
2947 0x022b,
2948 0x024e,
2949 0x0271,
2950 0x0295,
2951 0x02ba,
2952 0x02e0,
2953 0x0306,
2954 0x032e,
2955 0x0356,
2956 0x037f,
2957 0x03a9,
2958 0x03d4
2959 };
2960 return ((((op >> 5) & ((1 << FP16_EXP_BITS) - 1)) << FP16_MANT_BITS) |
2961 coeff[op & ((1 << 5) - 1)]);
2962}
2963
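// Editorial note: the fplibExpA tables implement the SVE FEXPA lookup.
// Entry i holds the fraction bits of 2^(i/N), with N the table size (32
// above for half precision, 64 below for single and double); the selected
// fraction is spliced under the exponent bits taken from the upper part of
// op. For example, coeff[16] above is 0x01a8 = 424, the 10 fraction bits
// of 2^(16/32) = sqrt(2) = 1.4142... (424/1024 = 0.4141).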
2964template <>
2965uint32_t
2966fplibExpA(uint32_t op)
2967{
2968 static uint32_t coeff[64] = {
2969 0x000000,
2970 0x0164d2,
2971 0x02cd87,
2972 0x043a29,
2973 0x05aac3,
2974 0x071f62,
2975 0x08980f,
2976 0x0a14d5,
2977 0x0b95c2,
2978 0x0d1adf,
2979 0x0ea43a,
2980 0x1031dc,
2981 0x11c3d3,
2982 0x135a2b,
2983 0x14f4f0,
2984 0x16942d,
2985 0x1837f0,
2986 0x19e046,
2987 0x1b8d3a,
2988 0x1d3eda,
2989 0x1ef532,
2990 0x20b051,
2991 0x227043,
2992 0x243516,
2993 0x25fed7,
2994 0x27cd94,
2995 0x29a15b,
2996 0x2b7a3a,
2997 0x2d583f,
2998 0x2f3b79,
2999 0x3123f6,
3000 0x3311c4,
3001 0x3504f3,
3002 0x36fd92,
3003 0x38fbaf,
3004 0x3aff5b,
3005 0x3d08a4,
3006 0x3f179a,
3007 0x412c4d,
3008 0x4346cd,
3009 0x45672a,
3010 0x478d75,
3011 0x49b9be,
3012 0x4bec15,
3013 0x4e248c,
3014 0x506334,
3015 0x52a81e,
3016 0x54f35b,
3017 0x5744fd,
3018 0x599d16,
3019 0x5bfbb8,
3020 0x5e60f5,
3021 0x60ccdf,
3022 0x633f89,
3023 0x65b907,
3024 0x68396a,
3025 0x6ac0c7,
3026 0x6d4f30,
3027 0x6fe4ba,
3028 0x728177,
3029 0x75257d,
3030 0x77d0df,
3031 0x7a83b3,
3032 0x7d3e0c
3033 };
3034 return ((((op >> 6) & ((1 << FP32_EXP_BITS) - 1)) << FP32_MANT_BITS) |
3035 coeff[op & ((1 << 6) - 1)]);
3036}
3037
3038template <>
3039uint64_t
3040fplibExpA(uint64_t op)
3041{
3042 static uint64_t coeff[64] = {
3043 0x0000000000000ULL,
3044 0x02c9a3e778061ULL,
3045 0x059b0d3158574ULL,
3046 0x0874518759bc8ULL,
3047 0x0b5586cf9890fULL,
3048 0x0e3ec32d3d1a2ULL,
3049 0x11301d0125b51ULL,
3050 0x1429aaea92de0ULL,
3051 0x172b83c7d517bULL,
3052 0x1a35beb6fcb75ULL,
3053 0x1d4873168b9aaULL,
3054 0x2063b88628cd6ULL,
3055 0x2387a6e756238ULL,
3056 0x26b4565e27cddULL,
3057 0x29e9df51fdee1ULL,
3058 0x2d285a6e4030bULL,
3059 0x306fe0a31b715ULL,
3060 0x33c08b26416ffULL,
3061 0x371a7373aa9cbULL,
3062 0x3a7db34e59ff7ULL,
3063 0x3dea64c123422ULL,
3064 0x4160a21f72e2aULL,
3065 0x44e086061892dULL,
3066 0x486a2b5c13cd0ULL,
3067 0x4bfdad5362a27ULL,
3068 0x4f9b2769d2ca7ULL,
3069 0x5342b569d4f82ULL,
3070 0x56f4736b527daULL,
3071 0x5ab07dd485429ULL,
3072 0x5e76f15ad2148ULL,
3073 0x6247eb03a5585ULL,
3074 0x6623882552225ULL,
3075 0x6a09e667f3bcdULL,
3076 0x6dfb23c651a2fULL,
3077 0x71f75e8ec5f74ULL,
3078 0x75feb564267c9ULL,
3079 0x7a11473eb0187ULL,
3080 0x7e2f336cf4e62ULL,
3081 0x82589994cce13ULL,
3082 0x868d99b4492edULL,
3083 0x8ace5422aa0dbULL,
3084 0x8f1ae99157736ULL,
3085 0x93737b0cdc5e5ULL,
3086 0x97d829fde4e50ULL,
3087 0x9c49182a3f090ULL,
3088 0xa0c667b5de565ULL,
3089 0xa5503b23e255dULL,
3090 0xa9e6b5579fdbfULL,
3091 0xae89f995ad3adULL,
3092 0xb33a2b84f15fbULL,
3093 0xb7f76f2fb5e47ULL,
3094 0xbcc1e904bc1d2ULL,
3095 0xc199bdd85529cULL,
3096 0xc67f12e57d14bULL,
3097 0xcb720dcef9069ULL,
3098 0xd072d4a07897cULL,
3099 0xd5818dcfba487ULL,
3100 0xda9e603db3285ULL,
3101 0xdfc97337b9b5fULL,
3102 0xe502ee78b3ff6ULL,
3103 0xea4afa2a490daULL,
3104 0xefa1bee615a27ULL,
3105 0xf50765b6e4540ULL,
3106 0xfa7c1819e90d8ULL
3107 };
3108 return ((((op >> 6) & ((1 << FP64_EXP_BITS) - 1)) << FP64_MANT_BITS) |
3109 coeff[op & ((1 << 6) - 1)]);
3110}
3111
3112static uint16_t
3113fp16_repack(int sgn, int exp, uint16_t mnt)
3114{
3115 return fp16_pack(sgn, mnt >> FP16_MANT_BITS ? exp : 0, mnt);
3116}
3117
3118static uint32_t
3119fp32_repack(int sgn, int exp, uint32_t mnt)
3120{
3121 return fp32_pack(sgn, mnt >> FP32_MANT_BITS ? exp : 0, mnt);
3122}
3123
3124static uint64_t
3125fp64_repack(int sgn, int exp, uint64_t mnt)
3126{
3127 return fp64_pack(sgn, mnt >> FP64_MANT_BITS ? exp : 0, mnt);
3128}
3129
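// Editorial note: fp*_repack re-forms a value whose significand may have
// lost its integer bit; if bit MANT_BITS is clear the value is subnormal,
// so the exponent field must be written as zero. For example (half
// precision), fp16_repack(0, 1, 0x0200) packs exponent 0, giving the
// subnormal encoding 0x0200, whereas fp16_repack(0, 1, 0x0400) keeps
// exponent 1.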
3130static void
3131fp16_minmaxnum(uint16_t *op1, uint16_t *op2, int sgn)
3132{
3133 // Treat a single quiet-NaN as +Infinity/-Infinity
3134 if (!((uint16_t)~(*op1 << 1) >> FP16_MANT_BITS) &&
3135 (uint16_t)~(*op2 << 1) >> FP16_MANT_BITS)
3136 *op1 = fp16_infinity(sgn);
3137 if (!((uint16_t)~(*op2 << 1) >> FP16_MANT_BITS) &&
3138 (uint16_t)~(*op1 << 1) >> FP16_MANT_BITS)
3139 *op2 = fp16_infinity(sgn);
3140}
3141
3142static void
3143fp32_minmaxnum(uint32_t *op1, uint32_t *op2, int sgn)
3144{
3145 // Treat a single quiet-NaN as +Infinity/-Infinity
3146 if (!((uint32_t)~(*op1 << 1) >> FP32_MANT_BITS) &&
3147 (uint32_t)~(*op2 << 1) >> FP32_MANT_BITS)
3148 *op1 = fp32_infinity(sgn);
3149 if (!((uint32_t)~(*op2 << 1) >> FP32_MANT_BITS) &&
3150 (uint32_t)~(*op1 << 1) >> FP32_MANT_BITS)
3151 *op2 = fp32_infinity(sgn);
3152}
3153
3154static void
3155fp64_minmaxnum(uint64_t *op1, uint64_t *op2, int sgn)
3156{
3157 // Treat a single quiet-NaN as +Infinity/-Infinity
3158 if (!((uint64_t)~(*op1 << 1) >> FP64_MANT_BITS) &&
3159 (uint64_t)~(*op2 << 1) >> FP64_MANT_BITS)
3160 *op1 = fp64_infinity(sgn);
3161 if (!((uint64_t)~(*op2 << 1) >> FP64_MANT_BITS) &&
3162 (uint64_t)~(*op1 << 1) >> FP64_MANT_BITS)
3163 *op2 = fp64_infinity(sgn);
3164}
3165
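// Editorial note: the test used above, !(~(op << 1) >> MANT_BITS), is true
// exactly for a quiet NaN -- shifting out the sign leaves all ones in the
// exponent and the top fraction bit. Worked half-precision example:
//
//     op = 0x7e00 (quiet NaN):      ~(op << 1) = 0x03ff, >> 10 == 0
//     op = 0x7c01 (signalling NaN): ~(op << 1) = 0x07fd, >> 10 == 1
//
// So a lone quiet NaN is replaced by +/-infinity and loses the MaxNum /
// MinNum selection, while a signalling NaN still propagates through the
// NaN handling in Max/Min below.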
3166template <>
3167uint16_t
3168fplibMax(uint16_t op1, uint16_t op2, FPSCR &fpscr)
3169{
3170 int mode = modeConv(fpscr);
3171 int flags = 0;
3172 int sgn1, exp1, sgn2, exp2;
3173 uint16_t mnt1, mnt2, x, result;
3174
3175 fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3176 fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3177
3178 if ((x = fp16_process_NaNs(op1, op2, mode, &flags))) {
3179 result = x;
3180 } else {
3181 result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ?
3182 fp16_repack(sgn1, exp1, mnt1) :
3183 fp16_repack(sgn2, exp2, mnt2));
3184 }
3185 set_fpscr0(fpscr, flags);
3186 return result;
3187}
3188
3189template <>
3190uint32_t
3191fplibMax(uint32_t op1, uint32_t op2, FPSCR &fpscr)
3192{
3193 int mode = modeConv(fpscr);
3194 int flags = 0;
3195 int sgn1, exp1, sgn2, exp2;
3196 uint32_t mnt1, mnt2, x, result;
3197
3198 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3199 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3200
3201 if ((x = fp32_process_NaNs(op1, op2, mode, &flags))) {
3202 result = x;
3203 } else {
3204 result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ?
3205 fp32_repack(sgn1, exp1, mnt1) :
3206 fp32_repack(sgn2, exp2, mnt2));
3207 }
3208 set_fpscr0(fpscr, flags);
3209 return result;
3210}
3211
3212template <>
3213uint64_t
3214fplibMax(uint64_t op1, uint64_t op2, FPSCR &fpscr)
3215{
3216 int mode = modeConv(fpscr);
3217 int flags = 0;
3218 int sgn1, exp1, sgn2, exp2;
3219 uint64_t mnt1, mnt2, x, result;
3220
3221 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3222 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3223
3224 if ((x = fp64_process_NaNs(op1, op2, mode, &flags))) {
3225 result = x;
3226 } else {
3227 result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ?
3228 fp64_repack(sgn1, exp1, mnt1) :
3229 fp64_repack(sgn2, exp2, mnt2));
3230 }
3231 set_fpscr0(fpscr, flags);
3232 return result;
3233}
3234
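// Editorial sketch: once NaNs are excluded, IEEE values of equal sign
// order like their raw bit patterns (reversed for negatives), so the
// selection above needs only one integer compare. A hypothetical
// standalone form of the same idea:
//
//     static inline uint32_t
//     fp32_max_bits_sketch(uint32_t a, uint32_t b)
//     {
//         int sa = a >> (FP32_BITS - 1), sb = b >> (FP32_BITS - 1);
//         // signs differ: the non-negative operand wins; same sign:
//         // larger magnitude wins for positives, smaller for negatives
//         bool pick_a = sa != sb ? sb : sa ^ (a > b);
//         return pick_a ? a : b;
//     }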
3235template <>
3236uint16_t
3237fplibMaxNum(uint16_t op1, uint16_t op2, FPSCR &fpscr)
3238{
3239 fp16_minmaxnum(&op1, &op2, 1);
3240 return fplibMax<uint16_t>(op1, op2, fpscr);
3241}
3242
3243template <>
3244uint32_t
3245fplibMaxNum(uint32_t op1, uint32_t op2, FPSCR &fpscr)
3246{
3247 fp32_minmaxnum(&op1, &op2, 1);
3248 return fplibMax<uint32_t>(op1, op2, fpscr);
3249}
3250
3251template <>
3252uint64_t
3253fplibMaxNum(uint64_t op1, uint64_t op2, FPSCR &fpscr)
3254{
3255 fp64_minmaxnum(&op1, &op2, 1);
3256 return fplibMax<uint64_t>(op1, op2, fpscr);
3257}
3258
3259template <>
3260uint16_t
3261fplibMin(uint16_t op1, uint16_t op2, FPSCR &fpscr)
3262{
3263 int mode = modeConv(fpscr);
3264 int flags = 0;
3265 int sgn1, exp1, sgn2, exp2;
3266 uint16_t mnt1, mnt2, x, result;
3267
3268 fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3269 fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3270
3271 if ((x = fp16_process_NaNs(op1, op2, mode, &flags))) {
3272 result = x;
3273 } else {
3274 result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ?
3275 fp16_repack(sgn1, exp1, mnt1) :
3276 fp16_repack(sgn2, exp2, mnt2));
3277 }
3278 set_fpscr0(fpscr, flags);
3279 return result;
3280}
3281
3282template <>
3283uint32_t
3284fplibMin(uint32_t op1, uint32_t op2, FPSCR &fpscr)
3285{
3286 int mode = modeConv(fpscr);
3287 int flags = 0;
3288 int sgn1, exp1, sgn2, exp2;
3289 uint32_t mnt1, mnt2, x, result;
3290
3291 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3292 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3293
3294 if ((x = fp32_process_NaNs(op1, op2, mode, &flags))) {
3295 result = x;
3296 } else {
3297 result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ?
3298 fp32_repack(sgn1, exp1, mnt1) :
3299 fp32_repack(sgn2, exp2, mnt2));
3300 }
3301 set_fpscr0(fpscr, flags);
3302 return result;
3303}
3304
3305template <>
3306uint64_t
3307fplibMin(uint64_t op1, uint64_t op2, FPSCR &fpscr)
3308{
3309 int mode = modeConv(fpscr);
3310 int flags = 0;
3311 int sgn1, exp1, sgn2, exp2;
3312 uint64_t mnt1, mnt2, x, result;
3313
3314 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3315 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3316
3317 if ((x = fp64_process_NaNs(op1, op2, mode, &flags))) {
3318 result = x;
3319 } else {
3320 result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ?
3321 fp64_repack(sgn1, exp1, mnt1) :
3322 fp64_repack(sgn2, exp2, mnt2));
3323 }
3324 set_fpscr0(fpscr, flags);
3325 return result;
3326}
3327
3328template <>
3329uint16_t
3330fplibMinNum(uint16_t op1, uint16_t op2, FPSCR &fpscr)
3331{
3332 fp16_minmaxnum(&op1, &op2, 0);
3333 return fplibMin<uint16_t>(op1, op2, fpscr);
3334}
3335
3336template <>
3337uint32_t
3338fplibMinNum(uint32_t op1, uint32_t op2, FPSCR &fpscr)
3339{
3340 fp32_minmaxnum(&op1, &op2, 0);
3341 return fplibMin<uint32_t>(op1, op2, fpscr);
3342}
3343
3344template <>
3345uint64_t
3346fplibMinNum(uint64_t op1, uint64_t op2, FPSCR &fpscr)
3347{
3348 fp64_minmaxnum(&op1, &op2, 0);
3349 return fplibMin<uint64_t>(op1, op2, fpscr);
3350}
3351
3352template <>
3353uint16_t
3354fplibMul(uint16_t op1, uint16_t op2, FPSCR &fpscr)
3355{
3356 int flags = 0;
3357 uint16_t result = fp16_mul(op1, op2, modeConv(fpscr), &flags);
3358 set_fpscr0(fpscr, flags);
3359 return result;
3360}
3361
3362template <>
3363uint32_t
3364fplibMul(uint32_t op1, uint32_t op2, FPSCR &fpscr)
3365{
3366 int flags = 0;
3367 uint32_t result = fp32_mul(op1, op2, modeConv(fpscr), &flags);
3368 set_fpscr0(fpscr, flags);
3369 return result;
3370}
3371
3372template <>
3373uint64_t
3374fplibMul(uint64_t op1, uint64_t op2, FPSCR &fpscr)
3375{
3376 int flags = 0;
3377 uint64_t result = fp64_mul(op1, op2, modeConv(fpscr), &flags);
3378 set_fpscr0(fpscr, flags);
3379 return result;
3380}
3381
3382template <>
3383uint16_t
3384fplibMulX(uint16_t op1, uint16_t op2, FPSCR &fpscr)
3385{
3386 int mode = modeConv(fpscr);
3387 int flags = 0;
3388 int sgn1, exp1, sgn2, exp2;
3389 uint16_t mnt1, mnt2, result;
3390
3391 fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3392 fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3393
3394 result = fp16_process_NaNs(op1, op2, mode, &flags);
3395 if (!result) {
3396 if ((exp1 == FP16_EXP_INF && !mnt2) ||
3397 (exp2 == FP16_EXP_INF && !mnt1)) {
3398 result = fp16_FPTwo(sgn1 ^ sgn2);
3399 } else if (exp1 == FP16_EXP_INF || exp2 == FP16_EXP_INF) {
3400 result = fp16_infinity(sgn1 ^ sgn2);
3401 } else if (!mnt1 || !mnt2) {
3402 result = fp16_zero(sgn1 ^ sgn2);
3403 } else {
3404 result = fp16_mul(op1, op2, mode, &flags);
3405 }
3406 }
3407
3408 set_fpscr0(fpscr, flags);
3409
3410 return result;
3411}
3412
3413template <>
3414uint32_t
3415fplibMulX(uint32_t op1, uint32_t op2, FPSCR &fpscr)
3416{
3417 int mode = modeConv(fpscr);
3418 int flags = 0;
3419 int sgn1, exp1, sgn2, exp2;
3420 uint32_t mnt1, mnt2, result;
3421
3422 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3423 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3424
3425 result = fp32_process_NaNs(op1, op2, mode, &flags);
3426 if (!result) {
3427 if ((exp1 == FP32_EXP_INF && !mnt2) ||
3428 (exp2 == FP32_EXP_INF && !mnt1)) {
3429 result = fp32_FPTwo(sgn1 ^ sgn2);
3430 } else if (exp1 == FP32_EXP_INF || exp2 == FP32_EXP_INF) {
3431 result = fp32_infinity(sgn1 ^ sgn2);
3432 } else if (!mnt1 || !mnt2) {
3433 result = fp32_zero(sgn1 ^ sgn2);
3434 } else {
3435 result = fp32_mul(op1, op2, mode, &flags);
3436 }
3437 }
3438
3439 set_fpscr0(fpscr, flags);
3440
3441 return result;
3442}
3443
3444template <>
3445uint64_t
3446fplibMulX(uint64_t op1, uint64_t op2, FPSCR &fpscr)
3447{
3448 int mode = modeConv(fpscr);
3449 int flags = 0;
3450 int sgn1, exp1, sgn2, exp2;
3451 uint64_t mnt1, mnt2, result;
3452
3453 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3454 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3455
3456 result = fp64_process_NaNs(op1, op2, mode, &flags);
3457 if (!result) {
3458 if ((exp1 == FP64_EXP_INF && !mnt2) ||
3459 (exp2 == FP64_EXP_INF && !mnt1)) {
3460 result = fp64_FPTwo(sgn1 ^ sgn2);
3461 } else if (exp1 == FP64_EXP_INF || exp2 == FP64_EXP_INF) {
3462 result = fp64_infinity(sgn1 ^ sgn2);
3463 } else if (!mnt1 || !mnt2) {
3464 result = fp64_zero(sgn1 ^ sgn2);
3465 } else {
3466 result = fp64_mul(op1, op2, mode, &flags);
3467 }
3468 }
3469
3470 set_fpscr0(fpscr, flags);
3471
3472 return result;
3473}
3474
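// Editorial note: FMULX differs from an ordinary multiply only in the
// 0 * infinity case, which returns +/-2.0 instead of raising Invalid
// Operation and producing a NaN; the Neon/SVE reciprocal step sequences
// rely on this. For example (single precision):
//
//     fplibMulX<uint32_t>(0x7f800000 /* +inf */, 0x00000000 /* +0 */, fpscr)
//     // returns fp32_FPTwo(0) == 0x40000000, i.e. 2.0f, with no IOC flag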
3475template <>
3476uint16_t
3477fplibNeg(uint16_t op)
3478{
3479 return op ^ 1ULL << (FP16_BITS - 1);
3480}
3481
3482template <>
3483uint32_t
3484fplibNeg(uint32_t op)
3485{
3486 return op ^ 1ULL << (FP32_BITS - 1);
3487}
3488
3489template <>
3490uint64_t
3491fplibNeg(uint64_t op)
3492{
3493 return op ^ 1ULL << (FP64_BITS - 1);
3494}
3495
3496static const uint8_t recip_sqrt_estimate[256] = {
3497 255, 253, 251, 249, 247, 245, 243, 242, 240, 238, 236, 234, 233, 231, 229, 228,
3498 226, 224, 223, 221, 219, 218, 216, 215, 213, 212, 210, 209, 207, 206, 204, 203,
3499 201, 200, 198, 197, 196, 194, 193, 192, 190, 189, 188, 186, 185, 184, 183, 181,
3500 180, 179, 178, 176, 175, 174, 173, 172, 170, 169, 168, 167, 166, 165, 164, 163,
3501 162, 160, 159, 158, 157, 156, 155, 154, 153, 152, 151, 150, 149, 148, 147, 146,
3502 145, 144, 143, 142, 141, 140, 140, 139, 138, 137, 136, 135, 134, 133, 132, 131,
3503 131, 130, 129, 128, 127, 126, 126, 125, 124, 123, 122, 121, 121, 120, 119, 118,
3504 118, 117, 116, 115, 114, 114, 113, 112, 111, 111, 110, 109, 109, 108, 107, 106,
3505 105, 104, 103, 101, 100, 99, 97, 96, 95, 93, 92, 91, 90, 88, 87, 86,
3506 85, 84, 82, 81, 80, 79, 78, 77, 76, 75, 74, 72, 71, 70, 69, 68,
3507 67, 66, 65, 64, 63, 62, 61, 60, 60, 59, 58, 57, 56, 55, 54, 53,
3508 52, 51, 51, 50, 49, 48, 47, 46, 46, 45, 44, 43, 42, 42, 41, 40,
3509 39, 38, 38, 37, 36, 35, 35, 34, 33, 33, 32, 31, 30, 30, 29, 28,
3510 28, 27, 26, 26, 25, 24, 24, 23, 22, 22, 21, 20, 20, 19, 19, 18,
3511 17, 17, 16, 16, 15, 14, 14, 13, 13, 12, 11, 11, 10, 10, 9, 9,
3512 8, 8, 7, 6, 6, 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 0
3513};
3514
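// Editorial note: this is the 256-entry estimate table from the ARM ARM's
// FRSQRTE pseudocode. The index combines the exponent's parity (bit 7)
// with the leading seven fraction bits, and the 8-bit entry becomes the
// top of the result fraction. Worked single-precision example (see
// fplibRSqrtEstimate below):
//
//     op = 0x40800000 (4.0f): normalised exp 129, index 0, entry 255
//     result exponent = (3 * 127 - 129 - 1) >> 1 = 125
//     result = 0x3eff8000 = 0.49902344f, vs. the exact 1/sqrt(4) = 0.5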
3515template <>
3516uint16_t
3517fplibRSqrtEstimate(uint16_t op, FPSCR &fpscr)
3518{
3519 int mode = modeConv(fpscr);
3520 int flags = 0;
3521 int sgn, exp;
3522 uint16_t mnt, result;
3523
3524 fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags);
3525
3526 if (fp16_is_NaN(exp, mnt)) {
3527 result = fp16_process_NaN(op, mode, &flags);
3528 } else if (!mnt) {
3529 result = fp16_infinity(sgn);
3530 flags |= FPLIB_DZC;
3531 } else if (sgn) {
3532 result = fp16_defaultNaN();
3533 flags |= FPLIB_IOC;
3534 } else if (exp == FP16_EXP_INF) {
3535 result = fp16_zero(0);
3536 } else {
3537 exp += FP16_EXP_BITS;
3538 mnt = fp16_normalise(mnt, &exp);
3539 mnt = recip_sqrt_estimate[(~exp & 1) << 7 |
3540 (mnt >> (FP16_BITS - 8) & 127)];
3541 result = fp16_pack(0, (3 * FP16_EXP_BIAS - exp - 1) >> 1,
3542 mnt << (FP16_MANT_BITS - 8));
3543 }
3544
3545 set_fpscr0(fpscr, flags);
3546
3547 return result;
3548}
3549
3550template <>
3551uint32_t
3552fplibRSqrtEstimate(uint32_t op, FPSCR &fpscr)
3553{
3554 int mode = modeConv(fpscr);
3555 int flags = 0;
3556 int sgn, exp;
3557 uint32_t mnt, result;
3558
3559 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
3560
3561 if (fp32_is_NaN(exp, mnt)) {
3562 result = fp32_process_NaN(op, mode, &flags);
3563 } else if (!mnt) {
3564 result = fp32_infinity(sgn);
3565 flags |= FPLIB_DZC;
3566 } else if (sgn) {
3567 result = fp32_defaultNaN();
3568 flags |= FPLIB_IOC;
3569 } else if (exp == FP32_EXP_INF) {
3570 result = fp32_zero(0);
3571 } else {
3572 exp += FP32_EXP_BITS;
3573 mnt = fp32_normalise(mnt, &exp);
3574 mnt = recip_sqrt_estimate[(~exp & 1) << 7 |
3575 (mnt >> (FP32_BITS - 8) & 127)];
3576 result = fp32_pack(0, (3 * FP32_EXP_BIAS - exp - 1) >> 1,
3577 mnt << (FP32_MANT_BITS - 8));
3578 }
3579
3580 set_fpscr0(fpscr, flags);
3581
3582 return result;
3583}
3584
3585template <>
3586uint64_t
3587fplibRSqrtEstimate(uint64_t op, FPSCR &fpscr)
3588{
3589 int mode = modeConv(fpscr);
3590 int flags = 0;
3591 int sgn, exp;
3592 uint64_t mnt, result;
3593
3594 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
3595
3596 if (fp64_is_NaN(exp, mnt)) {
3597 result = fp64_process_NaN(op, mode, &flags);
3598 } else if (!mnt) {
3599 result = fp64_infinity(sgn);
3600 flags |= FPLIB_DZC;
3601 } else if (sgn) {
3602 result = fp64_defaultNaN();
3603 flags |= FPLIB_IOC;
3604 } else if (exp == FP64_EXP_INF) {
3605 result = fp64_zero(0);
3606 } else {
3607 exp += FP64_EXP_BITS;
3608 mnt = fp64_normalise(mnt, &exp);
3609 mnt = recip_sqrt_estimate[(~exp & 1) << 7 |
3610 (mnt >> (FP64_BITS - 8) & 127)];
3611 result = fp64_pack(0, (3 * FP64_EXP_BIAS - exp - 1) >> 1,
3612 mnt << (FP64_MANT_BITS - 8));
3613 }
3614
3615 set_fpscr0(fpscr, flags);
3616
3617 return result;
3618}
3619
3620template <>
3621uint16_t
3622fplibRSqrtStepFused(uint16_t op1, uint16_t op2, FPSCR &fpscr)
3623{
3624 int mode = modeConv(fpscr);
3625 int flags = 0;
3626 int sgn1, exp1, sgn2, exp2;
3627 uint16_t mnt1, mnt2, result;
3628
3629 op1 = fplibNeg<uint16_t>(op1);
3630 fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3631 fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3632
3633 result = fp16_process_NaNs(op1, op2, mode, &flags);
3634 if (!result) {
3635 if ((exp1 == FP16_EXP_INF && !mnt2) ||
3636 (exp2 == FP16_EXP_INF && !mnt1)) {
3637 result = fp16_FPOnePointFive(0);
3638 } else if (exp1 == FP16_EXP_INF || exp2 == FP16_EXP_INF) {
3639 result = fp16_infinity(sgn1 ^ sgn2);
3640 } else {
3641 result = fp16_muladd(fp16_FPThree(0), op1, op2, -1, mode, &flags);
3642 }
3643 }
3644
3645 set_fpscr0(fpscr, flags);
3646
3647 return result;
3648}
3649
3650template <>
3651uint32_t
3652fplibRSqrtStepFused(uint32_t op1, uint32_t op2, FPSCR &fpscr)
3653{
3654 int mode = modeConv(fpscr);
3655 int flags = 0;
3656 int sgn1, exp1, sgn2, exp2;
3657 uint32_t mnt1, mnt2, result;
3658
3659 op1 = fplibNeg<uint32_t>(op1);
3660 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3661 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3662
3663 result = fp32_process_NaNs(op1, op2, mode, &flags);
3664 if (!result) {
3665 if ((exp1 == FP32_EXP_INF && !mnt2) ||
3666 (exp2 == FP32_EXP_INF && !mnt1)) {
3667 result = fp32_FPOnePointFive(0);
3668 } else if (exp1 == FP32_EXP_INF || exp2 == FP32_EXP_INF) {
3669 result = fp32_infinity(sgn1 ^ sgn2);
3670 } else {
3671 result = fp32_muladd(fp32_FPThree(0), op1, op2, -1, mode, &flags);
3672 }
3673 }
3674
3675 set_fpscr0(fpscr, flags);
3676
3677 return result;
3678}
3679
3680template <>
3681uint64_t
3682fplibRSqrtStepFused(uint64_t op1, uint64_t op2, FPSCR &fpscr)
3683{
3684 int mode = modeConv(fpscr);
3685 int flags = 0;
3686 int sgn1, exp1, sgn2, exp2;
3687 uint64_t mnt1, mnt2, result;
3688
3689 op1 = fplibNeg<uint64_t>(op1);
3690 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3691 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3692
3693 result = fp64_process_NaNs(op1, op2, mode, &flags);
3694 if (!result) {
3695 if ((exp1 == FP64_EXP_INF && !mnt2) ||
3696 (exp2 == FP64_EXP_INF && !mnt1)) {
3697 result = fp64_FPOnePointFive(0);
3698 } else if (exp1 == FP64_EXP_INF || exp2 == FP64_EXP_INF) {
3699 result = fp64_infinity(sgn1 ^ sgn2);
3700 } else {
3701 result = fp64_muladd(fp64_FPThree(0), op1, op2, -1, mode, &flags);
3702 }
3703 }
3704
3705 set_fpscr0(fpscr, flags);
3706
3707 return result;
3708}
3709
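// Editorial note: FRSQRTS computes (3 - op1 * op2) / 2; op1 is negated up
// front and the trailing muladd argument of -1 halves the result. With an
// estimate y for 1/sqrt(x), one Newton-Raphson refinement is (sketch only,
// composing this library's own entry points):
//
//     // y' = y * (3 - (x * y) * y) / 2
//     uint32_t xy = fplibMulX<uint32_t>(x, y, fpscr);
//     uint32_t s  = fplibRSqrtStepFused<uint32_t>(xy, y, fpscr);
//     y = fplibMulX<uint32_t>(y, s, fpscr);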
3710template <>
3711uint16_t
3712fplibRecipEstimate(uint16_t op, FPSCR &fpscr)
3713{
3714 int mode = modeConv(fpscr);
3715 int flags = 0;
3716 int sgn, exp;
3717 uint16_t mnt, result;
3718
3719 fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags);
3720
3721 if (fp16_is_NaN(exp, mnt)) {
3722 result = fp16_process_NaN(op, mode, &flags);
3723 } else if (exp == FP16_EXP_INF) {
3724 result = fp16_zero(sgn);
3725 } else if (!mnt) {
3726 result = fp16_infinity(sgn);
3727 flags |= FPLIB_DZC;
3728 } else if (!((uint16_t)(op << 1) >> (FP16_MANT_BITS - 1))) {
3729 bool overflow_to_inf = false;
3730 switch (FPCRRounding(fpscr)) {
3731 case FPRounding_TIEEVEN:
3732 overflow_to_inf = true;
3733 break;
3734 case FPRounding_POSINF:
3735 overflow_to_inf = !sgn;
3736 break;
3737 case FPRounding_NEGINF:
3738 overflow_to_inf = sgn;
3739 break;
3740 case FPRounding_ZERO:
3741 overflow_to_inf = false;
3742 break;
3743 default:
3744 panic("Unrecognized FP rounding mode");
3744 }
3745 result = overflow_to_inf ? fp16_infinity(sgn) : fp16_max_normal(sgn);
3746 flags |= FPLIB_OFC | FPLIB_IXC;
3747 } else if (fpscr.fz16 && exp >= 2 * FP16_EXP_BIAS - 1) {
3748 result = fp16_zero(sgn);
3749 flags |= FPLIB_UFC;
3750 } else {
3751 exp += FP16_EXP_BITS;
3752 mnt = fp16_normalise(mnt, &exp);
3753 int result_exp = 2 * FP16_EXP_BIAS - 1 - exp;
3754 uint16_t fraction = (((uint32_t)1 << 19) /
3755 (mnt >> (FP16_BITS - 10) | 1) + 1) >> 1;
3756 fraction <<= FP16_MANT_BITS - 8;
3757 if (result_exp == 0) {
3758 fraction >>= 1;
3759 } else if (result_exp == -1) {
3760 fraction >>= 2;
3761 result_exp = 0;
3762 }
3763 result = fp16_pack(sgn, result_exp, fraction);
3764 }
3765
3766 set_fpscr0(fpscr, flags);
3767
3768 return result;
3769}
3770
3771template <>
3772uint32_t
3773fplibRecipEstimate(uint32_t op, FPSCR &fpscr)
3774{
3775 int mode = modeConv(fpscr);
3776 int flags = 0;
3777 int sgn, exp;
3778 uint32_t mnt, result;
3779
3780 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
3781
3782 if (fp32_is_NaN(exp, mnt)) {
3783 result = fp32_process_NaN(op, mode, &flags);
3784 } else if (exp == FP32_EXP_INF) {
3785 result = fp32_zero(sgn);
3786 } else if (!mnt) {
3787 result = fp32_infinity(sgn);
3788 flags |= FPLIB_DZC;
3789 } else if (!((uint32_t)(op << 1) >> (FP32_MANT_BITS - 1))) {
3790 bool overflow_to_inf = false;
3791 switch (FPCRRounding(fpscr)) {
3792 case FPRounding_TIEEVEN:
3793 overflow_to_inf = true;
3794 break;
3795 case FPRounding_POSINF:
3796 overflow_to_inf = !sgn;
3797 break;
3798 case FPRounding_NEGINF:
3799 overflow_to_inf = sgn;
3800 break;
3801 case FPRounding_ZERO:
3802 overflow_to_inf = false;
3803 break;
3804 default:
3805 panic("Unrecognized FP rounding mode");
3806 }
3807 result = overflow_to_inf ? fp32_infinity(sgn) : fp32_max_normal(sgn);
3808 flags |= FPLIB_OFC | FPLIB_IXC;
3809 } else if (fpscr.fz && exp >= 2 * FP32_EXP_BIAS - 1) {
3810 result = fp32_zero(sgn);
3811 flags |= FPLIB_UFC;
3812 } else {
3813 exp += FP32_EXP_BITS;
3814 mnt = fp32_normalise(mnt, &exp);
3815 int result_exp = 2 * FP32_EXP_BIAS - 1 - exp;
3816 uint32_t fraction = (((uint32_t)1 << 19) /
3817 (mnt >> (FP32_BITS - 10) | 1) + 1) >> 1;
3818 fraction <<= FP32_MANT_BITS - 8;
3819 if (result_exp == 0) {
3820 fraction >>= 1;
3821 } else if (result_exp == -1) {
3822 fraction >>= 2;
3823 result_exp = 0;
3824 }
3825 result = fp32_pack(sgn, result_exp, fraction);
3826 }
3827
3828 set_fpscr0(fpscr, flags);
3829
3830 return result;
3831}
3832
3833template <>
3834uint64_t
3835fplibRecipEstimate(uint64_t op, FPSCR &fpscr)
3836{
3837 int mode = modeConv(fpscr);
3838 int flags = 0;
3839 int sgn, exp;
3840 uint64_t mnt, result;
3841
3842 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
3843
3844 if (fp64_is_NaN(exp, mnt)) {
3845 result = fp64_process_NaN(op, mode, &flags);
3846 } else if (exp == FP64_EXP_INF) {
3847 result = fp64_zero(sgn);
3848 } else if (!mnt) {
3849 result = fp64_infinity(sgn);
3850 flags |= FPLIB_DZC;
3851 } else if (!((uint64_t)(op << 1) >> (FP64_MANT_BITS - 1))) {
3852 bool overflow_to_inf = false;
3853 switch (FPCRRounding(fpscr)) {
3854 case FPRounding_TIEEVEN:
3855 overflow_to_inf = true;
3856 break;
3857 case FPRounding_POSINF:
3858 overflow_to_inf = !sgn;
3859 break;
3860 case FPRounding_NEGINF:
3861 overflow_to_inf = sgn;
3862 break;
3863 case FPRounding_ZERO:
3864 overflow_to_inf = false;
3865 break;
3866 default:
3867 panic("Unrecognized FP rounding mode");
3868 }
3869 result = overflow_to_inf ? fp64_infinity(sgn) : fp64_max_normal(sgn);
3870 flags |= FPLIB_OFC | FPLIB_IXC;
3871 } else if (fpscr.fz && exp >= 2 * FP64_EXP_BIAS - 1) {
3872 result = fp64_zero(sgn);
3873 flags |= FPLIB_UFC;
3874 } else {
3875 exp += FP64_EXP_BITS;
3876 mnt = fp64_normalise(mnt, &exp);
3877 int result_exp = 2 * FP64_EXP_BIAS - 1 - exp;
3878 uint64_t fraction = (((uint32_t)1 << 19) /
3879 (mnt >> (FP64_BITS - 10) | 1) + 1) >> 1;
3880 fraction <<= FP64_MANT_BITS - 8;
3881 if (result_exp == 0) {
3882 fraction >>= 1;
3883 } else if (result_exp == -1) {
3884 fraction >>= 2;
3885 result_exp = 0;
3886 }
3887 result = fp64_pack(sgn, result_exp, fraction);
3888 }
3889
3890 set_fpscr0(fpscr, flags);
3891
3892 return result;
3893}
3894
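// Editorial note: unlike the square-root path, FRECPE derives its 9-bit
// estimate with an actual integer division of 2^19 by the (odd-forced) top
// ten significand bits, rounded to nearest; result_exp values of 0 and -1
// fall through to subnormal outputs. Worked single-precision example:
//
//     op = 0x40000000 (2.0f): normalised exp 128, divisor 512 | 1 = 513
//     fraction = ((1 << 19) / 513 + 1) >> 1 = 511
//     result exponent = 2 * 127 - 1 - 128 = 125
//     result = 0x3eff8000 = 0.49902344f, vs. the exact 1/2 = 0.5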
3895template <>
3896uint16_t
3897fplibRecipStepFused(uint16_t op1, uint16_t op2, FPSCR &fpscr)
3898{
3899 int mode = modeConv(fpscr);
3900 int flags = 0;
3901 int sgn1, exp1, sgn2, exp2;
3902 uint16_t mnt1, mnt2, result;
3903
3904 op1 = fplibNeg<uint16_t>(op1);
3905 fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3906 fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3907
3908 result = fp16_process_NaNs(op1, op2, mode, &flags);
3909 if (!result) {
3910 if ((exp1 == FP16_EXP_INF && !mnt2) ||
3911 (exp2 == FP16_EXP_INF && !mnt1)) {
3912 result = fp16_FPTwo(0);
3913 } else if (exp1 == FP16_EXP_INF || exp2 == FP16_EXP_INF) {
3914 result = fp16_infinity(sgn1 ^ sgn2);
3915 } else {
3916 result = fp16_muladd(fp16_FPTwo(0), op1, op2, 0, mode, &flags);
3917 }
3918 }
3919
3920 set_fpscr0(fpscr, flags);
3921
3922 return result;
3923}
3924
3925template <>
3926uint32_t
3927fplibRecipStepFused(uint32_t op1, uint32_t op2, FPSCR &fpscr)
3928{
3929 int mode = modeConv(fpscr);
3930 int flags = 0;
3931 int sgn1, exp1, sgn2, exp2;
3932 uint32_t mnt1, mnt2, result;
3933
3934 op1 = fplibNeg<uint32_t>(op1);
3935 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3936 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3937
3938 result = fp32_process_NaNs(op1, op2, mode, &flags);
3939 if (!result) {
3940 if ((exp1 == FP32_EXP_INF && !mnt2) ||
3941 (exp2 == FP32_EXP_INF && !mnt1)) {
3942 result = fp32_FPTwo(0);
3943 } else if (exp1 == FP32_EXP_INF || exp2 == FP32_EXP_INF) {
3944 result = fp32_infinity(sgn1 ^ sgn2);
3945 } else {
3946 result = fp32_muladd(fp32_FPTwo(0), op1, op2, 0, mode, &flags);
3947 }
3948 }
3949
3950 set_fpscr0(fpscr, flags);
3951
3952 return result;
3953}
3954
3955template <>
3956uint64_t
3957fplibRecipStepFused(uint64_t op1, uint64_t op2, FPSCR &fpscr)
3958{
3959 int mode = modeConv(fpscr);
3960 int flags = 0;
3961 int sgn1, exp1, sgn2, exp2;
3962 uint64_t mnt1, mnt2, result;
3963
3964 op1 = fplibNeg<uint64_t>(op1);
3965 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3966 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3967
3968 result = fp64_process_NaNs(op1, op2, mode, &flags);
3969 if (!result) {
3970 if ((exp1 == FP64_EXP_INF && !mnt2) ||
3971 (exp2 == FP64_EXP_INF && !mnt1)) {
3972 result = fp64_FPTwo(0);
3973 } else if (exp1 == FP64_EXP_INF || exp2 == FP64_EXP_INF) {
3974 result = fp64_infinity(sgn1 ^ sgn2);
3975 } else {
3976 result = fp64_muladd(fp64_FPTwo(0), op1, op2, 0, mode, &flags);
3977 }
3978 }
3979
3980 set_fpscr0(fpscr, flags);
3981
3982 return result;
3983}
3984
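// Editorial note: FRECPS computes 2 - op1 * op2 (op1 is negated, then
// fused into FPTwo). With an estimate y for 1/x, one Newton-Raphson
// refinement is (sketch only, composing this library's own entry points):
//
//     // y' = y * (2 - x * y)
//     uint32_t s = fplibRecipStepFused<uint32_t>(x, y, fpscr);
//     y = fplibMulX<uint32_t>(y, s, fpscr);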
3985template <>
3986uint16_t
3987fplibRecpX(uint16_t op, FPSCR &fpscr)
3988{
3989 int mode = modeConv(fpscr);
3990 int flags = 0;
3991 int sgn, exp;
3992 uint16_t mnt, result;
3993
3994 fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags);
3995
3996 if (fp16_is_NaN(exp, mnt)) {
3997 result = fp16_process_NaN(op, mode, &flags);
3998 }
3999 else {
4000 if (!mnt) { // Zero and denormals
4001 result = fp16_pack(sgn, FP16_EXP_INF - 1, 0);
4002 } else { // Infinities and normals
4003 result = fp16_pack(sgn, exp ^ FP16_EXP_INF, 0);
4004 }
4005 }
4006
4007 set_fpscr0(fpscr, flags);
4008
4009 return result;
4010}
4011
4012template <>
4013uint32_t
4014fplibRecpX(uint32_t op, FPSCR &fpscr)
4015{
4016 int mode = modeConv(fpscr);
4017 int flags = 0;
4018 int sgn, exp;
4019 uint32_t mnt, result;
4020
4021 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
4022
4023 if (fp32_is_NaN(exp, mnt)) {
4024 result = fp32_process_NaN(op, mode, &flags);
4025 }
4026 else {
4027 if (!mnt) { // Zero and denormals
4028 result = fp32_pack(sgn, FP32_EXP_INF - 1, 0);
4029 } else { // Infinities and normals
4030 result = fp32_pack(sgn, exp ^ FP32_EXP_INF, 0);
4031 }
4032 }
4033
4034 set_fpscr0(fpscr, flags);
4035
4036 return result;
4037}
4038
4039template <>
4040uint64_t
4041fplibRecpX(uint64_t op, FPSCR &fpscr)
4042{
4043 int mode = modeConv(fpscr);
4044 int flags = 0;
4045 int sgn, exp;
4046 uint64_t mnt, result;
4047
4048 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
4049
4050 if (fp64_is_NaN(exp, mnt)) {
4051 result = fp64_process_NaN(op, mode, &flags);
4052 }
4053 else {
4054 if (!mnt) { // Zero and denormals
4055 result = fp64_pack(sgn, FP64_EXP_INF - 1, 0);
4056 } else { // Infinities and normals
4057 result = fp64_pack(sgn, exp ^ FP64_EXP_INF, 0);
4058 }
4059 }
4060
4061 set_fpscr0(fpscr, flags);
4062
4063 return result;
4064}
4065
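// Editorial note: FRECPX only needs to land within a factor of two of the
// reciprocal's magnitude, so it simply inverts the exponent bits
// (exp ^ EXP_INF, roughly 2 * bias - exp) and clears the fraction; it is
// used to pre-scale operands so a following divide or reciprocal cannot
// overflow. Worked half-precision example:
//
//     fplibRecpX<uint16_t>(0x4400 /* 4.0 */, fpscr)
//     // exp 17 ^ 0x1f = 14 -> 0x3800 = 0.5, within 2x of 1/4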
4066template <>
4067uint16_t
4068fplibRoundInt(uint16_t op, FPRounding rounding, bool exact, FPSCR &fpscr)
4069{
4070 int expint = FP16_EXP_BIAS + FP16_MANT_BITS;
4071 int mode = modeConv(fpscr);
4072 int flags = 0;
4073 int sgn, exp;
4074 uint16_t mnt, result;
4075
4076 // Unpack using FPCR to determine if subnormals are flushed-to-zero:
4077 fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags);
4078
4079 // Handle NaNs, infinities and zeroes:
4080 if (fp16_is_NaN(exp, mnt)) {
4081 result = fp16_process_NaN(op, mode, &flags);
4082 } else if (exp == FP16_EXP_INF) {
4083 result = fp16_infinity(sgn);
4084 } else if (!mnt) {
4085 result = fp16_zero(sgn);
4086 } else if (exp >= expint) {
4087 // There are no fractional bits
4088 result = op;
4089 } else {
4090 // Truncate towards zero:
4091 uint16_t x = expint - exp >= FP16_BITS ? 0 : mnt >> (expint - exp);
4092 int err = exp < expint - FP16_BITS ? 1 :
4093 ((mnt << 1 >> (expint - exp - 1) & 3) |
4094 ((uint16_t)(mnt << 2 << (FP16_BITS + exp - expint)) != 0));
4095 switch (rounding) {
4096 case FPRounding_TIEEVEN:
4097 x += (err == 3 || (err == 2 && (x & 1)));
4098 break;
4099 case FPRounding_POSINF:
4100 x += err && !sgn;
4101 break;
4102 case FPRounding_NEGINF:
4103 x += err && sgn;
4104 break;
4105 case FPRounding_ZERO:
4106 break;
4107 case FPRounding_TIEAWAY:
4108 x += err >> 1;
4109 break;
4110 default:
4111 panic("Unrecognized FP rounding mode");
4112 }
4113
4114 if (x == 0) {
4115 result = fp16_zero(sgn);
4116 } else {
4117 exp = expint;
4118 mnt = fp16_normalise(x, &exp);
4119 result = fp16_pack(sgn, exp + FP16_EXP_BITS, mnt >> FP16_EXP_BITS);
4120 }
4121
4122 if (err && exact)
4123 flags |= FPLIB_IXC;
4124 }
4125
4126 set_fpscr0(fpscr, flags);
4127
4128 return result;
4129}
4130
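// Editorial note: in fplibRoundInt the 2-bit err code classifies the
// discarded fraction -- the high bit is the first bit below the integer
// part, the low bit a sticky OR of everything further down:
//
//     err == 0 : fraction == 0 (exact)     err == 2 : fraction == 1/2 (tie)
//     err == 1 : 0 < fraction < 1/2        err == 3 : fraction > 1/2
//
// So TIEEVEN bumps x on err == 3, or on err == 2 when x is odd, and
// TIEAWAY bumps by err >> 1 (fraction >= 1/2). For example, rounding 2.5f
// (0x40200000) to nearest-even gives x = 2, err = 2, x stays even:
// result 2.0f.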
4131template <>
4132uint32_t
4133fplibRoundInt(uint32_t op, FPRounding rounding, bool exact, FPSCR &fpscr)
4134{
4135 int expint = FP32_EXP_BIAS + FP32_MANT_BITS;
4136 int mode = modeConv(fpscr);
4137 int flags = 0;
4138 int sgn, exp;
4139 uint32_t mnt, result;
4140
4141 // Unpack using FPCR to determine if subnormals are flushed-to-zero:
4142 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
4143
4144 // Handle NaNs, infinities and zeroes:
4145 if (fp32_is_NaN(exp, mnt)) {
4146 result = fp32_process_NaN(op, mode, &flags);
4147 } else if (exp == FP32_EXP_INF) {
4148 result = fp32_infinity(sgn);
4149 } else if (!mnt) {
4150 result = fp32_zero(sgn);
4151 } else if (exp >= expint) {
4152 // There are no fractional bits
4153 result = op;
4154 } else {
4155 // Truncate towards zero:
4156 uint32_t x = expint - exp >= FP32_BITS ? 0 : mnt >> (expint - exp);
4157 int err = exp < expint - FP32_BITS ? 1 :
4158 ((mnt << 1 >> (expint - exp - 1) & 3) |
4159 ((uint32_t)(mnt << 2 << (FP32_BITS + exp - expint)) != 0));
4160 switch (rounding) {
4161 case FPRounding_TIEEVEN:
4162 x += (err == 3 || (err == 2 && (x & 1)));
4163 break;
4164 case FPRounding_POSINF:
4165 x += err && !sgn;
4166 break;
4167 case FPRounding_NEGINF:
4168 x += err && sgn;
4169 break;
4170 case FPRounding_ZERO:
4171 break;
4172 case FPRounding_TIEAWAY:
4173 x += err >> 1;
4174 break;
4175 default:
4176 panic("Unrecognized FP rounding mode");
4177 }
4178
4179 if (x == 0) {
4180 result = fp32_zero(sgn);
4181 } else {
4182 exp = expint;
4183 mnt = fp32_normalise(x, &exp);
4184 result = fp32_pack(sgn, exp + FP32_EXP_BITS, mnt >> FP32_EXP_BITS);
4185 }
4186
4187 if (err && exact)
4188 flags |= FPLIB_IXC;
4189 }
4190
4191 set_fpscr0(fpscr, flags);
4192
4193 return result;
4194}
4195
4196template <>
4197uint64_t
4198fplibRoundInt(uint64_t op, FPRounding rounding, bool exact, FPSCR &fpscr)
4199{
4200 int expint = FP64_EXP_BIAS + FP64_MANT_BITS;
4201 int mode = modeConv(fpscr);
4202 int flags = 0;
4203 int sgn, exp;
4204 uint64_t mnt, result;
4205
4206 // Unpack using FPCR to determine if subnormals are flushed-to-zero:
4207 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
4208
4209 // Handle NaNs, infinities and zeroes:
4210 if (fp64_is_NaN(exp, mnt)) {
4211 result = fp64_process_NaN(op, mode, &flags);
4212 } else if (exp == FP64_EXP_INF) {
4213 result = fp64_infinity(sgn);
4214 } else if (!mnt) {
4215 result = fp64_zero(sgn);
4216 } else if (exp >= expint) {
4217 // There are no fractional bits
4218 result = op;
4219 } else {
4220 // Truncate towards zero:
4221 uint64_t x = expint - exp >= FP64_BITS ? 0 : mnt >> (expint - exp);
4222 int err = exp < expint - FP64_BITS ? 1 :
4223 ((mnt << 1 >> (expint - exp - 1) & 3) |
4224 ((uint64_t)(mnt << 2 << (FP64_BITS + exp - expint)) != 0));
4225 switch (rounding) {
4226 case FPRounding_TIEEVEN:
4227 x += (err == 3 || (err == 2 && (x & 1)));
4228 break;
4229 case FPRounding_POSINF:
4230 x += err && !sgn;
4231 break;
4232 case FPRounding_NEGINF:
4233 x += err && sgn;
4234 break;
4235 case FPRounding_ZERO:
4236 break;
4237 case FPRounding_TIEAWAY:
4238 x += err >> 1;
4239 break;
4240 default:
4241 panic("Unrecognized FP rounding mode");
4242 }
4243
4244 if (x == 0) {
4245 result = fp64_zero(sgn);
4246 } else {
4247 exp = expint;
4248 mnt = fp64_normalise(x, &exp);
4249 result = fp64_pack(sgn, exp + FP64_EXP_BITS, mnt >> FP64_EXP_BITS);
4250 }
4251
4252 if (err && exact)
4253 flags |= FPLIB_IXC;
4254 }
4255
4256 set_fpscr0(fpscr, flags);
4257
4258 return result;
4259}
4260
4261template <>
4262uint16_t
4263fplibScale(uint16_t op1, uint16_t op2, FPSCR &fpscr)
4264{
4265 int flags = 0;
4266 uint16_t result = fp16_scale(op1, (int16_t)op2, modeConv(fpscr), &flags);
4267 set_fpscr0(fpscr, flags);
4268 return result;
4269}
4270
4271template <>
4272uint32_t
4273fplibScale(uint32_t op1, uint32_t op2, FPSCR &fpscr)
4274{
4275 int flags = 0;
4276 uint32_t result = fp32_scale(op1, (int32_t)op2, modeConv(fpscr), &flags);
4277 set_fpscr0(fpscr, flags);
4278 return result;
4279}
4280
4281template <>
4282uint64_t
4283fplibScale(uint64_t op1, uint64_t op2, FPSCR &fpscr)
4284{
4285 int flags = 0;
4286 uint64_t result = fp64_scale(op1, (int64_t)op2, modeConv(fpscr), &flags);
4287 set_fpscr0(fpscr, flags);
4288 return result;
4289}
4290
4291template <>
4292uint16_t
4293fplibSqrt(uint16_t op, FPSCR &fpscr)
4294{
4295 int flags = 0;
4296 uint16_t result = fp16_sqrt(op, modeConv(fpscr), &flags);
4297 set_fpscr0(fpscr, flags);
4298 return result;
4299}
4300
4301template <>
4302uint32_t
4303fplibSqrt(uint32_t op, FPSCR &fpscr)
4304{
4305 int flags = 0;
4306 uint32_t result = fp32_sqrt(op, modeConv(fpscr), &flags);
4307 set_fpscr0(fpscr, flags);
4308 return result;
4309}
4310
4311template <>
4312uint64_t
4313fplibSqrt(uint64_t op, FPSCR &fpscr)
4314{
4315 int flags = 0;
4316 uint64_t result = fp64_sqrt(op, modeConv(fpscr), &flags);
4317 set_fpscr0(fpscr, flags);
4318 return result;
4319}
4320
4321template <>
4322uint16_t
4323fplibSub(uint16_t op1, uint16_t op2, FPSCR &fpscr)
4324{
4325 int flags = 0;
4326 uint16_t result = fp16_add(op1, op2, 1, modeConv(fpscr), &flags);
4327 set_fpscr0(fpscr, flags);
4328 return result;
4329}
4330
4331template <>
4332uint32_t
4333fplibSub(uint32_t op1, uint32_t op2, FPSCR &fpscr)
4334{
4335 int flags = 0;
4336 uint32_t result = fp32_add(op1, op2, 1, modeConv(fpscr), &flags);
4337 set_fpscr0(fpscr, flags);
4338 return result;
4339}
4340
4341template <>
4342uint64_t
4343fplibSub(uint64_t op1, uint64_t op2, FPSCR &fpscr)
4344{
4345 int flags = 0;
4346 uint64_t result = fp64_add(op1, op2, 1, modeConv(fpscr), &flags);
4347 set_fpscr0(fpscr, flags);
4348 return result;
4349}
4350
4351template <>
4352uint16_t
4353fplibTrigMulAdd(uint8_t coeff_index, uint16_t op1, uint16_t op2, FPSCR &fpscr)
4354{
4355 static uint16_t coeff[2][8] = {
4356 {
4357 0x3c00,
4358 0xb155,
4359 0x2030,
4360 0x0000,
4361 0x0000,
4362 0x0000,
4363 0x0000,
4364 0x0000,
4365 },
4366 {
4367 0x3c00,
4368 0xb800,
4369 0x293a,
4370 0x0000,
4371 0x0000,
4372 0x0000,
4373 0x0000,
4374 0x0000
4375 }
4376 };
4377 int flags = 0;
4378 uint16_t result =
4379 fp16_muladd(coeff[op2 >> (FP16_BITS - 1)][coeff_index], op1,
4380 fplibAbs(op2), 0, modeConv(fpscr), &flags);
4381 set_fpscr0(fpscr, flags);
4382 return result;
4383}
4384
4385template <>
4386uint32_t
4387fplibTrigMulAdd(uint8_t coeff_index, uint32_t op1, uint32_t op2, FPSCR &fpscr)
4388{
4389 static uint32_t coeff[2][8] = {
4390 {
4391 0x3f800000,
4392 0xbe2aaaab,
4393 0x3c088886,
4394 0xb95008b9,
4395 0x36369d6d,
4396 0x00000000,
4397 0x00000000,
4398 0x00000000
4399 },
4400 {
4401 0x3f800000,
4402 0xbf000000,
4403 0x3d2aaaa6,
4404 0xbab60705,
4405 0x37cd37cc,
4406 0x00000000,
4407 0x00000000,
4408 0x00000000
4409 }
4410 };
4411 int flags = 0;
4412 uint32_t result =
4413 fp32_muladd(coeff[op2 >> (FP32_BITS - 1)][coeff_index], op1,
4414 fplibAbs(op2), 0, modeConv(fpscr), &flags);
4415 set_fpscr0(fpscr, flags);
4416 return result;
4417}
4418
4419template <>
4420uint64_t
4421fplibTrigMulAdd(uint8_t coeff_index, uint64_t op1, uint64_t op2, FPSCR &fpscr)
4422{
4423 static uint64_t coeff[2][8] = {
4424 {
4425 0x3ff0000000000000ULL,
4426 0xbfc5555555555543ULL,
4427 0x3f8111111110f30cULL,
4428 0xbf2a01a019b92fc6ULL,
4429 0x3ec71de351f3d22bULL,
4430 0xbe5ae5e2b60f7b91ULL,
4431 0x3de5d8408868552fULL,
4432 0x0000000000000000ULL
4433 },
4434 {
4435 0x3ff0000000000000ULL,
4436 0xbfe0000000000000ULL,
4437 0x3fa5555555555536ULL,
4438 0xbf56c16c16c13a0bULL,
4439 0x3efa01a019b1e8d8ULL,
4440 0xbe927e4f7282f468ULL,
4441 0x3e21ee96d2641b13ULL,
4442 0xbda8f76380fbb401ULL
4443 }
4444 };
4445 int flags = 0;
4446 uint64_t result =
4447 fp64_muladd(coeff[op2 >> (FP64_BITS - 1)][coeff_index], op1,
4448 fplibAbs(op2), 0, modeConv(fpscr), &flags);
4449 set_fpscr0(fpscr, flags);
4450 return result;
4451}
4452
4453template <>
4454uint16_t
4455fplibTrigSMul(uint16_t op1, uint16_t op2, FPSCR &fpscr)
4456{
4457 int flags = 0;
4458 int sgn, exp;
4459 uint16_t mnt;
4460
4461 int mode = modeConv(fpscr);
4462 uint16_t result = fp16_mul(op1, op1, mode, &flags);
4463 set_fpscr0(fpscr, flags);
4464
4465 fp16_unpack(&sgn, &exp, &mnt, result, mode, &flags);
4466 if (!fp16_is_NaN(exp, mnt)) {
4467 result = (result & ~(1ULL << (FP16_BITS - 1))) |
4468 op2 << (FP16_BITS - 1);
4469 }
4470 return result;
4471}
4472
4473template <>
4474uint32_t
4475fplibTrigSMul(uint32_t op1, uint32_t op2, FPSCR &fpscr)
4476{
4477 int flags = 0;
4478 int sgn, exp;
4479 uint32_t mnt;
4480
4481 int mode = modeConv(fpscr);
4482 uint32_t result = fp32_mul(op1, op1, mode, &flags);
4483 set_fpscr0(fpscr, flags);
4484
4485 fp32_unpack(&sgn, &exp, &mnt, result, mode, &flags);
4486 if (!fp32_is_NaN(exp, mnt)) {
4487 result = (result & ~(1ULL << (FP32_BITS - 1))) | op2 << (FP32_BITS - 1);
4488 }
4489 return result;
4490}
4491
4492template <>
4493uint64_t
4494fplibTrigSMul(uint64_t op1, uint64_t op2, FPSCR &fpscr)
4495{
4496 int flags = 0;
4497 int sgn, exp;
4498 uint64_t mnt;
4499
4500 int mode = modeConv(fpscr);
4501 uint64_t result = fp64_mul(op1, op1, mode, &flags);
4502 set_fpscr0(fpscr, flags);
4503
4504 fp64_unpack(&sgn, &exp, &mnt, result, mode, &flags);
4505 if (!fp64_is_NaN(exp, mnt)) {
4506 result = (result & ~(1ULL << (FP64_BITS - 1))) | op2 << (FP64_BITS - 1);
4507 }
4508 return result;
4509}
4510
4511template <>
4512uint16_t
4513fplibTrigSSel(uint16_t op1, uint16_t op2, FPSCR &fpscr)
4514{
4515 static constexpr uint16_t fpOne =
4516 (uint16_t)FP16_EXP_BIAS << FP16_MANT_BITS; // 1.0
4517 if (op2 & 1)
4518 op1 = fpOne;
4519 return op1 ^ ((op2 >> 1) << (FP16_BITS - 1));
4520}
4521
4522template <>
4523uint32_t
4524fplibTrigSSel(uint32_t op1, uint32_t op2, FPSCR &fpscr)
4525{
4526 static constexpr uint32_t fpOne =
4527 (uint32_t)FP32_EXP_BIAS << FP32_MANT_BITS; // 1.0
4528 if (op2 & 1)
4529 op1 = fpOne;
4530 return op1 ^ ((op2 >> 1) << (FP32_BITS - 1));
4531}
4532
4533template <>
4534uint64_t
4535fplibTrigSSel(uint64_t op1, uint64_t op2, FPSCR &fpscr)
4536{
4537 static constexpr uint64_t fpOne =
4538 (uint64_t)FP64_EXP_BIAS << FP64_MANT_BITS; // 1.0
4539 if (op2 & 1)
4540 op1 = fpOne;
4541 return op1 ^ ((op2 >> 1) << (FP64_BITS - 1));
4542}
4543
4544static uint64_t
4545FPToFixed_64(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding,
4546 int *flags)
4547{
4548 int expmax = FP64_EXP_BIAS + FP64_BITS - 1;
4549 uint64_t x;
4550 int err;
4551
4552 if (exp > expmax) {
4553 *flags = FPLIB_IOC;
4554 return ((uint64_t)!u << (FP64_BITS - 1)) - !sgn;
4555 }
4556
4557 x = lsr64(mnt << FP64_EXP_BITS, expmax - exp);
4558 err = (exp > expmax - 2 ? 0 :
4559 (lsr64(mnt << FP64_EXP_BITS, expmax - 2 - exp) & 3) |
4560 !!(mnt << FP64_EXP_BITS & (lsl64(1, expmax - 2 - exp) - 1)));
4561
4562 switch (rounding) {
4563 case FPRounding_TIEEVEN:
4564 x += (err == 3 || (err == 2 && (x & 1)));
4565 break;
4566 case FPRounding_POSINF:
4567 x += err && !sgn;
4568 break;
4569 case FPRounding_NEGINF:
4570 x += err && sgn;
4571 break;
4572 case FPRounding_ZERO:
4573 break;
4574 case FPRounding_TIEAWAY:
4575 x += err >> 1;
4576 break;
4577 default:
4243 }
4244
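// The rounded value x is now an exact integer; renormalise and
// repack it (a zero result keeps the operand's sign):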
4245 if (x == 0) {
4246 result = fp64_zero(sgn);
4247 } else {
4248 exp = expint;
4249 mnt = fp64_normalise(x, &exp);
4250 result = fp64_pack(sgn, exp + FP64_EXP_BITS, mnt >> FP64_EXP_BITS);
4251 }
4252
4253 if (err && exact)
4254 flags |= FPLIB_IXC;
4255 }
4256
4257 set_fpscr0(fpscr, flags);
4258
4259 return result;
4260}
4261
4262template <>
4263uint16_t
4264fplibScale(uint16_t op1, uint16_t op2, FPSCR &fpscr)
4265{
4266 int flags = 0;
4267 uint16_t result = fp16_scale(op1, (int16_t)op2, modeConv(fpscr), &flags);
4268 set_fpscr0(fpscr, flags);
4269 return result;
4270}
4271
4272template <>
4273uint32_t
4274fplibScale(uint32_t op1, uint32_t op2, FPSCR &fpscr)
4275{
4276 int flags = 0;
4277 uint32_t result = fp32_scale(op1, (int32_t)op2, modeConv(fpscr), &flags);
4278 set_fpscr0(fpscr, flags);
4279 return result;
4280}
4281
4282template <>
4283uint64_t
4284fplibScale(uint64_t op1, uint64_t op2, FPSCR &fpscr)
4285{
4286 int flags = 0;
4287 uint64_t result = fp64_scale(op1, (int64_t)op2, modeConv(fpscr), &flags);
4288 set_fpscr0(fpscr, flags);
4289 return result;
4290}
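// The fplibScale family computes op1 * 2^op2: the raw bits of op2 are
// reinterpreted as a signed two's-complement integer (hence the casts
// above) and applied by fp*_scale as an exponent adjustment, giving
// the usual FSCALE-style y = x * 2^n behaviour.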
4291
4292template <>
4293uint16_t
4294fplibSqrt(uint16_t op, FPSCR &fpscr)
4295{
4296 int flags = 0;
4297 uint16_t result = fp16_sqrt(op, modeConv(fpscr), &flags);
4298 set_fpscr0(fpscr, flags);
4299 return result;
4300}
4301
4302template <>
4303uint32_t
4304fplibSqrt(uint32_t op, FPSCR &fpscr)
4305{
4306 int flags = 0;
4307 uint32_t result = fp32_sqrt(op, modeConv(fpscr), &flags);
4308 set_fpscr0(fpscr, flags);
4309 return result;
4310}
4311
4312template <>
4313uint64_t
4314fplibSqrt(uint64_t op, FPSCR &fpscr)
4315{
4316 int flags = 0;
4317 uint64_t result = fp64_sqrt(op, modeConv(fpscr), &flags);
4318 set_fpscr0(fpscr, flags);
4319 return result;
4320}
4321
4322template <>
4323uint16_t
4324fplibSub(uint16_t op1, uint16_t op2, FPSCR &fpscr)
4325{
4326 int flags = 0;
4327 uint16_t result = fp16_add(op1, op2, 1, modeConv(fpscr), &flags);
4328 set_fpscr0(fpscr, flags);
4329 return result;
4330}
4331
4332template <>
4333uint32_t
4334fplibSub(uint32_t op1, uint32_t op2, FPSCR &fpscr)
4335{
4336 int flags = 0;
4337 uint32_t result = fp32_add(op1, op2, 1, modeConv(fpscr), &flags);
4338 set_fpscr0(fpscr, flags);
4339 return result;
4340}
4341
4342template <>
4343uint64_t
4344fplibSub(uint64_t op1, uint64_t op2, FPSCR &fpscr)
4345{
4346 int flags = 0;
4347 uint64_t result = fp64_add(op1, op2, 1, modeConv(fpscr), &flags);
4348 set_fpscr0(fpscr, flags);
4349 return result;
4350}
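// Subtraction is implemented on the shared add path: the third
// argument to fp*_add requests negation of op2, after which the usual
// alignment, addition and rounding steps apply.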
4351
4352template <>
4353uint16_t
4354fplibTrigMulAdd(uint8_t coeff_index, uint16_t op1, uint16_t op2, FPSCR &fpscr)
4355{
4356 static uint16_t coeff[2][8] = {
4357 {
4358 0x3c00,
4359 0xb155,
4360 0x2030,
4361 0x0000,
4362 0x0000,
4363 0x0000,
4364 0x0000,
4365 0x0000,
4366 },
4367 {
4368 0x3c00,
4369 0xb800,
4370 0x293a,
4371 0x0000,
4372 0x0000,
4373 0x0000,
4374 0x0000,
4375 0x0000
4376 }
4377 };
4378 int flags = 0;
4379 uint16_t result =
4380 fp16_muladd(coeff[op2 >> (FP16_BITS - 1)][coeff_index], op1,
4381 fplibAbs(op2), 0, modeConv(fpscr), &flags);
4382 set_fpscr0(fpscr, flags);
4383 return result;
4384}
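// In the fplibTrigMulAdd family the coefficient row is chosen by the
// sign bit of op2: row 0 holds the sine-series coefficients
// (1, -1/3!, 1/5!, ...) and row 1 the cosine series
// (1, -1/2!, 1/4!, ...). Each call performs one Horner step,
// coeff[coeff_index] + op1 * |op2|, matching the semantics of Arm's
// FTMAD.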
4385
4386template <>
4387uint32_t
4388fplibTrigMulAdd(uint8_t coeff_index, uint32_t op1, uint32_t op2, FPSCR &fpscr)
4389{
4390 static uint32_t coeff[2][8] = {
4391 {
4392 0x3f800000,
4393 0xbe2aaaab,
4394 0x3c088886,
4395 0xb95008b9,
4396 0x36369d6d,
4397 0x00000000,
4398 0x00000000,
4399 0x00000000
4400 },
4401 {
4402 0x3f800000,
4403 0xbf000000,
4404 0x3d2aaaa6,
4405 0xbab60705,
4406 0x37cd37cc,
4407 0x00000000,
4408 0x00000000,
4409 0x00000000
4410 }
4411 };
4412 int flags = 0;
4413 uint32_t result =
4414 fp32_muladd(coeff[op2 >> (FP32_BITS - 1)][coeff_index], op1,
4415 fplibAbs(op2), 0, modeConv(fpscr), &flags);
4416 set_fpscr0(fpscr, flags);
4417 return result;
4418}
4419
4420template <>
4421uint64_t
4422fplibTrigMulAdd(uint8_t coeff_index, uint64_t op1, uint64_t op2, FPSCR &fpscr)
4423{
4424 static uint64_t coeff[2][8] = {
4425 {
4426 0x3ff0000000000000ULL,
4427 0xbfc5555555555543ULL,
4428 0x3f8111111110f30cULL,
4429 0xbf2a01a019b92fc6ULL,
4430 0x3ec71de351f3d22bULL,
4431 0xbe5ae5e2b60f7b91ULL,
4432 0x3de5d8408868552fULL,
4433 0x0000000000000000ULL
4434 },
4435 {
4436 0x3ff0000000000000ULL,
4437 0xbfe0000000000000ULL,
4438 0x3fa5555555555536ULL,
4439 0xbf56c16c16c13a0bULL,
4440 0x3efa01a019b1e8d8ULL,
4441 0xbe927e4f7282f468ULL,
4442 0x3e21ee96d2641b13ULL,
4443 0xbda8f76380fbb401ULL
4444 }
4445 };
4446 int flags = 0;
4447 uint64_t result =
4448 fp64_muladd(coeff[op2 >> (FP64_BITS - 1)][coeff_index], op1,
4449 fplibAbs(op2), 0, modeConv(fpscr), &flags);
4450 set_fpscr0(fpscr, flags);
4451 return result;
4452}
4453
4454template <>
4455uint16_t
4456fplibTrigSMul(uint16_t op1, uint16_t op2, FPSCR &fpscr)
4457{
4458 int flags = 0;
4459 int sgn, exp;
4460 uint16_t mnt;
4461
4462 int mode = modeConv(fpscr);
4463 uint16_t result = fp16_mul(op1, op1, mode, &flags);
4464 set_fpscr0(fpscr, flags);
4465
4466 fp16_unpack(&sgn, &exp, &mnt, result, mode, &flags);
4467 if (!fp16_is_NaN(exp, mnt)) {
4468 result = (result & ~(1ULL << (FP16_BITS - 1))) |
4469 op2 << (FP16_BITS - 1);
4470 }
4471 return result;
4472}
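// The fplibTrigSMul family squares op1 and then, unless the square is
// a NaN, replaces the sign bit of the product with bit 0 of op2: the
// sign-injection step of Arm's FTSMUL.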
4473
4474template <>
4475uint32_t
4476fplibTrigSMul(uint32_t op1, uint32_t op2, FPSCR &fpscr)
4477{
4478 int flags = 0;
4479 int sgn, exp;
4480 uint32_t mnt;
4481
4482 int mode = modeConv(fpscr);
4483 uint32_t result = fp32_mul(op1, op1, mode, &flags);
4484 set_fpscr0(fpscr, flags);
4485
4486 fp32_unpack(&sgn, &exp, &mnt, result, mode, &flags);
4487 if (!fp32_is_NaN(exp, mnt)) {
4488 result = (result & ~(1ULL << (FP32_BITS - 1))) | op2 << (FP32_BITS - 1);
4489 }
4490 return result;
4491}
4492
4493template <>
4494uint64_t
4495fplibTrigSMul(uint64_t op1, uint64_t op2, FPSCR &fpscr)
4496{
4497 int flags = 0;
4498 int sgn, exp;
4499 uint64_t mnt;
4500
4501 int mode = modeConv(fpscr);
4502 uint64_t result = fp64_mul(op1, op1, mode, &flags);
4503 set_fpscr0(fpscr, flags);
4504
4505 fp64_unpack(&sgn, &exp, &mnt, result, mode, &flags);
4506 if (!fp64_is_NaN(exp, mnt)) {
4507 result = (result & ~(1ULL << (FP64_BITS - 1))) | op2 << (FP64_BITS - 1);
4508 }
4509 return result;
4510}
4511
4512template <>
4513uint16_t
4514fplibTrigSSel(uint16_t op1, uint16_t op2, FPSCR &fpscr)
4515{
4516 static constexpr uint16_t fpOne =
4517 (uint16_t)FP16_EXP_BIAS << FP16_MANT_BITS; // 1.0
4518 if (op2 & 1)
4519 op1 = fpOne;
4520 return op1 ^ ((op2 >> 1) << (FP16_BITS - 1));
4521}
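// fplibTrigSSel: bit 0 of op2 selects between op1 and +1.0 (fpOne is
// simply the exponent bias packed into place), and bit 1 of op2 then
// conditionally flips the sign of the selection via the XOR, as in
// Arm's FTSSEL.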
4522
4523template <>
4524uint32_t
4525fplibTrigSSel(uint32_t op1, uint32_t op2, FPSCR &fpscr)
4526{
4527 static constexpr uint32_t fpOne =
4528 (uint32_t)FP32_EXP_BIAS << FP32_MANT_BITS; // 1.0
4529 if (op2 & 1)
4530 op1 = fpOne;
4531 return op1 ^ ((op2 >> 1) << (FP32_BITS - 1));
4532}
4533
4534template <>
4535uint64_t
4536fplibTrigSSel(uint64_t op1, uint64_t op2, FPSCR &fpscr)
4537{
4538 static constexpr uint64_t fpOne =
4539 (uint64_t)FP64_EXP_BIAS << FP64_MANT_BITS; // 1.0
4540 if (op2 & 1)
4541 op1 = fpOne;
4542 return op1 ^ ((op2 >> 1) << (FP64_BITS - 1));
4543}
4544
4545static uint64_t
4546FPToFixed_64(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding,
4547 int *flags)
4548{
4549 int expmax = FP64_EXP_BIAS + FP64_BITS - 1;
4550 uint64_t x;
4551 int err;
4552
4553 if (exp > expmax) {
4554 *flags = FPLIB_IOC;
4555 return ((uint64_t)!u << (FP64_BITS - 1)) - !sgn;
4556 }
4557
4558 x = lsr64(mnt << FP64_EXP_BITS, expmax - exp);
4559 err = (exp > expmax - 2 ? 0 :
4560 (lsr64(mnt << FP64_EXP_BITS, expmax - 2 - exp) & 3) |
4561 !!(mnt << FP64_EXP_BITS & (lsl64(1, expmax - 2 - exp) - 1)));
4562
4563 switch (rounding) {
4564 case FPRounding_TIEEVEN:
4565 x += (err == 3 || (err == 2 && (x & 1)));
4566 break;
4567 case FPRounding_POSINF:
4568 x += err && !sgn;
4569 break;
4570 case FPRounding_NEGINF:
4571 x += err && sgn;
4572 break;
4573 case FPRounding_ZERO:
4574 break;
4575 case FPRounding_TIEAWAY:
4576 x += err >> 1;
4577 break;
4578 default:
4579 panic("Unrecognized FP rounding mode");
4579 }
4580
4581 if (u ? sgn && x : x > (1ULL << (FP64_BITS - 1)) - !sgn) {
4582 *flags = FPLIB_IOC;
4583 return ((uint64_t)!u << (FP64_BITS - 1)) - !sgn;
4584 }
4585
4586 if (err) {
4587 *flags = FPLIB_IXC;
4588 }
4589
4590 return sgn ? -x : x;
4591}
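// Note on the saturation value ((uint64_t)!u << (FP64_BITS - 1)) - !sgn
// used above: unsigned conversions saturate to UINT64_MAX on positive
// overflow and to 0 for negative inputs; signed conversions saturate
// to INT64_MAX, or to INT64_MIN when sgn is set. The narrower
// converters below reuse the same expression at their own widths.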
4592
4593static uint32_t
4594FPToFixed_32(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding,
4595 int *flags)
4596{
4597 uint64_t x = FPToFixed_64(sgn, exp, mnt, u, rounding, flags);
4598 if (u ? x >= 1ULL << FP32_BITS :
4599 !(x < 1ULL << (FP32_BITS - 1) ||
4600 (uint64_t)-x <= (uint64_t)1 << (FP32_BITS - 1))) {
4601 *flags = FPLIB_IOC;
4602 x = ((uint32_t)!u << (FP32_BITS - 1)) - !sgn;
4603 }
4604 return x;
4605}
4606
4607static uint16_t
4608FPToFixed_16(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding,
4609 int *flags)
4610{
4611 uint64_t x = FPToFixed_64(sgn, exp, mnt, u, rounding, flags);
4612 if (u ? x >= 1ULL << FP16_BITS :
4613 !(x < 1ULL << (FP16_BITS - 1) ||
4614 (uint64_t)-x <= (uint64_t)1 << (FP16_BITS - 1))) {
4615 *flags = FPLIB_IOC;
4616 x = ((uint16_t)!u << (FP16_BITS - 1)) - !sgn;
4617 }
4618 return x;
4619}
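// FPToFixed_32 and FPToFixed_16 reuse the 64-bit conversion and only
// re-check the range: a signed result fits in N bits iff x < 2^(N-1)
// or -x <= 2^(N-1), i.e. x lies in [-2^(N-1), 2^(N-1) - 1] when read
// as a two's-complement value.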
4620
4621template <>
4622uint16_t
4623fplibFPToFixed(uint16_t op, int fbits, bool u, FPRounding rounding,
4624 FPSCR &fpscr)
4625{
4626 int flags = 0;
4627 int sgn, exp;
4628 uint16_t mnt, result;
4629
4630 // Unpack using FPCR to determine if subnormals are flushed-to-zero:
4631 fp16_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);
4632
4633 // If NaN, set cumulative flag or take exception:
4634 if (fp16_is_NaN(exp, mnt)) {
4635 flags = FPLIB_IOC;
4636 result = 0;
4637 } else {
4638 assert(fbits >= 0);
4639 // Infinity is treated as an ordinary normalised number that saturates.
4640 result =
4641 FPToFixed_16(sgn, exp + FP64_EXP_BIAS - FP16_EXP_BIAS + fbits,
4642 (uint64_t)mnt << (FP64_MANT_BITS - FP16_MANT_BITS),
4643 u, rounding, &flags);
4644 }
4645
4646 set_fpscr0(fpscr, flags);
4647
4648 return result;
4649}
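// The fplibFPToFixed specialisations widen narrow operands to the
// double-precision layout (rebias the exponent by
// FP64_EXP_BIAS - FP16/32_EXP_BIAS and shift the mantissa up by the
// matching number of bits) so that the FPToFixed_* cores only ever see
// one format; fbits is added to the exponent to move the binary point
// for fixed-point results.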
4650
4651template <>
4652uint32_t
4653fplibFPToFixed(uint16_t op, int fbits, bool u, FPRounding rounding,
4654 FPSCR &fpscr)
4655{
4656 int flags = 0;
4657 int sgn, exp;
4658 uint16_t mnt;
4659 uint32_t result;
4660
4661 // Unpack using FPCR to determine if subnormals are flushed-to-zero:
4662 fp16_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);
4663
4664 // If NaN, set cumulative flag or take exception:
4665 if (fp16_is_NaN(exp, mnt)) {
4666 flags = FPLIB_IOC;
4667 result = 0;
4668 } else {
4669 assert(fbits >= 0);
4670 if (exp == FP16_EXP_INF)
4671 exp = 255; // infinity: make it big enough to saturate
4672 result =
4673 FPToFixed_32(sgn, exp + FP64_EXP_BIAS - FP16_EXP_BIAS + fbits,
4674 (uint64_t)mnt << (FP64_MANT_BITS - FP16_MANT_BITS),
4675 u, rounding, &flags);
4676 }
4677
4678 set_fpscr0(fpscr, flags);
4679
4680 return result;
4681}
4682
4683template <>
4684uint32_t
4685fplibFPToFixed(uint32_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
4686{
4687 int flags = 0;
4688 int sgn, exp;
4689 uint32_t mnt, result;
4690
4691 // Unpack using FPCR to determine if subnormals are flushed-to-zero:
4692 fp32_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);
4693
4694 // If NaN, set cumulative flag or take exception:
4695 if (fp32_is_NaN(exp, mnt)) {
4696 flags = FPLIB_IOC;
4697 result = 0;
4698 } else {
4699 assert(fbits >= 0);
4700 // Infinity is treated as an ordinary normalised number that saturates.
4701 result =
4702 FPToFixed_32(sgn, exp + FP64_EXP_BIAS - FP32_EXP_BIAS + fbits,
4703 (uint64_t)mnt << (FP64_MANT_BITS - FP32_MANT_BITS),
4704 u, rounding, &flags);
4705 }
4706
4707 set_fpscr0(fpscr, flags);
4708
4709 return result;
4710}
4711
4712template <>
4713uint32_t
4714fplibFPToFixed(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
4715{
4716 int flags = 0;
4717 int sgn, exp;
4718 uint64_t mnt;
4719 uint32_t result;
4720
4721 // Unpack using FPCR to determine if subnormals are flushed-to-zero:
4722 fp64_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);
4723
4724 // If NaN, set cumulative flag or take exception:
4725 if (fp64_is_NaN(exp, mnt)) {
4726 flags = FPLIB_IOC;
4727 result = 0;
4728 } else {
4729 assert(fbits >= 0);
4730 // Infinity is treated as an ordinary normalised number that saturates.
4731 result = FPToFixed_32(sgn, exp + fbits, mnt, u, rounding, &flags);
4732 }
4733
4734 set_fpscr0(fpscr, flags);
4735
4736 return result;
4737}
4738
4739template <>
4740uint64_t
4741fplibFPToFixed(uint16_t op, int fbits, bool u, FPRounding rounding,
4742 FPSCR &fpscr)
4743{
4744 int flags = 0;
4745 int sgn, exp;
4746 uint16_t mnt;
4747 uint64_t result;
4748
4749 // Unpack using FPCR to determine if subnormals are flushed-to-zero:
4750 fp16_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);
4751
4752 // If NaN, set cumulative flag or take exception:
4753 if (fp16_is_NaN(exp, mnt)) {
4754 flags = FPLIB_IOC;
4755 result = 0;
4756 } else {
4757 assert(fbits >= 0);
4758 if (exp == FP16_EXP_INF)
4759 exp = 255; // infinity: make it big enough to saturate
4760 result =
4761 FPToFixed_64(sgn, exp + FP64_EXP_BIAS - FP16_EXP_BIAS + fbits,
4762 (uint64_t)mnt << (FP64_MANT_BITS - FP16_MANT_BITS),
4763 u, rounding, &flags);
4764 }
4765
4766 set_fpscr0(fpscr, flags);
4767
4768 return result;
4769}
4770
4771template <>
4772uint64_t
4773fplibFPToFixed(uint32_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
4774{
4775 int flags = 0;
4776 int sgn, exp;
4777 uint32_t mnt;
4778 uint64_t result;
4779
4780 // Unpack using FPCR to determine if subnormals are flushed-to-zero:
4781 fp32_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);
4782
4783 // If NaN, set cumulative flag or take exception:
4784 if (fp32_is_NaN(exp, mnt)) {
4785 flags = FPLIB_IOC;
4786 result = 0;
4787 } else {
4788 assert(fbits >= 0);
4789 // Infinity is treated as an ordinary normalised number that saturates.
4790 result =
4791 FPToFixed_64(sgn, exp + FP64_EXP_BIAS - FP32_EXP_BIAS + fbits,
4792 (uint64_t)mnt << (FP64_MANT_BITS - FP32_MANT_BITS),
4793 u, rounding, &flags);
4794 }
4795
4796 set_fpscr0(fpscr, flags);
4797
4798 return result;
4799}
4800
4801template <>
4802uint64_t
4803fplibFPToFixed(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
4804{
4805 int flags = 0;
4806 int sgn, exp;
4807 uint64_t mnt, result;
4808
4809 // Unpack using FPCR to determine if subnormals are flushed-to-zero:
4810 fp64_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);
4811
4812 // If NaN, set cumulative flag or take exception:
4813 if (fp64_is_NaN(exp, mnt)) {
4814 flags = FPLIB_IOC;
4815 result = 0;
4816 } else {
4817 assert(fbits >= 0);
4818 // Infinity is treated as an ordinary normalised number that saturates.
4819 result = FPToFixed_64(sgn, exp + fbits, mnt, u, rounding, &flags);
4820 }
4821
4822 set_fpscr0(fpscr, flags);
4823
4824 return result;
4825}
4826
4827static uint16_t
4828fp16_cvtf(uint64_t a, int fbits, int u, int mode, int *flags)
4829{
4830 int x_sgn = !u && a >> (FP64_BITS - 1);
4831 int x_exp = FP16_EXP_BIAS + FP64_BITS - 1 - fbits;
4832 uint64_t x_mnt = x_sgn ? -a : a;
4833
4834 // Handle zero:
4835 if (!x_mnt) {
4836 return fp16_zero(0);
4837 }
4838
4839 // Normalise into FP16_BITS bits, collapsing error into bottom bit:
4840 x_mnt = fp64_normalise(x_mnt, &x_exp);
4841 x_mnt = (x_mnt >> (FP64_BITS - FP16_BITS - 1) |
4842 !!(x_mnt & ((1ULL << (FP64_BITS - FP16_BITS - 1)) - 1)));
4843
4844 return fp16_round(x_sgn, x_exp, x_mnt, mode, flags);
4845}
4846
4847static uint32_t
4848fp32_cvtf(uint64_t a, int fbits, int u, int mode, int *flags)
4849{
4850 int x_sgn = !u && a >> (FP64_BITS - 1);
4851 int x_exp = FP32_EXP_BIAS + FP64_BITS - 1 - fbits;
4852 uint64_t x_mnt = x_sgn ? -a : a;
4853
4854 // Handle zero:
4855 if (!x_mnt) {
4856 return fp32_zero(0);
4857 }
4858
4859 // Normalise into FP32_BITS bits, collapsing error into bottom bit:
4860 x_mnt = fp64_normalise(x_mnt, &x_exp);
4861 x_mnt = (x_mnt >> (FP64_BITS - FP32_BITS - 1) |
4862 !!(x_mnt & ((1ULL << (FP64_BITS - FP32_BITS - 1)) - 1)));
4863
4864 return fp32_round(x_sgn, x_exp, x_mnt, mode, flags);
4865}
4866
4867static uint64_t
4868fp64_cvtf(uint64_t a, int fbits, int u, int mode, int *flags)
4869{
4870 int x_sgn = !u && a >> (FP64_BITS - 1);
4871 int x_exp = FP64_EXP_BIAS + FP64_BITS - 1 - fbits;
4872 uint64_t x_mnt = x_sgn ? -a : a;
4873
4874 // Handle zero:
4875 if (!x_mnt) {
4876 return fp64_zero(0);
4877 }
4878
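// Normalise into the top of the 64-bit word before rounding: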
4879 x_mnt = fp64_normalise(x_mnt, &x_exp);
4880
4881 return fp64_round(x_sgn, x_exp, x_mnt << 1, mode, flags);
4882}
4883
4884template <>
4885uint16_t
4886fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding,
4887 FPSCR &fpscr)
4888{
4889 int flags = 0;
4890 uint16_t res = fp16_cvtf(op, fbits, u,
4891 (int)rounding | ((uint32_t)fpscr >> 22 & 12),
4892 &flags);
4893 set_fpscr0(fpscr, flags);
4894 return res;
4895}
4896
4897template <>
4898uint32_t
4899fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
4900{
4901 int flags = 0;
4902 uint32_t res = fp32_cvtf(op, fbits, u,
4903 (int)rounding | ((uint32_t)fpscr >> 22 & 12),
4904 &flags);
4905 set_fpscr0(fpscr, flags);
4906 return res;
4907}
4908
4909template <>
4910uint64_t
4911fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
4912{
4913 int flags = 0;
4914 uint64_t res = fp64_cvtf(op, fbits, u,
4915 (int)rounding | ((uint32_t)fpscr >> 22 & 12),
4916 &flags);
4917 set_fpscr0(fpscr, flags);
4918 return res;
4919}
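// The mode word passed to fp*_cvtf combines the caller's explicit
// rounding mode (low two bits) with FPSCR.FZ (bit 24) and FPSCR.DN
// (bit 25): (fpscr >> 22) & 12 moves those two flags into bit
// positions 2 and 3 alongside the rounding mode.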
4920
4921template <>
4922uint16_t
4923fplibInfinity(int sgn)
4924{
4925 return fp16_infinity(sgn);
4926}
4927
4928template <>
4929uint32_t
4930fplibInfinity(int sgn)
4931{
4932 return fp32_infinity(sgn);
4933}
4934
4935template <>
4936uint64_t
4937fplibInfinity(int sgn)
4938{
4939 return fp64_infinity(sgn);
4940}
4941
4942template <>
4943uint16_t
4944fplibDefaultNaN()
4945{
4946 return fp16_defaultNaN();
4947}
4948
4949template <>
4950uint32_t
4951fplibDefaultNaN()
4952{
4953 return fp32_defaultNaN();
4954}
4955
4956template <>
4957uint64_t
4958fplibDefaultNaN()
4959{
4960 return fp64_defaultNaN();
4961}
4962
4963}