/*
 * Copyright (c) 2012-2013, 2017-2018 ARM Limited
 * All rights reserved
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder. You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Edmund Grimley Evans
 *          Thomas Grocutt
 */

#include <stdint.h>

#include <cassert>

#include "fplib.hh"

namespace ArmISA
{

#define FPLIB_RN 0
#define FPLIB_RP 1
#define FPLIB_RM 2
#define FPLIB_RZ 3
#define FPLIB_FZ 4
#define FPLIB_DN 8
#define FPLIB_AHP 16
#define FPLIB_FZ16 32

#define FPLIB_IDC 128 // Input Denormal
#define FPLIB_IXC 16 // Inexact
#define FPLIB_UFC 8 // Underflow
#define FPLIB_OFC 4 // Overflow
#define FPLIB_DZC 2 // Division by Zero
#define FPLIB_IOC 1 // Invalid Operation

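// The 'mode' word used throughout combines a rounding direction in its
// low two bits (FPLIB_RN/RP/RM/RZ) with the feature bits above
// (flush-to-zero, default NaN, alternative half-precision). Cumulative
// exception bits are ORed into '*flags' and later transferred into the
// FPSCR by set_fpscr0() below. As an illustrative sketch, a caller
// rounding to nearest with default-NaN and flush-to-zero might do:
//
//     int flags = 0;
//     int mode = FPLIB_RN | FPLIB_DN | FPLIB_FZ;
//     uint32_t prod = fp32_mul(a, b, mode, &flags); // may set FPLIB_IXC
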
#define FP16_BITS 16
#define FP32_BITS 32
#define FP64_BITS 64

#define FP16_EXP_BITS 5
#define FP32_EXP_BITS 8
#define FP64_EXP_BITS 11

#define FP16_EXP_BIAS 15
#define FP32_EXP_BIAS 127
#define FP64_EXP_BIAS 1023

#define FP16_EXP_INF ((1ULL << FP16_EXP_BITS) - 1)
#define FP32_EXP_INF ((1ULL << FP32_EXP_BITS) - 1)
#define FP64_EXP_INF ((1ULL << FP64_EXP_BITS) - 1)

#define FP16_MANT_BITS (FP16_BITS - FP16_EXP_BITS - 1)
#define FP32_MANT_BITS (FP32_BITS - FP32_EXP_BITS - 1)
#define FP64_MANT_BITS (FP64_BITS - FP64_EXP_BITS - 1)

#define FP16_EXP(x) ((x) >> FP16_MANT_BITS & ((1ULL << FP16_EXP_BITS) - 1))
#define FP32_EXP(x) ((x) >> FP32_MANT_BITS & ((1ULL << FP32_EXP_BITS) - 1))
#define FP64_EXP(x) ((x) >> FP64_MANT_BITS & ((1ULL << FP64_EXP_BITS) - 1))

#define FP16_MANT(x) ((x) & ((1ULL << FP16_MANT_BITS) - 1))
#define FP32_MANT(x) ((x) & ((1ULL << FP32_MANT_BITS) - 1))
#define FP64_MANT(x) ((x) & ((1ULL << FP64_MANT_BITS) - 1))

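// These fields follow the IEEE 754 layout: sign in the top bit, then the
// biased exponent, then the fraction. For example, the FP32 encoding
// 0x3f800000 (1.0f) has FP32_EXP() == 127 == FP32_EXP_BIAS and
// FP32_MANT() == 0, i.e. (-1)^0 * 2^(127 - 127) * 1.0.
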
static inline uint16_t
lsl16(uint16_t x, uint32_t shift)
{
    return shift < 16 ? x << shift : 0;
}

static inline uint16_t
lsr16(uint16_t x, uint32_t shift)
{
    return shift < 16 ? x >> shift : 0;
}

static inline uint32_t
lsl32(uint32_t x, uint32_t shift)
{
    return shift < 32 ? x << shift : 0;
}

static inline uint32_t
lsr32(uint32_t x, uint32_t shift)
{
    return shift < 32 ? x >> shift : 0;
}

static inline uint64_t
lsl64(uint64_t x, uint32_t shift)
{
    return shift < 64 ? x << shift : 0;
}

static inline uint64_t
lsr64(uint64_t x, uint32_t shift)
{
    return shift < 64 ? x >> shift : 0;
}

static inline void
lsl128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift)
{
    if (shift == 0) {
        *r1 = x1;
        *r0 = x0;
    } else if (shift < 64) {
        *r1 = x1 << shift | x0 >> (64 - shift);
        *r0 = x0 << shift;
    } else if (shift < 128) {
        *r1 = x0 << (shift - 64);
        *r0 = 0;
    } else {
        *r1 = 0;
        *r0 = 0;
    }
}

static inline void
lsr128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift)
{
    if (shift == 0) {
        *r1 = x1;
        *r0 = x0;
    } else if (shift < 64) {
        *r0 = x0 >> shift | x1 << (64 - shift);
        *r1 = x1 >> shift;
    } else if (shift < 128) {
        *r0 = x1 >> (shift - 64);
        *r1 = 0;
    } else {
        *r0 = 0;
        *r1 = 0;
    }
}

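// mul62x62() below computes a full 124-bit product from operands of at
// most 62 significant bits. It splits each operand into two 31-bit limbs
// and uses the Karatsuba identity a1*b0 + a0*b1 = (a0+a1)*(b0+b1) -
// a0*b0 - a1*b1, so only three multiplies are needed and no intermediate
// sum can overflow 64 bits. As a quick check: a = b = 1ULL << 31 gives
// a0 = b0 = 0, a1 = b1 = 1, hence p0 = p1 = 0, p2 = 1, and the result is
// 2^62 in *x0 with *x1 == 0, i.e. (2^31)^2.
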
static inline void
mul62x62(uint64_t *x0, uint64_t *x1, uint64_t a, uint64_t b)
{
    uint32_t mask = ((uint32_t)1 << 31) - 1;
    uint64_t a0 = a & mask;
    uint64_t a1 = a >> 31 & mask;
    uint64_t b0 = b & mask;
    uint64_t b1 = b >> 31 & mask;
    uint64_t p0 = a0 * b0;
    uint64_t p2 = a1 * b1;
    uint64_t p1 = (a0 + a1) * (b0 + b1) - p0 - p2;
    uint64_t s0 = p0;
    uint64_t s1 = (s0 >> 31) + p1;
    uint64_t s2 = (s1 >> 31) + p2;
    *x0 = (s0 & mask) | (s1 & mask) << 31 | s2 << 62;
    *x1 = s2 >> 2;
}

static inline void
mul64x32(uint64_t *x0, uint64_t *x1, uint64_t a, uint32_t b)
{
    uint64_t t0 = (uint64_t)(uint32_t)a * b;
    uint64_t t1 = (t0 >> 32) + (a >> 32) * b;
    *x0 = t1 << 32 | (uint32_t)t0;
    *x1 = t1 >> 32;
}

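// add128()/sub128() below implement 128-bit add/subtract with the usual
// carry trick: after '*x0 = a0 + b0', a carry out of the low word has
// occurred exactly when the truncated sum is smaller than an operand
// ('*x0 < a0'); likewise '*x0 > a0' detects a borrow in sub128().
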
static inline void
add128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0,
       uint64_t b1)
{
    *x0 = a0 + b0;
    *x1 = a1 + b1 + (*x0 < a0);
}

static inline void
sub128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0,
       uint64_t b1)
{
    *x0 = a0 - b0;
    *x1 = a1 - b1 - (*x0 > a0);
}

static inline int
cmp128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1)
{
    return (a1 < b1 ? -1 : a1 > b1 ? 1 : a0 < b0 ? -1 : a0 > b0 ? 1 : 0);
}

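// The normalise helpers shift the mantissa left until its top bit is
// set, adjusting the exponent to match. The loop is a binary search over
// shift amounts (8, 4, 2, 1 for 16 bits), so it takes log2(width) steps
// rather than one step per bit.
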
static inline uint16_t
fp16_normalise(uint16_t mnt, int *exp)
{
    int shift;

    if (!mnt) {
        return 0;
    }

    for (shift = 8; shift; shift >>= 1) {
        if (!(mnt >> (16 - shift))) {
            mnt <<= shift;
            *exp -= shift;
        }
    }
    return mnt;
}

static inline uint32_t
fp32_normalise(uint32_t mnt, int *exp)
{
    int shift;

    if (!mnt) {
        return 0;
    }

    for (shift = 16; shift; shift >>= 1) {
        if (!(mnt >> (32 - shift))) {
            mnt <<= shift;
            *exp -= shift;
        }
    }
    return mnt;
}

static inline uint64_t
fp64_normalise(uint64_t mnt, int *exp)
{
    int shift;

    if (!mnt) {
        return 0;
    }

    for (shift = 32; shift; shift >>= 1) {
        if (!(mnt >> (64 - shift))) {
            mnt <<= shift;
            *exp -= shift;
        }
    }
    return mnt;
}

static inline void
fp128_normalise(uint64_t *mnt0, uint64_t *mnt1, int *exp)
{
    uint64_t x0 = *mnt0;
    uint64_t x1 = *mnt1;
    int shift;

    if (!x0 && !x1) {
        return;
    }

    if (!x1) {
        x1 = x0;
        x0 = 0;
        *exp -= 64;
    }

    for (shift = 32; shift; shift >>= 1) {
        if (!(x1 >> (64 - shift))) {
            x1 = x1 << shift | x0 >> (64 - shift);
            x0 <<= shift;
            *exp -= shift;
        }
    }

    *mnt0 = x0;
    *mnt1 = x1;
}

static inline uint16_t
fp16_pack(uint16_t sgn, uint16_t exp, uint16_t mnt)
{
    return sgn << (FP16_BITS - 1) | exp << FP16_MANT_BITS | FP16_MANT(mnt);
}

static inline uint32_t
fp32_pack(uint32_t sgn, uint32_t exp, uint32_t mnt)
{
    return sgn << (FP32_BITS - 1) | exp << FP32_MANT_BITS | FP32_MANT(mnt);
}

static inline uint64_t
fp64_pack(uint64_t sgn, uint64_t exp, uint64_t mnt)
{
    return sgn << (FP64_BITS - 1) | exp << FP64_MANT_BITS | FP64_MANT(mnt);
}

static inline uint16_t
fp16_zero(int sgn)
{
    return fp16_pack(sgn, 0, 0);
}

static inline uint32_t
fp32_zero(int sgn)
{
    return fp32_pack(sgn, 0, 0);
}

static inline uint64_t
fp64_zero(int sgn)
{
    return fp64_pack(sgn, 0, 0);
}

static inline uint16_t
fp16_max_normal(int sgn)
{
    return fp16_pack(sgn, FP16_EXP_INF - 1, -1);
}

static inline uint32_t
fp32_max_normal(int sgn)
{
    return fp32_pack(sgn, FP32_EXP_INF - 1, -1);
}

static inline uint64_t
fp64_max_normal(int sgn)
{
    return fp64_pack(sgn, FP64_EXP_INF - 1, -1);
}

static inline uint16_t
fp16_infinity(int sgn)
{
    return fp16_pack(sgn, FP16_EXP_INF, 0);
}

static inline uint32_t
fp32_infinity(int sgn)
{
    return fp32_pack(sgn, FP32_EXP_INF, 0);
}

static inline uint64_t
fp64_infinity(int sgn)
{
    return fp64_pack(sgn, FP64_EXP_INF, 0);
}

static inline uint16_t
fp16_defaultNaN()
{
    return fp16_pack(0, FP16_EXP_INF, 1ULL << (FP16_MANT_BITS - 1));
}

static inline uint32_t
fp32_defaultNaN()
{
    return fp32_pack(0, FP32_EXP_INF, 1ULL << (FP32_MANT_BITS - 1));
}

static inline uint64_t
fp64_defaultNaN()
{
    return fp64_pack(0, FP64_EXP_INF, 1ULL << (FP64_MANT_BITS - 1));
}

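// Unpacking yields sign, biased exponent and mantissa with the implicit
// leading 1 made explicit for normal numbers. For subnormals the
// exponent is bumped to 1 (their true scale) and, in flush-to-zero
// modes, the mantissa is cleared; FZ also raises Input Denormal, while
// FZ16 deliberately does not.
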
static inline void
fp16_unpack(int *sgn, int *exp, uint16_t *mnt, uint16_t x, int mode,
            int *flags)
{
    *sgn = x >> (FP16_BITS - 1);
    *exp = FP16_EXP(x);
    *mnt = FP16_MANT(x);

    // Handle subnormals:
    if (*exp) {
        *mnt |= 1ULL << FP16_MANT_BITS;
    } else {
        ++*exp;
        // IDC (Input Denormal) is not set in this case.
        if (mode & FPLIB_FZ16)
            *mnt = 0;
    }
}

static inline void
fp32_unpack(int *sgn, int *exp, uint32_t *mnt, uint32_t x, int mode,
            int *flags)
{
    *sgn = x >> (FP32_BITS - 1);
    *exp = FP32_EXP(x);
    *mnt = FP32_MANT(x);

    // Handle subnormals:
    if (*exp) {
        *mnt |= 1ULL << FP32_MANT_BITS;
    } else {
        ++*exp;
        if ((mode & FPLIB_FZ) && *mnt) {
            *flags |= FPLIB_IDC;
            *mnt = 0;
        }
    }
}

static inline void
fp64_unpack(int *sgn, int *exp, uint64_t *mnt, uint64_t x, int mode,
            int *flags)
{
    *sgn = x >> (FP64_BITS - 1);
    *exp = FP64_EXP(x);
    *mnt = FP64_MANT(x);

    // Handle subnormals:
    if (*exp) {
        *mnt |= 1ULL << FP64_MANT_BITS;
    } else {
        ++*exp;
        if ((mode & FPLIB_FZ) && *mnt) {
            *flags |= FPLIB_IDC;
            *mnt = 0;
        }
    }
}

static inline int
fp16_is_NaN(int exp, uint16_t mnt)
{
    return exp == FP16_EXP_INF && FP16_MANT(mnt);
}

static inline int
fp32_is_NaN(int exp, uint32_t mnt)
{
    return exp == FP32_EXP_INF && FP32_MANT(mnt);
}

static inline int
fp64_is_NaN(int exp, uint64_t mnt)
{
    return exp == FP64_EXP_INF && FP64_MANT(mnt);
}

static inline int
fp16_is_signalling_NaN(int exp, uint16_t mnt)
{
    return fp16_is_NaN(exp, mnt) && !(mnt >> (FP16_MANT_BITS - 1) & 1);
}

static inline int
fp32_is_signalling_NaN(int exp, uint32_t mnt)
{
    return fp32_is_NaN(exp, mnt) && !(mnt >> (FP32_MANT_BITS - 1) & 1);
}

static inline int
fp64_is_signalling_NaN(int exp, uint64_t mnt)
{
    return fp64_is_NaN(exp, mnt) && !(mnt >> (FP64_MANT_BITS - 1) & 1);
}

static inline int
fp16_is_quiet_NaN(int exp, uint16_t mnt)
{
    return exp == FP16_EXP_INF && (mnt >> (FP16_MANT_BITS - 1) & 1);
}

static inline int
fp32_is_quiet_NaN(int exp, uint32_t mnt)
{
    return exp == FP32_EXP_INF && (mnt >> (FP32_MANT_BITS - 1) & 1);
}

static inline int
fp64_is_quiet_NaN(int exp, uint64_t mnt)
{
    return exp == FP64_EXP_INF && (mnt >> (FP64_MANT_BITS - 1) & 1);
}

static inline int
fp16_is_infinity(int exp, uint16_t mnt)
{
    return exp == FP16_EXP_INF && !FP16_MANT(mnt);
}

static inline int
fp32_is_infinity(int exp, uint32_t mnt)
{
    return exp == FP32_EXP_INF && !FP32_MANT(mnt);
}

static inline int
fp64_is_infinity(int exp, uint64_t mnt)
{
    return exp == FP64_EXP_INF && !FP64_MANT(mnt);
}

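// A signalling NaN has a clear top fraction bit; processing it raises
// Invalid Operation and quietens it by setting that bit. In FPLIB_DN
// mode the (now quiet) NaN is then replaced by the default NaN. The
// process_NaNs helpers below apply this with signalling NaNs taking
// priority over quiet ones, and the first operand over later ones.
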
static inline uint16_t
fp16_process_NaN(uint16_t a, int mode, int *flags)
{
    if (!(a >> (FP16_MANT_BITS - 1) & 1)) {
        *flags |= FPLIB_IOC;
        a |= 1ULL << (FP16_MANT_BITS - 1);
    }
    return mode & FPLIB_DN ? fp16_defaultNaN() : a;
}

static inline uint32_t
fp32_process_NaN(uint32_t a, int mode, int *flags)
{
    if (!(a >> (FP32_MANT_BITS - 1) & 1)) {
        *flags |= FPLIB_IOC;
        a |= 1ULL << (FP32_MANT_BITS - 1);
    }
    return mode & FPLIB_DN ? fp32_defaultNaN() : a;
}

static inline uint64_t
fp64_process_NaN(uint64_t a, int mode, int *flags)
{
    if (!(a >> (FP64_MANT_BITS - 1) & 1)) {
        *flags |= FPLIB_IOC;
        a |= 1ULL << (FP64_MANT_BITS - 1);
    }
    return mode & FPLIB_DN ? fp64_defaultNaN() : a;
}

static uint16_t
fp16_process_NaNs(uint16_t a, uint16_t b, int mode, int *flags)
{
    int a_exp = FP16_EXP(a);
    uint16_t a_mnt = FP16_MANT(a);
    int b_exp = FP16_EXP(b);
    uint16_t b_mnt = FP16_MANT(b);

    // Handle signalling NaNs:
    if (fp16_is_signalling_NaN(a_exp, a_mnt))
        return fp16_process_NaN(a, mode, flags);
    if (fp16_is_signalling_NaN(b_exp, b_mnt))
        return fp16_process_NaN(b, mode, flags);

    // Handle quiet NaNs:
    if (fp16_is_NaN(a_exp, a_mnt))
        return fp16_process_NaN(a, mode, flags);
    if (fp16_is_NaN(b_exp, b_mnt))
        return fp16_process_NaN(b, mode, flags);

    return 0;
}

static uint32_t
fp32_process_NaNs(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_exp = FP32_EXP(a);
    uint32_t a_mnt = FP32_MANT(a);
    int b_exp = FP32_EXP(b);
    uint32_t b_mnt = FP32_MANT(b);

    // Handle signalling NaNs:
    if (fp32_is_signalling_NaN(a_exp, a_mnt))
        return fp32_process_NaN(a, mode, flags);
    if (fp32_is_signalling_NaN(b_exp, b_mnt))
        return fp32_process_NaN(b, mode, flags);

    // Handle quiet NaNs:
    if (fp32_is_NaN(a_exp, a_mnt))
        return fp32_process_NaN(a, mode, flags);
    if (fp32_is_NaN(b_exp, b_mnt))
        return fp32_process_NaN(b, mode, flags);

    return 0;
}

static uint64_t
fp64_process_NaNs(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_exp = FP64_EXP(a);
    uint64_t a_mnt = FP64_MANT(a);
    int b_exp = FP64_EXP(b);
    uint64_t b_mnt = FP64_MANT(b);

    // Handle signalling NaNs:
    if (fp64_is_signalling_NaN(a_exp, a_mnt))
        return fp64_process_NaN(a, mode, flags);
    if (fp64_is_signalling_NaN(b_exp, b_mnt))
        return fp64_process_NaN(b, mode, flags);

    // Handle quiet NaNs:
    if (fp64_is_NaN(a_exp, a_mnt))
        return fp64_process_NaN(a, mode, flags);
    if (fp64_is_NaN(b_exp, b_mnt))
        return fp64_process_NaN(b, mode, flags);

    return 0;
}

static uint16_t
fp16_process_NaNs3(uint16_t a, uint16_t b, uint16_t c, int mode, int *flags)
{
    int a_exp = FP16_EXP(a);
    uint16_t a_mnt = FP16_MANT(a);
    int b_exp = FP16_EXP(b);
    uint16_t b_mnt = FP16_MANT(b);
    int c_exp = FP16_EXP(c);
    uint16_t c_mnt = FP16_MANT(c);

    // Handle signalling NaNs:
    if (fp16_is_signalling_NaN(a_exp, a_mnt))
        return fp16_process_NaN(a, mode, flags);
    if (fp16_is_signalling_NaN(b_exp, b_mnt))
        return fp16_process_NaN(b, mode, flags);
    if (fp16_is_signalling_NaN(c_exp, c_mnt))
        return fp16_process_NaN(c, mode, flags);

    // Handle quiet NaNs:
    if (fp16_is_NaN(a_exp, a_mnt))
        return fp16_process_NaN(a, mode, flags);
    if (fp16_is_NaN(b_exp, b_mnt))
        return fp16_process_NaN(b, mode, flags);
    if (fp16_is_NaN(c_exp, c_mnt))
        return fp16_process_NaN(c, mode, flags);

    return 0;
}

static uint32_t
fp32_process_NaNs3(uint32_t a, uint32_t b, uint32_t c, int mode, int *flags)
{
    int a_exp = FP32_EXP(a);
    uint32_t a_mnt = FP32_MANT(a);
    int b_exp = FP32_EXP(b);
    uint32_t b_mnt = FP32_MANT(b);
    int c_exp = FP32_EXP(c);
    uint32_t c_mnt = FP32_MANT(c);

    // Handle signalling NaNs:
    if (fp32_is_signalling_NaN(a_exp, a_mnt))
        return fp32_process_NaN(a, mode, flags);
    if (fp32_is_signalling_NaN(b_exp, b_mnt))
        return fp32_process_NaN(b, mode, flags);
    if (fp32_is_signalling_NaN(c_exp, c_mnt))
        return fp32_process_NaN(c, mode, flags);

    // Handle quiet NaNs:
    if (fp32_is_NaN(a_exp, a_mnt))
        return fp32_process_NaN(a, mode, flags);
    if (fp32_is_NaN(b_exp, b_mnt))
        return fp32_process_NaN(b, mode, flags);
    if (fp32_is_NaN(c_exp, c_mnt))
        return fp32_process_NaN(c, mode, flags);

    return 0;
}

static uint64_t
fp64_process_NaNs3(uint64_t a, uint64_t b, uint64_t c, int mode, int *flags)
{
    int a_exp = FP64_EXP(a);
    uint64_t a_mnt = FP64_MANT(a);
    int b_exp = FP64_EXP(b);
    uint64_t b_mnt = FP64_MANT(b);
    int c_exp = FP64_EXP(c);
    uint64_t c_mnt = FP64_MANT(c);

    // Handle signalling NaNs:
    if (fp64_is_signalling_NaN(a_exp, a_mnt))
        return fp64_process_NaN(a, mode, flags);
    if (fp64_is_signalling_NaN(b_exp, b_mnt))
        return fp64_process_NaN(b, mode, flags);
    if (fp64_is_signalling_NaN(c_exp, c_mnt))
        return fp64_process_NaN(c, mode, flags);

    // Handle quiet NaNs:
    if (fp64_is_NaN(a_exp, a_mnt))
        return fp64_process_NaN(a, mode, flags);
    if (fp64_is_NaN(b_exp, b_mnt))
        return fp64_process_NaN(b, mode, flags);
    if (fp64_is_NaN(c_exp, c_mnt))
        return fp64_process_NaN(c, mode, flags);

    return 0;
}

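// The rounding helpers take a mantissa carrying two extra low-order
// bits: bit 1 is the guard bit (worth half an ulp) and bit 0 is a sticky
// bit ORed from everything shifted out below it. Hence error == 2 means
// the discarded part was exactly half an ulp (a tie) and error == 3
// means strictly more than half; the assert documents that tie-away
// rounding never reaches these helpers.
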
static uint16_t
fp16_round_(int sgn, int exp, uint16_t mnt, int rm, int mode, int *flags)
{
    int biased_exp; // non-negative exponent value for result
    uint16_t int_mant; // mantissa for result, less than (2 << FP16_MANT_BITS)
    int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5

    assert(rm != FPRounding_TIEAWAY);

    // Flush to zero:
    if ((mode & FPLIB_FZ16) && exp < 1) {
        *flags |= FPLIB_UFC;
        return fp16_zero(sgn);
    }

    // The bottom FP16_EXP_BITS bits of mnt are orred together:
    mnt = (4ULL << FP16_MANT_BITS | mnt >> (FP16_EXP_BITS - 1) |
           ((mnt & ((1ULL << FP16_EXP_BITS) - 1)) != 0));

    if (exp > 0) {
        biased_exp = exp;
        int_mant = mnt >> 2;
        error = mnt & 3;
    } else {
        biased_exp = 0;
        int_mant = lsr16(mnt, 3 - exp);
        error = (lsr16(mnt, 1 - exp) & 3) | !!(mnt & (lsl16(1, 1 - exp) - 1));
    }

    if (!biased_exp && error) { // xx should also check fpscr_val<11>
        *flags |= FPLIB_UFC;
    }

    // Round up:
    if ((rm == FPLIB_RN && (error == 3 ||
                            (error == 2 && (int_mant & 1)))) ||
        (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) {
        ++int_mant;
        if (int_mant == 1ULL << FP16_MANT_BITS) {
            // Rounded up from denormalized to normalized
            biased_exp = 1;
        }
        if (int_mant == 2ULL << FP16_MANT_BITS) {
            // Rounded up to next exponent
            ++biased_exp;
            int_mant >>= 1;
        }
    }

    // Handle rounding to odd aka Von Neumann rounding:
    if (error && rm == FPRounding_ODD)
        int_mant |= 1;

    // Handle overflow:
    if (!(mode & FPLIB_AHP)) {
        if (biased_exp >= (int)FP16_EXP_INF) {
            *flags |= FPLIB_OFC | FPLIB_IXC;
            if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) ||
                (rm == FPLIB_RM && sgn)) {
                return fp16_infinity(sgn);
            } else {
                return fp16_max_normal(sgn);
            }
        }
    } else {
        if (biased_exp >= (int)FP16_EXP_INF + 1) {
            *flags |= FPLIB_IOC;
            return fp16_pack(sgn, FP16_EXP_INF, -1);
        }
    }

    if (error) {
        *flags |= FPLIB_IXC;
    }

    return fp16_pack(sgn, biased_exp, int_mant);
}

static uint16_t
fp16_round(int sgn, int exp, uint16_t mnt, int mode, int *flags)
{
    return fp16_round_(sgn, exp, mnt, mode & 3, mode, flags);
}

static uint32_t
fp32_round_(int sgn, int exp, uint32_t mnt, int rm, int mode, int *flags)
{
    int biased_exp; // non-negative exponent value for result
    uint32_t int_mant; // mantissa for result, less than (2 << FP32_MANT_BITS)
    int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5

    assert(rm != FPRounding_TIEAWAY);

    // Flush to zero:
    if ((mode & FPLIB_FZ) && exp < 1) {
        *flags |= FPLIB_UFC;
        return fp32_zero(sgn);
    }

    // The bottom FP32_EXP_BITS bits of mnt are orred together:
    mnt = (4ULL << FP32_MANT_BITS | mnt >> (FP32_EXP_BITS - 1) |
           ((mnt & ((1ULL << FP32_EXP_BITS) - 1)) != 0));

    if (exp > 0) {
        biased_exp = exp;
        int_mant = mnt >> 2;
        error = mnt & 3;
    } else {
        biased_exp = 0;
        int_mant = lsr32(mnt, 3 - exp);
        error = (lsr32(mnt, 1 - exp) & 3) | !!(mnt & (lsl32(1, 1 - exp) - 1));
    }

    if (!biased_exp && error) { // xx should also check fpscr_val<11>
        *flags |= FPLIB_UFC;
    }

    // Round up:
    if ((rm == FPLIB_RN && (error == 3 ||
                            (error == 2 && (int_mant & 1)))) ||
        (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) {
        ++int_mant;
        if (int_mant == 1ULL << FP32_MANT_BITS) {
            // Rounded up from denormalized to normalized
            biased_exp = 1;
        }
        if (int_mant == 2ULL << FP32_MANT_BITS) {
            // Rounded up to next exponent
            ++biased_exp;
            int_mant >>= 1;
        }
    }

    // Handle rounding to odd aka Von Neumann rounding:
    if (error && rm == FPRounding_ODD)
        int_mant |= 1;

    // Handle overflow:
    if (biased_exp >= (int)FP32_EXP_INF) {
        *flags |= FPLIB_OFC | FPLIB_IXC;
        if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) ||
            (rm == FPLIB_RM && sgn)) {
            return fp32_infinity(sgn);
        } else {
            return fp32_max_normal(sgn);
        }
    }

    if (error) {
        *flags |= FPLIB_IXC;
    }

    return fp32_pack(sgn, biased_exp, int_mant);
}

static uint32_t
fp32_round(int sgn, int exp, uint32_t mnt, int mode, int *flags)
{
    return fp32_round_(sgn, exp, mnt, mode & 3, mode, flags);
}

static uint64_t
fp64_round_(int sgn, int exp, uint64_t mnt, int rm, int mode, int *flags)
{
    int biased_exp; // non-negative exponent value for result
    uint64_t int_mant; // mantissa for result, less than (2 << FP64_MANT_BITS)
    int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5

    assert(rm != FPRounding_TIEAWAY);

    // Flush to zero:
    if ((mode & FPLIB_FZ) && exp < 1) {
        *flags |= FPLIB_UFC;
        return fp64_zero(sgn);
    }

    // The bottom FP64_EXP_BITS bits of mnt are orred together:
    mnt = (4ULL << FP64_MANT_BITS | mnt >> (FP64_EXP_BITS - 1) |
           ((mnt & ((1ULL << FP64_EXP_BITS) - 1)) != 0));

    if (exp > 0) {
        biased_exp = exp;
        int_mant = mnt >> 2;
        error = mnt & 3;
    } else {
        biased_exp = 0;
        int_mant = lsr64(mnt, 3 - exp);
        error = (lsr64(mnt, 1 - exp) & 3) | !!(mnt & (lsl64(1, 1 - exp) - 1));
    }

    if (!biased_exp && error) { // xx should also check fpscr_val<11>
        *flags |= FPLIB_UFC;
    }

    // Round up:
    if ((rm == FPLIB_RN && (error == 3 ||
                            (error == 2 && (int_mant & 1)))) ||
        (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) {
        ++int_mant;
        if (int_mant == 1ULL << FP64_MANT_BITS) {
            // Rounded up from denormalized to normalized
            biased_exp = 1;
        }
        if (int_mant == 2ULL << FP64_MANT_BITS) {
            // Rounded up to next exponent
            ++biased_exp;
            int_mant >>= 1;
        }
    }

    // Handle rounding to odd aka Von Neumann rounding:
    if (error && rm == FPRounding_ODD)
        int_mant |= 1;

    // Handle overflow:
    if (biased_exp >= (int)FP64_EXP_INF) {
        *flags |= FPLIB_OFC | FPLIB_IXC;
        if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) ||
            (rm == FPLIB_RM && sgn)) {
            return fp64_infinity(sgn);
        } else {
            return fp64_max_normal(sgn);
        }
    }

    if (error) {
        *flags |= FPLIB_IXC;
    }

    return fp64_pack(sgn, biased_exp, int_mant);
}

static uint64_t
fp64_round(int sgn, int exp, uint64_t mnt, int mode, int *flags)
{
    return fp64_round_(sgn, exp, mnt, mode & 3, mode, flags);
}

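// The comparison helpers implement the IEEE predicates: 'eq' and 'un'
// (unordered) raise Invalid Operation only for signalling NaNs, whereas
// 'ge' and 'gt' raise it for any NaN operand. Zeroes compare equal
// regardless of sign, and for negative operands the exponent/mantissa
// ordering is inverted via the 'a_sgn ^ ...' terms.
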
static int
fp16_compare_eq(uint16_t a, uint16_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint16_t a_mnt, b_mnt;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp16_is_NaN(a_exp, a_mnt) ||
        fp16_is_NaN(b_exp, b_mnt)) {
        if (fp16_is_signalling_NaN(a_exp, a_mnt) ||
            fp16_is_signalling_NaN(b_exp, b_mnt))
            *flags |= FPLIB_IOC;
        return 0;
    }
    return a == b || (!a_mnt && !b_mnt);
}

static int
fp16_compare_ge(uint16_t a, uint16_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint16_t a_mnt, b_mnt;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp16_is_NaN(a_exp, a_mnt) ||
        fp16_is_NaN(b_exp, b_mnt)) {
        *flags |= FPLIB_IOC;
        return 0;
    }
    if (!a_mnt && !b_mnt)
        return 1;
    if (a_sgn != b_sgn)
        return b_sgn;
    if (a_exp != b_exp)
        return a_sgn ^ (a_exp > b_exp);
    if (a_mnt != b_mnt)
        return a_sgn ^ (a_mnt > b_mnt);
    return 1;
}

static int
fp16_compare_gt(uint16_t a, uint16_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint16_t a_mnt, b_mnt;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp16_is_NaN(a_exp, a_mnt) ||
        fp16_is_NaN(b_exp, b_mnt)) {
        *flags |= FPLIB_IOC;
        return 0;
    }
    if (!a_mnt && !b_mnt)
        return 0;
    if (a_sgn != b_sgn)
        return b_sgn;
    if (a_exp != b_exp)
        return a_sgn ^ (a_exp > b_exp);
    if (a_mnt != b_mnt)
        return a_sgn ^ (a_mnt > b_mnt);
    return 0;
}

static int
fp16_compare_un(uint16_t a, uint16_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint16_t a_mnt, b_mnt;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp16_is_NaN(a_exp, a_mnt) ||
        fp16_is_NaN(b_exp, b_mnt)) {
        if (fp16_is_signalling_NaN(a_exp, a_mnt) ||
            fp16_is_signalling_NaN(b_exp, b_mnt))
            *flags |= FPLIB_IOC;
        return 1;
    }
    return 0;
}

static int
fp32_compare_eq(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint32_t a_mnt, b_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp32_is_NaN(a_exp, a_mnt) ||
        fp32_is_NaN(b_exp, b_mnt)) {
        if (fp32_is_signalling_NaN(a_exp, a_mnt) ||
            fp32_is_signalling_NaN(b_exp, b_mnt))
            *flags |= FPLIB_IOC;
        return 0;
    }
    return a == b || (!a_mnt && !b_mnt);
}

static int
fp32_compare_ge(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint32_t a_mnt, b_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp32_is_NaN(a_exp, a_mnt) ||
        fp32_is_NaN(b_exp, b_mnt)) {
        *flags |= FPLIB_IOC;
        return 0;
    }
    if (!a_mnt && !b_mnt)
        return 1;
    if (a_sgn != b_sgn)
        return b_sgn;
    if (a_exp != b_exp)
        return a_sgn ^ (a_exp > b_exp);
    if (a_mnt != b_mnt)
        return a_sgn ^ (a_mnt > b_mnt);
    return 1;
}

static int
fp32_compare_gt(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint32_t a_mnt, b_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp32_is_NaN(a_exp, a_mnt) ||
        fp32_is_NaN(b_exp, b_mnt)) {
        *flags |= FPLIB_IOC;
        return 0;
    }
    if (!a_mnt && !b_mnt)
        return 0;
    if (a_sgn != b_sgn)
        return b_sgn;
    if (a_exp != b_exp)
        return a_sgn ^ (a_exp > b_exp);
    if (a_mnt != b_mnt)
        return a_sgn ^ (a_mnt > b_mnt);
    return 0;
}

static int
fp32_compare_un(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint32_t a_mnt, b_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp32_is_NaN(a_exp, a_mnt) ||
        fp32_is_NaN(b_exp, b_mnt)) {
        if (fp32_is_signalling_NaN(a_exp, a_mnt) ||
            fp32_is_signalling_NaN(b_exp, b_mnt))
            *flags |= FPLIB_IOC;
        return 1;
    }
    return 0;
}

static int
fp64_compare_eq(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint64_t a_mnt, b_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp64_is_NaN(a_exp, a_mnt) ||
        fp64_is_NaN(b_exp, b_mnt)) {
        if (fp64_is_signalling_NaN(a_exp, a_mnt) ||
            fp64_is_signalling_NaN(b_exp, b_mnt))
            *flags |= FPLIB_IOC;
        return 0;
    }
    return a == b || (!a_mnt && !b_mnt);
}

static int
fp64_compare_ge(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint64_t a_mnt, b_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp64_is_NaN(a_exp, a_mnt) ||
        fp64_is_NaN(b_exp, b_mnt)) {
        *flags |= FPLIB_IOC;
        return 0;
    }
    if (!a_mnt && !b_mnt)
        return 1;
    if (a_sgn != b_sgn)
        return b_sgn;
    if (a_exp != b_exp)
        return a_sgn ^ (a_exp > b_exp);
    if (a_mnt != b_mnt)
        return a_sgn ^ (a_mnt > b_mnt);
    return 1;
}

static int
fp64_compare_gt(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint64_t a_mnt, b_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp64_is_NaN(a_exp, a_mnt) ||
        fp64_is_NaN(b_exp, b_mnt)) {
        *flags |= FPLIB_IOC;
        return 0;
    }
    if (!a_mnt && !b_mnt)
        return 0;
    if (a_sgn != b_sgn)
        return b_sgn;
    if (a_exp != b_exp)
        return a_sgn ^ (a_exp > b_exp);
    if (a_mnt != b_mnt)
        return a_sgn ^ (a_mnt > b_mnt);
    return 0;
}

static int
fp64_compare_un(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint64_t a_mnt, b_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp64_is_NaN(a_exp, a_mnt) ||
        fp64_is_NaN(b_exp, b_mnt)) {
        if (fp64_is_signalling_NaN(a_exp, a_mnt) ||
            fp64_is_signalling_NaN(b_exp, b_mnt))
            *flags |= FPLIB_IOC;
        return 1;
    }
    return 0;
}

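// Addition aligns the smaller operand to the larger one's exponent,
// keeping three guard bits; any ones shifted out are ORed into the
// bottom bit so the final rounding still sees an inexact result. When
// cancellation produces an exact zero its sign is positive, except in
// round-towards-minus-infinity ('(mode & 3) == 2', i.e. FPLIB_RM),
// where IEEE 754 requires -0.
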
static uint16_t
fp16_add(uint16_t a, uint16_t b, int neg, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint16_t a_mnt, b_mnt, x, x_mnt;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp16_process_NaNs(a, b, mode, flags))) {
        return x;
    }

    b_sgn ^= neg;

    // Handle infinities and zeroes:
    if (a_exp == FP16_EXP_INF && b_exp == FP16_EXP_INF && a_sgn != b_sgn) {
        *flags |= FPLIB_IOC;
        return fp16_defaultNaN();
    } else if (a_exp == FP16_EXP_INF) {
        return fp16_infinity(a_sgn);
    } else if (b_exp == FP16_EXP_INF) {
        return fp16_infinity(b_sgn);
    } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) {
        return fp16_zero(a_sgn);
    }

    a_mnt <<= 3;
    b_mnt <<= 3;
    if (a_exp >= b_exp) {
        b_mnt = (lsr16(b_mnt, a_exp - b_exp) |
                 !!(b_mnt & (lsl16(1, a_exp - b_exp) - 1)));
        b_exp = a_exp;
    } else {
        a_mnt = (lsr16(a_mnt, b_exp - a_exp) |
                 !!(a_mnt & (lsl16(1, b_exp - a_exp) - 1)));
        a_exp = b_exp;
    }
    x_sgn = a_sgn;
    x_exp = a_exp;
    if (a_sgn == b_sgn) {
        x_mnt = a_mnt + b_mnt;
    } else if (a_mnt >= b_mnt) {
        x_mnt = a_mnt - b_mnt;
    } else {
        x_sgn ^= 1;
        x_mnt = b_mnt - a_mnt;
    }

    if (!x_mnt) {
        // Sign of exact zero result depends on rounding mode
        return fp16_zero((mode & 3) == 2);
    }

    x_mnt = fp16_normalise(x_mnt, &x_exp);

    return fp16_round(x_sgn, x_exp + FP16_EXP_BITS - 3, x_mnt << 1,
                      mode, flags);
}

static uint32_t
fp32_add(uint32_t a, uint32_t b, int neg, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint32_t a_mnt, b_mnt, x, x_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp32_process_NaNs(a, b, mode, flags))) {
        return x;
    }

    b_sgn ^= neg;

    // Handle infinities and zeroes:
    if (a_exp == FP32_EXP_INF && b_exp == FP32_EXP_INF && a_sgn != b_sgn) {
        *flags |= FPLIB_IOC;
        return fp32_defaultNaN();
    } else if (a_exp == FP32_EXP_INF) {
        return fp32_infinity(a_sgn);
    } else if (b_exp == FP32_EXP_INF) {
        return fp32_infinity(b_sgn);
    } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) {
        return fp32_zero(a_sgn);
    }

    a_mnt <<= 3;
    b_mnt <<= 3;
    if (a_exp >= b_exp) {
        b_mnt = (lsr32(b_mnt, a_exp - b_exp) |
                 !!(b_mnt & (lsl32(1, a_exp - b_exp) - 1)));
        b_exp = a_exp;
    } else {
        a_mnt = (lsr32(a_mnt, b_exp - a_exp) |
                 !!(a_mnt & (lsl32(1, b_exp - a_exp) - 1)));
        a_exp = b_exp;
    }
    x_sgn = a_sgn;
    x_exp = a_exp;
    if (a_sgn == b_sgn) {
        x_mnt = a_mnt + b_mnt;
    } else if (a_mnt >= b_mnt) {
        x_mnt = a_mnt - b_mnt;
    } else {
        x_sgn ^= 1;
        x_mnt = b_mnt - a_mnt;
    }

    if (!x_mnt) {
        // Sign of exact zero result depends on rounding mode
        return fp32_zero((mode & 3) == 2);
    }

    x_mnt = fp32_normalise(x_mnt, &x_exp);

    return fp32_round(x_sgn, x_exp + FP32_EXP_BITS - 3, x_mnt << 1,
                      mode, flags);
}

static uint64_t
fp64_add(uint64_t a, uint64_t b, int neg, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint64_t a_mnt, b_mnt, x, x_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp64_process_NaNs(a, b, mode, flags))) {
        return x;
    }

    b_sgn ^= neg;

    // Handle infinities and zeroes:
    if (a_exp == FP64_EXP_INF && b_exp == FP64_EXP_INF && a_sgn != b_sgn) {
        *flags |= FPLIB_IOC;
        return fp64_defaultNaN();
    } else if (a_exp == FP64_EXP_INF) {
        return fp64_infinity(a_sgn);
    } else if (b_exp == FP64_EXP_INF) {
        return fp64_infinity(b_sgn);
    } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) {
        return fp64_zero(a_sgn);
    }

    a_mnt <<= 3;
    b_mnt <<= 3;
    if (a_exp >= b_exp) {
        b_mnt = (lsr64(b_mnt, a_exp - b_exp) |
                 !!(b_mnt & (lsl64(1, a_exp - b_exp) - 1)));
        b_exp = a_exp;
    } else {
        a_mnt = (lsr64(a_mnt, b_exp - a_exp) |
                 !!(a_mnt & (lsl64(1, b_exp - a_exp) - 1)));
        a_exp = b_exp;
    }
    x_sgn = a_sgn;
    x_exp = a_exp;
    if (a_sgn == b_sgn) {
        x_mnt = a_mnt + b_mnt;
    } else if (a_mnt >= b_mnt) {
        x_mnt = a_mnt - b_mnt;
    } else {
        x_sgn ^= 1;
        x_mnt = b_mnt - a_mnt;
    }

    if (!x_mnt) {
        // Sign of exact zero result depends on rounding mode
        return fp64_zero((mode & 3) == 2);
    }

    x_mnt = fp64_normalise(x_mnt, &x_exp);

    return fp64_round(x_sgn, x_exp + FP64_EXP_BITS - 3, x_mnt << 1,
                      mode, flags);
}

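// Multiplication forms the full double-width product of the two
// mantissas, normalises it to the top of the wide word, then folds
// everything below the result width into a sticky bottom bit before
// rounding; the constant added to the exponent accounts for the
// normalisation shift so that fp*_round() sees the scale it expects.
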
static uint16_t
fp16_mul(uint16_t a, uint16_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint16_t a_mnt, b_mnt, x;
    uint32_t x_mnt;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp16_process_NaNs(a, b, mode, flags))) {
        return x;
    }

    // Handle infinities and zeroes:
    if ((a_exp == FP16_EXP_INF && !b_mnt) ||
        (b_exp == FP16_EXP_INF && !a_mnt)) {
        *flags |= FPLIB_IOC;
        return fp16_defaultNaN();
    } else if (a_exp == FP16_EXP_INF || b_exp == FP16_EXP_INF) {
        return fp16_infinity(a_sgn ^ b_sgn);
    } else if (!a_mnt || !b_mnt) {
        return fp16_zero(a_sgn ^ b_sgn);
    }

    // Multiply and normalise:
    x_sgn = a_sgn ^ b_sgn;
    x_exp = a_exp + b_exp - FP16_EXP_BIAS + 2 * FP16_EXP_BITS + 1;
    x_mnt = (uint32_t)a_mnt * b_mnt;
    x_mnt = fp32_normalise(x_mnt, &x_exp);

    // Convert to FP16_BITS bits, collapsing error into bottom bit:
    x_mnt = lsr32(x_mnt, FP16_BITS - 1) | !!lsl32(x_mnt, FP16_BITS + 1);

    return fp16_round(x_sgn, x_exp, x_mnt, mode, flags);
}

static uint32_t
fp32_mul(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint32_t a_mnt, b_mnt, x;
    uint64_t x_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp32_process_NaNs(a, b, mode, flags))) {
        return x;
    }

    // Handle infinities and zeroes:
    if ((a_exp == FP32_EXP_INF && !b_mnt) ||
        (b_exp == FP32_EXP_INF && !a_mnt)) {
        *flags |= FPLIB_IOC;
        return fp32_defaultNaN();
    } else if (a_exp == FP32_EXP_INF || b_exp == FP32_EXP_INF) {
        return fp32_infinity(a_sgn ^ b_sgn);
    } else if (!a_mnt || !b_mnt) {
        return fp32_zero(a_sgn ^ b_sgn);
    }

    // Multiply and normalise:
    x_sgn = a_sgn ^ b_sgn;
    x_exp = a_exp + b_exp - FP32_EXP_BIAS + 2 * FP32_EXP_BITS + 1;
    x_mnt = (uint64_t)a_mnt * b_mnt;
    x_mnt = fp64_normalise(x_mnt, &x_exp);

    // Convert to FP32_BITS bits, collapsing error into bottom bit:
    x_mnt = lsr64(x_mnt, FP32_BITS - 1) | !!lsl64(x_mnt, FP32_BITS + 1);

    return fp32_round(x_sgn, x_exp, x_mnt, mode, flags);
}

static uint64_t
fp64_mul(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint64_t a_mnt, b_mnt, x;
    uint64_t x0_mnt, x1_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp64_process_NaNs(a, b, mode, flags))) {
        return x;
    }

    // Handle infinities and zeroes:
    if ((a_exp == FP64_EXP_INF && !b_mnt) ||
        (b_exp == FP64_EXP_INF && !a_mnt)) {
        *flags |= FPLIB_IOC;
        return fp64_defaultNaN();
    } else if (a_exp == FP64_EXP_INF || b_exp == FP64_EXP_INF) {
        return fp64_infinity(a_sgn ^ b_sgn);
    } else if (!a_mnt || !b_mnt) {
        return fp64_zero(a_sgn ^ b_sgn);
    }

    // Multiply and normalise:
    x_sgn = a_sgn ^ b_sgn;
    x_exp = a_exp + b_exp - FP64_EXP_BIAS + 2 * FP64_EXP_BITS + 1;
    mul62x62(&x0_mnt, &x1_mnt, a_mnt, b_mnt);
    fp128_normalise(&x0_mnt, &x1_mnt, &x_exp);

    // Convert to FP64_BITS bits, collapsing error into bottom bit:
    x0_mnt = x1_mnt << 1 | !!x0_mnt;

    return fp64_round(x_sgn, x_exp, x0_mnt, mode, flags);
}

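// The fused multiply-add helpers compute a + b*c with a single rounding:
// the product is kept exactly in a double-width mantissa, the addend is
// aligned against it, and only the final sum is rounded. The 'scale'
// argument is added to the exponent at the rounding step, in effect
// multiplying the result by 2^scale before it is rounded.
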
static uint16_t
fp16_muladd(uint16_t a, uint16_t b, uint16_t c, int scale,
            int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp;
    uint16_t a_mnt, b_mnt, c_mnt, x;
    uint32_t x_mnt, y_mnt;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
    fp16_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags);

    x = fp16_process_NaNs3(a, b, c, mode, flags);

    // Quiet NaN added to product of zero and infinity:
    if (fp16_is_quiet_NaN(a_exp, a_mnt) &&
        ((!b_mnt && fp16_is_infinity(c_exp, c_mnt)) ||
         (!c_mnt && fp16_is_infinity(b_exp, b_mnt)))) {
        x = fp16_defaultNaN();
        *flags |= FPLIB_IOC;
    }

    if (x) {
        return x;
    }

    // Handle infinities and zeroes:
    if ((b_exp == FP16_EXP_INF && !c_mnt) ||
        (c_exp == FP16_EXP_INF && !b_mnt) ||
        (a_exp == FP16_EXP_INF &&
         (b_exp == FP16_EXP_INF || c_exp == FP16_EXP_INF) &&
         (a_sgn != (b_sgn ^ c_sgn)))) {
        *flags |= FPLIB_IOC;
        return fp16_defaultNaN();
    }
    if (a_exp == FP16_EXP_INF)
        return fp16_infinity(a_sgn);
    if (b_exp == FP16_EXP_INF || c_exp == FP16_EXP_INF)
        return fp16_infinity(b_sgn ^ c_sgn);
    if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn))
        return fp16_zero(a_sgn);

    x_sgn = a_sgn;
    x_exp = a_exp + 2 * FP16_EXP_BITS - 3;
    x_mnt = (uint32_t)a_mnt << (FP16_MANT_BITS + 4);

    // Multiply:
    y_sgn = b_sgn ^ c_sgn;
    y_exp = b_exp + c_exp - FP16_EXP_BIAS + 2 * FP16_EXP_BITS + 1 - 3;
    y_mnt = (uint32_t)b_mnt * c_mnt << 3;
    if (!y_mnt) {
        y_exp = x_exp;
    }

    // Add:
    if (x_exp >= y_exp) {
        y_mnt = (lsr32(y_mnt, x_exp - y_exp) |
                 !!(y_mnt & (lsl32(1, x_exp - y_exp) - 1)));
        y_exp = x_exp;
    } else {
        x_mnt = (lsr32(x_mnt, y_exp - x_exp) |
                 !!(x_mnt & (lsl32(1, y_exp - x_exp) - 1)));
        x_exp = y_exp;
    }
    if (x_sgn == y_sgn) {
        x_mnt = x_mnt + y_mnt;
    } else if (x_mnt >= y_mnt) {
        x_mnt = x_mnt - y_mnt;
    } else {
        x_sgn ^= 1;
        x_mnt = y_mnt - x_mnt;
    }

    if (!x_mnt) {
        // Sign of exact zero result depends on rounding mode
        return fp16_zero((mode & 3) == 2);
    }

    // Normalise into FP16_BITS bits, collapsing error into bottom bit:
    x_mnt = fp32_normalise(x_mnt, &x_exp);
    x_mnt = x_mnt >> (FP16_BITS - 1) | !!(uint16_t)(x_mnt << 1);

    return fp16_round(x_sgn, x_exp + scale, x_mnt, mode, flags);
}

static uint32_t
fp32_muladd(uint32_t a, uint32_t b, uint32_t c, int scale,
            int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp;
    uint32_t a_mnt, b_mnt, c_mnt, x;
    uint64_t x_mnt, y_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
    fp32_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags);

    x = fp32_process_NaNs3(a, b, c, mode, flags);

    // Quiet NaN added to product of zero and infinity:
    if (fp32_is_quiet_NaN(a_exp, a_mnt) &&
        ((!b_mnt && fp32_is_infinity(c_exp, c_mnt)) ||
         (!c_mnt && fp32_is_infinity(b_exp, b_mnt)))) {
        x = fp32_defaultNaN();
        *flags |= FPLIB_IOC;
    }

    if (x) {
        return x;
    }

    // Handle infinities and zeroes:
    if ((b_exp == FP32_EXP_INF && !c_mnt) ||
        (c_exp == FP32_EXP_INF && !b_mnt) ||
        (a_exp == FP32_EXP_INF &&
         (b_exp == FP32_EXP_INF || c_exp == FP32_EXP_INF) &&
         (a_sgn != (b_sgn ^ c_sgn)))) {
        *flags |= FPLIB_IOC;
        return fp32_defaultNaN();
    }
    if (a_exp == FP32_EXP_INF)
        return fp32_infinity(a_sgn);
    if (b_exp == FP32_EXP_INF || c_exp == FP32_EXP_INF)
        return fp32_infinity(b_sgn ^ c_sgn);
    if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn))
        return fp32_zero(a_sgn);

    x_sgn = a_sgn;
    x_exp = a_exp + 2 * FP32_EXP_BITS - 3;
    x_mnt = (uint64_t)a_mnt << (FP32_MANT_BITS + 4);

    // Multiply:
    y_sgn = b_sgn ^ c_sgn;
    y_exp = b_exp + c_exp - FP32_EXP_BIAS + 2 * FP32_EXP_BITS + 1 - 3;
    y_mnt = (uint64_t)b_mnt * c_mnt << 3;
    if (!y_mnt) {
        y_exp = x_exp;
    }

    // Add:
    if (x_exp >= y_exp) {
        y_mnt = (lsr64(y_mnt, x_exp - y_exp) |
                 !!(y_mnt & (lsl64(1, x_exp - y_exp) - 1)));
        y_exp = x_exp;
    } else {
        x_mnt = (lsr64(x_mnt, y_exp - x_exp) |
                 !!(x_mnt & (lsl64(1, y_exp - x_exp) - 1)));
        x_exp = y_exp;
    }
    if (x_sgn == y_sgn) {
        x_mnt = x_mnt + y_mnt;
    } else if (x_mnt >= y_mnt) {
        x_mnt = x_mnt - y_mnt;
    } else {
        x_sgn ^= 1;
        x_mnt = y_mnt - x_mnt;
    }

    if (!x_mnt) {
        // Sign of exact zero result depends on rounding mode
        return fp32_zero((mode & 3) == 2);
    }

    // Normalise into FP32_BITS bits, collapsing error into bottom bit:
    x_mnt = fp64_normalise(x_mnt, &x_exp);
    x_mnt = x_mnt >> (FP32_BITS - 1) | !!(uint32_t)(x_mnt << 1);

    return fp32_round(x_sgn, x_exp + scale, x_mnt, mode, flags);
}

static uint64_t
fp64_muladd(uint64_t a, uint64_t b, uint64_t c, int scale,
            int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp;
    uint64_t a_mnt, b_mnt, c_mnt, x;
    uint64_t x0_mnt, x1_mnt, y0_mnt, y1_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
    fp64_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags);

    x = fp64_process_NaNs3(a, b, c, mode, flags);

    // Quiet NaN added to product of zero and infinity:
    if (fp64_is_quiet_NaN(a_exp, a_mnt) &&
        ((!b_mnt && fp64_is_infinity(c_exp, c_mnt)) ||
         (!c_mnt && fp64_is_infinity(b_exp, b_mnt)))) {
        x = fp64_defaultNaN();
        *flags |= FPLIB_IOC;
    }

    if (x) {
        return x;
    }

    // Handle infinities and zeroes:
    if ((b_exp == FP64_EXP_INF && !c_mnt) ||
        (c_exp == FP64_EXP_INF && !b_mnt) ||
        (a_exp == FP64_EXP_INF &&
         (b_exp == FP64_EXP_INF || c_exp == FP64_EXP_INF) &&
         (a_sgn != (b_sgn ^ c_sgn)))) {
        *flags |= FPLIB_IOC;
        return fp64_defaultNaN();
    }
    if (a_exp == FP64_EXP_INF)
        return fp64_infinity(a_sgn);
    if (b_exp == FP64_EXP_INF || c_exp == FP64_EXP_INF)
        return fp64_infinity(b_sgn ^ c_sgn);
    if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn))
        return fp64_zero(a_sgn);

    x_sgn = a_sgn;
    x_exp = a_exp + FP64_EXP_BITS;
    x0_mnt = 0;
    x1_mnt = a_mnt;

    // Multiply:
    y_sgn = b_sgn ^ c_sgn;
    y_exp = b_exp + c_exp - FP64_EXP_BIAS + 2 * FP64_EXP_BITS + 1 - 3;
    mul62x62(&y0_mnt, &y1_mnt, b_mnt, c_mnt << 3);
    if (!y0_mnt && !y1_mnt) {
        y_exp = x_exp;
    }

    // Add:
    if (x_exp >= y_exp) {
        uint64_t t0, t1;
        lsl128(&t0, &t1, y0_mnt, y1_mnt,
               x_exp - y_exp < 128 ? 128 - (x_exp - y_exp) : 0);
        lsr128(&y0_mnt, &y1_mnt, y0_mnt, y1_mnt, x_exp - y_exp);
        y0_mnt |= !!(t0 | t1);
        y_exp = x_exp;
    } else {
        uint64_t t0, t1;
        lsl128(&t0, &t1, x0_mnt, x1_mnt,
               y_exp - x_exp < 128 ? 128 - (y_exp - x_exp) : 0);
        lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y_exp - x_exp);
        x0_mnt |= !!(t0 | t1);
        x_exp = y_exp;
    }
    if (x_sgn == y_sgn) {
        add128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y0_mnt, y1_mnt);
    } else if (cmp128(x0_mnt, x1_mnt, y0_mnt, y1_mnt) >= 0) {
        sub128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y0_mnt, y1_mnt);
    } else {
        x_sgn ^= 1;
        sub128(&x0_mnt, &x1_mnt, y0_mnt, y1_mnt, x0_mnt, x1_mnt);
    }

    if (!x0_mnt && !x1_mnt) {
        // Sign of exact zero result depends on rounding mode
        return fp64_zero((mode & 3) == 2);
    }

    // Normalise into FP64_BITS bits, collapsing error into bottom bit:
    fp128_normalise(&x0_mnt, &x1_mnt, &x_exp);
    x0_mnt = x1_mnt << 1 | !!x0_mnt;

    return fp64_round(x_sgn, x_exp + scale, x0_mnt, mode, flags);
}

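// Division: the 16- and 32-bit versions use one wide integer division
// and set the sticky bit when the quotient times the divisor fails to
// reproduce the dividend. The 64-bit version instead refines a
// reciprocal estimate of the divisor (in effect r' = r * (2 - b * r)),
// multiplies it by the dividend, and corrects the quotient by at most
// one ulp using a 128-bit comparison.
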
static uint16_t
fp16_div(uint16_t a, uint16_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint16_t a_mnt, b_mnt, x;
    uint32_t x_mnt;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp16_process_NaNs(a, b, mode, flags)))
        return x;

    // Handle infinities and zeroes:
    if ((a_exp == FP16_EXP_INF && b_exp == FP16_EXP_INF) ||
        (!a_mnt && !b_mnt)) {
        *flags |= FPLIB_IOC;
        return fp16_defaultNaN();
    }
    if (a_exp == FP16_EXP_INF || !b_mnt) {
        if (a_exp != FP16_EXP_INF)
            *flags |= FPLIB_DZC;
        return fp16_infinity(a_sgn ^ b_sgn);
    }
    if (!a_mnt || b_exp == FP16_EXP_INF)
        return fp16_zero(a_sgn ^ b_sgn);

    // Divide, setting bottom bit if inexact:
    a_mnt = fp16_normalise(a_mnt, &a_exp);
    x_sgn = a_sgn ^ b_sgn;
    x_exp = a_exp - b_exp + (FP16_EXP_BIAS + FP16_BITS + 2 * FP16_EXP_BITS - 3);
    x_mnt = ((uint32_t)a_mnt << (FP16_MANT_BITS - FP16_EXP_BITS + 3)) / b_mnt;
    x_mnt |= (x_mnt * b_mnt !=
              (uint32_t)a_mnt << (FP16_MANT_BITS - FP16_EXP_BITS + 3));

    // Normalise into FP16_BITS bits, collapsing error into bottom bit:
    x_mnt = fp32_normalise(x_mnt, &x_exp);
    x_mnt = x_mnt >> (FP16_BITS - 1) | !!(uint16_t)(x_mnt << 1);

    return fp16_round(x_sgn, x_exp, x_mnt, mode, flags);
}

static uint32_t
fp32_div(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint32_t a_mnt, b_mnt, x;
    uint64_t x_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp32_process_NaNs(a, b, mode, flags)))
        return x;

    // Handle infinities and zeroes:
    if ((a_exp == FP32_EXP_INF && b_exp == FP32_EXP_INF) ||
        (!a_mnt && !b_mnt)) {
        *flags |= FPLIB_IOC;
        return fp32_defaultNaN();
    }
    if (a_exp == FP32_EXP_INF || !b_mnt) {
        if (a_exp != FP32_EXP_INF)
            *flags |= FPLIB_DZC;
        return fp32_infinity(a_sgn ^ b_sgn);
    }
    if (!a_mnt || b_exp == FP32_EXP_INF)
        return fp32_zero(a_sgn ^ b_sgn);

    // Divide, setting bottom bit if inexact:
    a_mnt = fp32_normalise(a_mnt, &a_exp);
    x_sgn = a_sgn ^ b_sgn;
    x_exp = a_exp - b_exp + (FP32_EXP_BIAS + FP32_BITS + 2 * FP32_EXP_BITS - 3);
    x_mnt = ((uint64_t)a_mnt << (FP32_MANT_BITS - FP32_EXP_BITS + 3)) / b_mnt;
    x_mnt |= (x_mnt * b_mnt !=
              (uint64_t)a_mnt << (FP32_MANT_BITS - FP32_EXP_BITS + 3));

    // Normalise into FP32_BITS bits, collapsing error into bottom bit:
    x_mnt = fp64_normalise(x_mnt, &x_exp);
    x_mnt = x_mnt >> (FP32_BITS - 1) | !!(uint32_t)(x_mnt << 1);

    return fp32_round(x_sgn, x_exp, x_mnt, mode, flags);
}

static uint64_t
fp64_div(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp, c;
    uint64_t a_mnt, b_mnt, x, x_mnt, x0_mnt, x1_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp64_process_NaNs(a, b, mode, flags)))
        return x;

    // Handle infinities and zeroes:
    if ((a_exp == FP64_EXP_INF && b_exp == FP64_EXP_INF) ||
        (!a_mnt && !b_mnt)) {
        *flags |= FPLIB_IOC;
        return fp64_defaultNaN();
    }
    if (a_exp == FP64_EXP_INF || !b_mnt) {
        if (a_exp != FP64_EXP_INF)
            *flags |= FPLIB_DZC;
        return fp64_infinity(a_sgn ^ b_sgn);
    }
    if (!a_mnt || b_exp == FP64_EXP_INF)
        return fp64_zero(a_sgn ^ b_sgn);

    // Find reciprocal of divisor with Newton-Raphson:
    a_mnt = fp64_normalise(a_mnt, &a_exp);
    b_mnt = fp64_normalise(b_mnt, &b_exp);
    x_mnt = ~(uint64_t)0 / (b_mnt >> 31);
    mul64x32(&x0_mnt, &x1_mnt, b_mnt, x_mnt);
    sub128(&x0_mnt, &x1_mnt, 0, (uint64_t)1 << 32, x0_mnt, x1_mnt);
    lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 32);
    mul64x32(&x0_mnt, &x1_mnt, x0_mnt, x_mnt);
    lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 33);

    // Multiply by dividend:
    x_sgn = a_sgn ^ b_sgn;
    x_exp = a_exp - b_exp + FP64_EXP_BIAS + 8;
    mul62x62(&x0_mnt, &x1_mnt, x0_mnt, a_mnt >> 2);
    lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 4);
    x_mnt = x1_mnt;

    // This is an underestimate, so try adding one:
    mul62x62(&x0_mnt, &x1_mnt, b_mnt >> 2, x_mnt + 1);
    c = cmp128(x0_mnt, x1_mnt, 0, a_mnt >> 11);
    if (c <= 0) {
        ++x_mnt;
    }

    x_mnt = fp64_normalise(x_mnt, &x_exp);

    return fp64_round(x_sgn, x_exp, x_mnt << 1 | !!c, mode, flags);
}

static void
set_fpscr0(FPSCR &fpscr, int flags)
{
    if (flags & FPLIB_IDC) {
        fpscr.idc = 1;
    }
    if (flags & FPLIB_IOC) {
        fpscr.ioc = 1;
    }
    if (flags & FPLIB_DZC) {
        fpscr.dzc = 1;
    }
    if (flags & FPLIB_OFC) {
        fpscr.ofc = 1;
    }
    if (flags & FPLIB_UFC) {
        fpscr.ufc = 1;
    }
    if (flags & FPLIB_IXC) {
        fpscr.ixc = 1;
    }
}

static uint16_t
fp16_scale(uint16_t a, int16_t b, int mode, int *flags)
{
    int a_sgn, a_exp;
    uint16_t a_mnt;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);

    // Handle NaNs:
    if (fp16_is_NaN(a_exp, a_mnt)) {
        return fp16_process_NaN(a, mode, flags);
    }

    // Handle zeroes:
    if (!a_mnt) {
        return fp16_zero(a_sgn);
    }

    // Handle infinities:
    if (a_exp == FP16_EXP_INF) {
        return fp16_infinity(a_sgn);
    }

    b = b < -300 ? -300 : b;
    b = b > 300 ? 300 : b;
    a_exp += b;
    a_mnt <<= 3;

    a_mnt = fp16_normalise(a_mnt, &a_exp);

    return fp16_round(a_sgn, a_exp + FP16_EXP_BITS - 3, a_mnt << 1,
                      mode, flags);
}

static uint32_t
fp32_scale(uint32_t a, int32_t b, int mode, int *flags)
{
    int a_sgn, a_exp;
    uint32_t a_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);

    // Handle NaNs:
    if (fp32_is_NaN(a_exp, a_mnt)) {
        return fp32_process_NaN(a, mode, flags);
    }

    // Handle zeroes:
    if (!a_mnt) {
        return fp32_zero(a_sgn);
    }

    // Handle infinities:
    if (a_exp == FP32_EXP_INF) {
        return fp32_infinity(a_sgn);
    }

    b = b < -300 ? -300 : b;
    b = b > 300 ? 300 : b;
    a_exp += b;
    a_mnt <<= 3;

    a_mnt = fp32_normalise(a_mnt, &a_exp);

    return fp32_round(a_sgn, a_exp + FP32_EXP_BITS - 3, a_mnt << 1,
                      mode, flags);
}

static uint64_t
fp64_scale(uint64_t a, int64_t b, int mode, int *flags)
{
    int a_sgn, a_exp;
    uint64_t a_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);

    // Handle NaNs:
    if (fp64_is_NaN(a_exp, a_mnt)) {
        return fp64_process_NaN(a, mode, flags);
    }

    // Handle zeroes:
    if (!a_mnt) {
        return fp64_zero(a_sgn);
    }

    // Handle infinities:
    if (a_exp == FP64_EXP_INF) {
        return fp64_infinity(a_sgn);
    }

    b = b < -3000 ? -3000 : b;
    b = b > 3000 ? 3000 : b;
    a_exp += b;
    a_mnt <<= 3;

    a_mnt = fp64_normalise(a_mnt, &a_exp);

    return fp64_round(a_sgn, a_exp + FP64_EXP_BITS - 3, a_mnt << 1,
                      mode, flags);
}

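// Square root: each version seeds the result with the linear estimate
// x = (3*a + 5)/8 noted in the comments, then applies Heron/Newton
// iterations x' = (a/x + x)/2, roughly doubling the accuracy each time
// (the exponent is first made even so only the mantissa's root is
// needed). A final compare of x*x against a decides whether to subtract
// one ulp and supplies the sticky bit for rounding.
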
static uint16_t
fp16_sqrt(uint16_t a, int mode, int *flags)
{
    int a_sgn, a_exp, x_sgn, x_exp;
    uint16_t a_mnt, x_mnt;
    uint32_t x, t0, t1;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);

    // Handle NaNs:
    if (fp16_is_NaN(a_exp, a_mnt))
        return fp16_process_NaN(a, mode, flags);

    // Handle infinities and zeroes:
    if (!a_mnt)
        return fp16_zero(a_sgn);
    if (a_exp == FP16_EXP_INF && !a_sgn)
        return fp16_infinity(a_sgn);
    if (a_sgn) {
        *flags |= FPLIB_IOC;
        return fp16_defaultNaN();
    }

    a_mnt = fp16_normalise(a_mnt, &a_exp);
    if (a_exp & 1) {
        ++a_exp;
        a_mnt >>= 1;
    }

    // x = (a * 3 + 5) / 8
    x = ((uint32_t)a_mnt << 14) + ((uint32_t)a_mnt << 13) + ((uint32_t)5 << 28);

    // x = (a / x + x) / 2; // 8-bit accuracy
    x = (((uint32_t)a_mnt << 16) / (x >> 15) + (x >> 16)) << 15;

    // x = (a / x + x) / 2; // 16-bit accuracy
    x = (((uint32_t)a_mnt << 16) / (x >> 15) + (x >> 16)) << 15;

    x_sgn = 0;
    x_exp = (a_exp + 27) >> 1;
    x_mnt = ((x - (1 << 18)) >> 19) + 1;
    t1 = (uint32_t)x_mnt * x_mnt;
    t0 = (uint32_t)a_mnt << 9;
    if (t1 > t0) {
        --x_mnt;
    }

    x_mnt = fp16_normalise(x_mnt, &x_exp);

    return fp16_round(x_sgn, x_exp, x_mnt << 1 | (t1 != t0), mode, flags);
}

static uint32_t
fp32_sqrt(uint32_t a, int mode, int *flags)
{
    int a_sgn, a_exp, x_sgn, x_exp;
    uint32_t a_mnt, x, x_mnt;
    uint64_t t0, t1;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);

    // Handle NaNs:
    if (fp32_is_NaN(a_exp, a_mnt))
        return fp32_process_NaN(a, mode, flags);

    // Handle infinities and zeroes:
    if (!a_mnt)
        return fp32_zero(a_sgn);
    if (a_exp == FP32_EXP_INF && !a_sgn)
        return fp32_infinity(a_sgn);
    if (a_sgn) {
        *flags |= FPLIB_IOC;
        return fp32_defaultNaN();
    }

    a_mnt = fp32_normalise(a_mnt, &a_exp);
    if (!(a_exp & 1)) {
        ++a_exp;
        a_mnt >>= 1;
    }

    // x = (a * 3 + 5) / 8
    x = (a_mnt >> 2) + (a_mnt >> 3) + ((uint32_t)5 << 28);

    // x = (a / x + x) / 2; // 8-bit accuracy
    x = (a_mnt / (x >> 15) + (x >> 16)) << 15;

    // x = (a / x + x) / 2; // 16-bit accuracy
    x = (a_mnt / (x >> 15) + (x >> 16)) << 15;

    // x = (a / x + x) / 2; // 32-bit accuracy
    x = ((((uint64_t)a_mnt << 32) / x) >> 2) + (x >> 1);

    x_sgn = 0;
    x_exp = (a_exp + 147) >> 1;
    x_mnt = ((x - (1 << 5)) >> 6) + 1;
    t1 = (uint64_t)x_mnt * x_mnt;
    t0 = (uint64_t)a_mnt << 19;
    if (t1 > t0) {
        --x_mnt;
    }

    x_mnt = fp32_normalise(x_mnt, &x_exp);

    return fp32_round(x_sgn, x_exp, x_mnt << 1 | (t1 != t0), mode, flags);
}

static uint64_t
fp64_sqrt(uint64_t a, int mode, int *flags)
{
    int a_sgn, a_exp, x_sgn, x_exp, c;
    uint64_t a_mnt, x_mnt, r, x0, x1;
    uint32_t x;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);

    // Handle NaNs:
    if (fp64_is_NaN(a_exp, a_mnt))
        return fp64_process_NaN(a, mode, flags);

    // Handle infinities and zeroes:
    if (!a_mnt)
        return fp64_zero(a_sgn);
    if (a_exp == FP64_EXP_INF && !a_sgn)
        return fp64_infinity(a_sgn);
    if (a_sgn) {
        *flags |= FPLIB_IOC;
        return fp64_defaultNaN();
    }

    a_mnt = fp64_normalise(a_mnt, &a_exp);
    if (a_exp & 1) {
        ++a_exp;
        a_mnt >>= 1;
    }

    // x = (a * 3 + 5) / 8
    x = (a_mnt >> 34) + (a_mnt >> 35) + ((uint32_t)5 << 28);

    // x = (a / x + x) / 2; // 8-bit accuracy
    x = ((a_mnt >> 32) / (x >> 15) + (x >> 16)) << 15;

    // x = (a / x + x) / 2; // 16-bit accuracy
    x = ((a_mnt >> 32) / (x >> 15) + (x >> 16)) << 15;

    // x = (a / x + x) / 2; // 32-bit accuracy
    x = ((a_mnt / x) >> 2) + (x >> 1);

    // r = 1 / x; // 32-bit accuracy
    r = ((uint64_t)1 << 62) / x;

    // r = r * (2 - x * r); // 64-bit accuracy
    mul64x32(&x0, &x1, -(uint64_t)x * r << 1, r);
    lsr128(&x0, &x1, x0, x1, 31);

    // x = (x + a * r) / 2; // 64-bit accuracy
    mul62x62(&x0, &x1, a_mnt >> 10, x0 >> 2);
    lsl128(&x0, &x1, x0, x1, 5);
    lsr128(&x0, &x1, x0, x1, 56);

    x0 = ((uint64_t)x << 31) + (x0 >> 1);

    x_sgn = 0;
    x_exp = (a_exp + 1053) >> 1;
    x_mnt = x0;
    x_mnt = ((x_mnt - (1 << 8)) >> 9) + 1;
    mul62x62(&x0, &x1, x_mnt, x_mnt);
    lsl128(&x0, &x1, x0, x1, 19);
    c = cmp128(x0, x1, 0, a_mnt);
    if (c > 0)
        --x_mnt;

    x_mnt = fp64_normalise(x_mnt, &x_exp);

    return fp64_round(x_sgn, x_exp, x_mnt << 1 | !!c, mode, flags);
2199}
2200
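// Gather the fplib mode bits from the FPSCR: RMode (bits 23:22), FZ
// (bit 24) and DN (bit 25) map directly onto the FPLIB_R*, FPLIB_FZ and
// FPLIB_DN bits, and FZ16 (bit 19) is folded in as FPLIB_FZ16.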
2201static int
2202modeConv(FPSCR fpscr)
2203{
2204 uint32_t x = (uint32_t)fpscr;
2205 return (x >> 22 & 0xf) | (x >> 19 & 1 ? FPLIB_FZ16 : 0);
2206    // The AHP bit is deliberately not extracted: only fplibConvert uses AHP.
2207    return (x >> 22 & 0xf) | (x >> 19 & 1 ? FPLIB_FZ16 : 0);
2208
2209static void
2210set_fpscr(FPSCR &fpscr, int flags)
2211{
2212 // translate back to FPSCR
2213 bool underflow = false;
2214 if (flags & FPLIB_IDC) {
2215 fpscr.idc = 1;
2216 }
2217 if (flags & FPLIB_IOC) {
2218 fpscr.ioc = 1;
2219 }
2220 if (flags & FPLIB_DZC) {
2221 fpscr.dzc = 1;
2222 }
2223 if (flags & FPLIB_OFC) {
2224 fpscr.ofc = 1;
2225 }
2226 if (flags & FPLIB_UFC) {
2227        underflow = true; // an FZ-flushed underflow must not also set IXC below
2228 fpscr.ufc = 1;
2229 }
2230 if ((flags & FPLIB_IXC) && !(underflow && fpscr.fz)) {
2231 fpscr.ixc = 1;
2232 }
2233}
2234
2235template <>
2236bool
2237fplibCompareEQ(uint16_t a, uint16_t b, FPSCR &fpscr)
2238{
2239 int flags = 0;
2240 int x = fp16_compare_eq(a, b, modeConv(fpscr), &flags);
2241 set_fpscr(fpscr, flags);
2242 return x;
2243}
2244
2245template <>
2246bool
2247fplibCompareGE(uint16_t a, uint16_t b, FPSCR &fpscr)
2248{
2249 int flags = 0;
2250 int x = fp16_compare_ge(a, b, modeConv(fpscr), &flags);
2251 set_fpscr(fpscr, flags);
2252 return x;
2253}
2254
2255template <>
2256bool
2257fplibCompareGT(uint16_t a, uint16_t b, FPSCR &fpscr)
2258{
2259 int flags = 0;
2260 int x = fp16_compare_gt(a, b, modeConv(fpscr), &flags);
2261 set_fpscr(fpscr, flags);
2262 return x;
2263}
2264
2265template <>
2266bool
2267fplibCompareUN(uint16_t a, uint16_t b, FPSCR &fpscr)
2268{
2269 int flags = 0;
2270 int x = fp16_compare_un(a, b, modeConv(fpscr), &flags);
2271 set_fpscr(fpscr, flags);
2272 return x;
2273}
2274
2275template <>
2276bool
2277fplibCompareEQ(uint32_t a, uint32_t b, FPSCR &fpscr)
2278{
2279 int flags = 0;
2280 int x = fp32_compare_eq(a, b, modeConv(fpscr), &flags);
2281 set_fpscr(fpscr, flags);
2282 return x;
2283}
2284
2285template <>
2286bool
2287fplibCompareGE(uint32_t a, uint32_t b, FPSCR &fpscr)
2288{
2289 int flags = 0;
2290 int x = fp32_compare_ge(a, b, modeConv(fpscr), &flags);
2291 set_fpscr(fpscr, flags);
2292 return x;
2293}
2294
2295template <>
2296bool
2297fplibCompareGT(uint32_t a, uint32_t b, FPSCR &fpscr)
2298{
2299 int flags = 0;
2300 int x = fp32_compare_gt(a, b, modeConv(fpscr), &flags);
2301 set_fpscr(fpscr, flags);
2302 return x;
2303}
2304
2305template <>
2306bool
2307fplibCompareUN(uint32_t a, uint32_t b, FPSCR &fpscr)
2308{
2309 int flags = 0;
2310 int x = fp32_compare_un(a, b, modeConv(fpscr), &flags);
2311 set_fpscr(fpscr, flags);
2312 return x;
2313}
2314
2315template <>
2316bool
2317fplibCompareEQ(uint64_t a, uint64_t b, FPSCR &fpscr)
2318{
2319 int flags = 0;
2320 int x = fp64_compare_eq(a, b, modeConv(fpscr), &flags);
2321 set_fpscr(fpscr, flags);
2322 return x;
2323}
2324
2325template <>
2326bool
2327fplibCompareGE(uint64_t a, uint64_t b, FPSCR &fpscr)
2328{
2329 int flags = 0;
2330 int x = fp64_compare_ge(a, b, modeConv(fpscr), &flags);
2331 set_fpscr(fpscr, flags);
2332 return x;
2333}
2334
2335template <>
2336bool
2337fplibCompareGT(uint64_t a, uint64_t b, FPSCR &fpscr)
2338{
2339 int flags = 0;
2340 int x = fp64_compare_gt(a, b, modeConv(fpscr), &flags);
2341 set_fpscr(fpscr, flags);
2342 return x;
2343}
2344
2345template <>
2346bool
2347fplibCompareUN(uint64_t a, uint64_t b, FPSCR &fpscr)
2348{
2349 int flags = 0;
2350 int x = fp64_compare_un(a, b, modeConv(fpscr), &flags);
2351 set_fpscr(fpscr, flags);
2352 return x;
2353}
2354
2355template <>
2356uint16_t
2357fplibAbs(uint16_t op)
2358{
2359 return op & ~(1ULL << (FP16_BITS - 1));
2360}
2361
2362template <>
2363uint32_t
2364fplibAbs(uint32_t op)
2365{
2366 return op & ~(1ULL << (FP32_BITS - 1));
2367}
2368
2369template <>
2370uint64_t
2371fplibAbs(uint64_t op)
2372{
2373 return op & ~(1ULL << (FP64_BITS - 1));
2374}
2375
2376template <>
2377uint16_t
2378fplibAdd(uint16_t op1, uint16_t op2, FPSCR &fpscr)
2379{
2380 int flags = 0;
2381 uint16_t result = fp16_add(op1, op2, 0, modeConv(fpscr), &flags);
2382 set_fpscr0(fpscr, flags);
2383 return result;
2384}
2385
2386template <>
2387uint32_t
2388fplibAdd(uint32_t op1, uint32_t op2, FPSCR &fpscr)
2389{
2390 int flags = 0;
2391 uint32_t result = fp32_add(op1, op2, 0, modeConv(fpscr), &flags);
2392 set_fpscr0(fpscr, flags);
2393 return result;
2394}
2395
2396template <>
2397uint64_t
2398fplibAdd(uint64_t op1, uint64_t op2, FPSCR &fpscr)
2399{
2400 int flags = 0;
2401 uint64_t result = fp64_add(op1, op2, 0, modeConv(fpscr), &flags);
2402 set_fpscr0(fpscr, flags);
2403 return result;
2404}
2405
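// FPCompare: the return value is an NZCV nibble in the Arm condition-flag
// sense: 6 (0b0110) for equal, 8 (0b1000) for less than, 2 (0b0010) for
// greater than and 3 (0b0011) for unordered. A signalling NaN always
// raises Invalid Operation; a quiet NaN raises it only when signal_nans
// is set (the FCMPE form).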
2406template <>
2407int
2408fplibCompare(uint16_t op1, uint16_t op2, bool signal_nans, FPSCR &fpscr)
2409{
2410 int mode = modeConv(fpscr);
2411 int flags = 0;
2412 int sgn1, exp1, sgn2, exp2, result;
2413 uint16_t mnt1, mnt2;
2414
2415 fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2416 fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2417
2418 if (fp16_is_NaN(exp1, mnt1) || fp16_is_NaN(exp2, mnt2)) {
2419 result = 3;
2420 if (fp16_is_signalling_NaN(exp1, mnt1) ||
2421 fp16_is_signalling_NaN(exp2, mnt2) || signal_nans)
2422 flags |= FPLIB_IOC;
2423 } else {
2424 if (op1 == op2 || (!mnt1 && !mnt2)) {
2425 result = 6;
2426 } else if (sgn1 != sgn2) {
2427 result = sgn1 ? 8 : 2;
2428 } else if (exp1 != exp2) {
2429 result = sgn1 ^ (exp1 < exp2) ? 8 : 2;
2430 } else {
2431 result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2;
2432 }
2433 }
2434
2435 set_fpscr0(fpscr, flags);
2436
2437 return result;
2438}
2439
2440template <>
2441int
2442fplibCompare(uint32_t op1, uint32_t op2, bool signal_nans, FPSCR &fpscr)
2443{
2444 int mode = modeConv(fpscr);
2445 int flags = 0;
2446 int sgn1, exp1, sgn2, exp2, result;
2447 uint32_t mnt1, mnt2;
2448
2449 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2450 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2451
2452 if (fp32_is_NaN(exp1, mnt1) || fp32_is_NaN(exp2, mnt2)) {
2453 result = 3;
2454 if (fp32_is_signalling_NaN(exp1, mnt1) ||
2455 fp32_is_signalling_NaN(exp2, mnt2) || signal_nans)
2456 flags |= FPLIB_IOC;
2457 } else {
2458 if (op1 == op2 || (!mnt1 && !mnt2)) {
2459 result = 6;
2460 } else if (sgn1 != sgn2) {
2461 result = sgn1 ? 8 : 2;
2462 } else if (exp1 != exp2) {
2463 result = sgn1 ^ (exp1 < exp2) ? 8 : 2;
2464 } else {
2465 result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2;
2466 }
2467 }
2468
2469 set_fpscr0(fpscr, flags);
2470
2471 return result;
2472}
2473
2474template <>
2475int
2476fplibCompare(uint64_t op1, uint64_t op2, bool signal_nans, FPSCR &fpscr)
2477{
2478 int mode = modeConv(fpscr);
2479 int flags = 0;
2480 int sgn1, exp1, sgn2, exp2, result;
2481 uint64_t mnt1, mnt2;
2482
2483 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2484 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2485
2486 if (fp64_is_NaN(exp1, mnt1) || fp64_is_NaN(exp2, mnt2)) {
2487 result = 3;
2488 if (fp64_is_signalling_NaN(exp1, mnt1) ||
2489 fp64_is_signalling_NaN(exp2, mnt2) || signal_nans)
2490 flags |= FPLIB_IOC;
2491 } else {
2492 if (op1 == op2 || (!mnt1 && !mnt2)) {
2493 result = 6;
2494 } else if (sgn1 != sgn2) {
2495 result = sgn1 ? 8 : 2;
2496 } else if (exp1 != exp2) {
2497 result = sgn1 ^ (exp1 < exp2) ? 8 : 2;
2498 } else {
2499 result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2;
2500 }
2501 }
2502
2503 set_fpscr0(fpscr, flags);
2504
2505 return result;
2506}
2507
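// FPConvertNaN helpers: carry a NaN across formats by keeping the sign,
// forcing the quiet bit (top mantissa bit) and moving the remaining
// payload into the destination mantissa; widening shifts the payload up,
// narrowing drops its low-order bits.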
2508static uint16_t
2509fp16_FPConvertNaN_32(uint32_t op)
2510{
2511 return fp16_pack(op >> (FP32_BITS - 1), FP16_EXP_INF,
2512 1ULL << (FP16_MANT_BITS - 1) |
2513 op >> (FP32_MANT_BITS - FP16_MANT_BITS));
2514}
2515
2516static uint16_t
2517fp16_FPConvertNaN_64(uint64_t op)
2518{
2519 return fp16_pack(op >> (FP64_BITS - 1), FP16_EXP_INF,
2520 1ULL << (FP16_MANT_BITS - 1) |
2521 op >> (FP64_MANT_BITS - FP16_MANT_BITS));
2522}
2523
2524static uint32_t
2525fp32_FPConvertNaN_16(uint16_t op)
2526{
2527 return fp32_pack(op >> (FP16_BITS - 1), FP32_EXP_INF,
2528 1ULL << (FP32_MANT_BITS - 1) |
2529 (uint32_t)op << (FP32_MANT_BITS - FP16_MANT_BITS));
2530}
2531
2532static uint32_t
2533fp32_FPConvertNaN_64(uint64_t op)
2534{
2535 return fp32_pack(op >> (FP64_BITS - 1), FP32_EXP_INF,
2536 1ULL << (FP32_MANT_BITS - 1) |
2537 op >> (FP64_MANT_BITS - FP32_MANT_BITS));
2538}
2539
2540static uint64_t
2541fp64_FPConvertNaN_16(uint16_t op)
2542{
2543 return fp64_pack(op >> (FP16_BITS - 1), FP64_EXP_INF,
2544 1ULL << (FP64_MANT_BITS - 1) |
2545 (uint64_t)op << (FP64_MANT_BITS - FP16_MANT_BITS));
2546}
2547
2548static uint64_t
2549fp64_FPConvertNaN_32(uint32_t op)
2550{
2551 return fp64_pack(op >> (FP32_BITS - 1), FP64_EXP_INF,
2552 1ULL << (FP64_MANT_BITS - 1) |
2553 (uint64_t)op << (FP64_MANT_BITS - FP32_MANT_BITS));
2554}
2555
2556static uint16_t
2557fp16_FPOnePointFive(int sgn)
2558{
2559 return fp16_pack(sgn, FP16_EXP_BIAS, 1ULL << (FP16_MANT_BITS - 1));
2560}
2561
2562static uint32_t
2563fp32_FPOnePointFive(int sgn)
2564{
2565 return fp32_pack(sgn, FP32_EXP_BIAS, 1ULL << (FP32_MANT_BITS - 1));
2566}
2567
2568static uint64_t
2569fp64_FPOnePointFive(int sgn)
2570{
2571 return fp64_pack(sgn, FP64_EXP_BIAS, 1ULL << (FP64_MANT_BITS - 1));
2572}
2573
2574static uint16_t
2575fp16_FPThree(int sgn)
2576{
2577 return fp16_pack(sgn, FP16_EXP_BIAS + 1, 1ULL << (FP16_MANT_BITS - 1));
2578}
2579
2580static uint32_t
2581fp32_FPThree(int sgn)
2582{
2583 return fp32_pack(sgn, FP32_EXP_BIAS + 1, 1ULL << (FP32_MANT_BITS - 1));
2584}
2585
2586static uint64_t
2587fp64_FPThree(int sgn)
2588{
2589 return fp64_pack(sgn, FP64_EXP_BIAS + 1, 1ULL << (FP64_MANT_BITS - 1));
2590}
2591
2592static uint16_t
2593fp16_FPTwo(int sgn)
2594{
2595 return fp16_pack(sgn, FP16_EXP_BIAS + 1, 0);
2596}
2597
2598static uint32_t
2599fp32_FPTwo(int sgn)
2600{
2601 return fp32_pack(sgn, FP32_EXP_BIAS + 1, 0);
2602}
2603
2604static uint64_t
2605fp64_FPTwo(int sgn)
2606{
2607 return fp64_pack(sgn, FP64_EXP_BIAS + 1, 0);
2608}
2609
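// FPConvert between half, single and double. Conversions to half honour
// FPSCR.AHP: in the alternative half-precision format the maximum
// exponent encodes normal numbers, so an infinity saturates to the
// largest magnitude and a NaN converts to zero, both raising Invalid
// Operation.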
2610template <>
2611uint16_t
2612fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr)
2613{
2614 int mode = modeConv(fpscr);
2615 int flags = 0;
2616 int sgn, exp;
2617 uint32_t mnt;
2618 uint16_t result;
2619
2620 // Unpack floating-point operand optionally with flush-to-zero:
2621 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2622
2623 bool alt_hp = fpscr.ahp;
2624
2625 if (fp32_is_NaN(exp, mnt)) {
2626 if (alt_hp) {
2627 result = fp16_zero(sgn);
2628 } else if (fpscr.dn) {
2629 result = fp16_defaultNaN();
2630 } else {
2631 result = fp16_FPConvertNaN_32(op);
2632 }
2633 if (!(mnt >> (FP32_MANT_BITS - 1) & 1) || alt_hp) {
2634 flags |= FPLIB_IOC;
2635 }
2636 } else if (exp == FP32_EXP_INF) {
2637 if (alt_hp) {
2638 result = ((uint16_t)sgn << (FP16_BITS - 1) |
2639 ((1ULL << (FP16_BITS - 1)) - 1));
2640 flags |= FPLIB_IOC;
2641 } else {
2642 result = fp16_infinity(sgn);
2643 }
2644 } else if (!mnt) {
2645 result = fp16_zero(sgn);
2646 } else {
2647 result =
2648 fp16_round_(sgn, exp - FP32_EXP_BIAS + FP16_EXP_BIAS,
2649 mnt >> (FP32_MANT_BITS - FP16_BITS) |
2650 !!(mnt & ((1ULL << (FP32_MANT_BITS - FP16_BITS)) - 1)),
2651 rounding, (mode & 0xf) | alt_hp << 4, &flags);
2652 }
2653
2654 set_fpscr0(fpscr, flags);
2655
2656 return result;
2657}
2658
2659template <>
2660uint16_t
2661fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr)
2662{
2663 int mode = modeConv(fpscr);
2664 int flags = 0;
2665 int sgn, exp;
2666 uint64_t mnt;
2667 uint16_t result;
2668
2669 // Unpack floating-point operand optionally with flush-to-zero:
2670 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2671
2672 bool alt_hp = fpscr.ahp;
2673
2674 if (fp64_is_NaN(exp, mnt)) {
2675 if (alt_hp) {
2676 result = fp16_zero(sgn);
2677 } else if (fpscr.dn) {
2678 result = fp16_defaultNaN();
2679 } else {
2680 result = fp16_FPConvertNaN_64(op);
2681 }
2682 if (!(mnt >> (FP64_MANT_BITS - 1) & 1) || alt_hp) {
2683 flags |= FPLIB_IOC;
2684 }
2685 } else if (exp == FP64_EXP_INF) {
2686 if (alt_hp) {
2687 result = ((uint16_t)sgn << (FP16_BITS - 1) |
2688 ((1ULL << (FP16_BITS - 1)) - 1));
2689 flags |= FPLIB_IOC;
2690 } else {
2691 result = fp16_infinity(sgn);
2692 }
2693 } else if (!mnt) {
2694 result = fp16_zero(sgn);
2695 } else {
2696 result =
2697 fp16_round_(sgn, exp - FP64_EXP_BIAS + FP16_EXP_BIAS,
2698 mnt >> (FP64_MANT_BITS - FP16_BITS) |
2699 !!(mnt & ((1ULL << (FP64_MANT_BITS - FP16_BITS)) - 1)),
2700 rounding, (mode & 0xf) | alt_hp << 4, &flags);
2701 }
2702
2703 set_fpscr0(fpscr, flags);
2704
2705 return result;
2706}
2707
2708template <>
2709uint32_t
2710fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr)
2711{
2712 int mode = modeConv(fpscr);
2713 int flags = 0;
2714 int sgn, exp;
2715 uint16_t mnt;
2716 uint32_t result;
2717
2718 // Unpack floating-point operand optionally with flush-to-zero:
2719 fp16_unpack(&sgn, &exp, &mnt, op, mode & 0xf, &flags);
2720
2721 if (fp16_is_NaN(exp, mnt) && !fpscr.ahp) {
2722 if (fpscr.dn) {
2723 result = fp32_defaultNaN();
2724 } else {
2725 result = fp32_FPConvertNaN_16(op);
2726 }
2727 if (!(mnt >> (FP16_MANT_BITS - 1) & 1)) {
2728 flags |= FPLIB_IOC;
2729 }
2730 } else if (exp == FP16_EXP_INF && !fpscr.ahp) {
2731 result = fp32_infinity(sgn);
2732 } else if (!mnt) {
2733 result = fp32_zero(sgn);
2734 } else {
2735 mnt = fp16_normalise(mnt, &exp);
2736 result = fp32_pack(sgn, (exp - FP16_EXP_BIAS +
2737 FP32_EXP_BIAS + FP16_EXP_BITS),
2738 (uint32_t)mnt << (FP32_MANT_BITS - FP16_BITS + 1));
2739 }
2740
2741 set_fpscr0(fpscr, flags);
2742
2743 return result;
2744}
2745
2746template <>
2747uint32_t
2748fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr)
2749{
2750 int mode = modeConv(fpscr);
2751 int flags = 0;
2752 int sgn, exp;
2753 uint64_t mnt;
2754 uint32_t result;
2755
2756 // Unpack floating-point operand optionally with flush-to-zero:
2757 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2758
2759 if (fp64_is_NaN(exp, mnt)) {
2760 if (fpscr.dn) {
2761 result = fp32_defaultNaN();
2762 } else {
2763 result = fp32_FPConvertNaN_64(op);
2764 }
2765 if (!(mnt >> (FP64_MANT_BITS - 1) & 1)) {
2766 flags |= FPLIB_IOC;
2767 }
2768 } else if (exp == FP64_EXP_INF) {
2769 result = fp32_infinity(sgn);
2770 } else if (!mnt) {
2771 result = fp32_zero(sgn);
2772 } else {
2773 result =
2774 fp32_round_(sgn, exp - FP64_EXP_BIAS + FP32_EXP_BIAS,
2775 mnt >> (FP64_MANT_BITS - FP32_BITS) |
2776 !!(mnt & ((1ULL << (FP64_MANT_BITS - FP32_BITS)) - 1)),
2777 rounding, mode, &flags);
2778 }
2779
2780 set_fpscr0(fpscr, flags);
2781
2782 return result;
2783}
2784
2785template <>
2786uint64_t
2787fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr)
2788{
2789 int mode = modeConv(fpscr);
2790 int flags = 0;
2791 int sgn, exp;
2792 uint16_t mnt;
2793 uint64_t result;
2794
2795 // Unpack floating-point operand optionally with flush-to-zero:
2796 fp16_unpack(&sgn, &exp, &mnt, op, mode & 0xf, &flags);
2797
2798 if (fp16_is_NaN(exp, mnt) && !fpscr.ahp) {
2799 if (fpscr.dn) {
2800 result = fp64_defaultNaN();
2801 } else {
2802 result = fp64_FPConvertNaN_16(op);
2803 }
2804 if (!(mnt >> (FP16_MANT_BITS - 1) & 1)) {
2805 flags |= FPLIB_IOC;
2806 }
2807 } else if (exp == FP16_EXP_INF && !fpscr.ahp) {
2808 result = fp64_infinity(sgn);
2809 } else if (!mnt) {
2810 result = fp64_zero(sgn);
2811 } else {
2812 mnt = fp16_normalise(mnt, &exp);
2813 result = fp64_pack(sgn, (exp - FP16_EXP_BIAS +
2814 FP64_EXP_BIAS + FP16_EXP_BITS),
2815 (uint64_t)mnt << (FP64_MANT_BITS - FP16_BITS + 1));
2816 }
2817
2818 set_fpscr0(fpscr, flags);
2819
2820 return result;
2821}
2822
2823template <>
2824uint64_t
2825fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr)
2826{
2827 int mode = modeConv(fpscr);
2828 int flags = 0;
2829 int sgn, exp;
2830 uint32_t mnt;
2831 uint64_t result;
2832
2833 // Unpack floating-point operand optionally with flush-to-zero:
2834 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2835
2836 if (fp32_is_NaN(exp, mnt)) {
2837 if (fpscr.dn) {
2838 result = fp64_defaultNaN();
2839 } else {
2840 result = fp64_FPConvertNaN_32(op);
2841 }
2842 if (!(mnt >> (FP32_MANT_BITS - 1) & 1)) {
2843 flags |= FPLIB_IOC;
2844 }
2845 } else if (exp == FP32_EXP_INF) {
2846 result = fp64_infinity(sgn);
2847 } else if (!mnt) {
2848 result = fp64_zero(sgn);
2849 } else {
2850 mnt = fp32_normalise(mnt, &exp);
2851 result = fp64_pack(sgn, (exp - FP32_EXP_BIAS +
2852 FP64_EXP_BIAS + FP32_EXP_BITS),
2853 (uint64_t)mnt << (FP64_MANT_BITS - FP32_BITS + 1));
2854 }
2855
2856 set_fpscr0(fpscr, flags);
2857
2858 return result;
2859}
2860
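// FPMulAdd: fused multiply-add, addend + op1 * op2 with a single rounding.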
2861template <>
2862uint16_t
2863fplibMulAdd(uint16_t addend, uint16_t op1, uint16_t op2, FPSCR &fpscr)
2864{
2865 int flags = 0;
2866 uint16_t result = fp16_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags);
2867 set_fpscr0(fpscr, flags);
2868 return result;
2869}
2870
2871template <>
2872uint32_t
2873fplibMulAdd(uint32_t addend, uint32_t op1, uint32_t op2, FPSCR &fpscr)
2874{
2875 int flags = 0;
2876 uint32_t result = fp32_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags);
2877 set_fpscr0(fpscr, flags);
2878 return result;
2879}
2880
2881template <>
2882uint64_t
2883fplibMulAdd(uint64_t addend, uint64_t op1, uint64_t op2, FPSCR &fpscr)
2884{
2885 int flags = 0;
2886 uint64_t result = fp64_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags);
2887 set_fpscr0(fpscr, flags);
2888 return result;
2889}
2890
2891template <>
2892uint16_t
2893fplibDiv(uint16_t op1, uint16_t op2, FPSCR &fpscr)
2894{
2895 int flags = 0;
2896 uint16_t result = fp16_div(op1, op2, modeConv(fpscr), &flags);
2897 set_fpscr0(fpscr, flags);
2898 return result;
2899}
2900
2901template <>
2902uint32_t
2903fplibDiv(uint32_t op1, uint32_t op2, FPSCR &fpscr)
2904{
2905 int flags = 0;
2906 uint32_t result = fp32_div(op1, op2, modeConv(fpscr), &flags);
2907 set_fpscr0(fpscr, flags);
2908 return result;
2909}
2910
2911template <>
2912uint64_t
2913fplibDiv(uint64_t op1, uint64_t op2, FPSCR &fpscr)
2914{
2915 int flags = 0;
2916 uint64_t result = fp64_div(op1, op2, modeConv(fpscr), &flags);
2917 set_fpscr0(fpscr, flags);
2918 return result;
2919}
2920
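// FExpA (the SVE FEXPA instruction): the low 5 bits (fp16) or 6 bits
// (fp32/fp64) of the operand index a table of mantissas of 2^(i/32) or
// 2^(i/64), and the remaining bits go straight into the exponent field,
// producing a coarse 2^x used to seed vector exp() routines. For example,
// entry 32 of the fp64 table, 0x6a09e667f3bcd, is the mantissa of
// 2^(32/64) = sqrt(2).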
2921template <>
2922uint16_t
2923fplibExpA(uint16_t op)
2924{
2925 static uint16_t coeff[32] = {
2926 0x0000,
2927 0x0016,
2928 0x002d,
2929 0x0045,
2930 0x005d,
2931 0x0075,
2932 0x008e,
2933 0x00a8,
2934 0x00c2,
2935 0x00dc,
2936 0x00f8,
2937 0x0114,
2938 0x0130,
2939 0x014d,
2940 0x016b,
2941 0x0189,
2942 0x01a8,
2943 0x01c8,
2944 0x01e8,
2945 0x0209,
2946 0x022b,
2947 0x024e,
2948 0x0271,
2949 0x0295,
2950 0x02ba,
2951 0x02e0,
2952 0x0306,
2953 0x032e,
2954 0x0356,
2955 0x037f,
2956 0x03a9,
2957 0x03d4
2958 };
2959 return ((((op >> 5) & ((1 << FP16_EXP_BITS) - 1)) << FP16_MANT_BITS) |
2960 coeff[op & ((1 << 5) - 1)]);
2961}
2962
2963template <>
2964uint32_t
2965fplibExpA(uint32_t op)
2966{
2967 static uint32_t coeff[64] = {
2968 0x000000,
2969 0x0164d2,
2970 0x02cd87,
2971 0x043a29,
2972 0x05aac3,
2973 0x071f62,
2974 0x08980f,
2975 0x0a14d5,
2976 0x0b95c2,
2977 0x0d1adf,
2978 0x0ea43a,
2979 0x1031dc,
2980 0x11c3d3,
2981 0x135a2b,
2982 0x14f4f0,
2983 0x16942d,
2984 0x1837f0,
2985 0x19e046,
2986 0x1b8d3a,
2987 0x1d3eda,
2988 0x1ef532,
2989 0x20b051,
2990 0x227043,
2991 0x243516,
2992 0x25fed7,
2993 0x27cd94,
2994 0x29a15b,
2995 0x2b7a3a,
2996 0x2d583f,
2997 0x2f3b79,
2998 0x3123f6,
2999 0x3311c4,
3000 0x3504f3,
3001 0x36fd92,
3002 0x38fbaf,
3003 0x3aff5b,
3004 0x3d08a4,
3005 0x3f179a,
3006 0x412c4d,
3007 0x4346cd,
3008 0x45672a,
3009 0x478d75,
3010 0x49b9be,
3011 0x4bec15,
3012 0x4e248c,
3013 0x506334,
3014 0x52a81e,
3015 0x54f35b,
3016 0x5744fd,
3017 0x599d16,
3018 0x5bfbb8,
3019 0x5e60f5,
3020 0x60ccdf,
3021 0x633f89,
3022 0x65b907,
3023 0x68396a,
3024 0x6ac0c7,
3025 0x6d4f30,
3026 0x6fe4ba,
3027 0x728177,
3028 0x75257d,
3029 0x77d0df,
3030 0x7a83b3,
3031 0x7d3e0c
3032 };
3033 return ((((op >> 6) & ((1 << FP32_EXP_BITS) - 1)) << FP32_MANT_BITS) |
3034 coeff[op & ((1 << 6) - 1)]);
3035}
3036
3037template <>
3038uint64_t
3039fplibExpA(uint64_t op)
3040{
3041 static uint64_t coeff[64] = {
3042 0x0000000000000ULL,
3043 0x02c9a3e778061ULL,
3044 0x059b0d3158574ULL,
3045 0x0874518759bc8ULL,
3046 0x0b5586cf9890fULL,
3047 0x0e3ec32d3d1a2ULL,
3048 0x11301d0125b51ULL,
3049 0x1429aaea92de0ULL,
3050 0x172b83c7d517bULL,
3051 0x1a35beb6fcb75ULL,
3052 0x1d4873168b9aaULL,
3053 0x2063b88628cd6ULL,
3054 0x2387a6e756238ULL,
3055 0x26b4565e27cddULL,
3056 0x29e9df51fdee1ULL,
3057 0x2d285a6e4030bULL,
3058 0x306fe0a31b715ULL,
3059 0x33c08b26416ffULL,
3060 0x371a7373aa9cbULL,
3061 0x3a7db34e59ff7ULL,
3062 0x3dea64c123422ULL,
3063 0x4160a21f72e2aULL,
3064 0x44e086061892dULL,
3065 0x486a2b5c13cd0ULL,
3066 0x4bfdad5362a27ULL,
3067 0x4f9b2769d2ca7ULL,
3068 0x5342b569d4f82ULL,
3069 0x56f4736b527daULL,
3070 0x5ab07dd485429ULL,
3071 0x5e76f15ad2148ULL,
3072 0x6247eb03a5585ULL,
3073 0x6623882552225ULL,
3074 0x6a09e667f3bcdULL,
3075 0x6dfb23c651a2fULL,
3076 0x71f75e8ec5f74ULL,
3077 0x75feb564267c9ULL,
3078 0x7a11473eb0187ULL,
3079 0x7e2f336cf4e62ULL,
3080 0x82589994cce13ULL,
3081 0x868d99b4492edULL,
3082 0x8ace5422aa0dbULL,
3083 0x8f1ae99157736ULL,
3084 0x93737b0cdc5e5ULL,
3085 0x97d829fde4e50ULL,
3086 0x9c49182a3f090ULL,
3087 0xa0c667b5de565ULL,
3088 0xa5503b23e255dULL,
3089 0xa9e6b5579fdbfULL,
3090 0xae89f995ad3adULL,
3091 0xb33a2b84f15fbULL,
3092 0xb7f76f2fb5e47ULL,
3093 0xbcc1e904bc1d2ULL,
3094 0xc199bdd85529cULL,
3095 0xc67f12e57d14bULL,
3096 0xcb720dcef9069ULL,
3097 0xd072d4a07897cULL,
3098 0xd5818dcfba487ULL,
3099 0xda9e603db3285ULL,
3100 0xdfc97337b9b5fULL,
3101 0xe502ee78b3ff6ULL,
3102 0xea4afa2a490daULL,
3103 0xefa1bee615a27ULL,
3104 0xf50765b6e4540ULL,
3105 0xfa7c1819e90d8ULL
3106 };
3107 return ((((op >> 6) & ((1 << FP64_EXP_BITS) - 1)) << FP64_MANT_BITS) |
3108 coeff[op & ((1 << 6) - 1)]);
3109}
3110
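// Repack an unpacked sign/exponent/mantissa triple: if the mantissa has
// lost its hidden bit the value is re-encoded as a subnormal (exponent
// field zero). Used by min/max to return an operand unchanged apart from
// any flush-to-zero processing.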
3111static uint16_t
3112fp16_repack(int sgn, int exp, uint16_t mnt)
3113{
3114 return fp16_pack(sgn, mnt >> FP16_MANT_BITS ? exp : 0, mnt);
3115}
3116
3117static uint32_t
3118fp32_repack(int sgn, int exp, uint32_t mnt)
3119{
3120 return fp32_pack(sgn, mnt >> FP32_MANT_BITS ? exp : 0, mnt);
3121}
3122
3123static uint64_t
3124fp64_repack(int sgn, int exp, uint64_t mnt)
3125{
3126 return fp64_pack(sgn, mnt >> FP64_MANT_BITS ? exp : 0, mnt);
3127}
3128
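// For the *NM variants (FMAXNM/FMINNM): `~(op << 1) >> MANT_BITS` is zero
// exactly when op is a quiet NaN (sign shifted out, exponent and top
// mantissa bit all ones), so a single quiet-NaN operand is replaced by
// the infinity that loses the comparison, letting the numeric operand
// win. Signalling NaNs fall through to the usual NaN processing.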
3129static void
3130fp16_minmaxnum(uint16_t *op1, uint16_t *op2, int sgn)
3131{
3132 // Treat a single quiet-NaN as +Infinity/-Infinity
3133 if (!((uint16_t)~(*op1 << 1) >> FP16_MANT_BITS) &&
3134 (uint16_t)~(*op2 << 1) >> FP16_MANT_BITS)
3135 *op1 = fp16_infinity(sgn);
3136 if (!((uint16_t)~(*op2 << 1) >> FP16_MANT_BITS) &&
3137 (uint16_t)~(*op1 << 1) >> FP16_MANT_BITS)
3138 *op2 = fp16_infinity(sgn);
3139}
3140
3141static void
3142fp32_minmaxnum(uint32_t *op1, uint32_t *op2, int sgn)
3143{
3144 // Treat a single quiet-NaN as +Infinity/-Infinity
3145 if (!((uint32_t)~(*op1 << 1) >> FP32_MANT_BITS) &&
3146 (uint32_t)~(*op2 << 1) >> FP32_MANT_BITS)
3147 *op1 = fp32_infinity(sgn);
3148 if (!((uint32_t)~(*op2 << 1) >> FP32_MANT_BITS) &&
3149 (uint32_t)~(*op1 << 1) >> FP32_MANT_BITS)
3150 *op2 = fp32_infinity(sgn);
3151}
3152
3153static void
3154fp64_minmaxnum(uint64_t *op1, uint64_t *op2, int sgn)
3155{
3156 // Treat a single quiet-NaN as +Infinity/-Infinity
3157 if (!((uint64_t)~(*op1 << 1) >> FP64_MANT_BITS) &&
3158 (uint64_t)~(*op2 << 1) >> FP64_MANT_BITS)
3159 *op1 = fp64_infinity(sgn);
3160 if (!((uint64_t)~(*op2 << 1) >> FP64_MANT_BITS) &&
3161 (uint64_t)~(*op1 << 1) >> FP64_MANT_BITS)
3162 *op2 = fp64_infinity(sgn);
3163}
3164
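// FPMax: once NaNs are dealt with, the larger operand is picked with a
// raw unsigned comparison: if the signs differ the positive operand wins;
// otherwise the sign bit flips the sense of the compare, since for two
// negative numbers the smaller encoding is the larger value.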
3165template <>
3166uint16_t
3167fplibMax(uint16_t op1, uint16_t op2, FPSCR &fpscr)
3168{
3169 int mode = modeConv(fpscr);
3170 int flags = 0;
3171 int sgn1, exp1, sgn2, exp2;
3172 uint16_t mnt1, mnt2, x, result;
3173
3174 fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3175 fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3176
3177 if ((x = fp16_process_NaNs(op1, op2, mode, &flags))) {
3178 result = x;
3179 } else {
3180 result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ?
3181 fp16_repack(sgn1, exp1, mnt1) :
3182 fp16_repack(sgn2, exp2, mnt2));
3183 }
3184 set_fpscr0(fpscr, flags);
3185 return result;
3186}
3187
3188template <>
3189uint32_t
3190fplibMax(uint32_t op1, uint32_t op2, FPSCR &fpscr)
3191{
3192 int mode = modeConv(fpscr);
3193 int flags = 0;
3194 int sgn1, exp1, sgn2, exp2;
3195 uint32_t mnt1, mnt2, x, result;
3196
3197 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3198 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3199
3200 if ((x = fp32_process_NaNs(op1, op2, mode, &flags))) {
3201 result = x;
3202 } else {
3203 result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ?
3204 fp32_repack(sgn1, exp1, mnt1) :
3205 fp32_repack(sgn2, exp2, mnt2));
3206 }
3207 set_fpscr0(fpscr, flags);
3208 return result;
3209}
3210
3211template <>
3212uint64_t
3213fplibMax(uint64_t op1, uint64_t op2, FPSCR &fpscr)
3214{
3215 int mode = modeConv(fpscr);
3216 int flags = 0;
3217 int sgn1, exp1, sgn2, exp2;
3218 uint64_t mnt1, mnt2, x, result;
3219
3220 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3221 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3222
3223 if ((x = fp64_process_NaNs(op1, op2, mode, &flags))) {
3224 result = x;
3225 } else {
3226 result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ?
3227 fp64_repack(sgn1, exp1, mnt1) :
3228 fp64_repack(sgn2, exp2, mnt2));
3229 }
3230 set_fpscr0(fpscr, flags);
3231 return result;
3232}
3233
3234template <>
3235uint16_t
3236fplibMaxNum(uint16_t op1, uint16_t op2, FPSCR &fpscr)
3237{
3238 fp16_minmaxnum(&op1, &op2, 1);
3239 return fplibMax<uint16_t>(op1, op2, fpscr);
3240}
3241
3242template <>
3243uint32_t
3244fplibMaxNum(uint32_t op1, uint32_t op2, FPSCR &fpscr)
3245{
3246 fp32_minmaxnum(&op1, &op2, 1);
3247 return fplibMax<uint32_t>(op1, op2, fpscr);
3248}
3249
3250template <>
3251uint64_t
3252fplibMaxNum(uint64_t op1, uint64_t op2, FPSCR &fpscr)
3253{
3254 fp64_minmaxnum(&op1, &op2, 1);
3255 return fplibMax<uint64_t>(op1, op2, fpscr);
3256}
3257
3258template <>
3259uint16_t
3260fplibMin(uint16_t op1, uint16_t op2, FPSCR &fpscr)
3261{
3262 int mode = modeConv(fpscr);
3263 int flags = 0;
3264 int sgn1, exp1, sgn2, exp2;
3265 uint16_t mnt1, mnt2, x, result;
3266
3267 fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3268 fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3269
3270 if ((x = fp16_process_NaNs(op1, op2, mode, &flags))) {
3271 result = x;
3272 } else {
3273 result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ?
3274 fp16_repack(sgn1, exp1, mnt1) :
3275 fp16_repack(sgn2, exp2, mnt2));
3276 }
3277 set_fpscr0(fpscr, flags);
3278 return result;
3279}
3280
3281template <>
3282uint32_t
3283fplibMin(uint32_t op1, uint32_t op2, FPSCR &fpscr)
3284{
3285 int mode = modeConv(fpscr);
3286 int flags = 0;
3287 int sgn1, exp1, sgn2, exp2;
3288 uint32_t mnt1, mnt2, x, result;
3289
3290 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3291 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3292
3293 if ((x = fp32_process_NaNs(op1, op2, mode, &flags))) {
3294 result = x;
3295 } else {
3296 result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ?
3297 fp32_repack(sgn1, exp1, mnt1) :
3298 fp32_repack(sgn2, exp2, mnt2));
3299 }
3300 set_fpscr0(fpscr, flags);
3301 return result;
3302}
3303
3304template <>
3305uint64_t
3306fplibMin(uint64_t op1, uint64_t op2, FPSCR &fpscr)
3307{
3308 int mode = modeConv(fpscr);
3309 int flags = 0;
3310 int sgn1, exp1, sgn2, exp2;
3311 uint64_t mnt1, mnt2, x, result;
3312
3313 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3314 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3315
3316 if ((x = fp64_process_NaNs(op1, op2, mode, &flags))) {
3317 result = x;
3318 } else {
3319 result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ?
3320 fp64_repack(sgn1, exp1, mnt1) :
3321 fp64_repack(sgn2, exp2, mnt2));
3322 }
3323 set_fpscr0(fpscr, flags);
3324 return result;
3325}
3326
3327template <>
3328uint16_t
3329fplibMinNum(uint16_t op1, uint16_t op2, FPSCR &fpscr)
3330{
3331 fp16_minmaxnum(&op1, &op2, 0);
3332 return fplibMin<uint16_t>(op1, op2, fpscr);
3333}
3334
3335template <>
3336uint32_t
3337fplibMinNum(uint32_t op1, uint32_t op2, FPSCR &fpscr)
3338{
3339 fp32_minmaxnum(&op1, &op2, 0);
3340 return fplibMin<uint32_t>(op1, op2, fpscr);
3341}
3342
3343template <>
3344uint64_t
3345fplibMinNum(uint64_t op1, uint64_t op2, FPSCR &fpscr)
3346{
3347 fp64_minmaxnum(&op1, &op2, 0);
3348 return fplibMin<uint64_t>(op1, op2, fpscr);
3349}
3350
3351template <>
3352uint16_t
3353fplibMul(uint16_t op1, uint16_t op2, FPSCR &fpscr)
3354{
3355 int flags = 0;
3356 uint16_t result = fp16_mul(op1, op2, modeConv(fpscr), &flags);
3357 set_fpscr0(fpscr, flags);
3358 return result;
3359}
3360
3361template <>
3362uint32_t
3363fplibMul(uint32_t op1, uint32_t op2, FPSCR &fpscr)
3364{
3365 int flags = 0;
3366 uint32_t result = fp32_mul(op1, op2, modeConv(fpscr), &flags);
3367 set_fpscr0(fpscr, flags);
3368 return result;
3369}
3370
3371template <>
3372uint64_t
3373fplibMul(uint64_t op1, uint64_t op2, FPSCR &fpscr)
3374{
3375 int flags = 0;
3376 uint64_t result = fp64_mul(op1, op2, modeConv(fpscr), &flags);
3377 set_fpscr0(fpscr, flags);
3378 return result;
3379}
3380
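// FPMulX (FMULX): identical to FPMul except for the intermediate
// 0 * infinity, which FPMul turns into the default NaN but which here
// yields +/-2.0 with the product's sign.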
3381template <>
3382uint16_t
3383fplibMulX(uint16_t op1, uint16_t op2, FPSCR &fpscr)
3384{
3385 int mode = modeConv(fpscr);
3386 int flags = 0;
3387 int sgn1, exp1, sgn2, exp2;
3388 uint16_t mnt1, mnt2, result;
3389
3390 fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3391 fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3392
3393 result = fp16_process_NaNs(op1, op2, mode, &flags);
3394 if (!result) {
3395 if ((exp1 == FP16_EXP_INF && !mnt2) ||
3396 (exp2 == FP16_EXP_INF && !mnt1)) {
3397 result = fp16_FPTwo(sgn1 ^ sgn2);
3398 } else if (exp1 == FP16_EXP_INF || exp2 == FP16_EXP_INF) {
3399 result = fp16_infinity(sgn1 ^ sgn2);
3400 } else if (!mnt1 || !mnt2) {
3401 result = fp16_zero(sgn1 ^ sgn2);
3402 } else {
3403 result = fp16_mul(op1, op2, mode, &flags);
3404 }
3405 }
3406
3407 set_fpscr0(fpscr, flags);
3408
3409 return result;
3410}
3411
3412template <>
3413uint32_t
3414fplibMulX(uint32_t op1, uint32_t op2, FPSCR &fpscr)
3415{
3416 int mode = modeConv(fpscr);
3417 int flags = 0;
3418 int sgn1, exp1, sgn2, exp2;
3419 uint32_t mnt1, mnt2, result;
3420
3421 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3422 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3423
3424 result = fp32_process_NaNs(op1, op2, mode, &flags);
3425 if (!result) {
3426 if ((exp1 == FP32_EXP_INF && !mnt2) ||
3427 (exp2 == FP32_EXP_INF && !mnt1)) {
3428 result = fp32_FPTwo(sgn1 ^ sgn2);
3429 } else if (exp1 == FP32_EXP_INF || exp2 == FP32_EXP_INF) {
3430 result = fp32_infinity(sgn1 ^ sgn2);
3431 } else if (!mnt1 || !mnt2) {
3432 result = fp32_zero(sgn1 ^ sgn2);
3433 } else {
3434 result = fp32_mul(op1, op2, mode, &flags);
3435 }
3436 }
3437
3438 set_fpscr0(fpscr, flags);
3439
3440 return result;
3441}
3442
3443template <>
3444uint64_t
3445fplibMulX(uint64_t op1, uint64_t op2, FPSCR &fpscr)
3446{
3447 int mode = modeConv(fpscr);
3448 int flags = 0;
3449 int sgn1, exp1, sgn2, exp2;
3450 uint64_t mnt1, mnt2, result;
3451
3452 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3453 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3454
3455 result = fp64_process_NaNs(op1, op2, mode, &flags);
3456 if (!result) {
3457 if ((exp1 == FP64_EXP_INF && !mnt2) ||
3458 (exp2 == FP64_EXP_INF && !mnt1)) {
3459 result = fp64_FPTwo(sgn1 ^ sgn2);
3460 } else if (exp1 == FP64_EXP_INF || exp2 == FP64_EXP_INF) {
3461 result = fp64_infinity(sgn1 ^ sgn2);
3462 } else if (!mnt1 || !mnt2) {
3463 result = fp64_zero(sgn1 ^ sgn2);
3464 } else {
3465 result = fp64_mul(op1, op2, mode, &flags);
3466 }
3467 }
3468
3469 set_fpscr0(fpscr, flags);
3470
3471 return result;
3472}
3473
3474template <>
3475uint16_t
3476fplibNeg(uint16_t op)
3477{
3478 return op ^ 1ULL << (FP16_BITS - 1);
3479}
3480
3481template <>
3482uint32_t
3483fplibNeg(uint32_t op)
3484{
3485 return op ^ 1ULL << (FP32_BITS - 1);
3486}
3487
3488template <>
3489uint64_t
3490fplibNeg(uint64_t op)
3491{
3492 return op ^ 1ULL << (FP64_BITS - 1);
3493}
3494
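// 8-bit reciprocal square root estimates, matching the table generated by
// the Arm ARM RecipSqrtEstimate() pseudocode. The index is built from the
// exponent's parity and the top seven mantissa bits, i.e. the input
// normalised into [0.25, 1.0).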
3495static const uint8_t recip_sqrt_estimate[256] = {
3496 255, 253, 251, 249, 247, 245, 243, 242, 240, 238, 236, 234, 233, 231, 229, 228,
3497 226, 224, 223, 221, 219, 218, 216, 215, 213, 212, 210, 209, 207, 206, 204, 203,
3498 201, 200, 198, 197, 196, 194, 193, 192, 190, 189, 188, 186, 185, 184, 183, 181,
3499 180, 179, 178, 176, 175, 174, 173, 172, 170, 169, 168, 167, 166, 165, 164, 163,
3500 162, 160, 159, 158, 157, 156, 155, 154, 153, 152, 151, 150, 149, 148, 147, 146,
3501 145, 144, 143, 142, 141, 140, 140, 139, 138, 137, 136, 135, 134, 133, 132, 131,
3502 131, 130, 129, 128, 127, 126, 126, 125, 124, 123, 122, 121, 121, 120, 119, 118,
3503 118, 117, 116, 115, 114, 114, 113, 112, 111, 111, 110, 109, 109, 108, 107, 106,
3504 105, 104, 103, 101, 100, 99, 97, 96, 95, 93, 92, 91, 90, 88, 87, 86,
3505 85, 84, 82, 81, 80, 79, 78, 77, 76, 75, 74, 72, 71, 70, 69, 68,
3506 67, 66, 65, 64, 63, 62, 61, 60, 60, 59, 58, 57, 56, 55, 54, 53,
3507 52, 51, 51, 50, 49, 48, 47, 46, 46, 45, 44, 43, 42, 42, 41, 40,
3508 39, 38, 38, 37, 36, 35, 35, 34, 33, 33, 32, 31, 30, 30, 29, 28,
3509 28, 27, 26, 26, 25, 24, 24, 23, 22, 22, 21, 20, 20, 19, 19, 18,
3510 17, 17, 16, 16, 15, 14, 14, 13, 13, 12, 11, 11, 10, 10, 9, 9,
3511 8, 8, 7, 6, 6, 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 0
3512};
3513
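// FPRSqrtEstimate: table-driven 1/sqrt(x) approximation, good to about
// eight bits. The result exponent is roughly half the negated input
// exponent, (3 * bias - exp - 1) / 2, and the table above supplies the
// mantissa.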
3514template <>
3515uint16_t
3516fplibRSqrtEstimate(uint16_t op, FPSCR &fpscr)
3517{
3518 int mode = modeConv(fpscr);
3519 int flags = 0;
3520 int sgn, exp;
3521 uint16_t mnt, result;
3522
3523 fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags);
3524
3525 if (fp16_is_NaN(exp, mnt)) {
3526 result = fp16_process_NaN(op, mode, &flags);
3527 } else if (!mnt) {
3528 result = fp16_infinity(sgn);
3529 flags |= FPLIB_DZC;
3530 } else if (sgn) {
3531 result = fp16_defaultNaN();
3532 flags |= FPLIB_IOC;
3533 } else if (exp == FP16_EXP_INF) {
3534 result = fp16_zero(0);
3535 } else {
3536 exp += FP16_EXP_BITS;
3537 mnt = fp16_normalise(mnt, &exp);
3538 mnt = recip_sqrt_estimate[(~exp & 1) << 7 |
3539 (mnt >> (FP16_BITS - 8) & 127)];
3540 result = fp16_pack(0, (3 * FP16_EXP_BIAS - exp - 1) >> 1,
3541 mnt << (FP16_MANT_BITS - 8));
3542 }
3543
3544 set_fpscr0(fpscr, flags);
3545
3546 return result;
3547}
3548
3549template <>
3550uint32_t
3551fplibRSqrtEstimate(uint32_t op, FPSCR &fpscr)
3552{
3553 int mode = modeConv(fpscr);
3554 int flags = 0;
3555 int sgn, exp;
3556 uint32_t mnt, result;
3557
3558 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
3559
3560 if (fp32_is_NaN(exp, mnt)) {
3561 result = fp32_process_NaN(op, mode, &flags);
3562 } else if (!mnt) {
3563 result = fp32_infinity(sgn);
3564 flags |= FPLIB_DZC;
3565 } else if (sgn) {
3566 result = fp32_defaultNaN();
3567 flags |= FPLIB_IOC;
3568 } else if (exp == FP32_EXP_INF) {
3569 result = fp32_zero(0);
3570 } else {
3571 exp += FP32_EXP_BITS;
3572 mnt = fp32_normalise(mnt, &exp);
3573 mnt = recip_sqrt_estimate[(~exp & 1) << 7 |
3574 (mnt >> (FP32_BITS - 8) & 127)];
3575 result = fp32_pack(0, (3 * FP32_EXP_BIAS - exp - 1) >> 1,
3576 mnt << (FP32_MANT_BITS - 8));
3577 }
3578
3579 set_fpscr0(fpscr, flags);
3580
3581 return result;
3582}
3583
3584template <>
3585uint64_t
3586fplibRSqrtEstimate(uint64_t op, FPSCR &fpscr)
3587{
3588 int mode = modeConv(fpscr);
3589 int flags = 0;
3590 int sgn, exp;
3591 uint64_t mnt, result;
3592
3593 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
3594
3595 if (fp64_is_NaN(exp, mnt)) {
3596 result = fp64_process_NaN(op, mode, &flags);
3597 } else if (!mnt) {
3598 result = fp64_infinity(sgn);
3599 flags |= FPLIB_DZC;
3600 } else if (sgn) {
3601 result = fp64_defaultNaN();
3602 flags |= FPLIB_IOC;
3603 } else if (exp == FP64_EXP_INF) {
3604        result = fp64_zero(0);
3605 } else {
3606 exp += FP64_EXP_BITS;
3607 mnt = fp64_normalise(mnt, &exp);
3608 mnt = recip_sqrt_estimate[(~exp & 1) << 7 |
3609 (mnt >> (FP64_BITS - 8) & 127)];
3610 result = fp64_pack(0, (3 * FP64_EXP_BIAS - exp - 1) >> 1,
3611 mnt << (FP64_MANT_BITS - 8));
3612 }
3613
3614 set_fpscr0(fpscr, flags);
3615
3616 return result;
3617}
3618
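// FRSQRTS step: computes (3 - op1 * op2) / 2 with a single rounding.
// op1 is negated up front so the core is a muladd of 3 + (-op1) * op2;
// the trailing -1 argument scales the fused result by 2^-1. This is one
// Newton-Raphson step towards 1/sqrt(), with 0 * infinity defined to
// give exactly 1.5.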
3619template <>
3620uint16_t
3621fplibRSqrtStepFused(uint16_t op1, uint16_t op2, FPSCR &fpscr)
3622{
3623 int mode = modeConv(fpscr);
3624 int flags = 0;
3625 int sgn1, exp1, sgn2, exp2;
3626 uint16_t mnt1, mnt2, result;
3627
3628 op1 = fplibNeg<uint16_t>(op1);
3629 fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3630 fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3631
3632 result = fp16_process_NaNs(op1, op2, mode, &flags);
3633 if (!result) {
3634 if ((exp1 == FP16_EXP_INF && !mnt2) ||
3635 (exp2 == FP16_EXP_INF && !mnt1)) {
3636 result = fp16_FPOnePointFive(0);
3637 } else if (exp1 == FP16_EXP_INF || exp2 == FP16_EXP_INF) {
3638 result = fp16_infinity(sgn1 ^ sgn2);
3639 } else {
3640 result = fp16_muladd(fp16_FPThree(0), op1, op2, -1, mode, &flags);
3641 }
3642 }
3643
3644 set_fpscr0(fpscr, flags);
3645
3646 return result;
3647}
3648
3649template <>
3650uint32_t
3651fplibRSqrtStepFused(uint32_t op1, uint32_t op2, FPSCR &fpscr)
3652{
3653 int mode = modeConv(fpscr);
3654 int flags = 0;
3655 int sgn1, exp1, sgn2, exp2;
3656 uint32_t mnt1, mnt2, result;
3657
3658 op1 = fplibNeg<uint32_t>(op1);
3659 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3660 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3661
3662 result = fp32_process_NaNs(op1, op2, mode, &flags);
3663 if (!result) {
3664 if ((exp1 == FP32_EXP_INF && !mnt2) ||
3665 (exp2 == FP32_EXP_INF && !mnt1)) {
3666 result = fp32_FPOnePointFive(0);
3667 } else if (exp1 == FP32_EXP_INF || exp2 == FP32_EXP_INF) {
3668 result = fp32_infinity(sgn1 ^ sgn2);
3669 } else {
3670 result = fp32_muladd(fp32_FPThree(0), op1, op2, -1, mode, &flags);
3671 }
3672 }
3673
3674 set_fpscr0(fpscr, flags);
3675
3676 return result;
3677}
3678
3679template <>
3680uint64_t
3681fplibRSqrtStepFused(uint64_t op1, uint64_t op2, FPSCR &fpscr)
3682{
3683 int mode = modeConv(fpscr);
3684 int flags = 0;
3685 int sgn1, exp1, sgn2, exp2;
3686 uint64_t mnt1, mnt2, result;
3687
3688 op1 = fplibNeg<uint64_t>(op1);
3689 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3690 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3691
3692 result = fp64_process_NaNs(op1, op2, mode, &flags);
3693 if (!result) {
3694 if ((exp1 == FP64_EXP_INF && !mnt2) ||
3695 (exp2 == FP64_EXP_INF && !mnt1)) {
3696 result = fp64_FPOnePointFive(0);
3697 } else if (exp1 == FP64_EXP_INF || exp2 == FP64_EXP_INF) {
3698 result = fp64_infinity(sgn1 ^ sgn2);
3699 } else {
3700 result = fp64_muladd(fp64_FPThree(0), op1, op2, -1, mode, &flags);
3701 }
3702 }
3703
3704 set_fpscr0(fpscr, flags);
3705
3706 return result;
3707}
3708
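// FPRecipEstimate: 1/x to roughly eight bits, per the Arm ARM pseudocode.
// An input so small that its reciprocal is unrepresentable overflows to
// infinity or the largest normal, depending on the rounding mode; with
// flush-to-zero set, an input too large underflows the result to zero.
// The core estimate divides 2^19 by the operand's top mantissa bits and
// rounds, denormalising the result for output exponents 0 and -1.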
3709template <>
3710uint16_t
3711fplibRecipEstimate(uint16_t op, FPSCR &fpscr)
3712{
3713 int mode = modeConv(fpscr);
3714 int flags = 0;
3715 int sgn, exp;
3716 uint16_t mnt, result;
3717
3718 fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags);
3719
3720 if (fp16_is_NaN(exp, mnt)) {
3721 result = fp16_process_NaN(op, mode, &flags);
3722 } else if (exp == FP16_EXP_INF) {
3723 result = fp16_zero(sgn);
3724 } else if (!mnt) {
3725 result = fp16_infinity(sgn);
3726 flags |= FPLIB_DZC;
3727 } else if (!((uint16_t)(op << 1) >> (FP16_MANT_BITS - 1))) {
3728 bool overflow_to_inf = false;
3729 switch (FPCRRounding(fpscr)) {
3730 case FPRounding_TIEEVEN:
3731 overflow_to_inf = true;
3732 break;
3733 case FPRounding_POSINF:
3734 overflow_to_inf = !sgn;
3735 break;
3736 case FPRounding_NEGINF:
3737 overflow_to_inf = sgn;
3738 break;
3739 case FPRounding_ZERO:
3740 overflow_to_inf = false;
3741 break;
3742 default:
3743 assert(0);
3744 }
3745 result = overflow_to_inf ? fp16_infinity(sgn) : fp16_max_normal(sgn);
3746 flags |= FPLIB_OFC | FPLIB_IXC;
3747 } else if (fpscr.fz16 && exp >= 2 * FP16_EXP_BIAS - 1) {
3748 result = fp16_zero(sgn);
3749 flags |= FPLIB_UFC;
3750 } else {
3751 exp += FP16_EXP_BITS;
3752 mnt = fp16_normalise(mnt, &exp);
3753 int result_exp = 2 * FP16_EXP_BIAS - 1 - exp;
3754 uint16_t fraction = (((uint32_t)1 << 19) /
3755 (mnt >> (FP16_BITS - 10) | 1) + 1) >> 1;
3756 fraction <<= FP16_MANT_BITS - 8;
3757 if (result_exp == 0) {
3758 fraction >>= 1;
3759 } else if (result_exp == -1) {
3760 fraction >>= 2;
3761 result_exp = 0;
3762 }
3763 result = fp16_pack(sgn, result_exp, fraction);
3764 }
3765
3766 set_fpscr0(fpscr, flags);
3767
3768 return result;
3769}
3770
3771template <>
3772uint32_t
3773fplibRecipEstimate(uint32_t op, FPSCR &fpscr)
3774{
3775 int mode = modeConv(fpscr);
3776 int flags = 0;
3777 int sgn, exp;
3778 uint32_t mnt, result;
3779
3780 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
3781
3782 if (fp32_is_NaN(exp, mnt)) {
3783 result = fp32_process_NaN(op, mode, &flags);
3784 } else if (exp == FP32_EXP_INF) {
3785 result = fp32_zero(sgn);
3786 } else if (!mnt) {
3787 result = fp32_infinity(sgn);
3788 flags |= FPLIB_DZC;
3789 } else if (!((uint32_t)(op << 1) >> (FP32_MANT_BITS - 1))) {
3790 bool overflow_to_inf = false;
3791 switch (FPCRRounding(fpscr)) {
3792 case FPRounding_TIEEVEN:
3793 overflow_to_inf = true;
3794 break;
3795 case FPRounding_POSINF:
3796 overflow_to_inf = !sgn;
3797 break;
3798 case FPRounding_NEGINF:
3799 overflow_to_inf = sgn;
3800 break;
3801 case FPRounding_ZERO:
3802 overflow_to_inf = false;
3803 break;
3804 default:
3805 assert(0);
3806 }
3807 result = overflow_to_inf ? fp32_infinity(sgn) : fp32_max_normal(sgn);
3808 flags |= FPLIB_OFC | FPLIB_IXC;
3809 } else if (fpscr.fz && exp >= 2 * FP32_EXP_BIAS - 1) {
3810 result = fp32_zero(sgn);
3811 flags |= FPLIB_UFC;
3812 } else {
3813 exp += FP32_EXP_BITS;
3814 mnt = fp32_normalise(mnt, &exp);
3815 int result_exp = 2 * FP32_EXP_BIAS - 1 - exp;
3816 uint32_t fraction = (((uint32_t)1 << 19) /
3817 (mnt >> (FP32_BITS - 10) | 1) + 1) >> 1;
3818 fraction <<= FP32_MANT_BITS - 8;
3819 if (result_exp == 0) {
3820 fraction >>= 1;
3821 } else if (result_exp == -1) {
3822 fraction >>= 2;
3823 result_exp = 0;
3824 }
3825 result = fp32_pack(sgn, result_exp, fraction);
3826 }
3827
3828 set_fpscr0(fpscr, flags);
3829
3830 return result;
3831}
3832
3833template <>
3834uint64_t
3835fplibRecipEstimate(uint64_t op, FPSCR &fpscr)
3836{
3837 int mode = modeConv(fpscr);
3838 int flags = 0;
3839 int sgn, exp;
3840 uint64_t mnt, result;
3841
3842 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
3843
3844 if (fp64_is_NaN(exp, mnt)) {
3845 result = fp64_process_NaN(op, mode, &flags);
3846 } else if (exp == FP64_EXP_INF) {
3847 result = fp64_zero(sgn);
3848 } else if (!mnt) {
3849 result = fp64_infinity(sgn);
3850 flags |= FPLIB_DZC;
3851 } else if (!((uint64_t)(op << 1) >> (FP64_MANT_BITS - 1))) {
3852 bool overflow_to_inf = false;
3853 switch (FPCRRounding(fpscr)) {
3854 case FPRounding_TIEEVEN:
3855 overflow_to_inf = true;
3856 break;
3857 case FPRounding_POSINF:
3858 overflow_to_inf = !sgn;
3859 break;
3860 case FPRounding_NEGINF:
3861 overflow_to_inf = sgn;
3862 break;
3863 case FPRounding_ZERO:
3864 overflow_to_inf = false;
3865 break;
3866 default:
3867 assert(0);
3868 }
3869 result = overflow_to_inf ? fp64_infinity(sgn) : fp64_max_normal(sgn);
3870 flags |= FPLIB_OFC | FPLIB_IXC;
3871 } else if (fpscr.fz && exp >= 2 * FP64_EXP_BIAS - 1) {
3872 result = fp64_zero(sgn);
3873 flags |= FPLIB_UFC;
3874 } else {
3875 exp += FP64_EXP_BITS;
3876 mnt = fp64_normalise(mnt, &exp);
3877 int result_exp = 2 * FP64_EXP_BIAS - 1 - exp;
3878 uint64_t fraction = (((uint32_t)1 << 19) /
3879 (mnt >> (FP64_BITS - 10) | 1) + 1) >> 1;
3880 fraction <<= FP64_MANT_BITS - 8;
3881 if (result_exp == 0) {
3882 fraction >>= 1;
3883 } else if (result_exp == -1) {
3884 fraction >>= 2;
3885 result_exp = 0;
3886 }
3887 result = fp64_pack(sgn, result_exp, fraction);
3888 }
3889
3890 set_fpscr0(fpscr, flags);
3891
3892 return result;
3893}
3894
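// FRECPS step: computes 2 - op1 * op2 with a single rounding, one
// Newton-Raphson step towards 1/x (op1 is negated so the muladd
// evaluates 2 + (-op1) * op2). 0 * infinity in the product is defined
// to give exactly 2.0.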
3895template <>
3896uint16_t
3897fplibRecipStepFused(uint16_t op1, uint16_t op2, FPSCR &fpscr)
3898{
3899 int mode = modeConv(fpscr);
3900 int flags = 0;
3901 int sgn1, exp1, sgn2, exp2;
3902 uint16_t mnt1, mnt2, result;
3903
3904 op1 = fplibNeg<uint16_t>(op1);
3905 fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3906 fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3907
3908 result = fp16_process_NaNs(op1, op2, mode, &flags);
3909 if (!result) {
3910 if ((exp1 == FP16_EXP_INF && !mnt2) ||
3911 (exp2 == FP16_EXP_INF && !mnt1)) {
3912 result = fp16_FPTwo(0);
3913 } else if (exp1 == FP16_EXP_INF || exp2 == FP16_EXP_INF) {
3914 result = fp16_infinity(sgn1 ^ sgn2);
3915 } else {
3916 result = fp16_muladd(fp16_FPTwo(0), op1, op2, 0, mode, &flags);
3917 }
3918 }
3919
3920 set_fpscr0(fpscr, flags);
3921
3922 return result;
3923}
3924
3925template <>
3926uint32_t
3927fplibRecipStepFused(uint32_t op1, uint32_t op2, FPSCR &fpscr)
3928{
3929 int mode = modeConv(fpscr);
3930 int flags = 0;
3931 int sgn1, exp1, sgn2, exp2;
3932 uint32_t mnt1, mnt2, result;
3933
3934 op1 = fplibNeg<uint32_t>(op1);
3935 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3936 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3937
3938 result = fp32_process_NaNs(op1, op2, mode, &flags);
3939 if (!result) {
3940 if ((exp1 == FP32_EXP_INF && !mnt2) ||
3941 (exp2 == FP32_EXP_INF && !mnt1)) {
3942 result = fp32_FPTwo(0);
3943 } else if (exp1 == FP32_EXP_INF || exp2 == FP32_EXP_INF) {
3944 result = fp32_infinity(sgn1 ^ sgn2);
3945 } else {
3946 result = fp32_muladd(fp32_FPTwo(0), op1, op2, 0, mode, &flags);
3947 }
3948 }
3949
3950 set_fpscr0(fpscr, flags);
3951
3952 return result;
3953}
3954
3955template <>
3956uint64_t
3957fplibRecipStepFused(uint64_t op1, uint64_t op2, FPSCR &fpscr)
3958{
3959 int mode = modeConv(fpscr);
3960 int flags = 0;
3961 int sgn1, exp1, sgn2, exp2;
3962 uint64_t mnt1, mnt2, result;
3963
3964 op1 = fplibNeg<uint64_t>(op1);
3965 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3966 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3967
3968 result = fp64_process_NaNs(op1, op2, mode, &flags);
3969 if (!result) {
3970 if ((exp1 == FP64_EXP_INF && !mnt2) ||
3971 (exp2 == FP64_EXP_INF && !mnt1)) {
3972 result = fp64_FPTwo(0);
3973 } else if (exp1 == FP64_EXP_INF || exp2 == FP64_EXP_INF) {
3974 result = fp64_infinity(sgn1 ^ sgn2);
3975 } else {
3976 result = fp64_muladd(fp64_FPTwo(0), op1, op2, 0, mode, &flags);
3977 }
3978 }
3979
3980 set_fpscr0(fpscr, flags);
3981
3982 return result;
3983}
3984
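// FRECPX: a scaling reciprocal that simply complements the exponent
// field (exp ^ EXP_INF) and clears the mantissa, giving roughly 1/x as a
// power of two; zeros and denormals map to the largest finite exponent.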
3985template <>
3986uint16_t
3987fplibRecpX(uint16_t op, FPSCR &fpscr)
3988{
3989 int mode = modeConv(fpscr);
3990 int flags = 0;
3991 int sgn, exp;
3992 uint16_t mnt, result;
3993
3994 fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags);
3995
3996 if (fp16_is_NaN(exp, mnt)) {
3997 result = fp16_process_NaN(op, mode, &flags);
3998 }
3999 else {
4000 if (!mnt) { // Zero and denormals
4001 result = fp16_pack(sgn, FP16_EXP_INF - 1, 0);
4002 } else { // Infinities and normals
4003 result = fp16_pack(sgn, exp ^ FP16_EXP_INF, 0);
4004 }
4005 }
4006
4007 set_fpscr0(fpscr, flags);
4008
4009 return result;
4010}
4011
4012template <>
4013uint32_t
4014fplibRecpX(uint32_t op, FPSCR &fpscr)
4015{
4016 int mode = modeConv(fpscr);
4017 int flags = 0;
4018 int sgn, exp;
4019 uint32_t mnt, result;
4020
4021 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
4022
4023 if (fp32_is_NaN(exp, mnt)) {
4024 result = fp32_process_NaN(op, mode, &flags);
4025 }
4026 else {
4027 if (!mnt) { // Zero and denormals
4028 result = fp32_pack(sgn, FP32_EXP_INF - 1, 0);
4029 } else { // Infinities and normals
4030 result = fp32_pack(sgn, exp ^ FP32_EXP_INF, 0);
4031 }
4032 }
4033
4034 set_fpscr0(fpscr, flags);
4035
4036 return result;
4037}
4038
4039template <>
4040uint64_t
4041fplibRecpX(uint64_t op, FPSCR &fpscr)
4042{
4043 int mode = modeConv(fpscr);
4044 int flags = 0;
4045 int sgn, exp;
4046 uint64_t mnt, result;
4047
4048 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
4049
4050 if (fp64_is_NaN(exp, mnt)) {
4051 result = fp64_process_NaN(op, mode, &flags);
4052 }
4053 else {
4054 if (!mnt) { // Zero and denormals
4055 result = fp64_pack(sgn, FP64_EXP_INF - 1, 0);
4056 } else { // Infinities and normals
4057 result = fp64_pack(sgn, exp ^ FP64_EXP_INF, 0);
4058 }
4059 }
4060
4061 set_fpscr0(fpscr, flags);
4062
4063 return result;
4064}
4065
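// FPRoundInt: round to an integral value in floating-point format.
// expint is the exponent at which every mantissa bit is integral; below
// it the value is truncated to x and a two-bit code err is formed from
// the first fractional bit and a sticky OR of the rest (0 exact, 1 below
// half, 2 exactly half, 3 above half), which drives the increment for
// each rounding mode.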
4066template <>
4067uint16_t
4068fplibRoundInt(uint16_t op, FPRounding rounding, bool exact, FPSCR &fpscr)
4069{
4070 int expint = FP16_EXP_BIAS + FP16_MANT_BITS;
4071 int mode = modeConv(fpscr);
4072 int flags = 0;
4073 int sgn, exp;
4074 uint16_t mnt, result;
4075
4076 // Unpack using FPCR to determine if subnormals are flushed-to-zero:
4077 fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags);
4078
4079 // Handle NaNs, infinities and zeroes:
4080 if (fp16_is_NaN(exp, mnt)) {
4081 result = fp16_process_NaN(op, mode, &flags);
4082 } else if (exp == FP16_EXP_INF) {
4083 result = fp16_infinity(sgn);
4084 } else if (!mnt) {
4085 result = fp16_zero(sgn);
4086 } else if (exp >= expint) {
4087 // There are no fractional bits
4088 result = op;
4089 } else {
4090 // Truncate towards zero:
4091 uint16_t x = expint - exp >= FP16_BITS ? 0 : mnt >> (expint - exp);
4092 int err = exp < expint - FP16_BITS ? 1 :
4093 ((mnt << 1 >> (expint - exp - 1) & 3) |
4094 ((uint16_t)(mnt << 2 << (FP16_BITS + exp - expint)) != 0));
4095 switch (rounding) {
4096 case FPRounding_TIEEVEN:
4097 x += (err == 3 || (err == 2 && (x & 1)));
4098 break;
4099 case FPRounding_POSINF:
4100 x += err && !sgn;
4101 break;
4102 case FPRounding_NEGINF:
4103 x += err && sgn;
4104 break;
4105 case FPRounding_ZERO:
4106 break;
4107 case FPRounding_TIEAWAY:
4108 x += err >> 1;
4109 break;
4110 default:
4111 assert(0);
4112 }
4113
4114 if (x == 0) {
4115 result = fp16_zero(sgn);
4116 } else {
4117 exp = expint;
4118 mnt = fp16_normalise(x, &exp);
4119 result = fp16_pack(sgn, exp + FP16_EXP_BITS, mnt >> FP16_EXP_BITS);
4120 }
4121
4122 if (err && exact)
4123 flags |= FPLIB_IXC;
4124 }
4125
4126 set_fpscr0(fpscr, flags);
4127
4128 return result;
4129}
4130
4131template <>
4132uint32_t
4133fplibRoundInt(uint32_t op, FPRounding rounding, bool exact, FPSCR &fpscr)
4134{
4135 int expint = FP32_EXP_BIAS + FP32_MANT_BITS;
4136 int mode = modeConv(fpscr);
4137 int flags = 0;
4138 int sgn, exp;
4139 uint32_t mnt, result;
4140
4141 // Unpack using FPCR to determine if subnormals are flushed-to-zero:
4142 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
4143
4144 // Handle NaNs, infinities and zeroes:
4145 if (fp32_is_NaN(exp, mnt)) {
4146 result = fp32_process_NaN(op, mode, &flags);
4147 } else if (exp == FP32_EXP_INF) {
4148 result = fp32_infinity(sgn);
4149 } else if (!mnt) {
4150 result = fp32_zero(sgn);
4151 } else if (exp >= expint) {
4152 // There are no fractional bits
4153 result = op;
4154 } else {
4155 // Truncate towards zero:
4156 uint32_t x = expint - exp >= FP32_BITS ? 0 : mnt >> (expint - exp);
4157 int err = exp < expint - FP32_BITS ? 1 :
4158 ((mnt << 1 >> (expint - exp - 1) & 3) |
4159 ((uint32_t)(mnt << 2 << (FP32_BITS + exp - expint)) != 0));
4160 switch (rounding) {
4161 case FPRounding_TIEEVEN:
4162 x += (err == 3 || (err == 2 && (x & 1)));
4163 break;
4164 case FPRounding_POSINF:
4165 x += err && !sgn;
4166 break;
4167 case FPRounding_NEGINF:
4168 x += err && sgn;
4169 break;
4170 case FPRounding_ZERO:
4171 break;
4172 case FPRounding_TIEAWAY:
4173 x += err >> 1;
4174 break;
4175 default:
4176 assert(0);
4177 }
4178
4179 if (x == 0) {
4180 result = fp32_zero(sgn);
4181 } else {
4182 exp = expint;
4183 mnt = fp32_normalise(x, &exp);
4184 result = fp32_pack(sgn, exp + FP32_EXP_BITS, mnt >> FP32_EXP_BITS);
4185 }
4186
4187 if (err && exact)
4188 flags |= FPLIB_IXC;
4189 }
4190
4191 set_fpscr0(fpscr, flags);
4192
4193 return result;
4194}
4195
4196template <>
4197uint64_t
4198fplibRoundInt(uint64_t op, FPRounding rounding, bool exact, FPSCR &fpscr)
4199{
4200 int expint = FP64_EXP_BIAS + FP64_MANT_BITS;
4201 int mode = modeConv(fpscr);
4202 int flags = 0;
4203 int sgn, exp;
4204 uint64_t mnt, result;
4205
4206 // Unpack using FPCR to determine if subnormals are flushed-to-zero:
4207 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
4208
4209 // Handle NaNs, infinities and zeroes:
4210 if (fp64_is_NaN(exp, mnt)) {
4211 result = fp64_process_NaN(op, mode, &flags);
4212 } else if (exp == FP64_EXP_INF) {
4213 result = fp64_infinity(sgn);
4214 } else if (!mnt) {
4215 result = fp64_zero(sgn);
4216 } else if (exp >= expint) {
4217 // There are no fractional bits
4218 result = op;
4219 } else {
4220 // Truncate towards zero:
4221 uint64_t x = expint - exp >= FP64_BITS ? 0 : mnt >> (expint - exp);
4222 int err = exp < expint - FP64_BITS ? 1 :
4223 ((mnt << 1 >> (expint - exp - 1) & 3) |
4224 ((uint64_t)(mnt << 2 << (FP64_BITS + exp - expint)) != 0));
4225 switch (rounding) {
4226 case FPRounding_TIEEVEN:
4227 x += (err == 3 || (err == 2 && (x & 1)));
4228 break;
4229 case FPRounding_POSINF:
4230 x += err && !sgn;
4231 break;
4232 case FPRounding_NEGINF:
4233 x += err && sgn;
4234 break;
4235 case FPRounding_ZERO:
4236 break;
4237 case FPRounding_TIEAWAY:
4238 x += err >> 1;
4239 break;
4240 default:
4241 assert(0);
4242 }
4243
4244 if (x == 0) {
4245 result = fp64_zero(sgn);
4246 } else {
4247 exp = expint;
4248 mnt = fp64_normalise(x, &exp);
4249 result = fp64_pack(sgn, exp + FP64_EXP_BITS, mnt >> FP64_EXP_BITS);
4250 }
4251
4252 if (err && exact)
4253 flags |= FPLIB_IXC;
4254 }
4255
4256 set_fpscr0(fpscr, flags);
4257
4258 return result;
4259}
4260
template <>
uint16_t
fplibScale(uint16_t op1, uint16_t op2, FPSCR &fpscr)
{
    int flags = 0;
    uint16_t result = fp16_scale(op1, (int16_t)op2, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}

template <>
uint32_t
fplibScale(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    int flags = 0;
    uint32_t result = fp32_scale(op1, (int32_t)op2, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}

template <>
uint64_t
fplibScale(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    int flags = 0;
    uint64_t result = fp64_scale(op1, (int64_t)op2, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}

template <>
uint16_t
fplibSqrt(uint16_t op, FPSCR &fpscr)
{
    int flags = 0;
    uint16_t result = fp16_sqrt(op, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}

template <>
uint32_t
fplibSqrt(uint32_t op, FPSCR &fpscr)
{
    int flags = 0;
    uint32_t result = fp32_sqrt(op, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}

template <>
uint64_t
fplibSqrt(uint64_t op, FPSCR &fpscr)
{
    int flags = 0;
    uint64_t result = fp64_sqrt(op, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}

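// Subtraction is implemented as addition with the negate flag (the third
// argument to fpN_add) set, flipping the sign of op2 before the add. For
// example, on the fp32 bit patterns for 3.0f and 1.0f,
// fp32_add(0x40400000, 0x3f800000, 1, ...) returns 0x40000000 (2.0f).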
template <>
uint16_t
fplibSub(uint16_t op1, uint16_t op2, FPSCR &fpscr)
{
    int flags = 0;
    uint16_t result = fp16_add(op1, op2, 1, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}

template <>
uint32_t
fplibSub(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    int flags = 0;
    uint32_t result = fp32_add(op1, op2, 1, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}

template <>
uint64_t
fplibSub(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    int flags = 0;
    uint64_t result = fp64_add(op1, op2, 1, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}

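// fplibTrigMulAdd performs an FTMAD-style step: assuming
// fpN_muladd(a, b, c, 0, ...) computes a + b * c, it returns
// coeff[row][coeff_index] + op1 * |op2|, with the row selected by the
// sign bit of op2. The rows appear to hold polynomial coefficients
// approximating sine (row 0: 1, ~-1/6, ~1/120, ...) and cosine (row 1:
// 1, -1/2, ~1/24, ...) in powers of the squared argument, so stepping
// coeff_index through the table amounts to a Horner evaluation.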
template <>
uint16_t
fplibTrigMulAdd(uint8_t coeff_index, uint16_t op1, uint16_t op2, FPSCR &fpscr)
{
    static uint16_t coeff[2][8] = {
        {
            0x3c00,
            0xb155,
            0x2030,
            0x0000,
            0x0000,
            0x0000,
            0x0000,
            0x0000,
        },
        {
            0x3c00,
            0xb800,
            0x293a,
            0x0000,
            0x0000,
            0x0000,
            0x0000,
            0x0000
        }
    };
    int flags = 0;
    uint16_t result =
        fp16_muladd(coeff[op2 >> (FP16_BITS - 1)][coeff_index], op1,
                    fplibAbs(op2), 0, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}

template <>
uint32_t
fplibTrigMulAdd(uint8_t coeff_index, uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    static uint32_t coeff[2][8] = {
        {
            0x3f800000,
            0xbe2aaaab,
            0x3c088886,
            0xb95008b9,
            0x36369d6d,
            0x00000000,
            0x00000000,
            0x00000000
        },
        {
            0x3f800000,
            0xbf000000,
            0x3d2aaaa6,
            0xbab60705,
            0x37cd37cc,
            0x00000000,
            0x00000000,
            0x00000000
        }
    };
    int flags = 0;
    uint32_t result =
        fp32_muladd(coeff[op2 >> (FP32_BITS - 1)][coeff_index], op1,
                    fplibAbs(op2), 0, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}

template <>
uint64_t
fplibTrigMulAdd(uint8_t coeff_index, uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    static uint64_t coeff[2][8] = {
        {
            0x3ff0000000000000ULL,
            0xbfc5555555555543ULL,
            0x3f8111111110f30cULL,
            0xbf2a01a019b92fc6ULL,
            0x3ec71de351f3d22bULL,
            0xbe5ae5e2b60f7b91ULL,
            0x3de5d8408868552fULL,
            0x0000000000000000ULL
        },
        {
            0x3ff0000000000000ULL,
            0xbfe0000000000000ULL,
            0x3fa5555555555536ULL,
            0xbf56c16c16c13a0bULL,
            0x3efa01a019b1e8d8ULL,
            0xbe927e4f7282f468ULL,
            0x3e21ee96d2641b13ULL,
            0xbda8f76380fbb401ULL
        }
    };
    int flags = 0;
    uint64_t result =
        fp64_muladd(coeff[op2 >> (FP64_BITS - 1)][coeff_index], op1,
                    fplibAbs(op2), 0, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}

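// fplibTrigSMul (FTSMUL-style) squares op1 and, provided the product is
// not a NaN, replaces its sign bit with bit 0 of op2.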
template <>
uint16_t
fplibTrigSMul(uint16_t op1, uint16_t op2, FPSCR &fpscr)
{
    int flags = 0;
    int sgn, exp;
    uint16_t mnt;

    int mode = modeConv(fpscr);
    uint16_t result = fp16_mul(op1, op1, mode, &flags);
    set_fpscr0(fpscr, flags);

    fp16_unpack(&sgn, &exp, &mnt, result, mode, &flags);
    if (!fp16_is_NaN(exp, mnt)) {
        result = (result & ~(1ULL << (FP16_BITS - 1))) |
            op2 << (FP16_BITS - 1);
    }
    return result;
}

template <>
uint32_t
fplibTrigSMul(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    int flags = 0;
    int sgn, exp;
    uint32_t mnt;

    int mode = modeConv(fpscr);
    uint32_t result = fp32_mul(op1, op1, mode, &flags);
    set_fpscr0(fpscr, flags);

    fp32_unpack(&sgn, &exp, &mnt, result, mode, &flags);
    if (!fp32_is_NaN(exp, mnt)) {
        result = (result & ~(1ULL << (FP32_BITS - 1))) | op2 << (FP32_BITS - 1);
    }
    return result;
}

template <>
uint64_t
fplibTrigSMul(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    int flags = 0;
    int sgn, exp;
    uint64_t mnt;

    int mode = modeConv(fpscr);
    uint64_t result = fp64_mul(op1, op1, mode, &flags);
    set_fpscr0(fpscr, flags);

    fp64_unpack(&sgn, &exp, &mnt, result, mode, &flags);
    if (!fp64_is_NaN(exp, mnt)) {
        result = (result & ~(1ULL << (FP64_BITS - 1))) | op2 << (FP64_BITS - 1);
    }
    return result;
}

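// fplibTrigSSel (FTSSEL-style) encodes the constant 1.0 by placing the
// exponent bias in the exponent field over a zero mantissa (e.g. fp32:
// 127 << 23 == 0x3f800000). Bit 0 of op2 selects 1.0 instead of op1, and
// bit 1 of op2 is XORed into the result's sign bit.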
template <>
uint16_t
fplibTrigSSel(uint16_t op1, uint16_t op2, FPSCR &fpscr)
{
    static constexpr uint16_t fpOne =
        (uint16_t)FP16_EXP_BIAS << FP16_MANT_BITS; // 1.0
    if (op2 & 1)
        op1 = fpOne;
    return op1 ^ ((op2 >> 1) << (FP16_BITS - 1));
}

template <>
uint32_t
fplibTrigSSel(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    static constexpr uint32_t fpOne =
        (uint32_t)FP32_EXP_BIAS << FP32_MANT_BITS; // 1.0
    if (op2 & 1)
        op1 = fpOne;
    return op1 ^ ((op2 >> 1) << (FP32_BITS - 1));
}

template <>
uint64_t
fplibTrigSSel(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    static constexpr uint64_t fpOne =
        (uint64_t)FP64_EXP_BIAS << FP64_MANT_BITS; // 1.0
    if (op2 & 1)
        op1 = fpOne;
    return op1 ^ ((op2 >> 1) << (FP64_BITS - 1));
}

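// Convert an operand unpacked into fp64 format to a 64-bit fixed-point
// integer. The saturation value ((uint64_t)!u << 63) - !sgn covers all
// four cases: unsigned negative -> 0, unsigned positive -> UINT64_MAX,
// signed negative -> INT64_MIN, signed positive -> INT64_MAX.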
static uint64_t
FPToFixed_64(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding,
             int *flags)
{
    int expmax = FP64_EXP_BIAS + FP64_BITS - 1;
    uint64_t x;
    int err;

    if (exp > expmax) {
        *flags = FPLIB_IOC;
        return ((uint64_t)!u << (FP64_BITS - 1)) - !sgn;
    }

    x = lsr64(mnt << FP64_EXP_BITS, expmax - exp);
    err = (exp > expmax - 2 ? 0 :
           (lsr64(mnt << FP64_EXP_BITS, expmax - 2 - exp) & 3) |
           !!(mnt << FP64_EXP_BITS & (lsl64(1, expmax - 2 - exp) - 1)));

    switch (rounding) {
      case FPRounding_TIEEVEN:
        x += (err == 3 || (err == 2 && (x & 1)));
        break;
      case FPRounding_POSINF:
        x += err && !sgn;
        break;
      case FPRounding_NEGINF:
        x += err && sgn;
        break;
      case FPRounding_ZERO:
        break;
      case FPRounding_TIEAWAY:
        x += err >> 1;
        break;
      default:
        assert(0);
    }

    if (u ? sgn && x : x > (1ULL << (FP64_BITS - 1)) - !sgn) {
        *flags = FPLIB_IOC;
        return ((uint64_t)!u << (FP64_BITS - 1)) - !sgn;
    }

    if (err) {
        *flags = FPLIB_IXC;
    }

    return sgn ? -x : x;
}

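// The 32- and 16-bit conversions reuse FPToFixed_64 and then check that
// the result fits the narrower destination: unsigned values must be below
// 2^N, and signed values must lie in [-2^(N-1), 2^(N-1) - 1]; on failure
// IOC is raised and the result saturates as in FPToFixed_64.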
static uint32_t
FPToFixed_32(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding,
             int *flags)
{
    uint64_t x = FPToFixed_64(sgn, exp, mnt, u, rounding, flags);
    if (u ? x >= 1ULL << FP32_BITS :
        !(x < 1ULL << (FP32_BITS - 1) ||
          (uint64_t)-x <= (uint64_t)1 << (FP32_BITS - 1))) {
        *flags = FPLIB_IOC;
        x = ((uint32_t)!u << (FP32_BITS - 1)) - !sgn;
    }
    return x;
}

static uint16_t
FPToFixed_16(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding,
             int *flags)
{
    uint64_t x = FPToFixed_64(sgn, exp, mnt, u, rounding, flags);
    if (u ? x >= 1ULL << FP16_BITS :
        !(x < 1ULL << (FP16_BITS - 1) ||
          (uint64_t)-x <= (uint64_t)1 << (FP16_BITS - 1))) {
        *flags = FPLIB_IOC;
        x = ((uint16_t)!u << (FP16_BITS - 1)) - !sgn;
    }
    return x;
}

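// Each fplibFPToFixed specialisation widens the mantissa to the fp64
// layout and rebiases the exponent (exp + FP64_EXP_BIAS - FPn_EXP_BIAS) so
// the shared helpers above serve every source format; adding fbits scales
// the operand by 2^fbits to form the fixed-point result. For example,
// converting fp32 1.5f (0x3fc00000) with fbits = 8 and u = true should
// give 1.5 * 256 = 384.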
template <>
uint16_t
fplibFPToFixed(uint16_t op, int fbits, bool u, FPRounding rounding,
               FPSCR &fpscr)
{
    int flags = 0;
    int sgn, exp;
    uint16_t mnt, result;

    // Unpack using FPCR to determine if subnormals are flushed-to-zero:
    fp16_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);

    // If NaN, set cumulative flag or take exception:
    if (fp16_is_NaN(exp, mnt)) {
        flags = FPLIB_IOC;
        result = 0;
    } else {
        assert(fbits >= 0);
        // Infinity is treated as an ordinary normalised number that saturates.
        result =
            FPToFixed_16(sgn, exp + FP64_EXP_BIAS - FP16_EXP_BIAS + fbits,
                         (uint64_t)mnt << (FP64_MANT_BITS - FP16_MANT_BITS),
                         u, rounding, &flags);
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint32_t
fplibFPToFixed(uint16_t op, int fbits, bool u, FPRounding rounding,
               FPSCR &fpscr)
{
    int flags = 0;
    int sgn, exp;
    uint16_t mnt;
    uint32_t result;

    // Unpack using FPCR to determine if subnormals are flushed-to-zero:
    fp16_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);

    // If NaN, set cumulative flag or take exception:
    if (fp16_is_NaN(exp, mnt)) {
        flags = FPLIB_IOC;
        result = 0;
    } else {
        assert(fbits >= 0);
        if (exp == FP16_EXP_INF)
            exp = 255; // infinity: make it big enough to saturate
        result =
            FPToFixed_32(sgn, exp + FP64_EXP_BIAS - FP16_EXP_BIAS + fbits,
                         (uint64_t)mnt << (FP64_MANT_BITS - FP16_MANT_BITS),
                         u, rounding, &flags);
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint32_t
fplibFPToFixed(uint32_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
{
    int flags = 0;
    int sgn, exp;
    uint32_t mnt, result;

    // Unpack using FPCR to determine if subnormals are flushed-to-zero:
    fp32_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);

    // If NaN, set cumulative flag or take exception:
    if (fp32_is_NaN(exp, mnt)) {
        flags = FPLIB_IOC;
        result = 0;
    } else {
        assert(fbits >= 0);
        // Infinity is treated as an ordinary normalised number that saturates.
        result =
            FPToFixed_32(sgn, exp + FP64_EXP_BIAS - FP32_EXP_BIAS + fbits,
                         (uint64_t)mnt << (FP64_MANT_BITS - FP32_MANT_BITS),
                         u, rounding, &flags);
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint32_t
fplibFPToFixed(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
{
    int flags = 0;
    int sgn, exp;
    uint64_t mnt;
    uint32_t result;

    // Unpack using FPCR to determine if subnormals are flushed-to-zero:
    fp64_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);

    // If NaN, set cumulative flag or take exception:
    if (fp64_is_NaN(exp, mnt)) {
        flags = FPLIB_IOC;
        result = 0;
    } else {
        assert(fbits >= 0);
        // Infinity is treated as an ordinary normalised number that saturates.
        result = FPToFixed_32(sgn, exp + fbits, mnt, u, rounding, &flags);
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint64_t
fplibFPToFixed(uint16_t op, int fbits, bool u, FPRounding rounding,
               FPSCR &fpscr)
{
    int flags = 0;
    int sgn, exp;
    uint16_t mnt;
    uint64_t result;

    // Unpack using FPCR to determine if subnormals are flushed-to-zero:
    fp16_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);

    // If NaN, set cumulative flag or take exception:
    if (fp16_is_NaN(exp, mnt)) {
        flags = FPLIB_IOC;
        result = 0;
    } else {
        assert(fbits >= 0);
        if (exp == FP16_EXP_INF)
            exp = 255; // infinity: make it big enough to saturate
        result =
            FPToFixed_64(sgn, exp + FP64_EXP_BIAS - FP16_EXP_BIAS + fbits,
                         (uint64_t)mnt << (FP64_MANT_BITS - FP16_MANT_BITS),
                         u, rounding, &flags);
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint64_t
fplibFPToFixed(uint32_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
{
    int flags = 0;
    int sgn, exp;
    uint32_t mnt;
    uint64_t result;

    // Unpack using FPCR to determine if subnormals are flushed-to-zero:
    fp32_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);

    // If NaN, set cumulative flag or take exception:
    if (fp32_is_NaN(exp, mnt)) {
        flags = FPLIB_IOC;
        result = 0;
    } else {
        assert(fbits >= 0);
        // Infinity is treated as an ordinary normalised number that saturates.
        result =
            FPToFixed_64(sgn, exp + FP64_EXP_BIAS - FP32_EXP_BIAS + fbits,
                         (uint64_t)mnt << (FP64_MANT_BITS - FP32_MANT_BITS),
                         u, rounding, &flags);
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint64_t
fplibFPToFixed(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
{
    int flags = 0;
    int sgn, exp;
    uint64_t mnt, result;

    // Unpack using FPCR to determine if subnormals are flushed-to-zero:
    fp64_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);

    // If NaN, set cumulative flag or take exception:
    if (fp64_is_NaN(exp, mnt)) {
        flags = FPLIB_IOC;
        result = 0;
    } else {
        assert(fbits >= 0);
        // Infinity is treated as an ordinary normalised number that saturates.
        result = FPToFixed_64(sgn, exp + fbits, mnt, u, rounding, &flags);
    }

    set_fpscr0(fpscr, flags);

    return result;
}

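// The fpN_cvtf helpers convert a 64-bit (optionally fixed-point) integer
// to floating point: take the magnitude, set the exponent as if the
// integer were a mantissa scaled by 2^-fbits, normalise, and collapse the
// bits below the destination precision into a sticky bit so that fpN_round
// can round correctly. For example, fp32_cvtf(5, 0, 0, mode, &flags)
// should return 0x40a00000 (5.0f).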
static uint16_t
fp16_cvtf(uint64_t a, int fbits, int u, int mode, int *flags)
{
    int x_sgn = !u && a >> (FP64_BITS - 1);
    int x_exp = FP16_EXP_BIAS + FP64_BITS - 1 - fbits;
    uint64_t x_mnt = x_sgn ? -a : a;

    // Handle zero:
    if (!x_mnt) {
        return fp16_zero(0);
    }

    // Normalise into FP16_BITS bits, collapsing error into bottom bit:
    x_mnt = fp64_normalise(x_mnt, &x_exp);
    x_mnt = (x_mnt >> (FP64_BITS - FP16_BITS - 1) |
             !!(x_mnt & ((1ULL << (FP64_BITS - FP16_BITS - 1)) - 1)));

    return fp16_round(x_sgn, x_exp, x_mnt, mode, flags);
}

static uint32_t
fp32_cvtf(uint64_t a, int fbits, int u, int mode, int *flags)
{
    int x_sgn = !u && a >> (FP64_BITS - 1);
    int x_exp = FP32_EXP_BIAS + FP64_BITS - 1 - fbits;
    uint64_t x_mnt = x_sgn ? -a : a;

    // Handle zero:
    if (!x_mnt) {
        return fp32_zero(0);
    }

    // Normalise into FP32_BITS bits, collapsing error into bottom bit:
    x_mnt = fp64_normalise(x_mnt, &x_exp);
    x_mnt = (x_mnt >> (FP64_BITS - FP32_BITS - 1) |
             !!(x_mnt & ((1ULL << (FP64_BITS - FP32_BITS - 1)) - 1)));

    return fp32_round(x_sgn, x_exp, x_mnt, mode, flags);
}

static uint64_t
fp64_cvtf(uint64_t a, int fbits, int u, int mode, int *flags)
{
    int x_sgn = !u && a >> (FP64_BITS - 1);
    int x_exp = FP64_EXP_BIAS + FP64_BITS - 1 - fbits;
    uint64_t x_mnt = x_sgn ? -a : a;

    // Handle zero:
    if (!x_mnt) {
        return fp64_zero(0);
    }

    x_mnt = fp64_normalise(x_mnt, &x_exp);

    return fp64_round(x_sgn, x_exp, x_mnt << 1, mode, flags);
}

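// In the wrappers below, (uint32_t)fpscr >> 22 & 12 extracts the FZ
// (bit 24) and DN (bit 25) fields of the FPSCR into the FPLIB_FZ (4) and
// FPLIB_DN (8) mode bits; they are ORed with the caller-supplied rounding
// mode instead of using the full modeConv(fpscr), since the rounding mode
// is given explicitly here.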
template <>
uint16_t
fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding,
               FPSCR &fpscr)
{
    int flags = 0;
    uint16_t res = fp16_cvtf(op, fbits, u,
                             (int)rounding | ((uint32_t)fpscr >> 22 & 12),
                             &flags);
    set_fpscr0(fpscr, flags);
    return res;
}

template <>
uint32_t
fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
{
    int flags = 0;
    uint32_t res = fp32_cvtf(op, fbits, u,
                             (int)rounding | ((uint32_t)fpscr >> 22 & 12),
                             &flags);
    set_fpscr0(fpscr, flags);
    return res;
}

template <>
uint64_t
fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
{
    int flags = 0;
    uint64_t res = fp64_cvtf(op, fbits, u,
                             (int)rounding | ((uint32_t)fpscr >> 22 & 12),
                             &flags);
    set_fpscr0(fpscr, flags);
    return res;
}

template <>
uint16_t
fplibInfinity(int sgn)
{
    return fp16_infinity(sgn);
}

template <>
uint32_t
fplibInfinity(int sgn)
{
    return fp32_infinity(sgn);
}

template <>
uint64_t
fplibInfinity(int sgn)
{
    return fp64_infinity(sgn);
}

template <>
uint16_t
fplibDefaultNaN()
{
    return fp16_defaultNaN();
}

template <>
uint32_t
fplibDefaultNaN()
{
    return fp32_defaultNaN();
}

template <>
uint64_t
fplibDefaultNaN()
{
    return fp64_defaultNaN();
}

}