fp80.h (9899:0392ef94d766) fp80.h (10480:5d4ebc92d32e)
1/*
2 * Copyright (c) 2013, Andreas Sandberg
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above
12 * copyright notice, this list of conditions and the following
13 * disclaimer in the documentation and/or other materials provided
14 * with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
19 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
20 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
23 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
25 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
27 * OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30#ifndef _FP80_H
31#define _FP80_H 1
32
1/*
2 * Copyright (c) 2013, Andreas Sandberg
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above
12 * copyright notice, this list of conditions and the following
13 * disclaimer in the documentation and/or other materials provided
14 * with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
19 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
20 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
23 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
25 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
27 * OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30#ifndef _FP80_H
31#define _FP80_H 1
32
33#include
34#include <stdint.h>
33#include <math.h> /* FP_NAN et al. */
35#include <stdio.h>
36
34#include <stdio.h>
35
36#include <fputils/fptypes.h>
37
38
37#ifdef __cplusplus
38extern "C" {
39#endif
40
41/**
42 * @defgroup fp80 80-bit Floats
43 * Functions handling 80-bit floats.
44 *
45 * @{
46 */
47
39#ifdef __cplusplus
40extern "C" {
41#endif
42
43/**
44 * @defgroup fp80 80-bit Floats
45 * Functions handling 80-bit floats.
46 *
47 * @{
48 */
49
48/**
 * Internal representation of an 80-bit float.
 *
 * The union exposes the same storage both as raw bytes and as two
 * integer fields.
 */
49typedef union {
50 char bits[10]; /* Raw byte view: 10 bytes == 80 bits. */
51 struct {
52 uint64_t fi; /* NOTE(review): presumably the 64-bit fraction incl. integer bit -- confirm. */
53 uint16_t se; /* NOTE(review): presumably the sign bit and 15-bit exponent -- confirm. */
54 } repr;
55} fp80_t;
56
57/** Constant representing +inf */
58extern const fp80_t fp80_pinf;
59/** Constant representing -inf */
60extern const fp80_t fp80_ninf;
61/** Constant representing a quiet NaN */
62extern const fp80_t fp80_qnan;
63/** Constant representing a quiet indefinite NaN */
64extern const fp80_t fp80_qnani;
65/** Constant representing a signaling NaN */
66extern const fp80_t fp80_snan;
67/** Alias for fp80_qnan */
68extern const fp80_t fp80_nan;
69
70/**
71 * Is the value a special floating point value?
72 *
73 * Determine if a floating point value is one of the special values
74 * (i.e., one of the infinities or NaNs). In practice, this function
75 * only checks if the exponent is set to the maximum value supported
76 * by the binary representation, which is a reserved value used for
77 * such special numbers.
78 *
79 * @param fp80 value to analyze.
80 * @return 1 if the value is special, 0 otherwise.
81 */
82int fp80_isspecial(fp80_t fp80);
83/**
84 * Is the value a quiet NaN?
85 *
86 * @param fp80 value to analyze.
87 * @return 1 if true, 0 otherwise.
88 */
89int fp80_isqnan(fp80_t fp80);
90/**
91 * Is the value an indefinite quiet NaN?
92 *
93 * @param fp80 value to analyze.
94 * @return 1 if true, 0 otherwise.
95 */
96int fp80_isqnani(fp80_t fp80);
97/**
98 * Is the value a signaling NaN?
99 *
100 * @param fp80 value to analyze.
101 * @return 1 if true, 0 otherwise.
102 */
103int fp80_issnan(fp80_t fp80);
104
105/**
106 * Classify a floating point number.
107 *
108 * This function implements the same classification as the standard
109 * fpclassify() function. It returns one of the following floating
110 * point classes:
111 * <ul>
112 * <li>FP_NAN - The value is NaN.
113 * <li>FP_INFINITE - The value is either +inf or -inf.
114 * <li>FP_ZERO - The value is either +0 or -0.
115 * <li>FP_SUBNORMAL - The value is too small to be represented as a
116 * normalized float. See fp80_issubnormal().
117 * <li>FP_NORMAL - The value is none of the above.
118 * </ul>
119 *
120 * @param fp80 value to analyze.
121 * @return Floating point classification.
122 */
123int fp80_classify(fp80_t fp80);
124
125/**
126 * Is a value finite?
127 *
128 * Check if a value is a finite value. That is, not one of the
129 * infinities or NaNs.
130 *
131 * @param fp80 value to analyze.
132 * @return -1 if negative finite, +1 if positive finite, 0 otherwise.
133 */
134int fp80_isfinite(fp80_t fp80);
135/**
136 * Is the value a non-zero normal?
137 *
138 * This function checks if a floating point value is a normal (having
139 * an exponent larger than or equal to 1) or not. See fp80_issubnormal()
140 * for a description of what a denormal value is.
141 *
142 * @see fp80_issubnormal()
143 *
144 * @param fp80 value to analyze.
145 * @return -1 if negative normal, +1 if positive normal, 0 otherwise.
146 */
147int fp80_isnormal(fp80_t fp80);
148/**
149 * Is the value a NaN of any kind?
150 *
151 * @param fp80 value to analyze.
152 * @return -1 if negative NaN, +1 if positive NaN, 0 otherwise.
153 */
154int fp80_isnan(fp80_t fp80);
155/**
156 * Is the value one of the infinities?
157 *
158 * @param fp80 value to analyze.
159 * @return -1 if -inf, +1 if +inf, 0 otherwise.
160 */
161int fp80_isinf(fp80_t fp80);
162/**
163 * Determine value of the sign-bit of a floating point number.
164 *
165 * @note Floats can represent both positive and negative zeros.
166 *
167 * @param fp80 value to analyze.
168 * @return -1 if negative, +1 if positive.
169 */
170int fp80_sgn(fp80_t fp80);
171/**
172 * Is the value zero?
173 *
174 * @param fp80 value to analyze.
175 * @return -1 if negative zero, +1 if positive zero, 0 otherwise.
176 */
177int fp80_iszero(fp80_t fp80);
178/**
179 * Is the value a denormal?
180 *
181 * Numbers that are close to the minimum of what can be stored in a
182 * floating point number start losing precision because bits in the
183 * fraction get used (implicitly) to store parts of the negative
184 * exponent (i.e., the exponent is saturated and the fraction is less
185 * than 1). Such numbers are known as denormals. This function checks
186 * whether a float is a denormal or not.
187 *
188 * @param fp80 value to analyze.
189 * @return -1 if negative denormal, +1 if positive denormal, 0 otherwise.
190 */
191int fp80_issubnormal(fp80_t fp80);
192
50/** Constant representing +inf */
51extern const fp80_t fp80_pinf;
52/** Constant representing -inf */
53extern const fp80_t fp80_ninf;
54/** Constant representing a quiet NaN */
55extern const fp80_t fp80_qnan;
56/** Constant representing a quiet indefinite NaN */
57extern const fp80_t fp80_qnani;
58/** Constant representing a signaling NaN */
59extern const fp80_t fp80_snan;
60/** Alias for fp80_qnan */
61extern const fp80_t fp80_nan;
62
63/**
64 * Is the value a special floating point value?
65 *
66 * Determine if a floating point value is one of the special values
67 * (i.e., one of the infinities or NaNs). In practice, this function
68 * only checks if the exponent is set to the maximum value supported
69 * by the binary representation, which is a reserved value used for
70 * such special numbers.
71 *
72 * @param fp80 value to analyze.
73 * @return 1 if the value is special, 0 otherwise.
74 */
75int fp80_isspecial(fp80_t fp80);
76/**
77 * Is the value a quiet NaN?
78 *
79 * @param fp80 value to analyze.
80 * @return 1 if true, 0 otherwise.
81 */
82int fp80_isqnan(fp80_t fp80);
83/**
84 * Is the value an indefinite quiet NaN?
85 *
86 * @param fp80 value to analyze.
87 * @return 1 if true, 0 otherwise.
88 */
89int fp80_isqnani(fp80_t fp80);
90/**
91 * Is the value a signaling NaN?
92 *
93 * @param fp80 value to analyze.
94 * @return 1 if true, 0 otherwise.
95 */
96int fp80_issnan(fp80_t fp80);
97
98/**
99 * Classify a floating point number.
100 *
101 * This function implements the same classification as the standard
102 * fpclassify() function. It returns one of the following floating
103 * point classes:
104 * <ul>
105 * <li>FP_NAN - The value is NaN.
106 * <li>FP_INFINITE - The value is either +inf or -inf.
107 * <li>FP_ZERO - The value is either +0 or -0.
108 * <li>FP_SUBNORMAL - The value is too small to be represented as a
109 * normalized float. See fp80_issubnormal().
110 * <li>FP_NORMAL - The value is none of the above.
111 * </ul>
112 *
113 * @param fp80 value to analyze.
114 * @return Floating point classification.
115 */
116int fp80_classify(fp80_t fp80);
117
118/**
119 * Is a value finite?
120 *
121 * Check if a value is a finite value. That is, not one of the
122 * infinities or NaNs.
123 *
124 * @param fp80 value to analyze.
125 * @return -1 if negative finite, +1 if positive finite, 0 otherwise.
126 */
127int fp80_isfinite(fp80_t fp80);
128/**
129 * Is the value a non-zero normal?
130 *
131 * This function checks if a floating point value is a normal (having
132 * an exponent larger than or equal to 1) or not. See fp80_issubnormal()
133 * for a description of what a denormal value is.
134 *
135 * @see fp80_issubnormal()
136 *
137 * @param fp80 value to analyze.
138 * @return -1 if negative normal, +1 if positive normal, 0 otherwise.
139 */
140int fp80_isnormal(fp80_t fp80);
141/**
142 * Is the value a NaN of any kind?
143 *
144 * @param fp80 value to analyze.
145 * @return -1 if negative NaN, +1 if positive NaN, 0 otherwise.
146 */
147int fp80_isnan(fp80_t fp80);
148/**
149 * Is the value one of the infinities?
150 *
151 * @param fp80 value to analyze.
152 * @return -1 if -inf, +1 if +inf, 0 otherwise.
153 */
154int fp80_isinf(fp80_t fp80);
155/**
156 * Determine value of the sign-bit of a floating point number.
157 *
158 * @note Floats can represent both positive and negative zeros.
159 *
160 * @param fp80 value to analyze.
161 * @return -1 if negative, +1 if positive.
162 */
163int fp80_sgn(fp80_t fp80);
164/**
165 * Is the value zero?
166 *
167 * @param fp80 value to analyze.
168 * @return -1 if negative zero, +1 if positive zero, 0 otherwise.
169 */
170int fp80_iszero(fp80_t fp80);
171/**
172 * Is the value a denormal?
173 *
174 * Numbers that are close to the minimum of what can be stored in a
175 * floating point number start losing precision because bits in the
176 * fraction get used (implicitly) to store parts of the negative
177 * exponent (i.e., the exponent is saturated and the fraction is less
178 * than 1). Such numbers are known as denormals. This function checks
179 * whether a float is a denormal or not.
180 *
181 * @param fp80 value to analyze.
182 * @return -1 if negative denormal, +1 if positive denormal, 0 otherwise.
183 */
184int fp80_issubnormal(fp80_t fp80);
185
186
193/**
194 * Convert an 80-bit float to a 64-bit double.
195 *
187/**
188 * Convert an 80-bit float to a 64-bit double.
189 *
190 * Convenience wrapper around fp80_cvtfp64() that returns a double
191 * instead of the internal fp64_t representation.
192 *
193 * Note that this conversion is lossy, see fp80_cvtfp64() for details
194 * of the conversion.
195 *
196 * @param fp80 Source value to convert.
197 * @return value represented as double.
198 */
199double fp80_cvtd(fp80_t fp80);
200
201/**
202 * Convert an 80-bit float to a 64-bit double.
203 *
196 * This function converts an 80-bit float into a standard 64-bit
197 * double. This conversion is inherently lossy since a double can only
198 * represent a subset of what an 80-bit float can represent. The
199 * fraction of the source value will always be truncated to fit the
200 * lower precision. If a value falls outside of the range that can be
201 * accurately represented by double by truncating the fraction, one of
202 * the following happens:
203 * <ul>
204 * <li>A denormal will be generated if that can approximate the
205 * value.
206 * <li>[-]0 will be generated if the magnitude of the value is too
207 * small to be represented at all.
208 * <li>+-Inf will be generated if the magnitude of the value is too
209 * large to be represented.
210 * </ul>
211 *
212 * NaN values will be preserved across the conversion.
213 *
214 * @param fp80 Source value to convert.
215 * @return 64-bit version of the float.
216 */
204 * This function converts an 80-bit float into a standard 64-bit
205 * double. This conversion is inherently lossy since a double can only
206 * represent a subset of what an 80-bit float can represent. The
207 * fraction of the source value will always be truncated to fit the
208 * lower precision. If a value falls outside of the range that can be
209 * accurately represented by double by truncating the fraction, one of
210 * the following happens:
211 * <ul>
212 * <li>A denormal will be generated if that can approximate the
213 * value.
214 * <li>[-]0 will be generated if the magnitude of the value is too
215 * small to be represented at all.
216 * <li>+-Inf will be generated if the magnitude of the value is too
217 * large to be represented.
218 * </ul>
219 *
220 * NaN values will be preserved across the conversion.
221 *
222 * @param fp80 Source value to convert.
223 * @return 64-bit version of the float.
224 */
217double fp80_cvtd(fp80_t fp80);
225fp64_t fp80_cvtfp64(fp80_t fp80);
218
219/**
226
227/**
220 * Convert a 64-bit double to an 80-bit float.
228 * Convert a double to an 80-bit float.
221 *
229 *
222 * This function converts a standard 64-bit double into an 80-bit
223 * float. This conversion is completely lossless since the 80-bit
224 * float represents a superset of what a 64-bit double can
225 * represent.
230 * This is a convenience wrapper around fp80_cvffp64() and provides a
231 * convenient way of using the native double type instead of the
232 * internal fp64_t representation.
226 *
233 *
227 * @note Denormals will be converted to normalized values.
228 *
229 * @param fpd Source value to convert.
234 * @param fpd Source value to convert.
230 * @return 80-bit version of the float.
235 * @return 80-bit version of the float.
231 */
232fp80_t fp80_cvfd(double fpd);
233
234/**
236 */
237fp80_t fp80_cvfd(double fpd);
238
239/**
240 * Convert a 64-bit float to an 80-bit float.
241 *
242 * This function converts the internal representation of a 64-bit
243 * float into an 80-bit float. This conversion is completely lossless
244 * since the 80-bit float represents a superset of what a 64-bit
245 * float can represent.
246 *
247 * @note Denormals will be converted to normalized values.
248 *
249 * @param fp64 64-bit float to convert.
250 * @return 80-bit version of the float.
251 */
252fp80_t fp80_cvffp64(fp64_t fp64);
253
254/**
235 * Dump the components of an 80-bit float to a file.
236 *
237 * @warning This function is intended for debugging and the format of
238 * the output is not guaranteed to be stable.
239 *
240 * @param fout Output stream (e.g., stdout)
241 * @param fp80 value to dump.
242 */
243void fp80_debug_dump(FILE *fout, fp80_t fp80);
244
245/** @} */
246
247#ifdef __cplusplus
248} /* extern "C" */
249#endif
250
251#endif
255 * Dump the components of an 80-bit float to a file.
256 *
257 * @warning This function is intended for debugging and the format of
258 * the output is not guaranteed to be stable.
259 *
260 * @param fout Output stream (e.g., stdout)
261 * @param fp80 value to dump.
262 */
263void fp80_debug_dump(FILE *fout, fp80_t fp80);
264
265/** @} */
266
267#ifdef __cplusplus
268} /* extern "C" */
269#endif
270
271#endif