fp80.h (9899:0392ef94d766) | fp80.h (10480:5d4ebc92d32e) |
---|---|
1/* 2 * Copyright (c) 2013, Andreas Sandberg 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * --- 16 unchanged lines hidden (view full) --- 25 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 26 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 27 * OF THE POSSIBILITY OF SUCH DAMAGE. 28 */ 29 30#ifndef _FP80_H 31#define _FP80_H 1 32 | 1/* 2 * Copyright (c) 2013, Andreas Sandberg 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * --- 16 unchanged lines hidden (view full) --- 25 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 26 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 27 * OF THE POSSIBILITY OF SUCH DAMAGE. 28 */ 29 30#ifndef _FP80_H 31#define _FP80_H 1 32 |
33#include <math.h> 34#include <stdint.h> | 33#include <math.h> /* FP_NAN et al. */ |
35#include <stdio.h> 36 | 34#include <stdio.h> 35 |
| | 36#include <fputils/fptypes.h> 37 38 |
|
37#ifdef __cplusplus 38extern "C" { 39#endif 40 41/** 42 * @defgroup fp80 80-bit Floats 43 * Functions handling 80-bit floats. 44 * 45 * @{ 46 */ 47 | 39#ifdef __cplusplus 40extern "C" { 41#endif 42 43/** 44 * @defgroup fp80 80-bit Floats 45 * Functions handling 80-bit floats. 46 * 47 * @{ 48 */ 49 |
48/** Internal representation of an 80-bit float. */ 49typedef union { 50 char bits[10]; 51 struct { 52 uint64_t fi; 53 uint16_t se; 54 } repr; 55} fp80_t; 56 | |
57/** Constant representing +inf */ 58extern const fp80_t fp80_pinf; 59/** Constant representing -inf */ 60extern const fp80_t fp80_ninf; 61/** Constant representing a quiet NaN */ 62extern const fp80_t fp80_qnan; 63/** Constant representing a quiet indefinite NaN */ 64extern const fp80_t fp80_qnani; --- 120 unchanged lines hidden (view full) --- 185 * than 1). Such numbers are known as denormals. This function checks 186 * whether a float is a denormal or not. 187 * 188 * @param fp80 value to analyze. 189 * @return -1 if negative denormal, +1 if positive denormal, 0 otherwise. 190 */ 191int fp80_issubnormal(fp80_t fp80); 192 | 50/** Constant representing +inf */ 51extern const fp80_t fp80_pinf; 52/** Constant representing -inf */ 53extern const fp80_t fp80_ninf; 54/** Constant representing a quiet NaN */ 55extern const fp80_t fp80_qnan; 56/** Constant representing a quiet indefinite NaN */ 57extern const fp80_t fp80_qnani; --- 120 unchanged lines hidden (view full) --- 178 * than 1). Such numbers are known as denormals. This function checks 179 * whether a float is a denormal or not. 180 * 181 * @param fp80 value to analyze. 182 * @return -1 if negative denormal, +1 if positive denormal, 0 otherwise. 183 */ 184int fp80_issubnormal(fp80_t fp80); 185 |
| | 186 |
|
193/** 194 * Convert an 80-bit float to a 64-bit double. 195 * | 187/** 188 * Convert an 80-bit float to a 64-bit double. 189 * |
| | 190 * Convenience wrapper around fp80_cvtfp64() that returns a double 191 * instead of the internal fp64_t representation. 192 * 193 * Note that this conversion is lossy, see fp80_cvtfp64() for details 194 * of the conversion. 195 * 196 * @param fp80 Source value to convert. 197 * @return value represented as double. 198 */ 199double fp80_cvtd(fp80_t fp80); 200 201/** 202 * Convert an 80-bit float to a 64-bit double. 203 * |
|
196 * This function converts an 80-bit float into a standard 64-bit 197 * double. This conversion is inherently lossy since a double can only 198 * represent a subset of what an 80-bit float can represent. The 199 * fraction of the source value will always be truncated to fit the 200 * lower precision. If a value falls outside of the range that can be 201 * accurately represented by double by truncating the fraction, one of 202 * the following happens: 203 * <ul> --- 5 unchanged lines hidden (view full) --- 209 * large to be represented. 210 * </ul> 211 * 212 * NaN values will be preserved across the conversion. 213 * 214 * @param fp80 Source value to convert. 215 * @return 64-bit version of the float. 216 */ | 204 * This function converts an 80-bit float into a standard 64-bit 205 * double. This conversion is inherently lossy since a double can only 206 * represent a subset of what an 80-bit float can represent. The 207 * fraction of the source value will always be truncated to fit the 208 * lower precision. If a value falls outside of the range that can be 209 * accurately represented by double by truncating the fraction, one of 210 * the following happens: 211 * <ul> --- 5 unchanged lines hidden (view full) --- 217 * large to be represented. 218 * </ul> 219 * 220 * NaN values will be preserved across the conversion. 221 * 222 * @param fp80 Source value to convert. 223 * @return 64-bit version of the float. 224 */ |
217double fp80_cvtd(fp80_t fp80); | 225fp64_t fp80_cvtfp64(fp80_t fp80); |
218 219/** | 226 227/** |
220 * Convert an 64-bit double to an 80-bit float. | 228 * Convert a double to an 80-bit float. |
221 * | 229 * |
222 * This function converts a standard 64-bit double into an 80-bit 223 * float. This conversion is completely lossless since the 80-bit 224 * float represents a superset of what a 64-bit double can 225 * represent. | 230 * This is a convenience wrapper around fp80_cvffp64() and provides a 231 * convenient way of using the native double type instead of the 232 * internal fp64_t representation. |
226 * | 233 * |
227 * @note Denormals will be converted to normalized values. 228 * | |
229 * @param fpd Source value to convert. | 234 * @param fpd Source value to convert. |
230 * @return 64-bit version of the float. | 235 * @return 80-bit version of the float. |
231 */ 232fp80_t fp80_cvfd(double fpd); 233 234/** | 236 */ 237fp80_t fp80_cvfd(double fpd); 238 239/** |
| | 240 * Convert a 64-bit float to an 80-bit float. 241 * 242 * This function converts the internal representation of a 64-bit 243 * float into an 80-bit float. This conversion is completely lossless 244 * since the 80-bit float represents a superset of what a 64-bit 245 * float can represent. 246 * 247 * @note Denormals will be converted to normalized values. 248 * 249 * @param fp64 64-bit float to convert. 250 * @return 80-bit version of the float. 251 */ 252fp80_t fp80_cvffp64(fp64_t fp64); 253 254/** |
|
235 * Dump the components of an 80-bit float to a file. 236 * 237 * @warning This function is intended for debugging and the format of 238 * the output is not guaranteed to be stable. 239 * 240 * @param fout Output stream (e.g., stdout) 241 * @param fp80 value to dump. 242 */ 243void fp80_debug_dump(FILE *fout, fp80_t fp80); 244 245/** @} */ 246 247#ifdef __cplusplus 248} /* extern "C" */ 249#endif 250 251#endif | 255 * Dump the components of an 80-bit float to a file. 256 * 257 * @warning This function is intended for debugging and the format of 258 * the output is not guaranteed to be stable. 259 * 260 * @param fout Output stream (e.g., stdout) 261 * @param fp80 value to dump. 262 */ 263void fp80_debug_dump(FILE *fout, fp80_t fp80); 264 265/** @} */ 266 267#ifdef __cplusplus 268} /* extern "C" */ 269#endif 270 271#endif |