1/* 2 * Copyright (c) 2013, Andreas Sandberg 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above 12 * copyright notice, this list of conditions and the following 13 * disclaimer in the documentation and/or other materials provided 14 * with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 19 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 20 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 22 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 23 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 25 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 26 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 27 * OF THE POSSIBILITY OF SUCH DAMAGE. 28 */ 29 30#ifndef _FP80_H 31#define _FP80_H 1 32 33#include <math.h> /* FP_NAN et al. */ 34#include <stdio.h> 35 36#include <fputils/fptypes.h> 37 38 39#ifdef __cplusplus 40extern "C" { 41#endif 42 43/** 44 * @defgroup fp80 80-bit Floats 45 * Functions handling 80-bit floats. 46 * 47 * @{ 48 */ 49 50/** Constant representing +inf */ 51extern const fp80_t fp80_pinf; 52/** Constant representing -inf */ 53extern const fp80_t fp80_ninf; 54/** Constant representing a quiet NaN */ 55extern const fp80_t fp80_qnan; 56/** Constant representing a quiet indefinite NaN */ 57extern const fp80_t fp80_qnani; 58/** Constant representing a signaling NaN */ 59extern const fp80_t fp80_snan; 60/** Alias for fp80_qnan */ 61extern const fp80_t fp80_nan; 62 63/** 64 * Is the value a special floating point value? 65 * 66 * Determine if a floating point value is one of the special values 67 * (i.e., one of the infinities or NaNs). In practice, this function 68 * only checks if the exponent is set to the maximum value supported 69 * by the binary representation, which is a reserved value used for 70 * such special numbers. 71 * 72 * @param fp80 value to analyze. 73 * @return 1 if the value is special, 0 otherwise. 74 */ 75int fp80_isspecial(fp80_t fp80); 76/** 77 * Is the value a quiet NaN? 78 * 79 * @param fp80 value to analyze. 80 * @return 1 if true, 0 otherwise. 81 */ 82int fp80_isqnan(fp80_t fp80); 83/** 84 * Is the value an indefinite quiet NaN? 85 * 86 * @param fp80 value to analyze. 87 * @return 1 if true, 0 otherwise. 88 */ 89int fp80_isqnani(fp80_t fp80); 90/** 91 * Is the value a signaling NaN? 92 * 93 * @param fp80 value to analyze. 94 * @return 1 if true, 0 otherwise. 95 */ 96int fp80_issnan(fp80_t fp80); 97 98/** 99 * Classify a floating point number. 100 * 101 * This function implements the same classification as the standard 102 * fpclassify() function. It returns one of the following floating 103 * point classes: 104 * <ul> 105 * <li>FP_NAN - The value is NaN. 106 * <li>FP_INFINITE - The value is either +inf or -inf. 107 * <li>FP_ZERO - The value is either +0 or -0. 108 * <li>FP_SUBNORMAL - The value is to small to be represented as a 109 * normalized float. See fp80_issubnormal(). 110 * <li>FP_NORMAL - The value is neither of above. 111 * </ul> 112 * 113 * @param fp80 value to analyze. 114 * @return Floating point classification. 115 */ 116int fp80_classify(fp80_t fp80); 117 118/** 119 * Is a value finite? 120 * 121 * Check if a value is a finite value. That is, not one of the 122 * infinities or NaNs. 123 * 124 * @param fp80 value to analyze. 125 * @return -1 if negative finite, +1 if positive finite, 0 otherwise. 126 */ 127int fp80_isfinite(fp80_t fp80); 128/** 129 * Is the value a non-zero normal? 130 * 131 * This function checks if a floating point value is a normal (having 132 * an exponent larger or equal to 1) or not. See fp80_issubnormal() 133 * for a description of what a denormal value is. 134 * 135 * @see fp80_issubnormal() 136 * 137 * @param fp80 value to analyze. 138 * @return -1 if negative normal, +1 if positive normal, 0 otherwise. 139 */ 140int fp80_isnormal(fp80_t fp80); 141/** 142 * Is the value a NaN of any kind? 143 * 144 * @param fp80 value to analyze. 145 * @return -1 if negative NaN, +1 if positive NaN, 0 otherwise. 146 */ 147int fp80_isnan(fp80_t fp80); 148/** 149 * Is the value one of the infinities? 150 * 151 * @param fp80 value to analyze. 152 * @return -1 if -inf, +1 if +inf, 0 otherwise. 153 */ 154int fp80_isinf(fp80_t fp80); 155/** 156 * Determine value of the sign-bit of a floating point number. 157 * 158 * @note Floats can represent both positive and negative zeros. 159 * 160 * @param fp80 value to analyze. 161 * @return -1 if negative, +1 if positive. 162 */ 163int fp80_sgn(fp80_t fp80); 164/** 165 * Is the value zero? 166 * 167 * @param fp80 value to analyze. 168 * @return -1 if negative zero, +1 if positive zero, 0 otherwise. 169 */ 170int fp80_iszero(fp80_t fp80); 171/** 172 * Is the value a denormal? 173 * 174 * Numbers that are close to the minimum of what can be stored in a 175 * floating point number start loosing precision because bits in the 176 * fraction get used (implicitly) to store parts of the negative 177 * exponent (i.e., the exponent is saturated and the fraction is less 178 * than 1). Such numbers are known as denormals. This function checks 179 * whether a float is a denormal or not. 180 * 181 * @param fp80 value to analyze. 182 * @return -1 if negative denormal, +1 if positive denormal, 0 otherwise. 183 */ 184int fp80_issubnormal(fp80_t fp80); 185 186 187/** 188 * Convert an 80-bit float to a 64-bit double. 189 * 190 * Convenience wrapper around fp80_cvtfp64() that returns a double 191 * instead of the internal fp64_t representation. 192 * 193 * Note that this conversion is lossy, see fp80_cvtfp64() for details 194 * of the conversion. 195 * 196 * @param fp80 Source value to convert. 197 * @return value represented as double. 198 */ 199double fp80_cvtd(fp80_t fp80); 200 201/** 202 * Convert an 80-bit float to a 64-bit double. 203 * 204 * This function converts an 80-bit float into a standard 64-bit 205 * double. This conversion is inherently lossy since a double can only 206 * represent a subset of what an 80-bit float can represent. The 207 * fraction of the source value will always be truncated to fit the 208 * lower precision. If a value falls outside of the range that can be 209 * accurately represented by double by truncating the fraction, one of 210 * the following happens: 211 * <ul> 212 * <li>A denormal will be generated if that can approximate the 213 * value. 214 * <li>[-]0 will be generated if the magnitude of the value is too 215 * small to be represented at all. 216 * <li>+-Inf will be generated if the magnitude of the value is too 217 * large to be represented. 218 * </ul> 219 * 220 * NaN values will be preserved across the conversion. 221 * 222 * @param fp80 Source value to convert. 223 * @return 64-bit version of the float. 224 */ 225fp64_t fp80_cvtfp64(fp80_t fp80); 226 227/** 228 * Convert a double to an 80-bit float. 229 * 230 * This is a convenience wrapper around fp80_cvffp64() and provides a 231 * convenient way of using the native double type instead of the 232 * internal fp64_t representation. 233 * 234 * @param fpd Source value to convert. 235 * @return 80-bit version of the float. 236 */ 237fp80_t fp80_cvfd(double fpd); 238 239/** 240 * Convert a 64-bit float to an 80-bit float. 241 * 242 * This function converts the internal representation of a 64-bit 243 * float into an 80-bit float. This conversion is completely lossless 244 * since the 80-bit float represents a superset of what a 64-bit 245 * float can represent. 246 * 247 * @note Denormals will be converted to normalized values. 248 * 249 * @param fp64 64-bit float to convert. 250 * @return 80-bit version of the float. 251 */ 252fp80_t fp80_cvffp64(fp64_t fp64); 253 254/** 255 * Dump the components of an 80-bit float to a file. 256 * 257 * @warning This function is intended for debugging and the format of 258 * the output is not guaranteed to be stable. 259 * 260 * @param fout Output stream (e.g., stdout) 261 * @param fp80 value to dump. 262 */ 263void fp80_debug_dump(FILE *fout, fp80_t fp80); 264 265/** @} */ 266 267#ifdef __cplusplus 268} /* extern "C" */ 269#endif 270 271#endif 272