/* fp80.h revision 9899:0392ef94d766 */
/*
 * Copyright (c) 2013, Andreas Sandberg
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above
 *    copyright notice, this list of conditions and the following
 *    disclaimer in the documentation and/or other materials provided
 *    with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef _FP80_H
#define _FP80_H 1

#include <math.h>
#include <stdint.h>
#include <stdio.h>

#ifdef __cplusplus
extern "C" {
#endif

/**
 * @defgroup fp80 80-bit Floats
 * Functions handling 80-bit floats.
 *
 * @{
 */

/**
 * Internal representation of an 80-bit float.
 *
 * The value can be accessed either as 10 raw bytes (@c bits) or as a
 * 64-bit field followed by a 16-bit field (@c repr).
 *
 * NOTE(review): @c repr.fi presumably holds the fraction together
 * with the explicit integer bit, and @c repr.se the sign bit together
 * with the exponent, as in the x87 extended-precision format. Confirm
 * against the implementation before relying on this.
 */
typedef union {
    char bits[10];
    struct {
        uint64_t fi;
        uint16_t se;
    } repr;
} fp80_t;

/** Constant representing +inf */
extern const fp80_t fp80_pinf;
/** Constant representing -inf */
extern const fp80_t fp80_ninf;
/** Constant representing a quiet NaN */
extern const fp80_t fp80_qnan;
/** Constant representing a quiet indefinite NaN */
extern const fp80_t fp80_qnani;
/** Constant representing a signaling NaN */
extern const fp80_t fp80_snan;
/** Alias for fp80_qnan */
extern const fp80_t fp80_nan;

/**
 * Is the value a special floating point value?
 *
 * Determine if a floating point value is one of the special values
 * (i.e., one of the infinities or NaNs). In practice, this function
 * only checks if the exponent is set to the maximum value supported
 * by the binary representation, which is a reserved value used for
 * such special numbers.
 *
 * @param fp80 value to analyze.
 * @return 1 if the value is special, 0 otherwise.
 */
int fp80_isspecial(fp80_t fp80);

/**
 * Is the value a quiet NaN?
 *
 * @param fp80 value to analyze.
 * @return 1 if true, 0 otherwise.
 */
int fp80_isqnan(fp80_t fp80);

/**
 * Is the value an indefinite quiet NaN?
 *
 * @param fp80 value to analyze.
 * @return 1 if true, 0 otherwise.
 */
int fp80_isqnani(fp80_t fp80);

/**
 * Is the value a signaling NaN?
 *
 * @param fp80 value to analyze.
 * @return 1 if true, 0 otherwise.
 */
int fp80_issnan(fp80_t fp80);

/**
 * Classify a floating point number.
 *
 * This function implements the same classification as the standard
 * fpclassify() function. It returns one of the following floating
 * point classes:
 * <ul>
 *   <li>FP_NAN - The value is NaN.
 *   <li>FP_INFINITE - The value is either +inf or -inf.
 *   <li>FP_ZERO - The value is either +0 or -0.
 *   <li>FP_SUBNORMAL - The value is too small to be represented as a
 *                      normalized float. See fp80_issubnormal().
 *   <li>FP_NORMAL - The value is none of the above.
 * </ul>
 *
 * @param fp80 value to analyze.
 * @return Floating point classification.
 */
int fp80_classify(fp80_t fp80);

/**
 * Is a value finite?
 *
 * Check if a value is a finite value. That is, not one of the
 * infinities or NaNs.
 *
 * @param fp80 value to analyze.
 * @return -1 if negative finite, +1 if positive finite, 0 otherwise.
 */
int fp80_isfinite(fp80_t fp80);

/**
 * Is the value a non-zero normal?
 *
 * This function checks if a floating point value is a normal (having
 * an exponent larger than or equal to 1) or not. See
 * fp80_issubnormal() for a description of what a denormal value is.
 *
 * @see fp80_issubnormal()
 *
 * @param fp80 value to analyze.
 * @return -1 if negative normal, +1 if positive normal, 0 otherwise.
 */
int fp80_isnormal(fp80_t fp80);

/**
 * Is the value a NaN of any kind?
 *
 * @param fp80 value to analyze.
 * @return -1 if negative NaN, +1 if positive NaN, 0 otherwise.
 */
int fp80_isnan(fp80_t fp80);

/**
 * Is the value one of the infinities?
 *
 * @param fp80 value to analyze.
 * @return -1 if -inf, +1 if +inf, 0 otherwise.
 */
int fp80_isinf(fp80_t fp80);

/**
 * Determine value of the sign-bit of a floating point number.
 *
 * @note Floats can represent both positive and negative zeros.
 *
 * @param fp80 value to analyze.
 * @return -1 if negative, +1 if positive.
 */
int fp80_sgn(fp80_t fp80);

/**
 * Is the value zero?
 *
 * @param fp80 value to analyze.
 * @return -1 if negative zero, +1 if positive zero, 0 otherwise.
 */
int fp80_iszero(fp80_t fp80);

/**
 * Is the value a denormal?
 *
 * Numbers that are close to the minimum of what can be stored in a
 * floating point number start losing precision because bits in the
 * fraction get used (implicitly) to store parts of the negative
 * exponent (i.e., the exponent is saturated and the fraction is less
 * than 1). Such numbers are known as denormals. This function checks
 * whether a float is a denormal or not.
 *
 * @param fp80 value to analyze.
 * @return -1 if negative denormal, +1 if positive denormal, 0 otherwise.
 */
int fp80_issubnormal(fp80_t fp80);

/**
 * Convert an 80-bit float to a 64-bit double.
 *
 * This function converts an 80-bit float into a standard 64-bit
 * double. This conversion is inherently lossy since a double can only
 * represent a subset of what an 80-bit float can represent. The
 * fraction of the source value will always be truncated to fit the
 * lower precision. If a value falls outside of the range that can be
 * accurately represented by double by truncating the fraction, one of
 * the following happens:
 * <ul>
 *   <li>A denormal will be generated if that can approximate the
 *       value.
 *   <li>[-]0 will be generated if the magnitude of the value is too
 *       small to be represented at all.
 *   <li>+-Inf will be generated if the magnitude of the value is too
 *       large to be represented.
 * </ul>
 *
 * NaN values will be preserved across the conversion.
 *
 * @param fp80 Source value to convert.
 * @return 64-bit version of the float.
 */
double fp80_cvtd(fp80_t fp80);

/**
 * Convert a 64-bit double to an 80-bit float.
 *
 * This function converts a standard 64-bit double into an 80-bit
 * float. This conversion is completely lossless since the 80-bit
 * float represents a superset of what a 64-bit double can
 * represent.
 *
 * @note Denormals will be converted to normalized values.
 *
 * @param fpd Source value to convert.
 * @return 80-bit version of the float.
 */
fp80_t fp80_cvfd(double fpd);

/**
 * Dump the components of an 80-bit float to a file.
 *
 * @warning This function is intended for debugging and the format of
 * the output is not guaranteed to be stable.
 *
 * @param fout Output stream (e.g., stdout)
 * @param fp80 value to dump.
 */
void fp80_debug_dump(FILE *fout, fp80_t fp80);

/** @} */

#ifdef __cplusplus
} /* extern "C" */
#endif

#endif /* _FP80_H */