gem5/ext/fputils/include/fputils/fp80.h
Andreas Sandberg 7c52865d1b ext: Update fputils to rev 52b6190b4e
This changeset updates the external library to git revision
52b6190b4e. This update includes changes that fix compilation errors
on old gcc versions and fixes to test a case that affect ICC.
2013-10-01 15:19:56 +02:00

252 lines
7.3 KiB
C

/*
* Copyright (c) 2013, Andreas Sandberg
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef _FP80_H
#define _FP80_H 1
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#ifdef __cplusplus
extern "C" {
#endif
/**
* @defgroup fp80 80-bit Floats
* Functions handling 80-bit floats.
*
* @{
*/
/**
* Internal representation of an 80-bit float.
*
* The union exposes the same storage in two ways: as raw bytes
* (bits) and as a pair of fixed-width integer fields (repr). Which
* bytes of bits overlay which repr field depends on the host byte
* order and structure layout.
*
* @note NOTE(review): repr is not declared packed, so sizeof(fp80_t)
* may be larger than 10 bytes due to padding after se. Confirm
* before copying sizeof(fp80_t) bytes to or from a 10-byte buffer.
*/
typedef union {
/* Raw byte view of the 10-byte value. */
char bits[10];
struct {
/* 64-bit field; presumably the fraction including the explicit
* integer bit -- TODO confirm against the implementation. */
uint64_t fi;
/* 16-bit field; presumably the sign bit and the 15-bit exponent
* -- TODO confirm against the implementation. */
uint16_t se;
} repr;
} fp80_t;
/** Constant representing +inf */
extern const fp80_t fp80_pinf;
/** Constant representing -inf */
extern const fp80_t fp80_ninf;
/** Constant representing a quiet NaN */
extern const fp80_t fp80_qnan;
/** Constant representing a quiet indefinite NaN */
extern const fp80_t fp80_qnani;
/** Constant representing a signaling NaN */
extern const fp80_t fp80_snan;
/** Alias for fp80_qnan */
extern const fp80_t fp80_nan;
/**
* Is the value a special floating point value?
*
* Determine if a floating point value is one of the special values
* (i.e., one of the infinities or NaNs). In practice, this function
* only checks if the exponent is set to the maximum value supported
* by the binary representation, which is a reserved value used for
* such special numbers.
*
* @param fp80 value to analyze.
* @return 1 if the value is special, 0 otherwise.
*/
int fp80_isspecial(fp80_t fp80);
/**
* Is the value a quiet NaN?
*
* @param fp80 value to analyze.
* @return 1 if true, 0 otherwise.
*/
int fp80_isqnan(fp80_t fp80);
/**
* Is the value an indefinite quiet NaN?
*
* @param fp80 value to analyze.
* @return 1 if true, 0 otherwise.
*/
int fp80_isqnani(fp80_t fp80);
/**
* Is the value a signaling NaN?
*
* @param fp80 value to analyze.
* @return 1 if true, 0 otherwise.
*/
int fp80_issnan(fp80_t fp80);
/**
* Classify a floating point number.
*
* This function implements the same classification as the standard
* fpclassify() function. It returns one of the following floating
* point classes:
* <ul>
* <li>FP_NAN - The value is NaN.
* <li>FP_INFINITE - The value is either +inf or -inf.
* <li>FP_ZERO - The value is either +0 or -0.
* <li>FP_SUBNORMAL - The value is too small to be represented as a
* normalized float. See fp80_issubnormal().
* <li>FP_NORMAL - The value is none of the above.
* </ul>
*
* @param fp80 value to analyze.
* @return Floating point classification.
*/
int fp80_classify(fp80_t fp80);
/**
* Is a value finite?
*
* Check if a value is a finite value. That is, not one of the
* infinities or NaNs.
*
* @param fp80 value to analyze.
* @return -1 if negative finite, +1 if positive finite, 0 otherwise.
*/
int fp80_isfinite(fp80_t fp80);
/**
* Is the value a non-zero normal?
*
* This function checks if a floating point value is a normal (having
* an exponent greater than or equal to 1) or not. See fp80_issubnormal()
* for a description of what a denormal value is.
*
* @see fp80_issubnormal()
*
* @param fp80 value to analyze.
* @return -1 if negative normal, +1 if positive normal, 0 otherwise.
*/
int fp80_isnormal(fp80_t fp80);
/**
* Is the value a NaN of any kind?
*
* @param fp80 value to analyze.
* @return -1 if negative NaN, +1 if positive NaN, 0 otherwise.
*/
int fp80_isnan(fp80_t fp80);
/**
* Is the value one of the infinities?
*
* @param fp80 value to analyze.
* @return -1 if -inf, +1 if +inf, 0 otherwise.
*/
int fp80_isinf(fp80_t fp80);
/**
* Determine value of the sign-bit of a floating point number.
*
* @note Floats can represent both positive and negative zeros.
*
* @param fp80 value to analyze.
* @return -1 if negative, +1 if positive.
*/
int fp80_sgn(fp80_t fp80);
/**
* Is the value zero?
*
* @param fp80 value to analyze.
* @return -1 if negative zero, +1 if positive zero, 0 otherwise.
*/
int fp80_iszero(fp80_t fp80);
/**
* Is the value a denormal?
*
* Numbers that are close to the minimum of what can be stored in a
* floating point number start losing precision because bits in the
* fraction get used (implicitly) to store parts of the negative
* exponent (i.e., the exponent is saturated and the fraction is less
* than 1). Such numbers are known as denormals. This function checks
* whether a float is a denormal or not.
*
* @param fp80 value to analyze.
* @return -1 if negative denormal, +1 if positive denormal, 0 otherwise.
*/
int fp80_issubnormal(fp80_t fp80);
/**
* Convert an 80-bit float to a 64-bit double.
*
* This function converts an 80-bit float into a standard 64-bit
* double. This conversion is inherently lossy since a double can only
* represent a subset of what an 80-bit float can represent. The
* fraction of the source value will always be truncated to fit the
* lower precision. If a value falls outside of the range that can be
* accurately represented by double by truncating the fraction, one of
* the following happens:
* <ul>
* <li>A denormal will be generated if that can approximate the
* value.
* <li>[-]0 will be generated if the magnitude of the value is too
* small to be represented at all.
* <li>+-Inf will be generated if the magnitude of the value is too
* large to be represented.
* </ul>
*
* NaN values will be preserved across the conversion.
*
* @param fp80 Source value to convert.
* @return 64-bit version of the float.
*/
double fp80_cvtd(fp80_t fp80);
/**
* Convert a 64-bit double to an 80-bit float.
*
* This function converts a standard 64-bit double into an 80-bit
* float. This conversion is completely lossless since the 80-bit
* float represents a superset of what a 64-bit double can
* represent.
*
* @note Denormals will be converted to normalized values.
*
* @param fpd Source value to convert.
* @return 80-bit version of the double.
*/
fp80_t fp80_cvfd(double fpd);
/**
* Dump the components of an 80-bit float to a file.
*
* @warning This function is intended for debugging and the format of
* the output is not guaranteed to be stable.
*
* @param fout Output stream (e.g., stdout)
* @param fp80 value to dump.
*/
void fp80_debug_dump(FILE *fout, fp80_t fp80);
/** @} */
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif