1/* 2 * Copyright (c) 2012-2013, 2017-2018 ARM Limited 3 * All rights reserved 4 * 5 * The license below extends only to copyright in the software and shall 6 * not be construed as granting a license to any other intellectual 7 * property including but not limited to intellectual property relating 8 * to a hardware implementation of the functionality of the software 9 * licensed hereunder. You may use the software subject to the license 10 * terms below provided that you ensure that this notice is replicated 11 * unmodified and in its entirety in all distributions of the software, 12 * modified or unmodified, in source code or in binary form. 13 * 14 * Redistribution and use in source and binary forms, with or without 15 * modification, are permitted provided that the following conditions are 16 * met: redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer; 18 * redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution; 21 * neither the name of the copyright holders nor the names of its 22 * contributors may be used to endorse or promote products derived from 23 * this software without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 36 * 37 * Authors: Edmund Grimley Evans 38 * Thomas Grocutt 39 */ 40 41/** 42 * @file 43 * Floating-point library code, which will gradually replace vfp.hh. For 44 * portability, this library does not use floating-point data types. Currently, 45 * C's standard integer types are used in the API, though this could be changed 46 * to something like class Fp32 { uint32_t x; }, etc. 47 */ 48 49#ifndef __ARCH_ARM_INSTS_FPLIB_HH__ 50#define __ARCH_ARM_INSTS_FPLIB_HH__ 51 52#include <stdint.h> 53 54#include "arch/arm/miscregs.hh" 55 56namespace ArmISA 57{ 58 59enum FPRounding { 60 FPRounding_TIEEVEN = 0, 61 FPRounding_POSINF = 1, 62 FPRounding_NEGINF = 2, 63 FPRounding_ZERO = 3, 64 FPRounding_TIEAWAY = 4, 65 FPRounding_ODD = 5 66}; 67 68static inline FPRounding 69FPCRRounding(FPSCR &fpscr) 70{ 71 return (FPRounding)((uint32_t)fpscr >> 22 & 3); 72} 73 74/** Floating-point absolute value. */ 75template <class T> 76T fplibAbs(T op); 77/** Floating-point add. */ 78template <class T> 79T fplibAdd(T op1, T op2, FPSCR &fpscr); 80/** Floating-point compare (quiet and signaling). */ 81template <class T> 82int fplibCompare(T op1, T op2, bool signal_nans, FPSCR &fpscr); 83/** Floating-point compare equal. */ 84template <class T> 85bool fplibCompareEQ(T op1, T op2, FPSCR &fpscr); 86/** Floating-point compare greater than or equal. */ 87template <class T> 88bool fplibCompareGE(T op1, T op2, FPSCR &fpscr); 89/** Floating-point compare greater than. */ 90template <class T> 91bool fplibCompareGT(T op1, T op2, FPSCR &fpscr); 92/** Floating-point compare unordered. */ 93template <class T> 94bool fplibCompareUN(T op1, T op2, FPSCR &fpscr); 95/** Floating-point convert precision. */ 96template <class T1, class T2> 97T2 fplibConvert(T1 op, FPRounding rounding, FPSCR &fpscr); 98/** Floating-point division. */ 99template <class T> 100T fplibDiv(T op1, T op2, FPSCR &fpscr); 101/** Floating-point exponential accelerator. */ 102template <class T> 103T fplibExpA(T op); 104/** Floating-point maximum. */ 105template <class T> 106T fplibMax(T op1, T op2, FPSCR &fpscr); 107/** Floating-point maximum number. */ 108template <class T> 109T fplibMaxNum(T op1, T op2, FPSCR &fpscr); 110/** Floating-point minimum. */ 111template <class T> 112T fplibMin(T op1, T op2, FPSCR &fpscr); 113/** Floating-point minimum number. */ 114template <class T> 115T fplibMinNum(T op1, T op2, FPSCR &fpscr); 116/** Floating-point multiply. */ 117template <class T> 118T fplibMul(T op1, T op2, FPSCR &fpscr); 119/** Floating-point multiply-add. */ 120template <class T> 121T fplibMulAdd(T addend, T op1, T op2, FPSCR &fpscr); 122/** Floating-point multiply extended. */ 123template <class T> 124T fplibMulX(T op1, T op2, FPSCR &fpscr); 125/** Floating-point negate. */ 126template <class T> 127T fplibNeg(T op); 128/** Floating-point reciprocal square root estimate. */ 129template <class T> 130T fplibRSqrtEstimate(T op, FPSCR &fpscr); 131/** Floating-point reciprocal square root step. */ 132template <class T> 133T fplibRSqrtStepFused(T op1, T op2, FPSCR &fpscr); 134/** Floating-point reciprocal estimate. */ 135template <class T> 136T fplibRecipEstimate(T op, FPSCR &fpscr); 137/** Floating-point reciprocal step. */ 138template <class T> 139T fplibRecipStepFused(T op1, T op2, FPSCR &fpscr); 140/** Floating-point reciprocal exponent. */ 141template <class T> 142T fplibRecpX(T op, FPSCR &fpscr); 143/** Floating-point convert to integer. */ 144template <class T> 145T fplibRoundInt(T op, FPRounding rounding, bool exact, FPSCR &fpscr); 146/** Floating-point adjust exponent. */ 147template <class T> 148T fplibScale(T op1, T op2, FPSCR &fpscr); 149/** Floating-point square root. */ 150template <class T> 151T fplibSqrt(T op, FPSCR &fpscr); 152/** Floating-point subtract. */ 153template <class T> 154T fplibSub(T op1, T op2, FPSCR &fpscr); 155/** Floating-point trigonometric multiply-add coefficient. */ 156template <class T> 157T fplibTrigMulAdd(uint8_t coeff_index, T op1, T op2, FPSCR &fpscr); 158/** Floating-point trigonometric starting value. */ 159template <class T> 160T fplibTrigSMul(T op1, T op2, FPSCR &fpscr); 161/** Floating-point trigonometric select coefficient. */ 162template <class T> 163T fplibTrigSSel(T op1, T op2, FPSCR &fpscr); 164/** Floating-point convert to fixed-point. */ 165template <class T1, class T2> 166T2 fplibFPToFixed(T1 op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr); 167/** Floating-point convert from fixed-point. */ 168template <class T> 169T fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, 170 FPSCR &fpscr); 171/** Floating-point value for +/- infinity. */ 172template <class T> 173T fplibInfinity(int sgn); 174/** Foating-point value for default NaN. */ 175template <class T> 176T fplibDefaultNaN(); 177 178/* Function specializations... */ 179template <> 180uint16_t fplibAbs(uint16_t op); 181template <> 182uint32_t fplibAbs(uint32_t op); 183template <> 184uint64_t fplibAbs(uint64_t op); 185template <> 186uint16_t fplibAdd(uint16_t op1, uint16_t op2, FPSCR &fpscr); 187template <> 188uint32_t fplibAdd(uint32_t op1, uint32_t op2, FPSCR &fpscr); 189template <> 190uint64_t fplibAdd(uint64_t op1, uint64_t op2, FPSCR &fpscr); 191template <> 192int fplibCompare(uint16_t op1, uint16_t op2, bool signal_nans, FPSCR &fpscr); 193template <> 194int fplibCompare(uint32_t op1, uint32_t op2, bool signal_nans, FPSCR &fpscr); 195template <> 196int fplibCompare(uint64_t op1, uint64_t op2, bool signal_nans, FPSCR &fpscr); 197template <> 198bool fplibCompareEQ(uint16_t op1, uint16_t op2, FPSCR &fpscr); 199template <> 200bool fplibCompareEQ(uint32_t op1, uint32_t op2, FPSCR &fpscr); 201template <> 202bool fplibCompareEQ(uint64_t op1, uint64_t op2, FPSCR &fpscr); 203template <> 204bool fplibCompareGE(uint16_t op1, uint16_t op2, FPSCR &fpscr); 205template <> 206bool fplibCompareGE(uint32_t op1, uint32_t op2, FPSCR &fpscr); 207template <> 208bool fplibCompareGE(uint64_t op1, uint64_t op2, FPSCR &fpscr); 209template <> 210bool fplibCompareGT(uint16_t op1, uint16_t op2, FPSCR &fpscr); 211template <> 212bool fplibCompareGT(uint32_t op1, uint32_t op2, FPSCR &fpscr); 213template <> 214bool fplibCompareGT(uint64_t op1, uint64_t op2, FPSCR &fpscr); 215template <> 216bool fplibCompareUN(uint16_t op1, uint16_t op2, FPSCR &fpscr); 217template <> 218bool fplibCompareUN(uint32_t op1, uint32_t op2, FPSCR &fpscr); 219template <> 220bool fplibCompareUN(uint64_t op1, uint64_t op2, FPSCR &fpscr); 221template <> 222uint16_t fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr); 223template <> 224uint16_t fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr); 225template <> 226uint32_t fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr); 227template <> 228uint32_t fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr); 229template <> 230uint64_t fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr); 231template <> 232uint64_t fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr); 233template <> 234uint16_t fplibDiv(uint16_t op1, uint16_t op2, FPSCR &fpscr); 235template <> 236uint32_t fplibDiv(uint32_t op1, uint32_t op2, FPSCR &fpscr); 237template <> 238uint64_t fplibDiv(uint64_t op1, uint64_t op2, FPSCR &fpscr); 239template <> 240uint16_t fplibExpA(uint16_t op); 241template <> 242uint32_t fplibExpA(uint32_t op); 243template <> 244uint64_t fplibExpA(uint64_t op); 245template <> 246uint16_t fplibMax(uint16_t op1, uint16_t op2, FPSCR &fpscr); 247template <> 248uint32_t fplibMax(uint32_t op1, uint32_t op2, FPSCR &fpscr); 249template <> 250uint64_t fplibMax(uint64_t op1, uint64_t op2, FPSCR &fpscr); 251template <> 252uint16_t fplibMaxNum(uint16_t op1, uint16_t op2, FPSCR &fpscr); 253template <> 254uint32_t fplibMaxNum(uint32_t op1, uint32_t op2, FPSCR &fpscr); 255template <> 256uint64_t fplibMaxNum(uint64_t op1, uint64_t op2, FPSCR &fpscr); 257template <> 258uint16_t fplibMin(uint16_t op1, uint16_t op2, FPSCR &fpscr); 259template <> 260uint32_t fplibMin(uint32_t op1, uint32_t op2, FPSCR &fpscr); 261template <> 262uint64_t fplibMin(uint64_t op1, uint64_t op2, FPSCR &fpscr); 263template <> 264uint16_t fplibMinNum(uint16_t op1, uint16_t op2, FPSCR &fpscr); 265template <> 266uint32_t fplibMinNum(uint32_t op1, uint32_t op2, FPSCR &fpscr); 267template <> 268uint64_t fplibMinNum(uint64_t op1, uint64_t op2, FPSCR &fpscr); 269template <> 270uint16_t fplibMul(uint16_t op1, uint16_t op2, FPSCR &fpscr); 271template <> 272uint32_t fplibMul(uint32_t op1, uint32_t op2, FPSCR &fpscr); 273template <> 274uint64_t fplibMul(uint64_t op1, uint64_t op2, FPSCR &fpscr); 275template <> 276uint16_t fplibMulAdd(uint16_t addend, uint16_t op1, uint16_t op2, 277 FPSCR &fpscr); 278template <> 279uint32_t fplibMulAdd(uint32_t addend, uint32_t op1, uint32_t op2, 280 FPSCR &fpscr); 281template <> 282uint64_t fplibMulAdd(uint64_t addend, uint64_t op1, uint64_t op2, 283 FPSCR &fpscr); 284template <> 285uint16_t fplibMulX(uint16_t op1, uint16_t op2, FPSCR &fpscr); 286template <> 287uint32_t fplibMulX(uint32_t op1, uint32_t op2, FPSCR &fpscr); 288template <> 289uint64_t fplibMulX(uint64_t op1, uint64_t op2, FPSCR &fpscr); 290template <> 291uint16_t fplibNeg(uint16_t op); 292template <> 293uint32_t fplibNeg(uint32_t op); 294template <> 295uint64_t fplibNeg(uint64_t op); 296template <> 297uint16_t fplibRSqrtEstimate(uint16_t op, FPSCR &fpscr); 298template <> 299uint32_t fplibRSqrtEstimate(uint32_t op, FPSCR &fpscr); 300template<> 301uint64_t fplibRSqrtEstimate(uint64_t op, FPSCR &fpscr); 302template <> 303uint16_t fplibRSqrtStepFused(uint16_t op1, uint16_t op2, FPSCR &fpscr); 304template <> 305uint32_t fplibRSqrtStepFused(uint32_t op1, uint32_t op2, FPSCR &fpscr); 306template <> 307uint64_t fplibRSqrtStepFused(uint64_t op1, uint64_t op2, FPSCR &fpscr); 308template <> 309uint16_t fplibRecipEstimate(uint16_t op, FPSCR &fpscr); 310template <> 311uint32_t fplibRecipEstimate(uint32_t op, FPSCR &fpscr); 312template <> 313uint64_t fplibRecipEstimate(uint64_t op, FPSCR &fpscr); 314template <> 315uint16_t fplibRecipStepFused(uint16_t op1, uint16_t op2, FPSCR &fpscr); 316template <> 317uint32_t fplibRecipStepFused(uint32_t op1, uint32_t op2, FPSCR &fpscr); 318template <> 319uint64_t fplibRecipStepFused(uint64_t op1, uint64_t op2, FPSCR &fpscr); 320template <> 321uint16_t fplibRecpX(uint16_t op, FPSCR &fpscr); 322template <> 323uint32_t fplibRecpX(uint32_t op, FPSCR &fpscr); 324template <> 325uint64_t fplibRecpX(uint64_t op, FPSCR &fpscr); 326template <> 327uint16_t fplibRoundInt(uint16_t op, FPRounding rounding, bool exact, 328 FPSCR &fpscr); 329template <> 330uint32_t fplibRoundInt(uint32_t op, FPRounding rounding, bool exact, 331 FPSCR &fpscr); 332template <> 333uint64_t fplibRoundInt(uint64_t op, FPRounding rounding, bool exact, 334 FPSCR &fpscr); 335template <> 336uint16_t fplibScale(uint16_t op1, uint16_t op2, FPSCR &fpscr); 337template <> 338uint32_t fplibScale(uint32_t op1, uint32_t op2, FPSCR &fpscr); 339template <> 340uint64_t fplibScale(uint64_t op1, uint64_t op2, FPSCR &fpscr); 341template <> 342uint16_t fplibSqrt(uint16_t op, FPSCR &fpscr); 343template <> 344uint32_t fplibSqrt(uint32_t op, FPSCR &fpscr); 345template <> 346uint64_t fplibSqrt(uint64_t op, FPSCR &fpscr); 347template <> 348uint16_t fplibSub(uint16_t op1, uint16_t op2, FPSCR &fpscr); 349template <> 350uint32_t fplibSub(uint32_t op1, uint32_t op2, FPSCR &fpscr); 351template <> 352uint64_t fplibSub(uint64_t op1, uint64_t op2, FPSCR &fpscr); 353template <> 354uint16_t fplibTrigMulAdd(uint8_t coeff_index, uint16_t op1, uint16_t op2, 355 FPSCR &fpscr); 356template <> 357uint32_t fplibTrigMulAdd(uint8_t coeff_index, uint32_t op1, uint32_t op2, 358 FPSCR &fpscr); 359template <> 360uint64_t fplibTrigMulAdd(uint8_t coeff_index, uint64_t op1, uint64_t op2, 361 FPSCR &fpscr); 362template <> 363uint16_t fplibTrigSMul(uint16_t op1, uint16_t op2, FPSCR &fpscr); 364template <> 365uint32_t fplibTrigSMul(uint32_t op1, uint32_t op2, FPSCR &fpscr); 366template <> 367uint64_t fplibTrigSMul(uint64_t op1, uint64_t op2, FPSCR &fpscr); 368template <> 369uint16_t fplibTrigSSel(uint16_t op1, uint16_t op2, FPSCR &fpscr); 370template <> 371uint32_t fplibTrigSSel(uint32_t op1, uint32_t op2, FPSCR &fpscr); 372template <> 373uint64_t fplibTrigSSel(uint64_t op1, uint64_t op2, FPSCR &fpscr); 374template <> 375uint16_t fplibFPToFixed(uint16_t op, int fbits, bool u, FPRounding rounding, 376 FPSCR &fpscr); 377template <> 378uint32_t fplibFPToFixed(uint16_t op, int fbits, bool u, FPRounding rounding, 379 FPSCR &fpscr); 380template <> 381uint32_t fplibFPToFixed(uint32_t op, int fbits, bool u, FPRounding rounding, 382 FPSCR &fpscr); 383template <> 384uint32_t fplibFPToFixed(uint64_t op, int fbits, bool u, FPRounding rounding, 385 FPSCR &fpscr); 386template <> 387uint64_t fplibFPToFixed(uint16_t op, int fbits, bool u, FPRounding rounding, 388 FPSCR &fpscr); 389template <> 390uint64_t fplibFPToFixed(uint32_t op, int fbits, bool u, FPRounding rounding, 391 FPSCR &fpscr); 392template <> 393uint64_t fplibFPToFixed(uint64_t op, int fbits, bool u, FPRounding rounding, 394 FPSCR &fpscr); 395template <> 396uint16_t fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, 397 FPSCR &fpscr); 398template <> 399uint32_t fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, 400 FPSCR &fpscr); 401template <> 402uint64_t fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, 403 FPSCR &fpscr); 404template <> 405uint16_t fplibInfinity(int sgn); 406template <> 407uint32_t fplibInfinity(int sgn); 408template <> 409uint64_t fplibInfinity(int sgn); 410template <> 411uint16_t fplibDefaultNaN(); 412template <> 413uint32_t fplibDefaultNaN(); 414template <> 415uint64_t fplibDefaultNaN(); 416} 417 418#endif 419