1/*
2 * Copyright (c) 2012-2013, 2017-2018 ARM Limited
3 * All rights reserved
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder.  You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions are
16 * met: redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer;
18 * redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution;
21 * neither the name of the copyright holders nor the names of its
22 * contributors may be used to endorse or promote products derived from
23 * this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 *
37 * Authors: Edmund Grimley Evans
38 *          Thomas Grocutt
39 */
40
41/**
42 * @file
43 * Floating-point library code, which will gradually replace vfp.hh. For
44 * portability, this library does not use floating-point data types.  Currently,
45 * C's standard integer types are used in the API, though this could be changed
46 * to something like class Fp32 { uint32_t x; }, etc.
47 */
48
49#ifndef __ARCH_ARM_INSTS_FPLIB_HH__
50#define __ARCH_ARM_INSTS_FPLIB_HH__
51
52#include <stdint.h>
53
54#include "arch/arm/miscregs.hh"
55
56namespace ArmISA
57{
58
/**
 * Rounding modes accepted by the fplib routines.
 *
 * The numeric values are part of the contract: FPCRRounding() casts a
 * two-bit field of the FPSCR straight to this type, so it can only ever
 * produce the enumerators valued 0-3. TIEAWAY and ODD can therefore only be
 * requested explicitly by a caller.
 *
 * NOTE(review): the per-enumerator descriptions are read off the names
 * (they mirror the FPRounding enumeration of the Arm ARM pseudocode);
 * confirm against the implementation.
 */
enum FPRounding {
    FPRounding_TIEEVEN = 0,  ///< Nearest, ties to even.
    FPRounding_POSINF = 1,   ///< Towards plus infinity.
    FPRounding_NEGINF = 2,   ///< Towards minus infinity.
    FPRounding_ZERO = 3,     ///< Towards zero.
    FPRounding_TIEAWAY = 4,  ///< Nearest, ties away from zero.
    FPRounding_ODD = 5       ///< Round to odd.
};
67
68static inline FPRounding
69FPCRRounding(FPSCR &fpscr)
70{
71    return (FPRounding)((uint32_t)fpscr >> 22 & 3);
72}
73
74/** Floating-point absolute value. */
75template <class T>
76T fplibAbs(T op);
77/** Floating-point add. */
78template <class T>
79T fplibAdd(T op1, T op2, FPSCR &fpscr);
80/** Floating-point compare (quiet and signaling). */
81template <class T>
82int fplibCompare(T op1, T op2, bool signal_nans, FPSCR &fpscr);
83/** Floating-point compare equal. */
84template <class T>
85bool fplibCompareEQ(T op1, T op2, FPSCR &fpscr);
86/** Floating-point compare greater than or equal. */
87template <class T>
88bool fplibCompareGE(T op1, T op2, FPSCR &fpscr);
89/** Floating-point compare greater than. */
90template <class T>
91bool fplibCompareGT(T op1, T op2, FPSCR &fpscr);
92/** Floating-point compare unordered. */
93template <class T>
94bool fplibCompareUN(T op1, T op2, FPSCR &fpscr);
95/** Floating-point convert precision. */
96template <class T1, class T2>
97T2 fplibConvert(T1 op, FPRounding rounding, FPSCR &fpscr);
98/** Floating-point division. */
99template <class T>
100T fplibDiv(T op1, T op2, FPSCR &fpscr);
101/** Floating-point exponential accelerator. */
102template <class T>
103T fplibExpA(T op);
104/** Floating-point maximum. */
105template <class T>
106T fplibMax(T op1, T op2, FPSCR &fpscr);
107/** Floating-point maximum number. */
108template <class T>
109T fplibMaxNum(T op1, T op2, FPSCR &fpscr);
110/** Floating-point minimum. */
111template <class T>
112T fplibMin(T op1, T op2, FPSCR &fpscr);
113/** Floating-point minimum number. */
114template <class T>
115T fplibMinNum(T op1, T op2, FPSCR &fpscr);
116/** Floating-point multiply. */
117template <class T>
118T fplibMul(T op1, T op2, FPSCR &fpscr);
119/** Floating-point multiply-add. */
120template <class T>
121T fplibMulAdd(T addend, T op1, T op2, FPSCR &fpscr);
122/** Floating-point multiply extended. */
123template <class T>
124T fplibMulX(T op1, T op2, FPSCR &fpscr);
125/** Floating-point negate. */
126template <class T>
127T fplibNeg(T op);
128/** Floating-point reciprocal square root estimate. */
129template <class T>
130T fplibRSqrtEstimate(T op, FPSCR &fpscr);
131/** Floating-point reciprocal square root step. */
132template <class T>
133T fplibRSqrtStepFused(T op1, T op2, FPSCR &fpscr);
134/** Floating-point reciprocal estimate. */
135template <class T>
136T fplibRecipEstimate(T op, FPSCR &fpscr);
137/** Floating-point reciprocal step. */
138template <class T>
139T fplibRecipStepFused(T op1, T op2, FPSCR &fpscr);
140/** Floating-point reciprocal exponent. */
141template <class T>
142T fplibRecpX(T op, FPSCR &fpscr);
143/**  Floating-point convert to integer. */
144template <class T>
145T fplibRoundInt(T op, FPRounding rounding, bool exact, FPSCR &fpscr);
146/** Floating-point adjust exponent. */
147template <class T>
148T fplibScale(T op1, T op2, FPSCR &fpscr);
149/** Floating-point square root. */
150template <class T>
151T fplibSqrt(T op, FPSCR &fpscr);
152/** Floating-point subtract. */
153template <class T>
154T fplibSub(T op1, T op2, FPSCR &fpscr);
155/** Floating-point trigonometric multiply-add coefficient. */
156template <class T>
157T fplibTrigMulAdd(uint8_t coeff_index, T op1, T op2, FPSCR &fpscr);
158/** Floating-point trigonometric starting value. */
159template <class T>
160T fplibTrigSMul(T op1, T op2, FPSCR &fpscr);
161/** Floating-point trigonometric select coefficient. */
162template <class T>
163T fplibTrigSSel(T op1, T op2, FPSCR &fpscr);
164/** Floating-point convert to fixed-point. */
165template <class T1, class T2>
166T2 fplibFPToFixed(T1 op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr);
167/** Floating-point convert from fixed-point. */
168template <class T>
169T fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding,
170                 FPSCR &fpscr);
171/** Floating-point value for +/- infinity. */
172template <class T>
173T fplibInfinity(int sgn);
174/** Foating-point value for default NaN. */
175template <class T>
176T fplibDefaultNaN();
177
178/* Function specializations... */
179template <>
180uint16_t fplibAbs(uint16_t op);
181template <>
182uint32_t fplibAbs(uint32_t op);
183template <>
184uint64_t fplibAbs(uint64_t op);
185template <>
186uint16_t fplibAdd(uint16_t op1, uint16_t op2, FPSCR &fpscr);
187template <>
188uint32_t fplibAdd(uint32_t op1, uint32_t op2, FPSCR &fpscr);
189template <>
190uint64_t fplibAdd(uint64_t op1, uint64_t op2, FPSCR &fpscr);
191template <>
192int fplibCompare(uint16_t op1, uint16_t op2, bool signal_nans, FPSCR &fpscr);
193template <>
194int fplibCompare(uint32_t op1, uint32_t op2, bool signal_nans, FPSCR &fpscr);
195template <>
196int fplibCompare(uint64_t op1, uint64_t op2, bool signal_nans, FPSCR &fpscr);
197template <>
198bool fplibCompareEQ(uint16_t op1, uint16_t op2, FPSCR &fpscr);
199template <>
200bool fplibCompareEQ(uint32_t op1, uint32_t op2, FPSCR &fpscr);
201template <>
202bool fplibCompareEQ(uint64_t op1, uint64_t op2, FPSCR &fpscr);
203template <>
204bool fplibCompareGE(uint16_t op1, uint16_t op2, FPSCR &fpscr);
205template <>
206bool fplibCompareGE(uint32_t op1, uint32_t op2, FPSCR &fpscr);
207template <>
208bool fplibCompareGE(uint64_t op1, uint64_t op2, FPSCR &fpscr);
209template <>
210bool fplibCompareGT(uint16_t op1, uint16_t op2, FPSCR &fpscr);
211template <>
212bool fplibCompareGT(uint32_t op1, uint32_t op2, FPSCR &fpscr);
213template <>
214bool fplibCompareGT(uint64_t op1, uint64_t op2, FPSCR &fpscr);
215template <>
216bool fplibCompareUN(uint16_t op1, uint16_t op2, FPSCR &fpscr);
217template <>
218bool fplibCompareUN(uint32_t op1, uint32_t op2, FPSCR &fpscr);
219template <>
220bool fplibCompareUN(uint64_t op1, uint64_t op2, FPSCR &fpscr);
221template <>
222uint16_t fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr);
223template <>
224uint16_t fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr);
225template <>
226uint32_t fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr);
227template <>
228uint32_t fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr);
229template <>
230uint64_t fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr);
231template <>
232uint64_t fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr);
233template <>
234uint16_t fplibDiv(uint16_t op1, uint16_t op2, FPSCR &fpscr);
235template <>
236uint32_t fplibDiv(uint32_t op1, uint32_t op2, FPSCR &fpscr);
237template <>
238uint64_t fplibDiv(uint64_t op1, uint64_t op2, FPSCR &fpscr);
239template <>
240uint16_t fplibExpA(uint16_t op);
241template <>
242uint32_t fplibExpA(uint32_t op);
243template <>
244uint64_t fplibExpA(uint64_t op);
245template <>
246uint16_t fplibMax(uint16_t op1, uint16_t op2, FPSCR &fpscr);
247template <>
248uint32_t fplibMax(uint32_t op1, uint32_t op2, FPSCR &fpscr);
249template <>
250uint64_t fplibMax(uint64_t op1, uint64_t op2, FPSCR &fpscr);
251template <>
252uint16_t fplibMaxNum(uint16_t op1, uint16_t op2, FPSCR &fpscr);
253template <>
254uint32_t fplibMaxNum(uint32_t op1, uint32_t op2, FPSCR &fpscr);
255template <>
256uint64_t fplibMaxNum(uint64_t op1, uint64_t op2, FPSCR &fpscr);
257template <>
258uint16_t fplibMin(uint16_t op1, uint16_t op2, FPSCR &fpscr);
259template <>
260uint32_t fplibMin(uint32_t op1, uint32_t op2, FPSCR &fpscr);
261template <>
262uint64_t fplibMin(uint64_t op1, uint64_t op2, FPSCR &fpscr);
263template <>
264uint16_t fplibMinNum(uint16_t op1, uint16_t op2, FPSCR &fpscr);
265template <>
266uint32_t fplibMinNum(uint32_t op1, uint32_t op2, FPSCR &fpscr);
267template <>
268uint64_t fplibMinNum(uint64_t op1, uint64_t op2, FPSCR &fpscr);
269template <>
270uint16_t fplibMul(uint16_t op1, uint16_t op2, FPSCR &fpscr);
271template <>
272uint32_t fplibMul(uint32_t op1, uint32_t op2, FPSCR &fpscr);
273template <>
274uint64_t fplibMul(uint64_t op1, uint64_t op2, FPSCR &fpscr);
275template <>
276uint16_t fplibMulAdd(uint16_t addend, uint16_t op1, uint16_t op2,
277                     FPSCR &fpscr);
278template <>
279uint32_t fplibMulAdd(uint32_t addend, uint32_t op1, uint32_t op2,
280                     FPSCR &fpscr);
281template <>
282uint64_t fplibMulAdd(uint64_t addend, uint64_t op1, uint64_t op2,
283                     FPSCR &fpscr);
284template <>
285uint16_t fplibMulX(uint16_t op1, uint16_t op2, FPSCR &fpscr);
286template <>
287uint32_t fplibMulX(uint32_t op1, uint32_t op2, FPSCR &fpscr);
288template <>
289uint64_t fplibMulX(uint64_t op1, uint64_t op2, FPSCR &fpscr);
290template <>
291uint16_t fplibNeg(uint16_t op);
292template <>
293uint32_t fplibNeg(uint32_t op);
294template <>
295uint64_t fplibNeg(uint64_t op);
296template <>
297uint16_t fplibRSqrtEstimate(uint16_t op, FPSCR &fpscr);
298template <>
299uint32_t fplibRSqrtEstimate(uint32_t op, FPSCR &fpscr);
300template<>
301uint64_t fplibRSqrtEstimate(uint64_t op, FPSCR &fpscr);
302template <>
303uint16_t fplibRSqrtStepFused(uint16_t op1, uint16_t op2, FPSCR &fpscr);
304template <>
305uint32_t fplibRSqrtStepFused(uint32_t op1, uint32_t op2, FPSCR &fpscr);
306template <>
307uint64_t fplibRSqrtStepFused(uint64_t op1, uint64_t op2, FPSCR &fpscr);
308template <>
309uint16_t fplibRecipEstimate(uint16_t op, FPSCR &fpscr);
310template <>
311uint32_t fplibRecipEstimate(uint32_t op, FPSCR &fpscr);
312template <>
313uint64_t fplibRecipEstimate(uint64_t op, FPSCR &fpscr);
314template <>
315uint16_t fplibRecipStepFused(uint16_t op1, uint16_t op2, FPSCR &fpscr);
316template <>
317uint32_t fplibRecipStepFused(uint32_t op1, uint32_t op2, FPSCR &fpscr);
318template <>
319uint64_t fplibRecipStepFused(uint64_t op1, uint64_t op2, FPSCR &fpscr);
320template <>
321uint16_t fplibRecpX(uint16_t op, FPSCR &fpscr);
322template <>
323uint32_t fplibRecpX(uint32_t op, FPSCR &fpscr);
324template <>
325uint64_t fplibRecpX(uint64_t op, FPSCR &fpscr);
326template <>
327uint16_t fplibRoundInt(uint16_t op, FPRounding rounding, bool exact,
328                       FPSCR &fpscr);
329template <>
330uint32_t fplibRoundInt(uint32_t op, FPRounding rounding, bool exact,
331                       FPSCR &fpscr);
332template <>
333uint64_t fplibRoundInt(uint64_t op, FPRounding rounding, bool exact,
334                       FPSCR &fpscr);
335template <>
336uint16_t fplibScale(uint16_t op1, uint16_t op2, FPSCR &fpscr);
337template <>
338uint32_t fplibScale(uint32_t op1, uint32_t op2, FPSCR &fpscr);
339template <>
340uint64_t fplibScale(uint64_t op1, uint64_t op2, FPSCR &fpscr);
341template <>
342uint16_t fplibSqrt(uint16_t op, FPSCR &fpscr);
343template <>
344uint32_t fplibSqrt(uint32_t op, FPSCR &fpscr);
345template <>
346uint64_t fplibSqrt(uint64_t op, FPSCR &fpscr);
347template <>
348uint16_t fplibSub(uint16_t op1, uint16_t op2, FPSCR &fpscr);
349template <>
350uint32_t fplibSub(uint32_t op1, uint32_t op2, FPSCR &fpscr);
351template <>
352uint64_t fplibSub(uint64_t op1, uint64_t op2, FPSCR &fpscr);
353template <>
354uint16_t fplibTrigMulAdd(uint8_t coeff_index, uint16_t op1, uint16_t op2,
355                       FPSCR &fpscr);
356template <>
357uint32_t fplibTrigMulAdd(uint8_t coeff_index, uint32_t op1, uint32_t op2,
358                         FPSCR &fpscr);
359template <>
360uint64_t fplibTrigMulAdd(uint8_t coeff_index, uint64_t op1, uint64_t op2,
361                         FPSCR &fpscr);
362template <>
363uint16_t fplibTrigSMul(uint16_t op1, uint16_t op2, FPSCR &fpscr);
364template <>
365uint32_t fplibTrigSMul(uint32_t op1, uint32_t op2, FPSCR &fpscr);
366template <>
367uint64_t fplibTrigSMul(uint64_t op1, uint64_t op2, FPSCR &fpscr);
368template <>
369uint16_t fplibTrigSSel(uint16_t op1, uint16_t op2, FPSCR &fpscr);
370template <>
371uint32_t fplibTrigSSel(uint32_t op1, uint32_t op2, FPSCR &fpscr);
372template <>
373uint64_t fplibTrigSSel(uint64_t op1, uint64_t op2, FPSCR &fpscr);
374template <>
375uint16_t fplibFPToFixed(uint16_t op, int fbits, bool u, FPRounding rounding,
376                        FPSCR &fpscr);
377template <>
378uint32_t fplibFPToFixed(uint16_t op, int fbits, bool u, FPRounding rounding,
379                        FPSCR &fpscr);
380template <>
381uint32_t fplibFPToFixed(uint32_t op, int fbits, bool u, FPRounding rounding,
382                        FPSCR &fpscr);
383template <>
384uint32_t fplibFPToFixed(uint64_t op, int fbits, bool u, FPRounding rounding,
385                        FPSCR &fpscr);
386template <>
387uint64_t fplibFPToFixed(uint16_t op, int fbits, bool u, FPRounding rounding,
388                        FPSCR &fpscr);
389template <>
390uint64_t fplibFPToFixed(uint32_t op, int fbits, bool u, FPRounding rounding,
391                        FPSCR &fpscr);
392template <>
393uint64_t fplibFPToFixed(uint64_t op, int fbits, bool u, FPRounding rounding,
394                        FPSCR &fpscr);
395template <>
396uint16_t fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding,
397                        FPSCR &fpscr);
398template <>
399uint32_t fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding,
400                        FPSCR &fpscr);
401template <>
402uint64_t fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding,
403                        FPSCR &fpscr);
404template <>
405uint16_t fplibInfinity(int sgn);
406template <>
407uint32_t fplibInfinity(int sgn);
408template <>
409uint64_t fplibInfinity(int sgn);
410template <>
411uint16_t fplibDefaultNaN();
412template <>
413uint32_t fplibDefaultNaN();
414template <>
415uint64_t fplibDefaultNaN();
416}
417
418#endif
419