// vfp.hh — diff of revisions 7385:493aea5e1006 and 7386:23065556d48e
1/*
2 * Copyright (c) 2010 ARM Limited
3 * All rights reserved
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder. You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions are
16 * met: redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer;
18 * redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution;
21 * neither the name of the copyright holders nor the names of its
22 * contributors may be used to endorse or promote products derived from
23 * this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 *
37 * Authors: Gabe Black
38 */
39
40#ifndef __ARCH_ARM_INSTS_VFP_HH__
41#define __ARCH_ARM_INSTS_VFP_HH__
42
#include "arch/arm/insts/misc.hh"
#include "arch/arm/miscregs.hh"

#include <fenv.h>

#include <cmath>
#include <cstring>
47
48namespace ArmISA
49{
50
// Position of an instruction within a VFP microop sequence.
enum VfpMicroMode {
    VfpNotAMicroop,   // a stand-alone instruction
    VfpMicroop,       // an interior microop
    VfpFirstMicroop,  // the first microop of a sequence
    VfpLastMicroop    // the final microop of a sequence
};
57
58template<class T>
59static inline void
60setVfpMicroFlags(VfpMicroMode mode, T &flags)
61{
62 switch (mode) {
63 case VfpMicroop:
64 flags[StaticInst::IsMicroop] = true;
65 break;
66 case VfpFirstMicroop:
67 flags[StaticInst::IsMicroop] =
68 flags[StaticInst::IsFirstMicroop] = true;
69 break;
70 case VfpLastMicroop:
71 flags[StaticInst::IsMicroop] =
72 flags[StaticInst::IsLastMicroop] = true;
73 break;
74 case VfpNotAMicroop:
75 break;
76 }
77 if (mode == VfpMicroop || mode == VfpFirstMicroop) {
78 flags[StaticInst::IsDelayedCommit] = true;
79 }
80}
81
// Host <fenv.h> floating-point exception bits under local names.
enum FeExceptionBit
{
    FeDivByZero = FE_DIVBYZERO,
    FeInexact = FE_INEXACT,
    FeInvalid = FE_INVALID,
    FeOverflow = FE_OVERFLOW,
    FeUnderflow = FE_UNDERFLOW,
    FeAllExceptions = FE_ALL_EXCEPT
};
91
// Host <fenv.h> rounding modes under local names.
enum FeRoundingMode
{
    FeRoundDown = FE_DOWNWARD,
    FeRoundNearest = FE_TONEAREST,
    FeRoundZero = FE_TOWARDZERO,
    FeRoundUpward = FE_UPWARD
};
99
// VFP FPSCR rounding-mode field encodings (see prepVfpFpscr for the
// mapping onto the host's fenv rounding modes).
enum VfpRoundingMode
{
    VfpRoundNearest = 0,
    VfpRoundUpward = 1,
    VfpRoundDown = 2,
    VfpRoundZero = 3
};
107
108template <class fpType>
109static inline void
110vfpFlushToZero(uint32_t &_fpscr, fpType &op)
111{
112 FPSCR fpscr = _fpscr;
113 if (fpscr.fz == 1 && (std::fpclassify(op) == FP_SUBNORMAL)) {
114 fpscr.idc = 1;
115 op = 0;
116 }
117 _fpscr = fpscr;
118}
119
// Two-operand convenience wrapper: flush each operand independently.
template <class fpType>
static inline void
vfpFlushToZero(uint32_t &fpscr, fpType &op1, fpType &op2)
{
    vfpFlushToZero(fpscr, op1);
    vfpFlushToZero(fpscr, op2);
}
127
// Return the raw IEEE-754 bit pattern of a single-precision float.
// Uses std::memcpy rather than a union: reading an inactive union
// member is formally undefined behavior in C++.
static inline uint32_t
fpToBits(float fp)
{
    uint32_t bits;
    std::memcpy(&bits, &fp, sizeof(bits));
    return bits;
}
139
// Return the raw IEEE-754 bit pattern of a double-precision float.
// Uses std::memcpy rather than a union (union punning is UB in C++).
static inline uint64_t
fpToBits(double fp)
{
    uint64_t bits;
    std::memcpy(&bits, &fp, sizeof(bits));
    return bits;
}
151
// Reinterpret the low 32 bits of 'bits' as a single-precision float.
// The unused 'junk' argument only selects this overload by type.
// std::memcpy replaces the original union, whose inactive-member read
// was undefined behavior; the 64->32 truncation the union performed
// implicitly is now explicit.
static inline float
bitsToFp(uint64_t bits, float junk)
{
    const uint32_t lower = static_cast<uint32_t>(bits);
    float fp;
    std::memcpy(&fp, &lower, sizeof(fp));
    return fp;
}
163
// Reinterpret 'bits' as a double-precision float.  The unused 'junk'
// argument only selects this overload by type.  std::memcpy replaces
// the original union, whose inactive-member read was undefined
// behavior in C++.
static inline double
bitsToFp(uint64_t bits, double junk)
{
    double fp;
    std::memcpy(&fp, &bits, sizeof(fp));
    return fp;
}
175
176template <class fpType>
177static inline fpType
1/*
2 * Copyright (c) 2010 ARM Limited
3 * All rights reserved
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder. You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions are
16 * met: redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer;
18 * redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution;
21 * neither the name of the copyright holders nor the names of its
22 * contributors may be used to endorse or promote products derived from
23 * this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 *
37 * Authors: Gabe Black
38 */
39
40#ifndef __ARCH_ARM_INSTS_VFP_HH__
41#define __ARCH_ARM_INSTS_VFP_HH__
42
43#include "arch/arm/insts/misc.hh"
44#include "arch/arm/miscregs.hh"
45#include <fenv.h>
46#include <cmath>
47
48namespace ArmISA
49{
50
// Where an instruction falls within a VFP microop sequence.
enum VfpMicroMode {
    VfpNotAMicroop,   // not part of a microop sequence
    VfpMicroop,       // a middle microop
    VfpFirstMicroop,  // opens a sequence
    VfpLastMicroop    // closes a sequence
};
57
58template<class T>
59static inline void
60setVfpMicroFlags(VfpMicroMode mode, T &flags)
61{
62 switch (mode) {
63 case VfpMicroop:
64 flags[StaticInst::IsMicroop] = true;
65 break;
66 case VfpFirstMicroop:
67 flags[StaticInst::IsMicroop] =
68 flags[StaticInst::IsFirstMicroop] = true;
69 break;
70 case VfpLastMicroop:
71 flags[StaticInst::IsMicroop] =
72 flags[StaticInst::IsLastMicroop] = true;
73 break;
74 case VfpNotAMicroop:
75 break;
76 }
77 if (mode == VfpMicroop || mode == VfpFirstMicroop) {
78 flags[StaticInst::IsDelayedCommit] = true;
79 }
80}
81
// Local aliases for the host's <fenv.h> exception flags.
enum FeExceptionBit
{
    FeDivByZero = FE_DIVBYZERO,
    FeInexact = FE_INEXACT,
    FeInvalid = FE_INVALID,
    FeOverflow = FE_OVERFLOW,
    FeUnderflow = FE_UNDERFLOW,
    FeAllExceptions = FE_ALL_EXCEPT
};
91
// Local aliases for the host's <fenv.h> rounding modes.
enum FeRoundingMode
{
    FeRoundDown = FE_DOWNWARD,
    FeRoundNearest = FE_TONEAREST,
    FeRoundZero = FE_TOWARDZERO,
    FeRoundUpward = FE_UPWARD
};
99
// Rounding-mode encodings used by the FPSCR rMode field.
enum VfpRoundingMode
{
    VfpRoundNearest = 0,
    VfpRoundUpward = 1,
    VfpRoundDown = 2,
    VfpRoundZero = 3
};
107
108template <class fpType>
109static inline void
110vfpFlushToZero(uint32_t &_fpscr, fpType &op)
111{
112 FPSCR fpscr = _fpscr;
113 if (fpscr.fz == 1 && (std::fpclassify(op) == FP_SUBNORMAL)) {
114 fpscr.idc = 1;
115 op = 0;
116 }
117 _fpscr = fpscr;
118}
119
// Apply flush-to-zero to both operands of a binary operation.
template <class fpType>
static inline void
vfpFlushToZero(uint32_t &fpscr, fpType &op1, fpType &op2)
{
    vfpFlushToZero(fpscr, op1);
    vfpFlushToZero(fpscr, op2);
}
127
// Extract the IEEE-754 bit image of a float.  std::memcpy is the
// well-defined replacement for the original union pun (reading an
// inactive union member is UB in C++).
static inline uint32_t
fpToBits(float fp)
{
    uint32_t bits;
    std::memcpy(&bits, &fp, sizeof(bits));
    return bits;
}
139
// Extract the IEEE-754 bit image of a double.  std::memcpy is the
// well-defined replacement for the original union pun.
static inline uint64_t
fpToBits(double fp)
{
    uint64_t bits;
    std::memcpy(&bits, &fp, sizeof(bits));
    return bits;
}
151
// Build a float from the low 32 bits of 'bits'; 'junk' only selects
// this overload by type.  std::memcpy replaces the original union
// (inactive-member read is UB), and the truncation to 32 bits that
// the union did implicitly is now spelled out.
static inline float
bitsToFp(uint64_t bits, float junk)
{
    const uint32_t lower = static_cast<uint32_t>(bits);
    float fp;
    std::memcpy(&fp, &lower, sizeof(fp));
    return fp;
}
163
// Build a double from a 64-bit IEEE-754 image; 'junk' only selects
// this overload by type.  std::memcpy replaces the original union
// (inactive-member read is UB in C++).
static inline double
bitsToFp(uint64_t bits, double junk)
{
    double fp;
    std::memcpy(&fp, &bits, sizeof(fp));
    return fp;
}
175
176template <class fpType>
177static inline fpType
178fixDest(FPSCR fpscr, fpType val, fpType op1)
179{
180 int fpClass = std::fpclassify(val);
181 fpType junk = 0.0;
182 if (fpClass == FP_NAN) {
183 const bool single = (sizeof(val) == sizeof(float));
184 const uint64_t qnan = single ? 0x7fc00000 : ULL(0x7ff8000000000000);
185 const bool nan = std::isnan(op1);
186 if (!nan || (fpscr.dn == 1)) {
187 val = bitsToFp(qnan, junk);
188 } else if (nan) {
189 val = bitsToFp(fpToBits(op1) | qnan, junk);
190 }
191 } else if (fpClass == FP_SUBNORMAL && fpscr.fz == 1) {
192 // Turn val into a zero with the correct sign;
193 uint64_t bitMask = ULL(0x1) << (sizeof(fpType) * 8 - 1);
194 val = bitsToFp(fpToBits(val) & bitMask, junk);
195 feraiseexcept(FeUnderflow);
196 }
197 return val;
198}
199
200template <class fpType>
201static inline fpType
178fixDest(FPSCR fpscr, fpType val, fpType op1, fpType op2)
179{
180 int fpClass = std::fpclassify(val);
181 fpType junk = 0.0;
182 if (fpClass == FP_NAN) {
183 const bool single = (sizeof(val) == sizeof(float));
184 const uint64_t qnan = single ? 0x7fc00000 : ULL(0x7ff8000000000000);
185 const bool nan1 = std::isnan(op1);
186 const bool nan2 = std::isnan(op2);
187 const bool signal1 = nan1 && ((fpToBits(op1) & qnan) != qnan);
188 const bool signal2 = nan2 && ((fpToBits(op2) & qnan) != qnan);
189 if ((!nan1 && !nan2) || (fpscr.dn == 1)) {
190 val = bitsToFp(qnan, junk);
191 } else if (signal1) {
192 val = bitsToFp(fpToBits(op1) | qnan, junk);
193 } else if (signal2) {
194 val = bitsToFp(fpToBits(op2) | qnan, junk);
195 } else if (nan1) {
196 val = op1;
197 } else if (nan2) {
198 val = op2;
199 }
200 } else if (fpClass == FP_SUBNORMAL && fpscr.fz == 1) {
201 // Turn val into a zero with the correct sign;
202 uint64_t bitMask = ULL(0x1) << (sizeof(fpType) * 8 - 1);
203 val = bitsToFp(fpToBits(val) & bitMask, junk);
204 feraiseexcept(FeUnderflow);
205 }
206 return val;
207}
208
202fixDest(FPSCR fpscr, fpType val, fpType op1, fpType op2)
203{
204 int fpClass = std::fpclassify(val);
205 fpType junk = 0.0;
206 if (fpClass == FP_NAN) {
207 const bool single = (sizeof(val) == sizeof(float));
208 const uint64_t qnan = single ? 0x7fc00000 : ULL(0x7ff8000000000000);
209 const bool nan1 = std::isnan(op1);
210 const bool nan2 = std::isnan(op2);
211 const bool signal1 = nan1 && ((fpToBits(op1) & qnan) != qnan);
212 const bool signal2 = nan2 && ((fpToBits(op2) & qnan) != qnan);
213 if ((!nan1 && !nan2) || (fpscr.dn == 1)) {
214 val = bitsToFp(qnan, junk);
215 } else if (signal1) {
216 val = bitsToFp(fpToBits(op1) | qnan, junk);
217 } else if (signal2) {
218 val = bitsToFp(fpToBits(op2) | qnan, junk);
219 } else if (nan1) {
220 val = op1;
221 } else if (nan2) {
222 val = op2;
223 }
224 } else if (fpClass == FP_SUBNORMAL && fpscr.fz == 1) {
225 // Turn val into a zero with the correct sign;
226 uint64_t bitMask = ULL(0x1) << (sizeof(fpType) * 8 - 1);
227 val = bitsToFp(fpToBits(val) & bitMask, junk);
228 feraiseexcept(FeUnderflow);
229 }
230 return val;
231}
232
233template <class fpType>
234static inline fpType
235fixMultDest(FPSCR fpscr, fpType val, fpType op1, fpType op2)
236{
237 fpType mid = fixDest(fpscr, val, op1, op2);
238 const bool single = (sizeof(fpType) == sizeof(float));
239 const fpType junk = 0.0;
240 if ((single && (val == bitsToFp(0x00800000, junk) ||
241 val == bitsToFp(0x80800000, junk))) ||
242 (!single && (val == bitsToFp(ULL(0x0010000000000000), junk) ||
243 val == bitsToFp(ULL(0x8010000000000000), junk)))
244 ) {
245 __asm__ __volatile__("" : "=m" (op1) : "m" (op1));
246 fesetround(FeRoundZero);
247 fpType temp = 0.0;
248 __asm__ __volatile__("" : "=m" (temp) : "m" (temp));
249 temp = op1 * op2;
250 if (!std::isnormal(temp)) {
251 feraiseexcept(FeUnderflow);
252 }
253 __asm__ __volatile__("" :: "m" (temp));
254 }
255 return mid;
256}
257
258template <class fpType>
259static inline fpType
260fixDivDest(FPSCR fpscr, fpType val, fpType op1, fpType op2)
261{
262 fpType mid = fixDest(fpscr, val, op1, op2);
263 const bool single = (sizeof(fpType) == sizeof(float));
264 const fpType junk = 0.0;
265 if ((single && (val == bitsToFp(0x00800000, junk) ||
266 val == bitsToFp(0x80800000, junk))) ||
267 (!single && (val == bitsToFp(ULL(0x0010000000000000), junk) ||
268 val == bitsToFp(ULL(0x8010000000000000), junk)))
269 ) {
270 __asm__ __volatile__("" : "=m" (op1) : "m" (op1));
271 fesetround(FeRoundZero);
272 fpType temp = 0.0;
273 __asm__ __volatile__("" : "=m" (temp) : "m" (temp));
274 temp = op1 / op2;
275 if (!std::isnormal(temp)) {
276 feraiseexcept(FeUnderflow);
277 }
278 __asm__ __volatile__("" :: "m" (temp));
279 }
280 return mid;
281}
282
283static inline float
284fixFpDFpSDest(FPSCR fpscr, double val)
285{
286 const float junk = 0.0;
287 float op1 = 0.0;
288 if (std::isnan(val)) {
289 uint64_t valBits = fpToBits(val);
290 uint32_t op1Bits = bits(valBits, 50, 29) |
291 (mask(9) << 22) |
292 (bits(valBits, 63) << 31);
293 op1 = bitsToFp(op1Bits, junk);
294 }
295 float mid = fixDest(fpscr, (float)val, op1);
296 if (mid == bitsToFp(0x00800000, junk) ||
297 mid == bitsToFp(0x80800000, junk)) {
298 __asm__ __volatile__("" : "=m" (val) : "m" (val));
299 fesetround(FeRoundZero);
300 float temp = 0.0;
301 __asm__ __volatile__("" : "=m" (temp) : "m" (temp));
302 temp = val;
303 if (!std::isnormal(temp)) {
304 feraiseexcept(FeUnderflow);
305 }
306 __asm__ __volatile__("" :: "m" (temp));
307 }
308 return mid;
309}
310
209static inline uint64_t
210vfpFpSToFixed(float val, bool isSigned, bool half, uint8_t imm)
211{
212 fesetround(FeRoundZero);
213 val = val * powf(2.0, imm);
214 __asm__ __volatile__("" : "=m" (val) : "m" (val));
215 feclearexcept(FeAllExceptions);
216 __asm__ __volatile__("" : "=m" (val) : "m" (val));
217 float origVal = val;
218 val = rintf(val);
219 int fpType = std::fpclassify(val);
220 if (fpType == FP_SUBNORMAL || fpType == FP_NAN) {
221 if (fpType == FP_NAN) {
222 feraiseexcept(FeInvalid);
223 }
224 val = 0.0;
225 } else if (origVal != val) {
226 feraiseexcept(FeInexact);
227 }
228
229 if (isSigned) {
230 if (half) {
231 if ((double)val < (int16_t)(1 << 15)) {
232 feraiseexcept(FeInvalid);
233 feclearexcept(FeInexact);
234 return (int16_t)(1 << 15);
235 }
236 if ((double)val > (int16_t)mask(15)) {
237 feraiseexcept(FeInvalid);
238 feclearexcept(FeInexact);
239 return (int16_t)mask(15);
240 }
241 return (int16_t)val;
242 } else {
243 if ((double)val < (int32_t)(1 << 31)) {
244 feraiseexcept(FeInvalid);
245 feclearexcept(FeInexact);
246 return (int32_t)(1 << 31);
247 }
248 if ((double)val > (int32_t)mask(31)) {
249 feraiseexcept(FeInvalid);
250 feclearexcept(FeInexact);
251 return (int32_t)mask(31);
252 }
253 return (int32_t)val;
254 }
255 } else {
256 if (half) {
257 if ((double)val < 0) {
258 feraiseexcept(FeInvalid);
259 feclearexcept(FeInexact);
260 return 0;
261 }
262 if ((double)val > (mask(16))) {
263 feraiseexcept(FeInvalid);
264 feclearexcept(FeInexact);
265 return mask(16);
266 }
267 return (uint16_t)val;
268 } else {
269 if ((double)val < 0) {
270 feraiseexcept(FeInvalid);
271 feclearexcept(FeInexact);
272 return 0;
273 }
274 if ((double)val > (mask(32))) {
275 feraiseexcept(FeInvalid);
276 feclearexcept(FeInexact);
277 return mask(32);
278 }
279 return (uint32_t)val;
280 }
281 }
282}
283
284static inline float
311static inline uint64_t
312vfpFpSToFixed(float val, bool isSigned, bool half, uint8_t imm)
313{
314 fesetround(FeRoundZero);
315 val = val * powf(2.0, imm);
316 __asm__ __volatile__("" : "=m" (val) : "m" (val));
317 feclearexcept(FeAllExceptions);
318 __asm__ __volatile__("" : "=m" (val) : "m" (val));
319 float origVal = val;
320 val = rintf(val);
321 int fpType = std::fpclassify(val);
322 if (fpType == FP_SUBNORMAL || fpType == FP_NAN) {
323 if (fpType == FP_NAN) {
324 feraiseexcept(FeInvalid);
325 }
326 val = 0.0;
327 } else if (origVal != val) {
328 feraiseexcept(FeInexact);
329 }
330
331 if (isSigned) {
332 if (half) {
333 if ((double)val < (int16_t)(1 << 15)) {
334 feraiseexcept(FeInvalid);
335 feclearexcept(FeInexact);
336 return (int16_t)(1 << 15);
337 }
338 if ((double)val > (int16_t)mask(15)) {
339 feraiseexcept(FeInvalid);
340 feclearexcept(FeInexact);
341 return (int16_t)mask(15);
342 }
343 return (int16_t)val;
344 } else {
345 if ((double)val < (int32_t)(1 << 31)) {
346 feraiseexcept(FeInvalid);
347 feclearexcept(FeInexact);
348 return (int32_t)(1 << 31);
349 }
350 if ((double)val > (int32_t)mask(31)) {
351 feraiseexcept(FeInvalid);
352 feclearexcept(FeInexact);
353 return (int32_t)mask(31);
354 }
355 return (int32_t)val;
356 }
357 } else {
358 if (half) {
359 if ((double)val < 0) {
360 feraiseexcept(FeInvalid);
361 feclearexcept(FeInexact);
362 return 0;
363 }
364 if ((double)val > (mask(16))) {
365 feraiseexcept(FeInvalid);
366 feclearexcept(FeInexact);
367 return mask(16);
368 }
369 return (uint16_t)val;
370 } else {
371 if ((double)val < 0) {
372 feraiseexcept(FeInvalid);
373 feclearexcept(FeInexact);
374 return 0;
375 }
376 if ((double)val > (mask(32))) {
377 feraiseexcept(FeInvalid);
378 feclearexcept(FeInexact);
379 return mask(32);
380 }
381 return (uint32_t)val;
382 }
383 }
384}
385
386static inline float
285vfpUFixedToFpS(uint32_t val, bool half, uint8_t imm)
387vfpUFixedToFpS(FPSCR fpscr, uint32_t val, bool half, uint8_t imm)
286{
287 fesetround(FeRoundNearest);
288 if (half)
289 val = (uint16_t)val;
290 float scale = powf(2.0, imm);
291 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
292 feclearexcept(FeAllExceptions);
293 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
388{
389 fesetround(FeRoundNearest);
390 if (half)
391 val = (uint16_t)val;
392 float scale = powf(2.0, imm);
393 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
394 feclearexcept(FeAllExceptions);
395 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
294 return val / scale;
396 return fixDivDest(fpscr, val / scale, (float)val, scale);
295}
296
297static inline float
397}
398
399static inline float
298vfpSFixedToFpS(int32_t val, bool half, uint8_t imm)
400vfpSFixedToFpS(FPSCR fpscr, int32_t val, bool half, uint8_t imm)
299{
300 fesetround(FeRoundNearest);
301 if (half)
302 val = sext<16>(val & mask(16));
303 float scale = powf(2.0, imm);
304 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
305 feclearexcept(FeAllExceptions);
306 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
401{
402 fesetround(FeRoundNearest);
403 if (half)
404 val = sext<16>(val & mask(16));
405 float scale = powf(2.0, imm);
406 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
407 feclearexcept(FeAllExceptions);
408 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
307 return val / scale;
409 return fixDivDest(fpscr, val / scale, (float)val, scale);
308}
309
310static inline uint64_t
311vfpFpDToFixed(double val, bool isSigned, bool half, uint8_t imm)
312{
313 fesetround(FeRoundNearest);
314 val = val * pow(2.0, imm);
315 __asm__ __volatile__("" : "=m" (val) : "m" (val));
316 fesetround(FeRoundZero);
317 feclearexcept(FeAllExceptions);
318 __asm__ __volatile__("" : "=m" (val) : "m" (val));
319 double origVal = val;
320 val = rint(val);
321 int fpType = std::fpclassify(val);
322 if (fpType == FP_SUBNORMAL || fpType == FP_NAN) {
323 if (fpType == FP_NAN) {
324 feraiseexcept(FeInvalid);
325 }
326 val = 0.0;
327 } else if (origVal != val) {
328 feraiseexcept(FeInexact);
329 }
330 if (isSigned) {
331 if (half) {
332 if (val < (int16_t)(1 << 15)) {
333 feraiseexcept(FeInvalid);
334 feclearexcept(FeInexact);
335 return (int16_t)(1 << 15);
336 }
337 if (val > (int16_t)mask(15)) {
338 feraiseexcept(FeInvalid);
339 feclearexcept(FeInexact);
340 return (int16_t)mask(15);
341 }
342 return (int16_t)val;
343 } else {
344 if (val < (int32_t)(1 << 31)) {
345 feraiseexcept(FeInvalid);
346 feclearexcept(FeInexact);
347 return (int32_t)(1 << 31);
348 }
349 if (val > (int32_t)mask(31)) {
350 feraiseexcept(FeInvalid);
351 feclearexcept(FeInexact);
352 return (int32_t)mask(31);
353 }
354 return (int32_t)val;
355 }
356 } else {
357 if (half) {
358 if (val < 0) {
359 feraiseexcept(FeInvalid);
360 feclearexcept(FeInexact);
361 return 0;
362 }
363 if (val > mask(16)) {
364 feraiseexcept(FeInvalid);
365 feclearexcept(FeInexact);
366 return mask(16);
367 }
368 return (uint16_t)val;
369 } else {
370 if (val < 0) {
371 feraiseexcept(FeInvalid);
372 feclearexcept(FeInexact);
373 return 0;
374 }
375 if (val > mask(32)) {
376 feraiseexcept(FeInvalid);
377 feclearexcept(FeInexact);
378 return mask(32);
379 }
380 return (uint32_t)val;
381 }
382 }
383}
384
385static inline double
410}
411
412static inline uint64_t
413vfpFpDToFixed(double val, bool isSigned, bool half, uint8_t imm)
414{
415 fesetround(FeRoundNearest);
416 val = val * pow(2.0, imm);
417 __asm__ __volatile__("" : "=m" (val) : "m" (val));
418 fesetround(FeRoundZero);
419 feclearexcept(FeAllExceptions);
420 __asm__ __volatile__("" : "=m" (val) : "m" (val));
421 double origVal = val;
422 val = rint(val);
423 int fpType = std::fpclassify(val);
424 if (fpType == FP_SUBNORMAL || fpType == FP_NAN) {
425 if (fpType == FP_NAN) {
426 feraiseexcept(FeInvalid);
427 }
428 val = 0.0;
429 } else if (origVal != val) {
430 feraiseexcept(FeInexact);
431 }
432 if (isSigned) {
433 if (half) {
434 if (val < (int16_t)(1 << 15)) {
435 feraiseexcept(FeInvalid);
436 feclearexcept(FeInexact);
437 return (int16_t)(1 << 15);
438 }
439 if (val > (int16_t)mask(15)) {
440 feraiseexcept(FeInvalid);
441 feclearexcept(FeInexact);
442 return (int16_t)mask(15);
443 }
444 return (int16_t)val;
445 } else {
446 if (val < (int32_t)(1 << 31)) {
447 feraiseexcept(FeInvalid);
448 feclearexcept(FeInexact);
449 return (int32_t)(1 << 31);
450 }
451 if (val > (int32_t)mask(31)) {
452 feraiseexcept(FeInvalid);
453 feclearexcept(FeInexact);
454 return (int32_t)mask(31);
455 }
456 return (int32_t)val;
457 }
458 } else {
459 if (half) {
460 if (val < 0) {
461 feraiseexcept(FeInvalid);
462 feclearexcept(FeInexact);
463 return 0;
464 }
465 if (val > mask(16)) {
466 feraiseexcept(FeInvalid);
467 feclearexcept(FeInexact);
468 return mask(16);
469 }
470 return (uint16_t)val;
471 } else {
472 if (val < 0) {
473 feraiseexcept(FeInvalid);
474 feclearexcept(FeInexact);
475 return 0;
476 }
477 if (val > mask(32)) {
478 feraiseexcept(FeInvalid);
479 feclearexcept(FeInexact);
480 return mask(32);
481 }
482 return (uint32_t)val;
483 }
484 }
485}
486
487static inline double
386vfpUFixedToFpD(uint32_t val, bool half, uint8_t imm)
488vfpUFixedToFpD(FPSCR fpscr, uint32_t val, bool half, uint8_t imm)
387{
388 fesetround(FeRoundNearest);
389 if (half)
390 val = (uint16_t)val;
391 double scale = pow(2.0, imm);
392 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
393 feclearexcept(FeAllExceptions);
394 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
489{
490 fesetround(FeRoundNearest);
491 if (half)
492 val = (uint16_t)val;
493 double scale = pow(2.0, imm);
494 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
495 feclearexcept(FeAllExceptions);
496 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
395 return val / scale;
497 return fixDivDest(fpscr, val / scale, (double)val, scale);
396}
397
398static inline double
498}
499
500static inline double
399vfpSFixedToFpD(int32_t val, bool half, uint8_t imm)
501vfpSFixedToFpD(FPSCR fpscr, int32_t val, bool half, uint8_t imm)
400{
401 fesetround(FeRoundNearest);
402 if (half)
403 val = sext<16>(val & mask(16));
404 double scale = pow(2.0, imm);
405 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
406 feclearexcept(FeAllExceptions);
407 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
502{
503 fesetround(FeRoundNearest);
504 if (half)
505 val = sext<16>(val & mask(16));
506 double scale = pow(2.0, imm);
507 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
508 feclearexcept(FeAllExceptions);
509 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
408 return val / scale;
510 return fixDivDest(fpscr, val / scale, (double)val, scale);
409}
410
411typedef int VfpSavedState;
412
413static inline VfpSavedState
414prepVfpFpscr(FPSCR fpscr)
415{
416 int roundingMode = fegetround();
417 feclearexcept(FeAllExceptions);
418 switch (fpscr.rMode) {
419 case VfpRoundNearest:
420 fesetround(FeRoundNearest);
421 break;
422 case VfpRoundUpward:
423 fesetround(FeRoundUpward);
424 break;
425 case VfpRoundDown:
426 fesetround(FeRoundDown);
427 break;
428 case VfpRoundZero:
429 fesetround(FeRoundZero);
430 break;
431 }
432 return roundingMode;
433}
434
435static inline FPSCR
436setVfpFpscr(FPSCR fpscr, VfpSavedState state)
437{
438 int exceptions = fetestexcept(FeAllExceptions);
439 if (exceptions & FeInvalid) {
440 fpscr.ioc = 1;
441 }
442 if (exceptions & FeDivByZero) {
443 fpscr.dzc = 1;
444 }
445 if (exceptions & FeOverflow) {
446 fpscr.ofc = 1;
447 }
448 if (exceptions & FeUnderflow) {
449 fpscr.ufc = 1;
450 }
451 if (exceptions & FeInexact) {
452 fpscr.ixc = 1;
453 }
454 fesetround(state);
455 return fpscr;
456}
457
458class VfpMacroOp : public PredMacroOp
459{
460 public:
461 static bool
462 inScalarBank(IntRegIndex idx)
463 {
464 return (idx % 32) < 8;
465 }
466
467 protected:
468 bool wide;
469
470 VfpMacroOp(const char *mnem, ExtMachInst _machInst,
471 OpClass __opClass, bool _wide) :
472 PredMacroOp(mnem, _machInst, __opClass), wide(_wide)
473 {}
474
475 IntRegIndex
476 addStride(IntRegIndex idx, unsigned stride)
477 {
478 if (wide) {
479 stride *= 2;
480 }
481 unsigned offset = idx % 8;
482 idx = (IntRegIndex)(idx - offset);
483 offset += stride;
484 idx = (IntRegIndex)(idx + (offset % 8));
485 return idx;
486 }
487
488 void
489 nextIdxs(IntRegIndex &dest, IntRegIndex &op1, IntRegIndex &op2)
490 {
491 unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2;
492 assert(!inScalarBank(dest));
493 dest = addStride(dest, stride);
494 op1 = addStride(op1, stride);
495 if (!inScalarBank(op2)) {
496 op2 = addStride(op2, stride);
497 }
498 }
499
500 void
501 nextIdxs(IntRegIndex &dest, IntRegIndex &op1)
502 {
503 unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2;
504 assert(!inScalarBank(dest));
505 dest = addStride(dest, stride);
506 if (!inScalarBank(op1)) {
507 op1 = addStride(op1, stride);
508 }
509 }
510
511 void
512 nextIdxs(IntRegIndex &dest)
513 {
514 unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2;
515 assert(!inScalarBank(dest));
516 dest = addStride(dest, stride);
517 }
518};
519
520class VfpRegRegOp : public RegRegOp
521{
522 protected:
523 VfpRegRegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
524 IntRegIndex _dest, IntRegIndex _op1,
525 VfpMicroMode mode = VfpNotAMicroop) :
526 RegRegOp(mnem, _machInst, __opClass, _dest, _op1)
527 {
528 setVfpMicroFlags(mode, flags);
529 }
530};
531
532class VfpRegImmOp : public RegImmOp
533{
534 protected:
535 VfpRegImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
536 IntRegIndex _dest, uint64_t _imm,
537 VfpMicroMode mode = VfpNotAMicroop) :
538 RegImmOp(mnem, _machInst, __opClass, _dest, _imm)
539 {
540 setVfpMicroFlags(mode, flags);
541 }
542};
543
544class VfpRegRegImmOp : public RegRegImmOp
545{
546 protected:
547 VfpRegRegImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
548 IntRegIndex _dest, IntRegIndex _op1,
549 uint64_t _imm, VfpMicroMode mode = VfpNotAMicroop) :
550 RegRegImmOp(mnem, _machInst, __opClass, _dest, _op1, _imm)
551 {
552 setVfpMicroFlags(mode, flags);
553 }
554};
555
556class VfpRegRegRegOp : public RegRegRegOp
557{
558 protected:
559 VfpRegRegRegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
560 IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2,
561 VfpMicroMode mode = VfpNotAMicroop) :
562 RegRegRegOp(mnem, _machInst, __opClass, _dest, _op1, _op2)
563 {
564 setVfpMicroFlags(mode, flags);
565 }
566};
567
568}
569
570#endif //__ARCH_ARM_INSTS_VFP_HH__
511}
512
513typedef int VfpSavedState;
514
515static inline VfpSavedState
516prepVfpFpscr(FPSCR fpscr)
517{
518 int roundingMode = fegetround();
519 feclearexcept(FeAllExceptions);
520 switch (fpscr.rMode) {
521 case VfpRoundNearest:
522 fesetround(FeRoundNearest);
523 break;
524 case VfpRoundUpward:
525 fesetround(FeRoundUpward);
526 break;
527 case VfpRoundDown:
528 fesetround(FeRoundDown);
529 break;
530 case VfpRoundZero:
531 fesetround(FeRoundZero);
532 break;
533 }
534 return roundingMode;
535}
536
537static inline FPSCR
538setVfpFpscr(FPSCR fpscr, VfpSavedState state)
539{
540 int exceptions = fetestexcept(FeAllExceptions);
541 if (exceptions & FeInvalid) {
542 fpscr.ioc = 1;
543 }
544 if (exceptions & FeDivByZero) {
545 fpscr.dzc = 1;
546 }
547 if (exceptions & FeOverflow) {
548 fpscr.ofc = 1;
549 }
550 if (exceptions & FeUnderflow) {
551 fpscr.ufc = 1;
552 }
553 if (exceptions & FeInexact) {
554 fpscr.ixc = 1;
555 }
556 fesetround(state);
557 return fpscr;
558}
559
560class VfpMacroOp : public PredMacroOp
561{
562 public:
563 static bool
564 inScalarBank(IntRegIndex idx)
565 {
566 return (idx % 32) < 8;
567 }
568
569 protected:
570 bool wide;
571
572 VfpMacroOp(const char *mnem, ExtMachInst _machInst,
573 OpClass __opClass, bool _wide) :
574 PredMacroOp(mnem, _machInst, __opClass), wide(_wide)
575 {}
576
577 IntRegIndex
578 addStride(IntRegIndex idx, unsigned stride)
579 {
580 if (wide) {
581 stride *= 2;
582 }
583 unsigned offset = idx % 8;
584 idx = (IntRegIndex)(idx - offset);
585 offset += stride;
586 idx = (IntRegIndex)(idx + (offset % 8));
587 return idx;
588 }
589
590 void
591 nextIdxs(IntRegIndex &dest, IntRegIndex &op1, IntRegIndex &op2)
592 {
593 unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2;
594 assert(!inScalarBank(dest));
595 dest = addStride(dest, stride);
596 op1 = addStride(op1, stride);
597 if (!inScalarBank(op2)) {
598 op2 = addStride(op2, stride);
599 }
600 }
601
602 void
603 nextIdxs(IntRegIndex &dest, IntRegIndex &op1)
604 {
605 unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2;
606 assert(!inScalarBank(dest));
607 dest = addStride(dest, stride);
608 if (!inScalarBank(op1)) {
609 op1 = addStride(op1, stride);
610 }
611 }
612
613 void
614 nextIdxs(IntRegIndex &dest)
615 {
616 unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2;
617 assert(!inScalarBank(dest));
618 dest = addStride(dest, stride);
619 }
620};
621
622class VfpRegRegOp : public RegRegOp
623{
624 protected:
625 VfpRegRegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
626 IntRegIndex _dest, IntRegIndex _op1,
627 VfpMicroMode mode = VfpNotAMicroop) :
628 RegRegOp(mnem, _machInst, __opClass, _dest, _op1)
629 {
630 setVfpMicroFlags(mode, flags);
631 }
632};
633
634class VfpRegImmOp : public RegImmOp
635{
636 protected:
637 VfpRegImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
638 IntRegIndex _dest, uint64_t _imm,
639 VfpMicroMode mode = VfpNotAMicroop) :
640 RegImmOp(mnem, _machInst, __opClass, _dest, _imm)
641 {
642 setVfpMicroFlags(mode, flags);
643 }
644};
645
646class VfpRegRegImmOp : public RegRegImmOp
647{
648 protected:
649 VfpRegRegImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
650 IntRegIndex _dest, IntRegIndex _op1,
651 uint64_t _imm, VfpMicroMode mode = VfpNotAMicroop) :
652 RegRegImmOp(mnem, _machInst, __opClass, _dest, _op1, _imm)
653 {
654 setVfpMicroFlags(mode, flags);
655 }
656};
657
658class VfpRegRegRegOp : public RegRegRegOp
659{
660 protected:
661 VfpRegRegRegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
662 IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2,
663 VfpMicroMode mode = VfpNotAMicroop) :
664 RegRegRegOp(mnem, _machInst, __opClass, _dest, _op1, _op2)
665 {
666 setVfpMicroFlags(mode, flags);
667 }
668};
669
670}
671
672#endif //__ARCH_ARM_INSTS_VFP_HH__