// vfp.hh (7396:53454ef35b46) vfp.hh (7397:cbd950459a29)
1/*
2 * Copyright (c) 2010 ARM Limited
3 * All rights reserved
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder. You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions are
16 * met: redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer;
18 * redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution;
21 * neither the name of the copyright holders nor the names of its
22 * contributors may be used to endorse or promote products derived from
23 * this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 *
37 * Authors: Gabe Black
38 */
39
40#ifndef __ARCH_ARM_INSTS_VFP_HH__
41#define __ARCH_ARM_INSTS_VFP_HH__
42
#include "arch/arm/insts/misc.hh"
#include "arch/arm/miscregs.hh"
#include <fenv.h>
#include <cmath>
#include <cstring>
47
48namespace ArmISA
49{
50
/**
 * Position of a VFP instruction inside a micro-op sequence. The value is
 * handed to setVfpMicroFlags() to select which StaticInst flags get set.
 */
enum VfpMicroMode
{
    VfpNotAMicroop = 0,     // no micro-op flags are set
    VfpMicroop = 1,         // micro-op flagged neither first nor last
    VfpFirstMicroop = 2,    // micro-op flagged as the first of a sequence
    VfpLastMicroop = 3      // micro-op flagged as the last of a sequence
};
57
58template<class T>
59static inline void
60setVfpMicroFlags(VfpMicroMode mode, T &flags)
61{
62 switch (mode) {
63 case VfpMicroop:
64 flags[StaticInst::IsMicroop] = true;
65 break;
66 case VfpFirstMicroop:
67 flags[StaticInst::IsMicroop] =
68 flags[StaticInst::IsFirstMicroop] = true;
69 break;
70 case VfpLastMicroop:
71 flags[StaticInst::IsMicroop] =
72 flags[StaticInst::IsLastMicroop] = true;
73 break;
74 case VfpNotAMicroop:
75 break;
76 }
77 if (mode == VfpMicroop || mode == VfpFirstMicroop) {
78 flags[StaticInst::IsDelayedCommit] = true;
79 }
80}
81
/**
 * Host floating-point exception flags from <fenv.h>, wrapped in an enum so
 * they can be used as named constants inside the ArmISA namespace.
 */
enum FeExceptionBit
{
    FeInvalid       = FE_INVALID,
    FeDivByZero     = FE_DIVBYZERO,
    FeOverflow      = FE_OVERFLOW,
    FeUnderflow     = FE_UNDERFLOW,
    FeInexact       = FE_INEXACT,
    FeAllExceptions = FE_ALL_EXCEPT     // union of every supported flag
};
91
/**
 * Host rounding modes from <fenv.h>, i.e. the values accepted by
 * fesetround() and returned by fegetround().
 */
enum FeRoundingMode
{
    FeRoundNearest = FE_TONEAREST,
    FeRoundUpward  = FE_UPWARD,
    FeRoundDown    = FE_DOWNWARD,
    FeRoundZero    = FE_TOWARDZERO
};
99
/**
 * Rounding mode encodings as they appear in the guest's rounding mode
 * field (the value switched on by prepFpState()).
 */
enum VfpRoundingMode
{
    VfpRoundNearest = 0x0,
    VfpRoundUpward  = 0x1,
    VfpRoundDown    = 0x2,
    VfpRoundZero    = 0x3
};
107
108template <class fpType>
1/*
2 * Copyright (c) 2010 ARM Limited
3 * All rights reserved
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder. You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions are
16 * met: redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer;
18 * redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution;
21 * neither the name of the copyright holders nor the names of its
22 * contributors may be used to endorse or promote products derived from
23 * this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 *
37 * Authors: Gabe Black
38 */
39
40#ifndef __ARCH_ARM_INSTS_VFP_HH__
41#define __ARCH_ARM_INSTS_VFP_HH__
42
43#include "arch/arm/insts/misc.hh"
44#include "arch/arm/miscregs.hh"
45#include <fenv.h>
46#include <cmath>
47
48namespace ArmISA
49{
50
/**
 * Position of a VFP instruction inside a micro-op sequence. The value is
 * handed to setVfpMicroFlags() to select which StaticInst flags get set.
 */
enum VfpMicroMode
{
    VfpNotAMicroop = 0,     // no micro-op flags are set
    VfpMicroop = 1,         // micro-op flagged neither first nor last
    VfpFirstMicroop = 2,    // micro-op flagged as the first of a sequence
    VfpLastMicroop = 3      // micro-op flagged as the last of a sequence
};
57
58template<class T>
59static inline void
60setVfpMicroFlags(VfpMicroMode mode, T &flags)
61{
62 switch (mode) {
63 case VfpMicroop:
64 flags[StaticInst::IsMicroop] = true;
65 break;
66 case VfpFirstMicroop:
67 flags[StaticInst::IsMicroop] =
68 flags[StaticInst::IsFirstMicroop] = true;
69 break;
70 case VfpLastMicroop:
71 flags[StaticInst::IsMicroop] =
72 flags[StaticInst::IsLastMicroop] = true;
73 break;
74 case VfpNotAMicroop:
75 break;
76 }
77 if (mode == VfpMicroop || mode == VfpFirstMicroop) {
78 flags[StaticInst::IsDelayedCommit] = true;
79 }
80}
81
/**
 * Host floating-point exception flags from <fenv.h>, wrapped in an enum so
 * they can be used as named constants inside the ArmISA namespace.
 */
enum FeExceptionBit
{
    FeInvalid       = FE_INVALID,
    FeDivByZero     = FE_DIVBYZERO,
    FeOverflow      = FE_OVERFLOW,
    FeUnderflow     = FE_UNDERFLOW,
    FeInexact       = FE_INEXACT,
    FeAllExceptions = FE_ALL_EXCEPT     // union of every supported flag
};
91
/**
 * Host rounding modes from <fenv.h>, i.e. the values accepted by
 * fesetround() and returned by fegetround().
 */
enum FeRoundingMode
{
    FeRoundNearest = FE_TONEAREST,
    FeRoundUpward  = FE_UPWARD,
    FeRoundDown    = FE_DOWNWARD,
    FeRoundZero    = FE_TOWARDZERO
};
99
/**
 * Rounding mode encodings as they appear in the guest's rounding mode
 * field (the value switched on by prepFpState()).
 */
enum VfpRoundingMode
{
    VfpRoundNearest = 0x0,
    VfpRoundUpward  = 0x1,
    VfpRoundDown    = 0x2,
    VfpRoundZero    = 0x3
};
107
108template <class fpType>
109static inline void
110vfpFlushToZero(uint32_t &_fpscr, fpType &op)
111{
112 FPSCR fpscr = _fpscr;
113 fpType junk = 0.0;
114 if (fpscr.fz == 1 && (std::fpclassify(op) == FP_SUBNORMAL)) {
115 fpscr.idc = 1;
116 uint64_t bitMask = ULL(0x1) << (sizeof(fpType) * 8 - 1);
117 op = bitsToFp(fpToBits(op) & bitMask, junk);
118 }
119 _fpscr = fpscr;
120}
121
/**
 * Two operand convenience wrapper: applies the single operand
 * vfpFlushToZero() to op1 and then to op2, sharing one status word.
 */
template <class fpType>
static inline void
vfpFlushToZero(uint32_t &fpscr, fpType &op1, fpType &op2)
{
    fpType *const operands[] = { &op1, &op2 };
    for (unsigned i = 0; i < 2; i++)
        vfpFlushToZero(fpscr, *operands[i]);
}
129
130template <class fpType>
131static inline bool
132flushToZero(fpType &op)
133{
134 fpType junk = 0.0;
135 if (std::fpclassify(op) == FP_SUBNORMAL) {
136 uint64_t bitMask = ULL(0x1) << (sizeof(fpType) * 8 - 1);
137 op = bitsToFp(fpToBits(op) & bitMask, junk);
138 return true;
139 }
140 return false;
141}
142
/**
 * Flush both operands (each one is always processed).
 *
 * @return true if at least one of the operands was subnormal.
 */
template <class fpType>
static inline bool
flushToZero(fpType &op1, fpType &op2)
{
    bool anyFlushed = flushToZero(op1);
    // Not short-circuited: op2 must be flushed even if op1 already was.
    if (flushToZero(op2))
        anyFlushed = true;
    return anyFlushed;
}
151
109static inline bool
110flushToZero(fpType &op)
111{
112 fpType junk = 0.0;
113 if (std::fpclassify(op) == FP_SUBNORMAL) {
114 uint64_t bitMask = ULL(0x1) << (sizeof(fpType) * 8 - 1);
115 op = bitsToFp(fpToBits(op) & bitMask, junk);
116 return true;
117 }
118 return false;
119}
120
/**
 * Flush both operands (each one is always processed).
 *
 * @return true if at least one of the operands was subnormal.
 */
template <class fpType>
static inline bool
flushToZero(fpType &op1, fpType &op2)
{
    bool anyFlushed = flushToZero(op1);
    // Not short-circuited: op2 must be flushed even if op1 already was.
    if (flushToZero(op2))
        anyFlushed = true;
    return anyFlushed;
}
129
130template <class fpType>
131static inline void
132vfpFlushToZero(FPSCR &fpscr, fpType &op)
133{
134 if (fpscr.fz == 1 && flushToZero(op)) {
135 fpscr.idc = 1;
136 }
137}
138
139template <class fpType>
140static inline void
141vfpFlushToZero(FPSCR &fpscr, fpType &op1, fpType &op2)
142{
143 vfpFlushToZero(fpscr, op1);
144 vfpFlushToZero(fpscr, op2);
145}
146
/**
 * Return the bit pattern that represents a single precision value.
 *
 * @param fp Value whose object representation is wanted.
 * @return The 32 bits of fp, unchanged.
 */
static inline uint32_t
fpToBits(float fp)
{
    // memcpy is the portable way to reinterpret an object representation;
    // reading the inactive member of a union is undefined in ISO C++.
    uint32_t bits;
    std::memcpy(&bits, &fp, sizeof(bits));
    return bits;
}
163
/**
 * Return the bit pattern that represents a double precision value.
 *
 * @param fp Value whose object representation is wanted.
 * @return The 64 bits of fp, unchanged.
 */
static inline uint64_t
fpToBits(double fp)
{
    // memcpy is the portable way to reinterpret an object representation;
    // reading the inactive member of a union is undefined in ISO C++.
    uint64_t bits;
    std::memcpy(&bits, &fp, sizeof(bits));
    return bits;
}
175
/**
 * Build a single precision value from a bit pattern.
 *
 * @param bits Pattern to reinterpret; only the low 32 bits are used.
 * @param junk Ignored. Its type only selects this overload.
 * @return The float whose object representation is the low 32 bits.
 */
static inline float
bitsToFp(uint64_t bits, float junk)
{
    // memcpy is the portable way to reinterpret an object representation;
    // reading the inactive member of a union is undefined in ISO C++.
    const uint32_t low = static_cast<uint32_t>(bits);
    float fp;
    std::memcpy(&fp, &low, sizeof(fp));
    return fp;
}
187
/**
 * Build a double precision value from a bit pattern.
 *
 * @param bits Pattern to reinterpret.
 * @param junk Ignored. Its type only selects this overload.
 * @return The double whose object representation is bits.
 */
static inline double
bitsToFp(uint64_t bits, double junk)
{
    // memcpy is the portable way to reinterpret an object representation;
    // reading the inactive member of a union is undefined in ISO C++.
    double fp;
    std::memcpy(&fp, &bits, sizeof(fp));
    return fp;
}
199
// Host rounding mode as returned by fegetround(), kept while a VFP
// operation runs so that it can be handed back to fesetround() afterwards.
200typedef int VfpSavedState;
201
202static inline VfpSavedState
/**
 * Return the bit pattern that represents a single precision value.
 *
 * @param fp Value whose object representation is wanted.
 * @return The 32 bits of fp, unchanged.
 */
static inline uint32_t
fpToBits(float fp)
{
    // memcpy is the portable way to reinterpret an object representation;
    // reading the inactive member of a union is undefined in ISO C++.
    uint32_t bits;
    std::memcpy(&bits, &fp, sizeof(bits));
    return bits;
}
158
/**
 * Return the bit pattern that represents a double precision value.
 *
 * @param fp Value whose object representation is wanted.
 * @return The 64 bits of fp, unchanged.
 */
static inline uint64_t
fpToBits(double fp)
{
    // memcpy is the portable way to reinterpret an object representation;
    // reading the inactive member of a union is undefined in ISO C++.
    uint64_t bits;
    std::memcpy(&bits, &fp, sizeof(bits));
    return bits;
}
170
/**
 * Build a single precision value from a bit pattern.
 *
 * @param bits Pattern to reinterpret; only the low 32 bits are used.
 * @param junk Ignored. Its type only selects this overload.
 * @return The float whose object representation is the low 32 bits.
 */
static inline float
bitsToFp(uint64_t bits, float junk)
{
    // memcpy is the portable way to reinterpret an object representation;
    // reading the inactive member of a union is undefined in ISO C++.
    const uint32_t low = static_cast<uint32_t>(bits);
    float fp;
    std::memcpy(&fp, &low, sizeof(fp));
    return fp;
}
182
/**
 * Build a double precision value from a bit pattern.
 *
 * @param bits Pattern to reinterpret.
 * @param junk Ignored. Its type only selects this overload.
 * @return The double whose object representation is bits.
 */
static inline double
bitsToFp(uint64_t bits, double junk)
{
    // memcpy is the portable way to reinterpret an object representation;
    // reading the inactive member of a union is undefined in ISO C++.
    double fp;
    std::memcpy(&fp, &bits, sizeof(fp));
    return fp;
}
194
// Host rounding mode as returned by fegetround(), kept while a VFP
// operation runs so that it can be handed back to fesetround() afterwards.
195typedef int VfpSavedState;
196
197static inline VfpSavedState
203prepVfpFpscr(FPSCR fpscr)
204{
205 int roundingMode = fegetround();
206 feclearexcept(FeAllExceptions);
207 switch (fpscr.rMode) {
208 case VfpRoundNearest:
209 fesetround(FeRoundNearest);
210 break;
211 case VfpRoundUpward:
212 fesetround(FeRoundUpward);
213 break;
214 case VfpRoundDown:
215 fesetround(FeRoundDown);
216 break;
217 case VfpRoundZero:
218 fesetround(FeRoundZero);
219 break;
220 }
221 return roundingMode;
222}
223
224static inline VfpSavedState
225prepFpState(uint32_t rMode)
226{
227 int roundingMode = fegetround();
228 feclearexcept(FeAllExceptions);
229 switch (rMode) {
230 case VfpRoundNearest:
231 fesetround(FeRoundNearest);
232 break;
233 case VfpRoundUpward:
234 fesetround(FeRoundUpward);
235 break;
236 case VfpRoundDown:
237 fesetround(FeRoundDown);
238 break;
239 case VfpRoundZero:
240 fesetround(FeRoundZero);
241 break;
242 }
243 return roundingMode;
244}
245
198prepFpState(uint32_t rMode)
199{
200 int roundingMode = fegetround();
201 feclearexcept(FeAllExceptions);
202 switch (rMode) {
203 case VfpRoundNearest:
204 fesetround(FeRoundNearest);
205 break;
206 case VfpRoundUpward:
207 fesetround(FeRoundUpward);
208 break;
209 case VfpRoundDown:
210 fesetround(FeRoundDown);
211 break;
212 case VfpRoundZero:
213 fesetround(FeRoundZero);
214 break;
215 }
216 return roundingMode;
217}
218
246static inline FPSCR
247setVfpFpscr(FPSCR fpscr, VfpSavedState state)
248{
249 int exceptions = fetestexcept(FeAllExceptions);
250 if (exceptions & FeInvalid) {
251 fpscr.ioc = 1;
252 }
253 if (exceptions & FeDivByZero) {
254 fpscr.dzc = 1;
255 }
256 if (exceptions & FeOverflow) {
257 fpscr.ofc = 1;
258 }
259 if (exceptions & FeUnderflow) {
260 fpscr.ufc = 1;
261 }
262 if (exceptions & FeInexact) {
263 fpscr.ixc = 1;
264 }
265 fesetround(state);
266 return fpscr;
267}
268
269static inline void
270finishVfp(FPSCR &fpscr, VfpSavedState state)
271{
272 int exceptions = fetestexcept(FeAllExceptions);
273 bool underflow = false;
274 if (exceptions & FeInvalid) {
275 fpscr.ioc = 1;
276 }
277 if (exceptions & FeDivByZero) {
278 fpscr.dzc = 1;
279 }
280 if (exceptions & FeOverflow) {
281 fpscr.ofc = 1;
282 }
283 if (exceptions & FeUnderflow) {
284 underflow = true;
285 fpscr.ufc = 1;
286 }
287 if ((exceptions & FeInexact) && !(underflow && fpscr.fz)) {
288 fpscr.ixc = 1;
289 }
290 fesetround(state);
291}
292
293template <class fpType>
294static inline fpType
295fixDest(FPSCR fpscr, fpType val, fpType op1)
296{
297 int fpClass = std::fpclassify(val);
298 fpType junk = 0.0;
299 if (fpClass == FP_NAN) {
300 const bool single = (sizeof(val) == sizeof(float));
301 const uint64_t qnan = single ? 0x7fc00000 : ULL(0x7ff8000000000000);
302 const bool nan = std::isnan(op1);
303 if (!nan || (fpscr.dn == 1)) {
304 val = bitsToFp(qnan, junk);
305 } else if (nan) {
306 val = bitsToFp(fpToBits(op1) | qnan, junk);
307 }
308 } else if (fpClass == FP_SUBNORMAL && fpscr.fz == 1) {
309 // Turn val into a zero with the correct sign;
310 uint64_t bitMask = ULL(0x1) << (sizeof(fpType) * 8 - 1);
311 val = bitsToFp(fpToBits(val) & bitMask, junk);
312 feclearexcept(FeInexact);
313 feraiseexcept(FeUnderflow);
314 }
315 return val;
316}
317
318template <class fpType>
319static inline fpType
320fixDest(FPSCR fpscr, fpType val, fpType op1, fpType op2)
321{
322 int fpClass = std::fpclassify(val);
323 fpType junk = 0.0;
324 if (fpClass == FP_NAN) {
325 const bool single = (sizeof(val) == sizeof(float));
326 const uint64_t qnan = single ? 0x7fc00000 : ULL(0x7ff8000000000000);
327 const bool nan1 = std::isnan(op1);
328 const bool nan2 = std::isnan(op2);
329 const bool signal1 = nan1 && ((fpToBits(op1) & qnan) != qnan);
330 const bool signal2 = nan2 && ((fpToBits(op2) & qnan) != qnan);
331 if ((!nan1 && !nan2) || (fpscr.dn == 1)) {
332 val = bitsToFp(qnan, junk);
333 } else if (signal1) {
334 val = bitsToFp(fpToBits(op1) | qnan, junk);
335 } else if (signal2) {
336 val = bitsToFp(fpToBits(op2) | qnan, junk);
337 } else if (nan1) {
338 val = op1;
339 } else if (nan2) {
340 val = op2;
341 }
342 } else if (fpClass == FP_SUBNORMAL && fpscr.fz == 1) {
343 // Turn val into a zero with the correct sign;
344 uint64_t bitMask = ULL(0x1) << (sizeof(fpType) * 8 - 1);
345 val = bitsToFp(fpToBits(val) & bitMask, junk);
346 feclearexcept(FeInexact);
347 feraiseexcept(FeUnderflow);
348 }
349 return val;
350}
351
352template <class fpType>
353static inline fpType
354fixDivDest(FPSCR fpscr, fpType val, fpType op1, fpType op2)
355{
356 fpType mid = fixDest(fpscr, val, op1, op2);
357 const bool single = (sizeof(fpType) == sizeof(float));
358 const fpType junk = 0.0;
359 if ((single && (val == bitsToFp(0x00800000, junk) ||
360 val == bitsToFp(0x80800000, junk))) ||
361 (!single && (val == bitsToFp(ULL(0x0010000000000000), junk) ||
362 val == bitsToFp(ULL(0x8010000000000000), junk)))
363 ) {
364 __asm__ __volatile__("" : "=m" (op1) : "m" (op1));
365 fesetround(FeRoundZero);
366 fpType temp = 0.0;
367 __asm__ __volatile__("" : "=m" (temp) : "m" (temp));
368 temp = op1 / op2;
369 if (flushToZero(temp)) {
370 feraiseexcept(FeUnderflow);
371 if (fpscr.fz) {
372 feclearexcept(FeInexact);
373 mid = temp;
374 }
375 }
376 __asm__ __volatile__("" :: "m" (temp));
377 }
378 return mid;
379}
380
381static inline float
382fixFpDFpSDest(FPSCR fpscr, double val)
383{
384 const float junk = 0.0;
385 float op1 = 0.0;
386 if (std::isnan(val)) {
387 uint64_t valBits = fpToBits(val);
388 uint32_t op1Bits = bits(valBits, 50, 29) |
389 (mask(9) << 22) |
390 (bits(valBits, 63) << 31);
391 op1 = bitsToFp(op1Bits, junk);
392 }
393 float mid = fixDest(fpscr, (float)val, op1);
394 if (fpscr.fz && fetestexcept(FeUnderflow | FeInexact) ==
395 (FeUnderflow | FeInexact)) {
396 feclearexcept(FeInexact);
397 }
398 if (mid == bitsToFp(0x00800000, junk) ||
399 mid == bitsToFp(0x80800000, junk)) {
400 __asm__ __volatile__("" : "=m" (val) : "m" (val));
401 fesetround(FeRoundZero);
402 float temp = 0.0;
403 __asm__ __volatile__("" : "=m" (temp) : "m" (temp));
404 temp = val;
405 if (flushToZero(temp)) {
406 feraiseexcept(FeUnderflow);
407 if (fpscr.fz) {
408 feclearexcept(FeInexact);
409 mid = temp;
410 }
411 }
412 __asm__ __volatile__("" :: "m" (temp));
413 }
414 return mid;
415}
416
417static inline double
418fixFpSFpDDest(FPSCR fpscr, float val)
419{
420 const double junk = 0.0;
421 double op1 = 0.0;
422 if (std::isnan(val)) {
423 uint32_t valBits = fpToBits(val);
424 uint64_t op1Bits = ((uint64_t)bits(valBits, 21, 0) << 29) |
425 (mask(12) << 51) |
426 ((uint64_t)bits(valBits, 31) << 63);
427 op1 = bitsToFp(op1Bits, junk);
428 }
429 double mid = fixDest(fpscr, (double)val, op1);
430 if (mid == bitsToFp(ULL(0x0010000000000000), junk) ||
431 mid == bitsToFp(ULL(0x8010000000000000), junk)) {
432 __asm__ __volatile__("" : "=m" (val) : "m" (val));
433 fesetround(FeRoundZero);
434 double temp = 0.0;
435 __asm__ __volatile__("" : "=m" (temp) : "m" (temp));
436 temp = val;
437 if (flushToZero(temp)) {
438 feraiseexcept(FeUnderflow);
439 if (fpscr.fz) {
440 feclearexcept(FeInexact);
441 mid = temp;
442 }
443 }
444 __asm__ __volatile__("" :: "m" (temp));
445 }
446 return mid;
447}
448
449static inline double
450makeDouble(uint32_t low, uint32_t high)
451{
452 double junk = 0.0;
453 return bitsToFp((uint64_t)low | ((uint64_t)high << 32), junk);
454}
455
456static inline uint32_t
457lowFromDouble(double val)
458{
459 return fpToBits(val);
460}
461
462static inline uint32_t
463highFromDouble(double val)
464{
465 return fpToBits(val) >> 32;
466}
467
468static inline uint64_t
469vfpFpSToFixed(float val, bool isSigned, bool half,
470 uint8_t imm, bool rzero = true)
471{
472 int rmode = rzero ? FeRoundZero : fegetround();
473 __asm__ __volatile__("" : "=m" (rmode) : "m" (rmode));
474 fesetround(FeRoundNearest);
475 val = val * powf(2.0, imm);
476 __asm__ __volatile__("" : "=m" (val) : "m" (val));
477 fesetround(rmode);
478 feclearexcept(FeAllExceptions);
479 __asm__ __volatile__("" : "=m" (val) : "m" (val));
480 float origVal = val;
481 val = rintf(val);
482 int fpType = std::fpclassify(val);
483 if (fpType == FP_SUBNORMAL || fpType == FP_NAN) {
484 if (fpType == FP_NAN) {
485 feraiseexcept(FeInvalid);
486 }
487 val = 0.0;
488 } else if (origVal != val) {
489 switch (rmode) {
490 case FeRoundNearest:
491 if (origVal - val > 0.5)
492 val += 1.0;
493 else if (val - origVal > 0.5)
494 val -= 1.0;
495 break;
496 case FeRoundDown:
497 if (origVal < val)
498 val -= 1.0;
499 break;
500 case FeRoundUpward:
501 if (origVal > val)
502 val += 1.0;
503 break;
504 }
505 feraiseexcept(FeInexact);
506 }
507
508 if (isSigned) {
509 if (half) {
510 if ((double)val < (int16_t)(1 << 15)) {
511 feraiseexcept(FeInvalid);
512 feclearexcept(FeInexact);
513 return (int16_t)(1 << 15);
514 }
515 if ((double)val > (int16_t)mask(15)) {
516 feraiseexcept(FeInvalid);
517 feclearexcept(FeInexact);
518 return (int16_t)mask(15);
519 }
520 return (int16_t)val;
521 } else {
522 if ((double)val < (int32_t)(1 << 31)) {
523 feraiseexcept(FeInvalid);
524 feclearexcept(FeInexact);
525 return (int32_t)(1 << 31);
526 }
527 if ((double)val > (int32_t)mask(31)) {
528 feraiseexcept(FeInvalid);
529 feclearexcept(FeInexact);
530 return (int32_t)mask(31);
531 }
532 return (int32_t)val;
533 }
534 } else {
535 if (half) {
536 if ((double)val < 0) {
537 feraiseexcept(FeInvalid);
538 feclearexcept(FeInexact);
539 return 0;
540 }
541 if ((double)val > (mask(16))) {
542 feraiseexcept(FeInvalid);
543 feclearexcept(FeInexact);
544 return mask(16);
545 }
546 return (uint16_t)val;
547 } else {
548 if ((double)val < 0) {
549 feraiseexcept(FeInvalid);
550 feclearexcept(FeInexact);
551 return 0;
552 }
553 if ((double)val > (mask(32))) {
554 feraiseexcept(FeInvalid);
555 feclearexcept(FeInexact);
556 return mask(32);
557 }
558 return (uint32_t)val;
559 }
560 }
561}
562
563static inline float
564vfpUFixedToFpS(FPSCR fpscr, uint32_t val, bool half, uint8_t imm)
565{
566 fesetround(FeRoundNearest);
567 if (half)
568 val = (uint16_t)val;
569 float scale = powf(2.0, imm);
570 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
571 feclearexcept(FeAllExceptions);
572 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
573 return fixDivDest(fpscr, val / scale, (float)val, scale);
574}
575
576static inline float
577vfpSFixedToFpS(FPSCR fpscr, int32_t val, bool half, uint8_t imm)
578{
579 fesetround(FeRoundNearest);
580 if (half)
581 val = sext<16>(val & mask(16));
582 float scale = powf(2.0, imm);
583 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
584 feclearexcept(FeAllExceptions);
585 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
586 return fixDivDest(fpscr, val / scale, (float)val, scale);
587}
588
589static inline uint64_t
590vfpFpDToFixed(double val, bool isSigned, bool half,
591 uint8_t imm, bool rzero = true)
592{
593 int rmode = rzero ? FeRoundZero : fegetround();
594 fesetround(FeRoundNearest);
595 val = val * pow(2.0, imm);
596 __asm__ __volatile__("" : "=m" (val) : "m" (val));
597 fesetround(rmode);
598 feclearexcept(FeAllExceptions);
599 __asm__ __volatile__("" : "=m" (val) : "m" (val));
600 double origVal = val;
601 val = rint(val);
602 int fpType = std::fpclassify(val);
603 if (fpType == FP_SUBNORMAL || fpType == FP_NAN) {
604 if (fpType == FP_NAN) {
605 feraiseexcept(FeInvalid);
606 }
607 val = 0.0;
608 } else if (origVal != val) {
609 switch (rmode) {
610 case FeRoundNearest:
611 if (origVal - val > 0.5)
612 val += 1.0;
613 else if (val - origVal > 0.5)
614 val -= 1.0;
615 break;
616 case FeRoundDown:
617 if (origVal < val)
618 val -= 1.0;
619 break;
620 case FeRoundUpward:
621 if (origVal > val)
622 val += 1.0;
623 break;
624 }
625 feraiseexcept(FeInexact);
626 }
627 if (isSigned) {
628 if (half) {
629 if (val < (int16_t)(1 << 15)) {
630 feraiseexcept(FeInvalid);
631 feclearexcept(FeInexact);
632 return (int16_t)(1 << 15);
633 }
634 if (val > (int16_t)mask(15)) {
635 feraiseexcept(FeInvalid);
636 feclearexcept(FeInexact);
637 return (int16_t)mask(15);
638 }
639 return (int16_t)val;
640 } else {
641 if (val < (int32_t)(1 << 31)) {
642 feraiseexcept(FeInvalid);
643 feclearexcept(FeInexact);
644 return (int32_t)(1 << 31);
645 }
646 if (val > (int32_t)mask(31)) {
647 feraiseexcept(FeInvalid);
648 feclearexcept(FeInexact);
649 return (int32_t)mask(31);
650 }
651 return (int32_t)val;
652 }
653 } else {
654 if (half) {
655 if (val < 0) {
656 feraiseexcept(FeInvalid);
657 feclearexcept(FeInexact);
658 return 0;
659 }
660 if (val > mask(16)) {
661 feraiseexcept(FeInvalid);
662 feclearexcept(FeInexact);
663 return mask(16);
664 }
665 return (uint16_t)val;
666 } else {
667 if (val < 0) {
668 feraiseexcept(FeInvalid);
669 feclearexcept(FeInexact);
670 return 0;
671 }
672 if (val > mask(32)) {
673 feraiseexcept(FeInvalid);
674 feclearexcept(FeInexact);
675 return mask(32);
676 }
677 return (uint32_t)val;
678 }
679 }
680}
681
682static inline double
683vfpUFixedToFpD(FPSCR fpscr, uint32_t val, bool half, uint8_t imm)
684{
685 fesetround(FeRoundNearest);
686 if (half)
687 val = (uint16_t)val;
688 double scale = pow(2.0, imm);
689 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
690 feclearexcept(FeAllExceptions);
691 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
692 return fixDivDest(fpscr, val / scale, (double)val, scale);
693}
694
695static inline double
696vfpSFixedToFpD(FPSCR fpscr, int32_t val, bool half, uint8_t imm)
697{
698 fesetround(FeRoundNearest);
699 if (half)
700 val = sext<16>(val & mask(16));
701 double scale = pow(2.0, imm);
702 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
703 feclearexcept(FeAllExceptions);
704 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
705 return fixDivDest(fpscr, val / scale, (double)val, scale);
706}
707
708class VfpMacroOp : public PredMacroOp
709{
710 public:
711 static bool
712 inScalarBank(IntRegIndex idx)
713 {
714 return (idx % 32) < 8;
715 }
716
717 protected:
718 bool wide;
719
720 VfpMacroOp(const char *mnem, ExtMachInst _machInst,
721 OpClass __opClass, bool _wide) :
722 PredMacroOp(mnem, _machInst, __opClass), wide(_wide)
723 {}
724
725 IntRegIndex
726 addStride(IntRegIndex idx, unsigned stride)
727 {
728 if (wide) {
729 stride *= 2;
730 }
731 unsigned offset = idx % 8;
732 idx = (IntRegIndex)(idx - offset);
733 offset += stride;
734 idx = (IntRegIndex)(idx + (offset % 8));
735 return idx;
736 }
737
738 void
739 nextIdxs(IntRegIndex &dest, IntRegIndex &op1, IntRegIndex &op2)
740 {
741 unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2;
742 assert(!inScalarBank(dest));
743 dest = addStride(dest, stride);
744 op1 = addStride(op1, stride);
745 if (!inScalarBank(op2)) {
746 op2 = addStride(op2, stride);
747 }
748 }
749
750 void
751 nextIdxs(IntRegIndex &dest, IntRegIndex &op1)
752 {
753 unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2;
754 assert(!inScalarBank(dest));
755 dest = addStride(dest, stride);
756 if (!inScalarBank(op1)) {
757 op1 = addStride(op1, stride);
758 }
759 }
760
761 void
762 nextIdxs(IntRegIndex &dest)
763 {
764 unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2;
765 assert(!inScalarBank(dest));
766 dest = addStride(dest, stride);
767 }
768};
769
/**
 * Single precision addition as a plain function, matching the
 * fpType (*)(fpType, fpType) signature taken by FpOp::binaryOp().
 */
static inline float
fpAddS(float a, float b)
{
    const float sum = a + b;
    return sum;
}
775
/**
 * Double precision addition as a plain function, matching the
 * fpType (*)(fpType, fpType) signature taken by FpOp::binaryOp().
 */
static inline double
fpAddD(double a, double b)
{
    const double sum = a + b;
    return sum;
}
781
/**
 * Single precision subtraction (a minus b) as a plain function, matching
 * the fpType (*)(fpType, fpType) signature taken by FpOp::binaryOp().
 */
static inline float
fpSubS(float a, float b)
{
    const float difference = a - b;
    return difference;
}
787
/**
 * Double precision subtraction (a minus b) as a plain function, matching
 * the fpType (*)(fpType, fpType) signature taken by FpOp::binaryOp().
 */
static inline double
fpSubD(double a, double b)
{
    const double difference = a - b;
    return difference;
}
793
/**
 * Single precision division (a over b) as a plain function, matching the
 * fpType (*)(fpType, fpType) signature taken by FpOp::binaryOp().
 */
static inline float
fpDivS(float a, float b)
{
    const float quotient = a / b;
    return quotient;
}
799
/**
 * Double precision division (a over b) as a plain function, matching the
 * fpType (*)(fpType, fpType) signature taken by FpOp::binaryOp().
 */
static inline double
fpDivD(double a, double b)
{
    const double quotient = a / b;
    return quotient;
}
805
/**
 * Single precision multiplication as a plain function, matching the
 * fpType (*)(fpType, fpType) signature taken by FpOp::binaryOp().
 */
static inline float
fpMulS(float a, float b)
{
    const float product = a * b;
    return product;
}
811
/**
 * Double precision multiplication as a plain function, matching the
 * fpType (*)(fpType, fpType) signature taken by FpOp::binaryOp().
 */
static inline double
fpMulD(double a, double b)
{
    const double product = a * b;
    return product;
}
817
818class FpOp : public PredOp
819{
820 protected:
821 FpOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass) :
822 PredOp(mnem, _machInst, __opClass)
823 {}
824
825 virtual float
826 doOp(float op1, float op2) const
827 {
828 panic("Unimplemented version of doOp called.\n");
829 }
830
831 virtual float
832 doOp(float op1) const
833 {
834 panic("Unimplemented version of doOp called.\n");
835 }
836
837 virtual double
838 doOp(double op1, double op2) const
839 {
840 panic("Unimplemented version of doOp called.\n");
841 }
842
843 virtual double
844 doOp(double op1) const
845 {
846 panic("Unimplemented version of doOp called.\n");
847 }
848
849 double
850 dbl(uint32_t low, uint32_t high) const
851 {
852 double junk = 0.0;
853 return bitsToFp((uint64_t)low | ((uint64_t)high << 32), junk);
854 }
855
856 uint32_t
857 dblLow(double val) const
858 {
859 return fpToBits(val);
860 }
861
862 uint32_t
863 dblHi(double val) const
864 {
865 return fpToBits(val) >> 32;
866 }
867
868 template <class fpType>
869 fpType
870 binaryOp(FPSCR &fpscr, fpType op1, fpType op2,
871 fpType (*func)(fpType, fpType),
872 bool flush, uint32_t rMode) const
873 {
874 const bool single = (sizeof(fpType) == sizeof(float));
875 fpType junk = 0.0;
876
877 if (flush && flushToZero(op1, op2))
878 fpscr.idc = 1;
879 VfpSavedState state = prepFpState(rMode);
880 __asm__ __volatile__ ("" : "=m" (op1), "=m" (op2), "=m" (state)
881 : "m" (op1), "m" (op2), "m" (state));
882 fpType dest = func(op1, op2);
883 __asm__ __volatile__ ("" : "=m" (dest) : "m" (dest));
884
885 int fpClass = std::fpclassify(dest);
886 // Get NAN behavior right. This varies between x86 and ARM.
887 if (fpClass == FP_NAN) {
888 const bool single = (sizeof(fpType) == sizeof(float));
889 const uint64_t qnan =
890 single ? 0x7fc00000 : ULL(0x7ff8000000000000);
891 const bool nan1 = std::isnan(op1);
892 const bool nan2 = std::isnan(op2);
893 const bool signal1 = nan1 && ((fpToBits(op1) & qnan) != qnan);
894 const bool signal2 = nan2 && ((fpToBits(op2) & qnan) != qnan);
895 if ((!nan1 && !nan2) || (fpscr.dn == 1)) {
896 dest = bitsToFp(qnan, junk);
897 } else if (signal1) {
898 dest = bitsToFp(fpToBits(op1) | qnan, junk);
899 } else if (signal2) {
900 dest = bitsToFp(fpToBits(op2) | qnan, junk);
901 } else if (nan1) {
902 dest = op1;
903 } else if (nan2) {
904 dest = op2;
905 }
906 } else if (flush && flushToZero(dest)) {
907 feraiseexcept(FeUnderflow);
908 } else if ((
909 (single && (dest == bitsToFp(0x00800000, junk) ||
910 dest == bitsToFp(0x80800000, junk))) ||
911 (!single &&
912 (dest == bitsToFp(ULL(0x0010000000000000), junk) ||
913 dest == bitsToFp(ULL(0x8010000000000000), junk)))
914 ) && rMode != VfpRoundZero) {
915 /*
916 * Correct for the fact that underflow is detected -before- rounding
917 * in ARM and -after- rounding in x86.
918 */
919 fesetround(FeRoundZero);
920 __asm__ __volatile__ ("" : "=m" (op1), "=m" (op2)
921 : "m" (op1), "m" (op2));
922 fpType temp = func(op1, op2);
923 __asm__ __volatile__ ("" : "=m" (temp) : "m" (temp));
924 if (flush && flushToZero(temp)) {
925 dest = temp;
926 }
927 }
928 finishVfp(fpscr, state);
929 return dest;
930 }
931
932 template <class fpType>
933 fpType
934 unaryOp(FPSCR &fpscr, fpType op1,
935 fpType (*func)(fpType),
936 bool flush, uint32_t rMode) const
937 {
938 const bool single = (sizeof(fpType) == sizeof(float));
939 fpType junk = 0.0;
940
941 if (flush && flushToZero(op1))
942 fpscr.idc = 1;
943 VfpSavedState state = prepFpState(rMode);
944 __asm__ __volatile__ ("" : "=m" (op1), "=m" (state)
945 : "m" (op1), "m" (state));
946 fpType dest = func(op1);
947 __asm__ __volatile__ ("" : "=m" (dest) : "m" (dest));
948
949 int fpClass = std::fpclassify(dest);
950 // Get NAN behavior right. This varies between x86 and ARM.
951 if (fpClass == FP_NAN) {
952 const bool single = (sizeof(fpType) == sizeof(float));
953 const uint64_t qnan =
954 single ? 0x7fc00000 : ULL(0x7ff8000000000000);
955 const bool nan = std::isnan(op1);
956 if (!nan || fpscr.dn == 1) {
957 dest = bitsToFp(qnan, junk);
958 } else if (nan) {
959 dest = bitsToFp(fpToBits(op1) | qnan, junk);
960 }
961 } else if (flush && flushToZero(dest)) {
962 feraiseexcept(FeUnderflow);
963 } else if ((
964 (single && (dest == bitsToFp(0x00800000, junk) ||
965 dest == bitsToFp(0x80800000, junk))) ||
966 (!single &&
967 (dest == bitsToFp(ULL(0x0010000000000000), junk) ||
968 dest == bitsToFp(ULL(0x8010000000000000), junk)))
969 ) && rMode != VfpRoundZero) {
970 /*
971 * Correct for the fact that underflow is detected -before- rounding
972 * in ARM and -after- rounding in x86.
973 */
974 fesetround(FeRoundZero);
975 __asm__ __volatile__ ("" : "=m" (op1) : "m" (op1));
976 fpType temp = func(op1);
977 __asm__ __volatile__ ("" : "=m" (temp) : "m" (temp));
978 if (flush && flushToZero(temp)) {
979 dest = temp;
980 }
981 }
982 finishVfp(fpscr, state);
983 return dest;
984 }
985};
986
987class FpRegRegOp : public FpOp
988{
989 protected:
990 IntRegIndex dest;
991 IntRegIndex op1;
992
993 FpRegRegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
994 IntRegIndex _dest, IntRegIndex _op1,
995 VfpMicroMode mode = VfpNotAMicroop) :
996 FpOp(mnem, _machInst, __opClass), dest(_dest), op1(_op1)
997 {
998 setVfpMicroFlags(mode, flags);
999 }
1000
1001 std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
1002};
1003
1004class FpRegImmOp : public FpOp
1005{
1006 protected:
1007 IntRegIndex dest;
1008 uint64_t imm;
1009
1010 FpRegImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
1011 IntRegIndex _dest, uint64_t _imm,
1012 VfpMicroMode mode = VfpNotAMicroop) :
1013 FpOp(mnem, _machInst, __opClass), dest(_dest), imm(_imm)
1014 {
1015 setVfpMicroFlags(mode, flags);
1016 }
1017
1018 std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
1019};
1020
1021class FpRegRegImmOp : public FpOp
1022{
1023 protected:
1024 IntRegIndex dest;
1025 IntRegIndex op1;
1026 uint64_t imm;
1027
1028 FpRegRegImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
1029 IntRegIndex _dest, IntRegIndex _op1,
1030 uint64_t _imm, VfpMicroMode mode = VfpNotAMicroop) :
1031 FpOp(mnem, _machInst, __opClass), dest(_dest), op1(_op1), imm(_imm)
1032 {
1033 setVfpMicroFlags(mode, flags);
1034 }
1035
1036 std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
1037};
1038
1039class FpRegRegRegOp : public FpOp
1040{
1041 protected:
1042 IntRegIndex dest;
1043 IntRegIndex op1;
1044 IntRegIndex op2;
1045
1046 FpRegRegRegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
1047 IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2,
1048 VfpMicroMode mode = VfpNotAMicroop) :
1049 FpOp(mnem, _machInst, __opClass), dest(_dest), op1(_op1), op2(_op2)
1050 {
1051 setVfpMicroFlags(mode, flags);
1052 }
1053
1054 std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
1055};
1056
1057}
1058
1059#endif //__ARCH_ARM_INSTS_VFP_HH__
219static inline void
220finishVfp(FPSCR &fpscr, VfpSavedState state)
221{
222 int exceptions = fetestexcept(FeAllExceptions);
223 bool underflow = false;
224 if (exceptions & FeInvalid) {
225 fpscr.ioc = 1;
226 }
227 if (exceptions & FeDivByZero) {
228 fpscr.dzc = 1;
229 }
230 if (exceptions & FeOverflow) {
231 fpscr.ofc = 1;
232 }
233 if (exceptions & FeUnderflow) {
234 underflow = true;
235 fpscr.ufc = 1;
236 }
237 if ((exceptions & FeInexact) && !(underflow && fpscr.fz)) {
238 fpscr.ixc = 1;
239 }
240 fesetround(state);
241}
242
243template <class fpType>
244static inline fpType
245fixDest(FPSCR fpscr, fpType val, fpType op1)
246{
247 int fpClass = std::fpclassify(val);
248 fpType junk = 0.0;
249 if (fpClass == FP_NAN) {
250 const bool single = (sizeof(val) == sizeof(float));
251 const uint64_t qnan = single ? 0x7fc00000 : ULL(0x7ff8000000000000);
252 const bool nan = std::isnan(op1);
253 if (!nan || (fpscr.dn == 1)) {
254 val = bitsToFp(qnan, junk);
255 } else if (nan) {
256 val = bitsToFp(fpToBits(op1) | qnan, junk);
257 }
258 } else if (fpClass == FP_SUBNORMAL && fpscr.fz == 1) {
259 // Turn val into a zero with the correct sign;
260 uint64_t bitMask = ULL(0x1) << (sizeof(fpType) * 8 - 1);
261 val = bitsToFp(fpToBits(val) & bitMask, junk);
262 feclearexcept(FeInexact);
263 feraiseexcept(FeUnderflow);
264 }
265 return val;
266}
267
268template <class fpType>
269static inline fpType
270fixDest(FPSCR fpscr, fpType val, fpType op1, fpType op2)
271{
272 int fpClass = std::fpclassify(val);
273 fpType junk = 0.0;
274 if (fpClass == FP_NAN) {
275 const bool single = (sizeof(val) == sizeof(float));
276 const uint64_t qnan = single ? 0x7fc00000 : ULL(0x7ff8000000000000);
277 const bool nan1 = std::isnan(op1);
278 const bool nan2 = std::isnan(op2);
279 const bool signal1 = nan1 && ((fpToBits(op1) & qnan) != qnan);
280 const bool signal2 = nan2 && ((fpToBits(op2) & qnan) != qnan);
281 if ((!nan1 && !nan2) || (fpscr.dn == 1)) {
282 val = bitsToFp(qnan, junk);
283 } else if (signal1) {
284 val = bitsToFp(fpToBits(op1) | qnan, junk);
285 } else if (signal2) {
286 val = bitsToFp(fpToBits(op2) | qnan, junk);
287 } else if (nan1) {
288 val = op1;
289 } else if (nan2) {
290 val = op2;
291 }
292 } else if (fpClass == FP_SUBNORMAL && fpscr.fz == 1) {
293 // Turn val into a zero with the correct sign;
294 uint64_t bitMask = ULL(0x1) << (sizeof(fpType) * 8 - 1);
295 val = bitsToFp(fpToBits(val) & bitMask, junk);
296 feclearexcept(FeInexact);
297 feraiseexcept(FeUnderflow);
298 }
299 return val;
300}
301
302template <class fpType>
303static inline fpType
304fixDivDest(FPSCR fpscr, fpType val, fpType op1, fpType op2)
305{
306 fpType mid = fixDest(fpscr, val, op1, op2);
307 const bool single = (sizeof(fpType) == sizeof(float));
308 const fpType junk = 0.0;
309 if ((single && (val == bitsToFp(0x00800000, junk) ||
310 val == bitsToFp(0x80800000, junk))) ||
311 (!single && (val == bitsToFp(ULL(0x0010000000000000), junk) ||
312 val == bitsToFp(ULL(0x8010000000000000), junk)))
313 ) {
314 __asm__ __volatile__("" : "=m" (op1) : "m" (op1));
315 fesetround(FeRoundZero);
316 fpType temp = 0.0;
317 __asm__ __volatile__("" : "=m" (temp) : "m" (temp));
318 temp = op1 / op2;
319 if (flushToZero(temp)) {
320 feraiseexcept(FeUnderflow);
321 if (fpscr.fz) {
322 feclearexcept(FeInexact);
323 mid = temp;
324 }
325 }
326 __asm__ __volatile__("" :: "m" (temp));
327 }
328 return mid;
329}
330
331static inline float
332fixFpDFpSDest(FPSCR fpscr, double val)
333{
334 const float junk = 0.0;
335 float op1 = 0.0;
336 if (std::isnan(val)) {
337 uint64_t valBits = fpToBits(val);
338 uint32_t op1Bits = bits(valBits, 50, 29) |
339 (mask(9) << 22) |
340 (bits(valBits, 63) << 31);
341 op1 = bitsToFp(op1Bits, junk);
342 }
343 float mid = fixDest(fpscr, (float)val, op1);
344 if (fpscr.fz && fetestexcept(FeUnderflow | FeInexact) ==
345 (FeUnderflow | FeInexact)) {
346 feclearexcept(FeInexact);
347 }
348 if (mid == bitsToFp(0x00800000, junk) ||
349 mid == bitsToFp(0x80800000, junk)) {
350 __asm__ __volatile__("" : "=m" (val) : "m" (val));
351 fesetround(FeRoundZero);
352 float temp = 0.0;
353 __asm__ __volatile__("" : "=m" (temp) : "m" (temp));
354 temp = val;
355 if (flushToZero(temp)) {
356 feraiseexcept(FeUnderflow);
357 if (fpscr.fz) {
358 feclearexcept(FeInexact);
359 mid = temp;
360 }
361 }
362 __asm__ __volatile__("" :: "m" (temp));
363 }
364 return mid;
365}
366
367static inline double
368fixFpSFpDDest(FPSCR fpscr, float val)
369{
370 const double junk = 0.0;
371 double op1 = 0.0;
372 if (std::isnan(val)) {
373 uint32_t valBits = fpToBits(val);
374 uint64_t op1Bits = ((uint64_t)bits(valBits, 21, 0) << 29) |
375 (mask(12) << 51) |
376 ((uint64_t)bits(valBits, 31) << 63);
377 op1 = bitsToFp(op1Bits, junk);
378 }
379 double mid = fixDest(fpscr, (double)val, op1);
380 if (mid == bitsToFp(ULL(0x0010000000000000), junk) ||
381 mid == bitsToFp(ULL(0x8010000000000000), junk)) {
382 __asm__ __volatile__("" : "=m" (val) : "m" (val));
383 fesetround(FeRoundZero);
384 double temp = 0.0;
385 __asm__ __volatile__("" : "=m" (temp) : "m" (temp));
386 temp = val;
387 if (flushToZero(temp)) {
388 feraiseexcept(FeUnderflow);
389 if (fpscr.fz) {
390 feclearexcept(FeInexact);
391 mid = temp;
392 }
393 }
394 __asm__ __volatile__("" :: "m" (temp));
395 }
396 return mid;
397}
398
399static inline double
400makeDouble(uint32_t low, uint32_t high)
401{
402 double junk = 0.0;
403 return bitsToFp((uint64_t)low | ((uint64_t)high << 32), junk);
404}
405
406static inline uint32_t
407lowFromDouble(double val)
408{
409 return fpToBits(val);
410}
411
412static inline uint32_t
413highFromDouble(double val)
414{
415 return fpToBits(val) >> 32;
416}
417
418static inline uint64_t
419vfpFpSToFixed(float val, bool isSigned, bool half,
420 uint8_t imm, bool rzero = true)
421{
422 int rmode = rzero ? FeRoundZero : fegetround();
423 __asm__ __volatile__("" : "=m" (rmode) : "m" (rmode));
424 fesetround(FeRoundNearest);
425 val = val * powf(2.0, imm);
426 __asm__ __volatile__("" : "=m" (val) : "m" (val));
427 fesetround(rmode);
428 feclearexcept(FeAllExceptions);
429 __asm__ __volatile__("" : "=m" (val) : "m" (val));
430 float origVal = val;
431 val = rintf(val);
432 int fpType = std::fpclassify(val);
433 if (fpType == FP_SUBNORMAL || fpType == FP_NAN) {
434 if (fpType == FP_NAN) {
435 feraiseexcept(FeInvalid);
436 }
437 val = 0.0;
438 } else if (origVal != val) {
439 switch (rmode) {
440 case FeRoundNearest:
441 if (origVal - val > 0.5)
442 val += 1.0;
443 else if (val - origVal > 0.5)
444 val -= 1.0;
445 break;
446 case FeRoundDown:
447 if (origVal < val)
448 val -= 1.0;
449 break;
450 case FeRoundUpward:
451 if (origVal > val)
452 val += 1.0;
453 break;
454 }
455 feraiseexcept(FeInexact);
456 }
457
458 if (isSigned) {
459 if (half) {
460 if ((double)val < (int16_t)(1 << 15)) {
461 feraiseexcept(FeInvalid);
462 feclearexcept(FeInexact);
463 return (int16_t)(1 << 15);
464 }
465 if ((double)val > (int16_t)mask(15)) {
466 feraiseexcept(FeInvalid);
467 feclearexcept(FeInexact);
468 return (int16_t)mask(15);
469 }
470 return (int16_t)val;
471 } else {
472 if ((double)val < (int32_t)(1 << 31)) {
473 feraiseexcept(FeInvalid);
474 feclearexcept(FeInexact);
475 return (int32_t)(1 << 31);
476 }
477 if ((double)val > (int32_t)mask(31)) {
478 feraiseexcept(FeInvalid);
479 feclearexcept(FeInexact);
480 return (int32_t)mask(31);
481 }
482 return (int32_t)val;
483 }
484 } else {
485 if (half) {
486 if ((double)val < 0) {
487 feraiseexcept(FeInvalid);
488 feclearexcept(FeInexact);
489 return 0;
490 }
491 if ((double)val > (mask(16))) {
492 feraiseexcept(FeInvalid);
493 feclearexcept(FeInexact);
494 return mask(16);
495 }
496 return (uint16_t)val;
497 } else {
498 if ((double)val < 0) {
499 feraiseexcept(FeInvalid);
500 feclearexcept(FeInexact);
501 return 0;
502 }
503 if ((double)val > (mask(32))) {
504 feraiseexcept(FeInvalid);
505 feclearexcept(FeInexact);
506 return mask(32);
507 }
508 return (uint32_t)val;
509 }
510 }
511}
512
513static inline float
514vfpUFixedToFpS(FPSCR fpscr, uint32_t val, bool half, uint8_t imm)
515{
516 fesetround(FeRoundNearest);
517 if (half)
518 val = (uint16_t)val;
519 float scale = powf(2.0, imm);
520 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
521 feclearexcept(FeAllExceptions);
522 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
523 return fixDivDest(fpscr, val / scale, (float)val, scale);
524}
525
526static inline float
527vfpSFixedToFpS(FPSCR fpscr, int32_t val, bool half, uint8_t imm)
528{
529 fesetround(FeRoundNearest);
530 if (half)
531 val = sext<16>(val & mask(16));
532 float scale = powf(2.0, imm);
533 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
534 feclearexcept(FeAllExceptions);
535 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
536 return fixDivDest(fpscr, val / scale, (float)val, scale);
537}
538
539static inline uint64_t
540vfpFpDToFixed(double val, bool isSigned, bool half,
541 uint8_t imm, bool rzero = true)
542{
543 int rmode = rzero ? FeRoundZero : fegetround();
544 fesetround(FeRoundNearest);
545 val = val * pow(2.0, imm);
546 __asm__ __volatile__("" : "=m" (val) : "m" (val));
547 fesetround(rmode);
548 feclearexcept(FeAllExceptions);
549 __asm__ __volatile__("" : "=m" (val) : "m" (val));
550 double origVal = val;
551 val = rint(val);
552 int fpType = std::fpclassify(val);
553 if (fpType == FP_SUBNORMAL || fpType == FP_NAN) {
554 if (fpType == FP_NAN) {
555 feraiseexcept(FeInvalid);
556 }
557 val = 0.0;
558 } else if (origVal != val) {
559 switch (rmode) {
560 case FeRoundNearest:
561 if (origVal - val > 0.5)
562 val += 1.0;
563 else if (val - origVal > 0.5)
564 val -= 1.0;
565 break;
566 case FeRoundDown:
567 if (origVal < val)
568 val -= 1.0;
569 break;
570 case FeRoundUpward:
571 if (origVal > val)
572 val += 1.0;
573 break;
574 }
575 feraiseexcept(FeInexact);
576 }
577 if (isSigned) {
578 if (half) {
579 if (val < (int16_t)(1 << 15)) {
580 feraiseexcept(FeInvalid);
581 feclearexcept(FeInexact);
582 return (int16_t)(1 << 15);
583 }
584 if (val > (int16_t)mask(15)) {
585 feraiseexcept(FeInvalid);
586 feclearexcept(FeInexact);
587 return (int16_t)mask(15);
588 }
589 return (int16_t)val;
590 } else {
591 if (val < (int32_t)(1 << 31)) {
592 feraiseexcept(FeInvalid);
593 feclearexcept(FeInexact);
594 return (int32_t)(1 << 31);
595 }
596 if (val > (int32_t)mask(31)) {
597 feraiseexcept(FeInvalid);
598 feclearexcept(FeInexact);
599 return (int32_t)mask(31);
600 }
601 return (int32_t)val;
602 }
603 } else {
604 if (half) {
605 if (val < 0) {
606 feraiseexcept(FeInvalid);
607 feclearexcept(FeInexact);
608 return 0;
609 }
610 if (val > mask(16)) {
611 feraiseexcept(FeInvalid);
612 feclearexcept(FeInexact);
613 return mask(16);
614 }
615 return (uint16_t)val;
616 } else {
617 if (val < 0) {
618 feraiseexcept(FeInvalid);
619 feclearexcept(FeInexact);
620 return 0;
621 }
622 if (val > mask(32)) {
623 feraiseexcept(FeInvalid);
624 feclearexcept(FeInexact);
625 return mask(32);
626 }
627 return (uint32_t)val;
628 }
629 }
630}
631
632static inline double
633vfpUFixedToFpD(FPSCR fpscr, uint32_t val, bool half, uint8_t imm)
634{
635 fesetround(FeRoundNearest);
636 if (half)
637 val = (uint16_t)val;
638 double scale = pow(2.0, imm);
639 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
640 feclearexcept(FeAllExceptions);
641 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
642 return fixDivDest(fpscr, val / scale, (double)val, scale);
643}
644
645static inline double
646vfpSFixedToFpD(FPSCR fpscr, int32_t val, bool half, uint8_t imm)
647{
648 fesetround(FeRoundNearest);
649 if (half)
650 val = sext<16>(val & mask(16));
651 double scale = pow(2.0, imm);
652 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
653 feclearexcept(FeAllExceptions);
654 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
655 return fixDivDest(fpscr, val / scale, (double)val, scale);
656}
657
658class VfpMacroOp : public PredMacroOp
659{
660 public:
661 static bool
662 inScalarBank(IntRegIndex idx)
663 {
664 return (idx % 32) < 8;
665 }
666
667 protected:
668 bool wide;
669
670 VfpMacroOp(const char *mnem, ExtMachInst _machInst,
671 OpClass __opClass, bool _wide) :
672 PredMacroOp(mnem, _machInst, __opClass), wide(_wide)
673 {}
674
675 IntRegIndex
676 addStride(IntRegIndex idx, unsigned stride)
677 {
678 if (wide) {
679 stride *= 2;
680 }
681 unsigned offset = idx % 8;
682 idx = (IntRegIndex)(idx - offset);
683 offset += stride;
684 idx = (IntRegIndex)(idx + (offset % 8));
685 return idx;
686 }
687
688 void
689 nextIdxs(IntRegIndex &dest, IntRegIndex &op1, IntRegIndex &op2)
690 {
691 unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2;
692 assert(!inScalarBank(dest));
693 dest = addStride(dest, stride);
694 op1 = addStride(op1, stride);
695 if (!inScalarBank(op2)) {
696 op2 = addStride(op2, stride);
697 }
698 }
699
700 void
701 nextIdxs(IntRegIndex &dest, IntRegIndex &op1)
702 {
703 unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2;
704 assert(!inScalarBank(dest));
705 dest = addStride(dest, stride);
706 if (!inScalarBank(op1)) {
707 op1 = addStride(op1, stride);
708 }
709 }
710
711 void
712 nextIdxs(IntRegIndex &dest)
713 {
714 unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2;
715 assert(!inScalarBank(dest));
716 dest = addStride(dest, stride);
717 }
718};
719
/// Single-precision addition: returns a + b.
static inline float
fpAddS(float a, float b)
{
    const float sum = a + b;
    return sum;
}
725
/// Double-precision addition: returns a + b.
static inline double
fpAddD(double a, double b)
{
    const double sum = a + b;
    return sum;
}
731
/// Single-precision subtraction: returns a - b.
static inline float
fpSubS(float a, float b)
{
    const float difference = a - b;
    return difference;
}
737
/// Double-precision subtraction: returns a - b.
static inline double
fpSubD(double a, double b)
{
    const double difference = a - b;
    return difference;
}
743
/// Single-precision division: returns a / b.
static inline float
fpDivS(float a, float b)
{
    const float quotient = a / b;
    return quotient;
}
749
/// Double-precision division: returns a / b.
static inline double
fpDivD(double a, double b)
{
    const double quotient = a / b;
    return quotient;
}
755
/// Single-precision multiplication: returns a * b.
static inline float
fpMulS(float a, float b)
{
    const float product = a * b;
    return product;
}
761
/// Double-precision multiplication: returns a * b.
static inline double
fpMulD(double a, double b)
{
    const double product = a * b;
    return product;
}
767
768class FpOp : public PredOp
769{
770 protected:
771 FpOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass) :
772 PredOp(mnem, _machInst, __opClass)
773 {}
774
775 virtual float
776 doOp(float op1, float op2) const
777 {
778 panic("Unimplemented version of doOp called.\n");
779 }
780
781 virtual float
782 doOp(float op1) const
783 {
784 panic("Unimplemented version of doOp called.\n");
785 }
786
787 virtual double
788 doOp(double op1, double op2) const
789 {
790 panic("Unimplemented version of doOp called.\n");
791 }
792
793 virtual double
794 doOp(double op1) const
795 {
796 panic("Unimplemented version of doOp called.\n");
797 }
798
799 double
800 dbl(uint32_t low, uint32_t high) const
801 {
802 double junk = 0.0;
803 return bitsToFp((uint64_t)low | ((uint64_t)high << 32), junk);
804 }
805
806 uint32_t
807 dblLow(double val) const
808 {
809 return fpToBits(val);
810 }
811
812 uint32_t
813 dblHi(double val) const
814 {
815 return fpToBits(val) >> 32;
816 }
817
818 template <class fpType>
819 fpType
820 binaryOp(FPSCR &fpscr, fpType op1, fpType op2,
821 fpType (*func)(fpType, fpType),
822 bool flush, uint32_t rMode) const
823 {
824 const bool single = (sizeof(fpType) == sizeof(float));
825 fpType junk = 0.0;
826
827 if (flush && flushToZero(op1, op2))
828 fpscr.idc = 1;
829 VfpSavedState state = prepFpState(rMode);
830 __asm__ __volatile__ ("" : "=m" (op1), "=m" (op2), "=m" (state)
831 : "m" (op1), "m" (op2), "m" (state));
832 fpType dest = func(op1, op2);
833 __asm__ __volatile__ ("" : "=m" (dest) : "m" (dest));
834
835 int fpClass = std::fpclassify(dest);
836 // Get NAN behavior right. This varies between x86 and ARM.
837 if (fpClass == FP_NAN) {
838 const bool single = (sizeof(fpType) == sizeof(float));
839 const uint64_t qnan =
840 single ? 0x7fc00000 : ULL(0x7ff8000000000000);
841 const bool nan1 = std::isnan(op1);
842 const bool nan2 = std::isnan(op2);
843 const bool signal1 = nan1 && ((fpToBits(op1) & qnan) != qnan);
844 const bool signal2 = nan2 && ((fpToBits(op2) & qnan) != qnan);
845 if ((!nan1 && !nan2) || (fpscr.dn == 1)) {
846 dest = bitsToFp(qnan, junk);
847 } else if (signal1) {
848 dest = bitsToFp(fpToBits(op1) | qnan, junk);
849 } else if (signal2) {
850 dest = bitsToFp(fpToBits(op2) | qnan, junk);
851 } else if (nan1) {
852 dest = op1;
853 } else if (nan2) {
854 dest = op2;
855 }
856 } else if (flush && flushToZero(dest)) {
857 feraiseexcept(FeUnderflow);
858 } else if ((
859 (single && (dest == bitsToFp(0x00800000, junk) ||
860 dest == bitsToFp(0x80800000, junk))) ||
861 (!single &&
862 (dest == bitsToFp(ULL(0x0010000000000000), junk) ||
863 dest == bitsToFp(ULL(0x8010000000000000), junk)))
864 ) && rMode != VfpRoundZero) {
865 /*
866 * Correct for the fact that underflow is detected -before- rounding
867 * in ARM and -after- rounding in x86.
868 */
869 fesetround(FeRoundZero);
870 __asm__ __volatile__ ("" : "=m" (op1), "=m" (op2)
871 : "m" (op1), "m" (op2));
872 fpType temp = func(op1, op2);
873 __asm__ __volatile__ ("" : "=m" (temp) : "m" (temp));
874 if (flush && flushToZero(temp)) {
875 dest = temp;
876 }
877 }
878 finishVfp(fpscr, state);
879 return dest;
880 }
881
882 template <class fpType>
883 fpType
884 unaryOp(FPSCR &fpscr, fpType op1,
885 fpType (*func)(fpType),
886 bool flush, uint32_t rMode) const
887 {
888 const bool single = (sizeof(fpType) == sizeof(float));
889 fpType junk = 0.0;
890
891 if (flush && flushToZero(op1))
892 fpscr.idc = 1;
893 VfpSavedState state = prepFpState(rMode);
894 __asm__ __volatile__ ("" : "=m" (op1), "=m" (state)
895 : "m" (op1), "m" (state));
896 fpType dest = func(op1);
897 __asm__ __volatile__ ("" : "=m" (dest) : "m" (dest));
898
899 int fpClass = std::fpclassify(dest);
900 // Get NAN behavior right. This varies between x86 and ARM.
901 if (fpClass == FP_NAN) {
902 const bool single = (sizeof(fpType) == sizeof(float));
903 const uint64_t qnan =
904 single ? 0x7fc00000 : ULL(0x7ff8000000000000);
905 const bool nan = std::isnan(op1);
906 if (!nan || fpscr.dn == 1) {
907 dest = bitsToFp(qnan, junk);
908 } else if (nan) {
909 dest = bitsToFp(fpToBits(op1) | qnan, junk);
910 }
911 } else if (flush && flushToZero(dest)) {
912 feraiseexcept(FeUnderflow);
913 } else if ((
914 (single && (dest == bitsToFp(0x00800000, junk) ||
915 dest == bitsToFp(0x80800000, junk))) ||
916 (!single &&
917 (dest == bitsToFp(ULL(0x0010000000000000), junk) ||
918 dest == bitsToFp(ULL(0x8010000000000000), junk)))
919 ) && rMode != VfpRoundZero) {
920 /*
921 * Correct for the fact that underflow is detected -before- rounding
922 * in ARM and -after- rounding in x86.
923 */
924 fesetround(FeRoundZero);
925 __asm__ __volatile__ ("" : "=m" (op1) : "m" (op1));
926 fpType temp = func(op1);
927 __asm__ __volatile__ ("" : "=m" (temp) : "m" (temp));
928 if (flush && flushToZero(temp)) {
929 dest = temp;
930 }
931 }
932 finishVfp(fpscr, state);
933 return dest;
934 }
935};
936
937class FpRegRegOp : public FpOp
938{
939 protected:
940 IntRegIndex dest;
941 IntRegIndex op1;
942
943 FpRegRegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
944 IntRegIndex _dest, IntRegIndex _op1,
945 VfpMicroMode mode = VfpNotAMicroop) :
946 FpOp(mnem, _machInst, __opClass), dest(_dest), op1(_op1)
947 {
948 setVfpMicroFlags(mode, flags);
949 }
950
951 std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
952};
953
954class FpRegImmOp : public FpOp
955{
956 protected:
957 IntRegIndex dest;
958 uint64_t imm;
959
960 FpRegImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
961 IntRegIndex _dest, uint64_t _imm,
962 VfpMicroMode mode = VfpNotAMicroop) :
963 FpOp(mnem, _machInst, __opClass), dest(_dest), imm(_imm)
964 {
965 setVfpMicroFlags(mode, flags);
966 }
967
968 std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
969};
970
971class FpRegRegImmOp : public FpOp
972{
973 protected:
974 IntRegIndex dest;
975 IntRegIndex op1;
976 uint64_t imm;
977
978 FpRegRegImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
979 IntRegIndex _dest, IntRegIndex _op1,
980 uint64_t _imm, VfpMicroMode mode = VfpNotAMicroop) :
981 FpOp(mnem, _machInst, __opClass), dest(_dest), op1(_op1), imm(_imm)
982 {
983 setVfpMicroFlags(mode, flags);
984 }
985
986 std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
987};
988
989class FpRegRegRegOp : public FpOp
990{
991 protected:
992 IntRegIndex dest;
993 IntRegIndex op1;
994 IntRegIndex op2;
995
996 FpRegRegRegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
997 IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2,
998 VfpMicroMode mode = VfpNotAMicroop) :
999 FpOp(mnem, _machInst, __opClass), dest(_dest), op1(_op1), op2(_op2)
1000 {
1001 setVfpMicroFlags(mode, flags);
1002 }
1003
1004 std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
1005};
1006
1007}
1008
1009#endif //__ARCH_ARM_INSTS_VFP_HH__