Cross Reference: /gem5/src/arch/arm/insts/vfp.hh

Deleted Added

sdiff udiff text old ( 7382:b3c768629a54 ) new ( 7384:f12b4f28e5eb )

full compact

1/*
2 * Copyright (c) 2010 ARM Limited
3 * All rights reserved
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder. You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions are
16 * met: redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer;
18 * redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution;
21 * neither the name of the copyright holders nor the names of its
22 * contributors may be used to endorse or promote products derived from
23 * this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 *
37 * Authors: Gabe Black
38 */
39
40#ifndef __ARCH_ARM_INSTS_VFP_HH__
41#define __ARCH_ARM_INSTS_VFP_HH__
42
#include "arch/arm/insts/misc.hh"
#include "arch/arm/miscregs.hh"
#include <fenv.h>
#include <cmath>
#include <cstring>
47
48namespace ArmISA
49{
50
/**
 * Role of a VFP instruction with respect to microop sequencing.
 * setVfpMicroFlags() turns these values into StaticInst flags.
 */
enum VfpMicroMode
{
    VfpNotAMicroop = 0,   // no microop flags are set for this mode
    VfpMicroop = 1,       // microop that is flagged neither first nor last
    VfpFirstMicroop = 2,  // microop additionally flagged as the first one
    VfpLastMicroop = 3    // microop additionally flagged as the last one
};
57
58template<class T>
59static inline void
60setVfpMicroFlags(VfpMicroMode mode, T &flags)
61{
62 switch (mode) {
63 case VfpMicroop:
64 flags[StaticInst::IsMicroop] = true;
65 break;
66 case VfpFirstMicroop:
67 flags[StaticInst::IsMicroop] =
68 flags[StaticInst::IsFirstMicroop] = true;
69 break;
70 case VfpLastMicroop:
71 flags[StaticInst::IsMicroop] =
72 flags[StaticInst::IsLastMicroop] = true;
73 break;
74 case VfpNotAMicroop:
75 break;
76 }
77 if (mode == VfpMicroop || mode == VfpFirstMicroop) {
78 flags[StaticInst::IsDelayedCommit] = true;
79 }
80}
81
/**
 * Host floating-point exception flags, re-exported from <fenv.h> under
 * names local to this namespace.
 */
enum FeExceptionBit
{
    FeInvalid = FE_INVALID,
    FeDivByZero = FE_DIVBYZERO,
    FeOverflow = FE_OVERFLOW,
    FeUnderflow = FE_UNDERFLOW,
    FeInexact = FE_INEXACT,
    // Union of all exception flags the host supports.
    FeAllExceptions = FE_ALL_EXCEPT
};
91
/**
 * Host rounding modes, re-exported from <fenv.h>. These are the values
 * handed to fesetround() throughout this file.
 */
enum FeRoundingMode
{
    FeRoundNearest = FE_TONEAREST,
    FeRoundUpward = FE_UPWARD,
    FeRoundDown = FE_DOWNWARD,
    FeRoundZero = FE_TOWARDZERO
};
99
/**
 * Rounding mode values as compared against fpscr.rMode in prepVfpFpscr(),
 * which maps each one onto the matching host FeRoundingMode.
 */
enum VfpRoundingMode
{
    VfpRoundNearest = 0,  // mapped to FeRoundNearest
    VfpRoundUpward = 1,   // mapped to FeRoundUpward
    VfpRoundDown = 2,     // mapped to FeRoundDown
    VfpRoundZero = 3      // mapped to FeRoundZero
};
107
108template <class fpType>
109static inline void
110vfpFlushToZero(uint32_t &_fpscr, fpType &op)
111{
112 FPSCR fpscr = _fpscr;
113 if (fpscr.fz == 1 && (std::fpclassify(op) == FP_SUBNORMAL)) {
114 fpscr.idc = 1;
115 op = 0;
116 }
117 _fpscr = fpscr;
118}
119
/**
 * Apply flush-to-zero handling to a pair of operands.
 *
 * Each operand is passed to the single-operand overload in turn, first
 * op1 and then op2, with both calls updating the same FPSCR value.
 *
 * @param fpscr Raw FPSCR value; updated in place.
 * @param op1   First operand; updated in place when flushed.
 * @param op2   Second operand; updated in place when flushed.
 */
template <class fpType>
static inline void
vfpFlushToZero(uint32_t &fpscr, fpType &op1, fpType &op2)
{
    fpType *const operands[2] = { &op1, &op2 };
    for (int i = 0; i < 2; i++) {
        vfpFlushToZero(fpscr, *operands[i]);
    }
}
127
/**
 * Return the raw bit pattern (object representation) of a float.
 *
 * The bytes are copied with memcpy instead of being read back through a
 * union member other than the one last written, which ISO C++ leaves
 * undefined.
 *
 * @param fp Value whose bits are wanted.
 * @return The 32 bits that make up fp.
 */
static inline uint32_t
fpToBits(float fp)
{
    uint32_t bits;
    std::memcpy(&bits, &fp, sizeof(bits));
    return bits;
}
139
/**
 * Return the raw bit pattern (object representation) of a double.
 *
 * The bytes are copied with memcpy instead of being read back through a
 * union member other than the one last written, which ISO C++ leaves
 * undefined.
 *
 * @param fp Value whose bits are wanted.
 * @return The 64 bits that make up fp.
 */
static inline uint64_t
fpToBits(double fp)
{
    uint64_t bits;
    std::memcpy(&bits, &fp, sizeof(bits));
    return bits;
}
151
/**
 * Build a float from a raw bit pattern.
 *
 * Only the low 32 bits of bits are used. The bytes are copied with memcpy
 * instead of being read back through a union member other than the one
 * last written, which ISO C++ leaves undefined.
 *
 * @param bits Bit pattern; the upper 32 bits are discarded.
 * @param junk Unused value; its type selects this overload.
 * @return The float whose object representation is the low 32 bits.
 */
static inline float
bitsToFp(uint64_t bits, float junk)
{
    (void)junk;
    const uint32_t low = static_cast<uint32_t>(bits);
    float fp;
    std::memcpy(&fp, &low, sizeof(fp));
    return fp;
}
163
/**
 * Build a double from a raw bit pattern.
 *
 * The bytes are copied with memcpy instead of being read back through a
 * union member other than the one last written, which ISO C++ leaves
 * undefined.
 *
 * @param bits Bit pattern to reinterpret.
 * @param junk Unused value; its type selects this overload.
 * @return The double whose object representation is bits.
 */
static inline double
bitsToFp(uint64_t bits, double junk)
{
    (void)junk;
    double fp;
    std::memcpy(&fp, &bits, sizeof(fp));
    return fp;
}
175
176template <class fpType>
177static inline fpType
178fixNan(FPSCR fpscr, fpType val, fpType op1, fpType op2)
179{
180 if (std::isnan(val)) {
181 const bool single = (sizeof(val) == sizeof(float));
182 const uint64_t qnan = single ? 0x7fc00000 : ULL(0x7ff8000000000000);
183 const bool nan1 = std::isnan(op1);
184 const bool nan2 = std::isnan(op2);
185 const bool signal1 = nan1 && ((fpToBits(op1) & qnan) != qnan);
186 const bool signal2 = nan2 && ((fpToBits(op2) & qnan) != qnan);
187 fpType junk = 0.0;
188 if ((!nan1 && !nan2) || (fpscr.dn == 1)) {
189 val = bitsToFp(qnan, junk);
190 } else if (signal1) {
191 val = bitsToFp(fpToBits(op1) | qnan, junk);
192 } else if (signal2) {
193 val = bitsToFp(fpToBits(op2) | qnan, junk);
194 } else if (nan1) {
195 val = op1;
196 } else if (nan2) {
197 val = op2;
198 }
199 }
200 return val;
201}
202
203static inline uint64_t
204vfpFpSToFixed(float val, bool isSigned, bool half, uint8_t imm)
205{
206 fesetround(FeRoundZero);
207 val = val * powf(2.0, imm);
208 __asm__ __volatile__("" : "=m" (val) : "m" (val));
209 feclearexcept(FeAllExceptions);
210 __asm__ __volatile__("" : "=m" (val) : "m" (val));
211 float origVal = val;
212 val = rintf(val);
213 int fpType = std::fpclassify(val);
214 if (fpType == FP_SUBNORMAL || fpType == FP_NAN) {
215 if (fpType == FP_NAN) {
216 feraiseexcept(FeInvalid);
217 }
218 val = 0.0;
219 } else if (origVal != val) {
220 feraiseexcept(FeInexact);
221 }
222
223 if (isSigned) {
224 if (half) {
225 if ((double)val < (int16_t)(1 << 15)) {
226 feraiseexcept(FeInvalid);
227 feclearexcept(FeInexact);
228 return (int16_t)(1 << 15);
229 }
230 if ((double)val > (int16_t)mask(15)) {
231 feraiseexcept(FeInvalid);
232 feclearexcept(FeInexact);
233 return (int16_t)mask(15);
234 }
235 return (int16_t)val;
236 } else {
237 if ((double)val < (int32_t)(1 << 31)) {
238 feraiseexcept(FeInvalid);
239 feclearexcept(FeInexact);
240 return (int32_t)(1 << 31);
241 }
242 if ((double)val > (int32_t)mask(31)) {
243 feraiseexcept(FeInvalid);
244 feclearexcept(FeInexact);
245 return (int32_t)mask(31);
246 }
247 return (int32_t)val;
248 }
249 } else {
250 if (half) {
251 if ((double)val < 0) {
252 feraiseexcept(FeInvalid);
253 feclearexcept(FeInexact);
254 return 0;
255 }
256 if ((double)val > (mask(16))) {
257 feraiseexcept(FeInvalid);
258 feclearexcept(FeInexact);
259 return mask(16);
260 }
261 return (uint16_t)val;
262 } else {
263 if ((double)val < 0) {
264 feraiseexcept(FeInvalid);
265 feclearexcept(FeInexact);
266 return 0;
267 }
268 if ((double)val > (mask(32))) {
269 feraiseexcept(FeInvalid);
270 feclearexcept(FeInexact);
271 return mask(32);
272 }
273 return (uint32_t)val;
274 }
275 }
276}
277
278static inline float
279vfpUFixedToFpS(uint32_t val, bool half, uint8_t imm)
280{
281 fesetround(FeRoundNearest);
282 if (half)
283 val = (uint16_t)val;
284 float scale = powf(2.0, imm);
285 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
286 feclearexcept(FeAllExceptions);
287 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
288 return val / scale;
289}
290
291static inline float
292vfpSFixedToFpS(int32_t val, bool half, uint8_t imm)
293{
294 fesetround(FeRoundNearest);
295 if (half)
296 val = sext<16>(val & mask(16));
297 float scale = powf(2.0, imm);
298 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
299 feclearexcept(FeAllExceptions);
300 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
301 return val / scale;
302}
303
304static inline uint64_t
305vfpFpDToFixed(double val, bool isSigned, bool half, uint8_t imm)
306{
307 fesetround(FeRoundNearest);
308 val = val * pow(2.0, imm);
309 __asm__ __volatile__("" : "=m" (val) : "m" (val));
310 fesetround(FeRoundZero);
311 feclearexcept(FeAllExceptions);
312 __asm__ __volatile__("" : "=m" (val) : "m" (val));
313 double origVal = val;
314 val = rint(val);
315 int fpType = std::fpclassify(val);
316 if (fpType == FP_SUBNORMAL || fpType == FP_NAN) {
317 if (fpType == FP_NAN) {
318 feraiseexcept(FeInvalid);
319 }
320 val = 0.0;
321 } else if (origVal != val) {
322 feraiseexcept(FeInexact);
323 }
324 if (isSigned) {
325 if (half) {
326 if (val < (int16_t)(1 << 15)) {
327 feraiseexcept(FeInvalid);
328 feclearexcept(FeInexact);
329 return (int16_t)(1 << 15);
330 }
331 if (val > (int16_t)mask(15)) {
332 feraiseexcept(FeInvalid);
333 feclearexcept(FeInexact);
334 return (int16_t)mask(15);
335 }
336 return (int16_t)val;
337 } else {
338 if (val < (int32_t)(1 << 31)) {
339 feraiseexcept(FeInvalid);
340 feclearexcept(FeInexact);
341 return (int32_t)(1 << 31);
342 }
343 if (val > (int32_t)mask(31)) {
344 feraiseexcept(FeInvalid);
345 feclearexcept(FeInexact);
346 return (int32_t)mask(31);
347 }
348 return (int32_t)val;
349 }
350 } else {
351 if (half) {
352 if (val < 0) {
353 feraiseexcept(FeInvalid);
354 feclearexcept(FeInexact);
355 return 0;
356 }
357 if (val > mask(16)) {
358 feraiseexcept(FeInvalid);
359 feclearexcept(FeInexact);
360 return mask(16);
361 }
362 return (uint16_t)val;
363 } else {
364 if (val < 0) {
365 feraiseexcept(FeInvalid);
366 feclearexcept(FeInexact);
367 return 0;
368 }
369 if (val > mask(32)) {
370 feraiseexcept(FeInvalid);
371 feclearexcept(FeInexact);
372 return mask(32);
373 }
374 return (uint32_t)val;
375 }
376 }
377}
378
379static inline double
380vfpUFixedToFpD(uint32_t val, bool half, uint8_t imm)
381{
382 fesetround(FeRoundNearest);
383 if (half)
384 val = (uint16_t)val;
385 double scale = pow(2.0, imm);
386 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
387 feclearexcept(FeAllExceptions);
388 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
389 return val / scale;
390}
391
392static inline double
393vfpSFixedToFpD(int32_t val, bool half, uint8_t imm)
394{
395 fesetround(FeRoundNearest);
396 if (half)
397 val = sext<16>(val & mask(16));
398 double scale = pow(2.0, imm);
399 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
400 feclearexcept(FeAllExceptions);
401 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
402 return val / scale;
403}
404
405typedef int VfpSavedState;
406
407static inline VfpSavedState
408prepVfpFpscr(FPSCR fpscr)
409{
410 int roundingMode = fegetround();
411 feclearexcept(FeAllExceptions);
412 switch (fpscr.rMode) {
413 case VfpRoundNearest:
414 fesetround(FeRoundNearest);
415 break;
416 case VfpRoundUpward:
417 fesetround(FeRoundUpward);
418 break;
419 case VfpRoundDown:
420 fesetround(FeRoundDown);
421 break;
422 case VfpRoundZero:
423 fesetround(FeRoundZero);
424 break;
425 }
426 return roundingMode;
427}
428
429static inline FPSCR
430setVfpFpscr(FPSCR fpscr, VfpSavedState state)
431{
432 int exceptions = fetestexcept(FeAllExceptions);
433 if (exceptions & FeInvalid) {
434 fpscr.ioc = 1;
435 }
436 if (exceptions & FeDivByZero) {
437 fpscr.dzc = 1;
438 }
439 if (exceptions & FeOverflow) {
440 fpscr.ofc = 1;
441 }
442 if (exceptions & FeUnderflow) {
443 fpscr.ufc = 1;
444 }
445 if (exceptions & FeInexact) {
446 fpscr.ixc = 1;
447 }
448 fesetround(state);
449 return fpscr;
450}
451
452class VfpMacroOp : public PredMacroOp
453{
454 public:
455 static bool
456 inScalarBank(IntRegIndex idx)
457 {
458 return (idx % 32) < 8;
459 }
460
461 protected:
462 bool wide;
463
464 VfpMacroOp(const char *mnem, ExtMachInst _machInst,
465 OpClass __opClass, bool _wide) :
466 PredMacroOp(mnem, _machInst, __opClass), wide(_wide)
467 {}
468
469 IntRegIndex
470 addStride(IntRegIndex idx, unsigned stride)
471 {
472 if (wide) {
473 stride *= 2;
474 }
475 unsigned offset = idx % 8;
476 idx = (IntRegIndex)(idx - offset);
477 offset += stride;
478 idx = (IntRegIndex)(idx + (offset % 8));
479 return idx;
480 }
481
482 void
483 nextIdxs(IntRegIndex &dest, IntRegIndex &op1, IntRegIndex &op2)
484 {
485 unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2;
486 assert(!inScalarBank(dest));
487 dest = addStride(dest, stride);
488 op1 = addStride(op1, stride);
489 if (!inScalarBank(op2)) {
490 op2 = addStride(op2, stride);
491 }
492 }
493
494 void
495 nextIdxs(IntRegIndex &dest, IntRegIndex &op1)
496 {
497 unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2;
498 assert(!inScalarBank(dest));
499 dest = addStride(dest, stride);
500 if (!inScalarBank(op1)) {
501 op1 = addStride(op1, stride);
502 }
503 }
504
505 void
506 nextIdxs(IntRegIndex &dest)
507 {
508 unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2;
509 assert(!inScalarBank(dest));
510 dest = addStride(dest, stride);
511 }
512};
513
514class VfpRegRegOp : public RegRegOp
515{
516 protected:
517 VfpRegRegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
518 IntRegIndex _dest, IntRegIndex _op1,
519 VfpMicroMode mode = VfpNotAMicroop) :
520 RegRegOp(mnem, _machInst, __opClass, _dest, _op1)
521 {
522 setVfpMicroFlags(mode, flags);
523 }
524};
525
526class VfpRegImmOp : public RegImmOp
527{
528 protected:
529 VfpRegImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
530 IntRegIndex _dest, uint64_t _imm,
531 VfpMicroMode mode = VfpNotAMicroop) :
532 RegImmOp(mnem, _machInst, __opClass, _dest, _imm)
533 {
534 setVfpMicroFlags(mode, flags);
535 }
536};
537
538class VfpRegRegImmOp : public RegRegImmOp
539{
540 protected:
541 VfpRegRegImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
542 IntRegIndex _dest, IntRegIndex _op1,
543 uint64_t _imm, VfpMicroMode mode = VfpNotAMicroop) :
544 RegRegImmOp(mnem, _machInst, __opClass, _dest, _op1, _imm)
545 {
546 setVfpMicroFlags(mode, flags);
547 }
548};
549
550class VfpRegRegRegOp : public RegRegRegOp
551{
552 protected:
553 VfpRegRegRegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
554 IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2,
555 VfpMicroMode mode = VfpNotAMicroop) :
556 RegRegRegOp(mnem, _machInst, __opClass, _dest, _op1, _op2)
557 {
558 setVfpMicroFlags(mode, flags);
559 }
560};
561
562}
563
564#endif //__ARCH_ARM_INSTS_VFP_HH__