vfp.hh (7388:293878a9d220) vfp.hh (7396:53454ef35b46)
1/*
2 * Copyright (c) 2010 ARM Limited
3 * All rights reserved
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software

--- 96 unchanged lines hidden (view full) ---

105 VfpRoundZero = 3
106};
107
108template <class fpType>
109static inline void
110vfpFlushToZero(uint32_t &_fpscr, fpType &op)
111{
112 FPSCR fpscr = _fpscr;
1/*
2 * Copyright (c) 2010 ARM Limited
3 * All rights reserved
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software

--- 96 unchanged lines hidden (view full) ---

105 VfpRoundZero = 3
106};
107
108template <class fpType>
109static inline void
110vfpFlushToZero(uint32_t &_fpscr, fpType &op)
111{
112 FPSCR fpscr = _fpscr;
113 fpType junk = 0.0;
113 if (fpscr.fz == 1 && (std::fpclassify(op) == FP_SUBNORMAL)) {
114 fpscr.idc = 1;
114 if (fpscr.fz == 1 && (std::fpclassify(op) == FP_SUBNORMAL)) {
115 fpscr.idc = 1;
115 op = 0;
116 uint64_t bitMask = ULL(0x1) << (sizeof(fpType) * 8 - 1);
117 op = bitsToFp(fpToBits(op) & bitMask, junk);
116 }
117 _fpscr = fpscr;
118}
119
120template <class fpType>
121static inline void
122vfpFlushToZero(uint32_t &fpscr, fpType &op1, fpType &op2)
123{
124 vfpFlushToZero(fpscr, op1);
125 vfpFlushToZero(fpscr, op2);
126}
127
118 }
119 _fpscr = fpscr;
120}
121
122template <class fpType>
123static inline void
124vfpFlushToZero(uint32_t &fpscr, fpType &op1, fpType &op2)
125{
126 vfpFlushToZero(fpscr, op1);
127 vfpFlushToZero(fpscr, op2);
128}
129
130template <class fpType>
131static inline bool
132flushToZero(fpType &op)
133{
134 fpType junk = 0.0;
135 if (std::fpclassify(op) == FP_SUBNORMAL) {
136 uint64_t bitMask = ULL(0x1) << (sizeof(fpType) * 8 - 1);
137 op = bitsToFp(fpToBits(op) & bitMask, junk);
138 return true;
139 }
140 return false;
141}
142
/**
 * Flush two operands with flushToZero(op).
 *
 * @param op1 First operand, flushed in place if subnormal.
 * @param op2 Second operand, flushed in place if subnormal.
 * @return true if at least one operand was flushed.
 */
template <class fpType>
static inline bool
flushToZero(fpType &op1, fpType &op2)
{
    // Each call is evaluated into its own flag so that op2 is still
    // processed when op1 was flushed (a plain `a() || b()` would skip it).
    const bool flushed1 = flushToZero(op1);
    const bool flushed2 = flushToZero(op2);
    return flushed1 || flushed2;
}
151
128static inline uint32_t
129fpToBits(float fp)
130{
131 union
132 {
133 float fp;
134 uint32_t bits;
135 } val;

--- 32 unchanged lines hidden (view full) ---

168 {
169 double fp;
170 uint64_t bits;
171 } val;
172 val.bits = bits;
173 return val.fp;
174}
175
152static inline uint32_t
153fpToBits(float fp)
154{
155 union
156 {
157 float fp;
158 uint32_t bits;
159 } val;

--- 32 unchanged lines hidden (view full) ---

192 {
193 double fp;
194 uint64_t bits;
195 } val;
196 val.bits = bits;
197 return val.fp;
198}
199
// Host rounding mode as returned by fegetround(); produced by
// prepVfpFpscr()/prepFpState() and handed back to fesetround() by
// setVfpFpscr()/finishVfp().
typedef int VfpSavedState;
201
202static inline VfpSavedState
203prepVfpFpscr(FPSCR fpscr)
204{
205 int roundingMode = fegetround();
206 feclearexcept(FeAllExceptions);
207 switch (fpscr.rMode) {
208 case VfpRoundNearest:
209 fesetround(FeRoundNearest);
210 break;
211 case VfpRoundUpward:
212 fesetround(FeRoundUpward);
213 break;
214 case VfpRoundDown:
215 fesetround(FeRoundDown);
216 break;
217 case VfpRoundZero:
218 fesetround(FeRoundZero);
219 break;
220 }
221 return roundingMode;
222}
223
224static inline VfpSavedState
225prepFpState(uint32_t rMode)
226{
227 int roundingMode = fegetround();
228 feclearexcept(FeAllExceptions);
229 switch (rMode) {
230 case VfpRoundNearest:
231 fesetround(FeRoundNearest);
232 break;
233 case VfpRoundUpward:
234 fesetround(FeRoundUpward);
235 break;
236 case VfpRoundDown:
237 fesetround(FeRoundDown);
238 break;
239 case VfpRoundZero:
240 fesetround(FeRoundZero);
241 break;
242 }
243 return roundingMode;
244}
245
246static inline FPSCR
247setVfpFpscr(FPSCR fpscr, VfpSavedState state)
248{
249 int exceptions = fetestexcept(FeAllExceptions);
250 if (exceptions & FeInvalid) {
251 fpscr.ioc = 1;
252 }
253 if (exceptions & FeDivByZero) {
254 fpscr.dzc = 1;
255 }
256 if (exceptions & FeOverflow) {
257 fpscr.ofc = 1;
258 }
259 if (exceptions & FeUnderflow) {
260 fpscr.ufc = 1;
261 }
262 if (exceptions & FeInexact) {
263 fpscr.ixc = 1;
264 }
265 fesetround(state);
266 return fpscr;
267}
268
269static inline void
270finishVfp(FPSCR &fpscr, VfpSavedState state)
271{
272 int exceptions = fetestexcept(FeAllExceptions);
273 bool underflow = false;
274 if (exceptions & FeInvalid) {
275 fpscr.ioc = 1;
276 }
277 if (exceptions & FeDivByZero) {
278 fpscr.dzc = 1;
279 }
280 if (exceptions & FeOverflow) {
281 fpscr.ofc = 1;
282 }
283 if (exceptions & FeUnderflow) {
284 underflow = true;
285 fpscr.ufc = 1;
286 }
287 if ((exceptions & FeInexact) && !(underflow && fpscr.fz)) {
288 fpscr.ixc = 1;
289 }
290 fesetround(state);
291}
292
176template <class fpType>
177static inline fpType
178fixDest(FPSCR fpscr, fpType val, fpType op1)
179{
180 int fpClass = std::fpclassify(val);
181 fpType junk = 0.0;
182 if (fpClass == FP_NAN) {
183 const bool single = (sizeof(val) == sizeof(float));
184 const uint64_t qnan = single ? 0x7fc00000 : ULL(0x7ff8000000000000);
185 const bool nan = std::isnan(op1);
186 if (!nan || (fpscr.dn == 1)) {
187 val = bitsToFp(qnan, junk);
188 } else if (nan) {
189 val = bitsToFp(fpToBits(op1) | qnan, junk);
190 }
191 } else if (fpClass == FP_SUBNORMAL && fpscr.fz == 1) {
192 // Turn val into a zero with the correct sign;
193 uint64_t bitMask = ULL(0x1) << (sizeof(fpType) * 8 - 1);
194 val = bitsToFp(fpToBits(val) & bitMask, junk);
293template <class fpType>
294static inline fpType
295fixDest(FPSCR fpscr, fpType val, fpType op1)
296{
297 int fpClass = std::fpclassify(val);
298 fpType junk = 0.0;
299 if (fpClass == FP_NAN) {
300 const bool single = (sizeof(val) == sizeof(float));
301 const uint64_t qnan = single ? 0x7fc00000 : ULL(0x7ff8000000000000);
302 const bool nan = std::isnan(op1);
303 if (!nan || (fpscr.dn == 1)) {
304 val = bitsToFp(qnan, junk);
305 } else if (nan) {
306 val = bitsToFp(fpToBits(op1) | qnan, junk);
307 }
308 } else if (fpClass == FP_SUBNORMAL && fpscr.fz == 1) {
309 // Turn val into a zero with the correct sign;
310 uint64_t bitMask = ULL(0x1) << (sizeof(fpType) * 8 - 1);
311 val = bitsToFp(fpToBits(val) & bitMask, junk);
312 feclearexcept(FeInexact);
195 feraiseexcept(FeUnderflow);
196 }
197 return val;
198}
199
200template <class fpType>
201static inline fpType
202fixDest(FPSCR fpscr, fpType val, fpType op1, fpType op2)

--- 17 unchanged lines hidden (view full) ---

220 val = op1;
221 } else if (nan2) {
222 val = op2;
223 }
224 } else if (fpClass == FP_SUBNORMAL && fpscr.fz == 1) {
225 // Turn val into a zero with the correct sign;
226 uint64_t bitMask = ULL(0x1) << (sizeof(fpType) * 8 - 1);
227 val = bitsToFp(fpToBits(val) & bitMask, junk);
313 feraiseexcept(FeUnderflow);
314 }
315 return val;
316}
317
318template <class fpType>
319static inline fpType
320fixDest(FPSCR fpscr, fpType val, fpType op1, fpType op2)

--- 17 unchanged lines hidden (view full) ---

338 val = op1;
339 } else if (nan2) {
340 val = op2;
341 }
342 } else if (fpClass == FP_SUBNORMAL && fpscr.fz == 1) {
343 // Turn val into a zero with the correct sign;
344 uint64_t bitMask = ULL(0x1) << (sizeof(fpType) * 8 - 1);
345 val = bitsToFp(fpToBits(val) & bitMask, junk);
346 feclearexcept(FeInexact);
228 feraiseexcept(FeUnderflow);
229 }
230 return val;
231}
232
233template <class fpType>
234static inline fpType
347 feraiseexcept(FeUnderflow);
348 }
349 return val;
350}
351
352template <class fpType>
353static inline fpType
235fixMultDest(FPSCR fpscr, fpType val, fpType op1, fpType op2)
236{
237 fpType mid = fixDest(fpscr, val, op1, op2);
238 const bool single = (sizeof(fpType) == sizeof(float));
239 const fpType junk = 0.0;
240 if ((single && (val == bitsToFp(0x00800000, junk) ||
241 val == bitsToFp(0x80800000, junk))) ||
242 (!single && (val == bitsToFp(ULL(0x0010000000000000), junk) ||
243 val == bitsToFp(ULL(0x8010000000000000), junk)))
244 ) {
245 __asm__ __volatile__("" : "=m" (op1) : "m" (op1));
246 fesetround(FeRoundZero);
247 fpType temp = 0.0;
248 __asm__ __volatile__("" : "=m" (temp) : "m" (temp));
249 temp = op1 * op2;
250 if (!std::isnormal(temp)) {
251 feraiseexcept(FeUnderflow);
252 }
253 __asm__ __volatile__("" :: "m" (temp));
254 }
255 return mid;
256}
257
258template <class fpType>
259static inline fpType
260fixDivDest(FPSCR fpscr, fpType val, fpType op1, fpType op2)
261{
262 fpType mid = fixDest(fpscr, val, op1, op2);
263 const bool single = (sizeof(fpType) == sizeof(float));
264 const fpType junk = 0.0;
265 if ((single && (val == bitsToFp(0x00800000, junk) ||
266 val == bitsToFp(0x80800000, junk))) ||
267 (!single && (val == bitsToFp(ULL(0x0010000000000000), junk) ||
268 val == bitsToFp(ULL(0x8010000000000000), junk)))
269 ) {
270 __asm__ __volatile__("" : "=m" (op1) : "m" (op1));
271 fesetround(FeRoundZero);
272 fpType temp = 0.0;
273 __asm__ __volatile__("" : "=m" (temp) : "m" (temp));
274 temp = op1 / op2;
354fixDivDest(FPSCR fpscr, fpType val, fpType op1, fpType op2)
355{
356 fpType mid = fixDest(fpscr, val, op1, op2);
357 const bool single = (sizeof(fpType) == sizeof(float));
358 const fpType junk = 0.0;
359 if ((single && (val == bitsToFp(0x00800000, junk) ||
360 val == bitsToFp(0x80800000, junk))) ||
361 (!single && (val == bitsToFp(ULL(0x0010000000000000), junk) ||
362 val == bitsToFp(ULL(0x8010000000000000), junk)))
363 ) {
364 __asm__ __volatile__("" : "=m" (op1) : "m" (op1));
365 fesetround(FeRoundZero);
366 fpType temp = 0.0;
367 __asm__ __volatile__("" : "=m" (temp) : "m" (temp));
368 temp = op1 / op2;
275 if (!std::isnormal(temp)) {
369 if (flushToZero(temp)) {
276 feraiseexcept(FeUnderflow);
370 feraiseexcept(FeUnderflow);
371 if (fpscr.fz) {
372 feclearexcept(FeInexact);
373 mid = temp;
374 }
277 }
278 __asm__ __volatile__("" :: "m" (temp));
279 }
280 return mid;
281}
282
283static inline float
284fixFpDFpSDest(FPSCR fpscr, double val)
285{
286 const float junk = 0.0;
287 float op1 = 0.0;
288 if (std::isnan(val)) {
289 uint64_t valBits = fpToBits(val);
290 uint32_t op1Bits = bits(valBits, 50, 29) |
291 (mask(9) << 22) |
292 (bits(valBits, 63) << 31);
293 op1 = bitsToFp(op1Bits, junk);
294 }
295 float mid = fixDest(fpscr, (float)val, op1);
375 }
376 __asm__ __volatile__("" :: "m" (temp));
377 }
378 return mid;
379}
380
381static inline float
382fixFpDFpSDest(FPSCR fpscr, double val)
383{
384 const float junk = 0.0;
385 float op1 = 0.0;
386 if (std::isnan(val)) {
387 uint64_t valBits = fpToBits(val);
388 uint32_t op1Bits = bits(valBits, 50, 29) |
389 (mask(9) << 22) |
390 (bits(valBits, 63) << 31);
391 op1 = bitsToFp(op1Bits, junk);
392 }
393 float mid = fixDest(fpscr, (float)val, op1);
394 if (fpscr.fz && fetestexcept(FeUnderflow | FeInexact) ==
395 (FeUnderflow | FeInexact)) {
396 feclearexcept(FeInexact);
397 }
296 if (mid == bitsToFp(0x00800000, junk) ||
297 mid == bitsToFp(0x80800000, junk)) {
298 __asm__ __volatile__("" : "=m" (val) : "m" (val));
299 fesetround(FeRoundZero);
300 float temp = 0.0;
301 __asm__ __volatile__("" : "=m" (temp) : "m" (temp));
302 temp = val;
398 if (mid == bitsToFp(0x00800000, junk) ||
399 mid == bitsToFp(0x80800000, junk)) {
400 __asm__ __volatile__("" : "=m" (val) : "m" (val));
401 fesetround(FeRoundZero);
402 float temp = 0.0;
403 __asm__ __volatile__("" : "=m" (temp) : "m" (temp));
404 temp = val;
303 if (!std::isnormal(temp)) {
405 if (flushToZero(temp)) {
304 feraiseexcept(FeUnderflow);
406 feraiseexcept(FeUnderflow);
407 if (fpscr.fz) {
408 feclearexcept(FeInexact);
409 mid = temp;
410 }
305 }
306 __asm__ __volatile__("" :: "m" (temp));
307 }
308 return mid;
309}
310
411 }
412 __asm__ __volatile__("" :: "m" (temp));
413 }
414 return mid;
415}
416
417static inline double
418fixFpSFpDDest(FPSCR fpscr, float val)
419{
420 const double junk = 0.0;
421 double op1 = 0.0;
422 if (std::isnan(val)) {
423 uint32_t valBits = fpToBits(val);
424 uint64_t op1Bits = ((uint64_t)bits(valBits, 21, 0) << 29) |
425 (mask(12) << 51) |
426 ((uint64_t)bits(valBits, 31) << 63);
427 op1 = bitsToFp(op1Bits, junk);
428 }
429 double mid = fixDest(fpscr, (double)val, op1);
430 if (mid == bitsToFp(ULL(0x0010000000000000), junk) ||
431 mid == bitsToFp(ULL(0x8010000000000000), junk)) {
432 __asm__ __volatile__("" : "=m" (val) : "m" (val));
433 fesetround(FeRoundZero);
434 double temp = 0.0;
435 __asm__ __volatile__("" : "=m" (temp) : "m" (temp));
436 temp = val;
437 if (flushToZero(temp)) {
438 feraiseexcept(FeUnderflow);
439 if (fpscr.fz) {
440 feclearexcept(FeInexact);
441 mid = temp;
442 }
443 }
444 __asm__ __volatile__("" :: "m" (temp));
445 }
446 return mid;
447}
448
449static inline double
450makeDouble(uint32_t low, uint32_t high)
451{
452 double junk = 0.0;
453 return bitsToFp((uint64_t)low | ((uint64_t)high << 32), junk);
454}
455
456static inline uint32_t
457lowFromDouble(double val)
458{
459 return fpToBits(val);
460}
461
462static inline uint32_t
463highFromDouble(double val)
464{
465 return fpToBits(val) >> 32;
466}
467
311static inline uint64_t
312vfpFpSToFixed(float val, bool isSigned, bool half,
313 uint8_t imm, bool rzero = true)
314{
468static inline uint64_t
469vfpFpSToFixed(float val, bool isSigned, bool half,
470 uint8_t imm, bool rzero = true)
471{
315 int rmode = fegetround();
472 int rmode = rzero ? FeRoundZero : fegetround();
473 __asm__ __volatile__("" : "=m" (rmode) : "m" (rmode));
316 fesetround(FeRoundNearest);
317 val = val * powf(2.0, imm);
318 __asm__ __volatile__("" : "=m" (val) : "m" (val));
474 fesetround(FeRoundNearest);
475 val = val * powf(2.0, imm);
476 __asm__ __volatile__("" : "=m" (val) : "m" (val));
319 if (rzero)
320 fesetround(FeRoundZero);
321 else
322 fesetround(rmode);
477 fesetround(rmode);
323 feclearexcept(FeAllExceptions);
324 __asm__ __volatile__("" : "=m" (val) : "m" (val));
325 float origVal = val;
326 val = rintf(val);
327 int fpType = std::fpclassify(val);
328 if (fpType == FP_SUBNORMAL || fpType == FP_NAN) {
329 if (fpType == FP_NAN) {
330 feraiseexcept(FeInvalid);
331 }
332 val = 0.0;
333 } else if (origVal != val) {
478 feclearexcept(FeAllExceptions);
479 __asm__ __volatile__("" : "=m" (val) : "m" (val));
480 float origVal = val;
481 val = rintf(val);
482 int fpType = std::fpclassify(val);
483 if (fpType == FP_SUBNORMAL || fpType == FP_NAN) {
484 if (fpType == FP_NAN) {
485 feraiseexcept(FeInvalid);
486 }
487 val = 0.0;
488 } else if (origVal != val) {
489 switch (rmode) {
490 case FeRoundNearest:
491 if (origVal - val > 0.5)
492 val += 1.0;
493 else if (val - origVal > 0.5)
494 val -= 1.0;
495 break;
496 case FeRoundDown:
497 if (origVal < val)
498 val -= 1.0;
499 break;
500 case FeRoundUpward:
501 if (origVal > val)
502 val += 1.0;
503 break;
504 }
334 feraiseexcept(FeInexact);
335 }
336
337 if (isSigned) {
338 if (half) {
339 if ((double)val < (int16_t)(1 << 15)) {
340 feraiseexcept(FeInvalid);
341 feclearexcept(FeInexact);

--- 72 unchanged lines hidden (view full) ---

414 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
415 return fixDivDest(fpscr, val / scale, (float)val, scale);
416}
417
418static inline uint64_t
419vfpFpDToFixed(double val, bool isSigned, bool half,
420 uint8_t imm, bool rzero = true)
421{
505 feraiseexcept(FeInexact);
506 }
507
508 if (isSigned) {
509 if (half) {
510 if ((double)val < (int16_t)(1 << 15)) {
511 feraiseexcept(FeInvalid);
512 feclearexcept(FeInexact);

--- 72 unchanged lines hidden (view full) ---

585 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
586 return fixDivDest(fpscr, val / scale, (float)val, scale);
587}
588
589static inline uint64_t
590vfpFpDToFixed(double val, bool isSigned, bool half,
591 uint8_t imm, bool rzero = true)
592{
422 int rmode = fegetround();
593 int rmode = rzero ? FeRoundZero : fegetround();
423 fesetround(FeRoundNearest);
424 val = val * pow(2.0, imm);
425 __asm__ __volatile__("" : "=m" (val) : "m" (val));
594 fesetround(FeRoundNearest);
595 val = val * pow(2.0, imm);
596 __asm__ __volatile__("" : "=m" (val) : "m" (val));
426 if (rzero)
427 fesetround(FeRoundZero);
428 else
429 fesetround(rmode);
597 fesetround(rmode);
430 feclearexcept(FeAllExceptions);
431 __asm__ __volatile__("" : "=m" (val) : "m" (val));
432 double origVal = val;
433 val = rint(val);
434 int fpType = std::fpclassify(val);
435 if (fpType == FP_SUBNORMAL || fpType == FP_NAN) {
436 if (fpType == FP_NAN) {
437 feraiseexcept(FeInvalid);
438 }
439 val = 0.0;
440 } else if (origVal != val) {
598 feclearexcept(FeAllExceptions);
599 __asm__ __volatile__("" : "=m" (val) : "m" (val));
600 double origVal = val;
601 val = rint(val);
602 int fpType = std::fpclassify(val);
603 if (fpType == FP_SUBNORMAL || fpType == FP_NAN) {
604 if (fpType == FP_NAN) {
605 feraiseexcept(FeInvalid);
606 }
607 val = 0.0;
608 } else if (origVal != val) {
609 switch (rmode) {
610 case FeRoundNearest:
611 if (origVal - val > 0.5)
612 val += 1.0;
613 else if (val - origVal > 0.5)
614 val -= 1.0;
615 break;
616 case FeRoundDown:
617 if (origVal < val)
618 val -= 1.0;
619 break;
620 case FeRoundUpward:
621 if (origVal > val)
622 val += 1.0;
623 break;
624 }
441 feraiseexcept(FeInexact);
442 }
443 if (isSigned) {
444 if (half) {
445 if (val < (int16_t)(1 << 15)) {
446 feraiseexcept(FeInvalid);
447 feclearexcept(FeInexact);
448 return (int16_t)(1 << 15);

--- 67 unchanged lines hidden (view full) ---

516 val = sext<16>(val & mask(16));
517 double scale = pow(2.0, imm);
518 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
519 feclearexcept(FeAllExceptions);
520 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
521 return fixDivDest(fpscr, val / scale, (double)val, scale);
522}
523
625 feraiseexcept(FeInexact);
626 }
627 if (isSigned) {
628 if (half) {
629 if (val < (int16_t)(1 << 15)) {
630 feraiseexcept(FeInvalid);
631 feclearexcept(FeInexact);
632 return (int16_t)(1 << 15);

--- 67 unchanged lines hidden (view full) ---

700 val = sext<16>(val & mask(16));
701 double scale = pow(2.0, imm);
702 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
703 feclearexcept(FeAllExceptions);
704 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
705 return fixDivDest(fpscr, val / scale, (double)val, scale);
706}
707
524typedef int VfpSavedState;
525
526static inline VfpSavedState
527prepVfpFpscr(FPSCR fpscr)
528{
529 int roundingMode = fegetround();
530 feclearexcept(FeAllExceptions);
531 switch (fpscr.rMode) {
532 case VfpRoundNearest:
533 fesetround(FeRoundNearest);
534 break;
535 case VfpRoundUpward:
536 fesetround(FeRoundUpward);
537 break;
538 case VfpRoundDown:
539 fesetround(FeRoundDown);
540 break;
541 case VfpRoundZero:
542 fesetround(FeRoundZero);
543 break;
544 }
545 return roundingMode;
546}
547
548static inline FPSCR
549setVfpFpscr(FPSCR fpscr, VfpSavedState state)
550{
551 int exceptions = fetestexcept(FeAllExceptions);
552 if (exceptions & FeInvalid) {
553 fpscr.ioc = 1;
554 }
555 if (exceptions & FeDivByZero) {
556 fpscr.dzc = 1;
557 }
558 if (exceptions & FeOverflow) {
559 fpscr.ofc = 1;
560 }
561 if (exceptions & FeUnderflow) {
562 fpscr.ufc = 1;
563 }
564 if (exceptions & FeInexact) {
565 fpscr.ixc = 1;
566 }
567 fesetround(state);
568 return fpscr;
569}
570
571class VfpMacroOp : public PredMacroOp
572{
573 public:
574 static bool
575 inScalarBank(IntRegIndex idx)
576 {
577 return (idx % 32) < 8;
578 }

--- 46 unchanged lines hidden (view full) ---

625 nextIdxs(IntRegIndex &dest)
626 {
627 unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2;
628 assert(!inScalarBank(dest));
629 dest = addStride(dest, stride);
630 }
631};
632
708class VfpMacroOp : public PredMacroOp
709{
710 public:
711 static bool
712 inScalarBank(IntRegIndex idx)
713 {
714 return (idx % 32) < 8;
715 }

--- 46 unchanged lines hidden (view full) ---

762 nextIdxs(IntRegIndex &dest)
763 {
764 unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2;
765 assert(!inScalarBank(dest));
766 dest = addStride(dest, stride);
767 }
768};
769
633class VfpRegRegOp : public RegRegOp
// Single-precision addition as a plain function, so it can be passed
// where a float (*)(float, float) is expected.
static inline float
fpAddS(float a, float b)
{
    return a + b;
}
775
// Double-precision addition as a plain function, so it can be passed
// where a double (*)(double, double) is expected.
static inline double
fpAddD(double a, double b)
{
    return a + b;
}
781
// Single-precision subtraction (a - b) as a plain function, so it can be
// passed where a float (*)(float, float) is expected.
static inline float
fpSubS(float a, float b)
{
    return a - b;
}
787
// Double-precision subtraction (a - b) as a plain function, so it can be
// passed where a double (*)(double, double) is expected.
static inline double
fpSubD(double a, double b)
{
    return a - b;
}
793
// Single-precision division (a / b) as a plain function, so it can be
// passed where a float (*)(float, float) is expected.
static inline float
fpDivS(float a, float b)
{
    return a / b;
}
799
// Double-precision division (a / b) as a plain function, so it can be
// passed where a double (*)(double, double) is expected.
static inline double
fpDivD(double a, double b)
{
    return a / b;
}
805
// Single-precision multiplication as a plain function, so it can be
// passed where a float (*)(float, float) is expected.
static inline float
fpMulS(float a, float b)
{
    return a * b;
}
811
// Double-precision multiplication as a plain function, so it can be
// passed where a double (*)(double, double) is expected.
static inline double
fpMulD(double a, double b)
{
    return a * b;
}
817
818class FpOp : public PredOp
819{
635 protected:
820 protected:
636 VfpRegRegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
637 IntRegIndex _dest, IntRegIndex _op1,
638 VfpMicroMode mode = VfpNotAMicroop) :
639 RegRegOp(mnem, _machInst, __opClass, _dest, _op1)
821 FpOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass) :
822 PredOp(mnem, _machInst, __opClass)
823 {}
824
825 virtual float
826 doOp(float op1, float op2) const
640 {
827 {
828 panic("Unimplemented version of doOp called.\n");
829 }
830
831 virtual float
832 doOp(float op1) const
833 {
834 panic("Unimplemented version of doOp called.\n");
835 }
836
837 virtual double
838 doOp(double op1, double op2) const
839 {
840 panic("Unimplemented version of doOp called.\n");
841 }
842
843 virtual double
844 doOp(double op1) const
845 {
846 panic("Unimplemented version of doOp called.\n");
847 }
848
849 double
850 dbl(uint32_t low, uint32_t high) const
851 {
852 double junk = 0.0;
853 return bitsToFp((uint64_t)low | ((uint64_t)high << 32), junk);
854 }
855
856 uint32_t
857 dblLow(double val) const
858 {
859 return fpToBits(val);
860 }
861
862 uint32_t
863 dblHi(double val) const
864 {
865 return fpToBits(val) >> 32;
866 }
867
868 template <class fpType>
869 fpType
870 binaryOp(FPSCR &fpscr, fpType op1, fpType op2,
871 fpType (*func)(fpType, fpType),
872 bool flush, uint32_t rMode) const
873 {
874 const bool single = (sizeof(fpType) == sizeof(float));
875 fpType junk = 0.0;
876
877 if (flush && flushToZero(op1, op2))
878 fpscr.idc = 1;
879 VfpSavedState state = prepFpState(rMode);
880 __asm__ __volatile__ ("" : "=m" (op1), "=m" (op2), "=m" (state)
881 : "m" (op1), "m" (op2), "m" (state));
882 fpType dest = func(op1, op2);
883 __asm__ __volatile__ ("" : "=m" (dest) : "m" (dest));
884
885 int fpClass = std::fpclassify(dest);
886 // Get NAN behavior right. This varies between x86 and ARM.
887 if (fpClass == FP_NAN) {
888 const bool single = (sizeof(fpType) == sizeof(float));
889 const uint64_t qnan =
890 single ? 0x7fc00000 : ULL(0x7ff8000000000000);
891 const bool nan1 = std::isnan(op1);
892 const bool nan2 = std::isnan(op2);
893 const bool signal1 = nan1 && ((fpToBits(op1) & qnan) != qnan);
894 const bool signal2 = nan2 && ((fpToBits(op2) & qnan) != qnan);
895 if ((!nan1 && !nan2) || (fpscr.dn == 1)) {
896 dest = bitsToFp(qnan, junk);
897 } else if (signal1) {
898 dest = bitsToFp(fpToBits(op1) | qnan, junk);
899 } else if (signal2) {
900 dest = bitsToFp(fpToBits(op2) | qnan, junk);
901 } else if (nan1) {
902 dest = op1;
903 } else if (nan2) {
904 dest = op2;
905 }
906 } else if (flush && flushToZero(dest)) {
907 feraiseexcept(FeUnderflow);
908 } else if ((
909 (single && (dest == bitsToFp(0x00800000, junk) ||
910 dest == bitsToFp(0x80800000, junk))) ||
911 (!single &&
912 (dest == bitsToFp(ULL(0x0010000000000000), junk) ||
913 dest == bitsToFp(ULL(0x8010000000000000), junk)))
914 ) && rMode != VfpRoundZero) {
915 /*
916 * Correct for the fact that underflow is detected -before- rounding
917 * in ARM and -after- rounding in x86.
918 */
919 fesetround(FeRoundZero);
920 __asm__ __volatile__ ("" : "=m" (op1), "=m" (op2)
921 : "m" (op1), "m" (op2));
922 fpType temp = func(op1, op2);
923 __asm__ __volatile__ ("" : "=m" (temp) : "m" (temp));
924 if (flush && flushToZero(temp)) {
925 dest = temp;
926 }
927 }
928 finishVfp(fpscr, state);
929 return dest;
930 }
931
932 template <class fpType>
933 fpType
934 unaryOp(FPSCR &fpscr, fpType op1,
935 fpType (*func)(fpType),
936 bool flush, uint32_t rMode) const
937 {
938 const bool single = (sizeof(fpType) == sizeof(float));
939 fpType junk = 0.0;
940
941 if (flush && flushToZero(op1))
942 fpscr.idc = 1;
943 VfpSavedState state = prepFpState(rMode);
944 __asm__ __volatile__ ("" : "=m" (op1), "=m" (state)
945 : "m" (op1), "m" (state));
946 fpType dest = func(op1);
947 __asm__ __volatile__ ("" : "=m" (dest) : "m" (dest));
948
949 int fpClass = std::fpclassify(dest);
950 // Get NAN behavior right. This varies between x86 and ARM.
951 if (fpClass == FP_NAN) {
952 const bool single = (sizeof(fpType) == sizeof(float));
953 const uint64_t qnan =
954 single ? 0x7fc00000 : ULL(0x7ff8000000000000);
955 const bool nan = std::isnan(op1);
956 if (!nan || fpscr.dn == 1) {
957 dest = bitsToFp(qnan, junk);
958 } else if (nan) {
959 dest = bitsToFp(fpToBits(op1) | qnan, junk);
960 }
961 } else if (flush && flushToZero(dest)) {
962 feraiseexcept(FeUnderflow);
963 } else if ((
964 (single && (dest == bitsToFp(0x00800000, junk) ||
965 dest == bitsToFp(0x80800000, junk))) ||
966 (!single &&
967 (dest == bitsToFp(ULL(0x0010000000000000), junk) ||
968 dest == bitsToFp(ULL(0x8010000000000000), junk)))
969 ) && rMode != VfpRoundZero) {
970 /*
971 * Correct for the fact that underflow is detected -before- rounding
972 * in ARM and -after- rounding in x86.
973 */
974 fesetround(FeRoundZero);
975 __asm__ __volatile__ ("" : "=m" (op1) : "m" (op1));
976 fpType temp = func(op1);
977 __asm__ __volatile__ ("" : "=m" (temp) : "m" (temp));
978 if (flush && flushToZero(temp)) {
979 dest = temp;
980 }
981 }
982 finishVfp(fpscr, state);
983 return dest;
984 }
985};
986
987class FpRegRegOp : public FpOp
988{
989 protected:
990 IntRegIndex dest;
991 IntRegIndex op1;
992
993 FpRegRegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
994 IntRegIndex _dest, IntRegIndex _op1,
995 VfpMicroMode mode = VfpNotAMicroop) :
996 FpOp(mnem, _machInst, __opClass), dest(_dest), op1(_op1)
997 {
641 setVfpMicroFlags(mode, flags);
642 }
998 setVfpMicroFlags(mode, flags);
999 }
1000
1001 std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
643};
644
1002};
1003
645class VfpRegImmOp : public RegImmOp
1004class FpRegImmOp : public FpOp
646{
647 protected:
1005{
1006 protected:
648 VfpRegImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
649 IntRegIndex _dest, uint64_t _imm,
650 VfpMicroMode mode = VfpNotAMicroop) :
651 RegImmOp(mnem, _machInst, __opClass, _dest, _imm)
1007 IntRegIndex dest;
1008 uint64_t imm;
1009
1010 FpRegImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
1011 IntRegIndex _dest, uint64_t _imm,
1012 VfpMicroMode mode = VfpNotAMicroop) :
1013 FpOp(mnem, _machInst, __opClass), dest(_dest), imm(_imm)
652 {
653 setVfpMicroFlags(mode, flags);
654 }
1014 {
1015 setVfpMicroFlags(mode, flags);
1016 }
1017
1018 std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
655};
656
1019};
1020
657class VfpRegRegImmOp : public RegRegImmOp
1021class FpRegRegImmOp : public FpOp
658{
659 protected:
1022{
1023 protected:
660 VfpRegRegImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
661 IntRegIndex _dest, IntRegIndex _op1,
662 uint64_t _imm, VfpMicroMode mode = VfpNotAMicroop) :
663 RegRegImmOp(mnem, _machInst, __opClass, _dest, _op1, _imm)
1024 IntRegIndex dest;
1025 IntRegIndex op1;
1026 uint64_t imm;
1027
1028 FpRegRegImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
1029 IntRegIndex _dest, IntRegIndex _op1,
1030 uint64_t _imm, VfpMicroMode mode = VfpNotAMicroop) :
1031 FpOp(mnem, _machInst, __opClass), dest(_dest), op1(_op1), imm(_imm)
664 {
665 setVfpMicroFlags(mode, flags);
666 }
1032 {
1033 setVfpMicroFlags(mode, flags);
1034 }
1035
1036 std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
667};
668
1037};
1038
669class VfpRegRegRegOp : public RegRegRegOp
1039class FpRegRegRegOp : public FpOp
670{
671 protected:
1040{
1041 protected:
672 VfpRegRegRegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
673 IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2,
674 VfpMicroMode mode = VfpNotAMicroop) :
675 RegRegRegOp(mnem, _machInst, __opClass, _dest, _op1, _op2)
1042 IntRegIndex dest;
1043 IntRegIndex op1;
1044 IntRegIndex op2;
1045
1046 FpRegRegRegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
1047 IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2,
1048 VfpMicroMode mode = VfpNotAMicroop) :
1049 FpOp(mnem, _machInst, __opClass), dest(_dest), op1(_op1), op2(_op2)
676 {
677 setVfpMicroFlags(mode, flags);
678 }
1050 {
1051 setVfpMicroFlags(mode, flags);
1052 }
1053
1054 std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
679};
680
681}
682
683#endif //__ARCH_ARM_INSTS_VFP_HH__
1055};
1056
1057}
1058
1059#endif //__ARCH_ARM_INSTS_VFP_HH__