vfp.cc (7396:53454ef35b46) vfp.cc (7430:db3e376f35d1)
1/*
2 * Copyright (c) 2010 ARM Limited
3 * All rights reserved
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software

--- 70 unchanged lines hidden (view full) ---

79 printMnemonic(ss);
80 printReg(ss, dest + FP_Base_DepTag);
81 ss << ", ";
82 printReg(ss, op1 + FP_Base_DepTag);
83 ss << ", ";
84 printReg(ss, op2 + FP_Base_DepTag);
85 return ss.str();
86}
1/*
2 * Copyright (c) 2010 ARM Limited
3 * All rights reserved
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software

--- 70 unchanged lines hidden (view full) ---

79 printMnemonic(ss);
80 printReg(ss, dest + FP_Base_DepTag);
81 ss << ", ";
82 printReg(ss, op1 + FP_Base_DepTag);
83 ss << ", ";
84 printReg(ss, op2 + FP_Base_DepTag);
85 return ss.str();
86}
87
88namespace ArmISA
89{
90
91VfpSavedState
92prepFpState(uint32_t rMode)
93{
94 int roundingMode = fegetround();
95 feclearexcept(FeAllExceptions);
96 switch (rMode) {
97 case VfpRoundNearest:
98 fesetround(FeRoundNearest);
99 break;
100 case VfpRoundUpward:
101 fesetround(FeRoundUpward);
102 break;
103 case VfpRoundDown:
104 fesetround(FeRoundDown);
105 break;
106 case VfpRoundZero:
107 fesetround(FeRoundZero);
108 break;
109 }
110 return roundingMode;
111}
112
113void
114finishVfp(FPSCR &fpscr, VfpSavedState state)
115{
116 int exceptions = fetestexcept(FeAllExceptions);
117 bool underflow = false;
118 if (exceptions & FeInvalid) {
119 fpscr.ioc = 1;
120 }
121 if (exceptions & FeDivByZero) {
122 fpscr.dzc = 1;
123 }
124 if (exceptions & FeOverflow) {
125 fpscr.ofc = 1;
126 }
127 if (exceptions & FeUnderflow) {
128 underflow = true;
129 fpscr.ufc = 1;
130 }
131 if ((exceptions & FeInexact) && !(underflow && fpscr.fz)) {
132 fpscr.ixc = 1;
133 }
134 fesetround(state);
135}
136
137template <class fpType>
138fpType
139fixDest(FPSCR fpscr, fpType val, fpType op1)
140{
141 int fpClass = std::fpclassify(val);
142 fpType junk = 0.0;
143 if (fpClass == FP_NAN) {
144 const bool single = (sizeof(val) == sizeof(float));
145 const uint64_t qnan = single ? 0x7fc00000 : ULL(0x7ff8000000000000);
146 const bool nan = std::isnan(op1);
147 if (!nan || (fpscr.dn == 1)) {
148 val = bitsToFp(qnan, junk);
149 } else if (nan) {
150 val = bitsToFp(fpToBits(op1) | qnan, junk);
151 }
152 } else if (fpClass == FP_SUBNORMAL && fpscr.fz == 1) {
153 // Turn val into a zero with the correct sign;
154 uint64_t bitMask = ULL(0x1) << (sizeof(fpType) * 8 - 1);
155 val = bitsToFp(fpToBits(val) & bitMask, junk);
156 feclearexcept(FeInexact);
157 feraiseexcept(FeUnderflow);
158 }
159 return val;
160}
161
162template
163float fixDest<float>(FPSCR fpscr, float val, float op1);
164template
165double fixDest<double>(FPSCR fpscr, double val, double op1);
166
167template <class fpType>
168fpType
169fixDest(FPSCR fpscr, fpType val, fpType op1, fpType op2)
170{
171 int fpClass = std::fpclassify(val);
172 fpType junk = 0.0;
173 if (fpClass == FP_NAN) {
174 const bool single = (sizeof(val) == sizeof(float));
175 const uint64_t qnan = single ? 0x7fc00000 : ULL(0x7ff8000000000000);
176 const bool nan1 = std::isnan(op1);
177 const bool nan2 = std::isnan(op2);
178 const bool signal1 = nan1 && ((fpToBits(op1) & qnan) != qnan);
179 const bool signal2 = nan2 && ((fpToBits(op2) & qnan) != qnan);
180 if ((!nan1 && !nan2) || (fpscr.dn == 1)) {
181 val = bitsToFp(qnan, junk);
182 } else if (signal1) {
183 val = bitsToFp(fpToBits(op1) | qnan, junk);
184 } else if (signal2) {
185 val = bitsToFp(fpToBits(op2) | qnan, junk);
186 } else if (nan1) {
187 val = op1;
188 } else if (nan2) {
189 val = op2;
190 }
191 } else if (fpClass == FP_SUBNORMAL && fpscr.fz == 1) {
192 // Turn val into a zero with the correct sign;
193 uint64_t bitMask = ULL(0x1) << (sizeof(fpType) * 8 - 1);
194 val = bitsToFp(fpToBits(val) & bitMask, junk);
195 feclearexcept(FeInexact);
196 feraiseexcept(FeUnderflow);
197 }
198 return val;
199}
200
201template
202float fixDest<float>(FPSCR fpscr, float val, float op1, float op2);
203template
204double fixDest<double>(FPSCR fpscr, double val, double op1, double op2);
205
// Fix up the result of a division. On top of the normal fixDest()
// NaN/denormal handling: the host detects underflow -after- rounding
// while ARM detects it -before- (see the comment in FpOp::binaryOp),
// so a result that landed exactly on the smallest normal magnitude is
// recomputed toward zero to see whether it really underflowed.
template <class fpType>
fpType
fixDivDest(FPSCR fpscr, fpType val, fpType op1, fpType op2)
{
    fpType mid = fixDest(fpscr, val, op1, op2);
    const bool single = (sizeof(fpType) == sizeof(float));
    const fpType junk = 0.0;
    // +/- smallest normal: the only values that could have been rounded
    // up out of the denormal range by the host.
    if ((single && (val == bitsToFp(0x00800000, junk) ||
                    val == bitsToFp(0x80800000, junk))) ||
        (!single && (val == bitsToFp(ULL(0x0010000000000000), junk) ||
                     val == bitsToFp(ULL(0x8010000000000000), junk)))
        ) {
        // The empty asm statements are compiler barriers that keep the
        // redone division from being reordered across the rounding mode
        // change and exception state manipulation.
        __asm__ __volatile__("" : "=m" (op1) : "m" (op1));
        fesetround(FeRoundZero);
        fpType temp = 0.0;
        __asm__ __volatile__("" : "=m" (temp) : "m" (temp));
        temp = op1 / op2;
        if (flushToZero(temp)) {
            feraiseexcept(FeUnderflow);
            if (fpscr.fz) {
                feclearexcept(FeInexact);
                mid = temp;
            }
        }
        __asm__ __volatile__("" :: "m" (temp));
    }
    return mid;
}

template
float fixDivDest<float>(FPSCR fpscr, float val, float op1, float op2);
template
double fixDivDest<double>(FPSCR fpscr, double val, double op1, double op2);
239
// Fix up a double to single precision conversion.
float
fixFpDFpSDest(FPSCR fpscr, double val)
{
    const float junk = 0.0;
    float op1 = 0.0;
    if (std::isnan(val)) {
        // Build a single precision NaN from the double's sign and the
        // top payload bits so fixDest() can propagate it.
        uint64_t valBits = fpToBits(val);
        uint32_t op1Bits = bits(valBits, 50, 29) |
            (mask(9) << 22) |
            (bits(valBits, 63) << 31);
        op1 = bitsToFp(op1Bits, junk);
    }
    float mid = fixDest(fpscr, (float)val, op1);
    // In flush-to-zero mode an underflowed conversion isn't also
    // reported as inexact.
    if (fpscr.fz && fetestexcept(FeUnderflow | FeInexact) ==
        (FeUnderflow | FeInexact)) {
        feclearexcept(FeInexact);
    }
    // If the result is +/- the smallest normal single, redo the
    // conversion toward zero to check for ARM-style (before-rounding)
    // underflow; the empty asms are compiler barriers.
    if (mid == bitsToFp(0x00800000, junk) ||
        mid == bitsToFp(0x80800000, junk)) {
        __asm__ __volatile__("" : "=m" (val) : "m" (val));
        fesetround(FeRoundZero);
        float temp = 0.0;
        __asm__ __volatile__("" : "=m" (temp) : "m" (temp));
        temp = val;
        if (flushToZero(temp)) {
            feraiseexcept(FeUnderflow);
            if (fpscr.fz) {
                feclearexcept(FeInexact);
                mid = temp;
            }
        }
        __asm__ __volatile__("" :: "m" (temp));
    }
    return mid;
}
275
// Fix up a single to double precision conversion.
double
fixFpSFpDDest(FPSCR fpscr, float val)
{
    const double junk = 0.0;
    double op1 = 0.0;
    if (std::isnan(val)) {
        // Widen the single's NaN payload into double format so
        // fixDest() can propagate it.
        uint32_t valBits = fpToBits(val);
        uint64_t op1Bits = ((uint64_t)bits(valBits, 21, 0) << 29) |
            (mask(12) << 51) |
            ((uint64_t)bits(valBits, 31) << 63);
        op1 = bitsToFp(op1Bits, junk);
    }
    double mid = fixDest(fpscr, (double)val, op1);
    // If the result is +/- the smallest normal double, redo the
    // conversion toward zero to check for ARM-style (before-rounding)
    // underflow; the empty asms are compiler barriers.
    if (mid == bitsToFp(ULL(0x0010000000000000), junk) ||
        mid == bitsToFp(ULL(0x8010000000000000), junk)) {
        __asm__ __volatile__("" : "=m" (val) : "m" (val));
        fesetround(FeRoundZero);
        double temp = 0.0;
        __asm__ __volatile__("" : "=m" (temp) : "m" (temp));
        temp = val;
        if (flushToZero(temp)) {
            feraiseexcept(FeUnderflow);
            if (fpscr.fz) {
                feclearexcept(FeInexact);
                mid = temp;
            }
        }
        __asm__ __volatile__("" :: "m" (temp));
    }
    return mid;
}
307
// Convert a single precision value to half precision, writing the
// 16 bit result into either the top or bottom half of the destination
// register's bits and leaving the other half untouched.
float
vcvtFpSFpH(FPSCR &fpscr, float op, float dest, bool top)
{
    float junk = 0.0;
    uint32_t destBits = fpToBits(dest);
    uint32_t opBits = fpToBits(op);
    // Extract the operand.
    bool neg = bits(opBits, 31);
    uint32_t exponent = bits(opBits, 30, 23);
    uint32_t oldMantissa = bits(opBits, 22, 0);
    // Keep the top 10 mantissa bits; the dropped 13 feed rounding.
    uint32_t mantissa = oldMantissa >> (23 - 10);
    // Do the conversion.
    uint32_t extra = oldMantissa & mask(23 - 10);
    if (exponent == 0xff) {
        if (oldMantissa != 0) {
            // Nans.
            if (bits(mantissa, 9) == 0) {
                // Signalling nan.
                fpscr.ioc = 1;
            }
            if (fpscr.ahp) {
                // Alternative half precision has no NaN encoding.
                mantissa = 0;
                exponent = 0;
                fpscr.ioc = 1;
            } else if (fpscr.dn) {
                // Default NaN mode: produce the default quiet NaN.
                mantissa = (1 << 9);
                exponent = 0x1f;
                neg = false;
            } else {
                // Propagate the payload, quieted.
                exponent = 0x1f;
                mantissa |= (1 << 9);
            }
        } else {
            // Infinities.
            exponent = 0x1F;
            if (fpscr.ahp) {
                // Alternative half precision has no infinity encoding.
                fpscr.ioc = 1;
                mantissa = 0x3ff;
            } else {
                mantissa = 0;
            }
        }
    } else if (exponent == 0 && oldMantissa == 0) {
        // Zero, don't need to do anything.
    } else {
        // Normalized or denormalized numbers.

        bool inexact = (extra != 0);

        if (exponent == 0) {
            // Denormalized.

            // If flush to zero is on, this shouldn't happen.
            assert(fpscr.fz == 0);

            // Check for underflow
            if (inexact || fpscr.ufe)
                fpscr.ufc = 1;

            // Handle rounding.
            unsigned mode = fpscr.rMode;
            // Round-to-nearest ties go to even (low mantissa bit set).
            if ((mode == VfpRoundUpward && !neg && extra) ||
                (mode == VfpRoundDown && neg && extra) ||
                (mode == VfpRoundNearest &&
                 (extra > (1 << 9) ||
                  (extra == (1 << 9) && bits(mantissa, 0))))) {
                mantissa++;
            }

            // See if the number became normalized after rounding.
            if (mantissa == (1 << 10)) {
                mantissa = 0;
                exponent = 1;
            }
        } else {
            // Normalized.

            // We need to track the dropped bits differently since
            // more can be dropped by denormalizing.
            bool topOne = bits(extra, 12);
            bool restZeros = bits(extra, 11, 0) == 0;

            if (exponent <= (127 - 15)) {
                // The result is too small. Denormalize.
                mantissa |= (1 << 10);
                while (mantissa && exponent <= (127 - 15)) {
                    restZeros = restZeros && !topOne;
                    topOne = bits(mantissa, 0);
                    mantissa = mantissa >> 1;
                    exponent++;
                }
                if (topOne || !restZeros)
                    inexact = true;
                exponent = 0;
            } else {
                // Change bias.
                exponent -= (127 - 15);
            }

            if (exponent == 0 && (inexact || fpscr.ufe)) {
                // Underflow
                fpscr.ufc = 1;
            }

            // Handle rounding.
            unsigned mode = fpscr.rMode;
            bool nonZero = topOne || !restZeros;
            if ((mode == VfpRoundUpward && !neg && nonZero) ||
                (mode == VfpRoundDown && neg && nonZero) ||
                (mode == VfpRoundNearest && topOne &&
                 (!restZeros || bits(mantissa, 0)))) {
                mantissa++;
            }

            // See if we rounded up and need to bump the exponent.
            if (mantissa == (1 << 10)) {
                mantissa = 0;
                exponent++;
            }

            // Deal with overflow
            if (fpscr.ahp) {
                // Alternative half precision saturates to max magnitude.
                if (exponent >= 0x20) {
                    exponent = 0x1f;
                    mantissa = 0x3ff;
                    fpscr.ioc = 1;
                    // Supress inexact exception.
                    inexact = false;
                }
            } else {
                if (exponent >= 0x1f) {
                    if ((mode == VfpRoundNearest) ||
                        (mode == VfpRoundUpward && !neg) ||
                        (mode == VfpRoundDown && neg)) {
                        // Overflow to infinity.
                        exponent = 0x1f;
                        mantissa = 0;
                    } else {
                        // Overflow to max normal.
                        exponent = 0x1e;
                        mantissa = 0x3ff;
                    }
                    fpscr.ofc = 1;
                    inexact = true;
                }
            }
        }

        if (inexact) {
            fpscr.ixc = 1;
        }
    }
    // Reassemble and install the result.
    uint32_t result = bits(mantissa, 9, 0);
    replaceBits(result, 14, 10, exponent);
    if (neg)
        result |= (1 << 15);
    if (top)
        replaceBits(destBits, 31, 16, result);
    else
        replaceBits(destBits, 15, 0, result);
    return bitsToFp(destBits, junk);
}
471
472float
473vcvtFpHFpS(FPSCR &fpscr, float op, bool top)
474{
475 float junk = 0.0;
476 uint32_t opBits = fpToBits(op);
477 // Extract the operand.
478 if (top)
479 opBits = bits(opBits, 31, 16);
480 else
481 opBits = bits(opBits, 15, 0);
482 // Extract the bitfields.
483 bool neg = bits(opBits, 15);
484 uint32_t exponent = bits(opBits, 14, 10);
485 uint32_t mantissa = bits(opBits, 9, 0);
486 // Do the conversion.
487 if (exponent == 0) {
488 if (mantissa != 0) {
489 // Normalize the value.
490 exponent = exponent + (127 - 15) + 1;
491 while (mantissa < (1 << 10)) {
492 mantissa = mantissa << 1;
493 exponent--;
494 }
495 }
496 mantissa = mantissa << (23 - 10);
497 } else if (exponent == 0x1f && !fpscr.ahp) {
498 // Infinities and nans.
499 exponent = 0xff;
500 if (mantissa != 0) {
501 // Nans.
502 mantissa = mantissa << (23 - 10);
503 if (bits(mantissa, 22) == 0) {
504 // Signalling nan.
505 fpscr.ioc = 1;
506 mantissa |= (1 << 22);
507 }
508 if (fpscr.dn) {
509 mantissa &= ~mask(22);
510 neg = false;
511 }
512 }
513 } else {
514 exponent = exponent + (127 - 15);
515 mantissa = mantissa << (23 - 10);
516 }
517 // Reassemble the result.
518 uint32_t result = bits(mantissa, 22, 0);
519 replaceBits(result, 30, 23, exponent);
520 if (neg)
521 result |= (1 << 31);
522 return bitsToFp(result, junk);
523}
524
525uint64_t
526vfpFpSToFixed(float val, bool isSigned, bool half,
527 uint8_t imm, bool rzero)
528{
529 int rmode = rzero ? FeRoundZero : fegetround();
530 __asm__ __volatile__("" : "=m" (rmode) : "m" (rmode));
531 fesetround(FeRoundNearest);
532 val = val * powf(2.0, imm);
533 __asm__ __volatile__("" : "=m" (val) : "m" (val));
534 fesetround(rmode);
535 feclearexcept(FeAllExceptions);
536 __asm__ __volatile__("" : "=m" (val) : "m" (val));
537 float origVal = val;
538 val = rintf(val);
539 int fpType = std::fpclassify(val);
540 if (fpType == FP_SUBNORMAL || fpType == FP_NAN) {
541 if (fpType == FP_NAN) {
542 feraiseexcept(FeInvalid);
543 }
544 val = 0.0;
545 } else if (origVal != val) {
546 switch (rmode) {
547 case FeRoundNearest:
548 if (origVal - val > 0.5)
549 val += 1.0;
550 else if (val - origVal > 0.5)
551 val -= 1.0;
552 break;
553 case FeRoundDown:
554 if (origVal < val)
555 val -= 1.0;
556 break;
557 case FeRoundUpward:
558 if (origVal > val)
559 val += 1.0;
560 break;
561 }
562 feraiseexcept(FeInexact);
563 }
564
565 if (isSigned) {
566 if (half) {
567 if ((double)val < (int16_t)(1 << 15)) {
568 feraiseexcept(FeInvalid);
569 feclearexcept(FeInexact);
570 return (int16_t)(1 << 15);
571 }
572 if ((double)val > (int16_t)mask(15)) {
573 feraiseexcept(FeInvalid);
574 feclearexcept(FeInexact);
575 return (int16_t)mask(15);
576 }
577 return (int16_t)val;
578 } else {
579 if ((double)val < (int32_t)(1 << 31)) {
580 feraiseexcept(FeInvalid);
581 feclearexcept(FeInexact);
582 return (int32_t)(1 << 31);
583 }
584 if ((double)val > (int32_t)mask(31)) {
585 feraiseexcept(FeInvalid);
586 feclearexcept(FeInexact);
587 return (int32_t)mask(31);
588 }
589 return (int32_t)val;
590 }
591 } else {
592 if (half) {
593 if ((double)val < 0) {
594 feraiseexcept(FeInvalid);
595 feclearexcept(FeInexact);
596 return 0;
597 }
598 if ((double)val > (mask(16))) {
599 feraiseexcept(FeInvalid);
600 feclearexcept(FeInexact);
601 return mask(16);
602 }
603 return (uint16_t)val;
604 } else {
605 if ((double)val < 0) {
606 feraiseexcept(FeInvalid);
607 feclearexcept(FeInexact);
608 return 0;
609 }
610 if ((double)val > (mask(32))) {
611 feraiseexcept(FeInvalid);
612 feclearexcept(FeInexact);
613 return mask(32);
614 }
615 return (uint32_t)val;
616 }
617 }
618}
619
620float
621vfpUFixedToFpS(FPSCR fpscr, uint32_t val, bool half, uint8_t imm)
622{
623 fesetround(FeRoundNearest);
624 if (half)
625 val = (uint16_t)val;
626 float scale = powf(2.0, imm);
627 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
628 feclearexcept(FeAllExceptions);
629 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
630 return fixDivDest(fpscr, val / scale, (float)val, scale);
631}
632
633float
634vfpSFixedToFpS(FPSCR fpscr, int32_t val, bool half, uint8_t imm)
635{
636 fesetround(FeRoundNearest);
637 if (half)
638 val = sext<16>(val & mask(16));
639 float scale = powf(2.0, imm);
640 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
641 feclearexcept(FeAllExceptions);
642 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
643 return fixDivDest(fpscr, val / scale, (float)val, scale);
644}
645
646uint64_t
647vfpFpDToFixed(double val, bool isSigned, bool half,
648 uint8_t imm, bool rzero)
649{
650 int rmode = rzero ? FeRoundZero : fegetround();
651 fesetround(FeRoundNearest);
652 val = val * pow(2.0, imm);
653 __asm__ __volatile__("" : "=m" (val) : "m" (val));
654 fesetround(rmode);
655 feclearexcept(FeAllExceptions);
656 __asm__ __volatile__("" : "=m" (val) : "m" (val));
657 double origVal = val;
658 val = rint(val);
659 int fpType = std::fpclassify(val);
660 if (fpType == FP_SUBNORMAL || fpType == FP_NAN) {
661 if (fpType == FP_NAN) {
662 feraiseexcept(FeInvalid);
663 }
664 val = 0.0;
665 } else if (origVal != val) {
666 switch (rmode) {
667 case FeRoundNearest:
668 if (origVal - val > 0.5)
669 val += 1.0;
670 else if (val - origVal > 0.5)
671 val -= 1.0;
672 break;
673 case FeRoundDown:
674 if (origVal < val)
675 val -= 1.0;
676 break;
677 case FeRoundUpward:
678 if (origVal > val)
679 val += 1.0;
680 break;
681 }
682 feraiseexcept(FeInexact);
683 }
684 if (isSigned) {
685 if (half) {
686 if (val < (int16_t)(1 << 15)) {
687 feraiseexcept(FeInvalid);
688 feclearexcept(FeInexact);
689 return (int16_t)(1 << 15);
690 }
691 if (val > (int16_t)mask(15)) {
692 feraiseexcept(FeInvalid);
693 feclearexcept(FeInexact);
694 return (int16_t)mask(15);
695 }
696 return (int16_t)val;
697 } else {
698 if (val < (int32_t)(1 << 31)) {
699 feraiseexcept(FeInvalid);
700 feclearexcept(FeInexact);
701 return (int32_t)(1 << 31);
702 }
703 if (val > (int32_t)mask(31)) {
704 feraiseexcept(FeInvalid);
705 feclearexcept(FeInexact);
706 return (int32_t)mask(31);
707 }
708 return (int32_t)val;
709 }
710 } else {
711 if (half) {
712 if (val < 0) {
713 feraiseexcept(FeInvalid);
714 feclearexcept(FeInexact);
715 return 0;
716 }
717 if (val > mask(16)) {
718 feraiseexcept(FeInvalid);
719 feclearexcept(FeInexact);
720 return mask(16);
721 }
722 return (uint16_t)val;
723 } else {
724 if (val < 0) {
725 feraiseexcept(FeInvalid);
726 feclearexcept(FeInexact);
727 return 0;
728 }
729 if (val > mask(32)) {
730 feraiseexcept(FeInvalid);
731 feclearexcept(FeInexact);
732 return mask(32);
733 }
734 return (uint32_t)val;
735 }
736 }
737}
738
739double
740vfpUFixedToFpD(FPSCR fpscr, uint32_t val, bool half, uint8_t imm)
741{
742 fesetround(FeRoundNearest);
743 if (half)
744 val = (uint16_t)val;
745 double scale = pow(2.0, imm);
746 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
747 feclearexcept(FeAllExceptions);
748 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
749 return fixDivDest(fpscr, val / scale, (double)val, scale);
750}
751
752double
753vfpSFixedToFpD(FPSCR fpscr, int32_t val, bool half, uint8_t imm)
754{
755 fesetround(FeRoundNearest);
756 if (half)
757 val = sext<16>(val & mask(16));
758 double scale = pow(2.0, imm);
759 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
760 feclearexcept(FeAllExceptions);
761 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
762 return fixDivDest(fpscr, val / scale, (double)val, scale);
763}
764
765template <class fpType>
766fpType
767FpOp::binaryOp(FPSCR &fpscr, fpType op1, fpType op2,
768 fpType (*func)(fpType, fpType),
769 bool flush, uint32_t rMode) const
770{
771 const bool single = (sizeof(fpType) == sizeof(float));
772 fpType junk = 0.0;
773
774 if (flush && flushToZero(op1, op2))
775 fpscr.idc = 1;
776 VfpSavedState state = prepFpState(rMode);
777 __asm__ __volatile__ ("" : "=m" (op1), "=m" (op2), "=m" (state)
778 : "m" (op1), "m" (op2), "m" (state));
779 fpType dest = func(op1, op2);
780 __asm__ __volatile__ ("" : "=m" (dest) : "m" (dest));
781
782 int fpClass = std::fpclassify(dest);
783 // Get NAN behavior right. This varies between x86 and ARM.
784 if (fpClass == FP_NAN) {
785 const bool single = (sizeof(fpType) == sizeof(float));
786 const uint64_t qnan =
787 single ? 0x7fc00000 : ULL(0x7ff8000000000000);
788 const bool nan1 = std::isnan(op1);
789 const bool nan2 = std::isnan(op2);
790 const bool signal1 = nan1 && ((fpToBits(op1) & qnan) != qnan);
791 const bool signal2 = nan2 && ((fpToBits(op2) & qnan) != qnan);
792 if ((!nan1 && !nan2) || (fpscr.dn == 1)) {
793 dest = bitsToFp(qnan, junk);
794 } else if (signal1) {
795 dest = bitsToFp(fpToBits(op1) | qnan, junk);
796 } else if (signal2) {
797 dest = bitsToFp(fpToBits(op2) | qnan, junk);
798 } else if (nan1) {
799 dest = op1;
800 } else if (nan2) {
801 dest = op2;
802 }
803 } else if (flush && flushToZero(dest)) {
804 feraiseexcept(FeUnderflow);
805 } else if ((
806 (single && (dest == bitsToFp(0x00800000, junk) ||
807 dest == bitsToFp(0x80800000, junk))) ||
808 (!single &&
809 (dest == bitsToFp(ULL(0x0010000000000000), junk) ||
810 dest == bitsToFp(ULL(0x8010000000000000), junk)))
811 ) && rMode != VfpRoundZero) {
812 /*
813 * Correct for the fact that underflow is detected -before- rounding
814 * in ARM and -after- rounding in x86.
815 */
816 fesetround(FeRoundZero);
817 __asm__ __volatile__ ("" : "=m" (op1), "=m" (op2)
818 : "m" (op1), "m" (op2));
819 fpType temp = func(op1, op2);
820 __asm__ __volatile__ ("" : "=m" (temp) : "m" (temp));
821 if (flush && flushToZero(temp)) {
822 dest = temp;
823 }
824 }
825 finishVfp(fpscr, state);
826 return dest;
827}
828
829template
830float FpOp::binaryOp(FPSCR &fpscr, float op1, float op2,
831 float (*func)(float, float),
832 bool flush, uint32_t rMode) const;
833template
834double FpOp::binaryOp(FPSCR &fpscr, double op1, double op2,
835 double (*func)(double, double),
836 bool flush, uint32_t rMode) const;
837
838template <class fpType>
839fpType
840FpOp::unaryOp(FPSCR &fpscr, fpType op1, fpType (*func)(fpType),
841 bool flush, uint32_t rMode) const
842{
843 const bool single = (sizeof(fpType) == sizeof(float));
844 fpType junk = 0.0;
845
846 if (flush && flushToZero(op1))
847 fpscr.idc = 1;
848 VfpSavedState state = prepFpState(rMode);
849 __asm__ __volatile__ ("" : "=m" (op1), "=m" (state)
850 : "m" (op1), "m" (state));
851 fpType dest = func(op1);
852 __asm__ __volatile__ ("" : "=m" (dest) : "m" (dest));
853
854 int fpClass = std::fpclassify(dest);
855 // Get NAN behavior right. This varies between x86 and ARM.
856 if (fpClass == FP_NAN) {
857 const bool single = (sizeof(fpType) == sizeof(float));
858 const uint64_t qnan =
859 single ? 0x7fc00000 : ULL(0x7ff8000000000000);
860 const bool nan = std::isnan(op1);
861 if (!nan || fpscr.dn == 1) {
862 dest = bitsToFp(qnan, junk);
863 } else if (nan) {
864 dest = bitsToFp(fpToBits(op1) | qnan, junk);
865 }
866 } else if (flush && flushToZero(dest)) {
867 feraiseexcept(FeUnderflow);
868 } else if ((
869 (single && (dest == bitsToFp(0x00800000, junk) ||
870 dest == bitsToFp(0x80800000, junk))) ||
871 (!single &&
872 (dest == bitsToFp(ULL(0x0010000000000000), junk) ||
873 dest == bitsToFp(ULL(0x8010000000000000), junk)))
874 ) && rMode != VfpRoundZero) {
875 /*
876 * Correct for the fact that underflow is detected -before- rounding
877 * in ARM and -after- rounding in x86.
878 */
879 fesetround(FeRoundZero);
880 __asm__ __volatile__ ("" : "=m" (op1) : "m" (op1));
881 fpType temp = func(op1);
882 __asm__ __volatile__ ("" : "=m" (temp) : "m" (temp));
883 if (flush && flushToZero(temp)) {
884 dest = temp;
885 }
886 }
887 finishVfp(fpscr, state);
888 return dest;
889}
890
891template
892float FpOp::unaryOp(FPSCR &fpscr, float op1, float (*func)(float),
893 bool flush, uint32_t rMode) const;
894template
895double FpOp::unaryOp(FPSCR &fpscr, double op1, double (*func)(double),
896 bool flush, uint32_t rMode) const;
897
898IntRegIndex
899VfpMacroOp::addStride(IntRegIndex idx, unsigned stride)
900{
901 if (wide) {
902 stride *= 2;
903 }
904 unsigned offset = idx % 8;
905 idx = (IntRegIndex)(idx - offset);
906 offset += stride;
907 idx = (IntRegIndex)(idx + (offset % 8));
908 return idx;
909}
910
911void
912VfpMacroOp::nextIdxs(IntRegIndex &dest, IntRegIndex &op1, IntRegIndex &op2)
913{
914 unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2;
915 assert(!inScalarBank(dest));
916 dest = addStride(dest, stride);
917 op1 = addStride(op1, stride);
918 if (!inScalarBank(op2)) {
919 op2 = addStride(op2, stride);
920 }
921}
922
923void
924VfpMacroOp::nextIdxs(IntRegIndex &dest, IntRegIndex &op1)
925{
926 unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2;
927 assert(!inScalarBank(dest));
928 dest = addStride(dest, stride);
929 if (!inScalarBank(op1)) {
930 op1 = addStride(op1, stride);
931 }
932}
933
934void
935VfpMacroOp::nextIdxs(IntRegIndex &dest)
936{
937 unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2;
938 assert(!inScalarBank(dest));
939 dest = addStride(dest, stride);
940}
941
942}