/* Exported from a code-review diff viewer: old revision 7434:dd5a09b86b14,
 * new revision 7639:8c09b7ff5b57 (this file shows the new revision). */
1/*
2 * Copyright (c) 2010 ARM Limited
3 * All rights reserved
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder. You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions are
16 * met: redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer;
18 * redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution;
21 * neither the name of the copyright holders nor the names of its
22 * contributors may be used to endorse or promote products derived from
23 * this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 *
37 * Authors: Gabe Black
38 */
39
40#include "arch/arm/insts/vfp.hh"
41
42/*
43 * The asm statements below are to keep gcc from reordering code. Otherwise
44 * the rounding mode might be set after the operation it was intended for, the
45 * exception bits read before it, etc.
46 */
47
48std::string
49FpRegRegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
50{
51 std::stringstream ss;
52 printMnemonic(ss);
53 printReg(ss, dest + FP_Base_DepTag);
54 ss << ", ";
55 printReg(ss, op1 + FP_Base_DepTag);
56 return ss.str();
57}
58
59std::string
60FpRegImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
61{
62 std::stringstream ss;
63 printMnemonic(ss);
64 printReg(ss, dest + FP_Base_DepTag);
65 ccprintf(ss, ", #%d", imm);
66 return ss.str();
67}
68
69std::string
70FpRegRegImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
71{
72 std::stringstream ss;
73 printMnemonic(ss);
74 printReg(ss, dest + FP_Base_DepTag);
75 ss << ", ";
76 printReg(ss, op1 + FP_Base_DepTag);
77 ccprintf(ss, ", #%d", imm);
78 return ss.str();
79}
80
81std::string
82FpRegRegRegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
83{
84 std::stringstream ss;
85 printMnemonic(ss);
86 printReg(ss, dest + FP_Base_DepTag);
87 ss << ", ";
88 printReg(ss, op1 + FP_Base_DepTag);
89 ss << ", ";
90 printReg(ss, op2 + FP_Base_DepTag);
91 return ss.str();
92}
93
94namespace ArmISA
95{
96
97VfpSavedState
98prepFpState(uint32_t rMode)
99{
100 int roundingMode = fegetround();
101 feclearexcept(FeAllExceptions);
102 switch (rMode) {
103 case VfpRoundNearest:
104 fesetround(FeRoundNearest);
105 break;
106 case VfpRoundUpward:
107 fesetround(FeRoundUpward);
108 break;
109 case VfpRoundDown:
110 fesetround(FeRoundDown);
111 break;
112 case VfpRoundZero:
113 fesetround(FeRoundZero);
114 break;
115 }
116 return roundingMode;
117}
118
119void
120finishVfp(FPSCR &fpscr, VfpSavedState state)
121{
122 int exceptions = fetestexcept(FeAllExceptions);
123 bool underflow = false;
124 if (exceptions & FeInvalid) {
125 fpscr.ioc = 1;
126 }
127 if (exceptions & FeDivByZero) {
128 fpscr.dzc = 1;
129 }
130 if (exceptions & FeOverflow) {
131 fpscr.ofc = 1;
132 }
133 if (exceptions & FeUnderflow) {
134 underflow = true;
135 fpscr.ufc = 1;
136 }
137 if ((exceptions & FeInexact) && !(underflow && fpscr.fz)) {
138 fpscr.ixc = 1;
139 }
140 fesetround(state);
141}
142
143template <class fpType>
144fpType
145fixDest(FPSCR fpscr, fpType val, fpType op1)
146{
147 int fpClass = std::fpclassify(val);
148 fpType junk = 0.0;
149 if (fpClass == FP_NAN) {
150 const bool single = (sizeof(val) == sizeof(float));
151 const uint64_t qnan = single ? 0x7fc00000 : ULL(0x7ff8000000000000);
152 const bool nan = std::isnan(op1);
153 if (!nan || (fpscr.dn == 1)) {
154 val = bitsToFp(qnan, junk);
155 } else if (nan) {
156 val = bitsToFp(fpToBits(op1) | qnan, junk);
157 }
158 } else if (fpClass == FP_SUBNORMAL && fpscr.fz == 1) {
159 // Turn val into a zero with the correct sign;
160 uint64_t bitMask = ULL(0x1) << (sizeof(fpType) * 8 - 1);
161 val = bitsToFp(fpToBits(val) & bitMask, junk);
162 feclearexcept(FeInexact);
163 feraiseexcept(FeUnderflow);
164 }
165 return val;
166}
167
168template
169float fixDest<float>(FPSCR fpscr, float val, float op1);
170template
171double fixDest<double>(FPSCR fpscr, double val, double op1);
172
173template <class fpType>
174fpType
175fixDest(FPSCR fpscr, fpType val, fpType op1, fpType op2)
176{
177 int fpClass = std::fpclassify(val);
178 fpType junk = 0.0;
179 if (fpClass == FP_NAN) {
180 const bool single = (sizeof(val) == sizeof(float));
181 const uint64_t qnan = single ? 0x7fc00000 : ULL(0x7ff8000000000000);
182 const bool nan1 = std::isnan(op1);
183 const bool nan2 = std::isnan(op2);
184 const bool signal1 = nan1 && ((fpToBits(op1) & qnan) != qnan);
185 const bool signal2 = nan2 && ((fpToBits(op2) & qnan) != qnan);
186 if ((!nan1 && !nan2) || (fpscr.dn == 1)) {
187 val = bitsToFp(qnan, junk);
188 } else if (signal1) {
189 val = bitsToFp(fpToBits(op1) | qnan, junk);
190 } else if (signal2) {
191 val = bitsToFp(fpToBits(op2) | qnan, junk);
192 } else if (nan1) {
193 val = op1;
194 } else if (nan2) {
195 val = op2;
196 }
197 } else if (fpClass == FP_SUBNORMAL && fpscr.fz == 1) {
198 // Turn val into a zero with the correct sign;
199 uint64_t bitMask = ULL(0x1) << (sizeof(fpType) * 8 - 1);
200 val = bitsToFp(fpToBits(val) & bitMask, junk);
201 feclearexcept(FeInexact);
202 feraiseexcept(FeUnderflow);
203 }
204 return val;
205}
206
207template
208float fixDest<float>(FPSCR fpscr, float val, float op1, float op2);
209template
210double fixDest<double>(FPSCR fpscr, double val, double op1, double op2);
211
/*
 * Fix the result of a division. Beyond the usual NaN/flush-to-zero fixups,
 * if the rounded quotient landed exactly on +/- the smallest normalized
 * value the unrounded result may have been subnormal: ARM detects underflow
 * -before- rounding while the host (x86) detects it -after-. Redo the
 * divide with round-toward-zero to find out what the pre-rounding result
 * really was.
 */
template <class fpType>
fpType
fixDivDest(FPSCR fpscr, fpType val, fpType op1, fpType op2)
{
    fpType mid = fixDest(fpscr, val, op1, op2);
    const bool single = (sizeof(fpType) == sizeof(float));
    const fpType junk = 0.0;
    // 0x00800000 / 0x0010000000000000 are the smallest positive normalized
    // single/double values; the 0x80... variants are their negations.
    if ((single && (val == bitsToFp(0x00800000, junk) ||
                    val == bitsToFp(0x80800000, junk))) ||
        (!single && (val == bitsToFp(ULL(0x0010000000000000), junk) ||
                     val == bitsToFp(ULL(0x8010000000000000), junk)))
        ) {
        // Empty asm barriers keep the compiler from moving the divide
        // across the rounding mode change (see file header comment).
        __asm__ __volatile__("" : "=m" (op1) : "m" (op1));
        fesetround(FeRoundZero);
        fpType temp = 0.0;
        __asm__ __volatile__("" : "=m" (temp) : "m" (temp));
        temp = op1 / op2;
        if (flushToZero(temp)) {
            feraiseexcept(FeUnderflow);
            if (fpscr.fz) {
                // Flush-to-zero suppresses inexact and substitutes the
                // flushed value.
                feclearexcept(FeInexact);
                mid = temp;
            }
        }
        __asm__ __volatile__("" :: "m" (temp));
    }
    return mid;
}

template
float fixDivDest<float>(FPSCR fpscr, float val, float op1, float op2);
template
double fixDivDest<double>(FPSCR fpscr, double val, double op1, double op2);
245
/*
 * Fix the result of a double -> single conversion: build the NaN payload
 * ARM expects and apply the underflow-before-rounding correction.
 */
float
fixFpDFpSDest(FPSCR fpscr, double val)
{
    const float junk = 0.0;
    float op1 = 0.0;
    if (std::isnan(val)) {
        // Construct a single precision quiet NaN carrying the sign bit and
        // the top payload bits of the double, for fixDest() to propagate.
        uint64_t valBits = fpToBits(val);
        uint32_t op1Bits = bits(valBits, 50, 29) |
            (mask(9) << 22) |
            (bits(valBits, 63) << 31);
        op1 = bitsToFp(op1Bits, junk);
    }
    float mid = fixDest(fpscr, (float)val, op1);
    // Flush-to-zero: an underflowing conversion is not also inexact.
    if (fpscr.fz && fetestexcept(FeUnderflow | FeInexact) ==
        (FeUnderflow | FeInexact)) {
        feclearexcept(FeInexact);
    }
    // If the narrowed result is +/- the smallest normalized single, the
    // pre-rounding value may have been subnormal. ARM detects underflow
    // before rounding, so redo the conversion with round-toward-zero.
    if (mid == bitsToFp(0x00800000, junk) ||
        mid == bitsToFp(0x80800000, junk)) {
        // asm barriers keep the conversion from being moved across the
        // rounding mode change.
        __asm__ __volatile__("" : "=m" (val) : "m" (val));
        fesetround(FeRoundZero);
        float temp = 0.0;
        __asm__ __volatile__("" : "=m" (temp) : "m" (temp));
        temp = val;
        if (flushToZero(temp)) {
            feraiseexcept(FeUnderflow);
            if (fpscr.fz) {
                feclearexcept(FeInexact);
                mid = temp;
            }
        }
        __asm__ __volatile__("" :: "m" (temp));
    }
    return mid;
}
281
/*
 * Fix the result of a single -> double conversion: widen the NaN payload
 * and apply the same underflow-before-rounding correction as elsewhere.
 */
double
fixFpSFpDDest(FPSCR fpscr, float val)
{
    const double junk = 0.0;
    double op1 = 0.0;
    if (std::isnan(val)) {
        // Construct a double precision quiet NaN carrying the sign bit and
        // the single's payload bits, for fixDest() to propagate.
        uint32_t valBits = fpToBits(val);
        uint64_t op1Bits = ((uint64_t)bits(valBits, 21, 0) << 29) |
            (mask(12) << 51) |
            ((uint64_t)bits(valBits, 31) << 63);
        op1 = bitsToFp(op1Bits, junk);
    }
    double mid = fixDest(fpscr, (double)val, op1);
    // If the result is +/- the smallest normalized double, recheck the
    // value with round-toward-zero for ARM's before-rounding underflow.
    if (mid == bitsToFp(ULL(0x0010000000000000), junk) ||
        mid == bitsToFp(ULL(0x8010000000000000), junk)) {
        // asm barriers keep the conversion from being moved across the
        // rounding mode change.
        __asm__ __volatile__("" : "=m" (val) : "m" (val));
        fesetround(FeRoundZero);
        double temp = 0.0;
        __asm__ __volatile__("" : "=m" (temp) : "m" (temp));
        temp = val;
        if (flushToZero(temp)) {
            feraiseexcept(FeUnderflow);
            if (fpscr.fz) {
                feclearexcept(FeInexact);
                mid = temp;
            }
        }
        __asm__ __volatile__("" :: "m" (temp));
    }
    return mid;
}
313
/*
 * Convert the single precision value op to half precision and deposit the
 * 16 bit result into the top (top == true) or bottom half of dest,
 * returning the merged word. The conversion is done entirely in integer
 * arithmetic, so exception flags are written directly into fpscr rather
 * than going through the host FP environment.
 */
float
vcvtFpSFpH(FPSCR &fpscr, float op, float dest, bool top)
{
    float junk = 0.0;
    uint32_t destBits = fpToBits(dest);
    uint32_t opBits = fpToBits(op);
    // Extract the operand.
    bool neg = bits(opBits, 31);
    uint32_t exponent = bits(opBits, 30, 23);
    uint32_t oldMantissa = bits(opBits, 22, 0);
    // Keep the top 10 mantissa bits; 'extra' holds the 13 dropped bits.
    uint32_t mantissa = oldMantissa >> (23 - 10);
    // Do the conversion.
    uint32_t extra = oldMantissa & mask(23 - 10);
    if (exponent == 0xff) {
        if (oldMantissa != 0) {
            // Nans.
            if (bits(mantissa, 9) == 0) {
                // Signalling nan.
                fpscr.ioc = 1;
            }
            if (fpscr.ahp) {
                // Alternative half precision has no NaN encoding: the
                // result is zero and the operation is invalid.
                mantissa = 0;
                exponent = 0;
                fpscr.ioc = 1;
            } else if (fpscr.dn) {
                // Default NaN mode: positive quiet NaN, zero payload.
                mantissa = (1 << 9);
                exponent = 0x1f;
                neg = false;
            } else {
                // Propagate the (truncated) payload, quieted.
                exponent = 0x1f;
                mantissa |= (1 << 9);
            }
        } else {
            // Infinities.
            exponent = 0x1F;
            if (fpscr.ahp) {
                // Alternative half precision can't represent infinity:
                // saturate to max magnitude and flag invalid.
                fpscr.ioc = 1;
                mantissa = 0x3ff;
            } else {
                mantissa = 0;
            }
        }
    } else if (exponent == 0 && oldMantissa == 0) {
        // Zero, don't need to do anything.
    } else {
        // Normalized or denormalized numbers.

        bool inexact = (extra != 0);

        if (exponent == 0) {
            // Denormalized.

            // If flush to zero is on, this shouldn't happen.
            assert(fpscr.fz == 0);

            // Check for underflow
            if (inexact || fpscr.ufe)
                fpscr.ufc = 1;

            // Handle rounding.
            unsigned mode = fpscr.rMode;
            // NOTE(review): 'extra' is the 13 bit dropped remainder, whose
            // halfway point is (1 << 12); the (1 << 9) threshold below
            // looks inconsistent with that -- confirm against the round
            // to nearest definition before relying on this path.
            if ((mode == VfpRoundUpward && !neg && extra) ||
                (mode == VfpRoundDown && neg && extra) ||
                (mode == VfpRoundNearest &&
                 (extra > (1 << 9) ||
                  (extra == (1 << 9) && bits(mantissa, 0))))) {
                mantissa++;
            }

            // See if the number became normalized after rounding.
            if (mantissa == (1 << 10)) {
                mantissa = 0;
                exponent = 1;
            }
        } else {
            // Normalized.

            // We need to track the dropped bits differently since
            // more can be dropped by denormalizing.
            bool topOne = bits(extra, 12);
            bool restZeros = bits(extra, 11, 0) == 0;

            if (exponent <= (127 - 15)) {
                // The result is too small. Denormalize.
                mantissa |= (1 << 10);
                while (mantissa && exponent <= (127 - 15)) {
                    // Each right shift pushes another bit into the
                    // dropped remainder tracked by topOne/restZeros.
                    restZeros = restZeros && !topOne;
                    topOne = bits(mantissa, 0);
                    mantissa = mantissa >> 1;
                    exponent++;
                }
                if (topOne || !restZeros)
                    inexact = true;
                exponent = 0;
            } else {
                // Change bias.
                exponent -= (127 - 15);
            }

            if (exponent == 0 && (inexact || fpscr.ufe)) {
                // Underflow
                fpscr.ufc = 1;
            }

            // Handle rounding.
            unsigned mode = fpscr.rMode;
            bool nonZero = topOne || !restZeros;
            if ((mode == VfpRoundUpward && !neg && nonZero) ||
                (mode == VfpRoundDown && neg && nonZero) ||
                (mode == VfpRoundNearest && topOne &&
                 (!restZeros || bits(mantissa, 0)))) {
                mantissa++;
            }

            // See if we rounded up and need to bump the exponent.
            if (mantissa == (1 << 10)) {
                mantissa = 0;
                exponent++;
            }

            // Deal with overflow
            if (fpscr.ahp) {
                if (exponent >= 0x20) {
                    // Alternative half precision saturates to max normal
                    // and flags invalid rather than producing infinity.
                    exponent = 0x1f;
                    mantissa = 0x3ff;
                    fpscr.ioc = 1;
                    // Suppress inexact exception.
                    inexact = false;
                }
            } else {
                if (exponent >= 0x1f) {
                    if ((mode == VfpRoundNearest) ||
                        (mode == VfpRoundUpward && !neg) ||
                        (mode == VfpRoundDown && neg)) {
                        // Overflow to infinity.
                        exponent = 0x1f;
                        mantissa = 0;
                    } else {
                        // Overflow to max normal.
                        exponent = 0x1e;
                        mantissa = 0x3ff;
                    }
                    fpscr.ofc = 1;
                    inexact = true;
                }
            }
        }

        if (inexact) {
            fpscr.ixc = 1;
        }
    }
    // Reassemble and install the result.
    uint32_t result = bits(mantissa, 9, 0);
    replaceBits(result, 14, 10, exponent);
    if (neg)
        result |= (1 << 15);
    if (top)
        replaceBits(destBits, 31, 16, result);
    else
        replaceBits(destBits, 15, 0, result);
    return bitsToFp(destBits, junk);
}
477
478float
479vcvtFpHFpS(FPSCR &fpscr, float op, bool top)
480{
481 float junk = 0.0;
482 uint32_t opBits = fpToBits(op);
483 // Extract the operand.
484 if (top)
485 opBits = bits(opBits, 31, 16);
486 else
487 opBits = bits(opBits, 15, 0);
488 // Extract the bitfields.
489 bool neg = bits(opBits, 15);
490 uint32_t exponent = bits(opBits, 14, 10);
491 uint32_t mantissa = bits(opBits, 9, 0);
492 // Do the conversion.
493 if (exponent == 0) {
494 if (mantissa != 0) {
495 // Normalize the value.
496 exponent = exponent + (127 - 15) + 1;
497 while (mantissa < (1 << 10)) {
498 mantissa = mantissa << 1;
499 exponent--;
500 }
501 }
502 mantissa = mantissa << (23 - 10);
503 } else if (exponent == 0x1f && !fpscr.ahp) {
504 // Infinities and nans.
505 exponent = 0xff;
506 if (mantissa != 0) {
507 // Nans.
508 mantissa = mantissa << (23 - 10);
509 if (bits(mantissa, 22) == 0) {
510 // Signalling nan.
511 fpscr.ioc = 1;
512 mantissa |= (1 << 22);
513 }
514 if (fpscr.dn) {
515 mantissa &= ~mask(22);
516 neg = false;
517 }
518 }
519 } else {
520 exponent = exponent + (127 - 15);
521 mantissa = mantissa << (23 - 10);
522 }
523 // Reassemble the result.
524 uint32_t result = bits(mantissa, 22, 0);
525 replaceBits(result, 30, 23, exponent);
526 if (neg)
527 result |= (1 << 31);
528 return bitsToFp(result, junk);
529}
530
531uint64_t
532vfpFpSToFixed(float val, bool isSigned, bool half,
533 uint8_t imm, bool rzero)
534{
535 int rmode = rzero ? FeRoundZero : fegetround();
536 __asm__ __volatile__("" : "=m" (rmode) : "m" (rmode));
537 fesetround(FeRoundNearest);
538 val = val * powf(2.0, imm);
539 __asm__ __volatile__("" : "=m" (val) : "m" (val));
540 fesetround(rmode);
541 feclearexcept(FeAllExceptions);
542 __asm__ __volatile__("" : "=m" (val) : "m" (val));
543 float origVal = val;
544 val = rintf(val);
545 int fpType = std::fpclassify(val);
546 if (fpType == FP_SUBNORMAL || fpType == FP_NAN) {
547 if (fpType == FP_NAN) {
548 feraiseexcept(FeInvalid);
549 }
550 val = 0.0;
551 } else if (origVal != val) {
552 switch (rmode) {
553 case FeRoundNearest:
554 if (origVal - val > 0.5)
555 val += 1.0;
556 else if (val - origVal > 0.5)
557 val -= 1.0;
558 break;
559 case FeRoundDown:
560 if (origVal < val)
561 val -= 1.0;
562 break;
563 case FeRoundUpward:
564 if (origVal > val)
565 val += 1.0;
566 break;
567 }
568 feraiseexcept(FeInexact);
569 }
570
571 if (isSigned) {
572 if (half) {
573 if ((double)val < (int16_t)(1 << 15)) {
574 feraiseexcept(FeInvalid);
575 feclearexcept(FeInexact);
576 return (int16_t)(1 << 15);
577 }
578 if ((double)val > (int16_t)mask(15)) {
579 feraiseexcept(FeInvalid);
580 feclearexcept(FeInexact);
581 return (int16_t)mask(15);
582 }
583 return (int16_t)val;
584 } else {
585 if ((double)val < (int32_t)(1 << 31)) {
586 feraiseexcept(FeInvalid);
587 feclearexcept(FeInexact);
588 return (int32_t)(1 << 31);
589 }
590 if ((double)val > (int32_t)mask(31)) {
591 feraiseexcept(FeInvalid);
592 feclearexcept(FeInexact);
593 return (int32_t)mask(31);
594 }
595 return (int32_t)val;
596 }
597 } else {
598 if (half) {
599 if ((double)val < 0) {
600 feraiseexcept(FeInvalid);
601 feclearexcept(FeInexact);
602 return 0;
603 }
604 if ((double)val > (mask(16))) {
605 feraiseexcept(FeInvalid);
606 feclearexcept(FeInexact);
607 return mask(16);
608 }
609 return (uint16_t)val;
610 } else {
611 if ((double)val < 0) {
612 feraiseexcept(FeInvalid);
613 feclearexcept(FeInexact);
614 return 0;
615 }
616 if ((double)val > (mask(32))) {
617 feraiseexcept(FeInvalid);
618 feclearexcept(FeInexact);
619 return mask(32);
620 }
621 return (uint32_t)val;
622 }
623 }
624}
625
626float
627vfpUFixedToFpS(FPSCR fpscr, uint32_t val, bool half, uint8_t imm)
628{
629 fesetround(FeRoundNearest);
630 if (half)
631 val = (uint16_t)val;
632 float scale = powf(2.0, imm);
633 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
634 feclearexcept(FeAllExceptions);
635 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
636 return fixDivDest(fpscr, val / scale, (float)val, scale);
637}
638
639float
640vfpSFixedToFpS(FPSCR fpscr, int32_t val, bool half, uint8_t imm)
641{
642 fesetround(FeRoundNearest);
643 if (half)
644 val = sext<16>(val & mask(16));
645 float scale = powf(2.0, imm);
646 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
647 feclearexcept(FeAllExceptions);
648 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
649 return fixDivDest(fpscr, val / scale, (float)val, scale);
650}
651
652uint64_t
653vfpFpDToFixed(double val, bool isSigned, bool half,
654 uint8_t imm, bool rzero)
655{
656 int rmode = rzero ? FeRoundZero : fegetround();
657 fesetround(FeRoundNearest);
658 val = val * pow(2.0, imm);
659 __asm__ __volatile__("" : "=m" (val) : "m" (val));
660 fesetround(rmode);
661 feclearexcept(FeAllExceptions);
662 __asm__ __volatile__("" : "=m" (val) : "m" (val));
663 double origVal = val;
664 val = rint(val);
665 int fpType = std::fpclassify(val);
666 if (fpType == FP_SUBNORMAL || fpType == FP_NAN) {
667 if (fpType == FP_NAN) {
668 feraiseexcept(FeInvalid);
669 }
670 val = 0.0;
671 } else if (origVal != val) {
672 switch (rmode) {
673 case FeRoundNearest:
674 if (origVal - val > 0.5)
675 val += 1.0;
676 else if (val - origVal > 0.5)
677 val -= 1.0;
678 break;
679 case FeRoundDown:
680 if (origVal < val)
681 val -= 1.0;
682 break;
683 case FeRoundUpward:
684 if (origVal > val)
685 val += 1.0;
686 break;
687 }
688 feraiseexcept(FeInexact);
689 }
690 if (isSigned) {
691 if (half) {
692 if (val < (int16_t)(1 << 15)) {
693 feraiseexcept(FeInvalid);
694 feclearexcept(FeInexact);
695 return (int16_t)(1 << 15);
696 }
697 if (val > (int16_t)mask(15)) {
698 feraiseexcept(FeInvalid);
699 feclearexcept(FeInexact);
700 return (int16_t)mask(15);
701 }
702 return (int16_t)val;
703 } else {
704 if (val < (int32_t)(1 << 31)) {
705 feraiseexcept(FeInvalid);
706 feclearexcept(FeInexact);
707 return (int32_t)(1 << 31);
708 }
709 if (val > (int32_t)mask(31)) {
710 feraiseexcept(FeInvalid);
711 feclearexcept(FeInexact);
712 return (int32_t)mask(31);
713 }
714 return (int32_t)val;
715 }
716 } else {
717 if (half) {
718 if (val < 0) {
719 feraiseexcept(FeInvalid);
720 feclearexcept(FeInexact);
721 return 0;
722 }
723 if (val > mask(16)) {
724 feraiseexcept(FeInvalid);
725 feclearexcept(FeInexact);
726 return mask(16);
727 }
728 return (uint16_t)val;
729 } else {
730 if (val < 0) {
731 feraiseexcept(FeInvalid);
732 feclearexcept(FeInexact);
733 return 0;
734 }
735 if (val > mask(32)) {
736 feraiseexcept(FeInvalid);
737 feclearexcept(FeInexact);
738 return mask(32);
739 }
740 return (uint32_t)val;
741 }
742 }
743}
744
745double
746vfpUFixedToFpD(FPSCR fpscr, uint32_t val, bool half, uint8_t imm)
747{
748 fesetround(FeRoundNearest);
749 if (half)
750 val = (uint16_t)val;
751 double scale = pow(2.0, imm);
752 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
753 feclearexcept(FeAllExceptions);
754 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
755 return fixDivDest(fpscr, val / scale, (double)val, scale);
756}
757
758double
759vfpSFixedToFpD(FPSCR fpscr, int32_t val, bool half, uint8_t imm)
760{
761 fesetround(FeRoundNearest);
762 if (half)
763 val = sext<16>(val & mask(16));
764 double scale = pow(2.0, imm);
765 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
766 feclearexcept(FeAllExceptions);
767 __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
768 return fixDivDest(fpscr, val / scale, (double)val, scale);
769}
770
771template <class fpType>
772fpType
773FpOp::binaryOp(FPSCR &fpscr, fpType op1, fpType op2,
774 fpType (*func)(fpType, fpType),
775 bool flush, uint32_t rMode) const
776{
777 const bool single = (sizeof(fpType) == sizeof(float));
778 fpType junk = 0.0;
779
780 if (flush && flushToZero(op1, op2))
781 fpscr.idc = 1;
782 VfpSavedState state = prepFpState(rMode);
783 __asm__ __volatile__ ("" : "=m" (op1), "=m" (op2), "=m" (state)
784 : "m" (op1), "m" (op2), "m" (state));
785 fpType dest = func(op1, op2);
786 __asm__ __volatile__ ("" : "=m" (dest) : "m" (dest));
787
788 int fpClass = std::fpclassify(dest);
789 // Get NAN behavior right. This varies between x86 and ARM.
790 if (fpClass == FP_NAN) {
791 const bool single = (sizeof(fpType) == sizeof(float));
792 const uint64_t qnan =
793 single ? 0x7fc00000 : ULL(0x7ff8000000000000);
794 const bool nan1 = std::isnan(op1);
795 const bool nan2 = std::isnan(op2);
796 const bool signal1 = nan1 && ((fpToBits(op1) & qnan) != qnan);
797 const bool signal2 = nan2 && ((fpToBits(op2) & qnan) != qnan);
798 if ((!nan1 && !nan2) || (fpscr.dn == 1)) {
799 dest = bitsToFp(qnan, junk);
800 } else if (signal1) {
801 dest = bitsToFp(fpToBits(op1) | qnan, junk);
802 } else if (signal2) {
803 dest = bitsToFp(fpToBits(op2) | qnan, junk);
804 } else if (nan1) {
805 dest = op1;
806 } else if (nan2) {
807 dest = op2;
808 }
809 } else if (flush && flushToZero(dest)) {
810 feraiseexcept(FeUnderflow);
811 } else if ((
812 (single && (dest == bitsToFp(0x00800000, junk) ||
813 dest == bitsToFp(0x80800000, junk))) ||
814 (!single &&
815 (dest == bitsToFp(ULL(0x0010000000000000), junk) ||
816 dest == bitsToFp(ULL(0x8010000000000000), junk)))
817 ) && rMode != VfpRoundZero) {
818 /*
819 * Correct for the fact that underflow is detected -before- rounding
820 * in ARM and -after- rounding in x86.
821 */
822 fesetround(FeRoundZero);
823 __asm__ __volatile__ ("" : "=m" (op1), "=m" (op2)
824 : "m" (op1), "m" (op2));
825 fpType temp = func(op1, op2);
826 __asm__ __volatile__ ("" : "=m" (temp) : "m" (temp));
827 if (flush && flushToZero(temp)) {
828 dest = temp;
829 }
830 }
831 finishVfp(fpscr, state);
832 return dest;
833}
834
835template
836float FpOp::binaryOp(FPSCR &fpscr, float op1, float op2,
837 float (*func)(float, float),
838 bool flush, uint32_t rMode) const;
839template
840double FpOp::binaryOp(FPSCR &fpscr, double op1, double op2,
841 double (*func)(double, double),
842 bool flush, uint32_t rMode) const;
843
844template <class fpType>
845fpType
846FpOp::unaryOp(FPSCR &fpscr, fpType op1, fpType (*func)(fpType),
847 bool flush, uint32_t rMode) const
848{
849 const bool single = (sizeof(fpType) == sizeof(float));
850 fpType junk = 0.0;
851
852 if (flush && flushToZero(op1))
853 fpscr.idc = 1;
854 VfpSavedState state = prepFpState(rMode);
855 __asm__ __volatile__ ("" : "=m" (op1), "=m" (state)
856 : "m" (op1), "m" (state));
857 fpType dest = func(op1);
858 __asm__ __volatile__ ("" : "=m" (dest) : "m" (dest));
859
860 int fpClass = std::fpclassify(dest);
861 // Get NAN behavior right. This varies between x86 and ARM.
862 if (fpClass == FP_NAN) {
863 const bool single = (sizeof(fpType) == sizeof(float));
864 const uint64_t qnan =
865 single ? 0x7fc00000 : ULL(0x7ff8000000000000);
866 const bool nan = std::isnan(op1);
867 if (!nan || fpscr.dn == 1) {
868 dest = bitsToFp(qnan, junk);
869 } else if (nan) {
870 dest = bitsToFp(fpToBits(op1) | qnan, junk);
871 }
872 } else if (flush && flushToZero(dest)) {
873 feraiseexcept(FeUnderflow);
874 } else if ((
875 (single && (dest == bitsToFp(0x00800000, junk) ||
876 dest == bitsToFp(0x80800000, junk))) ||
877 (!single &&
878 (dest == bitsToFp(ULL(0x0010000000000000), junk) ||
879 dest == bitsToFp(ULL(0x8010000000000000), junk)))
880 ) && rMode != VfpRoundZero) {
881 /*
882 * Correct for the fact that underflow is detected -before- rounding
883 * in ARM and -after- rounding in x86.
884 */
885 fesetround(FeRoundZero);
886 __asm__ __volatile__ ("" : "=m" (op1) : "m" (op1));
887 fpType temp = func(op1);
888 __asm__ __volatile__ ("" : "=m" (temp) : "m" (temp));
889 if (flush && flushToZero(temp)) {
890 dest = temp;
891 }
892 }
893 finishVfp(fpscr, state);
894 return dest;
895}
896
897template
898float FpOp::unaryOp(FPSCR &fpscr, float op1, float (*func)(float),
899 bool flush, uint32_t rMode) const;
900template
901double FpOp::unaryOp(FPSCR &fpscr, double op1, double (*func)(double),
902 bool flush, uint32_t rMode) const;
903
904IntRegIndex
905VfpMacroOp::addStride(IntRegIndex idx, unsigned stride)
906{
907 if (wide) {
908 stride *= 2;
909 }
910 unsigned offset = idx % 8;
911 idx = (IntRegIndex)(idx - offset);
912 offset += stride;
913 idx = (IntRegIndex)(idx + (offset % 8));
914 return idx;
915}
916
917void
918VfpMacroOp::nextIdxs(IntRegIndex &dest, IntRegIndex &op1, IntRegIndex &op2)
919{
920 unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2;
921 assert(!inScalarBank(dest));
922 dest = addStride(dest, stride);
923 op1 = addStride(op1, stride);
924 if (!inScalarBank(op2)) {
925 op2 = addStride(op2, stride);
926 }
927}
928
929void
930VfpMacroOp::nextIdxs(IntRegIndex &dest, IntRegIndex &op1)
931{
932 unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2;
933 assert(!inScalarBank(dest));
934 dest = addStride(dest, stride);
935 if (!inScalarBank(op1)) {
936 op1 = addStride(op1, stride);
937 }
938}
939
940void
941VfpMacroOp::nextIdxs(IntRegIndex &dest)
942{
943 unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2;
944 assert(!inScalarBank(dest));
945 dest = addStride(dest, stride);
946}
947
948}