Cross Reference: /gem5/src/arch/arm/insts/vfp.hh

Deleted Added

sdiff udiff text old ( 7397:cbd950459a29 ) new ( 7398:063002e7106b )

full compact

1/*
2 * Copyright (c) 2010 ARM Limited
3 * All rights reserved
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software

--- 382 unchanged lines hidden (view full) ---

391 mid = temp;
392 }
393 }
394 __asm__ __volatile__("" :: "m" (temp));
395 }
396 return mid;
397}
398

399static inline float
400vcvtFpSFpH(FPSCR &fpscr, float op, float dest, bool top)
401{
402 float junk = 0.0;
403 uint32_t destBits = fpToBits(dest);
404 uint32_t opBits = fpToBits(op);
405 // Extract the operand.
406 bool neg = bits(opBits, 31);
407 uint32_t exponent = bits(opBits, 30, 23);
408 uint32_t oldMantissa = bits(opBits, 22, 0);
409 uint32_t mantissa = oldMantissa >> (23 - 10);
410 // Do the conversion.
411 uint32_t extra = oldMantissa & mask(23 - 10);
412 if (exponent == 0xff) {
413 if (oldMantissa != 0) {
414 // Nans.
415 if (bits(mantissa, 9) == 0) {
416 // Signalling nan.
417 fpscr.ioc = 1;
418 }
419 if (fpscr.ahp) {
420 mantissa = 0;
421 exponent = 0;
422 fpscr.ioc = 1;
423 } else if (fpscr.dn) {
424 mantissa = (1 << 9);
425 exponent = 0x1f;
426 neg = false;
427 } else {
428 exponent = 0x1f;
429 mantissa |= (1 << 9);
430 }
431 } else {
432 // Infinities.
433 exponent = 0x1F;
434 if (fpscr.ahp) {
435 fpscr.ioc = 1;
436 mantissa = 0x3ff;
437 } else {
438 mantissa = 0;
439 }
440 }
441 } else if (exponent == 0 && oldMantissa == 0) {
442 // Zero, don't need to do anything.
443 } else {
444 // Normalized or denormalized numbers.
445
446 bool inexact = (extra != 0);
447
448 if (exponent == 0) {
449 // Denormalized.
450
451 // If flush to zero is on, this shouldn't happen.
452 assert(fpscr.fz == 0);
453
454 // Check for underflow
455 if (inexact || fpscr.ufe)
456 fpscr.ufc = 1;
457
458 // Handle rounding.
459 unsigned mode = fpscr.rMode;
460 if ((mode == VfpRoundUpward && !neg && extra) ||
461 (mode == VfpRoundDown && neg && extra) ||
462 (mode == VfpRoundNearest &&
463 (extra > (1 << 9) ||
464 (extra == (1 << 9) && bits(mantissa, 0))))) {
465 mantissa++;
466 }
467
468 // See if the number became normalized after rounding.
469 if (mantissa == (1 << 10)) {
470 mantissa = 0;
471 exponent = 1;
472 }
473 } else {
474 // Normalized.
475
476 // We need to track the dropped bits differently since
477 // more can be dropped by denormalizing.
478 bool topOne = bits(extra, 12);
479 bool restZeros = bits(extra, 11, 0) == 0;
480
481 if (exponent <= (127 - 15)) {
482 // The result is too small. Denormalize.
483 mantissa |= (1 << 10);
484 while (mantissa && exponent <= (127 - 15)) {
485 restZeros = restZeros && !topOne;
486 topOne = bits(mantissa, 0);
487 mantissa = mantissa >> 1;
488 exponent++;
489 }
490 if (topOne || !restZeros)
491 inexact = true;
492 exponent = 0;
493 } else {
494 // Change bias.
495 exponent -= (127 - 15);
496 }
497
498 if (exponent == 0 && (inexact || fpscr.ufe)) {
499 // Underflow
500 fpscr.ufc = 1;
501 }
502
503 // Handle rounding.
504 unsigned mode = fpscr.rMode;
505 bool nonZero = topOne || !restZeros;
506 if ((mode == VfpRoundUpward && !neg && nonZero) ||
507 (mode == VfpRoundDown && neg && nonZero) ||
508 (mode == VfpRoundNearest && topOne &&
509 (!restZeros || bits(mantissa, 0)))) {
510 mantissa++;
511 }
512
513 // See if we rounded up and need to bump the exponent.
514 if (mantissa == (1 << 10)) {
515 mantissa = 0;
516 exponent++;
517 }
518
519 // Deal with overflow
520 if (fpscr.ahp) {
521 if (exponent >= 0x20) {
522 exponent = 0x1f;
523 mantissa = 0x3ff;
524 fpscr.ioc = 1;
525 // Supress inexact exception.
526 inexact = false;
527 }
528 } else {
529 if (exponent >= 0x1f) {
530 if ((mode == VfpRoundNearest) ||
531 (mode == VfpRoundUpward && !neg) ||
532 (mode == VfpRoundDown && neg)) {
533 // Overflow to infinity.
534 exponent = 0x1f;
535 mantissa = 0;
536 } else {
537 // Overflow to max normal.
538 exponent = 0x1e;
539 mantissa = 0x3ff;
540 }
541 fpscr.ofc = 1;
542 inexact = true;
543 }
544 }
545 }
546
547 if (inexact) {
548 fpscr.ixc = 1;
549 }
550 }
551 // Reassemble and install the result.
552 uint32_t result = bits(mantissa, 9, 0);
553 replaceBits(result, 14, 10, exponent);
554 if (neg)
555 result |= (1 << 15);
556 if (top)
557 replaceBits(destBits, 31, 16, result);
558 else
559 replaceBits(destBits, 15, 0, result);
560 return bitsToFp(destBits, junk);
561}
562
563static inline float
564vcvtFpHFpS(FPSCR &fpscr, float op, bool top)
565{
566 float junk = 0.0;
567 uint32_t opBits = fpToBits(op);
568 // Extract the operand.
569 if (top)
570 opBits = bits(opBits, 31, 16);
571 else
572 opBits = bits(opBits, 15, 0);
573 // Extract the bitfields.
574 bool neg = bits(opBits, 15);
575 uint32_t exponent = bits(opBits, 14, 10);
576 uint32_t mantissa = bits(opBits, 9, 0);
577 // Do the conversion.
578 if (exponent == 0) {
579 if (mantissa != 0) {
580 // Normalize the value.
581 exponent = exponent + (127 - 15) + 1;
582 while (mantissa < (1 << 10)) {
583 mantissa = mantissa << 1;
584 exponent--;
585 }
586 }
587 mantissa = mantissa << (23 - 10);
588 } else if (exponent == 0x1f && !fpscr.ahp) {
589 // Infinities and nans.
590 exponent = 0xff;
591 if (mantissa != 0) {
592 // Nans.
593 mantissa = mantissa << (23 - 10);
594 if (bits(mantissa, 22) == 0) {
595 // Signalling nan.
596 fpscr.ioc = 1;
597 mantissa |= (1 << 22);
598 }
599 if (fpscr.dn) {
600 mantissa &= ~mask(22);
601 neg = false;
602 }
603 }
604 } else {
605 exponent = exponent + (127 - 15);
606 mantissa = mantissa << (23 - 10);
607 }
608 // Reassemble the result.
609 uint32_t result = bits(mantissa, 22, 0);
610 replaceBits(result, 30, 23, exponent);
611 if (neg)
612 result |= (1 << 31);
613 return bitsToFp(result, junk);
614}
615

616static inline double
617makeDouble(uint32_t low, uint32_t high)
618{
619 double junk = 0.0;
620 return bitsToFp((uint64_t)low | ((uint64_t)high << 32), junk);
621}
622
623static inline uint32_t

--- 603 unchanged lines hidden ---