mediaop.isa (6732:4b93003bb069) mediaop.isa (6742:a2a79fe9655d)
1/// Copyright (c) 2009 The Regents of The University of Michigan
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are
6// met: redistributions of source code must retain the above copyright
7// notice, this list of conditions and the following disclaimer;
8// redistributions in binary form must reproduce the above copyright

--- 438 unchanged lines hidden (view full) ---

447 (i + 0) * srcBits);
448 unsigned signBit = bits(picked, srcBits - 1);
449 uint64_t overflow = bits(picked, srcBits - 1, destBits - 1);
450
451 // Handle saturation.
452 if (signBit) {
453 if (overflow != mask(destBits - srcBits + 1)) {
454 if (ext & 0x1)
1/// Copyright (c) 2009 The Regents of The University of Michigan
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are
6// met: redistributions of source code must retain the above copyright
7// notice, this list of conditions and the following disclaimer;
8// redistributions in binary form must reproduce the above copyright

--- 438 unchanged lines hidden (view full) ---

447 (i + 0) * srcBits);
448 unsigned signBit = bits(picked, srcBits - 1);
449 uint64_t overflow = bits(picked, srcBits - 1, destBits - 1);
450
451 // Handle saturation.
452 if (signBit) {
453 if (overflow != mask(destBits - srcBits + 1)) {
454 if (ext & 0x1)
455 picked = (1 << (destBits - 1));
455 picked = (ULL(1) << (destBits - 1));
456 else
457 picked = 0;
458 }
459 } else {
460 if (overflow != 0) {
461 if (ext & 0x1)
462 picked = mask(destBits - 1);
463 else

--- 11 unchanged lines hidden (view full) ---

475 (i - items + 0) * srcBits);
476 unsigned signBit = bits(picked, srcBits - 1);
477 uint64_t overflow = bits(picked, srcBits - 1, destBits - 1);
478
479 // Handle saturation.
480 if (signBit) {
481 if (overflow != mask(destBits - srcBits + 1)) {
482 if (ext & 0x1)
456 else
457 picked = 0;
458 }
459 } else {
460 if (overflow != 0) {
461 if (ext & 0x1)
462 picked = mask(destBits - 1);
463 else

--- 11 unchanged lines hidden (view full) ---

475 (i - items + 0) * srcBits);
476 unsigned signBit = bits(picked, srcBits - 1);
477 uint64_t overflow = bits(picked, srcBits - 1, destBits - 1);
478
479 // Handle saturation.
480 if (signBit) {
481 if (overflow != mask(destBits - srcBits + 1)) {
482 if (ext & 0x1)
483 picked = (1 << (destBits - 1));
483 picked = (ULL(1) << (destBits - 1));
484 else
485 picked = 0;
486 }
487 } else {
488 if (overflow != 0) {
489 if (ext & 0x1)
490 picked = mask(destBits - 1);
491 else

--- 145 unchanged lines hidden (view full) ---

637 int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
638 uint64_t result = FpDestReg.uqw;
639
640 for (int i = 0; i < items; i++) {
641 int hiIndex = (i + 1) * sizeBits - 1;
642 int loIndex = (i + 0) * sizeBits;
643 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
644 int64_t arg1 = arg1Bits |
484 else
485 picked = 0;
486 }
487 } else {
488 if (overflow != 0) {
489 if (ext & 0x1)
490 picked = mask(destBits - 1);
491 else

--- 145 unchanged lines hidden (view full) ---

637 int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
638 uint64_t result = FpDestReg.uqw;
639
640 for (int i = 0; i < items; i++) {
641 int hiIndex = (i + 1) * sizeBits - 1;
642 int loIndex = (i + 0) * sizeBits;
643 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
644 int64_t arg1 = arg1Bits |
645 (0 - (arg1Bits & (1 << (sizeBits - 1))));
645 (0 - (arg1Bits & (ULL(1) << (sizeBits - 1))));
646 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
647 int64_t arg2 = arg2Bits |
646 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
647 int64_t arg2 = arg2Bits |
648 (0 - (arg2Bits & (1 << (sizeBits - 1))));
648 (0 - (arg2Bits & (ULL(1) << (sizeBits - 1))));
649 uint64_t resBits;
650
651 if (ext & 0x2) {
652 if (arg1 < arg2) {
653 resBits = arg1Bits;
654 } else {
655 resBits = arg2Bits;
656 }

--- 18 unchanged lines hidden (view full) ---

675 int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
676 uint64_t result = FpDestReg.uqw;
677
678 for (int i = 0; i < items; i++) {
679 int hiIndex = (i + 1) * sizeBits - 1;
680 int loIndex = (i + 0) * sizeBits;
681 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
682 int64_t arg1 = arg1Bits |
649 uint64_t resBits;
650
651 if (ext & 0x2) {
652 if (arg1 < arg2) {
653 resBits = arg1Bits;
654 } else {
655 resBits = arg2Bits;
656 }

--- 18 unchanged lines hidden (view full) ---

675 int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
676 uint64_t result = FpDestReg.uqw;
677
678 for (int i = 0; i < items; i++) {
679 int hiIndex = (i + 1) * sizeBits - 1;
680 int loIndex = (i + 0) * sizeBits;
681 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
682 int64_t arg1 = arg1Bits |
683 (0 - (arg1Bits & (1 << (sizeBits - 1))));
683 (0 - (arg1Bits & (ULL(1) << (sizeBits - 1))));
684 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
685 int64_t arg2 = arg2Bits |
684 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
685 int64_t arg2 = arg2Bits |
686 (0 - (arg2Bits & (1 << (sizeBits - 1))));
686 (0 - (arg2Bits & (ULL(1) << (sizeBits - 1))));
687 uint64_t resBits;
688
689 if (ext & 0x2) {
690 if (arg1 > arg2) {
691 resBits = arg1Bits;
692 } else {
693 resBits = arg2Bits;
694 }

--- 257 unchanged lines hidden (view full) ---

952 if (findCarry(sizeBits, resBits, arg1Bits, arg2Bits))
953 resBits = mask(sizeBits);
954 } else if (ext & 0x4) {
955 int arg1Sign = bits(arg1Bits, sizeBits - 1);
956 int arg2Sign = bits(arg2Bits, sizeBits - 1);
957 int resSign = bits(resBits, sizeBits - 1);
958 if ((arg1Sign == arg2Sign) && (arg1Sign != resSign)) {
959 if (resSign == 0)
687 uint64_t resBits;
688
689 if (ext & 0x2) {
690 if (arg1 > arg2) {
691 resBits = arg1Bits;
692 } else {
693 resBits = arg2Bits;
694 }

--- 257 unchanged lines hidden (view full) ---

952 if (findCarry(sizeBits, resBits, arg1Bits, arg2Bits))
953 resBits = mask(sizeBits);
954 } else if (ext & 0x4) {
955 int arg1Sign = bits(arg1Bits, sizeBits - 1);
956 int arg2Sign = bits(arg2Bits, sizeBits - 1);
957 int resSign = bits(resBits, sizeBits - 1);
958 if ((arg1Sign == arg2Sign) && (arg1Sign != resSign)) {
959 if (resSign == 0)
960 resBits = (1 << (sizeBits - 1));
960 resBits = (ULL(1) << (sizeBits - 1));
961 else
962 resBits = mask(sizeBits - 1);
963 }
964 }
965
966 result = insertBits(result, hiIndex, loIndex, resBits);
967 }
968 FpDestReg.uqw = result;

--- 22 unchanged lines hidden (view full) ---

991 resBits = mask(sizeBits);
992 }
993 } else if (ext & 0x4) {
994 int arg1Sign = bits(arg1Bits, sizeBits - 1);
995 int arg2Sign = !bits(arg2Bits, sizeBits - 1);
996 int resSign = bits(resBits, sizeBits - 1);
997 if ((arg1Sign == arg2Sign) && (arg1Sign != resSign)) {
998 if (resSign == 0)
961 else
962 resBits = mask(sizeBits - 1);
963 }
964 }
965
966 result = insertBits(result, hiIndex, loIndex, resBits);
967 }
968 FpDestReg.uqw = result;

--- 22 unchanged lines hidden (view full) ---

991 resBits = mask(sizeBits);
992 }
993 } else if (ext & 0x4) {
994 int arg1Sign = bits(arg1Bits, sizeBits - 1);
995 int arg2Sign = !bits(arg2Bits, sizeBits - 1);
996 int resSign = bits(resBits, sizeBits - 1);
997 if ((arg1Sign == arg2Sign) && (arg1Sign != resSign)) {
998 if (resSign == 0)
999 resBits = (1 << (sizeBits - 1));
999 resBits = (ULL(1) << (sizeBits - 1));
1000 else
1001 resBits = mask(sizeBits - 1);
1002 }
1003 }
1004
1005 result = insertBits(result, hiIndex, loIndex, resBits);
1006 }
1007 FpDestReg.uqw = result;

--- 19 unchanged lines hidden (view full) ---

1027 int srcHiIndex = (i + 1) * srcBits - 1 + offset;
1028 int srcLoIndex = (i + 0) * srcBits + offset;
1029 uint64_t arg1Bits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex);
1030 uint64_t arg2Bits = bits(FpSrcReg2.uqw, srcHiIndex, srcLoIndex);
1031 uint64_t resBits;
1032
1033 if (ext & 0x2) {
1034 int64_t arg1 = arg1Bits |
1000 else
1001 resBits = mask(sizeBits - 1);
1002 }
1003 }
1004
1005 result = insertBits(result, hiIndex, loIndex, resBits);
1006 }
1007 FpDestReg.uqw = result;

--- 19 unchanged lines hidden (view full) ---

1027 int srcHiIndex = (i + 1) * srcBits - 1 + offset;
1028 int srcLoIndex = (i + 0) * srcBits + offset;
1029 uint64_t arg1Bits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex);
1030 uint64_t arg2Bits = bits(FpSrcReg2.uqw, srcHiIndex, srcLoIndex);
1031 uint64_t resBits;
1032
1033 if (ext & 0x2) {
1034 int64_t arg1 = arg1Bits |
1035 (0 - (arg1Bits & (1 << (srcBits - 1))));
1035 (0 - (arg1Bits & (ULL(1) << (srcBits - 1))));
1036 int64_t arg2 = arg2Bits |
1036 int64_t arg2 = arg2Bits |
1037 (0 - (arg2Bits & (1 << (srcBits - 1))));
1037 (0 - (arg2Bits & (ULL(1) << (srcBits - 1))));
1038 resBits = (uint64_t)(arg1 * arg2);
1039 } else {
1040 resBits = arg1Bits * arg2Bits;
1041 }
1042
1043 if (ext & 0x4)
1038 resBits = (uint64_t)(arg1 * arg2);
1039 } else {
1040 resBits = arg1Bits * arg2Bits;
1041 }
1042
1043 if (ext & 0x4)
1044 resBits += (1 << (destBits - 1));
1044 resBits += (ULL(1) << (destBits - 1));
1045
1046 if (ext & 0x8)
1047 resBits >>= destBits;
1048
1049 int destHiIndex = (i + 1) * destBits - 1;
1050 int destLoIndex = (i + 0) * destBits;
1051 result = insertBits(result, destHiIndex, destLoIndex, resBits);
1052 }

--- 84 unchanged lines hidden (view full) ---

1137 if (shiftAmt >= sizeBits) {
1138 if (bits(arg1Bits, sizeBits - 1))
1139 resBits = mask(sizeBits);
1140 else
1141 resBits = 0;
1142 } else {
1143 resBits = (arg1Bits >> shiftAmt);
1144 resBits = resBits |
1045
1046 if (ext & 0x8)
1047 resBits >>= destBits;
1048
1049 int destHiIndex = (i + 1) * destBits - 1;
1050 int destLoIndex = (i + 0) * destBits;
1051 result = insertBits(result, destHiIndex, destLoIndex, resBits);
1052 }

--- 84 unchanged lines hidden (view full) ---

1137 if (shiftAmt >= sizeBits) {
1138 if (bits(arg1Bits, sizeBits - 1))
1139 resBits = mask(sizeBits);
1140 else
1141 resBits = 0;
1142 } else {
1143 resBits = (arg1Bits >> shiftAmt);
1144 resBits = resBits |
1145 (0 - (resBits & (1 << (sizeBits - 1 - shiftAmt))));
1145 (0 - (resBits & (ULL(1) << (sizeBits - 1 - shiftAmt))));
1146 }
1147
1148 result = insertBits(result, hiIndex, loIndex, resBits);
1149 }
1150 FpDestReg.uqw = result;
1151 '''
1152
1153 class Msll(MediaOp):

--- 130 unchanged lines hidden (view full) ---

1284 items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize;
1285 }
1286 uint64_t result = FpDestReg.uqw;
1287
1288 for (int i = 0; i < items; i++) {
1289 int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
1290 int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
1291 uint64_t argBits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex);
1146 }
1147
1148 result = insertBits(result, hiIndex, loIndex, resBits);
1149 }
1150 FpDestReg.uqw = result;
1151 '''
1152
1153 class Msll(MediaOp):

--- 130 unchanged lines hidden (view full) ---

1284 items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize;
1285 }
1286 uint64_t result = FpDestReg.uqw;
1287
1288 for (int i = 0; i < items; i++) {
1289 int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
1290 int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
1291 uint64_t argBits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex);
1292 int64_t sArg = argBits | (0 - (argBits & (1 << srcHiIndex)));
1292
1293 int64_t sArg = argBits | (0 - (argBits & (ULL(1) << srcHiIndex)));
1293 double arg = sArg;
1294
1295 if (destSize == 4) {
1296 floatInt fi;
1297 fi.f = arg;
1298 argBits = fi.i;
1299 } else {
1300 doubleInt di;

--- 94 unchanged lines hidden (view full) ---

1395 int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
1396 uint64_t result = FpDestReg.uqw;
1397
1398 for (int i = 0; i < items; i++) {
1399 int hiIndex = (i + 1) * sizeBits - 1;
1400 int loIndex = (i + 0) * sizeBits;
1401 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
1402 int64_t arg1 = arg1Bits |
1294 double arg = sArg;
1295
1296 if (destSize == 4) {
1297 floatInt fi;
1298 fi.f = arg;
1299 argBits = fi.i;
1300 } else {
1301 doubleInt di;

--- 94 unchanged lines hidden (view full) ---

1396 int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
1397 uint64_t result = FpDestReg.uqw;
1398
1399 for (int i = 0; i < items; i++) {
1400 int hiIndex = (i + 1) * sizeBits - 1;
1401 int loIndex = (i + 0) * sizeBits;
1402 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
1403 int64_t arg1 = arg1Bits |
1403 (0 - (arg1Bits & (1 << (sizeBits - 1))));
1404 (0 - (arg1Bits & (ULL(1) << (sizeBits - 1))));
1404 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
1405 int64_t arg2 = arg2Bits |
1405 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
1406 int64_t arg2 = arg2Bits |
1406 (0 - (arg2Bits & (1 << (sizeBits - 1))));
1407 (0 - (arg2Bits & (ULL(1) << (sizeBits - 1))));
1407
1408 uint64_t resBits = 0;
1409 if (((ext & 0x2) == 0 && arg1 == arg2) ||
1410 ((ext & 0x2) == 0x2 && arg1 > arg2))
1411 resBits = mask(sizeBits);
1412
1413 result = insertBits(result, hiIndex, loIndex, resBits);
1414 }

--- 140 unchanged lines hidden ---
1408
1409 uint64_t resBits = 0;
1410 if (((ext & 0x2) == 0 && arg1 == arg2) ||
1411 ((ext & 0x2) == 0x2 && arg1 > arg2))
1412 resBits = mask(sizeBits);
1413
1414 result = insertBits(result, hiIndex, loIndex, resBits);
1415 }

--- 140 unchanged lines hidden ---