603 msg.str().c_str()); 604 return 0; 605 } 606 } 607 608 return convert_signed_SM_to_2C_to_SM(s, unb, und, u); 609} 610 611 612// All vec_ functions assume that the vector to hold the result, 613// called w, has sufficient length to hold the result. For efficiency 614// reasons, we do not test whether or not we are out of bounds. 615 616// Compute w = u + v, where w, u, and v are vectors. 617// - ulen >= vlen 618// - wlen >= sc_max(ulen, vlen) + 1 619void 620vec_add(int ulen, const sc_digit *u, int vlen, const sc_digit *v, sc_digit *w) 621{ 622#ifdef DEBUG_SYSTEMC 623 sc_assert((ulen > 0) && (u != NULL)); 624 sc_assert((vlen > 0) && (v != NULL)); 625 sc_assert(w != NULL); 626 sc_assert(ulen >= vlen); 627#endif 628 629 const sc_digit *uend = (u + ulen); 630 const sc_digit *vend = (v + vlen); 631 632 sc_digit carry = 0; // Also used as sum to save space. 633 634 // Add along the shorter v. 635 while (v < vend) { 636 carry += (*u++) + (*v++); 637 (*w++) = carry & DIGIT_MASK; 638 carry >>= BITS_PER_DIGIT; 639 } 640 641 // Propagate the carry. 642 while (carry && (u < uend)) { 643 carry = (*u++) + 1; 644 (*w++) = carry & DIGIT_MASK; 645 carry >>= BITS_PER_DIGIT; 646 } 647 648 // Copy the rest of u to the result. 649 while (u < uend) 650 (*w++) = (*u++); 651 652 // Propagate the carry if it is still 1. 653 if (carry) 654 (*w) = 1; 655} 656 657 658// Compute u += v, where u and v are vectors. 659// - ulen >= vlen 660void 661vec_add_on(int ulen, sc_digit *ubegin, int vlen, const sc_digit *v) 662{ 663#ifdef DEBUG_SYSTEMC 664 sc_assert((ulen > 0) && (ubegin != NULL)); 665 sc_assert((vlen > 0) && (v != NULL)); 666 sc_assert(ulen >= vlen); 667#endif 668 669 sc_digit *u = ubegin; 670 const sc_digit *uend = (u + ulen); 671 const sc_digit *vend = (v + vlen); 672 673 sc_digit carry = 0; // Also used as sum to save space. 674 675 // Add along the shorter v. 
676 while (v < vend) { 677 carry += (*u) + (*v++); 678 (*u++) = carry & DIGIT_MASK; 679 carry >>= BITS_PER_DIGIT; 680 } 681 682 // Propagate the carry. 683 while (carry && (u < uend)) { 684 carry = (*u) + 1; 685 (*u++) = carry & DIGIT_MASK; 686 carry >>= BITS_PER_DIGIT; 687 } 688 689#ifdef DEBUG_SYSTEMC 690 if (carry != 0) { 691 SC_REPORT_WARNING(sc_core::SC_ID_WITHOUT_MESSAGE_, 692 "vec_add_on( int, sc_digit*, int, const " 693 "sc_digit* ) : " 694 "result of addition is wrapped around"); 695 } 696#endif 697} 698 699 700// Compute u += v, where u and v are vectors. 701// - ulen < vlen 702void 703vec_add_on2(int ulen, sc_digit *ubegin, int, 704#ifdef DEBUG_SYSTEMC 705 vlen, 706#endif 707 const sc_digit *v) 708{ 709#ifdef DEBUG_SYSTEMC 710 sc_assert((ulen > 0) && (ubegin != NULL)); 711 sc_assert((vlen > 0) && (v != NULL)); 712 sc_assert(ulen < vlen); 713#endif 714 715 sc_digit *u = ubegin; 716 const sc_digit *uend = (u + ulen); 717 718 sc_digit carry = 0; // Also used as sum to save space. 719 720 // Add along the shorter u. 721 while (u < uend) { 722 carry += (*u) + (*v++); 723 (*u++) = carry & DIGIT_MASK; 724 carry >>= BITS_PER_DIGIT; 725 } 726 727#ifdef DEBUG_SYSTEMC 728 if (carry != 0) { 729 SC_REPORT_WARNING(sc_core::SC_ID_WITHOUT_MESSAGE_, 730 "vec_add_on2( int, sc_digit*, int, const " 731 "sc_digit* ) : " 732 "result of addition is wrapped around"); 733 } 734#endif 735} 736 737 738// Compute w = u + v, where w and u are vectors, and v is a scalar. 739void 740vec_add_small(int ulen, const sc_digit *u, sc_digit v, sc_digit *w) 741{ 742#ifdef DEBUG_SYSTEMC 743 sc_assert((ulen > 0) && (u != NULL)); 744 sc_assert(w != NULL); 745#endif 746 747 const sc_digit *uend = (u + ulen); 748 749 // Add along the shorter v. 750 sc_digit carry = (*u++) + v; 751 (*w++) = carry & DIGIT_MASK; 752 carry >>= BITS_PER_DIGIT; 753 754 // Propagate the carry. 
755 while (carry && (u < uend)) { 756 carry = (*u++) + 1; 757 (*w++) = carry & DIGIT_MASK; 758 carry >>= BITS_PER_DIGIT; 759 } 760 761 // Copy the rest of u to the result. 762 while (u < uend) 763 (*w++) = (*u++); 764 765 // Propagate the carry if it is still 1. 766 if (carry) 767 (*w) = 1; 768} 769 770// Compute u += v, where u is vectors, and v is a scalar. 771void 772vec_add_small_on(int ulen, sc_digit *u, sc_digit v) 773{ 774#ifdef DEBUG_SYSTEMC 775 sc_assert((ulen > 0) && (u != NULL)); 776#endif 777 778 int i = 0; 779 780 while (v && (i < ulen)) { 781 v += u[i]; 782 u[i++] = v & DIGIT_MASK; 783 v >>= BITS_PER_DIGIT; 784 } 785 786#ifdef DEBUG_SYSTEMC 787 if (v != 0) { 788 SC_REPORT_WARNING(sc_core::SC_ID_WITHOUT_MESSAGE_, 789 "vec_add_small_on( int, sc_digit*, unsigned " 790 "long ) : " 791 "result of addition is wrapped around"); 792 } 793#endif 794} 795 796// Compute w = u - v, where w, u, and v are vectors. 797// - ulen >= vlen 798// - wlen >= sc_max(ulen, vlen) 799void 800vec_sub(int ulen, const sc_digit *u, int vlen, const sc_digit *v, sc_digit *w) 801{ 802#ifdef DEBUG_SYSTEMC 803 sc_assert((ulen > 0) && (u != NULL)); 804 sc_assert((vlen > 0) && (v != NULL)); 805 sc_assert(w != NULL); 806 sc_assert(ulen >= vlen); 807#endif 808 809 const sc_digit *uend = (u + ulen); 810 const sc_digit *vend = (v + vlen); 811 812 sc_digit borrow = 0; // Also used as diff to save space. 813 814 // Subtract along the shorter v. 815 while (v < vend) { 816 borrow = ((*u++) + DIGIT_RADIX) - (*v++) - borrow; 817 (*w++) = borrow & DIGIT_MASK; 818 borrow = 1 - (borrow >> BITS_PER_DIGIT); 819 } 820 821 // Propagate the borrow. 822 while (borrow && (u < uend)) { 823 borrow = ((*u++) + DIGIT_RADIX) - 1; 824 (*w++) = borrow & DIGIT_MASK; 825 borrow = 1 - (borrow >> BITS_PER_DIGIT); 826 } 827 828#ifdef DEBUG_SYSTEMC 829 sc_assert(borrow == 0); 830#endif 831 832 // Copy the rest of u to the result. 
833 while (u < uend) 834 (*w++) = (*u++); 835} 836 837// Compute u = u - v, where u and v are vectors. 838// - u > v 839// - ulen >= vlen 840void 841vec_sub_on(int ulen, sc_digit *ubegin, int vlen, const sc_digit *v) 842{ 843#ifdef DEBUG_SYSTEMC 844 sc_assert((ulen > 0) && (ubegin != NULL)); 845 sc_assert((vlen > 0) && (v != NULL)); 846 sc_assert(ulen >= vlen); 847#endif 848 849 sc_digit *u = ubegin; 850 const sc_digit *uend = (u + ulen); 851 const sc_digit *vend = (v + vlen); 852 853 sc_digit borrow = 0; // Also used as diff to save space. 854 855 // Subtract along the shorter v. 856 while (v < vend) { 857 borrow = ((*u) + DIGIT_RADIX) - (*v++) - borrow; 858 (*u++) = borrow & DIGIT_MASK; 859 borrow = 1 - (borrow >> BITS_PER_DIGIT); 860 } 861 862 // Propagate the borrow. 863 while (borrow && (u < uend)) { 864 borrow = ((*u) + DIGIT_RADIX) - 1; 865 (*u++) = borrow & DIGIT_MASK; 866 borrow = 1 - (borrow >> BITS_PER_DIGIT); 867 } 868 869#ifdef DEBUG_SYSTEMC 870 sc_assert(borrow == 0); 871#endif 872} 873 874// Compute u = v - u, where u and v are vectors. 875// - v > u 876// - ulen <= vlen or ulen > ulen 877void 878vec_sub_on2(int ulen, sc_digit *ubegin, int vlen, const sc_digit *v) 879{ 880#ifdef DEBUG_SYSTEMC 881 sc_assert((ulen > 0) && (ubegin != NULL)); 882 sc_assert((vlen > 0) && (v != NULL)); 883#endif 884 885 sc_digit *u = ubegin; 886 const sc_digit *uend = (u + sc_min(ulen, vlen)); 887 888 sc_digit borrow = 0; // Also used as diff to save space. 889 890 // Subtract along the shorter u. 891 while (u < uend) { 892 borrow = ((*v++) + DIGIT_RADIX) - (*u) - borrow; 893 (*u++) = borrow & DIGIT_MASK; 894 borrow = 1 - (borrow >> BITS_PER_DIGIT); 895 } 896 897#ifdef DEBUG_SYSTEMC 898 if (borrow != 0) { 899 SC_REPORT_WARNING(sc_core::SC_ID_WITHOUT_MESSAGE_, 900 "vec_sub_on2( int, sc_digit*, int, const " 901 "sc_digit* ) : " 902 "result of subtraction is wrapped around"); 903 } 904#endif 905} 906 907// Compute w = u - v, where w and u are vectors, and v is a scalar. 
// - Assumes u >= v as numbers, so the final borrow must be zero
//   (asserted in debug builds). wlen must be >= ulen.
void
vec_sub_small(int ulen, const sc_digit *u, sc_digit v, sc_digit *w)
{
#ifdef DEBUG_SYSTEMC
    sc_assert(ulen > 0);
    sc_assert(u != NULL);
#endif

    const sc_digit *uend = (u + ulen);

    // Subtract v from the least significant digit of u. Adding
    // DIGIT_RADIX first keeps the intermediate value non-negative; the
    // bit at position BITS_PER_DIGIT then indicates "no borrow", so
    // (1 - (borrow >> BITS_PER_DIGIT)) is the borrow into the next digit.
    sc_digit borrow = ((*u++) + DIGIT_RADIX) - v;
    (*w++) = borrow & DIGIT_MASK;
    borrow = 1 - (borrow >> BITS_PER_DIGIT);

    // Propagate the borrow up through the remaining digits.
    while (borrow && (u < uend)) {
        borrow = ((*u++) + DIGIT_RADIX) - 1;
        (*w++) = borrow & DIGIT_MASK;
        borrow = 1 - (borrow >> BITS_PER_DIGIT);
    }

#ifdef DEBUG_SYSTEMC
    sc_assert(borrow == 0);
#endif

    // Copy the rest of u to the result.
    while (u < uend)
        (*w++) = (*u++);
}


// Compute u -= v, where u is a vector, and v is a scalar.
// - Assumes u >= v as numbers, so the final borrow must be zero
//   (asserted in debug builds).
void
vec_sub_small_on(int ulen, sc_digit *u, sc_digit v)
{
#ifdef DEBUG_SYSTEMC
    sc_assert((ulen > 0) && (u != NULL));
#endif

    // v doubles as the running borrow: after the first iteration it
    // holds only 0 or 1.
    for (int i = 0; i < ulen; ++i) {
        v = (u[i] + DIGIT_RADIX) - v;
        u[i] = v & DIGIT_MASK;
        v = 1 - (v >> BITS_PER_DIGIT);
    }

#ifdef DEBUG_SYSTEMC
    sc_assert(v == 0);
#endif
}

// Compute w = u * v, where w, u, and v are vectors.
// Schoolbook multiplication done in half-digit chunks so that no
// intermediate product can overflow an sc_digit.
// - wlen must be >= ulen + vlen; w is assumed to be zero-initialized,
//   since each partial product is accumulated into (*w).
void
vec_mul(int ulen, const sc_digit *u, int vlen, const sc_digit *vbegin,
        sc_digit *wbegin)
{

    /* Consider u = Ax + B and v = Cx + D where x is equal to
       HALF_DIGIT_RADIX. In other words, A is the higher half of u and
       B is the lower half of u. The interpretation for v is
       similar. Then, we have the following picture:

                      u_h      u_l
       u:          -------- --------
                      A        B

                      v_h      v_l
       v:          -------- --------
                      C        D

       result (d):
       carry_before:                   -------- --------
                                        carry_h  carry_l
       result_before: -------- -------- -------- --------
                        R1_h     R1_l     R0_h     R0_l
                                        -------- --------
                                          BD_h     BD_l
                               -------- --------
                                 AD_h     AD_l
                               -------- --------
                                 BC_h     BC_l
                      -------- --------
                        AC_h     AC_l
       result_after:  -------- -------- -------- --------
                        R1_h'    R1_l'    R0_h'    R0_l'

       prod_l = R0_h|R0_l + B * D + 0|carry_l
              = R0_h|R0_l + BD_h|BD_l + 0|carry_l

       prod_h = A * D + B * C + high_half(prod_l) + carry_h
              = AD_h|AD_l + BC_h|BC_l + high_half(prod_l) + 0|carry_h

       carry = A * C + high_half(prod_h)
             = AC_h|AC_l + high_half(prod_h)

       R0_l' = low_half(prod_l)

       R0_h' = low_half(prod_h)

       R0 = high_half(prod_h)|low_half(prod_l)

       where '|' is the concatenation operation and the suffixes 0 and 1
       show the iteration number, i.e., 0 is the current iteration and 1
       is the next iteration.

       NOTE: sc_max(prod_l, prod_h, carry) <= 2 * x^2 - 1, so any
       of these numbers can be stored in a digit.

       NOTE: low_half(u) returns the lower BITS_PER_HALF_DIGIT of u,
       whereas high_half(u) returns the rest of the bits, which may
       contain more bits than BITS_PER_HALF_DIGIT.
    */

#ifdef DEBUG_SYSTEMC
    sc_assert((ulen > 0) && (u != NULL));
    sc_assert((vlen > 0) && (vbegin != NULL));
    sc_assert(wbegin != NULL);
#endif

// prod_h shares storage with carry (they are never live at the same
// time); the macro keeps the formulas above readable without an extra
// local variable.
#define prod_h carry
    const sc_digit *uend = (u + ulen);
    const sc_digit *vend = (vbegin + vlen);

    while (u < uend) {
        sc_digit u_h = (*u++); // A|B
        sc_digit u_l = low_half(u_h); // B
        u_h = high_half(u_h); // A

#ifdef DEBUG_SYSTEMC
        // The overflow bits must be zero.
        sc_assert(u_h == (u_h & HALF_DIGIT_MASK));
#endif
        sc_digit carry = 0;
        // Each digit of u contributes one diagonal of partial products,
        // starting one position further left in w.
        sc_digit *w = (wbegin++);
        const sc_digit *v = vbegin;

        while (v < vend) {
            sc_digit v_h = (*v++); // C|D
            sc_digit v_l = low_half(v_h); // D

            v_h = high_half(v_h); // C

#ifdef DEBUG_SYSTEMC
            // The overflow bits must be zero.
            sc_assert(v_h == (v_h & HALF_DIGIT_MASK));
#endif

            sc_digit prod_l = (*w) + u_l * v_l + low_half(carry);
            prod_h = u_h * v_l + u_l * v_h +
                high_half(prod_l) + high_half(carry);
            (*w++) = concat(low_half(prod_h), low_half(prod_l));
            carry = u_h * v_h + high_half(prod_h);
        }
        // Deposit the final carry of this diagonal.
        (*w) = carry;
    }
#undef prod_h
}

// Compute w = u * v, where w and u are vectors, and v is a scalar.
// - 0 < v < HALF_DIGIT_RADIX, so v * half_digit cannot overflow.
// - wlen must be >= ulen + 1 (the final carry digit is stored).
void
vec_mul_small(int ulen, const sc_digit *u, sc_digit v, sc_digit *w)
{
#ifdef DEBUG_SYSTEMC
    sc_assert((ulen > 0) && (u != NULL));
    sc_assert(w != NULL);
    sc_assert((0 < v) && (v < HALF_DIGIT_RADIX));
#endif

// See vec_mul() for why prod_h may share storage with carry.
#define prod_h carry

    const sc_digit *uend = (u + ulen);
    sc_digit carry = 0;
    while (u < uend) {
        sc_digit u_AB = (*u++);
#ifdef DEBUG_SYSTEMC
        // The overflow bits must be zero.
        sc_assert(high_half(u_AB) == high_half_masked(u_AB));
#endif
        // Multiply the two halves of u_AB separately and recombine.
        sc_digit prod_l = v * low_half(u_AB) + low_half(carry);
        prod_h = v * high_half(u_AB) + high_half(prod_l) + high_half(carry);
        (*w++) = concat(low_half(prod_h), low_half(prod_l));
        carry = high_half(prod_h);
    }
    // The last carry becomes the most significant digit of w.
    (*w) = carry;
#undef prod_h
}

// Compute u = u * v, where u is a vector, and v is a scalar.
// - 0 < v < HALF_DIGIT_RADIX.
void
vec_mul_small_on(int ulen, sc_digit *u, sc_digit v)
{
#ifdef DEBUG_SYSTEMC
    sc_assert((ulen > 0) && (u != NULL));
    sc_assert((0 < v) && (v < HALF_DIGIT_RADIX));
#endif

// prod_h shares storage with carry; see vec_mul() for the rationale.
#define prod_h carry
    sc_digit carry = 0;
    for (int i = 0; i < ulen; ++i) {
#ifdef DEBUG_SYSTEMC
        // The overflow bits must be zero.
        sc_assert(high_half(u[i]) == high_half_masked(u[i]));
#endif
        // Same half-digit scheme as vec_mul_small(), but in place.
        sc_digit prod_l = v * low_half(u[i]) + low_half(carry);
        prod_h = v * high_half(u[i]) + high_half(prod_l) + high_half(carry);
        u[i] = concat(low_half(prod_h), low_half(prod_l));
        carry = high_half(prod_h);
    }
#undef prod_h

#ifdef DEBUG_SYSTEMC
    // Unlike vec_mul_small(), there is no extra digit to hold the final
    // carry, so a non-zero carry means the result wrapped around.
    if (carry != 0) {
        SC_REPORT_WARNING(sc_core::SC_ID_WITHOUT_MESSAGE_,
                          "vec_mul_small_on( int, sc_digit*, unsigned "
                          "long ) : "
                          "result of multiplication is wrapped around");
    }
#endif
}

// Compute w = u / v, where w, u, and v are vectors.
// - u and v are assumed to have at least two digits as uchars.
// Classical long division (cf. Knuth's Algorithm D) performed in base
// BYTE_RADIX: each quotient digit is guessed from the top 3 bytes of
// the dividend and top 2 bytes of the divisor, then corrected by at
// most one.
void
vec_div_large(int ulen, const sc_digit *u, int vlen, const sc_digit *v,
              sc_digit *w)
{
#ifdef DEBUG_SYSTEMC
    sc_assert((ulen > 0) && (u != NULL));
    sc_assert((vlen > 0) && (v != NULL));
    sc_assert(w != NULL);
    sc_assert(BITS_PER_DIGIT >= 3 * BITS_PER_BYTE);
#endif

    // We will compute q = x / y where x = u and y = v. The reason for
    // using x and y is that x and y are BYTE_RADIX copies of u and v,
    // respectively. The use of BYTE_RADIX radix greatly simplifies the
    // complexity of the division operation. These copies are also
    // needed even when we use DIGIT_RADIX representation.

    int xlen = BYTES_PER_DIGIT * ulen + 1;
    int ylen = BYTES_PER_DIGIT * vlen;

#ifdef SC_MAX_NBITS
    uchar x[DIV_CEIL2(SC_MAX_NBITS, BITS_PER_BYTE)];
    uchar y[DIV_CEIL2(SC_MAX_NBITS, BITS_PER_BYTE)];
    uchar q[DIV_CEIL2(SC_MAX_NBITS, BITS_PER_BYTE)];
#else
    uchar *x = new uchar[xlen];
    uchar *y = new uchar[ylen];
    // valgrind complains about us accessing too far, so leave a buffer.
    uchar *q = new uchar[(xlen - ylen) + 10];
#endif

    // q corresponds to w.

    // Set (uchar) x = (sc_digit) u.
    xlen = vec_to_char(ulen, u, xlen, x);

    // Skip all the leading zeros in x.
    while ((--xlen >= 0) && (! x[xlen]))
        continue;
    xlen++;

    // Set (uchar) y = (sc_digit) v.
    ylen = vec_to_char(vlen, v, ylen, y);

    // Skip all the leading zeros in y.
    while ((--ylen >= 0) && (! y[ylen]))
        continue;
    ylen++;

#ifdef DEBUG_SYSTEMC
    sc_assert(xlen > 1);
    sc_assert(ylen > 1);
#endif

    // At this point, all the leading zeros are eliminated from x and y.

    // Zero the last digit of x. It acts as the extra top byte that
    // Algorithm D requires for the first quotient-digit guess.
    x[xlen] = 0;

    // The first two digits of y.
    sc_digit y2 = (y[ylen - 1] << BITS_PER_BYTE) + y[ylen - 2];

    const sc_digit DOUBLE_BITS_PER_BYTE = 2 * BITS_PER_BYTE;

    // Find each q[k].
    for (int k = (xlen - ylen); k >= 0; --k) {
        // qk is a guess for q[k] such that q[k] = qk or qk - 1.
        sc_digit qk;

        // Find qk by just using 2 digits of y and 3 digits of x. The
        // following code assumes that sizeof(sc_digit) >= 3 BYTEs.
        int k2 = k + ylen;

        qk = ((x[k2] << DOUBLE_BITS_PER_BYTE) +
              (x[k2 - 1] << BITS_PER_BYTE) + x[k2 - 2]) / y2;

        if (qk >= BYTE_RADIX) // qk cannot be larger than the largest
            qk = BYTE_RADIX - 1; // digit in BYTE_RADIX.

        // q[k] = qk or qk - 1. The following if-statement determines which:
        if (qk) {
            uchar *xk = (x + k); // A shortcut for x[k].

            // x = x - y * qk :
            // Fused multiply-and-subtract; carry combines the multiply
            // carry and the subtraction borrow.
            sc_digit carry = 0;

            for (int i = 0; i < ylen; ++i) {
                carry += y[i] * qk;
                sc_digit diff = (xk[i] + BYTE_RADIX) - (carry & BYTE_MASK);
                xk[i] = (uchar)(diff & BYTE_MASK);
                carry = (carry >> BITS_PER_BYTE) +
                    (1 - (diff >> BITS_PER_BYTE));
            }

            // If carry, qk may be one too large.
            if (carry) {
                // 2's complement the last digit.
                carry = (xk[ylen] + BYTE_RADIX) - carry;
                xk[ylen] = (uchar)(carry & BYTE_MASK);
                carry = 1 - (carry >> BITS_PER_BYTE);

                if (carry) {

                    // qk was one too large, so decrement it.
                    --qk;

                    // Since qk was decreased by one, y must be added to x:
                    // x = x - y * (qk - 1) = x - y * qk + y = x_above + y.
                    carry = 0;

                    for (int i = 0; i < ylen; ++i) {
                        carry += xk[i] + y[i];
                        xk[i] = (uchar)(carry & BYTE_MASK);
                        carry >>= BITS_PER_BYTE;
                    }

                    if (carry)
                        xk[ylen] = (uchar)((xk[ylen] + 1) & BYTE_MASK);

                } // second if carry
            } // first if carry
        } // if qk
        q[k] = (uchar)qk;
    } // for k

    // Set (sc_digit) w = (uchar) q.
    vec_from_char(xlen - ylen + 1, q, ulen, w);

#ifndef SC_MAX_NBITS
    delete [] x;
    delete [] y;
    delete [] q;
#endif

}

// Compute w = u / v, where u and w are vectors, and v is a scalar.
// - 0 < v < HALF_DIGIT_RADIX. Below, we rename w to q.
void
vec_div_small(int ulen, const sc_digit *u, sc_digit v, sc_digit *q)
{
    // Given (u = u_1u_2...u_n)_b = (q = q_1q_2...q_n) * v + r, where b
    // is the base, and 0 <= r < v. Then, the algorithm is as follows:
    //
    // r = 0;
    // for (j = 1; j <= n; j++) {
    //     q_j = (r * b + u_j) / v;
    //     r = (r * b + u_j) % v;
    // }
    //
    // In our case, b = DIGIT_RADIX, and u = Ax + B and q = Cx + D where
    // x = HALF_DIGIT_RADIX. Note that r < v < x and b = x^2. Then, a
    // typical situation is as follows:
    //
    //      ---- ----
    //         0    r
    //      ---- ----
    //         A    B
    // ---- ---- ----
    //    r    A    B  = r * b + u
    //
    // Hence, C = (r|A) / v.
    //        D = (((r|A) % v)|B) / v
    //        r = (((r|A) % v)|B) % v

#ifdef DEBUG_SYSTEMC
    sc_assert((ulen > 0) && (u != NULL));
    sc_assert(q != NULL);
    sc_assert((0 < v) && (v < HALF_DIGIT_RADIX));
#endif

// q_h (the high half of the current quotient digit) shares storage
// with r; they are never live simultaneously.
#define q_h r
    sc_digit r = 0;
    const sc_digit *ubegin = u;

    // Walk from the most significant digit down, as long division requires.
    u += ulen;
    q += ulen;

    while (ubegin < u) {
        sc_digit u_AB = (*--u); // A|B

#ifdef DEBUG_SYSTEMC
        // The overflow bits must be zero.
        sc_assert(high_half(u_AB) == high_half_masked(u_AB));
#endif

        sc_digit num = concat(r, high_half(u_AB)); // num = r|A
        q_h = num / v; // C
        num = concat((num % v), low_half(u_AB)); // num = (((r|A) % v)|B)
        (*--q) = concat(q_h, num / v); // q = C|D
        r = num % v;
    }
#undef q_h
}

// Compute w = u % v, where w, u, and v are vectors.
// - u and v are assumed to have at least two digits as uchars.
void
vec_rem_large(int ulen, const sc_digit *u, int vlen, const sc_digit *v,
              sc_digit *w)
{
#ifdef DEBUG_SYSTEMC
    sc_assert((ulen > 0) && (u != NULL));
    sc_assert((vlen > 0) && (v != NULL));
    sc_assert(w != NULL);
    sc_assert(BITS_PER_DIGIT >= 3 * BITS_PER_BYTE);
#endif

    // This function is adapted from vec_div_large: the quotient digits
    // are computed but not stored; what remains in x at the end is the
    // remainder.

    int xlen = BYTES_PER_DIGIT * ulen + 1;
    int ylen = BYTES_PER_DIGIT * vlen;

#ifdef SC_MAX_NBITS
    uchar x[DIV_CEIL2(SC_MAX_NBITS, BITS_PER_BYTE)];
    uchar y[DIV_CEIL2(SC_MAX_NBITS, BITS_PER_BYTE)];
#else
    uchar *x = new uchar[xlen];
    uchar *y = new uchar[ylen];
#endif

    // r corresponds to w.

    // Set (uchar) x = (sc_digit) u.
    xlen = vec_to_char(ulen, u, xlen, x);

    // Skip all the leading zeros in x.
    while ((--xlen >= 0) && (!x[xlen]))
        continue;
    xlen++;

    // Set (uchar) y = (sc_digit) v.
    ylen = vec_to_char(vlen, v, ylen, y);

    // Skip all the leading zeros in y.
    while ((--ylen >= 0) && (!y[ylen]))
        continue;
    ylen++;

#ifdef DEBUG_SYSTEMC
    sc_assert(xlen > 1);
    sc_assert(ylen > 1);
#endif

    // At this point, all the leading zeros are eliminated from x and y.

    // Zero the last digit of x.
    x[xlen] = 0;

    // The first two digits of y.
    sc_digit y2 = (y[ylen - 1] << BITS_PER_BYTE) + y[ylen - 2];

    const sc_digit DOUBLE_BITS_PER_BYTE = 2 * BITS_PER_BYTE;

    // Find each q[k].
    for (int k = xlen - ylen; k >= 0; --k) {
        // qk is a guess for q[k] such that q[k] = qk or qk - 1.
        sc_digit qk;

        // Find qk by just using 2 digits of y and 3 digits of x. The
        // following code assumes that sizeof(sc_digit) >= 3 BYTEs.
        int k2 = k + ylen;

        qk = ((x[k2] << DOUBLE_BITS_PER_BYTE) +
              (x[k2 - 1] << BITS_PER_BYTE) + x[k2 - 2]) / y2;

        if (qk >= BYTE_RADIX) // qk cannot be larger than the largest
            qk = BYTE_RADIX - 1; // digit in BYTE_RADIX.

        // q[k] = qk or qk - 1. The following if-statement determines which.
        if (qk) {
            uchar *xk = (x + k); // A shortcut for x[k].

            // x = x - y * qk;
            sc_digit carry = 0;

            for (int i = 0; i < ylen; ++i) {
                carry += y[i] * qk;
                sc_digit diff = (xk[i] + BYTE_RADIX) - (carry & BYTE_MASK);
                xk[i] = (uchar)(diff & BYTE_MASK);
                carry = (carry >> BITS_PER_BYTE) +
                    (1 - (diff >> BITS_PER_BYTE));
            }

            if (carry) {
                // 2's complement the last digit.
                carry = (xk[ylen] + BYTE_RADIX) - carry;
                xk[ylen] = (uchar)(carry & BYTE_MASK);
                carry = 1 - (carry >> BITS_PER_BYTE);

                if (carry) {
                    // qk was one too large, so decrement it.
                    // --qk; (not needed here: qk itself is discarded,
                    // only x must be corrected.)

                    // x = x - y * (qk - 1) = x - y * qk + y = x_above + y.
                    carry = 0;

                    for (int i = 0; i < ylen; ++i) {
                        carry += xk[i] + y[i];
                        xk[i] = (uchar)(carry & BYTE_MASK);
                        carry >>= BITS_PER_BYTE;
                    }

                    if (carry)
                        xk[ylen] = (uchar)((xk[ylen] + 1) & BYTE_MASK);
                } // second if carry
            } // first if carry
        } // if qk
    } // for k

    // Set (sc_digit) w = (uchar) x for the remainder.
    vec_from_char(ylen, x, ulen, w);

#ifndef SC_MAX_NBITS
    delete [] x;
    delete [] y;
#endif

}

// Compute r = u % v, where u is a vector, and r and v are scalars.
// - 0 < v < HALF_DIGIT_RADIX.
// - The remainder r is returned.
sc_digit
vec_rem_small(int ulen, const sc_digit *u, sc_digit v)
{

#ifdef DEBUG_SYSTEMC
    sc_assert((ulen > 0) && (u != NULL));
    sc_assert((0 < v) && (v < HALF_DIGIT_RADIX));
#endif

    // This function is adapted from vec_div_small().

    sc_digit r = 0;
    const sc_digit *ubegin = u;

    // Walk from the most significant digit down.
    u += ulen;

    while (ubegin < u) {
        sc_digit u_AB = (*--u); // A|B
#ifdef DEBUG_SYSTEMC
        // The overflow bits must be zero.
        sc_assert(high_half(u_AB) == high_half_masked(u_AB));
#endif
        // r = (((r|A) % v)|B) % v
        r = (concat(((concat(r, high_half(u_AB))) % v), low_half(u_AB))) % v;
    }

    return r;
}

// u = u / v, r = u % v. In-place division: the quotient overwrites u
// and the remainder is returned.
sc_digit
vec_rem_on_small(int ulen, sc_digit *u, sc_digit v)
{
#ifdef DEBUG_SYSTEMC
    sc_assert((ulen > 0) && (u != NULL));
    sc_assert(v > 0);
#endif

// q_h shares storage with r; see vec_div_small().
#define q_h r
    sc_digit r = 0;
    const sc_digit *ubegin = u;

    u += ulen;
    while (ubegin < u) {
        sc_digit u_AB = (*--u); // A|B
#ifdef DEBUG_SYSTEMC
        // The overflow bits must be zero.
        sc_assert(high_half(u_AB) == high_half_masked(u_AB));
#endif
        sc_digit num = concat(r, high_half(u_AB)); // num = r|A
        q_h = num / v; // C
        num = concat((num % v), low_half(u_AB)); // num = (((r|A) % v)|B)
        (*u) = concat(q_h, num / v); // q = C|D
        r = num % v;
    }
#undef q_h
    return r;
}

// Set (uchar) v = (sc_digit) u. Return the new vlen.
// Re-encodes the DIGIT_RADIX vector u as a base-BYTE_RADIX vector v,
// least significant byte first.
int
vec_to_char(int ulen, const sc_digit *u, int vlen, uchar *v)
{
#ifdef DEBUG_SYSTEMC
    sc_assert((ulen > 0) && (u != NULL));
    sc_assert((vlen > 0) && (v != NULL));
#endif

    int nbits = ulen * BITS_PER_DIGIT;
    // [right, left] delimit the BITS_PER_BYTE-wide bit window currently
    // being extracted; it may straddle a digit boundary.
    int right = 0;
    int left = right + BITS_PER_BYTE - 1;

    vlen = 0;
    while (nbits > 0) {
        int left_digit = left / BITS_PER_DIGIT;
        int right_digit = right / BITS_PER_DIGIT;
        int nsr = ((vlen << LOG2_BITS_PER_BYTE) % BITS_PER_DIGIT);
        int d = u[right_digit] >> nsr;

        // If the window straddles two digits, pull the remaining high
        // bits from the next digit. (When nsr == 0 the window cannot
        // straddle, so the shift by BITS_PER_DIGIT - nsr is never taken
        // with nsr == 0.)
        if (left_digit != right_digit) {
            if (left_digit < ulen)
                d |= u[left_digit] << (BITS_PER_DIGIT - nsr);
        }

        v[vlen++] = (uchar)(d & BYTE_MASK);

        left += BITS_PER_BYTE;
        right += BITS_PER_BYTE;
        nbits -= BITS_PER_BYTE;
    }
    return vlen;
}

// Set (sc_digit) v = (uchar) u.
1548// - sizeof(uchar) <= sizeof(sc_digit), 1549void 1550vec_from_char(int ulen, const uchar *u, int vlen, sc_digit *v) 1551{ 1552#ifdef DEBUG_SYSTEMC 1553 sc_assert((ulen > 0) && (u != NULL)); 1554 sc_assert((vlen > 0) && (v != NULL)); 1555 sc_assert(sizeof(uchar) <= sizeof(sc_digit)); 1556#endif 1557 1558 sc_digit *vend = (v + vlen); 1559 1560 const int nsr = BITS_PER_DIGIT - BITS_PER_BYTE; 1561 const sc_digit mask = one_and_ones(nsr); 1562 1563 (*v) = (sc_digit) u[ulen - 1]; 1564 1565 for (int i = ulen - 2; i >= 0; --i) { 1566 // Manual inlining of vec_shift_left(). 1567 sc_digit *viter = v; 1568 sc_digit carry = 0; 1569 while (viter < vend) { 1570 sc_digit vval = (*viter); 1571 (*viter++) = (((vval & mask) << BITS_PER_BYTE) | carry); 1572 carry = vval >> nsr; 1573 } 1574 1575 if (viter < vend) 1576 (*viter) = carry; 1577 1578 (*v) |= (sc_digit)u[i]; 1579 } 1580} 1581 1582// Set u <<= nsl. 1583// If nsl is negative, it is ignored. 1584void 1585vec_shift_left(int ulen, sc_digit *u, int nsl) 1586{ 1587#ifdef DEBUG_SYSTEMC 1588 sc_assert((ulen > 0) && (u != NULL)); 1589#endif 1590 1591 if (nsl <= 0) 1592 return; 1593 1594 // Shift left whole digits if nsl is large enough. 1595 if (nsl >= (int) BITS_PER_DIGIT) { 1596 int nd; 1597 if (nsl % BITS_PER_DIGIT == 0) { 1598 nd = nsl / BITS_PER_DIGIT; // No need to use DIV_CEIL(nsl). 1599 nsl = 0; 1600 } else { 1601 nd = DIV_CEIL(nsl) - 1; 1602 nsl -= nd * BITS_PER_DIGIT; 1603 } 1604 1605 if (nd) { 1606 // Shift left for nd digits. 1607 for (int j = ulen - 1; j >= nd; --j) 1608 u[j] = u[j - nd]; 1609 1610 vec_zero(sc_min(nd, ulen), u); 1611 } 1612 if (nsl == 0) 1613 return; 1614 } 1615 1616 // Shift left if nsl < BITS_PER_DIGIT. 
1617 sc_digit *uiter = u; 1618 sc_digit *uend = uiter + ulen; 1619 1620 int nsr = BITS_PER_DIGIT - nsl; 1621 sc_digit mask = one_and_ones(nsr); 1622 1623 sc_digit carry = 0; 1624 1625 while (uiter < uend) { 1626 sc_digit uval = (*uiter); 1627 (*uiter++) = (((uval & mask) << nsl) | carry); 1628 carry = uval >> nsr; 1629 } 1630 1631 if (uiter < uend) 1632 (*uiter) = carry; 1633} 1634 1635// Set u >>= nsr. 1636// If nsr is negative, it is ignored. 1637void 1638vec_shift_right(int ulen, sc_digit *u, int nsr, sc_digit fill) 1639{ 1640#ifdef DEBUG_SYSTEMC 1641 sc_assert((ulen > 0) && (u != NULL)); 1642#endif 1643 1644 // fill is usually either 0 or DIGIT_MASK; it can be any value. 1645 if (nsr <= 0) 1646 return; 1647 1648 // Shift right whole digits if nsr is large enough. 1649 if (nsr >= (int) BITS_PER_DIGIT) { 1650 int nd; 1651 if (nsr % BITS_PER_DIGIT == 0) { 1652 nd = nsr / BITS_PER_DIGIT; 1653 nsr = 0; 1654 } else { 1655 nd = DIV_CEIL(nsr) - 1; 1656 nsr -= nd * BITS_PER_DIGIT; 1657 } 1658 1659 if (nd) { 1660 // Shift right for nd digits. 1661 for (int j = 0; j < (ulen - nd); ++j) 1662 u[j] = u[j + nd]; 1663 1664 if (fill) { 1665 for (int j = ulen - sc_min( nd, ulen ); j < ulen; ++j) 1666 u[j] = fill; 1667 } else { 1668 vec_zero(ulen - sc_min( nd, ulen ), ulen, u); 1669 } 1670 } 1671 if (nsr == 0) 1672 return; 1673 } 1674 1675 // Shift right if nsr < BITS_PER_DIGIT. 1676 sc_digit *ubegin = u; 1677 sc_digit *uiter = (ubegin + ulen); 1678 1679 int nsl = BITS_PER_DIGIT - nsr; 1680 sc_digit mask = one_and_ones(nsr); 1681 1682 sc_digit carry = (fill & mask) << nsl; 1683 1684 while (ubegin < uiter) { 1685 sc_digit uval = (*--uiter); 1686 (*uiter) = (uval >> nsr) | carry; 1687 carry = (uval & mask) << nsl; 1688 } 1689} 1690 1691 1692// Let u[l..r], where l and r are left and right bit positions 1693// respectively, be equal to its mirror image. 
1694void 1695vec_reverse(int unb, int und, sc_digit *ud, int l, int r) 1696{ 1697#ifdef DEBUG_SYSTEMC 1698 sc_assert((unb > 0) && (und > 0) && (ud != NULL)); 1699 sc_assert((0 <= r) && (r <= l) && (l < unb)); 1700#endif 1701 1702 if (l < r) { 1703 std::stringstream msg; 1704 msg << "vec_reverse( int, int, sc_digit*, int l, int r ) : " << 1705 "l = " << l << " < r = " << r << " is not valid",
| 605 msg.str().c_str()); 606 return 0; 607 } 608 } 609 610 return convert_signed_SM_to_2C_to_SM(s, unb, und, u); 611} 612 613 614// All vec_ functions assume that the vector to hold the result, 615// called w, has sufficient length to hold the result. For efficiency 616// reasons, we do not test whether or not we are out of bounds. 617 618// Compute w = u + v, where w, u, and v are vectors. 619// - ulen >= vlen 620// - wlen >= sc_max(ulen, vlen) + 1 621void 622vec_add(int ulen, const sc_digit *u, int vlen, const sc_digit *v, sc_digit *w) 623{ 624#ifdef DEBUG_SYSTEMC 625 sc_assert((ulen > 0) && (u != NULL)); 626 sc_assert((vlen > 0) && (v != NULL)); 627 sc_assert(w != NULL); 628 sc_assert(ulen >= vlen); 629#endif 630 631 const sc_digit *uend = (u + ulen); 632 const sc_digit *vend = (v + vlen); 633 634 sc_digit carry = 0; // Also used as sum to save space. 635 636 // Add along the shorter v. 637 while (v < vend) { 638 carry += (*u++) + (*v++); 639 (*w++) = carry & DIGIT_MASK; 640 carry >>= BITS_PER_DIGIT; 641 } 642 643 // Propagate the carry. 644 while (carry && (u < uend)) { 645 carry = (*u++) + 1; 646 (*w++) = carry & DIGIT_MASK; 647 carry >>= BITS_PER_DIGIT; 648 } 649 650 // Copy the rest of u to the result. 651 while (u < uend) 652 (*w++) = (*u++); 653 654 // Propagate the carry if it is still 1. 655 if (carry) 656 (*w) = 1; 657} 658 659 660// Compute u += v, where u and v are vectors. 661// - ulen >= vlen 662void 663vec_add_on(int ulen, sc_digit *ubegin, int vlen, const sc_digit *v) 664{ 665#ifdef DEBUG_SYSTEMC 666 sc_assert((ulen > 0) && (ubegin != NULL)); 667 sc_assert((vlen > 0) && (v != NULL)); 668 sc_assert(ulen >= vlen); 669#endif 670 671 sc_digit *u = ubegin; 672 const sc_digit *uend = (u + ulen); 673 const sc_digit *vend = (v + vlen); 674 675 sc_digit carry = 0; // Also used as sum to save space. 676 677 // Add along the shorter v. 
678 while (v < vend) { 679 carry += (*u) + (*v++); 680 (*u++) = carry & DIGIT_MASK; 681 carry >>= BITS_PER_DIGIT; 682 } 683 684 // Propagate the carry. 685 while (carry && (u < uend)) { 686 carry = (*u) + 1; 687 (*u++) = carry & DIGIT_MASK; 688 carry >>= BITS_PER_DIGIT; 689 } 690 691#ifdef DEBUG_SYSTEMC 692 if (carry != 0) { 693 SC_REPORT_WARNING(sc_core::SC_ID_WITHOUT_MESSAGE_, 694 "vec_add_on( int, sc_digit*, int, const " 695 "sc_digit* ) : " 696 "result of addition is wrapped around"); 697 } 698#endif 699} 700 701 702// Compute u += v, where u and v are vectors. 703// - ulen < vlen 704void 705vec_add_on2(int ulen, sc_digit *ubegin, int, 706#ifdef DEBUG_SYSTEMC 707 vlen, 708#endif 709 const sc_digit *v) 710{ 711#ifdef DEBUG_SYSTEMC 712 sc_assert((ulen > 0) && (ubegin != NULL)); 713 sc_assert((vlen > 0) && (v != NULL)); 714 sc_assert(ulen < vlen); 715#endif 716 717 sc_digit *u = ubegin; 718 const sc_digit *uend = (u + ulen); 719 720 sc_digit carry = 0; // Also used as sum to save space. 721 722 // Add along the shorter u. 723 while (u < uend) { 724 carry += (*u) + (*v++); 725 (*u++) = carry & DIGIT_MASK; 726 carry >>= BITS_PER_DIGIT; 727 } 728 729#ifdef DEBUG_SYSTEMC 730 if (carry != 0) { 731 SC_REPORT_WARNING(sc_core::SC_ID_WITHOUT_MESSAGE_, 732 "vec_add_on2( int, sc_digit*, int, const " 733 "sc_digit* ) : " 734 "result of addition is wrapped around"); 735 } 736#endif 737} 738 739 740// Compute w = u + v, where w and u are vectors, and v is a scalar. 741void 742vec_add_small(int ulen, const sc_digit *u, sc_digit v, sc_digit *w) 743{ 744#ifdef DEBUG_SYSTEMC 745 sc_assert((ulen > 0) && (u != NULL)); 746 sc_assert(w != NULL); 747#endif 748 749 const sc_digit *uend = (u + ulen); 750 751 // Add along the shorter v. 752 sc_digit carry = (*u++) + v; 753 (*w++) = carry & DIGIT_MASK; 754 carry >>= BITS_PER_DIGIT; 755 756 // Propagate the carry. 
757 while (carry && (u < uend)) { 758 carry = (*u++) + 1; 759 (*w++) = carry & DIGIT_MASK; 760 carry >>= BITS_PER_DIGIT; 761 } 762 763 // Copy the rest of u to the result. 764 while (u < uend) 765 (*w++) = (*u++); 766 767 // Propagate the carry if it is still 1. 768 if (carry) 769 (*w) = 1; 770} 771 772// Compute u += v, where u is vectors, and v is a scalar. 773void 774vec_add_small_on(int ulen, sc_digit *u, sc_digit v) 775{ 776#ifdef DEBUG_SYSTEMC 777 sc_assert((ulen > 0) && (u != NULL)); 778#endif 779 780 int i = 0; 781 782 while (v && (i < ulen)) { 783 v += u[i]; 784 u[i++] = v & DIGIT_MASK; 785 v >>= BITS_PER_DIGIT; 786 } 787 788#ifdef DEBUG_SYSTEMC 789 if (v != 0) { 790 SC_REPORT_WARNING(sc_core::SC_ID_WITHOUT_MESSAGE_, 791 "vec_add_small_on( int, sc_digit*, unsigned " 792 "long ) : " 793 "result of addition is wrapped around"); 794 } 795#endif 796} 797 798// Compute w = u - v, where w, u, and v are vectors. 799// - ulen >= vlen 800// - wlen >= sc_max(ulen, vlen) 801void 802vec_sub(int ulen, const sc_digit *u, int vlen, const sc_digit *v, sc_digit *w) 803{ 804#ifdef DEBUG_SYSTEMC 805 sc_assert((ulen > 0) && (u != NULL)); 806 sc_assert((vlen > 0) && (v != NULL)); 807 sc_assert(w != NULL); 808 sc_assert(ulen >= vlen); 809#endif 810 811 const sc_digit *uend = (u + ulen); 812 const sc_digit *vend = (v + vlen); 813 814 sc_digit borrow = 0; // Also used as diff to save space. 815 816 // Subtract along the shorter v. 817 while (v < vend) { 818 borrow = ((*u++) + DIGIT_RADIX) - (*v++) - borrow; 819 (*w++) = borrow & DIGIT_MASK; 820 borrow = 1 - (borrow >> BITS_PER_DIGIT); 821 } 822 823 // Propagate the borrow. 824 while (borrow && (u < uend)) { 825 borrow = ((*u++) + DIGIT_RADIX) - 1; 826 (*w++) = borrow & DIGIT_MASK; 827 borrow = 1 - (borrow >> BITS_PER_DIGIT); 828 } 829 830#ifdef DEBUG_SYSTEMC 831 sc_assert(borrow == 0); 832#endif 833 834 // Copy the rest of u to the result. 
835 while (u < uend) 836 (*w++) = (*u++); 837} 838 839// Compute u = u - v, where u and v are vectors. 840// - u > v 841// - ulen >= vlen 842void 843vec_sub_on(int ulen, sc_digit *ubegin, int vlen, const sc_digit *v) 844{ 845#ifdef DEBUG_SYSTEMC 846 sc_assert((ulen > 0) && (ubegin != NULL)); 847 sc_assert((vlen > 0) && (v != NULL)); 848 sc_assert(ulen >= vlen); 849#endif 850 851 sc_digit *u = ubegin; 852 const sc_digit *uend = (u + ulen); 853 const sc_digit *vend = (v + vlen); 854 855 sc_digit borrow = 0; // Also used as diff to save space. 856 857 // Subtract along the shorter v. 858 while (v < vend) { 859 borrow = ((*u) + DIGIT_RADIX) - (*v++) - borrow; 860 (*u++) = borrow & DIGIT_MASK; 861 borrow = 1 - (borrow >> BITS_PER_DIGIT); 862 } 863 864 // Propagate the borrow. 865 while (borrow && (u < uend)) { 866 borrow = ((*u) + DIGIT_RADIX) - 1; 867 (*u++) = borrow & DIGIT_MASK; 868 borrow = 1 - (borrow >> BITS_PER_DIGIT); 869 } 870 871#ifdef DEBUG_SYSTEMC 872 sc_assert(borrow == 0); 873#endif 874} 875 876// Compute u = v - u, where u and v are vectors. 877// - v > u 878// - ulen <= vlen or ulen > ulen 879void 880vec_sub_on2(int ulen, sc_digit *ubegin, int vlen, const sc_digit *v) 881{ 882#ifdef DEBUG_SYSTEMC 883 sc_assert((ulen > 0) && (ubegin != NULL)); 884 sc_assert((vlen > 0) && (v != NULL)); 885#endif 886 887 sc_digit *u = ubegin; 888 const sc_digit *uend = (u + sc_min(ulen, vlen)); 889 890 sc_digit borrow = 0; // Also used as diff to save space. 891 892 // Subtract along the shorter u. 893 while (u < uend) { 894 borrow = ((*v++) + DIGIT_RADIX) - (*u) - borrow; 895 (*u++) = borrow & DIGIT_MASK; 896 borrow = 1 - (borrow >> BITS_PER_DIGIT); 897 } 898 899#ifdef DEBUG_SYSTEMC 900 if (borrow != 0) { 901 SC_REPORT_WARNING(sc_core::SC_ID_WITHOUT_MESSAGE_, 902 "vec_sub_on2( int, sc_digit*, int, const " 903 "sc_digit* ) : " 904 "result of subtraction is wrapped around"); 905 } 906#endif 907} 908 909// Compute w = u - v, where w and u are vectors, and v is a scalar. 
void
vec_sub_small(int ulen, const sc_digit *u, sc_digit v, sc_digit *w)
{
#ifdef DEBUG_SYSTEMC
    sc_assert(ulen > 0);
    sc_assert(u != NULL);
#endif

    const sc_digit *uend = (u + ulen);

    // Subtract the scalar v from the least significant digit of u.
    // Adding DIGIT_RADIX first keeps the unsigned intermediate
    // non-negative; the top bit of 'borrow' then encodes the borrow.
    sc_digit borrow = ((*u++) + DIGIT_RADIX) - v;
    (*w++) = borrow & DIGIT_MASK;
    borrow = 1 - (borrow >> BITS_PER_DIGIT);

    // Propagate the borrow.
    while (borrow && (u < uend)) {
        borrow = ((*u++) + DIGIT_RADIX) - 1;
        (*w++) = borrow & DIGIT_MASK;
        borrow = 1 - (borrow >> BITS_PER_DIGIT);
    }

#ifdef DEBUG_SYSTEMC
    // Assumes u >= v, so no borrow can remain.
    sc_assert(borrow == 0);
#endif

    // Copy the rest of u to the result.
    while (u < uend)
        (*w++) = (*u++);
}


// Compute u -= v, where u is vectors, and v is a scalar.
void
vec_sub_small_on(int ulen, sc_digit *u, sc_digit v)
{
#ifdef DEBUG_SYSTEMC
    sc_assert((ulen > 0) && (u != NULL));
#endif

    // v doubles as the running borrow after the first iteration.
    for (int i = 0; i < ulen; ++i) {
        v = (u[i] + DIGIT_RADIX) - v;
        u[i] = v & DIGIT_MASK;
        v = 1 - (v >> BITS_PER_DIGIT);
    }

#ifdef DEBUG_SYSTEMC
    // Assumes u >= v, so no borrow can remain.
    sc_assert(v == 0);
#endif
}

// Compute w = u * v, where w, u, and v are vectors.
void
vec_mul(int ulen, const sc_digit *u, int vlen, const sc_digit *vbegin,
        sc_digit *wbegin)
{
    /* Consider u = Ax + B and v = Cx + D where x is equal to
       HALF_DIGIT_RADIX. In other words, A is the higher half of u and
       B is the lower half of u. The interpretation for v is
       similar. Then, we have the following picture:

               u_h     u_l
       u: --------  --------
              A         B

               v_h     v_l
       v: --------  --------
              C         D

       result (d):
       carry_before:                 --------  --------
                                      carry_h   carry_l
       result_before: --------  --------  --------  --------
                        R1_h      R1_l      R0_h      R0_l
                                           --------  --------
                                             BD_h      BD_l
                                 --------  --------
                                   AD_h      AD_l
                                 --------  --------
                                   BC_h      BC_l
                       --------  --------
                         AC_h      AC_l
       result_after:  --------  --------  --------  --------
                        R1_h'     R1_l'     R0_h'     R0_l'

       prod_l = R0_h|R0_l + B * D + 0|carry_l
              = R0_h|R0_l + BD_h|BD_l + 0|carry_l

       prod_h = A * D + B * C + high_half(prod_l) + carry_h
              = AD_h|AD_l + BC_h|BC_l + high_half(prod_l) + 0|carry_h

       carry  = A * C + high_half(prod_h)
              = AC_h|AC_l + high_half(prod_h)

       R0_l' = low_half(prod_l)

       R0_h' = low_half(prod_h)

       R0 = high_half(prod_h)|low_half(prod_l)

       where '|' is the concatenation operation and the suffixes 0 and 1
       show the iteration number, i.e., 0 is the current iteration and 1
       is the next iteration.

       NOTE: sc_max(prod_l, prod_h, carry) <= 2 * x^2 - 1, so any
       of these numbers can be stored in a digit.

       NOTE: low_half(u) returns the lower BITS_PER_HALF_DIGIT of u,
       whereas high_half(u) returns the rest of the bits, which may
       contain more bits than BITS_PER_HALF_DIGIT.
    */

#ifdef DEBUG_SYSTEMC
    sc_assert((ulen > 0) && (u != NULL));
    sc_assert((vlen > 0) && (vbegin != NULL));
    sc_assert(wbegin != NULL);
#endif

// prod_h shares storage with carry to save a register; see diagram above.
#define prod_h carry
    const sc_digit *uend = (u + ulen);
    const sc_digit *vend = (vbegin + vlen);

    // Schoolbook multiplication: for each digit of u, accumulate
    // u_digit * v into w starting at the matching offset.
    while (u < uend) {
        sc_digit u_h = (*u++);        // A|B
        sc_digit u_l = low_half(u_h); // B
        u_h = high_half(u_h);         // A

#ifdef DEBUG_SYSTEMC
        // The overflow bits must be zero.
        sc_assert(u_h == (u_h & HALF_DIGIT_MASK));
#endif
        sc_digit carry = 0;
        sc_digit *w = (wbegin++);
        const sc_digit *v = vbegin;

        while (v < vend) {
            sc_digit v_h = (*v++);        // C|D
            sc_digit v_l = low_half(v_h); // D

            v_h = high_half(v_h); // C

#ifdef DEBUG_SYSTEMC
            // The overflow bits must be zero.
            sc_assert(v_h == (v_h & HALF_DIGIT_MASK));
#endif

            sc_digit prod_l = (*w) + u_l * v_l + low_half(carry);
            prod_h = u_h * v_l + u_l * v_h +
                     high_half(prod_l) + high_half(carry);
            (*w++) = concat(low_half(prod_h), low_half(prod_l));
            carry = u_h * v_h + high_half(prod_h);
        }
        // Store the final carry as the next result digit.
        (*w) = carry;
    }
#undef prod_h
}

// Compute w = u * v, where w and u are vectors, and v is a scalar.
// - 0 < v < HALF_DIGIT_RADIX.
void
vec_mul_small(int ulen, const sc_digit *u, sc_digit v, sc_digit *w)
{
#ifdef DEBUG_SYSTEMC
    sc_assert((ulen > 0) && (u != NULL));
    sc_assert(w != NULL);
    sc_assert((0 < v) && (v < HALF_DIGIT_RADIX));
#endif

// prod_h shares storage with carry to save a register.
#define prod_h carry

    const sc_digit *uend = (u + ulen);
    sc_digit carry = 0;
    // Half-digit multiplication: v < HALF_DIGIT_RADIX guarantees the
    // partial products fit in a digit.
    while (u < uend) {
        sc_digit u_AB = (*u++);
#ifdef DEBUG_SYSTEMC
        // The overflow bits must be zero.
        sc_assert(high_half(u_AB) == high_half_masked(u_AB));
#endif
        sc_digit prod_l = v * low_half(u_AB) + low_half(carry);
        prod_h = v * high_half(u_AB) + high_half(prod_l) + high_half(carry);
        (*w++) = concat(low_half(prod_h), low_half(prod_l));
        carry = high_half(prod_h);
    }
    // Store the final carry as the extra result digit.
    (*w) = carry;
#undef prod_h
}

// Compute u = u * v, where u is a vector, and v is a scalar.
// - 0 < v < HALF_DIGIT_RADIX.
void
vec_mul_small_on(int ulen, sc_digit *u, sc_digit v)
{
#ifdef DEBUG_SYSTEMC
    sc_assert((ulen > 0) && (u != NULL));
    sc_assert((0 < v) && (v < HALF_DIGIT_RADIX));
#endif

// prod_h shares storage with carry to save a register.
#define prod_h carry
    sc_digit carry = 0;
    // In-place variant of vec_mul_small(); see that function for the
    // half-digit arithmetic.
    for (int i = 0; i < ulen; ++i) {
#ifdef DEBUG_SYSTEMC
        // The overflow bits must be zero.
        sc_assert(high_half(u[i]) == high_half_masked(u[i]));
#endif
        sc_digit prod_l = v * low_half(u[i]) + low_half(carry);
        prod_h = v * high_half(u[i]) + high_half(prod_l) + high_half(carry);
        u[i] = concat(low_half(prod_h), low_half(prod_l));
        carry = high_half(prod_h);
    }
#undef prod_h

#ifdef DEBUG_SYSTEMC
    // No room for the final carry in the fixed-width u.
    if (carry != 0) {
        SC_REPORT_WARNING(sc_core::SC_ID_WITHOUT_MESSAGE_,
                          "vec_mul_small_on( int, sc_digit*, unsigned "
                          "long ) : "
                          "result of multiplication is wrapped around");
    }
#endif
}

// Compute w = u / v, where w, u, and v are vectors.
// - u and v are assumed to have at least two digits as uchars.
void
vec_div_large(int ulen, const sc_digit *u, int vlen, const sc_digit *v,
              sc_digit *w)
{
#ifdef DEBUG_SYSTEMC
    sc_assert((ulen > 0) && (u != NULL));
    sc_assert((vlen > 0) && (v != NULL));
    sc_assert(w != NULL);
    sc_assert(BITS_PER_DIGIT >= 3 * BITS_PER_BYTE);
#endif

    // We will compute q = x / y where x = u and y = v. The reason for
    // using x and y is that x and y are BYTE_RADIX copies of u and v,
    // respectively. The use of BYTE_RADIX radix greatly simplifies the
    // complexity of the division operation. These copies are also
    // needed even when we use DIGIT_RADIX representation.
    // (This is the classical multiword division algorithm, with
    // byte-sized "digits" so the quotient guess fits in an sc_digit.)

    int xlen = BYTES_PER_DIGIT * ulen + 1;
    int ylen = BYTES_PER_DIGIT * vlen;

#ifdef SC_MAX_NBITS
    uchar x[DIV_CEIL2(SC_MAX_NBITS, BITS_PER_BYTE)];
    uchar y[DIV_CEIL2(SC_MAX_NBITS, BITS_PER_BYTE)];
    uchar q[DIV_CEIL2(SC_MAX_NBITS, BITS_PER_BYTE)];
#else
    uchar *x = new uchar[xlen];
    uchar *y = new uchar[ylen];
    // valgrind complains about us accessing too far to so leave a buffer.
    uchar *q = new uchar[(xlen - ylen) + 10];
#endif

    // q corresponds to w.

    // Set (uchar) x = (sc_digit) u.
    xlen = vec_to_char(ulen, u, xlen, x);

    // Skip all the leading zeros in x.
    while ((--xlen >= 0) && (! x[xlen]))
        continue;
    xlen++;

    // Set (uchar) y = (sc_digit) v.
    ylen = vec_to_char(vlen, v, ylen, y);

    // Skip all the leading zeros in y.
    while ((--ylen >= 0) && (! y[ylen]))
        continue;
    ylen++;

#ifdef DEBUG_SYSTEMC
    sc_assert(xlen > 1);
    sc_assert(ylen > 1);
#endif

    // At this point, all the leading zeros are eliminated from x and y.

    // Zero the last digit of x.
    x[xlen] = 0;

    // The first two digits of y.
    sc_digit y2 = (y[ylen - 1] << BITS_PER_BYTE) + y[ylen - 2];

    const sc_digit DOUBLE_BITS_PER_BYTE = 2 * BITS_PER_BYTE;

    // Find each q[k].
    for (int k = (xlen - ylen); k >= 0; --k) {
        // qk is a guess for q[k] such that q[k] = qk or qk - 1.
        sc_digit qk;

        // Find qk by just using 2 digits of y and 3 digits of x. The
        // following code assumes that sizeof(sc_digit) >= 3 BYTEs.
        int k2 = k + ylen;

        qk = ((x[k2] << DOUBLE_BITS_PER_BYTE) +
              (x[k2 - 1] << BITS_PER_BYTE) + x[k2 - 2]) / y2;

        if (qk >= BYTE_RADIX)    // qk cannot be larger than the largest
            qk = BYTE_RADIX - 1; // digit in BYTE_RADIX.

        // q[k] = qk or qk - 1. The following if-statement determines which:
        if (qk) {
            uchar *xk = (x + k); // A shortcut for x[k].

            // x = x - y * qk :
            sc_digit carry = 0;

            for (int i = 0; i < ylen; ++i) {
                carry += y[i] * qk;
                sc_digit diff = (xk[i] + BYTE_RADIX) - (carry & BYTE_MASK);
                xk[i] = (uchar)(diff & BYTE_MASK);
                carry = (carry >> BITS_PER_BYTE) +
                        (1 - (diff >> BITS_PER_BYTE));
            }

            // If carry, qk may be one too large.
            if (carry) {
                // 2's complement the last digit.
                carry = (xk[ylen] + BYTE_RADIX) - carry;
                xk[ylen] = (uchar)(carry & BYTE_MASK);
                carry = 1 - (carry >> BITS_PER_BYTE);

                if (carry) {

                    // qk was one too large, so decrement it.
                    --qk;

                    // Since qk was decreased by one, y must be added to x:
                    // x = x - y * (qk - 1) = x - y * qk + y = x_above + y.
                    carry = 0;

                    for (int i = 0; i < ylen; ++i) {
                        carry += xk[i] + y[i];
                        xk[i] = (uchar)(carry & BYTE_MASK);
                        carry >>= BITS_PER_BYTE;
                    }

                    if (carry)
                        xk[ylen] = (uchar)((xk[ylen] + 1) & BYTE_MASK);

                } // second if carry
            } // first if carry
        } // if qk
        q[k] = (uchar)qk;
    } // for k

    // Set (sc_digit) w = (uchar) q.
    vec_from_char(xlen - ylen + 1, q, ulen, w);

#ifndef SC_MAX_NBITS
    delete [] x;
    delete [] y;
    delete [] q;
#endif

}

// Compute w = u / v, where u and w are vectors, and v is a scalar.
// - 0 < v < HALF_DIGIT_RADIX. Below, we rename w to q.
void
vec_div_small(int ulen, const sc_digit *u, sc_digit v, sc_digit *q)
{
    // Given (u = u_1u_2...u_n)_b = (q = q_1q_2...q_n) * v + r, where b
    // is the base, and 0 <= r < v. Then, the algorithm is as follows:
    //
    // r = 0;
    // for (j = 1; j <= n; j++) {
    //     q_j = (r * b + u_j) / v;
    //     r = (r * b + u_j) % v;
    // }
    //
    // In our case, b = DIGIT_RADIX, and u = Ax + B and q = Cx + D where
    // x = HALF_DIGIT_RADIX. Note that r < v < x and b = x^2. Then, a
    // typical situation is as follows:
    //
    //      ---- ----
    //         0    r
    // ---- ----
    //    A    B
    // ---- ---- ----
    //    r    A    B  = r * b + u
    //
    // Hence, C = (r|A) / v.
    //        D = (((r|A) % v)|B) / v
    //        r = (((r|A) % v)|B) % v

#ifdef DEBUG_SYSTEMC
    sc_assert((ulen > 0) && (u != NULL));
    sc_assert(q != NULL);
    sc_assert((0 < v) && (v < HALF_DIGIT_RADIX));
#endif

// q_h shares storage with r; r is dead while q_h is live in each step.
#define q_h r
    sc_digit r = 0;
    const sc_digit *ubegin = u;

    // Process digits from most significant down to least significant.
    u += ulen;
    q += ulen;

    while (ubegin < u) {
        sc_digit u_AB = (*--u); // A|B

#ifdef DEBUG_SYSTEMC
        // The overflow bits must be zero.
        sc_assert(high_half(u_AB) == high_half_masked(u_AB));
#endif

        sc_digit num = concat(r, high_half(u_AB)); // num = r|A
        q_h = num / v;                             // C
        num = concat((num % v), low_half(u_AB));   // num = (((r|A) % v)|B)
        (*--q) = concat(q_h, num / v);             // q = C|D
        r = num % v;
    }
#undef q_h
}

// Compute w = u % v, where w, u, and v are vectors.
// - u and v are assumed to have at least two digits as uchars.
void
vec_rem_large(int ulen, const sc_digit *u, int vlen, const sc_digit *v,
              sc_digit *w)
{
#ifdef DEBUG_SYSTEMC
    sc_assert((ulen > 0) && (u != NULL));
    sc_assert((vlen > 0) && (v != NULL));
    sc_assert(w != NULL);
    sc_assert(BITS_PER_DIGIT >= 3 * BITS_PER_BYTE);
#endif

    // This function is adapted from vec_div_large: the same quotient
    // loop runs, but only the remainder left in x is kept.

    int xlen = BYTES_PER_DIGIT * ulen + 1;
    int ylen = BYTES_PER_DIGIT * vlen;

#ifdef SC_MAX_NBITS
    uchar x[DIV_CEIL2(SC_MAX_NBITS, BITS_PER_BYTE)];
    uchar y[DIV_CEIL2(SC_MAX_NBITS, BITS_PER_BYTE)];
#else
    uchar *x = new uchar[xlen];
    uchar *y = new uchar[ylen];
#endif

    // r corresponds to w.

    // Set (uchar) x = (sc_digit) u.
    xlen = vec_to_char(ulen, u, xlen, x);

    // Skip all the leading zeros in x.
    while ((--xlen >= 0) && (!x[xlen]))
        continue;
    xlen++;

    // Set (uchar) y = (sc_digit) v.
    ylen = vec_to_char(vlen, v, ylen, y);

    // Skip all the leading zeros in y.
    while ((--ylen >= 0) && (!y[ylen]))
        continue;
    ylen++;

#ifdef DEBUG_SYSTEMC
    sc_assert(xlen > 1);
    sc_assert(ylen > 1);
#endif

    // At this point, all the leading zeros are eliminated from x and y.

    // Zero the last digit of x.
    x[xlen] = 0;

    // The first two digits of y.
    sc_digit y2 = (y[ylen - 1] << BITS_PER_BYTE) + y[ylen - 2];

    const sc_digit DOUBLE_BITS_PER_BYTE = 2 * BITS_PER_BYTE;

    // Find each q[k].
    for (int k = xlen - ylen; k >= 0; --k) {
        // qk is a guess for q[k] such that q[k] = qk or qk - 1.
        sc_digit qk;

        // Find qk by just using 2 digits of y and 3 digits of x. The
        // following code assumes that sizeof(sc_digit) >= 3 BYTEs.
        int k2 = k + ylen;

        qk = ((x[k2] << DOUBLE_BITS_PER_BYTE) +
              (x[k2 - 1] << BITS_PER_BYTE) + x[k2 - 2]) / y2;

        if (qk >= BYTE_RADIX)    // qk cannot be larger than the largest
            qk = BYTE_RADIX - 1; // digit in BYTE_RADIX.

        // q[k] = qk or qk - 1. The following if-statement determines which.
        if (qk) {
            uchar *xk = (x + k); // A shortcut for x[k].

            // x = x - y * qk;
            sc_digit carry = 0;

            for (int i = 0; i < ylen; ++i) {
                carry += y[i] * qk;
                sc_digit diff = (xk[i] + BYTE_RADIX) - (carry & BYTE_MASK);
                xk[i] = (uchar)(diff & BYTE_MASK);
                carry = (carry >> BITS_PER_BYTE) +
                        (1 - (diff >> BITS_PER_BYTE));
            }

            if (carry) {
                // 2's complement the last digit.
                carry = (xk[ylen] + BYTE_RADIX) - carry;
                xk[ylen] = (uchar)(carry & BYTE_MASK);
                carry = 1 - (carry >> BITS_PER_BYTE);

                if (carry) {
                    // qk was one too large, so decrement it.
                    // --qk;
                    // (qk itself is not needed here since only the
                    // remainder in x is kept, so the decrement is
                    // intentionally omitted.)

                    // x = x - y * (qk - 1) = x - y * qk + y = x_above + y.
                    carry = 0;

                    for (int i = 0; i < ylen; ++i) {
                        carry += xk[i] + y[i];
                        xk[i] = (uchar)(carry & BYTE_MASK);
                        carry >>= BITS_PER_BYTE;
                    }

                    if (carry)
                        xk[ylen] = (uchar)((xk[ylen] + 1) & BYTE_MASK);
                } // second if carry
            } // first if carry
        } // if qk
    } // for k

    // Set (sc_digit) w = (uchar) x for the remainder.
    vec_from_char(ylen, x, ulen, w);

#ifndef SC_MAX_NBITS
    delete [] x;
    delete [] y;
#endif

}

// Compute r = u % v, where u is a vector, and r and v are scalars.
// - 0 < v < HALF_DIGIT_RADIX.
// - The remainder r is returned.
sc_digit
vec_rem_small(int ulen, const sc_digit *u, sc_digit v)
{

#ifdef DEBUG_SYSTEMC
    sc_assert((ulen > 0) && (u != NULL));
    sc_assert((0 < v) && (v < HALF_DIGIT_RADIX));
#endif

    // This function is adapted from vec_div_small().

    sc_digit r = 0;
    const sc_digit *ubegin = u;

    // Process digits from most significant down to least significant.
    u += ulen;

    while (ubegin < u) {
        sc_digit u_AB = (*--u); // A|B
#ifdef DEBUG_SYSTEMC
        // The overflow bits must be zero.
        sc_assert(high_half(u_AB) == high_half_masked(u_AB));
#endif
        // r = (((r|A) % v)|B) % v
        r = (concat(((concat(r, high_half(u_AB))) % v), low_half(u_AB))) % v;
    }

    return r;
}

// u = u / v, r = u % v.
sc_digit
vec_rem_on_small(int ulen, sc_digit *u, sc_digit v)
{
#ifdef DEBUG_SYSTEMC
    sc_assert((ulen > 0) && (u != NULL));
    sc_assert(v > 0);
#endif

// q_h shares storage with r; r is dead while q_h is live in each step.
#define q_h r
    sc_digit r = 0;
    const sc_digit *ubegin = u;

    // In-place variant of vec_div_small() that also returns the
    // remainder.
    u += ulen;
    while (ubegin < u) {
        sc_digit u_AB = (*--u); // A|B
#ifdef DEBUG_SYSTEMC
        // The overflow bits must be zero.
        sc_assert(high_half(u_AB) == high_half_masked(u_AB));
#endif
        sc_digit num = concat(r, high_half(u_AB)); // num = r|A
        q_h = num / v;                             // C
        num = concat((num % v), low_half(u_AB));   // num = (((r|A) % v)|B)
        (*u) = concat(q_h, num / v);               // q = C|D
        r = num % v;
    }
#undef q_h
    return r;
}

// Set (uchar) v = (sc_digit) u. Return the new vlen.
int
vec_to_char(int ulen, const sc_digit *u, int vlen, uchar *v)
{
#ifdef DEBUG_SYSTEMC
    sc_assert((ulen > 0) && (u != NULL));
    sc_assert((vlen > 0) && (v != NULL));
#endif

    int nbits = ulen * BITS_PER_DIGIT;
    // [right, left] is the bit window of the next output byte.
    int right = 0;
    int left = right + BITS_PER_BYTE - 1;

    vlen = 0;
    while (nbits > 0) {
        int left_digit = left / BITS_PER_DIGIT;
        int right_digit = right / BITS_PER_DIGIT;
        int nsr = ((vlen << LOG2_BITS_PER_BYTE) % BITS_PER_DIGIT);
        int d = u[right_digit] >> nsr;

        // The byte may straddle two digits; pull in the high part.
        if (left_digit != right_digit) {
            if (left_digit < ulen)
                d |= u[left_digit] << (BITS_PER_DIGIT - nsr);
        }

        v[vlen++] = (uchar)(d & BYTE_MASK);

        left += BITS_PER_BYTE;
        right += BITS_PER_BYTE;
        nbits -= BITS_PER_BYTE;
    }
    return vlen;
}

// Set (sc_digit) v = (uchar) u.
1550// - sizeof(uchar) <= sizeof(sc_digit), 1551void 1552vec_from_char(int ulen, const uchar *u, int vlen, sc_digit *v) 1553{ 1554#ifdef DEBUG_SYSTEMC 1555 sc_assert((ulen > 0) && (u != NULL)); 1556 sc_assert((vlen > 0) && (v != NULL)); 1557 sc_assert(sizeof(uchar) <= sizeof(sc_digit)); 1558#endif 1559 1560 sc_digit *vend = (v + vlen); 1561 1562 const int nsr = BITS_PER_DIGIT - BITS_PER_BYTE; 1563 const sc_digit mask = one_and_ones(nsr); 1564 1565 (*v) = (sc_digit) u[ulen - 1]; 1566 1567 for (int i = ulen - 2; i >= 0; --i) { 1568 // Manual inlining of vec_shift_left(). 1569 sc_digit *viter = v; 1570 sc_digit carry = 0; 1571 while (viter < vend) { 1572 sc_digit vval = (*viter); 1573 (*viter++) = (((vval & mask) << BITS_PER_BYTE) | carry); 1574 carry = vval >> nsr; 1575 } 1576 1577 if (viter < vend) 1578 (*viter) = carry; 1579 1580 (*v) |= (sc_digit)u[i]; 1581 } 1582} 1583 1584// Set u <<= nsl. 1585// If nsl is negative, it is ignored. 1586void 1587vec_shift_left(int ulen, sc_digit *u, int nsl) 1588{ 1589#ifdef DEBUG_SYSTEMC 1590 sc_assert((ulen > 0) && (u != NULL)); 1591#endif 1592 1593 if (nsl <= 0) 1594 return; 1595 1596 // Shift left whole digits if nsl is large enough. 1597 if (nsl >= (int) BITS_PER_DIGIT) { 1598 int nd; 1599 if (nsl % BITS_PER_DIGIT == 0) { 1600 nd = nsl / BITS_PER_DIGIT; // No need to use DIV_CEIL(nsl). 1601 nsl = 0; 1602 } else { 1603 nd = DIV_CEIL(nsl) - 1; 1604 nsl -= nd * BITS_PER_DIGIT; 1605 } 1606 1607 if (nd) { 1608 // Shift left for nd digits. 1609 for (int j = ulen - 1; j >= nd; --j) 1610 u[j] = u[j - nd]; 1611 1612 vec_zero(sc_min(nd, ulen), u); 1613 } 1614 if (nsl == 0) 1615 return; 1616 } 1617 1618 // Shift left if nsl < BITS_PER_DIGIT. 
1619 sc_digit *uiter = u; 1620 sc_digit *uend = uiter + ulen; 1621 1622 int nsr = BITS_PER_DIGIT - nsl; 1623 sc_digit mask = one_and_ones(nsr); 1624 1625 sc_digit carry = 0; 1626 1627 while (uiter < uend) { 1628 sc_digit uval = (*uiter); 1629 (*uiter++) = (((uval & mask) << nsl) | carry); 1630 carry = uval >> nsr; 1631 } 1632 1633 if (uiter < uend) 1634 (*uiter) = carry; 1635} 1636 1637// Set u >>= nsr. 1638// If nsr is negative, it is ignored. 1639void 1640vec_shift_right(int ulen, sc_digit *u, int nsr, sc_digit fill) 1641{ 1642#ifdef DEBUG_SYSTEMC 1643 sc_assert((ulen > 0) && (u != NULL)); 1644#endif 1645 1646 // fill is usually either 0 or DIGIT_MASK; it can be any value. 1647 if (nsr <= 0) 1648 return; 1649 1650 // Shift right whole digits if nsr is large enough. 1651 if (nsr >= (int) BITS_PER_DIGIT) { 1652 int nd; 1653 if (nsr % BITS_PER_DIGIT == 0) { 1654 nd = nsr / BITS_PER_DIGIT; 1655 nsr = 0; 1656 } else { 1657 nd = DIV_CEIL(nsr) - 1; 1658 nsr -= nd * BITS_PER_DIGIT; 1659 } 1660 1661 if (nd) { 1662 // Shift right for nd digits. 1663 for (int j = 0; j < (ulen - nd); ++j) 1664 u[j] = u[j + nd]; 1665 1666 if (fill) { 1667 for (int j = ulen - sc_min( nd, ulen ); j < ulen; ++j) 1668 u[j] = fill; 1669 } else { 1670 vec_zero(ulen - sc_min( nd, ulen ), ulen, u); 1671 } 1672 } 1673 if (nsr == 0) 1674 return; 1675 } 1676 1677 // Shift right if nsr < BITS_PER_DIGIT. 1678 sc_digit *ubegin = u; 1679 sc_digit *uiter = (ubegin + ulen); 1680 1681 int nsl = BITS_PER_DIGIT - nsr; 1682 sc_digit mask = one_and_ones(nsr); 1683 1684 sc_digit carry = (fill & mask) << nsl; 1685 1686 while (ubegin < uiter) { 1687 sc_digit uval = (*--uiter); 1688 (*uiter) = (uval >> nsr) | carry; 1689 carry = (uval & mask) << nsl; 1690 } 1691} 1692 1693 1694// Let u[l..r], where l and r are left and right bit positions 1695// respectively, be equal to its mirror image. 
1696void 1697vec_reverse(int unb, int und, sc_digit *ud, int l, int r) 1698{ 1699#ifdef DEBUG_SYSTEMC 1700 sc_assert((unb > 0) && (und > 0) && (ud != NULL)); 1701 sc_assert((0 <= r) && (r <= l) && (l < unb)); 1702#endif 1703 1704 if (l < r) { 1705 std::stringstream msg; 1706 msg << "vec_reverse( int, int, sc_digit*, int l, int r ) : " << 1707 "l = " << l << " < r = " << r << " is not valid",
|