ltage.cc (13455:56e25a5f9603) | ltage.cc (13493:91ae6168ef27) |
---|---|
1/* 2 * Copyright (c) 2014 The University of Wisconsin 3 * 4 * Copyright (c) 2006 INRIA (Institut National de Recherche en 5 * Informatique et en Automatique / French National Research Institute 6 * for Computer Science and Applied Mathematics) 7 * 8 * All rights reserved. --- 44 unchanged lines hidden (view full) --- 53 loopTableAgeBits(params->loopTableAgeBits), 54 loopTableConfidenceBits(params->loopTableConfidenceBits), 55 loopTableTagBits(params->loopTableTagBits), 56 loopTableIterBits(params->loopTableIterBits), 57 logLoopTableAssoc(params->logLoopTableAssoc), 58 confidenceThreshold((1 << loopTableConfidenceBits) - 1), 59 loopTagMask((1 << loopTableTagBits) - 1), 60 loopNumIterMask((1 << loopTableIterBits) - 1), | 1/* 2 * Copyright (c) 2014 The University of Wisconsin 3 * 4 * Copyright (c) 2006 INRIA (Institut National de Recherche en 5 * Informatique et en Automatique / French National Research Institute 6 * for Computer Science and Applied Mathematics) 7 * 8 * All rights reserved. --- 44 unchanged lines hidden (view full) --- 53 loopTableAgeBits(params->loopTableAgeBits), 54 loopTableConfidenceBits(params->loopTableConfidenceBits), 55 loopTableTagBits(params->loopTableTagBits), 56 loopTableIterBits(params->loopTableIterBits), 57 logLoopTableAssoc(params->logLoopTableAssoc), 58 confidenceThreshold((1 << loopTableConfidenceBits) - 1), 59 loopTagMask((1 << loopTableTagBits) - 1), 60 loopNumIterMask((1 << loopTableIterBits) - 1), |
61 loopSetMask((1 << (logSizeLoopPred - logLoopTableAssoc)) - 1), |
|
61 loopUseCounter(0), | 62 loopUseCounter(0), |
62 withLoopBits(params->withLoopBits) | 63 withLoopBits(params->withLoopBits), 64 useDirectionBit(params->useDirectionBit), 65 useSpeculation(params->useSpeculation), 66 useHashing(params->useHashing) |
63{ 64 // we use uint16_t type for these vales, so they cannot be more than 65 // 16 bits 66 assert(loopTableTagBits <= 16); 67 assert(loopTableIterBits <= 16); 68 69 assert(logSizeLoopPred >= logLoopTableAssoc); 70 --- 6 unchanged lines hidden (view full) --- 77 // The loop table is implemented as a linear table 78 // If associativity is N (N being 1 << logLoopTableAssoc), 79 // the first N entries are for set 0, the next N entries are for set 1, 80 // and so on. 81 // Thus, this function calculates the set and then it gets left shifted 82 // by logLoopTableAssoc in order to return the index of the first of the 83 // N entries of the set 84 Addr mask = (ULL(1) << (logSizeLoopPred - logLoopTableAssoc)) - 1; | 67{ 68 // we use uint16_t type for these vales, so they cannot be more than 69 // 16 bits 70 assert(loopTableTagBits <= 16); 71 assert(loopTableIterBits <= 16); 72 73 assert(logSizeLoopPred >= logLoopTableAssoc); 74 --- 6 unchanged lines hidden (view full) --- 81 // The loop table is implemented as a linear table 82 // If associativity is N (N being 1 << logLoopTableAssoc), 83 // the first N entries are for set 0, the next N entries are for set 1, 84 // and so on. 85 // Thus, this function calculates the set and then it gets left shifted 86 // by logLoopTableAssoc in order to return the index of the first of the 87 // N entries of the set 88 Addr mask = (ULL(1) << (logSizeLoopPred - logLoopTableAssoc)) - 1; |
85 return (((pc_in >> instShiftAmt) & mask) << logLoopTableAssoc); | 89 Addr pc = pc_in >> instShiftAmt; 90 if (useHashing) { 91 // copied from TAGE-SC-L 92 // (http://www.jilp.org/cbp2016/code/AndreSeznecLimited.tar.gz) 93 pc ^= (pc_in >> (instShiftAmt + logLoopTableAssoc)); 94 } 95 return ((pc & mask) << logLoopTableAssoc); |
86} 87 | 96} 97 |
98int 99LTAGE::finallindex(int index, int lowPcBits, int way) const 100{ 101 // copied from TAGE-SC-L 102 // (http://www.jilp.org/cbp2016/code/AndreSeznecLimited.tar.gz) 103 return (useHashing ? (index ^ ((lowPcBits >> way) << logLoopTableAssoc)) : 104 (index)) 105 + way; 106} 107 |
|
88//loop prediction: only used if high confidence 89bool | 108//loop prediction: only used if high confidence 109bool |
90LTAGE::getLoop(Addr pc, LTageBranchInfo* bi) const | 110LTAGE::getLoop(Addr pc, LTageBranchInfo* bi, bool speculative) const |
91{ 92 bi->loopHit = -1; 93 bi->loopPredValid = false; 94 bi->loopIndex = lindex(pc); 95 unsigned pcShift = instShiftAmt + logSizeLoopPred - logLoopTableAssoc; 96 bi->loopTag = ((pc) >> pcShift) & loopTagMask; 97 | 111{ 112 bi->loopHit = -1; 113 bi->loopPredValid = false; 114 bi->loopIndex = lindex(pc); 115 unsigned pcShift = instShiftAmt + logSizeLoopPred - logLoopTableAssoc; 116 bi->loopTag = ((pc) >> pcShift) & loopTagMask; 117 |
118 if (useHashing) { 119 bi->loopTag ^= ((pc >> (pcShift + logSizeLoopPred)) & loopTagMask); 120 bi->loopLowPcBits = (pc >> pcShift) & loopSetMask; 121 } 122 |
|
98 for (int i = 0; i < (1 << logLoopTableAssoc); i++) { | 123 for (int i = 0; i < (1 << logLoopTableAssoc); i++) { |
99 if (ltable[bi->loopIndex + i].tag == bi->loopTag) { | 124 int idx = finallindex(bi->loopIndex, bi->loopLowPcBits, i); 125 if (ltable[idx].tag == bi->loopTag) { |
100 bi->loopHit = i; 101 bi->loopPredValid = | 126 bi->loopHit = i; 127 bi->loopPredValid = |
102 ltable[bi->loopIndex + i].confidence == confidenceThreshold; 103 bi->currentIter = ltable[bi->loopIndex + i].currentIterSpec; 104 if (ltable[bi->loopIndex + i].currentIterSpec + 1 == 105 ltable[bi->loopIndex + i].numIter) { 106 return !(ltable[bi->loopIndex + i].dir); 107 }else { 108 return (ltable[bi->loopIndex + i].dir); | 128 ltable[idx].confidence == confidenceThreshold; 129 130 uint16_t iter = speculative ? ltable[idx].currentIterSpec 131 : ltable[idx].currentIter; 132 133 if ((iter + 1) == ltable[idx].numIter) { 134 return useDirectionBit ? !(ltable[idx].dir) : false; 135 } else { 136 return useDirectionBit ? (ltable[idx].dir) : true; |
109 } 110 } 111 } 112 return false; 113} 114 115void | 137 } 138 } 139 } 140 return false; 141} 142 143void |
116LTAGE::specLoopUpdate(Addr pc, bool taken, LTageBranchInfo* bi) | 144LTAGE::specLoopUpdate(bool taken, LTageBranchInfo* bi) |
117{ 118 if (bi->loopHit>=0) { | 145{ 146 if (bi->loopHit>=0) { |
119 int index = lindex(pc); | 147 int index = finallindex(bi->loopIndex, bi->loopLowPcBits, bi->loopHit); |
120 if (taken != ltable[index].dir) { 121 ltable[index].currentIterSpec = 0; 122 } else { 123 ltable[index].currentIterSpec = 124 (ltable[index].currentIterSpec + 1) & loopNumIterMask; 125 } 126 } 127} 128 129void 130LTAGE::loopUpdate(Addr pc, bool taken, LTageBranchInfo* bi) 131{ | 148 if (taken != ltable[index].dir) { 149 ltable[index].currentIterSpec = 0; 150 } else { 151 ltable[index].currentIterSpec = 152 (ltable[index].currentIterSpec + 1) & loopNumIterMask; 153 } 154 } 155} 156 157void 158LTAGE::loopUpdate(Addr pc, bool taken, LTageBranchInfo* bi) 159{ |
132 int idx = bi->loopIndex + bi->loopHit; | 160 int idx = finallindex(bi->loopIndex, bi->loopLowPcBits, bi->loopHit); |
133 if (bi->loopHit >= 0) { 134 //already a hit 135 if (bi->loopPredValid) { 136 if (taken != bi->loopPred) { 137 // free the entry 138 ltable[idx].numIter = 0; 139 ltable[idx].age = 0; 140 ltable[idx].confidence = 0; --- 12 unchanged lines hidden (view full) --- 153 if (ltable[idx].numIter != 0) { 154 // free the entry 155 ltable[idx].numIter = 0; 156 ltable[idx].age = 0; 157 ltable[idx].confidence = 0; 158 } 159 } 160 | 161 if (bi->loopHit >= 0) { 162 //already a hit 163 if (bi->loopPredValid) { 164 if (taken != bi->loopPred) { 165 // free the entry 166 ltable[idx].numIter = 0; 167 ltable[idx].age = 0; 168 ltable[idx].confidence = 0; --- 12 unchanged lines hidden (view full) --- 181 if (ltable[idx].numIter != 0) { 182 // free the entry 183 ltable[idx].numIter = 0; 184 ltable[idx].age = 0; 185 ltable[idx].confidence = 0; 186 } 187 } 188 |
161 if (taken != ltable[idx].dir) { | 189 if (taken != (useDirectionBit ? ltable[idx].dir : true)) { |
162 if (ltable[idx].currentIter == ltable[idx].numIter) { 163 DPRINTF(LTage, "Loop End predicted successfully:%lx\n", pc); 164 165 unsignedCtrUpdate(ltable[idx].confidence, true, 166 loopTableConfidenceBits); 167 //just do not predict when the loop count is 1 or 2 168 if (ltable[idx].numIter < 3) { 169 // free the entry | 190 if (ltable[idx].currentIter == ltable[idx].numIter) { 191 DPRINTF(LTage, "Loop End predicted successfully:%lx\n", pc); 192 193 unsignedCtrUpdate(ltable[idx].confidence, true, 194 loopTableConfidenceBits); 195 //just do not predict when the loop count is 1 or 2 196 if (ltable[idx].numIter < 3) { 197 // free the entry |
170 ltable[idx].dir = taken; | 198 ltable[idx].dir = taken; // ignored if no useDirectionBit |
171 ltable[idx].numIter = 0; 172 ltable[idx].age = 0; 173 ltable[idx].confidence = 0; 174 } 175 } else { 176 DPRINTF(LTage, "Loop End predicted incorrectly:%lx\n", pc); 177 if (ltable[idx].numIter == 0) { 178 // first complete nest; --- 5 unchanged lines hidden (view full) --- 184 ltable[idx].numIter = 0; 185 ltable[idx].age = 0; 186 ltable[idx].confidence = 0; 187 } 188 } 189 ltable[idx].currentIter = 0; 190 } 191 | 199 ltable[idx].numIter = 0; 200 ltable[idx].age = 0; 201 ltable[idx].confidence = 0; 202 } 203 } else { 204 DPRINTF(LTage, "Loop End predicted incorrectly:%lx\n", pc); 205 if (ltable[idx].numIter == 0) { 206 // first complete nest; --- 5 unchanged lines hidden (view full) --- 212 ltable[idx].numIter = 0; 213 ltable[idx].age = 0; 214 ltable[idx].confidence = 0; 215 } 216 } 217 ltable[idx].currentIter = 0; 218 } 219 |
192 } else if (taken) { | 220 } else if (useDirectionBit ? 221 ((bi->loopPredValid ? bi->loopPred : bi->tagePred) != taken) : 222 taken) { |
193 //try to allocate an entry on taken branch 194 int nrand = random_mt.random<int>(); 195 for (int i = 0; i < (1 << logLoopTableAssoc); i++) { 196 int loop_hit = (nrand + i) & ((1 << logLoopTableAssoc) - 1); 197 idx = bi->loopIndex + loop_hit; 198 if (ltable[idx].age == 0) { 199 DPRINTF(LTage, "Allocating loop pred entry for branch %lx\n", 200 pc); | 223 //try to allocate an entry on taken branch 224 int nrand = random_mt.random<int>(); 225 for (int i = 0; i < (1 << logLoopTableAssoc); i++) { 226 int loop_hit = (nrand + i) & ((1 << logLoopTableAssoc) - 1); 227 idx = bi->loopIndex + loop_hit; 228 if (ltable[idx].age == 0) { 229 DPRINTF(LTage, "Allocating loop pred entry for branch %lx\n", 230 pc); |
201 ltable[idx].dir = !taken; | 231 ltable[idx].dir = !taken; // ignored if no useDirectionBit |
202 ltable[idx].tag = bi->loopTag; 203 ltable[idx].numIter = 0; 204 ltable[idx].age = (1 << loopTableAgeBits) - 1; 205 ltable[idx].confidence = 0; 206 ltable[idx].currentIter = 1; 207 break; 208 209 } --- 9 unchanged lines hidden (view full) --- 219LTAGE::predict(ThreadID tid, Addr branch_pc, bool cond_branch, void* &b) 220{ 221 LTageBranchInfo *bi = new LTageBranchInfo(nHistoryTables+1); 222 b = (void*)(bi); 223 224 bool pred_taken = tagePredict(tid, branch_pc, cond_branch, bi); 225 226 if (cond_branch) { | 232 ltable[idx].tag = bi->loopTag; 233 ltable[idx].numIter = 0; 234 ltable[idx].age = (1 << loopTableAgeBits) - 1; 235 ltable[idx].confidence = 0; 236 ltable[idx].currentIter = 1; 237 break; 238 239 } --- 9 unchanged lines hidden (view full) --- 249LTAGE::predict(ThreadID tid, Addr branch_pc, bool cond_branch, void* &b) 250{ 251 LTageBranchInfo *bi = new LTageBranchInfo(nHistoryTables+1); 252 b = (void*)(bi); 253 254 bool pred_taken = tagePredict(tid, branch_pc, cond_branch, bi); 255 256 if (cond_branch) { |
227 bi->loopPred = getLoop(branch_pc, bi); // loop prediction | 257 // loop prediction 258 bi->loopPred = getLoop(branch_pc, bi, useSpeculation); |
228 229 if ((loopUseCounter >= 0) && bi->loopPredValid) { 230 pred_taken = bi->loopPred; 231 bi->provider = LOOP; 232 } 233 DPRINTF(LTage, "Predict for %lx: taken?:%d, loopTaken?:%d, " 234 "loopValid?:%d, loopUseCounter:%d, tagePred:%d, altPred:%d\n", 235 branch_pc, pred_taken, bi->loopPred, bi->loopPredValid, 236 loopUseCounter, bi->tagePred, bi->altTaken); | 259 260 if ((loopUseCounter >= 0) && bi->loopPredValid) { 261 pred_taken = bi->loopPred; 262 bi->provider = LOOP; 263 } 264 DPRINTF(LTage, "Predict for %lx: taken?:%d, loopTaken?:%d, " 265 "loopValid?:%d, loopUseCounter:%d, tagePred:%d, altPred:%d\n", 266 branch_pc, pred_taken, bi->loopPred, bi->loopPredValid, 267 loopUseCounter, bi->tagePred, bi->altTaken); |
268 269 if (useSpeculation) { 270 specLoopUpdate(pred_taken, bi); 271 } |
|
237 } 238 | 272 } 273 |
239 specLoopUpdate(branch_pc, pred_taken, bi); | |
240 return pred_taken; 241} 242 243void 244LTAGE::condBranchUpdate(Addr branch_pc, bool taken, 245 TageBranchInfo* tage_bi, int nrand) 246{ 247 LTageBranchInfo* bi = static_cast<LTageBranchInfo*>(tage_bi); 248 | 274 return pred_taken; 275} 276 277void 278LTAGE::condBranchUpdate(Addr branch_pc, bool taken, 279 TageBranchInfo* tage_bi, int nrand) 280{ 281 LTageBranchInfo* bi = static_cast<LTageBranchInfo*>(tage_bi); 282 |
249 // first update the loop predictor 250 loopUpdate(branch_pc, taken, bi); | 283 if (useSpeculation) { 284 // recalculate loop prediction without speculation 285 // It is ok to overwrite the loop prediction fields in bi 286 // as the stats have already been updated with the previous 287 // values 288 bi->loopPred = getLoop(branch_pc, bi, false); 289 } |
251 252 if (bi->loopPredValid) { 253 if (bi->tagePred != bi->loopPred) { 254 ctrUpdate(loopUseCounter, 255 (bi->loopPred == taken), 256 withLoopBits); 257 } 258 } 259 | 290 291 if (bi->loopPredValid) { 292 if (bi->tagePred != bi->loopPred) { 293 ctrUpdate(loopUseCounter, 294 (bi->loopPred == taken), 295 withLoopBits); 296 } 297 } 298 |
299 loopUpdate(branch_pc, taken, bi); 300 |
|
260 TAGE::condBranchUpdate(branch_pc, taken, bi, nrand); 261} 262 263void 264LTAGE::squash(ThreadID tid, bool taken, void *bp_history) 265{ 266 TAGE::squash(tid, taken, bp_history); 267 268 LTageBranchInfo* bi = (LTageBranchInfo*)(bp_history); 269 270 if (bi->condBranch) { 271 if (bi->loopHit >= 0) { | 301 TAGE::condBranchUpdate(branch_pc, taken, bi, nrand); 302} 303 304void 305LTAGE::squash(ThreadID tid, bool taken, void *bp_history) 306{ 307 TAGE::squash(tid, taken, bp_history); 308 309 LTageBranchInfo* bi = (LTageBranchInfo*)(bp_history); 310 311 if (bi->condBranch) { 312 if (bi->loopHit >= 0) { |
272 int idx = bi->loopIndex + bi->loopHit; | 313 int idx = finallindex(bi->loopIndex, 314 bi->loopLowPcBits, 315 bi->loopHit); |
273 ltable[idx].currentIterSpec = bi->currentIter; 274 } 275 } 276} 277 278void 279LTAGE::squash(ThreadID tid, void *bp_history) 280{ 281 LTageBranchInfo* bi = (LTageBranchInfo*)(bp_history); 282 if (bi->condBranch) { 283 if (bi->loopHit >= 0) { | 316 ltable[idx].currentIterSpec = bi->currentIter; 317 } 318 } 319} 320 321void 322LTAGE::squash(ThreadID tid, void *bp_history) 323{ 324 LTageBranchInfo* bi = (LTageBranchInfo*)(bp_history); 325 if (bi->condBranch) { 326 if (bi->loopHit >= 0) { |
284 int idx = bi->loopIndex + bi->loopHit; | 327 int idx = finallindex(bi->loopIndex, 328 bi->loopLowPcBits, 329 bi->loopHit); |
285 ltable[idx].currentIterSpec = bi->currentIter; 286 } 287 } 288 289 TAGE::squash(tid, bp_history); 290} 291 292 --- 41 unchanged lines hidden --- | 330 ltable[idx].currentIterSpec = bi->currentIter; 331 } 332 } 333 334 TAGE::squash(tid, bp_history); 335} 336 337 --- 41 unchanged lines hidden --- |