1/* 2 * Copyright (c) 2014 The University of Wisconsin 3 * 4 * Copyright (c) 2006 INRIA (Institut National de Recherche en 5 * Informatique et en Automatique / French National Research Institute 6 * for Computer Science and Applied Mathematics) 7 * 8 * All rights reserved. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions are 12 * met: redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer; 14 * redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution; 17 * neither the name of the copyright holders nor the names of its 18 * contributors may be used to endorse or promote products derived from 19 * this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 * 33 * Authors: Vignyan Reddy, Dibakar Gope and Arthur Perais, 34 * from André Seznec's code. 35 */ 36 37/* @file 38 * Implementation of a L-TAGE branch predictor 39 */ 40 41#include "cpu/pred/ltage.hh" 42 43#include "base/intmath.hh" 44#include "base/logging.hh" 45#include "base/random.hh" 46#include "base/trace.hh" 47#include "debug/Fetch.hh" 48#include "debug/LTage.hh" 49 50LTAGE::LTAGE(const LTAGEParams *params) 51 : BPredUnit(params),
|
52 logSizeBiMP(params->logSizeBiMP),
|
52 logRatioBiModalHystEntries(params->logRatioBiModalHystEntries),
|
54 logSizeTagTables(params->logSizeTagTables),
|
53 logSizeLoopPred(params->logSizeLoopPred), 54 nHistoryTables(params->nHistoryTables), 55 tagTableCounterBits(params->tagTableCounterBits), 56 tagTableUBits(params->tagTableUBits), 57 histBufferSize(params->histBufferSize), 58 minHist(params->minHist), 59 maxHist(params->maxHist),
|
62 minTagWidth(params->minTagWidth),
|
60 pathHistBits(params->pathHistBits), |
61 loopTableAgeBits(params->loopTableAgeBits), 62 loopTableConfidenceBits(params->loopTableConfidenceBits), 63 loopTableTagBits(params->loopTableTagBits), 64 loopTableIterBits(params->loopTableIterBits),
|
65 logLoopTableAssoc(params->logLoopTableAssoc), |
66 confidenceThreshold((1 << loopTableConfidenceBits) - 1), 67 loopTagMask((1 << loopTableTagBits) - 1), 68 loopNumIterMask((1 << loopTableIterBits) - 1),
|
70 threadHistory(params->numThreads)
|
69 tagTableTagWidths(params->tagTableTagWidths), 70 logTagTableSizes(params->logTagTableSizes), 71 threadHistory(params->numThreads), 72 logUResetPeriod(params->logUResetPeriod), 73 useAltOnNaBits(params->useAltOnNaBits), 74 withLoopBits(params->withLoopBits) |
75{ 76 // Current method for periodically resetting the u counter bits only 77 // works for 1 or 2 bits 78 // Also make sure that it is not 0 79 assert(tagTableUBits <= 2 && (tagTableUBits > 0)); 80 81 // we use uint16_t type for these vales, so they cannot be more than 82 // 16 bits 83 assert(loopTableTagBits <= 16); 84 assert(loopTableIterBits <= 16); 85
|
86 assert(logSizeLoopPred >= logLoopTableAssoc); 87 88 // we use int type for the path history, so it cannot be more than 89 // its size 90 assert(pathHistBits <= (sizeof(int)*8)); 91 92 // initialize the counter to half of the period 93 assert(logUResetPeriod != 0); 94 tCounter = ULL(1) << (logUResetPeriod - 1); 95 |
96 assert(params->histBufferSize > params->maxHist * 2); 97 useAltPredForNewlyAllocated = 0;
|
84 logTick = 19;
85 tCounter = ULL(1) << (logTick - 1);
|
98 99 for (auto& history : threadHistory) { 100 history.pathHist = 0; 101 history.globalHistory = new uint8_t[histBufferSize]; 102 history.gHist = history.globalHistory; 103 memset(history.gHist, 0, histBufferSize); 104 history.ptGhist = 0; 105 } 106 107 histLengths = new int [nHistoryTables+1]; 108 histLengths[1] = minHist; 109 histLengths[nHistoryTables] = maxHist; 110 111 for (int i = 2; i <= nHistoryTables; i++) { 112 histLengths[i] = (int) (((double) minHist * 113 pow ((double) (maxHist) / (double) minHist, 114 (double) (i - 1) / (double) ((nHistoryTables- 1)))) 115 + 0.5); 116 } 117
|
106 tagWidths[1] = minTagWidth;
107 tagWidths[2] = minTagWidth;
108 tagWidths[3] = minTagWidth + 1;
109 tagWidths[4] = minTagWidth + 1;
110 tagWidths[5] = minTagWidth + 2;
111 tagWidths[6] = minTagWidth + 3;
112 tagWidths[7] = minTagWidth + 4;
113 tagWidths[8] = minTagWidth + 5;
114 tagWidths[9] = minTagWidth + 5;
115 tagWidths[10] = minTagWidth + 6;
116 tagWidths[11] = minTagWidth + 7;
117 tagWidths[12] = minTagWidth + 8;
|
118 assert(tagTableTagWidths.size() == (nHistoryTables+1)); 119 assert(logTagTableSizes.size() == (nHistoryTables+1)); |
120
|
119 for (int i = 1; i <= 2; i++)
120 tagTableSizes[i] = logSizeTagTables - 1;
121 for (int i = 3; i <= 6; i++)
122 tagTableSizes[i] = logSizeTagTables;
123 for (int i = 7; i <= 10; i++)
124 tagTableSizes[i] = logSizeTagTables - 1;
125 for (int i = 11; i <= 12; i++)
126 tagTableSizes[i] = logSizeTagTables - 2;
|
121 // First entry is for the Bimodal table and it is untagged in this 122 // implementation 123 assert(tagTableTagWidths[0] == 0); |
124 125 for (auto& history : threadHistory) { 126 history.computeIndices = new FoldedHistory[nHistoryTables+1]; 127 history.computeTags[0] = new FoldedHistory[nHistoryTables+1]; 128 history.computeTags[1] = new FoldedHistory[nHistoryTables+1]; 129 130 for (int i = 1; i <= nHistoryTables; i++) {
|
134 history.computeIndices[i].init(histLengths[i], (tagTableSizes[i]));
|
131 history.computeIndices[i].init( 132 histLengths[i], (logTagTableSizes[i])); |
133 history.computeTags[0][i].init(
|
136 history.computeIndices[i].origLength, tagWidths[i]);
|
134 history.computeIndices[i].origLength, tagTableTagWidths[i]); |
135 history.computeTags[1][i].init(
|
138 history.computeIndices[i].origLength, tagWidths[i] - 1);
|
136 history.computeIndices[i].origLength, tagTableTagWidths[i]-1); |
137 DPRINTF(LTage, "HistLength:%d, TTSize:%d, TTTWidth:%d\n",
|
140 histLengths[i], tagTableSizes[i], tagWidths[i]);
|
138 histLengths[i], logTagTableSizes[i], tagTableTagWidths[i]); |
139 } 140 } 141
|
144 const uint64_t bimodalTableSize = ULL(1) << logSizeBiMP;
|
142 const uint64_t bimodalTableSize = ULL(1) << logTagTableSizes[0]; |
143 btablePrediction.resize(bimodalTableSize, false); 144 btableHysteresis.resize(bimodalTableSize >> logRatioBiModalHystEntries, 145 true); 146 147 ltable = new LoopEntry[ULL(1) << logSizeLoopPred]; 148 gtable = new TageEntry*[nHistoryTables + 1]; 149 for (int i = 1; i <= nHistoryTables; i++) {
|
152 gtable[i] = new TageEntry[1<<(tagTableSizes[i])];
|
150 gtable[i] = new TageEntry[1<<(logTagTableSizes[i])]; |
151 } 152 153 tableIndices = new int [nHistoryTables+1]; 154 tableTags = new int [nHistoryTables+1]; 155 156 loopUseCounter = 0; 157} 158 159int 160LTAGE::bindex(Addr pc_in) const 161{
|
164 return ((pc_in >> instShiftAmt) & ((ULL(1) << (logSizeBiMP)) - 1));
|
162 return ((pc_in >> instShiftAmt) & ((ULL(1) << (logTagTableSizes[0])) - 1)); |
163} 164 165int 166LTAGE::lindex(Addr pc_in) const 167{
|
170 return (((pc_in >> instShiftAmt) &
171 ((ULL(1) << (logSizeLoopPred - 2)) - 1)) << 2);
|
168 // The loop table is implemented as a linear table 169 // If associativity is N (N being 1 << logLoopTableAssoc), 170 // the first N entries are for set 0, the next N entries are for set 1, 171 // and so on. 172 // Thus, this function calculates the set and then it gets left shifted 173 // by logLoopTableAssoc in order to return the index of the first of the 174 // N entries of the set 175 Addr mask = (ULL(1) << (logSizeLoopPred - logLoopTableAssoc)) - 1; 176 return (((pc_in >> instShiftAmt) & mask) << logLoopTableAssoc); |
177} 178 179int 180LTAGE::F(int A, int size, int bank) const 181{ 182 int A1, A2; 183 184 A = A & ((ULL(1) << size) - 1);
|
180 A1 = (A & ((ULL(1) << tagTableSizes[bank]) - 1));
181 A2 = (A >> tagTableSizes[bank]);
182 A2 = ((A2 << bank) & ((ULL(1) << tagTableSizes[bank]) - 1))
183 + (A2 >> (tagTableSizes[bank] - bank));
|
185 A1 = (A & ((ULL(1) << logTagTableSizes[bank]) - 1)); 186 A2 = (A >> logTagTableSizes[bank]); 187 A2 = ((A2 << bank) & ((ULL(1) << logTagTableSizes[bank]) - 1)) 188 + (A2 >> (logTagTableSizes[bank] - bank)); |
189 A = A1 ^ A2;
|
185 A = ((A << bank) & ((ULL(1) << tagTableSizes[bank]) - 1))
186 + (A >> (tagTableSizes[bank] - bank));
|
190 A = ((A << bank) & ((ULL(1) << logTagTableSizes[bank]) - 1)) 191 + (A >> (logTagTableSizes[bank] - bank)); |
192 return (A); 193} 194 195 196// gindex computes a full hash of pc, ghist and pathHist 197int 198LTAGE::gindex(ThreadID tid, Addr pc, int bank) const 199{ 200 int index;
|
196 int hlen = (histLengths[bank] > 16) ? 16 : histLengths[bank];
|
201 int hlen = (histLengths[bank] > pathHistBits) ? pathHistBits : 202 histLengths[bank]; 203 const Addr shiftedPc = pc >> instShiftAmt; |
204 index =
|
198 (pc >> instShiftAmt) ^
199 ((pc >> instShiftAmt) >> ((int) abs(tagTableSizes[bank] - bank) + 1)) ^
|
205 shiftedPc ^ 206 (shiftedPc >> ((int) abs(logTagTableSizes[bank] - bank) + 1)) ^ |
207 threadHistory[tid].computeIndices[bank].comp ^ 208 F(threadHistory[tid].pathHist, hlen, bank); 209
|
203 return (index & ((ULL(1) << (tagTableSizes[bank])) - 1));
|
210 return (index & ((ULL(1) << (logTagTableSizes[bank])) - 1)); |
211} 212 213 214// Tag computation 215uint16_t 216LTAGE::gtag(ThreadID tid, Addr pc, int bank) const 217{ 218 int tag = (pc >> instShiftAmt) ^ 219 threadHistory[tid].computeTags[0][bank].comp ^ 220 (threadHistory[tid].computeTags[1][bank].comp << 1); 221
|
215 return (tag & ((ULL(1) << tagWidths[bank]) - 1));
|
222 return (tag & ((ULL(1) << tagTableTagWidths[bank]) - 1)); |
223} 224 225 226// Up-down saturating counter 227void 228LTAGE::ctrUpdate(int8_t & ctr, bool taken, int nbits) 229{ 230 assert(nbits <= sizeof(int8_t) << 3); 231 if (taken) { 232 if (ctr < ((1 << (nbits - 1)) - 1)) 233 ctr++; 234 } else { 235 if (ctr > -(1 << (nbits - 1))) 236 ctr--; 237 } 238} 239 240// Up-down unsigned saturating counter 241void 242LTAGE::unsignedCtrUpdate(uint8_t & ctr, bool up, unsigned nbits) 243{ 244 assert(nbits <= sizeof(uint8_t) << 3); 245 if (up) { 246 if (ctr < ((1 << nbits) - 1)) 247 ctr++; 248 } else { 249 if (ctr) 250 ctr--; 251 } 252} 253 254// Bimodal prediction 255bool 256LTAGE::getBimodePred(Addr pc, BranchInfo* bi) const 257{ 258 return btablePrediction[bi->bimodalIndex]; 259} 260 261 262// Update the bimodal predictor: a hysteresis bit is shared among N prediction 263// bits (N = 2 ^ logRatioBiModalHystEntries) 264void 265LTAGE::baseUpdate(Addr pc, bool taken, BranchInfo* bi) 266{ 267 int inter = (btablePrediction[bi->bimodalIndex] << 1) 268 + btableHysteresis[bi->bimodalIndex >> logRatioBiModalHystEntries]; 269 if (taken) { 270 if (inter < 3) 271 inter++; 272 } else if (inter > 0) { 273 inter--; 274 } 275 const bool pred = inter >> 1; 276 const bool hyst = inter & 1; 277 btablePrediction[bi->bimodalIndex] = pred; 278 btableHysteresis[bi->bimodalIndex >> logRatioBiModalHystEntries] = hyst; 279 DPRINTF(LTage, "Updating branch %lx, pred:%d, hyst:%d\n", pc, pred, hyst); 280} 281 282 283//loop prediction: only used if high confidence 284bool 285LTAGE::getLoop(Addr pc, BranchInfo* bi) const 286{ 287 bi->loopHit = -1; 288 bi->loopPredValid = false; 289 bi->loopIndex = lindex(pc);
|
283 bi->loopTag = ((pc) >> (instShiftAmt + logSizeLoopPred - 2)) & loopTagMask;
|
290 unsigned pcShift = instShiftAmt + logSizeLoopPred - logLoopTableAssoc; 291 bi->loopTag = ((pc) >> pcShift) & loopTagMask; |
292
|
285 for (int i = 0; i < 4; i++) {
|
293 for (int i = 0; i < (1 << logLoopTableAssoc); i++) { |
294 if (ltable[bi->loopIndex + i].tag == bi->loopTag) { 295 bi->loopHit = i; 296 bi->loopPredValid = 297 ltable[bi->loopIndex + i].confidence == confidenceThreshold; 298 bi->currentIter = ltable[bi->loopIndex + i].currentIterSpec; 299 if (ltable[bi->loopIndex + i].currentIterSpec + 1 == 300 ltable[bi->loopIndex + i].numIter) { 301 return !(ltable[bi->loopIndex + i].dir); 302 }else { 303 return (ltable[bi->loopIndex + i].dir); 304 } 305 } 306 } 307 return false; 308} 309 310void 311LTAGE::specLoopUpdate(Addr pc, bool taken, BranchInfo* bi) 312{ 313 if (bi->loopHit>=0) { 314 int index = lindex(pc); 315 if (taken != ltable[index].dir) { 316 ltable[index].currentIterSpec = 0; 317 } else { 318 ltable[index].currentIterSpec = 319 (ltable[index].currentIterSpec + 1) & loopNumIterMask; 320 } 321 } 322} 323 324void 325LTAGE::loopUpdate(Addr pc, bool taken, BranchInfo* bi) 326{ 327 int idx = bi->loopIndex + bi->loopHit; 328 if (bi->loopHit >= 0) { 329 //already a hit 330 if (bi->loopPredValid) { 331 if (taken != bi->loopPred) { 332 // free the entry 333 ltable[idx].numIter = 0; 334 ltable[idx].age = 0; 335 ltable[idx].confidence = 0; 336 ltable[idx].currentIter = 0; 337 return; 338 } else if (bi->loopPred != bi->tagePred) { 339 DPRINTF(LTage, "Loop Prediction success:%lx\n",pc); 340 unsignedCtrUpdate(ltable[idx].age, true, loopTableAgeBits); 341 } 342 } 343 344 ltable[idx].currentIter = 345 (ltable[idx].currentIter + 1) & loopNumIterMask; 346 if (ltable[idx].currentIter > ltable[idx].numIter) { 347 ltable[idx].confidence = 0; 348 if (ltable[idx].numIter != 0) { 349 // free the entry 350 ltable[idx].numIter = 0; 351 ltable[idx].age = 0; 352 ltable[idx].confidence = 0; 353 } 354 } 355 356 if (taken != ltable[idx].dir) { 357 if (ltable[idx].currentIter == ltable[idx].numIter) { 358 DPRINTF(LTage, "Loop End predicted successfully:%lx\n", pc); 359 360 unsignedCtrUpdate(ltable[idx].confidence, true, 361 loopTableConfidenceBits); 362 //just do not predict when the loop count is 1 or 2 363 if (ltable[idx].numIter < 3) { 364 // free the entry 365 ltable[idx].dir = taken; 366 ltable[idx].numIter = 0; 367 ltable[idx].age = 0; 368 ltable[idx].confidence = 0; 369 } 370 } else { 371 DPRINTF(LTage, "Loop End predicted incorrectly:%lx\n", pc); 372 if (ltable[idx].numIter == 0) { 373 // first complete nest; 374 ltable[idx].confidence = 0; 375 ltable[idx].numIter = ltable[idx].currentIter; 376 } else { 377 //not the same number of iterations as last time: free the 378 //entry 379 ltable[idx].numIter = 0; 380 ltable[idx].age = 0; 381 ltable[idx].confidence = 0; 382 } 383 } 384 ltable[idx].currentIter = 0; 385 } 386 387 } else if (taken) { 388 //try to allocate an entry on taken branch 389 int nrand = random_mt.random<int>();
|
382 for (int i = 0; i < 4; i++) {
383 int loop_hit = (nrand + i) & 3;
|
390 for (int i = 0; i < (1 << logLoopTableAssoc); i++) { 391 int loop_hit = (nrand + i) & ((1 << logLoopTableAssoc) - 1); |
392 idx = bi->loopIndex + loop_hit; 393 if (ltable[idx].age == 0) { 394 DPRINTF(LTage, "Allocating loop pred entry for branch %lx\n", 395 pc); 396 ltable[idx].dir = !taken; 397 ltable[idx].tag = bi->loopTag; 398 ltable[idx].numIter = 0; 399 ltable[idx].age = (1 << loopTableAgeBits) - 1; 400 ltable[idx].confidence = 0; 401 ltable[idx].currentIter = 1; 402 break; 403 404 } 405 else 406 ltable[idx].age--; 407 } 408 } 409 410} 411 412// shifting the global history: we manage the history in a big table in order 413// to reduce simulation time 414void 415LTAGE::updateGHist(uint8_t * &h, bool dir, uint8_t * tab, int &pt) 416{ 417 if (pt == 0) { 418 DPRINTF(LTage, "Rolling over the histories\n"); 419 // Copy beginning of globalHistoryBuffer to end, such that 420 // the last maxHist outcomes are still reachable 421 // through pt[0 .. maxHist - 1]. 422 for (int i = 0; i < maxHist; i++) 423 tab[histBufferSize - maxHist + i] = tab[i]; 424 pt = histBufferSize - maxHist; 425 h = &tab[pt]; 426 } 427 pt--; 428 h--; 429 h[0] = (dir) ? 1 : 0; 430} 431 432// Get GHR for hashing indirect predictor 433// Build history backwards from pointer in 434// bp_history. 435unsigned 436LTAGE::getGHR(ThreadID tid, void *bp_history) const 437{ 438 BranchInfo* bi = static_cast<BranchInfo*>(bp_history); 439 unsigned val = 0; 440 for (unsigned i = 0; i < 32; i++) { 441 // Make sure we don't go out of bounds 442 int gh_offset = bi->ptGhist + i; 443 assert(&(threadHistory[tid].globalHistory[gh_offset]) < 444 threadHistory[tid].globalHistory + histBufferSize); 445 val |= ((threadHistory[tid].globalHistory[gh_offset] & 0x1) << i); 446 } 447 448 return val; 449} 450 451//prediction 452bool 453LTAGE::predict(ThreadID tid, Addr branch_pc, bool cond_branch, void* &b) 454{ 455 BranchInfo *bi = new BranchInfo(nHistoryTables+1); 456 b = (void*)(bi); 457 Addr pc = branch_pc; 458 bool pred_taken = true; 459 bi->loopHit = -1; 460 461 if (cond_branch) { 462 // TAGE prediction 463 464 // computes the table addresses and the partial tags 465 for (int i = 1; i <= nHistoryTables; i++) { 466 tableIndices[i] = gindex(tid, pc, i); 467 bi->tableIndices[i] = tableIndices[i]; 468 tableTags[i] = gtag(tid, pc, i); 469 bi->tableTags[i] = tableTags[i]; 470 } 471 472 bi->bimodalIndex = bindex(pc); 473 474 bi->hitBank = 0; 475 bi->altBank = 0; 476 //Look for the bank with longest matching history 477 for (int i = nHistoryTables; i > 0; i--) { 478 if (gtable[i][tableIndices[i]].tag == tableTags[i]) { 479 bi->hitBank = i; 480 bi->hitBankIndex = tableIndices[bi->hitBank]; 481 break; 482 } 483 } 484 //Look for the alternate bank 485 for (int i = bi->hitBank - 1; i > 0; i--) { 486 if (gtable[i][tableIndices[i]].tag == tableTags[i]) { 487 bi->altBank = i; 488 bi->altBankIndex = tableIndices[bi->altBank]; 489 break; 490 } 491 } 492 //computes the prediction and the alternate prediction 493 if (bi->hitBank > 0) { 494 if (bi->altBank > 0) { 495 bi->altTaken = 496 gtable[bi->altBank][tableIndices[bi->altBank]].ctr >= 0; 497 }else { 498 bi->altTaken = getBimodePred(pc, bi); 499 } 500 501 bi->longestMatchPred = 502 gtable[bi->hitBank][tableIndices[bi->hitBank]].ctr >= 0; 503 bi->pseudoNewAlloc = 504 abs(2 * gtable[bi->hitBank][bi->hitBankIndex].ctr + 1) <= 1; 505 506 //if the entry is recognized as a newly allocated entry and 507 //useAltPredForNewlyAllocated is positive use the alternate 508 //prediction 509 if ((useAltPredForNewlyAllocated < 0) 510 || abs(2 * 511 gtable[bi->hitBank][tableIndices[bi->hitBank]].ctr + 1) > 1) 512 bi->tagePred = bi->longestMatchPred; 513 else 514 bi->tagePred = bi->altTaken; 515 } else { 516 bi->altTaken = getBimodePred(pc, bi); 517 bi->tagePred = bi->altTaken; 518 bi->longestMatchPred = bi->altTaken; 519 } 520 //end TAGE prediction 521 522 bi->loopPred = getLoop(pc, bi); // loop prediction 523 524 pred_taken = (((loopUseCounter >= 0) && bi->loopPredValid)) ? 525 (bi->loopPred): (bi->tagePred); 526 DPRINTF(LTage, "Predict for %lx: taken?:%d, loopTaken?:%d, " 527 "loopValid?:%d, loopUseCounter:%d, tagePred:%d, altPred:%d\n", 528 branch_pc, pred_taken, bi->loopPred, bi->loopPredValid, 529 loopUseCounter, bi->tagePred, bi->altTaken); 530 } 531 bi->branchPC = branch_pc; 532 bi->condBranch = cond_branch; 533 specLoopUpdate(branch_pc, pred_taken, bi); 534 return pred_taken; 535} 536 537// PREDICTOR UPDATE 538void 539LTAGE::update(ThreadID tid, Addr branch_pc, bool taken, void* bp_history, 540 bool squashed) 541{ 542 assert(bp_history); 543 544 BranchInfo *bi = static_cast<BranchInfo*>(bp_history); 545 546 if (squashed) { 547 // This restores the global history, then update it 548 // and recomputes the folded histories. 549 squash(tid, taken, bp_history); 550 return; 551 } 552 553 int nrand = random_mt.random<int>(0,3); 554 Addr pc = branch_pc; 555 if (bi->condBranch) { 556 DPRINTF(LTage, "Updating tables for branch:%lx; taken?:%d\n", 557 branch_pc, taken); 558 // first update the loop predictor 559 loopUpdate(pc, taken, bi); 560 561 if (bi->loopPredValid) { 562 if (bi->tagePred != bi->loopPred) {
|
555 ctrUpdate(loopUseCounter, (bi->loopPred== taken), 7);
|
563 ctrUpdate(loopUseCounter, 564 (bi->loopPred == taken), 565 withLoopBits); |
566 } 567 } 568 569 // TAGE UPDATE 570 // try to allocate a new entries only if prediction was wrong 571 bool longest_match_pred = false; 572 bool alloc = (bi->tagePred != taken) && (bi->hitBank < nHistoryTables); 573 if (bi->hitBank > 0) { 574 // Manage the selection between longest matching and alternate 575 // matching for "pseudo"-newly allocated longest matching entry 576 longest_match_pred = bi->longestMatchPred; 577 bool PseudoNewAlloc = bi->pseudoNewAlloc; 578 // an entry is considered as newly allocated if its prediction 579 // counter is weak 580 if (PseudoNewAlloc) { 581 if (longest_match_pred == taken) { 582 alloc = false; 583 } 584 // if it was delivering the correct prediction, no need to 585 // allocate new entry even if the overall prediction was false 586 if (longest_match_pred != bi->altTaken) { 587 ctrUpdate(useAltPredForNewlyAllocated,
|
578 bi->altTaken == taken, 4);
|
588 bi->altTaken == taken, useAltOnNaBits); |
589 } 590 } 591 } 592 593 if (alloc) { 594 // is there some "unuseful" entry to allocate 595 uint8_t min = 1; 596 for (int i = nHistoryTables; i > bi->hitBank; i--) { 597 if (gtable[i][bi->tableIndices[i]].u < min) { 598 min = gtable[i][bi->tableIndices[i]].u; 599 } 600 } 601 602 // we allocate an entry with a longer history 603 // to avoid ping-pong, we do not choose systematically the next 604 // entry, but among the 3 next entries 605 int Y = nrand & 606 ((ULL(1) << (nHistoryTables - bi->hitBank - 1)) - 1); 607 int X = bi->hitBank + 1; 608 if (Y & 1) { 609 X++; 610 if (Y & 2) 611 X++; 612 } 613 // No entry available, forces one to be available 614 if (min > 0) { 615 gtable[X][bi->tableIndices[X]].u = 0; 616 } 617 618 619 //Allocate only one entry 620 for (int i = X; i <= nHistoryTables; i++) { 621 if ((gtable[i][bi->tableIndices[i]].u == 0)) { 622 gtable[i][bi->tableIndices[i]].tag = bi->tableTags[i]; 623 gtable[i][bi->tableIndices[i]].ctr = (taken) ? 0 : -1; 624 break; 625 } 626 } 627 } 628 //periodic reset of u: reset is not complete but bit by bit 629 tCounter++;
|
620 if ((tCounter & ((ULL(1) << logTick) - 1)) == 0) {
|
630 if ((tCounter & ((ULL(1) << logUResetPeriod) - 1)) == 0) { |
631 // reset least significant bit 632 // most significant bit becomes least significant bit 633 for (int i = 1; i <= nHistoryTables; i++) {
|
624 for (int j = 0; j < (ULL(1) << tagTableSizes[i]); j++) {
|
634 for (int j = 0; j < (ULL(1) << logTagTableSizes[i]); j++) { |
635 gtable[i][j].u = gtable[i][j].u >> 1; 636 } 637 } 638 } 639 640 if (bi->hitBank > 0) { 641 DPRINTF(LTage, "Updating tag table entry (%d,%d) for branch %lx\n", 642 bi->hitBank, bi->hitBankIndex, branch_pc); 643 ctrUpdate(gtable[bi->hitBank][bi->hitBankIndex].ctr, taken, 644 tagTableCounterBits); 645 // if the provider entry is not certified to be useful also update 646 // the alternate prediction 647 if (gtable[bi->hitBank][bi->hitBankIndex].u == 0) { 648 if (bi->altBank > 0) { 649 ctrUpdate(gtable[bi->altBank][bi->altBankIndex].ctr, taken, 650 tagTableCounterBits); 651 DPRINTF(LTage, "Updating tag table entry (%d,%d) for" 652 " branch %lx\n", bi->hitBank, bi->hitBankIndex, 653 branch_pc); 654 } 655 if (bi->altBank == 0) { 656 baseUpdate(pc, taken, bi); 657 } 658 } 659 660 // update the u counter 661 if (bi->tagePred != bi->altTaken) { 662 unsignedCtrUpdate(gtable[bi->hitBank][bi->hitBankIndex].u, 663 bi->tagePred == taken, tagTableUBits); 664 } 665 } else { 666 baseUpdate(pc, taken, bi); 667 } 668 669 //END PREDICTOR UPDATE 670 } 671 if (!squashed) { 672 delete bi; 673 } 674} 675 676void 677LTAGE::updateHistories(ThreadID tid, Addr branch_pc, bool taken, void* b) 678{ 679 BranchInfo* bi = (BranchInfo*)(b); 680 ThreadHistory& tHist = threadHistory[tid]; 681 // UPDATE HISTORIES 682 bool pathbit = ((branch_pc >> instShiftAmt) & 1); 683 //on a squash, return pointers to this and recompute indices. 684 //update user history 685 updateGHist(tHist.gHist, taken, tHist.globalHistory, tHist.ptGhist); 686 tHist.pathHist = (tHist.pathHist << 1) + pathbit;
|
677 tHist.pathHist = (tHist.pathHist & ((ULL(1) << 16) - 1));
|
687 tHist.pathHist = (tHist.pathHist & ((ULL(1) << pathHistBits) - 1)); |
688 689 bi->ptGhist = tHist.ptGhist; 690 bi->pathHist = tHist.pathHist; 691 //prepare next index and tag computations for user branchs 692 for (int i = 1; i <= nHistoryTables; i++) 693 { 694 bi->ci[i] = tHist.computeIndices[i].comp; 695 bi->ct0[i] = tHist.computeTags[0][i].comp; 696 bi->ct1[i] = tHist.computeTags[1][i].comp; 697 tHist.computeIndices[i].update(tHist.gHist); 698 tHist.computeTags[0][i].update(tHist.gHist); 699 tHist.computeTags[1][i].update(tHist.gHist); 700 } 701 DPRINTF(LTage, "Updating global histories with branch:%lx; taken?:%d, " 702 "path Hist: %x; pointer:%d\n", branch_pc, taken, tHist.pathHist, 703 tHist.ptGhist); 704} 705 706void 707LTAGE::squash(ThreadID tid, bool taken, void *bp_history) 708{ 709 BranchInfo* bi = (BranchInfo*)(bp_history); 710 ThreadHistory& tHist = threadHistory[tid]; 711 DPRINTF(LTage, "Restoring branch info: %lx; taken? %d; PathHistory:%x, " 712 "pointer:%d\n", bi->branchPC,taken, bi->pathHist, bi->ptGhist); 713 tHist.pathHist = bi->pathHist; 714 tHist.ptGhist = bi->ptGhist; 715 tHist.gHist = &(tHist.globalHistory[tHist.ptGhist]); 716 tHist.gHist[0] = (taken ? 1 : 0); 717 for (int i = 1; i <= nHistoryTables; i++) { 718 tHist.computeIndices[i].comp = bi->ci[i]; 719 tHist.computeTags[0][i].comp = bi->ct0[i]; 720 tHist.computeTags[1][i].comp = bi->ct1[i]; 721 tHist.computeIndices[i].update(tHist.gHist); 722 tHist.computeTags[0][i].update(tHist.gHist); 723 tHist.computeTags[1][i].update(tHist.gHist); 724 } 725 726 if (bi->condBranch) { 727 if (bi->loopHit >= 0) { 728 int idx = bi->loopIndex + bi->loopHit; 729 ltable[idx].currentIterSpec = bi->currentIter; 730 } 731 } 732 733} 734 735void 736LTAGE::squash(ThreadID tid, void *bp_history) 737{ 738 BranchInfo* bi = (BranchInfo*)(bp_history); 739 DPRINTF(LTage, "Deleting branch info: %lx\n", bi->branchPC); 740 if (bi->condBranch) { 741 if (bi->loopHit >= 0) { 742 int idx = bi->loopIndex + bi->loopHit; 743 ltable[idx].currentIterSpec = bi->currentIter; 744 } 745 } 746 747 delete bi; 748} 749 750bool 751LTAGE::lookup(ThreadID tid, Addr branch_pc, void* &bp_history) 752{ 753 bool retval = predict(tid, branch_pc, true, bp_history); 754 755 DPRINTF(LTage, "Lookup branch: %lx; predict:%d\n", branch_pc, retval); 756 updateHistories(tid, branch_pc, retval, bp_history); 757 assert(threadHistory[tid].gHist == 758 &threadHistory[tid].globalHistory[threadHistory[tid].ptGhist]); 759 760 return retval; 761} 762 763void 764LTAGE::btbUpdate(ThreadID tid, Addr branch_pc, void* &bp_history) 765{ 766 BranchInfo* bi = (BranchInfo*) bp_history; 767 ThreadHistory& tHist = threadHistory[tid]; 768 DPRINTF(LTage, "BTB miss resets prediction: %lx\n", branch_pc); 769 assert(tHist.gHist == &tHist.globalHistory[tHist.ptGhist]); 770 tHist.gHist[0] = 0; 771 for (int i = 1; i <= nHistoryTables; i++) { 772 tHist.computeIndices[i].comp = bi->ci[i]; 773 tHist.computeTags[0][i].comp = bi->ct0[i]; 774 tHist.computeTags[1][i].comp = bi->ct1[i]; 775 tHist.computeIndices[i].update(tHist.gHist); 776 tHist.computeTags[0][i].update(tHist.gHist); 777 tHist.computeTags[1][i].update(tHist.gHist); 778 } 779} 780 781void 782LTAGE::uncondBranch(ThreadID tid, Addr br_pc, void* &bp_history) 783{ 784 DPRINTF(LTage, "UnConditionalBranch: %lx\n", br_pc); 785 predict(tid, br_pc, false, bp_history); 786 updateHistories(tid, br_pc, true, bp_history); 787 assert(threadHistory[tid].gHist == 788 &threadHistory[tid].globalHistory[threadHistory[tid].ptGhist]); 789} 790 791LTAGE* 792LTAGEParams::create() 793{ 794 return new LTAGE(this); 795}
|