/*
 * Copyright (c) 2014 The University of Wisconsin
 *
 * Copyright (c) 2006 INRIA (Institut National de Recherche en
 * Informatique et en Automatique / French National Research Institute
 * for Computer Science and Applied Mathematics)
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT 254120Sgblack@eecs.umich.edu * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 264120Sgblack@eecs.umich.edu * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 274120Sgblack@eecs.umich.edu * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 284120Sgblack@eecs.umich.edu * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 294120Sgblack@eecs.umich.edu * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 304120Sgblack@eecs.umich.edu * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 314120Sgblack@eecs.umich.edu * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 324120Sgblack@eecs.umich.edu * 334120Sgblack@eecs.umich.edu * Authors: Vignyan Reddy, Dibakar Gope and Arthur Perais, 344120Sgblack@eecs.umich.edu * from André Seznec's code. 354120Sgblack@eecs.umich.edu */ 364120Sgblack@eecs.umich.edu 374120Sgblack@eecs.umich.edu/* @file 384120Sgblack@eecs.umich.edu * Implementation of a L-TAGE branch predictor 394120Sgblack@eecs.umich.edu */ 404120Sgblack@eecs.umich.edu 414120Sgblack@eecs.umich.edu#include "cpu/pred/ltage.hh" 424120Sgblack@eecs.umich.edu 434141Sgblack@eecs.umich.edu#include "base/intmath.hh" 444136Sgblack@eecs.umich.edu#include "base/logging.hh" 456214Snate@binkert.org#include "base/random.hh" 464141Sgblack@eecs.umich.edu#include "base/trace.hh" 474121Sgblack@eecs.umich.edu#include "debug/Fetch.hh" 484120Sgblack@eecs.umich.edu#include "debug/LTage.hh" 494120Sgblack@eecs.umich.edu 504120Sgblack@eecs.umich.eduLTAGE::LTAGE(const LTAGEParams *params) 514121Sgblack@eecs.umich.edu : BPredUnit(params), 524121Sgblack@eecs.umich.edu logSizeBiMP(params->logSizeBiMP), 534121Sgblack@eecs.umich.edu logRatioBiModalHystEntries(params->logRatioBiModalHystEntries), 544121Sgblack@eecs.umich.edu logSizeTagTables(params->logSizeTagTables), 554121Sgblack@eecs.umich.edu logSizeLoopPred(params->logSizeLoopPred), 
564121Sgblack@eecs.umich.edu nHistoryTables(params->nHistoryTables), 574121Sgblack@eecs.umich.edu tagTableCounterBits(params->tagTableCounterBits), 584121Sgblack@eecs.umich.edu tagTableUBits(params->tagTableUBits), 594121Sgblack@eecs.umich.edu histBufferSize(params->histBufferSize), 604121Sgblack@eecs.umich.edu minHist(params->minHist), 614121Sgblack@eecs.umich.edu maxHist(params->maxHist), 624121Sgblack@eecs.umich.edu minTagWidth(params->minTagWidth), 634141Sgblack@eecs.umich.edu loopTableAgeBits(params->loopTableAgeBits), 644141Sgblack@eecs.umich.edu loopTableConfidenceBits(params->loopTableConfidenceBits), 654141Sgblack@eecs.umich.edu loopTableTagBits(params->loopTableTagBits), 665127Sgblack@eecs.umich.edu loopTableIterBits(params->loopTableIterBits), 674141Sgblack@eecs.umich.edu confidenceThreshold((1 << loopTableConfidenceBits) - 1), 684121Sgblack@eecs.umich.edu loopTagMask((1 << loopTableTagBits) - 1), 694121Sgblack@eecs.umich.edu loopNumIterMask((1 << loopTableIterBits) - 1), 704141Sgblack@eecs.umich.edu threadHistory(params->numThreads) 716974Stjones1@inf.ed.ac.uk{ 726974Stjones1@inf.ed.ac.uk // Current method for periodically resetting the u counter bits only 737623Sgblack@eecs.umich.edu // works for 1 or 2 bits 749329Sdam.sunwoo@arm.com // Also make sure that it is not 0 759329Sdam.sunwoo@arm.com assert(tagTableUBits <= 2 && (tagTableUBits > 0)); 769329Sdam.sunwoo@arm.com 779057SAli.Saidi@ARM.com // we use uint16_t type for these vales, so they cannot be more than 789057SAli.Saidi@ARM.com // 16 bits 799057SAli.Saidi@ARM.com assert(loopTableTagBits <= 16); 809057SAli.Saidi@ARM.com assert(loopTableIterBits <= 16); 819057SAli.Saidi@ARM.com 829057SAli.Saidi@ARM.com assert(params->histBufferSize > params->maxHist * 2); 839057SAli.Saidi@ARM.com useAltPredForNewlyAllocated = 0; 849057SAli.Saidi@ARM.com logTick = 19; 859057SAli.Saidi@ARM.com tCounter = ULL(1) << (logTick - 1); 869057SAli.Saidi@ARM.com 879057SAli.Saidi@ARM.com for (auto& history : threadHistory) { 
888902Sandreas.hansson@arm.com history.pathHist = 0; 894120Sgblack@eecs.umich.edu history.globalHistory = new uint8_t[histBufferSize]; 904120Sgblack@eecs.umich.edu history.gHist = history.globalHistory; 91 memset(history.gHist, 0, histBufferSize); 92 history.ptGhist = 0; 93 } 94 95 histLengths = new int [nHistoryTables+1]; 96 histLengths[1] = minHist; 97 histLengths[nHistoryTables] = maxHist; 98 99 for (int i = 2; i <= nHistoryTables; i++) { 100 histLengths[i] = (int) (((double) minHist * 101 pow ((double) (maxHist) / (double) minHist, 102 (double) (i - 1) / (double) ((nHistoryTables- 1)))) 103 + 0.5); 104 } 105 106 tagWidths[1] = minTagWidth; 107 tagWidths[2] = minTagWidth; 108 tagWidths[3] = minTagWidth + 1; 109 tagWidths[4] = minTagWidth + 1; 110 tagWidths[5] = minTagWidth + 2; 111 tagWidths[6] = minTagWidth + 3; 112 tagWidths[7] = minTagWidth + 4; 113 tagWidths[8] = minTagWidth + 5; 114 tagWidths[9] = minTagWidth + 5; 115 tagWidths[10] = minTagWidth + 6; 116 tagWidths[11] = minTagWidth + 7; 117 tagWidths[12] = minTagWidth + 8; 118 119 for (int i = 1; i <= 2; i++) 120 tagTableSizes[i] = logSizeTagTables - 1; 121 for (int i = 3; i <= 6; i++) 122 tagTableSizes[i] = logSizeTagTables; 123 for (int i = 7; i <= 10; i++) 124 tagTableSizes[i] = logSizeTagTables - 1; 125 for (int i = 11; i <= 12; i++) 126 tagTableSizes[i] = logSizeTagTables - 2; 127 128 for (auto& history : threadHistory) { 129 history.computeIndices = new FoldedHistory[nHistoryTables+1]; 130 history.computeTags[0] = new FoldedHistory[nHistoryTables+1]; 131 history.computeTags[1] = new FoldedHistory[nHistoryTables+1]; 132 133 for (int i = 1; i <= nHistoryTables; i++) { 134 history.computeIndices[i].init(histLengths[i], (tagTableSizes[i])); 135 history.computeTags[0][i].init( 136 history.computeIndices[i].origLength, tagWidths[i]); 137 history.computeTags[1][i].init( 138 history.computeIndices[i].origLength, tagWidths[i] - 1); 139 DPRINTF(LTage, "HistLength:%d, TTSize:%d, TTTWidth:%d\n", 140 
histLengths[i], tagTableSizes[i], tagWidths[i]); 141 } 142 } 143 144 const uint64_t bimodalTableSize = ULL(1) << logSizeBiMP; 145 btablePrediction.resize(bimodalTableSize, false); 146 btableHysteresis.resize(bimodalTableSize >> logRatioBiModalHystEntries, 147 true); 148 149 ltable = new LoopEntry[ULL(1) << logSizeLoopPred]; 150 gtable = new TageEntry*[nHistoryTables + 1]; 151 for (int i = 1; i <= nHistoryTables; i++) { 152 gtable[i] = new TageEntry[1<<(tagTableSizes[i])]; 153 } 154 155 tableIndices = new int [nHistoryTables+1]; 156 tableTags = new int [nHistoryTables+1]; 157 158 loopUseCounter = 0; 159} 160 161int 162LTAGE::bindex(Addr pc_in) const 163{ 164 return ((pc_in >> instShiftAmt) & ((ULL(1) << (logSizeBiMP)) - 1)); 165} 166 167int 168LTAGE::lindex(Addr pc_in) const 169{ 170 return (((pc_in >> instShiftAmt) & 171 ((ULL(1) << (logSizeLoopPred - 2)) - 1)) << 2); 172} 173 174int 175LTAGE::F(int A, int size, int bank) const 176{ 177 int A1, A2; 178 179 A = A & ((ULL(1) << size) - 1); 180 A1 = (A & ((ULL(1) << tagTableSizes[bank]) - 1)); 181 A2 = (A >> tagTableSizes[bank]); 182 A2 = ((A2 << bank) & ((ULL(1) << tagTableSizes[bank]) - 1)) 183 + (A2 >> (tagTableSizes[bank] - bank)); 184 A = A1 ^ A2; 185 A = ((A << bank) & ((ULL(1) << tagTableSizes[bank]) - 1)) 186 + (A >> (tagTableSizes[bank] - bank)); 187 return (A); 188} 189 190 191// gindex computes a full hash of pc, ghist and pathHist 192int 193LTAGE::gindex(ThreadID tid, Addr pc, int bank) const 194{ 195 int index; 196 int hlen = (histLengths[bank] > 16) ? 
16 : histLengths[bank]; 197 index = 198 (pc >> instShiftAmt) ^ 199 ((pc >> instShiftAmt) >> ((int) abs(tagTableSizes[bank] - bank) + 1)) ^ 200 threadHistory[tid].computeIndices[bank].comp ^ 201 F(threadHistory[tid].pathHist, hlen, bank); 202 203 return (index & ((ULL(1) << (tagTableSizes[bank])) - 1)); 204} 205 206 207// Tag computation 208uint16_t 209LTAGE::gtag(ThreadID tid, Addr pc, int bank) const 210{ 211 int tag = (pc >> instShiftAmt) ^ 212 threadHistory[tid].computeTags[0][bank].comp ^ 213 (threadHistory[tid].computeTags[1][bank].comp << 1); 214 215 return (tag & ((ULL(1) << tagWidths[bank]) - 1)); 216} 217 218 219// Up-down saturating counter 220void 221LTAGE::ctrUpdate(int8_t & ctr, bool taken, int nbits) 222{ 223 assert(nbits <= sizeof(int8_t) << 3); 224 if (taken) { 225 if (ctr < ((1 << (nbits - 1)) - 1)) 226 ctr++; 227 } else { 228 if (ctr > -(1 << (nbits - 1))) 229 ctr--; 230 } 231} 232 233// Up-down unsigned saturating counter 234void 235LTAGE::unsignedCtrUpdate(uint8_t & ctr, bool up, unsigned nbits) 236{ 237 assert(nbits <= sizeof(uint8_t) << 3); 238 if (up) { 239 if (ctr < ((1 << nbits) - 1)) 240 ctr++; 241 } else { 242 if (ctr) 243 ctr--; 244 } 245} 246 247// Bimodal prediction 248bool 249LTAGE::getBimodePred(Addr pc, BranchInfo* bi) const 250{ 251 return btablePrediction[bi->bimodalIndex]; 252} 253 254 255// Update the bimodal predictor: a hysteresis bit is shared among N prediction 256// bits (N = 2 ^ logRatioBiModalHystEntries) 257void 258LTAGE::baseUpdate(Addr pc, bool taken, BranchInfo* bi) 259{ 260 int inter = (btablePrediction[bi->bimodalIndex] << 1) 261 + btableHysteresis[bi->bimodalIndex >> logRatioBiModalHystEntries]; 262 if (taken) { 263 if (inter < 3) 264 inter++; 265 } else if (inter > 0) { 266 inter--; 267 } 268 const bool pred = inter >> 1; 269 const bool hyst = inter & 1; 270 btablePrediction[bi->bimodalIndex] = pred; 271 btableHysteresis[bi->bimodalIndex >> logRatioBiModalHystEntries] = hyst; 272 DPRINTF(LTage, "Updating branch 
%lx, pred:%d, hyst:%d\n", pc, pred, hyst); 273} 274 275 276//loop prediction: only used if high confidence 277bool 278LTAGE::getLoop(Addr pc, BranchInfo* bi) const 279{ 280 bi->loopHit = -1; 281 bi->loopPredValid = false; 282 bi->loopIndex = lindex(pc); 283 bi->loopTag = ((pc) >> (instShiftAmt + logSizeLoopPred - 2)) & loopTagMask; 284 285 for (int i = 0; i < 4; i++) { 286 if (ltable[bi->loopIndex + i].tag == bi->loopTag) { 287 bi->loopHit = i; 288 bi->loopPredValid = 289 ltable[bi->loopIndex + i].confidence == confidenceThreshold; 290 bi->currentIter = ltable[bi->loopIndex + i].currentIterSpec; 291 if (ltable[bi->loopIndex + i].currentIterSpec + 1 == 292 ltable[bi->loopIndex + i].numIter) { 293 return !(ltable[bi->loopIndex + i].dir); 294 }else { 295 return (ltable[bi->loopIndex + i].dir); 296 } 297 } 298 } 299 return false; 300} 301 302void 303LTAGE::specLoopUpdate(Addr pc, bool taken, BranchInfo* bi) 304{ 305 if (bi->loopHit>=0) { 306 int index = lindex(pc); 307 if (taken != ltable[index].dir) { 308 ltable[index].currentIterSpec = 0; 309 } else { 310 ltable[index].currentIterSpec = 311 (ltable[index].currentIterSpec + 1) & loopNumIterMask; 312 } 313 } 314} 315 316void 317LTAGE::loopUpdate(Addr pc, bool taken, BranchInfo* bi) 318{ 319 int idx = bi->loopIndex + bi->loopHit; 320 if (bi->loopHit >= 0) { 321 //already a hit 322 if (bi->loopPredValid) { 323 if (taken != bi->loopPred) { 324 // free the entry 325 ltable[idx].numIter = 0; 326 ltable[idx].age = 0; 327 ltable[idx].confidence = 0; 328 ltable[idx].currentIter = 0; 329 return; 330 } else if (bi->loopPred != bi->tagePred) { 331 DPRINTF(LTage, "Loop Prediction success:%lx\n",pc); 332 unsignedCtrUpdate(ltable[idx].age, true, loopTableAgeBits); 333 } 334 } 335 336 ltable[idx].currentIter = 337 (ltable[idx].currentIter + 1) & loopNumIterMask; 338 if (ltable[idx].currentIter > ltable[idx].numIter) { 339 ltable[idx].confidence = 0; 340 if (ltable[idx].numIter != 0) { 341 // free the entry 342 ltable[idx].numIter = 
0; 343 ltable[idx].age = 0; 344 ltable[idx].confidence = 0; 345 } 346 } 347 348 if (taken != ltable[idx].dir) { 349 if (ltable[idx].currentIter == ltable[idx].numIter) { 350 DPRINTF(LTage, "Loop End predicted successfully:%lx\n", pc); 351 352 unsignedCtrUpdate(ltable[idx].confidence, true, 353 loopTableConfidenceBits); 354 //just do not predict when the loop count is 1 or 2 355 if (ltable[idx].numIter < 3) { 356 // free the entry 357 ltable[idx].dir = taken; 358 ltable[idx].numIter = 0; 359 ltable[idx].age = 0; 360 ltable[idx].confidence = 0; 361 } 362 } else { 363 DPRINTF(LTage, "Loop End predicted incorrectly:%lx\n", pc); 364 if (ltable[idx].numIter == 0) { 365 // first complete nest; 366 ltable[idx].confidence = 0; 367 ltable[idx].numIter = ltable[idx].currentIter; 368 } else { 369 //not the same number of iterations as last time: free the 370 //entry 371 ltable[idx].numIter = 0; 372 ltable[idx].age = 0; 373 ltable[idx].confidence = 0; 374 } 375 } 376 ltable[idx].currentIter = 0; 377 } 378 379 } else if (taken) { 380 //try to allocate an entry on taken branch 381 int nrand = random_mt.random<int>(); 382 for (int i = 0; i < 4; i++) { 383 int loop_hit = (nrand + i) & 3; 384 idx = bi->loopIndex + loop_hit; 385 if (ltable[idx].age == 0) { 386 DPRINTF(LTage, "Allocating loop pred entry for branch %lx\n", 387 pc); 388 ltable[idx].dir = !taken; 389 ltable[idx].tag = bi->loopTag; 390 ltable[idx].numIter = 0; 391 ltable[idx].age = (1 << loopTableAgeBits) - 1; 392 ltable[idx].confidence = 0; 393 ltable[idx].currentIter = 1; 394 break; 395 396 } 397 else 398 ltable[idx].age--; 399 } 400 } 401 402} 403 404// shifting the global history: we manage the history in a big table in order 405// to reduce simulation time 406void 407LTAGE::updateGHist(uint8_t * &h, bool dir, uint8_t * tab, int &pt) 408{ 409 if (pt == 0) { 410 DPRINTF(LTage, "Rolling over the histories\n"); 411 // Copy beginning of globalHistoryBuffer to end, such that 412 // the last maxHist outcomes are still 
reachable 413 // through pt[0 .. maxHist - 1]. 414 for (int i = 0; i < maxHist; i++) 415 tab[histBufferSize - maxHist + i] = tab[i]; 416 pt = histBufferSize - maxHist; 417 h = &tab[pt]; 418 } 419 pt--; 420 h--; 421 h[0] = (dir) ? 1 : 0; 422} 423 424// Get GHR for hashing indirect predictor 425// Build history backwards from pointer in 426// bp_history. 427unsigned 428LTAGE::getGHR(ThreadID tid, void *bp_history) const 429{ 430 BranchInfo* bi = static_cast<BranchInfo*>(bp_history); 431 unsigned val = 0; 432 for (unsigned i = 0; i < 32; i++) { 433 // Make sure we don't go out of bounds 434 int gh_offset = bi->ptGhist + i; 435 assert(&(threadHistory[tid].globalHistory[gh_offset]) < 436 threadHistory[tid].globalHistory + histBufferSize); 437 val |= ((threadHistory[tid].globalHistory[gh_offset] & 0x1) << i); 438 } 439 440 return val; 441} 442 443//prediction 444bool 445LTAGE::predict(ThreadID tid, Addr branch_pc, bool cond_branch, void* &b) 446{ 447 BranchInfo *bi = new BranchInfo(nHistoryTables+1); 448 b = (void*)(bi); 449 Addr pc = branch_pc; 450 bool pred_taken = true; 451 bi->loopHit = -1; 452 453 if (cond_branch) { 454 // TAGE prediction 455 456 // computes the table addresses and the partial tags 457 for (int i = 1; i <= nHistoryTables; i++) { 458 tableIndices[i] = gindex(tid, pc, i); 459 bi->tableIndices[i] = tableIndices[i]; 460 tableTags[i] = gtag(tid, pc, i); 461 bi->tableTags[i] = tableTags[i]; 462 } 463 464 bi->bimodalIndex = bindex(pc); 465 466 bi->hitBank = 0; 467 bi->altBank = 0; 468 //Look for the bank with longest matching history 469 for (int i = nHistoryTables; i > 0; i--) { 470 if (gtable[i][tableIndices[i]].tag == tableTags[i]) { 471 bi->hitBank = i; 472 bi->hitBankIndex = tableIndices[bi->hitBank]; 473 break; 474 } 475 } 476 //Look for the alternate bank 477 for (int i = bi->hitBank - 1; i > 0; i--) { 478 if (gtable[i][tableIndices[i]].tag == tableTags[i]) { 479 bi->altBank = i; 480 bi->altBankIndex = tableIndices[bi->altBank]; 481 break; 482 } 483 
} 484 //computes the prediction and the alternate prediction 485 if (bi->hitBank > 0) { 486 if (bi->altBank > 0) { 487 bi->altTaken = 488 gtable[bi->altBank][tableIndices[bi->altBank]].ctr >= 0; 489 }else { 490 bi->altTaken = getBimodePred(pc, bi); 491 } 492 493 bi->longestMatchPred = 494 gtable[bi->hitBank][tableIndices[bi->hitBank]].ctr >= 0; 495 bi->pseudoNewAlloc = 496 abs(2 * gtable[bi->hitBank][bi->hitBankIndex].ctr + 1) <= 1; 497 498 //if the entry is recognized as a newly allocated entry and 499 //useAltPredForNewlyAllocated is positive use the alternate 500 //prediction 501 if ((useAltPredForNewlyAllocated < 0) 502 || abs(2 * 503 gtable[bi->hitBank][tableIndices[bi->hitBank]].ctr + 1) > 1) 504 bi->tagePred = bi->longestMatchPred; 505 else 506 bi->tagePred = bi->altTaken; 507 } else { 508 bi->altTaken = getBimodePred(pc, bi); 509 bi->tagePred = bi->altTaken; 510 bi->longestMatchPred = bi->altTaken; 511 } 512 //end TAGE prediction 513 514 bi->loopPred = getLoop(pc, bi); // loop prediction 515 516 pred_taken = (((loopUseCounter >= 0) && bi->loopPredValid)) ? 517 (bi->loopPred): (bi->tagePred); 518 DPRINTF(LTage, "Predict for %lx: taken?:%d, loopTaken?:%d, " 519 "loopValid?:%d, loopUseCounter:%d, tagePred:%d, altPred:%d\n", 520 branch_pc, pred_taken, bi->loopPred, bi->loopPredValid, 521 loopUseCounter, bi->tagePred, bi->altTaken); 522 } 523 bi->branchPC = branch_pc; 524 bi->condBranch = cond_branch; 525 specLoopUpdate(branch_pc, pred_taken, bi); 526 return pred_taken; 527} 528 529// PREDICTOR UPDATE 530void 531LTAGE::update(ThreadID tid, Addr branch_pc, bool taken, void* bp_history, 532 bool squashed) 533{ 534 assert(bp_history); 535 536 BranchInfo *bi = static_cast<BranchInfo*>(bp_history); 537 538 if (squashed) { 539 // This restores the global history, then update it 540 // and recomputes the folded histories. 
541 squash(tid, taken, bp_history); 542 return; 543 } 544 545 int nrand = random_mt.random<int>(0,3); 546 Addr pc = branch_pc; 547 if (bi->condBranch) { 548 DPRINTF(LTage, "Updating tables for branch:%lx; taken?:%d\n", 549 branch_pc, taken); 550 // first update the loop predictor 551 loopUpdate(pc, taken, bi); 552 553 if (bi->loopPredValid) { 554 if (bi->tagePred != bi->loopPred) { 555 ctrUpdate(loopUseCounter, (bi->loopPred== taken), 7); 556 } 557 } 558 559 // TAGE UPDATE 560 // try to allocate a new entries only if prediction was wrong 561 bool longest_match_pred = false; 562 bool alloc = (bi->tagePred != taken) && (bi->hitBank < nHistoryTables); 563 if (bi->hitBank > 0) { 564 // Manage the selection between longest matching and alternate 565 // matching for "pseudo"-newly allocated longest matching entry 566 longest_match_pred = bi->longestMatchPred; 567 bool PseudoNewAlloc = bi->pseudoNewAlloc; 568 // an entry is considered as newly allocated if its prediction 569 // counter is weak 570 if (PseudoNewAlloc) { 571 if (longest_match_pred == taken) { 572 alloc = false; 573 } 574 // if it was delivering the correct prediction, no need to 575 // allocate new entry even if the overall prediction was false 576 if (longest_match_pred != bi->altTaken) { 577 ctrUpdate(useAltPredForNewlyAllocated, 578 bi->altTaken == taken, 4); 579 } 580 } 581 } 582 583 if (alloc) { 584 // is there some "unuseful" entry to allocate 585 uint8_t min = 1; 586 for (int i = nHistoryTables; i > bi->hitBank; i--) { 587 if (gtable[i][bi->tableIndices[i]].u < min) { 588 min = gtable[i][bi->tableIndices[i]].u; 589 } 590 } 591 592 // we allocate an entry with a longer history 593 // to avoid ping-pong, we do not choose systematically the next 594 // entry, but among the 3 next entries 595 int Y = nrand & 596 ((ULL(1) << (nHistoryTables - bi->hitBank - 1)) - 1); 597 int X = bi->hitBank + 1; 598 if (Y & 1) { 599 X++; 600 if (Y & 2) 601 X++; 602 } 603 // No entry available, forces one to be available 
604 if (min > 0) { 605 gtable[X][bi->tableIndices[X]].u = 0; 606 } 607 608 609 //Allocate only one entry 610 for (int i = X; i <= nHistoryTables; i++) { 611 if ((gtable[i][bi->tableIndices[i]].u == 0)) { 612 gtable[i][bi->tableIndices[i]].tag = bi->tableTags[i]; 613 gtable[i][bi->tableIndices[i]].ctr = (taken) ? 0 : -1; 614 break; 615 } 616 } 617 } 618 //periodic reset of u: reset is not complete but bit by bit 619 tCounter++; 620 if ((tCounter & ((ULL(1) << logTick) - 1)) == 0) { 621 // reset least significant bit 622 // most significant bit becomes least significant bit 623 for (int i = 1; i <= nHistoryTables; i++) { 624 for (int j = 0; j < (ULL(1) << tagTableSizes[i]); j++) { 625 gtable[i][j].u = gtable[i][j].u >> 1; 626 } 627 } 628 } 629 630 if (bi->hitBank > 0) { 631 DPRINTF(LTage, "Updating tag table entry (%d,%d) for branch %lx\n", 632 bi->hitBank, bi->hitBankIndex, branch_pc); 633 ctrUpdate(gtable[bi->hitBank][bi->hitBankIndex].ctr, taken, 634 tagTableCounterBits); 635 // if the provider entry is not certified to be useful also update 636 // the alternate prediction 637 if (gtable[bi->hitBank][bi->hitBankIndex].u == 0) { 638 if (bi->altBank > 0) { 639 ctrUpdate(gtable[bi->altBank][bi->altBankIndex].ctr, taken, 640 tagTableCounterBits); 641 DPRINTF(LTage, "Updating tag table entry (%d,%d) for" 642 " branch %lx\n", bi->hitBank, bi->hitBankIndex, 643 branch_pc); 644 } 645 if (bi->altBank == 0) { 646 baseUpdate(pc, taken, bi); 647 } 648 } 649 650 // update the u counter 651 if (bi->tagePred != bi->altTaken) { 652 unsignedCtrUpdate(gtable[bi->hitBank][bi->hitBankIndex].u, 653 bi->tagePred == taken, tagTableUBits); 654 } 655 } else { 656 baseUpdate(pc, taken, bi); 657 } 658 659 //END PREDICTOR UPDATE 660 } 661 if (!squashed) { 662 delete bi; 663 } 664} 665 666void 667LTAGE::updateHistories(ThreadID tid, Addr branch_pc, bool taken, void* b) 668{ 669 BranchInfo* bi = (BranchInfo*)(b); 670 ThreadHistory& tHist = threadHistory[tid]; 671 // UPDATE HISTORIES 672 bool 
pathbit = ((branch_pc >> instShiftAmt) & 1); 673 //on a squash, return pointers to this and recompute indices. 674 //update user history 675 updateGHist(tHist.gHist, taken, tHist.globalHistory, tHist.ptGhist); 676 tHist.pathHist = (tHist.pathHist << 1) + pathbit; 677 tHist.pathHist = (tHist.pathHist & ((ULL(1) << 16) - 1)); 678 679 bi->ptGhist = tHist.ptGhist; 680 bi->pathHist = tHist.pathHist; 681 //prepare next index and tag computations for user branchs 682 for (int i = 1; i <= nHistoryTables; i++) 683 { 684 bi->ci[i] = tHist.computeIndices[i].comp; 685 bi->ct0[i] = tHist.computeTags[0][i].comp; 686 bi->ct1[i] = tHist.computeTags[1][i].comp; 687 tHist.computeIndices[i].update(tHist.gHist); 688 tHist.computeTags[0][i].update(tHist.gHist); 689 tHist.computeTags[1][i].update(tHist.gHist); 690 } 691 DPRINTF(LTage, "Updating global histories with branch:%lx; taken?:%d, " 692 "path Hist: %x; pointer:%d\n", branch_pc, taken, tHist.pathHist, 693 tHist.ptGhist); 694} 695 696void 697LTAGE::squash(ThreadID tid, bool taken, void *bp_history) 698{ 699 BranchInfo* bi = (BranchInfo*)(bp_history); 700 ThreadHistory& tHist = threadHistory[tid]; 701 DPRINTF(LTage, "Restoring branch info: %lx; taken? %d; PathHistory:%x, " 702 "pointer:%d\n", bi->branchPC,taken, bi->pathHist, bi->ptGhist); 703 tHist.pathHist = bi->pathHist; 704 tHist.ptGhist = bi->ptGhist; 705 tHist.gHist = &(tHist.globalHistory[tHist.ptGhist]); 706 tHist.gHist[0] = (taken ? 
1 : 0); 707 for (int i = 1; i <= nHistoryTables; i++) { 708 tHist.computeIndices[i].comp = bi->ci[i]; 709 tHist.computeTags[0][i].comp = bi->ct0[i]; 710 tHist.computeTags[1][i].comp = bi->ct1[i]; 711 tHist.computeIndices[i].update(tHist.gHist); 712 tHist.computeTags[0][i].update(tHist.gHist); 713 tHist.computeTags[1][i].update(tHist.gHist); 714 } 715 716 if (bi->condBranch) { 717 if (bi->loopHit >= 0) { 718 int idx = bi->loopIndex + bi->loopHit; 719 ltable[idx].currentIterSpec = bi->currentIter; 720 } 721 } 722 723} 724 725void 726LTAGE::squash(ThreadID tid, void *bp_history) 727{ 728 BranchInfo* bi = (BranchInfo*)(bp_history); 729 DPRINTF(LTage, "Deleting branch info: %lx\n", bi->branchPC); 730 if (bi->condBranch) { 731 if (bi->loopHit >= 0) { 732 int idx = bi->loopIndex + bi->loopHit; 733 ltable[idx].currentIterSpec = bi->currentIter; 734 } 735 } 736 737 delete bi; 738} 739 740bool 741LTAGE::lookup(ThreadID tid, Addr branch_pc, void* &bp_history) 742{ 743 bool retval = predict(tid, branch_pc, true, bp_history); 744 745 DPRINTF(LTage, "Lookup branch: %lx; predict:%d\n", branch_pc, retval); 746 updateHistories(tid, branch_pc, retval, bp_history); 747 assert(threadHistory[tid].gHist == 748 &threadHistory[tid].globalHistory[threadHistory[tid].ptGhist]); 749 750 return retval; 751} 752 753void 754LTAGE::btbUpdate(ThreadID tid, Addr branch_pc, void* &bp_history) 755{ 756 BranchInfo* bi = (BranchInfo*) bp_history; 757 ThreadHistory& tHist = threadHistory[tid]; 758 DPRINTF(LTage, "BTB miss resets prediction: %lx\n", branch_pc); 759 assert(tHist.gHist == &tHist.globalHistory[tHist.ptGhist]); 760 tHist.gHist[0] = 0; 761 for (int i = 1; i <= nHistoryTables; i++) { 762 tHist.computeIndices[i].comp = bi->ci[i]; 763 tHist.computeTags[0][i].comp = bi->ct0[i]; 764 tHist.computeTags[1][i].comp = bi->ct1[i]; 765 tHist.computeIndices[i].update(tHist.gHist); 766 tHist.computeTags[0][i].update(tHist.gHist); 767 tHist.computeTags[1][i].update(tHist.gHist); 768 } 769} 770 771void 
772LTAGE::uncondBranch(ThreadID tid, Addr br_pc, void* &bp_history) 773{ 774 DPRINTF(LTage, "UnConditionalBranch: %lx\n", br_pc); 775 predict(tid, br_pc, false, bp_history); 776 updateHistories(tid, br_pc, true, bp_history); 777 assert(threadHistory[tid].gHist == 778 &threadHistory[tid].globalHistory[threadHistory[tid].ptGhist]); 779} 780 781LTAGE* 782LTAGEParams::create() 783{ 784 return new LTAGE(this); 785} 786