// ltage.cc revision 13442:5314c50529a5
1/* 2 * Copyright (c) 2014 The University of Wisconsin 3 * 4 * Copyright (c) 2006 INRIA (Institut National de Recherche en 5 * Informatique et en Automatique / French National Research Institute 6 * for Computer Science and Applied Mathematics) 7 * 8 * All rights reserved. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions are 12 * met: redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer; 14 * redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution; 17 * neither the name of the copyright holders nor the names of its 18 * contributors may be used to endorse or promote products derived from 19 * this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 * 33 * Authors: Vignyan Reddy, Dibakar Gope and Arthur Perais, 34 * from André Seznec's code. 
35 */ 36 37/* @file 38 * Implementation of a L-TAGE branch predictor 39 */ 40 41#include "cpu/pred/ltage.hh" 42 43#include "base/intmath.hh" 44#include "base/logging.hh" 45#include "base/random.hh" 46#include "base/trace.hh" 47#include "debug/Fetch.hh" 48#include "debug/LTage.hh" 49 50LTAGE::LTAGE(const LTAGEParams *params) 51 : BPredUnit(params), 52 logSizeBiMP(params->logSizeBiMP), 53 logRatioBiModalHystEntries(params->logRatioBiModalHystEntries), 54 logSizeTagTables(params->logSizeTagTables), 55 logSizeLoopPred(params->logSizeLoopPred), 56 nHistoryTables(params->nHistoryTables), 57 tagTableCounterBits(params->tagTableCounterBits), 58 histBufferSize(params->histBufferSize), 59 minHist(params->minHist), 60 maxHist(params->maxHist), 61 minTagWidth(params->minTagWidth), 62 loopTableAgeBits(params->loopTableAgeBits), 63 loopTableConfidenceBits(params->loopTableConfidenceBits), 64 loopTableTagBits(params->loopTableTagBits), 65 loopTableIterBits(params->loopTableIterBits), 66 confidenceThreshold((1 << loopTableConfidenceBits) - 1), 67 loopTagMask((1 << loopTableTagBits) - 1), 68 loopNumIterMask((1 << loopTableIterBits) - 1), 69 threadHistory(params->numThreads) 70{ 71 // we use uint16_t type for these vales, so they cannot be more than 72 // 16 bits 73 assert(loopTableTagBits <= 16); 74 assert(loopTableIterBits <= 16); 75 76 assert(params->histBufferSize > params->maxHist * 2); 77 useAltPredForNewlyAllocated = 0; 78 logTick = 19; 79 tCounter = ULL(1) << (logTick - 1); 80 81 for (auto& history : threadHistory) { 82 history.pathHist = 0; 83 history.globalHistory = new uint8_t[histBufferSize]; 84 history.gHist = history.globalHistory; 85 memset(history.gHist, 0, histBufferSize); 86 history.ptGhist = 0; 87 } 88 89 histLengths = new int [nHistoryTables+1]; 90 histLengths[1] = minHist; 91 histLengths[nHistoryTables] = maxHist; 92 93 for (int i = 2; i <= nHistoryTables; i++) { 94 histLengths[i] = (int) (((double) minHist * 95 pow ((double) (maxHist) / (double) minHist, 96 
(double) (i - 1) / (double) ((nHistoryTables- 1)))) 97 + 0.5); 98 } 99 100 tagWidths[1] = minTagWidth; 101 tagWidths[2] = minTagWidth; 102 tagWidths[3] = minTagWidth + 1; 103 tagWidths[4] = minTagWidth + 1; 104 tagWidths[5] = minTagWidth + 2; 105 tagWidths[6] = minTagWidth + 3; 106 tagWidths[7] = minTagWidth + 4; 107 tagWidths[8] = minTagWidth + 5; 108 tagWidths[9] = minTagWidth + 5; 109 tagWidths[10] = minTagWidth + 6; 110 tagWidths[11] = minTagWidth + 7; 111 tagWidths[12] = minTagWidth + 8; 112 113 for (int i = 1; i <= 2; i++) 114 tagTableSizes[i] = logSizeTagTables - 1; 115 for (int i = 3; i <= 6; i++) 116 tagTableSizes[i] = logSizeTagTables; 117 for (int i = 7; i <= 10; i++) 118 tagTableSizes[i] = logSizeTagTables - 1; 119 for (int i = 11; i <= 12; i++) 120 tagTableSizes[i] = logSizeTagTables - 2; 121 122 for (auto& history : threadHistory) { 123 history.computeIndices = new FoldedHistory[nHistoryTables+1]; 124 history.computeTags[0] = new FoldedHistory[nHistoryTables+1]; 125 history.computeTags[1] = new FoldedHistory[nHistoryTables+1]; 126 127 for (int i = 1; i <= nHistoryTables; i++) { 128 history.computeIndices[i].init(histLengths[i], (tagTableSizes[i])); 129 history.computeTags[0][i].init( 130 history.computeIndices[i].origLength, tagWidths[i]); 131 history.computeTags[1][i].init( 132 history.computeIndices[i].origLength, tagWidths[i] - 1); 133 DPRINTF(LTage, "HistLength:%d, TTSize:%d, TTTWidth:%d\n", 134 histLengths[i], tagTableSizes[i], tagWidths[i]); 135 } 136 } 137 138 const uint64_t bimodalTableSize = ULL(1) << logSizeBiMP; 139 btablePrediction.resize(bimodalTableSize, false); 140 btableHysteresis.resize(bimodalTableSize >> logRatioBiModalHystEntries, 141 true); 142 143 ltable = new LoopEntry[ULL(1) << logSizeLoopPred]; 144 gtable = new TageEntry*[nHistoryTables + 1]; 145 for (int i = 1; i <= nHistoryTables; i++) { 146 gtable[i] = new TageEntry[1<<(tagTableSizes[i])]; 147 } 148 149 tableIndices = new int [nHistoryTables+1]; 150 tableTags = new int 
[nHistoryTables+1]; 151 152 loopUseCounter = 0; 153} 154 155int 156LTAGE::bindex(Addr pc_in) const 157{ 158 return ((pc_in >> instShiftAmt) & ((ULL(1) << (logSizeBiMP)) - 1)); 159} 160 161int 162LTAGE::lindex(Addr pc_in) const 163{ 164 return (((pc_in >> instShiftAmt) & 165 ((ULL(1) << (logSizeLoopPred - 2)) - 1)) << 2); 166} 167 168int 169LTAGE::F(int A, int size, int bank) const 170{ 171 int A1, A2; 172 173 A = A & ((ULL(1) << size) - 1); 174 A1 = (A & ((ULL(1) << tagTableSizes[bank]) - 1)); 175 A2 = (A >> tagTableSizes[bank]); 176 A2 = ((A2 << bank) & ((ULL(1) << tagTableSizes[bank]) - 1)) 177 + (A2 >> (tagTableSizes[bank] - bank)); 178 A = A1 ^ A2; 179 A = ((A << bank) & ((ULL(1) << tagTableSizes[bank]) - 1)) 180 + (A >> (tagTableSizes[bank] - bank)); 181 return (A); 182} 183 184 185// gindex computes a full hash of pc, ghist and pathHist 186int 187LTAGE::gindex(ThreadID tid, Addr pc, int bank) const 188{ 189 int index; 190 int hlen = (histLengths[bank] > 16) ? 16 : histLengths[bank]; 191 index = 192 (pc >> instShiftAmt) ^ 193 ((pc >> instShiftAmt) >> ((int) abs(tagTableSizes[bank] - bank) + 1)) ^ 194 threadHistory[tid].computeIndices[bank].comp ^ 195 F(threadHistory[tid].pathHist, hlen, bank); 196 197 return (index & ((ULL(1) << (tagTableSizes[bank])) - 1)); 198} 199 200 201// Tag computation 202uint16_t 203LTAGE::gtag(ThreadID tid, Addr pc, int bank) const 204{ 205 int tag = (pc >> instShiftAmt) ^ 206 threadHistory[tid].computeTags[0][bank].comp ^ 207 (threadHistory[tid].computeTags[1][bank].comp << 1); 208 209 return (tag & ((ULL(1) << tagWidths[bank]) - 1)); 210} 211 212 213// Up-down saturating counter 214void 215LTAGE::ctrUpdate(int8_t & ctr, bool taken, int nbits) 216{ 217 assert(nbits <= sizeof(int8_t) << 3); 218 if (taken) { 219 if (ctr < ((1 << (nbits - 1)) - 1)) 220 ctr++; 221 } else { 222 if (ctr > -(1 << (nbits - 1))) 223 ctr--; 224 } 225} 226 227// Up-down unsigned saturating counter 228void 229LTAGE::unsignedCtrUpdate(uint8_t & ctr, bool up, 
unsigned nbits) 230{ 231 assert(nbits <= sizeof(uint8_t) << 3); 232 if (up) { 233 if (ctr < ((1 << nbits) - 1)) 234 ctr++; 235 } else { 236 if (ctr) 237 ctr--; 238 } 239} 240 241// Bimodal prediction 242bool 243LTAGE::getBimodePred(Addr pc, BranchInfo* bi) const 244{ 245 return btablePrediction[bi->bimodalIndex]; 246} 247 248 249// Update the bimodal predictor: a hysteresis bit is shared among N prediction 250// bits (N = 2 ^ logRatioBiModalHystEntries) 251void 252LTAGE::baseUpdate(Addr pc, bool taken, BranchInfo* bi) 253{ 254 int inter = (btablePrediction[bi->bimodalIndex] << 1) 255 + btableHysteresis[bi->bimodalIndex >> logRatioBiModalHystEntries]; 256 if (taken) { 257 if (inter < 3) 258 inter++; 259 } else if (inter > 0) { 260 inter--; 261 } 262 const bool pred = inter >> 1; 263 const bool hyst = inter & 1; 264 btablePrediction[bi->bimodalIndex] = pred; 265 btableHysteresis[bi->bimodalIndex >> logRatioBiModalHystEntries] = hyst; 266 DPRINTF(LTage, "Updating branch %lx, pred:%d, hyst:%d\n", pc, pred, hyst); 267} 268 269 270//loop prediction: only used if high confidence 271bool 272LTAGE::getLoop(Addr pc, BranchInfo* bi) const 273{ 274 bi->loopHit = -1; 275 bi->loopPredValid = false; 276 bi->loopIndex = lindex(pc); 277 bi->loopTag = ((pc) >> (instShiftAmt + logSizeLoopPred - 2)) & loopTagMask; 278 279 for (int i = 0; i < 4; i++) { 280 if (ltable[bi->loopIndex + i].tag == bi->loopTag) { 281 bi->loopHit = i; 282 bi->loopPredValid = 283 ltable[bi->loopIndex + i].confidence == confidenceThreshold; 284 bi->currentIter = ltable[bi->loopIndex + i].currentIterSpec; 285 if (ltable[bi->loopIndex + i].currentIterSpec + 1 == 286 ltable[bi->loopIndex + i].numIter) { 287 return !(ltable[bi->loopIndex + i].dir); 288 }else { 289 return (ltable[bi->loopIndex + i].dir); 290 } 291 } 292 } 293 return false; 294} 295 296void 297LTAGE::specLoopUpdate(Addr pc, bool taken, BranchInfo* bi) 298{ 299 if (bi->loopHit>=0) { 300 int index = lindex(pc); 301 if (taken != ltable[index].dir) { 302 
ltable[index].currentIterSpec = 0; 303 } else { 304 ltable[index].currentIterSpec = 305 (ltable[index].currentIterSpec + 1) & loopNumIterMask; 306 } 307 } 308} 309 310void 311LTAGE::loopUpdate(Addr pc, bool taken, BranchInfo* bi) 312{ 313 int idx = bi->loopIndex + bi->loopHit; 314 if (bi->loopHit >= 0) { 315 //already a hit 316 if (bi->loopPredValid) { 317 if (taken != bi->loopPred) { 318 // free the entry 319 ltable[idx].numIter = 0; 320 ltable[idx].age = 0; 321 ltable[idx].confidence = 0; 322 ltable[idx].currentIter = 0; 323 return; 324 } else if (bi->loopPred != bi->tagePred) { 325 DPRINTF(LTage, "Loop Prediction success:%lx\n",pc); 326 unsignedCtrUpdate(ltable[idx].age, true, loopTableAgeBits); 327 } 328 } 329 330 ltable[idx].currentIter = 331 (ltable[idx].currentIter + 1) & loopNumIterMask; 332 if (ltable[idx].currentIter > ltable[idx].numIter) { 333 ltable[idx].confidence = 0; 334 if (ltable[idx].numIter != 0) { 335 // free the entry 336 ltable[idx].numIter = 0; 337 ltable[idx].age = 0; 338 ltable[idx].confidence = 0; 339 } 340 } 341 342 if (taken != ltable[idx].dir) { 343 if (ltable[idx].currentIter == ltable[idx].numIter) { 344 DPRINTF(LTage, "Loop End predicted successfully:%lx\n", pc); 345 346 unsignedCtrUpdate(ltable[idx].confidence, true, 347 loopTableConfidenceBits); 348 //just do not predict when the loop count is 1 or 2 349 if (ltable[idx].numIter < 3) { 350 // free the entry 351 ltable[idx].dir = taken; 352 ltable[idx].numIter = 0; 353 ltable[idx].age = 0; 354 ltable[idx].confidence = 0; 355 } 356 } else { 357 DPRINTF(LTage, "Loop End predicted incorrectly:%lx\n", pc); 358 if (ltable[idx].numIter == 0) { 359 // first complete nest; 360 ltable[idx].confidence = 0; 361 ltable[idx].numIter = ltable[idx].currentIter; 362 } else { 363 //not the same number of iterations as last time: free the 364 //entry 365 ltable[idx].numIter = 0; 366 ltable[idx].age = 0; 367 ltable[idx].confidence = 0; 368 } 369 } 370 ltable[idx].currentIter = 0; 371 } 372 373 } else 
if (taken) { 374 //try to allocate an entry on taken branch 375 int nrand = random_mt.random<int>(); 376 for (int i = 0; i < 4; i++) { 377 int loop_hit = (nrand + i) & 3; 378 idx = bi->loopIndex + loop_hit; 379 if (ltable[idx].age == 0) { 380 DPRINTF(LTage, "Allocating loop pred entry for branch %lx\n", 381 pc); 382 ltable[idx].dir = !taken; 383 ltable[idx].tag = bi->loopTag; 384 ltable[idx].numIter = 0; 385 ltable[idx].age = (1 << loopTableAgeBits) - 1; 386 ltable[idx].confidence = 0; 387 ltable[idx].currentIter = 1; 388 break; 389 390 } 391 else 392 ltable[idx].age--; 393 } 394 } 395 396} 397 398// shifting the global history: we manage the history in a big table in order 399// to reduce simulation time 400void 401LTAGE::updateGHist(uint8_t * &h, bool dir, uint8_t * tab, int &pt) 402{ 403 if (pt == 0) { 404 DPRINTF(LTage, "Rolling over the histories\n"); 405 // Copy beginning of globalHistoryBuffer to end, such that 406 // the last maxHist outcomes are still reachable 407 // through pt[0 .. maxHist - 1]. 408 for (int i = 0; i < maxHist; i++) 409 tab[histBufferSize - maxHist + i] = tab[i]; 410 pt = histBufferSize - maxHist; 411 h = &tab[pt]; 412 } 413 pt--; 414 h--; 415 h[0] = (dir) ? 1 : 0; 416} 417 418// Get GHR for hashing indirect predictor 419// Build history backwards from pointer in 420// bp_history. 
421unsigned 422LTAGE::getGHR(ThreadID tid, void *bp_history) const 423{ 424 BranchInfo* bi = static_cast<BranchInfo*>(bp_history); 425 unsigned val = 0; 426 for (unsigned i = 0; i < 32; i++) { 427 // Make sure we don't go out of bounds 428 int gh_offset = bi->ptGhist + i; 429 assert(&(threadHistory[tid].globalHistory[gh_offset]) < 430 threadHistory[tid].globalHistory + histBufferSize); 431 val |= ((threadHistory[tid].globalHistory[gh_offset] & 0x1) << i); 432 } 433 434 return val; 435} 436 437//prediction 438bool 439LTAGE::predict(ThreadID tid, Addr branch_pc, bool cond_branch, void* &b) 440{ 441 BranchInfo *bi = new BranchInfo(nHistoryTables+1); 442 b = (void*)(bi); 443 Addr pc = branch_pc; 444 bool pred_taken = true; 445 bi->loopHit = -1; 446 447 if (cond_branch) { 448 // TAGE prediction 449 450 // computes the table addresses and the partial tags 451 for (int i = 1; i <= nHistoryTables; i++) { 452 tableIndices[i] = gindex(tid, pc, i); 453 bi->tableIndices[i] = tableIndices[i]; 454 tableTags[i] = gtag(tid, pc, i); 455 bi->tableTags[i] = tableTags[i]; 456 } 457 458 bi->bimodalIndex = bindex(pc); 459 460 bi->hitBank = 0; 461 bi->altBank = 0; 462 //Look for the bank with longest matching history 463 for (int i = nHistoryTables; i > 0; i--) { 464 if (gtable[i][tableIndices[i]].tag == tableTags[i]) { 465 bi->hitBank = i; 466 bi->hitBankIndex = tableIndices[bi->hitBank]; 467 break; 468 } 469 } 470 //Look for the alternate bank 471 for (int i = bi->hitBank - 1; i > 0; i--) { 472 if (gtable[i][tableIndices[i]].tag == tableTags[i]) { 473 bi->altBank = i; 474 bi->altBankIndex = tableIndices[bi->altBank]; 475 break; 476 } 477 } 478 //computes the prediction and the alternate prediction 479 if (bi->hitBank > 0) { 480 if (bi->altBank > 0) { 481 bi->altTaken = 482 gtable[bi->altBank][tableIndices[bi->altBank]].ctr >= 0; 483 }else { 484 bi->altTaken = getBimodePred(pc, bi); 485 } 486 487 bi->longestMatchPred = 488 gtable[bi->hitBank][tableIndices[bi->hitBank]].ctr >= 0; 489 
bi->pseudoNewAlloc = 490 abs(2 * gtable[bi->hitBank][bi->hitBankIndex].ctr + 1) <= 1; 491 492 //if the entry is recognized as a newly allocated entry and 493 //useAltPredForNewlyAllocated is positive use the alternate 494 //prediction 495 if ((useAltPredForNewlyAllocated < 0) 496 || abs(2 * 497 gtable[bi->hitBank][tableIndices[bi->hitBank]].ctr + 1) > 1) 498 bi->tagePred = bi->longestMatchPred; 499 else 500 bi->tagePred = bi->altTaken; 501 } else { 502 bi->altTaken = getBimodePred(pc, bi); 503 bi->tagePred = bi->altTaken; 504 bi->longestMatchPred = bi->altTaken; 505 } 506 //end TAGE prediction 507 508 bi->loopPred = getLoop(pc, bi); // loop prediction 509 510 pred_taken = (((loopUseCounter >= 0) && bi->loopPredValid)) ? 511 (bi->loopPred): (bi->tagePred); 512 DPRINTF(LTage, "Predict for %lx: taken?:%d, loopTaken?:%d, " 513 "loopValid?:%d, loopUseCounter:%d, tagePred:%d, altPred:%d\n", 514 branch_pc, pred_taken, bi->loopPred, bi->loopPredValid, 515 loopUseCounter, bi->tagePred, bi->altTaken); 516 } 517 bi->branchPC = branch_pc; 518 bi->condBranch = cond_branch; 519 specLoopUpdate(branch_pc, pred_taken, bi); 520 return pred_taken; 521} 522 523// PREDICTOR UPDATE 524void 525LTAGE::update(ThreadID tid, Addr branch_pc, bool taken, void* bp_history, 526 bool squashed) 527{ 528 assert(bp_history); 529 530 BranchInfo *bi = static_cast<BranchInfo*>(bp_history); 531 532 if (squashed) { 533 // This restores the global history, then update it 534 // and recomputes the folded histories. 
535 squash(tid, taken, bp_history); 536 return; 537 } 538 539 int nrand = random_mt.random<int>(0,3); 540 Addr pc = branch_pc; 541 if (bi->condBranch) { 542 DPRINTF(LTage, "Updating tables for branch:%lx; taken?:%d\n", 543 branch_pc, taken); 544 // first update the loop predictor 545 loopUpdate(pc, taken, bi); 546 547 if (bi->loopPredValid) { 548 if (bi->tagePred != bi->loopPred) { 549 ctrUpdate(loopUseCounter, (bi->loopPred== taken), 7); 550 } 551 } 552 553 // TAGE UPDATE 554 // try to allocate a new entries only if prediction was wrong 555 bool longest_match_pred = false; 556 bool alloc = (bi->tagePred != taken) && (bi->hitBank < nHistoryTables); 557 if (bi->hitBank > 0) { 558 // Manage the selection between longest matching and alternate 559 // matching for "pseudo"-newly allocated longest matching entry 560 longest_match_pred = bi->longestMatchPred; 561 bool PseudoNewAlloc = bi->pseudoNewAlloc; 562 // an entry is considered as newly allocated if its prediction 563 // counter is weak 564 if (PseudoNewAlloc) { 565 if (longest_match_pred == taken) { 566 alloc = false; 567 } 568 // if it was delivering the correct prediction, no need to 569 // allocate new entry even if the overall prediction was false 570 if (longest_match_pred != bi->altTaken) { 571 ctrUpdate(useAltPredForNewlyAllocated, 572 bi->altTaken == taken, 4); 573 } 574 } 575 } 576 577 if (alloc) { 578 // is there some "unuseful" entry to allocate 579 int8_t min = 1; 580 for (int i = nHistoryTables; i > bi->hitBank; i--) { 581 if (gtable[i][bi->tableIndices[i]].u < min) { 582 min = gtable[i][bi->tableIndices[i]].u; 583 } 584 } 585 586 // we allocate an entry with a longer history 587 // to avoid ping-pong, we do not choose systematically the next 588 // entry, but among the 3 next entries 589 int Y = nrand & 590 ((ULL(1) << (nHistoryTables - bi->hitBank - 1)) - 1); 591 int X = bi->hitBank + 1; 592 if (Y & 1) { 593 X++; 594 if (Y & 2) 595 X++; 596 } 597 // No entry available, forces one to be available 598 
if (min > 0) { 599 gtable[X][bi->tableIndices[X]].u = 0; 600 } 601 602 603 //Allocate only one entry 604 for (int i = X; i <= nHistoryTables; i++) { 605 if ((gtable[i][bi->tableIndices[i]].u == 0)) { 606 gtable[i][bi->tableIndices[i]].tag = bi->tableTags[i]; 607 gtable[i][bi->tableIndices[i]].ctr = (taken) ? 0 : -1; 608 gtable[i][bi->tableIndices[i]].u = 0; //? 609 break; 610 } 611 } 612 } 613 //periodic reset of u: reset is not complete but bit by bit 614 tCounter++; 615 if ((tCounter & ((ULL(1) << logTick) - 1)) == 0) { 616 // reset least significant bit 617 // most significant bit becomes least significant bit 618 for (int i = 1; i <= nHistoryTables; i++) { 619 for (int j = 0; j < (ULL(1) << tagTableSizes[i]); j++) { 620 gtable[i][j].u = gtable[i][j].u >> 1; 621 } 622 } 623 } 624 625 if (bi->hitBank > 0) { 626 DPRINTF(LTage, "Updating tag table entry (%d,%d) for branch %lx\n", 627 bi->hitBank, bi->hitBankIndex, branch_pc); 628 ctrUpdate(gtable[bi->hitBank][bi->hitBankIndex].ctr, taken, 629 tagTableCounterBits); 630 // if the provider entry is not certified to be useful also update 631 // the alternate prediction 632 if (gtable[bi->hitBank][bi->hitBankIndex].u == 0) { 633 if (bi->altBank > 0) { 634 ctrUpdate(gtable[bi->altBank][bi->altBankIndex].ctr, taken, 635 tagTableCounterBits); 636 DPRINTF(LTage, "Updating tag table entry (%d,%d) for" 637 " branch %lx\n", bi->hitBank, bi->hitBankIndex, 638 branch_pc); 639 } 640 if (bi->altBank == 0) { 641 baseUpdate(pc, taken, bi); 642 } 643 } 644 645 // update the u counter 646 if (longest_match_pred != bi->altTaken) { 647 if (longest_match_pred == taken) { 648 if (gtable[bi->hitBank][bi->hitBankIndex].u < 1) { 649 gtable[bi->hitBank][bi->hitBankIndex].u++; 650 } 651 } 652 } 653 } else { 654 baseUpdate(pc, taken, bi); 655 } 656 657 //END PREDICTOR UPDATE 658 } 659 if (!squashed) { 660 delete bi; 661 } 662} 663 664void 665LTAGE::updateHistories(ThreadID tid, Addr branch_pc, bool taken, void* b) 666{ 667 BranchInfo* bi = 
(BranchInfo*)(b); 668 ThreadHistory& tHist = threadHistory[tid]; 669 // UPDATE HISTORIES 670 bool pathbit = ((branch_pc >> instShiftAmt) & 1); 671 //on a squash, return pointers to this and recompute indices. 672 //update user history 673 updateGHist(tHist.gHist, taken, tHist.globalHistory, tHist.ptGhist); 674 tHist.pathHist = (tHist.pathHist << 1) + pathbit; 675 tHist.pathHist = (tHist.pathHist & ((ULL(1) << 16) - 1)); 676 677 bi->ptGhist = tHist.ptGhist; 678 bi->pathHist = tHist.pathHist; 679 //prepare next index and tag computations for user branchs 680 for (int i = 1; i <= nHistoryTables; i++) 681 { 682 bi->ci[i] = tHist.computeIndices[i].comp; 683 bi->ct0[i] = tHist.computeTags[0][i].comp; 684 bi->ct1[i] = tHist.computeTags[1][i].comp; 685 tHist.computeIndices[i].update(tHist.gHist); 686 tHist.computeTags[0][i].update(tHist.gHist); 687 tHist.computeTags[1][i].update(tHist.gHist); 688 } 689 DPRINTF(LTage, "Updating global histories with branch:%lx; taken?:%d, " 690 "path Hist: %x; pointer:%d\n", branch_pc, taken, tHist.pathHist, 691 tHist.ptGhist); 692} 693 694void 695LTAGE::squash(ThreadID tid, bool taken, void *bp_history) 696{ 697 BranchInfo* bi = (BranchInfo*)(bp_history); 698 ThreadHistory& tHist = threadHistory[tid]; 699 DPRINTF(LTage, "Restoring branch info: %lx; taken? %d; PathHistory:%x, " 700 "pointer:%d\n", bi->branchPC,taken, bi->pathHist, bi->ptGhist); 701 tHist.pathHist = bi->pathHist; 702 tHist.ptGhist = bi->ptGhist; 703 tHist.gHist = &(tHist.globalHistory[tHist.ptGhist]); 704 tHist.gHist[0] = (taken ? 
1 : 0); 705 for (int i = 1; i <= nHistoryTables; i++) { 706 tHist.computeIndices[i].comp = bi->ci[i]; 707 tHist.computeTags[0][i].comp = bi->ct0[i]; 708 tHist.computeTags[1][i].comp = bi->ct1[i]; 709 tHist.computeIndices[i].update(tHist.gHist); 710 tHist.computeTags[0][i].update(tHist.gHist); 711 tHist.computeTags[1][i].update(tHist.gHist); 712 } 713 714 if (bi->condBranch) { 715 if (bi->loopHit >= 0) { 716 int idx = bi->loopIndex + bi->loopHit; 717 ltable[idx].currentIterSpec = bi->currentIter; 718 } 719 } 720 721} 722 723void 724LTAGE::squash(ThreadID tid, void *bp_history) 725{ 726 BranchInfo* bi = (BranchInfo*)(bp_history); 727 DPRINTF(LTage, "Deleting branch info: %lx\n", bi->branchPC); 728 if (bi->condBranch) { 729 if (bi->loopHit >= 0) { 730 int idx = bi->loopIndex + bi->loopHit; 731 ltable[idx].currentIterSpec = bi->currentIter; 732 } 733 } 734 735 delete bi; 736} 737 738bool 739LTAGE::lookup(ThreadID tid, Addr branch_pc, void* &bp_history) 740{ 741 bool retval = predict(tid, branch_pc, true, bp_history); 742 743 DPRINTF(LTage, "Lookup branch: %lx; predict:%d\n", branch_pc, retval); 744 updateHistories(tid, branch_pc, retval, bp_history); 745 assert(threadHistory[tid].gHist == 746 &threadHistory[tid].globalHistory[threadHistory[tid].ptGhist]); 747 748 return retval; 749} 750 751void 752LTAGE::btbUpdate(ThreadID tid, Addr branch_pc, void* &bp_history) 753{ 754 BranchInfo* bi = (BranchInfo*) bp_history; 755 ThreadHistory& tHist = threadHistory[tid]; 756 DPRINTF(LTage, "BTB miss resets prediction: %lx\n", branch_pc); 757 assert(tHist.gHist == &tHist.globalHistory[tHist.ptGhist]); 758 tHist.gHist[0] = 0; 759 for (int i = 1; i <= nHistoryTables; i++) { 760 tHist.computeIndices[i].comp = bi->ci[i]; 761 tHist.computeTags[0][i].comp = bi->ct0[i]; 762 tHist.computeTags[1][i].comp = bi->ct1[i]; 763 tHist.computeIndices[i].update(tHist.gHist); 764 tHist.computeTags[0][i].update(tHist.gHist); 765 tHist.computeTags[1][i].update(tHist.gHist); 766 } 767} 768 769void 
770LTAGE::uncondBranch(ThreadID tid, Addr br_pc, void* &bp_history) 771{ 772 DPRINTF(LTage, "UnConditionalBranch: %lx\n", br_pc); 773 predict(tid, br_pc, false, bp_history); 774 updateHistories(tid, br_pc, true, bp_history); 775 assert(threadHistory[tid].gHist == 776 &threadHistory[tid].globalHistory[threadHistory[tid].ptGhist]); 777} 778 779LTAGE* 780LTAGEParams::create() 781{ 782 return new LTAGE(this); 783} 784