51,52c51
< : BPredUnit(params),
< logRatioBiModalHystEntries(params->logRatioBiModalHystEntries),
---
> : TAGE(params),
54,60d52
< nHistoryTables(params->nHistoryTables),
< tagTableCounterBits(params->tagTableCounterBits),
< tagTableUBits(params->tagTableUBits),
< histBufferSize(params->histBufferSize),
< minHist(params->minHist),
< maxHist(params->maxHist),
< pathHistBits(params->pathHistBits),
69,73c61
< tagTableTagWidths(params->tagTableTagWidths),
< logTagTableSizes(params->logTagTableSizes),
< threadHistory(params->numThreads),
< logUResetPeriod(params->logUResetPeriod),
< useAltOnNaBits(params->useAltOnNaBits),
---
> loopUseCounter(0),
76,80d63
< // Current method for periodically resetting the u counter bits only
< // works for 1 or 2 bits
< // Also make sure that it is not 0
< assert(tagTableUBits <= 2 && (tagTableUBits > 0));
<
88,146d70
< // we use int type for the path history, so it cannot be more than
< // its size
< assert(pathHistBits <= (sizeof(int)*8));
<
< // initialize the counter to half of the period
< assert(logUResetPeriod != 0);
< tCounter = ULL(1) << (logUResetPeriod - 1);
<
< assert(params->histBufferSize > params->maxHist * 2);
< useAltPredForNewlyAllocated = 0;
<
< for (auto& history : threadHistory) {
< history.pathHist = 0;
< history.globalHistory = new uint8_t[histBufferSize];
< history.gHist = history.globalHistory;
< memset(history.gHist, 0, histBufferSize);
< history.ptGhist = 0;
< }
<
< histLengths = new int [nHistoryTables+1];
< histLengths[1] = minHist;
< histLengths[nHistoryTables] = maxHist;
<
< for (int i = 2; i <= nHistoryTables; i++) {
< histLengths[i] = (int) (((double) minHist *
< pow ((double) (maxHist) / (double) minHist,
< (double) (i - 1) / (double) ((nHistoryTables- 1))))
< + 0.5);
< }
<
< assert(tagTableTagWidths.size() == (nHistoryTables+1));
< assert(logTagTableSizes.size() == (nHistoryTables+1));
<
< // First entry is for the Bimodal table and it is untagged in this
< // implementation
< assert(tagTableTagWidths[0] == 0);
<
< for (auto& history : threadHistory) {
< history.computeIndices = new FoldedHistory[nHistoryTables+1];
< history.computeTags[0] = new FoldedHistory[nHistoryTables+1];
< history.computeTags[1] = new FoldedHistory[nHistoryTables+1];
<
< for (int i = 1; i <= nHistoryTables; i++) {
< history.computeIndices[i].init(
< histLengths[i], (logTagTableSizes[i]));
< history.computeTags[0][i].init(
< history.computeIndices[i].origLength, tagTableTagWidths[i]);
< history.computeTags[1][i].init(
< history.computeIndices[i].origLength, tagTableTagWidths[i]-1);
< DPRINTF(LTage, "HistLength:%d, TTSize:%d, TTTWidth:%d\n",
< histLengths[i], logTagTableSizes[i], tagTableTagWidths[i]);
< }
< }
<
< const uint64_t bimodalTableSize = ULL(1) << logTagTableSizes[0];
< btablePrediction.resize(bimodalTableSize, false);
< btableHysteresis.resize(bimodalTableSize >> logRatioBiModalHystEntries,
< true);
<
148,156d71
< gtable = new TageEntry*[nHistoryTables + 1];
< for (int i = 1; i <= nHistoryTables; i++) {
< gtable[i] = new TageEntry[1<<(logTagTableSizes[i])];
< }
<
< tableIndices = new int [nHistoryTables+1];
< tableTags = new int [nHistoryTables+1];
<
< loopUseCounter = 0;
160,165d74
< LTAGE::bindex(Addr pc_in) const
< {
< return ((pc_in >> instShiftAmt) & ((ULL(1) << (logTagTableSizes[0])) - 1));
< }
<
< int
179,282d87
< int
< LTAGE::F(int A, int size, int bank) const
< {
< int A1, A2;
<
< A = A & ((ULL(1) << size) - 1);
< A1 = (A & ((ULL(1) << logTagTableSizes[bank]) - 1));
< A2 = (A >> logTagTableSizes[bank]);
< A2 = ((A2 << bank) & ((ULL(1) << logTagTableSizes[bank]) - 1))
< + (A2 >> (logTagTableSizes[bank] - bank));
< A = A1 ^ A2;
< A = ((A << bank) & ((ULL(1) << logTagTableSizes[bank]) - 1))
< + (A >> (logTagTableSizes[bank] - bank));
< return (A);
< }
<
<
< // gindex computes a full hash of pc, ghist and pathHist
< int
< LTAGE::gindex(ThreadID tid, Addr pc, int bank) const
< {
< int index;
< int hlen = (histLengths[bank] > pathHistBits) ? pathHistBits :
< histLengths[bank];
< const Addr shiftedPc = pc >> instShiftAmt;
< index =
< shiftedPc ^
< (shiftedPc >> ((int) abs(logTagTableSizes[bank] - bank) + 1)) ^
< threadHistory[tid].computeIndices[bank].comp ^
< F(threadHistory[tid].pathHist, hlen, bank);
<
< return (index & ((ULL(1) << (logTagTableSizes[bank])) - 1));
< }
<
<
< // Tag computation
< uint16_t
< LTAGE::gtag(ThreadID tid, Addr pc, int bank) const
< {
< int tag = (pc >> instShiftAmt) ^
< threadHistory[tid].computeTags[0][bank].comp ^
< (threadHistory[tid].computeTags[1][bank].comp << 1);
<
< return (tag & ((ULL(1) << tagTableTagWidths[bank]) - 1));
< }
<
<
< // Up-down saturating counter
< void
< LTAGE::ctrUpdate(int8_t & ctr, bool taken, int nbits)
< {
< assert(nbits <= sizeof(int8_t) << 3);
< if (taken) {
< if (ctr < ((1 << (nbits - 1)) - 1))
< ctr++;
< } else {
< if (ctr > -(1 << (nbits - 1)))
< ctr--;
< }
< }
<
< // Up-down unsigned saturating counter
< void
< LTAGE::unsignedCtrUpdate(uint8_t & ctr, bool up, unsigned nbits)
< {
< assert(nbits <= sizeof(uint8_t) << 3);
< if (up) {
< if (ctr < ((1 << nbits) - 1))
< ctr++;
< } else {
< if (ctr)
< ctr--;
< }
< }
<
< // Bimodal prediction
< bool
< LTAGE::getBimodePred(Addr pc, BranchInfo* bi) const
< {
< return btablePrediction[bi->bimodalIndex];
< }
<
<
< // Update the bimodal predictor: a hysteresis bit is shared among N prediction
< // bits (N = 2 ^ logRatioBiModalHystEntries)
< void
< LTAGE::baseUpdate(Addr pc, bool taken, BranchInfo* bi)
< {
< int inter = (btablePrediction[bi->bimodalIndex] << 1)
< + btableHysteresis[bi->bimodalIndex >> logRatioBiModalHystEntries];
< if (taken) {
< if (inter < 3)
< inter++;
< } else if (inter > 0) {
< inter--;
< }
< const bool pred = inter >> 1;
< const bool hyst = inter & 1;
< btablePrediction[bi->bimodalIndex] = pred;
< btableHysteresis[bi->bimodalIndex >> logRatioBiModalHystEntries] = hyst;
< DPRINTF(LTage, "Updating branch %lx, pred:%d, hyst:%d\n", pc, pred, hyst);
< }
<
<
285c90
< LTAGE::getLoop(Addr pc, BranchInfo* bi) const
---
> LTAGE::getLoop(Addr pc, LTageBranchInfo* bi) const
311c116
< LTAGE::specLoopUpdate(Addr pc, bool taken, BranchInfo* bi)
---
> LTAGE::specLoopUpdate(Addr pc, bool taken, LTageBranchInfo* bi)
325c130
< LTAGE::loopUpdate(Addr pc, bool taken, BranchInfo* bi)
---
> LTAGE::loopUpdate(Addr pc, bool taken, LTageBranchInfo* bi)
412,450d216
< // shifting the global history: we manage the history in a big table in order
< // to reduce simulation time
< void
< LTAGE::updateGHist(uint8_t * &h, bool dir, uint8_t * tab, int &pt)
< {
< if (pt == 0) {
< DPRINTF(LTage, "Rolling over the histories\n");
< // Copy beginning of globalHistoryBuffer to end, such that
< // the last maxHist outcomes are still reachable
< // through pt[0 .. maxHist - 1].
< for (int i = 0; i < maxHist; i++)
< tab[histBufferSize - maxHist + i] = tab[i];
< pt = histBufferSize - maxHist;
< h = &tab[pt];
< }
< pt--;
< h--;
< h[0] = (dir) ? 1 : 0;
< }
<
< // Get GHR for hashing indirect predictor
< // Build history backwards from pointer in
< // bp_history.
< unsigned
< LTAGE::getGHR(ThreadID tid, void *bp_history) const
< {
< BranchInfo* bi = static_cast<BranchInfo*>(bp_history);
< unsigned val = 0;
< for (unsigned i = 0; i < 32; i++) {
< // Make sure we don't go out of bounds
< int gh_offset = bi->ptGhist + i;
< assert(&(threadHistory[tid].globalHistory[gh_offset]) <
< threadHistory[tid].globalHistory + histBufferSize);
< val |= ((threadHistory[tid].globalHistory[gh_offset] & 0x1) << i);
< }
<
< return val;
< }
<
455c221
< BranchInfo *bi = new BranchInfo(nHistoryTables+1);
---
> LTageBranchInfo *bi = new LTageBranchInfo(nHistoryTables+1);
457,459d222
< Addr pc = branch_pc;
< bool pred_taken = true;
< bi->loopHit = -1;
460a224,225
> bool pred_taken = tagePredict(tid, branch_pc, cond_branch, bi);
>
462c227
< // TAGE prediction
---
> bi->loopPred = getLoop(branch_pc, bi); // loop prediction
464,469c229,230
< // computes the table addresses and the partial tags
< for (int i = 1; i <= nHistoryTables; i++) {
< tableIndices[i] = gindex(tid, pc, i);
< bi->tableIndices[i] = tableIndices[i];
< tableTags[i] = gtag(tid, pc, i);
< bi->tableTags[i] = tableTags[i];
---
> if ((loopUseCounter >= 0) && bi->loopPredValid) {
> pred_taken = bi->loopPred;
471,525d231
<
< bi->bimodalIndex = bindex(pc);
<
< bi->hitBank = 0;
< bi->altBank = 0;
< //Look for the bank with longest matching history
< for (int i = nHistoryTables; i > 0; i--) {
< if (gtable[i][tableIndices[i]].tag == tableTags[i]) {
< bi->hitBank = i;
< bi->hitBankIndex = tableIndices[bi->hitBank];
< break;
< }
< }
< //Look for the alternate bank
< for (int i = bi->hitBank - 1; i > 0; i--) {
< if (gtable[i][tableIndices[i]].tag == tableTags[i]) {
< bi->altBank = i;
< bi->altBankIndex = tableIndices[bi->altBank];
< break;
< }
< }
< //computes the prediction and the alternate prediction
< if (bi->hitBank > 0) {
< if (bi->altBank > 0) {
< bi->altTaken =
< gtable[bi->altBank][tableIndices[bi->altBank]].ctr >= 0;
< }else {
< bi->altTaken = getBimodePred(pc, bi);
< }
<
< bi->longestMatchPred =
< gtable[bi->hitBank][tableIndices[bi->hitBank]].ctr >= 0;
< bi->pseudoNewAlloc =
< abs(2 * gtable[bi->hitBank][bi->hitBankIndex].ctr + 1) <= 1;
<
< //if the entry is recognized as a newly allocated entry and
< //useAltPredForNewlyAllocated is positive use the alternate
< //prediction
< if ((useAltPredForNewlyAllocated < 0)
< || abs(2 *
< gtable[bi->hitBank][tableIndices[bi->hitBank]].ctr + 1) > 1)
< bi->tagePred = bi->longestMatchPred;
< else
< bi->tagePred = bi->altTaken;
< } else {
< bi->altTaken = getBimodePred(pc, bi);
< bi->tagePred = bi->altTaken;
< bi->longestMatchPred = bi->altTaken;
< }
< //end TAGE prediction
<
< bi->loopPred = getLoop(pc, bi); // loop prediction
<
< pred_taken = (((loopUseCounter >= 0) && bi->loopPredValid)) ?
< (bi->loopPred): (bi->tagePred);
531,532c237
< bi->branchPC = branch_pc;
< bi->condBranch = cond_branch;
---
>
537d241
< // PREDICTOR UPDATE
539,540c243,244
< LTAGE::update(ThreadID tid, Addr branch_pc, bool taken, void* bp_history,
< bool squashed)
---
> LTAGE::condBranchUpdate(Addr branch_pc, bool taken,
> TageBranchInfo* tage_bi, int nrand)
542c246
< assert(bp_history);
---
> LTageBranchInfo* bi = static_cast<LTageBranchInfo*>(tage_bi);
544c248,249
< BranchInfo *bi = static_cast<BranchInfo*>(bp_history);
---
> // first update the loop predictor
> loopUpdate(branch_pc, taken, bi);
546,566c251,255
< if (squashed) {
< // This restores the global history, then update it
< // and recomputes the folded histories.
< squash(tid, taken, bp_history);
< return;
< }
<
< int nrand = random_mt.random<int>(0,3);
< Addr pc = branch_pc;
< if (bi->condBranch) {
< DPRINTF(LTage, "Updating tables for branch:%lx; taken?:%d\n",
< branch_pc, taken);
< // first update the loop predictor
< loopUpdate(pc, taken, bi);
<
< if (bi->loopPredValid) {
< if (bi->tagePred != bi->loopPred) {
< ctrUpdate(loopUseCounter,
< (bi->loopPred == taken),
< withLoopBits);
< }
---
> if (bi->loopPredValid) {
> if (bi->tagePred != bi->loopPred) {
> ctrUpdate(loopUseCounter,
> (bi->loopPred == taken),
> withLoopBits);
568,669d256
<
< // TAGE UPDATE
< // try to allocate a new entries only if prediction was wrong
< bool longest_match_pred = false;
< bool alloc = (bi->tagePred != taken) && (bi->hitBank < nHistoryTables);
< if (bi->hitBank > 0) {
< // Manage the selection between longest matching and alternate
< // matching for "pseudo"-newly allocated longest matching entry
< longest_match_pred = bi->longestMatchPred;
< bool PseudoNewAlloc = bi->pseudoNewAlloc;
< // an entry is considered as newly allocated if its prediction
< // counter is weak
< if (PseudoNewAlloc) {
< if (longest_match_pred == taken) {
< alloc = false;
< }
< // if it was delivering the correct prediction, no need to
< // allocate new entry even if the overall prediction was false
< if (longest_match_pred != bi->altTaken) {
< ctrUpdate(useAltPredForNewlyAllocated,
< bi->altTaken == taken, useAltOnNaBits);
< }
< }
< }
<
< if (alloc) {
< // is there some "unuseful" entry to allocate
< uint8_t min = 1;
< for (int i = nHistoryTables; i > bi->hitBank; i--) {
< if (gtable[i][bi->tableIndices[i]].u < min) {
< min = gtable[i][bi->tableIndices[i]].u;
< }
< }
<
< // we allocate an entry with a longer history
< // to avoid ping-pong, we do not choose systematically the next
< // entry, but among the 3 next entries
< int Y = nrand &
< ((ULL(1) << (nHistoryTables - bi->hitBank - 1)) - 1);
< int X = bi->hitBank + 1;
< if (Y & 1) {
< X++;
< if (Y & 2)
< X++;
< }
< // No entry available, forces one to be available
< if (min > 0) {
< gtable[X][bi->tableIndices[X]].u = 0;
< }
<
<
< //Allocate only one entry
< for (int i = X; i <= nHistoryTables; i++) {
< if ((gtable[i][bi->tableIndices[i]].u == 0)) {
< gtable[i][bi->tableIndices[i]].tag = bi->tableTags[i];
< gtable[i][bi->tableIndices[i]].ctr = (taken) ? 0 : -1;
< break;
< }
< }
< }
< //periodic reset of u: reset is not complete but bit by bit
< tCounter++;
< if ((tCounter & ((ULL(1) << logUResetPeriod) - 1)) == 0) {
< // reset least significant bit
< // most significant bit becomes least significant bit
< for (int i = 1; i <= nHistoryTables; i++) {
< for (int j = 0; j < (ULL(1) << logTagTableSizes[i]); j++) {
< gtable[i][j].u = gtable[i][j].u >> 1;
< }
< }
< }
<
< if (bi->hitBank > 0) {
< DPRINTF(LTage, "Updating tag table entry (%d,%d) for branch %lx\n",
< bi->hitBank, bi->hitBankIndex, branch_pc);
< ctrUpdate(gtable[bi->hitBank][bi->hitBankIndex].ctr, taken,
< tagTableCounterBits);
< // if the provider entry is not certified to be useful also update
< // the alternate prediction
< if (gtable[bi->hitBank][bi->hitBankIndex].u == 0) {
< if (bi->altBank > 0) {
< ctrUpdate(gtable[bi->altBank][bi->altBankIndex].ctr, taken,
< tagTableCounterBits);
< DPRINTF(LTage, "Updating tag table entry (%d,%d) for"
< " branch %lx\n", bi->hitBank, bi->hitBankIndex,
< branch_pc);
< }
< if (bi->altBank == 0) {
< baseUpdate(pc, taken, bi);
< }
< }
<
< // update the u counter
< if (bi->tagePred != bi->altTaken) {
< unsignedCtrUpdate(gtable[bi->hitBank][bi->hitBankIndex].u,
< bi->tagePred == taken, tagTableUBits);
< }
< } else {
< baseUpdate(pc, taken, bi);
< }
<
< //END PREDICTOR UPDATE
671,674d257
< if (!squashed) {
< delete bi;
< }
< }
676,703c259
< void
< LTAGE::updateHistories(ThreadID tid, Addr branch_pc, bool taken, void* b)
< {
< BranchInfo* bi = (BranchInfo*)(b);
< ThreadHistory& tHist = threadHistory[tid];
< // UPDATE HISTORIES
< bool pathbit = ((branch_pc >> instShiftAmt) & 1);
< //on a squash, return pointers to this and recompute indices.
< //update user history
< updateGHist(tHist.gHist, taken, tHist.globalHistory, tHist.ptGhist);
< tHist.pathHist = (tHist.pathHist << 1) + pathbit;
< tHist.pathHist = (tHist.pathHist & ((ULL(1) << pathHistBits) - 1));
<
< bi->ptGhist = tHist.ptGhist;
< bi->pathHist = tHist.pathHist;
< //prepare next index and tag computations for user branchs
< for (int i = 1; i <= nHistoryTables; i++)
< {
< bi->ci[i] = tHist.computeIndices[i].comp;
< bi->ct0[i] = tHist.computeTags[0][i].comp;
< bi->ct1[i] = tHist.computeTags[1][i].comp;
< tHist.computeIndices[i].update(tHist.gHist);
< tHist.computeTags[0][i].update(tHist.gHist);
< tHist.computeTags[1][i].update(tHist.gHist);
< }
< DPRINTF(LTage, "Updating global histories with branch:%lx; taken?:%d, "
< "path Hist: %x; pointer:%d\n", branch_pc, taken, tHist.pathHist,
< tHist.ptGhist);
---
> TAGE::condBranchUpdate(branch_pc, taken, bi, nrand);
709,724c265
< BranchInfo* bi = (BranchInfo*)(bp_history);
< ThreadHistory& tHist = threadHistory[tid];
< DPRINTF(LTage, "Restoring branch info: %lx; taken? %d; PathHistory:%x, "
< "pointer:%d\n", bi->branchPC,taken, bi->pathHist, bi->ptGhist);
< tHist.pathHist = bi->pathHist;
< tHist.ptGhist = bi->ptGhist;
< tHist.gHist = &(tHist.globalHistory[tHist.ptGhist]);
< tHist.gHist[0] = (taken ? 1 : 0);
< for (int i = 1; i <= nHistoryTables; i++) {
< tHist.computeIndices[i].comp = bi->ci[i];
< tHist.computeTags[0][i].comp = bi->ct0[i];
< tHist.computeTags[1][i].comp = bi->ct1[i];
< tHist.computeIndices[i].update(tHist.gHist);
< tHist.computeTags[0][i].update(tHist.gHist);
< tHist.computeTags[1][i].update(tHist.gHist);
< }
---
> TAGE::squash(tid, taken, bp_history);
725a267,268
> LTageBranchInfo* bi = (LTageBranchInfo*)(bp_history);
>
732d274
<
738,739c280
< BranchInfo* bi = (BranchInfo*)(bp_history);
< DPRINTF(LTage, "Deleting branch info: %lx\n", bi->branchPC);
---
> LTageBranchInfo* bi = (LTageBranchInfo*)(bp_history);
747c288
< delete bi;
---
> TAGE::squash(tid, bp_history);
750,790d290
< bool
< LTAGE::lookup(ThreadID tid, Addr branch_pc, void* &bp_history)
< {
< bool retval = predict(tid, branch_pc, true, bp_history);
<
< DPRINTF(LTage, "Lookup branch: %lx; predict:%d\n", branch_pc, retval);
< updateHistories(tid, branch_pc, retval, bp_history);
< assert(threadHistory[tid].gHist ==
< &threadHistory[tid].globalHistory[threadHistory[tid].ptGhist]);
<
< return retval;
< }
<
< void
< LTAGE::btbUpdate(ThreadID tid, Addr branch_pc, void* &bp_history)
< {
< BranchInfo* bi = (BranchInfo*) bp_history;
< ThreadHistory& tHist = threadHistory[tid];
< DPRINTF(LTage, "BTB miss resets prediction: %lx\n", branch_pc);
< assert(tHist.gHist == &tHist.globalHistory[tHist.ptGhist]);
< tHist.gHist[0] = 0;
< for (int i = 1; i <= nHistoryTables; i++) {
< tHist.computeIndices[i].comp = bi->ci[i];
< tHist.computeTags[0][i].comp = bi->ct0[i];
< tHist.computeTags[1][i].comp = bi->ct1[i];
< tHist.computeIndices[i].update(tHist.gHist);
< tHist.computeTags[0][i].update(tHist.gHist);
< tHist.computeTags[1][i].update(tHist.gHist);
< }
< }
<
< void
< LTAGE::uncondBranch(ThreadID tid, Addr br_pc, void* &bp_history)
< {
< DPRINTF(LTage, "UnConditionalBranch: %lx\n", br_pc);
< predict(tid, br_pc, false, bp_history);
< updateHistories(tid, br_pc, true, bp_history);
< assert(threadHistory[tid].gHist ==
< &threadHistory[tid].globalHistory[threadHistory[tid].ptGhist]);
< }
<