ltage.cc revision 13444:26f81be73cb7
1/*
2 * Copyright (c) 2014 The University of Wisconsin
3 *
4 * Copyright (c) 2006 INRIA (Institut National de Recherche en
5 * Informatique et en Automatique  / French National Research Institute
6 * for Computer Science and Applied Mathematics)
7 *
8 * All rights reserved.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions are
12 * met: redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer;
14 * redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution;
17 * neither the name of the copyright holders nor the names of its
18 * contributors may be used to endorse or promote products derived from
19 * this software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 *
33 * Authors: Vignyan Reddy, Dibakar Gope and Arthur Perais,
34 * from André Seznec's code.
35 */
36
37/* @file
38 * Implementation of a L-TAGE branch predictor
39 */
40
41#include "cpu/pred/ltage.hh"
42
43#include "base/intmath.hh"
44#include "base/logging.hh"
45#include "base/random.hh"
46#include "base/trace.hh"
47#include "debug/Fetch.hh"
48#include "debug/LTage.hh"
49
// Construct the L-TAGE predictor: a bimodal base table, nHistoryTables
// partially-tagged tables indexed with geometrically increasing global
// history lengths (TAGE), and a loop predictor for branches with regular
// trip counts.
LTAGE::LTAGE(const LTAGEParams *params)
  : BPredUnit(params),
    logRatioBiModalHystEntries(params->logRatioBiModalHystEntries),
    logSizeLoopPred(params->logSizeLoopPred),
    nHistoryTables(params->nHistoryTables),
    tagTableCounterBits(params->tagTableCounterBits),
    tagTableUBits(params->tagTableUBits),
    histBufferSize(params->histBufferSize),
    minHist(params->minHist),
    maxHist(params->maxHist),
    pathHistBits(params->pathHistBits),
    loopTableAgeBits(params->loopTableAgeBits),
    loopTableConfidenceBits(params->loopTableConfidenceBits),
    loopTableTagBits(params->loopTableTagBits),
    loopTableIterBits(params->loopTableIterBits),
    logLoopTableAssoc(params->logLoopTableAssoc),
    confidenceThreshold((1 << loopTableConfidenceBits) - 1),
    loopTagMask((1 << loopTableTagBits) - 1),
    loopNumIterMask((1 << loopTableIterBits) - 1),
    tagTableTagWidths(params->tagTableTagWidths),
    logTagTableSizes(params->logTagTableSizes),
    threadHistory(params->numThreads),
    logUResetPeriod(params->logUResetPeriod),
    useAltOnNaBits(params->useAltOnNaBits),
    withLoopBits(params->withLoopBits)
{
    // Current method for periodically resetting the u counter bits only
    // works for 1 or 2 bits
    // Also make sure that it is not 0
    assert(tagTableUBits <= 2 && (tagTableUBits > 0));

    // we use uint16_t type for these vales, so they cannot be more than
    // 16 bits
    assert(loopTableTagBits <= 16);
    assert(loopTableIterBits <= 16);

    assert(logSizeLoopPred >= logLoopTableAssoc);

    // we use int type for the path history, so it cannot be more than
    // its size
    assert(pathHistBits <= (sizeof(int)*8));

    // initialize the counter to half of the period
    assert(logUResetPeriod != 0);
    tCounter = ULL(1) << (logUResetPeriod - 1);

    // History buffer must be able to hold the longest history twice so
    // the rollover copy in updateGHist() always has room.
    assert(params->histBufferSize > params->maxHist * 2);
    useAltPredForNewlyAllocated = 0;

    // Per-thread global history: a large circular buffer written
    // backwards (see updateGHist()), plus a moving head pointer (gHist)
    // and its integer offset (ptGhist).
    for (auto& history : threadHistory) {
        history.pathHist = 0;
        history.globalHistory = new uint8_t[histBufferSize];
        history.gHist = history.globalHistory;
        memset(history.gHist, 0, histBufferSize);
        history.ptGhist = 0;
    }

    // Geometric series of history lengths from minHist to maxHist;
    // entry 0 is the (history-less) bimodal table.
    histLengths = new int [nHistoryTables+1];
    histLengths[1] = minHist;
    histLengths[nHistoryTables] = maxHist;

    for (int i = 2; i <= nHistoryTables; i++) {
        histLengths[i] = (int) (((double) minHist *
                    pow ((double) (maxHist) / (double) minHist,
                        (double) (i - 1) / (double) ((nHistoryTables- 1))))
                    + 0.5);
    }

    assert(tagTableTagWidths.size() == (nHistoryTables+1));
    assert(logTagTableSizes.size() == (nHistoryTables+1));

    // First entry is for the Bimodal table and it is untagged in this
    // implementation
    assert(tagTableTagWidths[0] == 0);

    // Folded (compressed) history registers used to derive each tagged
    // table's index and its two partial-tag hash components.
    for (auto& history : threadHistory) {
        history.computeIndices = new FoldedHistory[nHistoryTables+1];
        history.computeTags[0] = new FoldedHistory[nHistoryTables+1];
        history.computeTags[1] = new FoldedHistory[nHistoryTables+1];

        for (int i = 1; i <= nHistoryTables; i++) {
            history.computeIndices[i].init(
                histLengths[i], (logTagTableSizes[i]));
            history.computeTags[0][i].init(
                history.computeIndices[i].origLength, tagTableTagWidths[i]);
            history.computeTags[1][i].init(
                history.computeIndices[i].origLength, tagTableTagWidths[i]-1);
            DPRINTF(LTage, "HistLength:%d, TTSize:%d, TTTWidth:%d\n",
                    histLengths[i], logTagTableSizes[i], tagTableTagWidths[i]);
        }
    }

    // Bimodal table: one prediction bit per entry plus a smaller array of
    // hysteresis bits, each shared by 2^logRatioBiModalHystEntries
    // predictions.
    const uint64_t bimodalTableSize = ULL(1) << logTagTableSizes[0];
    btablePrediction.resize(bimodalTableSize, false);
    btableHysteresis.resize(bimodalTableSize >> logRatioBiModalHystEntries,
                            true);

    // Loop predictor table and the tagged TAGE components.
    ltable = new LoopEntry[ULL(1) << logSizeLoopPred];
    gtable = new TageEntry*[nHistoryTables + 1];
    for (int i = 1; i <= nHistoryTables; i++) {
        gtable[i] = new TageEntry[1<<(logTagTableSizes[i])];
    }

    // Scratch arrays reused by predict() on every lookup.
    tableIndices = new int [nHistoryTables+1];
    tableTags = new int [nHistoryTables+1];

    loopUseCounter = 0;
}
158
159int
160LTAGE::bindex(Addr pc_in) const
161{
162    return ((pc_in >> instShiftAmt) & ((ULL(1) << (logTagTableSizes[0])) - 1));
163}
164
165int
166LTAGE::lindex(Addr pc_in) const
167{
168    // The loop table is implemented as a linear table
169    // If associativity is N (N being 1 << logLoopTableAssoc),
170    // the first N entries are for set 0, the next N entries are for set 1,
171    // and so on.
172    // Thus, this function calculates the set and then it gets left shifted
173    // by logLoopTableAssoc in order to return the index of the first of the
174    // N entries of the set
175    Addr mask = (ULL(1) << (logSizeLoopPred - logLoopTableAssoc)) - 1;
176    return (((pc_in >> instShiftAmt) & mask) << logLoopTableAssoc);
177}
178
179int
180LTAGE::F(int A, int size, int bank) const
181{
182    int A1, A2;
183
184    A = A & ((ULL(1) << size) - 1);
185    A1 = (A & ((ULL(1) << logTagTableSizes[bank]) - 1));
186    A2 = (A >> logTagTableSizes[bank]);
187    A2 = ((A2 << bank) & ((ULL(1) << logTagTableSizes[bank]) - 1))
188       + (A2 >> (logTagTableSizes[bank] - bank));
189    A = A1 ^ A2;
190    A = ((A << bank) & ((ULL(1) << logTagTableSizes[bank]) - 1))
191      + (A >> (logTagTableSizes[bank] - bank));
192    return (A);
193}
194
195
196// gindex computes a full hash of pc, ghist and pathHist
197int
198LTAGE::gindex(ThreadID tid, Addr pc, int bank) const
199{
200    int index;
201    int hlen = (histLengths[bank] > pathHistBits) ? pathHistBits :
202                                                    histLengths[bank];
203    const Addr shiftedPc = pc >> instShiftAmt;
204    index =
205        shiftedPc ^
206        (shiftedPc >> ((int) abs(logTagTableSizes[bank] - bank) + 1)) ^
207        threadHistory[tid].computeIndices[bank].comp ^
208        F(threadHistory[tid].pathHist, hlen, bank);
209
210    return (index & ((ULL(1) << (logTagTableSizes[bank])) - 1));
211}
212
213
214// Tag computation
215uint16_t
216LTAGE::gtag(ThreadID tid, Addr pc, int bank) const
217{
218    int tag = (pc >> instShiftAmt) ^
219              threadHistory[tid].computeTags[0][bank].comp ^
220              (threadHistory[tid].computeTags[1][bank].comp << 1);
221
222    return (tag & ((ULL(1) << tagTableTagWidths[bank]) - 1));
223}
224
225
226// Up-down saturating counter
227void
228LTAGE::ctrUpdate(int8_t & ctr, bool taken, int nbits)
229{
230    assert(nbits <= sizeof(int8_t) << 3);
231    if (taken) {
232        if (ctr < ((1 << (nbits - 1)) - 1))
233            ctr++;
234    } else {
235        if (ctr > -(1 << (nbits - 1)))
236            ctr--;
237    }
238}
239
240// Up-down unsigned saturating counter
241void
242LTAGE::unsignedCtrUpdate(uint8_t & ctr, bool up, unsigned nbits)
243{
244    assert(nbits <= sizeof(uint8_t) << 3);
245    if (up) {
246        if (ctr < ((1 << nbits) - 1))
247            ctr++;
248    } else {
249        if (ctr)
250            ctr--;
251    }
252}
253
254// Bimodal prediction
255bool
256LTAGE::getBimodePred(Addr pc, BranchInfo* bi) const
257{
258    return btablePrediction[bi->bimodalIndex];
259}
260
261
262// Update the bimodal predictor: a hysteresis bit is shared among N prediction
263// bits (N = 2 ^ logRatioBiModalHystEntries)
264void
265LTAGE::baseUpdate(Addr pc, bool taken, BranchInfo* bi)
266{
267    int inter = (btablePrediction[bi->bimodalIndex] << 1)
268        + btableHysteresis[bi->bimodalIndex >> logRatioBiModalHystEntries];
269    if (taken) {
270        if (inter < 3)
271            inter++;
272    } else if (inter > 0) {
273        inter--;
274    }
275    const bool pred = inter >> 1;
276    const bool hyst = inter & 1;
277    btablePrediction[bi->bimodalIndex] = pred;
278    btableHysteresis[bi->bimodalIndex >> logRatioBiModalHystEntries] = hyst;
279    DPRINTF(LTage, "Updating branch %lx, pred:%d, hyst:%d\n", pc, pred, hyst);
280}
281
282
//loop prediction: only used if high confidence
// Look up the loop predictor for pc. Fills in the BranchInfo's loop
// fields (set index, tag, hit way, speculative iteration count) and
// returns the loop predictor's direction guess. Callers must check
// bi->loopPredValid before trusting the returned direction.
bool
LTAGE::getLoop(Addr pc, BranchInfo* bi) const
{
    bi->loopHit = -1;
    bi->loopPredValid = false;
    bi->loopIndex = lindex(pc);
    // The tag comes from the PC bits just above those used to select the
    // loop-table set.
    unsigned pcShift = instShiftAmt + logSizeLoopPred - logLoopTableAssoc;
    bi->loopTag = ((pc) >> pcShift) & loopTagMask;

    // Search every way of the selected set for a matching tag.
    for (int i = 0; i < (1 << logLoopTableAssoc); i++) {
        if (ltable[bi->loopIndex + i].tag == bi->loopTag) {
            bi->loopHit = i;
            // The prediction is only usable once the entry's confidence
            // counter has saturated.
            bi->loopPredValid =
                ltable[bi->loopIndex + i].confidence == confidenceThreshold;
            bi->currentIter = ltable[bi->loopIndex + i].currentIterSpec;
            // If the next (speculative) iteration reaches the recorded
            // trip count, predict the loop-exit direction; otherwise
            // predict the loop's steady direction.
            if (ltable[bi->loopIndex + i].currentIterSpec + 1 ==
                ltable[bi->loopIndex + i].numIter) {
                return !(ltable[bi->loopIndex + i].dir);
            }else {
                return (ltable[bi->loopIndex + i].dir);
            }
        }
    }
    return false;
}
309
310void
311LTAGE::specLoopUpdate(Addr pc, bool taken, BranchInfo* bi)
312{
313    if (bi->loopHit>=0) {
314        int index = lindex(pc);
315        if (taken != ltable[index].dir) {
316            ltable[index].currentIterSpec = 0;
317        } else {
318            ltable[index].currentIterSpec =
319                (ltable[index].currentIterSpec + 1) & loopNumIterMask;
320        }
321    }
322}
323
// Commit-time (non-speculative) training of the loop predictor for a
// resolved conditional branch. On a hit the entry's trip count and
// confidence are maintained; on a miss of a taken branch a new entry may
// be allocated.
void
LTAGE::loopUpdate(Addr pc, bool taken, BranchInfo* bi)
{
    int idx = bi->loopIndex + bi->loopHit;
    if (bi->loopHit >= 0) {
        //already a hit
        if (bi->loopPredValid) {
            if (taken != bi->loopPred) {
                // The (confident) loop prediction was wrong:
                // free the entry
                ltable[idx].numIter = 0;
                ltable[idx].age = 0;
                ltable[idx].confidence = 0;
                ltable[idx].currentIter = 0;
                return;
            } else if (bi->loopPred != bi->tagePred) {
                // The loop predictor beat TAGE: reward the entry so it
                // survives replacement longer.
                DPRINTF(LTage, "Loop Prediction success:%lx\n",pc);
                unsignedCtrUpdate(ltable[idx].age, true, loopTableAgeBits);
            }
        }

        ltable[idx].currentIter =
            (ltable[idx].currentIter + 1) & loopNumIterMask;
        if (ltable[idx].currentIter > ltable[idx].numIter) {
            // Ran past the recorded trip count: the entry is unreliable.
            ltable[idx].confidence = 0;
            if (ltable[idx].numIter != 0) {
                // free the entry
                ltable[idx].numIter = 0;
                ltable[idx].age = 0;
                ltable[idx].confidence = 0;
            }
        }

        if (taken != ltable[idx].dir) {
            // The branch left its steady direction: this is the loop exit.
            if (ltable[idx].currentIter == ltable[idx].numIter) {
                DPRINTF(LTage, "Loop End predicted successfully:%lx\n", pc);

                // Trip count repeated: grow confidence.
                unsignedCtrUpdate(ltable[idx].confidence, true,
                                  loopTableConfidenceBits);
                //just do not predict when the loop count is 1 or 2
                if (ltable[idx].numIter < 3) {
                    // free the entry
                    ltable[idx].dir = taken;
                    ltable[idx].numIter = 0;
                    ltable[idx].age = 0;
                    ltable[idx].confidence = 0;
                }
            } else {
                DPRINTF(LTage, "Loop End predicted incorrectly:%lx\n", pc);
                if (ltable[idx].numIter == 0) {
                    // first complete nest;
                    // record the observed trip count for next time.
                    ltable[idx].confidence = 0;
                    ltable[idx].numIter = ltable[idx].currentIter;
                } else {
                    //not the same number of iterations as last time: free the
                    //entry
                    ltable[idx].numIter = 0;
                    ltable[idx].age = 0;
                    ltable[idx].confidence = 0;
                }
            }
            ltable[idx].currentIter = 0;
        }

    } else if (taken) {
        //try to allocate an entry on taken branch
        // Start from a random way so replacement pressure is spread
        // across the set.
        int nrand = random_mt.random<int>();
        for (int i = 0; i < (1 << logLoopTableAssoc); i++) {
            int loop_hit = (nrand + i) & ((1 << logLoopTableAssoc) - 1);
            idx = bi->loopIndex + loop_hit;
            if (ltable[idx].age == 0) {
                DPRINTF(LTage, "Allocating loop pred entry for branch %lx\n",
                        pc);
                ltable[idx].dir = !taken;
                ltable[idx].tag = bi->loopTag;
                ltable[idx].numIter = 0;
                ltable[idx].age = (1 << loopTableAgeBits) - 1;
                ltable[idx].confidence = 0;
                ltable[idx].currentIter = 1;
                break;

            }
            else
                // Age every victim candidate we skip; eventually one of
                // them becomes allocatable.
                ltable[idx].age--;
        }
    }

}
411
// shifting the global history:  we manage the history in a big table in order
// to reduce simulation time
// The history grows backwards from a moving head pointer (h / offset pt):
// h[0] is the most recent outcome. When the pointer hits the start of the
// buffer, the newest maxHist outcomes are copied to the end and the
// pointer is rewound, so no per-prediction shifting is ever needed.
void
LTAGE::updateGHist(uint8_t * &h, bool dir, uint8_t * tab, int &pt)
{
    if (pt == 0) {
        DPRINTF(LTage, "Rolling over the histories\n");
         // Copy beginning of globalHistoryBuffer to end, such that
         // the last maxHist outcomes are still reachable
         // through pt[0 .. maxHist - 1].
         for (int i = 0; i < maxHist; i++)
             tab[histBufferSize - maxHist + i] = tab[i];
         pt =  histBufferSize - maxHist;
         h = &tab[pt];
    }
    pt--;
    h--;
    // Record the newest outcome at the (new) head of the history.
    h[0] = (dir) ? 1 : 0;
}
431
432// Get GHR for hashing indirect predictor
433// Build history backwards from pointer in
434// bp_history.
435unsigned
436LTAGE::getGHR(ThreadID tid, void *bp_history) const
437{
438    BranchInfo* bi = static_cast<BranchInfo*>(bp_history);
439    unsigned val = 0;
440    for (unsigned i = 0; i < 32; i++) {
441        // Make sure we don't go out of bounds
442        int gh_offset = bi->ptGhist + i;
443        assert(&(threadHistory[tid].globalHistory[gh_offset]) <
444               threadHistory[tid].globalHistory + histBufferSize);
445        val |= ((threadHistory[tid].globalHistory[gh_offset] & 0x1) << i);
446    }
447
448    return val;
449}
450
//prediction
// Compute the direction prediction for branch_pc. TAGE supplies the base
// prediction (longest-matching tagged table, falling back to bimodal on a
// miss); the loop predictor overrides it when its prediction is valid and
// the loopUseCounter chooser is non-negative. All state needed for the
// later update/squash is stored in a heap-allocated BranchInfo returned
// through b (freed in update()/squash()).
bool
LTAGE::predict(ThreadID tid, Addr branch_pc, bool cond_branch, void* &b)
{
    BranchInfo *bi = new BranchInfo(nHistoryTables+1);
    b = (void*)(bi);
    Addr pc = branch_pc;
    bool pred_taken = true;
    bi->loopHit = -1;

    if (cond_branch) {
        // TAGE prediction

        // computes the table addresses and the partial tags
        for (int i = 1; i <= nHistoryTables; i++) {
            tableIndices[i] = gindex(tid, pc, i);
            bi->tableIndices[i] = tableIndices[i];
            tableTags[i] = gtag(tid, pc, i);
            bi->tableTags[i] = tableTags[i];
        }

        bi->bimodalIndex = bindex(pc);

        bi->hitBank = 0;
        bi->altBank = 0;
        //Look for the bank with longest matching history
        for (int i = nHistoryTables; i > 0; i--) {
            if (gtable[i][tableIndices[i]].tag == tableTags[i]) {
                bi->hitBank = i;
                bi->hitBankIndex = tableIndices[bi->hitBank];
                break;
            }
        }
        //Look for the alternate bank
        for (int i = bi->hitBank - 1; i > 0; i--) {
            if (gtable[i][tableIndices[i]].tag == tableTags[i]) {
                bi->altBank = i;
                bi->altBankIndex = tableIndices[bi->altBank];
                break;
            }
        }
        //computes the prediction and the alternate prediction
        if (bi->hitBank > 0) {
            if (bi->altBank > 0) {
                bi->altTaken =
                    gtable[bi->altBank][tableIndices[bi->altBank]].ctr >= 0;
            }else {
                bi->altTaken = getBimodePred(pc, bi);
            }

            bi->longestMatchPred =
                gtable[bi->hitBank][tableIndices[bi->hitBank]].ctr >= 0;
            // A weak counter (|2*ctr + 1| <= 1) marks the entry as
            // "pseudo newly allocated".
            bi->pseudoNewAlloc =
                abs(2 * gtable[bi->hitBank][bi->hitBankIndex].ctr + 1) <= 1;

            //if the entry is recognized as a newly allocated entry and
            //useAltPredForNewlyAllocated is positive use the alternate
            //prediction
            if ((useAltPredForNewlyAllocated < 0)
                   || abs(2 *
                   gtable[bi->hitBank][tableIndices[bi->hitBank]].ctr + 1) > 1)
                bi->tagePred = bi->longestMatchPred;
            else
                bi->tagePred = bi->altTaken;
        } else {
            // No tagged table hit: everything falls back to bimodal.
            bi->altTaken = getBimodePred(pc, bi);
            bi->tagePred = bi->altTaken;
            bi->longestMatchPred = bi->altTaken;
        }
        //end TAGE prediction

        bi->loopPred = getLoop(pc, bi);	// loop prediction

        // Let the loop predictor override TAGE only when its prediction
        // is valid and the chooser currently favors it.
        pred_taken = (((loopUseCounter >= 0) && bi->loopPredValid)) ?
                     (bi->loopPred): (bi->tagePred);
        DPRINTF(LTage, "Predict for %lx: taken?:%d, loopTaken?:%d, "
                "loopValid?:%d, loopUseCounter:%d, tagePred:%d, altPred:%d\n",
                branch_pc, pred_taken, bi->loopPred, bi->loopPredValid,
                loopUseCounter, bi->tagePred, bi->altTaken);
    }
    bi->branchPC = branch_pc;
    bi->condBranch = cond_branch;
    // Speculatively advance the loop predictor's iteration counter.
    specLoopUpdate(branch_pc, pred_taken, bi);
    return pred_taken;
}
536
537// PREDICTOR UPDATE
538void
539LTAGE::update(ThreadID tid, Addr branch_pc, bool taken, void* bp_history,
540              bool squashed)
541{
542    assert(bp_history);
543
544    BranchInfo *bi = static_cast<BranchInfo*>(bp_history);
545
546    if (squashed) {
547        // This restores the global history, then update it
548        // and recomputes the folded histories.
549        squash(tid, taken, bp_history);
550        return;
551    }
552
553    int nrand  = random_mt.random<int>(0,3);
554    Addr pc = branch_pc;
555    if (bi->condBranch) {
556        DPRINTF(LTage, "Updating tables for branch:%lx; taken?:%d\n",
557                branch_pc, taken);
558        // first update the loop predictor
559        loopUpdate(pc, taken, bi);
560
561        if (bi->loopPredValid) {
562            if (bi->tagePred != bi->loopPred) {
563                ctrUpdate(loopUseCounter,
564                          (bi->loopPred == taken),
565                          withLoopBits);
566            }
567        }
568
569        // TAGE UPDATE
570        // try to allocate a  new entries only if prediction was wrong
571        bool longest_match_pred = false;
572        bool alloc = (bi->tagePred != taken) && (bi->hitBank < nHistoryTables);
573        if (bi->hitBank > 0) {
574            // Manage the selection between longest matching and alternate
575            // matching for "pseudo"-newly allocated longest matching entry
576             longest_match_pred = bi->longestMatchPred;
577            bool PseudoNewAlloc = bi->pseudoNewAlloc;
578            // an entry is considered as newly allocated if its prediction
579            // counter is weak
580            if (PseudoNewAlloc) {
581                if (longest_match_pred == taken) {
582                    alloc = false;
583                }
584                // if it was delivering the correct prediction, no need to
585                // allocate new entry even if the overall prediction was false
586                if (longest_match_pred != bi->altTaken) {
587                    ctrUpdate(useAltPredForNewlyAllocated,
588                         bi->altTaken == taken, useAltOnNaBits);
589                }
590            }
591        }
592
593        if (alloc) {
594            // is there some "unuseful" entry to allocate
595            uint8_t min = 1;
596            for (int i = nHistoryTables; i > bi->hitBank; i--) {
597                if (gtable[i][bi->tableIndices[i]].u < min) {
598                    min = gtable[i][bi->tableIndices[i]].u;
599                }
600            }
601
602            // we allocate an entry with a longer history
603            // to  avoid ping-pong, we do not choose systematically the next
604            // entry, but among the 3 next entries
605            int Y = nrand &
606                ((ULL(1) << (nHistoryTables - bi->hitBank - 1)) - 1);
607            int X = bi->hitBank + 1;
608            if (Y & 1) {
609                X++;
610                if (Y & 2)
611                    X++;
612            }
613            // No entry available, forces one to be available
614            if (min > 0) {
615                gtable[X][bi->tableIndices[X]].u = 0;
616            }
617
618
619            //Allocate only  one entry
620            for (int i = X; i <= nHistoryTables; i++) {
621                if ((gtable[i][bi->tableIndices[i]].u == 0)) {
622                    gtable[i][bi->tableIndices[i]].tag = bi->tableTags[i];
623                    gtable[i][bi->tableIndices[i]].ctr = (taken) ? 0 : -1;
624                    break;
625                }
626            }
627        }
628        //periodic reset of u: reset is not complete but bit by bit
629        tCounter++;
630        if ((tCounter & ((ULL(1) << logUResetPeriod) - 1)) == 0) {
631            // reset least significant bit
632            // most significant bit becomes least significant bit
633            for (int i = 1; i <= nHistoryTables; i++) {
634                for (int j = 0; j < (ULL(1) << logTagTableSizes[i]); j++) {
635                    gtable[i][j].u = gtable[i][j].u >> 1;
636                }
637            }
638        }
639
640        if (bi->hitBank > 0) {
641            DPRINTF(LTage, "Updating tag table entry (%d,%d) for branch %lx\n",
642                    bi->hitBank, bi->hitBankIndex, branch_pc);
643            ctrUpdate(gtable[bi->hitBank][bi->hitBankIndex].ctr, taken,
644                      tagTableCounterBits);
645            // if the provider entry is not certified to be useful also update
646            // the alternate prediction
647            if (gtable[bi->hitBank][bi->hitBankIndex].u == 0) {
648                if (bi->altBank > 0) {
649                    ctrUpdate(gtable[bi->altBank][bi->altBankIndex].ctr, taken,
650                              tagTableCounterBits);
651                    DPRINTF(LTage, "Updating tag table entry (%d,%d) for"
652                            " branch %lx\n", bi->hitBank, bi->hitBankIndex,
653                            branch_pc);
654                }
655                if (bi->altBank == 0) {
656                    baseUpdate(pc, taken, bi);
657                }
658            }
659
660            // update the u counter
661            if (bi->tagePred != bi->altTaken) {
662                unsignedCtrUpdate(gtable[bi->hitBank][bi->hitBankIndex].u,
663                                  bi->tagePred == taken, tagTableUBits);
664            }
665        } else {
666            baseUpdate(pc, taken, bi);
667        }
668
669        //END PREDICTOR UPDATE
670    }
671    if (!squashed) {
672        delete bi;
673    }
674}
675
// Push the outcome of branch_pc into the global and path histories and
// refresh the folded index/tag histories. The pre-update folded values
// are saved in the BranchInfo so squash()/btbUpdate() can restore them.
void
LTAGE::updateHistories(ThreadID tid, Addr branch_pc, bool taken, void* b)
{
    BranchInfo* bi = (BranchInfo*)(b);
    ThreadHistory& tHist = threadHistory[tid];
    //  UPDATE HISTORIES
    bool pathbit = ((branch_pc >> instShiftAmt) & 1);
    //on a squash, return pointers to this and recompute indices.
    //update user history
    updateGHist(tHist.gHist, taken, tHist.globalHistory, tHist.ptGhist);
    // Shift one PC bit into the path history, truncated to pathHistBits.
    tHist.pathHist = (tHist.pathHist << 1) + pathbit;
    tHist.pathHist = (tHist.pathHist & ((ULL(1) << pathHistBits) - 1));

    // Save the post-update pointers so a squash can restore this state.
    bi->ptGhist = tHist.ptGhist;
    bi->pathHist = tHist.pathHist;
    //prepare next index and tag computations for user branchs
    for (int i = 1; i <= nHistoryTables; i++)
    {
        bi->ci[i]  = tHist.computeIndices[i].comp;
        bi->ct0[i] = tHist.computeTags[0][i].comp;
        bi->ct1[i] = tHist.computeTags[1][i].comp;
        tHist.computeIndices[i].update(tHist.gHist);
        tHist.computeTags[0][i].update(tHist.gHist);
        tHist.computeTags[1][i].update(tHist.gHist);
    }
    DPRINTF(LTage, "Updating global histories with branch:%lx; taken?:%d, "
            "path Hist: %x; pointer:%d\n", branch_pc, taken, tHist.pathHist,
            tHist.ptGhist);
}
705
// Squash recovery with a corrected outcome: restore the path/global
// history state saved at prediction time, overwrite the history head with
// the actual direction, recompute the folded histories, and roll back the
// loop predictor's speculative iteration count.
void
LTAGE::squash(ThreadID tid, bool taken, void *bp_history)
{
    BranchInfo* bi = (BranchInfo*)(bp_history);
    ThreadHistory& tHist = threadHistory[tid];
    DPRINTF(LTage, "Restoring branch info: %lx; taken? %d; PathHistory:%x, "
            "pointer:%d\n", bi->branchPC,taken, bi->pathHist, bi->ptGhist);
    tHist.pathHist = bi->pathHist;
    tHist.ptGhist = bi->ptGhist;
    tHist.gHist = &(tHist.globalHistory[tHist.ptGhist]);
    // Replace the speculatively-inserted outcome with the real one.
    tHist.gHist[0] = (taken ? 1 : 0);
    for (int i = 1; i <= nHistoryTables; i++) {
        tHist.computeIndices[i].comp = bi->ci[i];
        tHist.computeTags[0][i].comp = bi->ct0[i];
        tHist.computeTags[1][i].comp = bi->ct1[i];
        tHist.computeIndices[i].update(tHist.gHist);
        tHist.computeTags[0][i].update(tHist.gHist);
        tHist.computeTags[1][i].update(tHist.gHist);
    }

    if (bi->condBranch) {
        if (bi->loopHit >= 0) {
            // Restore the loop iteration count seen at prediction time.
            int idx = bi->loopIndex + bi->loopHit;
            ltable[idx].currentIterSpec = bi->currentIter;
        }
    }

}
734
735void
736LTAGE::squash(ThreadID tid, void *bp_history)
737{
738    BranchInfo* bi = (BranchInfo*)(bp_history);
739    DPRINTF(LTage, "Deleting branch info: %lx\n", bi->branchPC);
740    if (bi->condBranch) {
741        if (bi->loopHit >= 0) {
742            int idx = bi->loopIndex + bi->loopHit;
743            ltable[idx].currentIterSpec = bi->currentIter;
744        }
745    }
746
747    delete bi;
748}
749
750bool
751LTAGE::lookup(ThreadID tid, Addr branch_pc, void* &bp_history)
752{
753    bool retval = predict(tid, branch_pc, true, bp_history);
754
755    DPRINTF(LTage, "Lookup branch: %lx; predict:%d\n", branch_pc, retval);
756    updateHistories(tid, branch_pc, retval, bp_history);
757    assert(threadHistory[tid].gHist ==
758           &threadHistory[tid].globalHistory[threadHistory[tid].ptGhist]);
759
760    return retval;
761}
762
// Called on a BTB miss after lookup(): the speculative history insertion
// for this branch must be neutralized, so the just-written history bit is
// cleared and the folded histories are recomputed from the pre-update
// values saved in the BranchInfo.
void
LTAGE::btbUpdate(ThreadID tid, Addr branch_pc, void* &bp_history)
{
    BranchInfo* bi = (BranchInfo*) bp_history;
    ThreadHistory& tHist = threadHistory[tid];
    DPRINTF(LTage, "BTB miss resets prediction: %lx\n", branch_pc);
    assert(tHist.gHist == &tHist.globalHistory[tHist.ptGhist]);
    // Record the branch as not-taken in the global history.
    tHist.gHist[0] = 0;
    for (int i = 1; i <= nHistoryTables; i++) {
        tHist.computeIndices[i].comp = bi->ci[i];
        tHist.computeTags[0][i].comp = bi->ct0[i];
        tHist.computeTags[1][i].comp = bi->ct1[i];
        tHist.computeIndices[i].update(tHist.gHist);
        tHist.computeTags[0][i].update(tHist.gHist);
        tHist.computeTags[1][i].update(tHist.gHist);
    }
}
780
781void
782LTAGE::uncondBranch(ThreadID tid, Addr br_pc, void* &bp_history)
783{
784    DPRINTF(LTage, "UnConditionalBranch: %lx\n", br_pc);
785    predict(tid, br_pc, false, bp_history);
786    updateHistories(tid, br_pc, true, bp_history);
787    assert(threadHistory[tid].gHist ==
788           &threadHistory[tid].globalHistory[threadHistory[tid].ptGhist]);
789}
790
// Factory hook used by the simulator's parameter system to instantiate
// the predictor from its configuration object.
LTAGE*
LTAGEParams::create()
{
    return new LTAGE(this);
}
796