ltage.cc revision 13442
1/*
2 * Copyright (c) 2014 The University of Wisconsin
3 *
4 * Copyright (c) 2006 INRIA (Institut National de Recherche en
5 * Informatique et en Automatique  / French National Research Institute
6 * for Computer Science and Applied Mathematics)
7 *
8 * All rights reserved.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions are
12 * met: redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer;
14 * redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution;
17 * neither the name of the copyright holders nor the names of its
18 * contributors may be used to endorse or promote products derived from
19 * this software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 *
33 * Authors: Vignyan Reddy, Dibakar Gope and Arthur Perais,
34 * from André Seznec's code.
35 */
36
/* @file
 * Implementation of an L-TAGE branch predictor
 */
40
41#include "cpu/pred/ltage.hh"
42
43#include "base/intmath.hh"
44#include "base/logging.hh"
45#include "base/random.hh"
46#include "base/trace.hh"
47#include "debug/Fetch.hh"
48#include "debug/LTage.hh"
49
50LTAGE::LTAGE(const LTAGEParams *params)
51  : BPredUnit(params),
52    logSizeBiMP(params->logSizeBiMP),
53    logRatioBiModalHystEntries(params->logRatioBiModalHystEntries),
54    logSizeTagTables(params->logSizeTagTables),
55    logSizeLoopPred(params->logSizeLoopPred),
56    nHistoryTables(params->nHistoryTables),
57    tagTableCounterBits(params->tagTableCounterBits),
58    histBufferSize(params->histBufferSize),
59    minHist(params->minHist),
60    maxHist(params->maxHist),
61    minTagWidth(params->minTagWidth),
62    loopTableAgeBits(params->loopTableAgeBits),
63    loopTableConfidenceBits(params->loopTableConfidenceBits),
64    loopTableTagBits(params->loopTableTagBits),
65    loopTableIterBits(params->loopTableIterBits),
66    confidenceThreshold((1 << loopTableConfidenceBits) - 1),
67    loopTagMask((1 << loopTableTagBits) - 1),
68    loopNumIterMask((1 << loopTableIterBits) - 1),
69    threadHistory(params->numThreads)
70{
71    // we use uint16_t type for these vales, so they cannot be more than
72    // 16 bits
73    assert(loopTableTagBits <= 16);
74    assert(loopTableIterBits <= 16);
75
76    assert(params->histBufferSize > params->maxHist * 2);
77    useAltPredForNewlyAllocated = 0;
78    logTick = 19;
79    tCounter = ULL(1) << (logTick - 1);
80
81    for (auto& history : threadHistory) {
82        history.pathHist = 0;
83        history.globalHistory = new uint8_t[histBufferSize];
84        history.gHist = history.globalHistory;
85        memset(history.gHist, 0, histBufferSize);
86        history.ptGhist = 0;
87    }
88
89    histLengths = new int [nHistoryTables+1];
90    histLengths[1] = minHist;
91    histLengths[nHistoryTables] = maxHist;
92
93    for (int i = 2; i <= nHistoryTables; i++) {
94        histLengths[i] = (int) (((double) minHist *
95                    pow ((double) (maxHist) / (double) minHist,
96                        (double) (i - 1) / (double) ((nHistoryTables- 1))))
97                    + 0.5);
98    }
99
100    tagWidths[1] = minTagWidth;
101    tagWidths[2] = minTagWidth;
102    tagWidths[3] = minTagWidth + 1;
103    tagWidths[4] = minTagWidth + 1;
104    tagWidths[5] = minTagWidth + 2;
105    tagWidths[6] = minTagWidth + 3;
106    tagWidths[7] = minTagWidth + 4;
107    tagWidths[8] = minTagWidth + 5;
108    tagWidths[9] = minTagWidth + 5;
109    tagWidths[10] = minTagWidth + 6;
110    tagWidths[11] = minTagWidth + 7;
111    tagWidths[12] = minTagWidth + 8;
112
113    for (int i = 1; i <= 2; i++)
114        tagTableSizes[i] = logSizeTagTables - 1;
115    for (int i = 3; i <= 6; i++)
116        tagTableSizes[i] = logSizeTagTables;
117    for (int i = 7; i <= 10; i++)
118        tagTableSizes[i] = logSizeTagTables - 1;
119    for (int i = 11; i <= 12; i++)
120        tagTableSizes[i] = logSizeTagTables - 2;
121
122    for (auto& history : threadHistory) {
123        history.computeIndices = new FoldedHistory[nHistoryTables+1];
124        history.computeTags[0] = new FoldedHistory[nHistoryTables+1];
125        history.computeTags[1] = new FoldedHistory[nHistoryTables+1];
126
127        for (int i = 1; i <= nHistoryTables; i++) {
128            history.computeIndices[i].init(histLengths[i], (tagTableSizes[i]));
129            history.computeTags[0][i].init(
130                history.computeIndices[i].origLength, tagWidths[i]);
131            history.computeTags[1][i].init(
132                history.computeIndices[i].origLength, tagWidths[i] - 1);
133            DPRINTF(LTage, "HistLength:%d, TTSize:%d, TTTWidth:%d\n",
134                    histLengths[i], tagTableSizes[i], tagWidths[i]);
135        }
136    }
137
138    const uint64_t bimodalTableSize = ULL(1) << logSizeBiMP;
139    btablePrediction.resize(bimodalTableSize, false);
140    btableHysteresis.resize(bimodalTableSize >> logRatioBiModalHystEntries,
141                            true);
142
143    ltable = new LoopEntry[ULL(1) << logSizeLoopPred];
144    gtable = new TageEntry*[nHistoryTables + 1];
145    for (int i = 1; i <= nHistoryTables; i++) {
146        gtable[i] = new TageEntry[1<<(tagTableSizes[i])];
147    }
148
149    tableIndices = new int [nHistoryTables+1];
150    tableTags = new int [nHistoryTables+1];
151
152    loopUseCounter = 0;
153}
154
155int
156LTAGE::bindex(Addr pc_in) const
157{
158    return ((pc_in >> instShiftAmt) & ((ULL(1) << (logSizeBiMP)) - 1));
159}
160
161int
162LTAGE::lindex(Addr pc_in) const
163{
164    return (((pc_in >> instShiftAmt) &
165             ((ULL(1) << (logSizeLoopPred - 2)) - 1)) << 2);
166}
167
168int
169LTAGE::F(int A, int size, int bank) const
170{
171    int A1, A2;
172
173    A = A & ((ULL(1) << size) - 1);
174    A1 = (A & ((ULL(1) << tagTableSizes[bank]) - 1));
175    A2 = (A >> tagTableSizes[bank]);
176    A2 = ((A2 << bank) & ((ULL(1) << tagTableSizes[bank]) - 1))
177       + (A2 >> (tagTableSizes[bank] - bank));
178    A = A1 ^ A2;
179    A = ((A << bank) & ((ULL(1) << tagTableSizes[bank]) - 1))
180      + (A >> (tagTableSizes[bank] - bank));
181    return (A);
182}
183
184
185// gindex computes a full hash of pc, ghist and pathHist
186int
187LTAGE::gindex(ThreadID tid, Addr pc, int bank) const
188{
189    int index;
190    int hlen = (histLengths[bank] > 16) ? 16 : histLengths[bank];
191    index =
192        (pc >> instShiftAmt) ^
193        ((pc >> instShiftAmt) >> ((int) abs(tagTableSizes[bank] - bank) + 1)) ^
194        threadHistory[tid].computeIndices[bank].comp ^
195        F(threadHistory[tid].pathHist, hlen, bank);
196
197    return (index & ((ULL(1) << (tagTableSizes[bank])) - 1));
198}
199
200
201// Tag computation
202uint16_t
203LTAGE::gtag(ThreadID tid, Addr pc, int bank) const
204{
205    int tag = (pc >> instShiftAmt) ^
206              threadHistory[tid].computeTags[0][bank].comp ^
207              (threadHistory[tid].computeTags[1][bank].comp << 1);
208
209    return (tag & ((ULL(1) << tagWidths[bank]) - 1));
210}
211
212
213// Up-down saturating counter
214void
215LTAGE::ctrUpdate(int8_t & ctr, bool taken, int nbits)
216{
217    assert(nbits <= sizeof(int8_t) << 3);
218    if (taken) {
219        if (ctr < ((1 << (nbits - 1)) - 1))
220            ctr++;
221    } else {
222        if (ctr > -(1 << (nbits - 1)))
223            ctr--;
224    }
225}
226
227// Up-down unsigned saturating counter
228void
229LTAGE::unsignedCtrUpdate(uint8_t & ctr, bool up, unsigned nbits)
230{
231    assert(nbits <= sizeof(uint8_t) << 3);
232    if (up) {
233        if (ctr < ((1 << nbits) - 1))
234            ctr++;
235    } else {
236        if (ctr)
237            ctr--;
238    }
239}
240
241// Bimodal prediction
242bool
243LTAGE::getBimodePred(Addr pc, BranchInfo* bi) const
244{
245    return btablePrediction[bi->bimodalIndex];
246}
247
248
249// Update the bimodal predictor: a hysteresis bit is shared among N prediction
250// bits (N = 2 ^ logRatioBiModalHystEntries)
251void
252LTAGE::baseUpdate(Addr pc, bool taken, BranchInfo* bi)
253{
254    int inter = (btablePrediction[bi->bimodalIndex] << 1)
255        + btableHysteresis[bi->bimodalIndex >> logRatioBiModalHystEntries];
256    if (taken) {
257        if (inter < 3)
258            inter++;
259    } else if (inter > 0) {
260        inter--;
261    }
262    const bool pred = inter >> 1;
263    const bool hyst = inter & 1;
264    btablePrediction[bi->bimodalIndex] = pred;
265    btableHysteresis[bi->bimodalIndex >> logRatioBiModalHystEntries] = hyst;
266    DPRINTF(LTage, "Updating branch %lx, pred:%d, hyst:%d\n", pc, pred, hyst);
267}
268
269
270//loop prediction: only used if high confidence
271bool
272LTAGE::getLoop(Addr pc, BranchInfo* bi) const
273{
274    bi->loopHit = -1;
275    bi->loopPredValid = false;
276    bi->loopIndex = lindex(pc);
277    bi->loopTag = ((pc) >> (instShiftAmt + logSizeLoopPred - 2)) & loopTagMask;
278
279    for (int i = 0; i < 4; i++) {
280        if (ltable[bi->loopIndex + i].tag == bi->loopTag) {
281            bi->loopHit = i;
282            bi->loopPredValid =
283                ltable[bi->loopIndex + i].confidence == confidenceThreshold;
284            bi->currentIter = ltable[bi->loopIndex + i].currentIterSpec;
285            if (ltable[bi->loopIndex + i].currentIterSpec + 1 ==
286                ltable[bi->loopIndex + i].numIter) {
287                return !(ltable[bi->loopIndex + i].dir);
288            }else {
289                return (ltable[bi->loopIndex + i].dir);
290            }
291        }
292    }
293    return false;
294}
295
296void
297LTAGE::specLoopUpdate(Addr pc, bool taken, BranchInfo* bi)
298{
299    if (bi->loopHit>=0) {
300        int index = lindex(pc);
301        if (taken != ltable[index].dir) {
302            ltable[index].currentIterSpec = 0;
303        } else {
304            ltable[index].currentIterSpec =
305                (ltable[index].currentIterSpec + 1) & loopNumIterMask;
306        }
307    }
308}
309
310void
311LTAGE::loopUpdate(Addr pc, bool taken, BranchInfo* bi)
312{
313    int idx = bi->loopIndex + bi->loopHit;
314    if (bi->loopHit >= 0) {
315        //already a hit
316        if (bi->loopPredValid) {
317            if (taken != bi->loopPred) {
318                // free the entry
319                ltable[idx].numIter = 0;
320                ltable[idx].age = 0;
321                ltable[idx].confidence = 0;
322                ltable[idx].currentIter = 0;
323                return;
324            } else if (bi->loopPred != bi->tagePred) {
325                DPRINTF(LTage, "Loop Prediction success:%lx\n",pc);
326                unsignedCtrUpdate(ltable[idx].age, true, loopTableAgeBits);
327            }
328        }
329
330        ltable[idx].currentIter =
331            (ltable[idx].currentIter + 1) & loopNumIterMask;
332        if (ltable[idx].currentIter > ltable[idx].numIter) {
333            ltable[idx].confidence = 0;
334            if (ltable[idx].numIter != 0) {
335                // free the entry
336                ltable[idx].numIter = 0;
337                ltable[idx].age = 0;
338                ltable[idx].confidence = 0;
339            }
340        }
341
342        if (taken != ltable[idx].dir) {
343            if (ltable[idx].currentIter == ltable[idx].numIter) {
344                DPRINTF(LTage, "Loop End predicted successfully:%lx\n", pc);
345
346                unsignedCtrUpdate(ltable[idx].confidence, true,
347                                  loopTableConfidenceBits);
348                //just do not predict when the loop count is 1 or 2
349                if (ltable[idx].numIter < 3) {
350                    // free the entry
351                    ltable[idx].dir = taken;
352                    ltable[idx].numIter = 0;
353                    ltable[idx].age = 0;
354                    ltable[idx].confidence = 0;
355                }
356            } else {
357                DPRINTF(LTage, "Loop End predicted incorrectly:%lx\n", pc);
358                if (ltable[idx].numIter == 0) {
359                    // first complete nest;
360                    ltable[idx].confidence = 0;
361                    ltable[idx].numIter = ltable[idx].currentIter;
362                } else {
363                    //not the same number of iterations as last time: free the
364                    //entry
365                    ltable[idx].numIter = 0;
366                    ltable[idx].age = 0;
367                    ltable[idx].confidence = 0;
368                }
369            }
370            ltable[idx].currentIter = 0;
371        }
372
373    } else if (taken) {
374        //try to allocate an entry on taken branch
375        int nrand = random_mt.random<int>();
376        for (int i = 0; i < 4; i++) {
377            int loop_hit = (nrand + i) & 3;
378            idx = bi->loopIndex + loop_hit;
379            if (ltable[idx].age == 0) {
380                DPRINTF(LTage, "Allocating loop pred entry for branch %lx\n",
381                        pc);
382                ltable[idx].dir = !taken;
383                ltable[idx].tag = bi->loopTag;
384                ltable[idx].numIter = 0;
385                ltable[idx].age = (1 << loopTableAgeBits) - 1;
386                ltable[idx].confidence = 0;
387                ltable[idx].currentIter = 1;
388                break;
389
390            }
391            else
392                ltable[idx].age--;
393        }
394    }
395
396}
397
398// shifting the global history:  we manage the history in a big table in order
399// to reduce simulation time
400void
401LTAGE::updateGHist(uint8_t * &h, bool dir, uint8_t * tab, int &pt)
402{
403    if (pt == 0) {
404        DPRINTF(LTage, "Rolling over the histories\n");
405         // Copy beginning of globalHistoryBuffer to end, such that
406         // the last maxHist outcomes are still reachable
407         // through pt[0 .. maxHist - 1].
408         for (int i = 0; i < maxHist; i++)
409             tab[histBufferSize - maxHist + i] = tab[i];
410         pt =  histBufferSize - maxHist;
411         h = &tab[pt];
412    }
413    pt--;
414    h--;
415    h[0] = (dir) ? 1 : 0;
416}
417
418// Get GHR for hashing indirect predictor
419// Build history backwards from pointer in
420// bp_history.
421unsigned
422LTAGE::getGHR(ThreadID tid, void *bp_history) const
423{
424    BranchInfo* bi = static_cast<BranchInfo*>(bp_history);
425    unsigned val = 0;
426    for (unsigned i = 0; i < 32; i++) {
427        // Make sure we don't go out of bounds
428        int gh_offset = bi->ptGhist + i;
429        assert(&(threadHistory[tid].globalHistory[gh_offset]) <
430               threadHistory[tid].globalHistory + histBufferSize);
431        val |= ((threadHistory[tid].globalHistory[gh_offset] & 0x1) << i);
432    }
433
434    return val;
435}
436
437//prediction
438bool
439LTAGE::predict(ThreadID tid, Addr branch_pc, bool cond_branch, void* &b)
440{
441    BranchInfo *bi = new BranchInfo(nHistoryTables+1);
442    b = (void*)(bi);
443    Addr pc = branch_pc;
444    bool pred_taken = true;
445    bi->loopHit = -1;
446
447    if (cond_branch) {
448        // TAGE prediction
449
450        // computes the table addresses and the partial tags
451        for (int i = 1; i <= nHistoryTables; i++) {
452            tableIndices[i] = gindex(tid, pc, i);
453            bi->tableIndices[i] = tableIndices[i];
454            tableTags[i] = gtag(tid, pc, i);
455            bi->tableTags[i] = tableTags[i];
456        }
457
458        bi->bimodalIndex = bindex(pc);
459
460        bi->hitBank = 0;
461        bi->altBank = 0;
462        //Look for the bank with longest matching history
463        for (int i = nHistoryTables; i > 0; i--) {
464            if (gtable[i][tableIndices[i]].tag == tableTags[i]) {
465                bi->hitBank = i;
466                bi->hitBankIndex = tableIndices[bi->hitBank];
467                break;
468            }
469        }
470        //Look for the alternate bank
471        for (int i = bi->hitBank - 1; i > 0; i--) {
472            if (gtable[i][tableIndices[i]].tag == tableTags[i]) {
473                bi->altBank = i;
474                bi->altBankIndex = tableIndices[bi->altBank];
475                break;
476            }
477        }
478        //computes the prediction and the alternate prediction
479        if (bi->hitBank > 0) {
480            if (bi->altBank > 0) {
481                bi->altTaken =
482                    gtable[bi->altBank][tableIndices[bi->altBank]].ctr >= 0;
483            }else {
484                bi->altTaken = getBimodePred(pc, bi);
485            }
486
487            bi->longestMatchPred =
488                gtable[bi->hitBank][tableIndices[bi->hitBank]].ctr >= 0;
489            bi->pseudoNewAlloc =
490                abs(2 * gtable[bi->hitBank][bi->hitBankIndex].ctr + 1) <= 1;
491
492            //if the entry is recognized as a newly allocated entry and
493            //useAltPredForNewlyAllocated is positive use the alternate
494            //prediction
495            if ((useAltPredForNewlyAllocated < 0)
496                   || abs(2 *
497                   gtable[bi->hitBank][tableIndices[bi->hitBank]].ctr + 1) > 1)
498                bi->tagePred = bi->longestMatchPred;
499            else
500                bi->tagePred = bi->altTaken;
501        } else {
502            bi->altTaken = getBimodePred(pc, bi);
503            bi->tagePred = bi->altTaken;
504            bi->longestMatchPred = bi->altTaken;
505        }
506        //end TAGE prediction
507
508        bi->loopPred = getLoop(pc, bi);	// loop prediction
509
510        pred_taken = (((loopUseCounter >= 0) && bi->loopPredValid)) ?
511                     (bi->loopPred): (bi->tagePred);
512        DPRINTF(LTage, "Predict for %lx: taken?:%d, loopTaken?:%d, "
513                "loopValid?:%d, loopUseCounter:%d, tagePred:%d, altPred:%d\n",
514                branch_pc, pred_taken, bi->loopPred, bi->loopPredValid,
515                loopUseCounter, bi->tagePred, bi->altTaken);
516    }
517    bi->branchPC = branch_pc;
518    bi->condBranch = cond_branch;
519    specLoopUpdate(branch_pc, pred_taken, bi);
520    return pred_taken;
521}
522
523// PREDICTOR UPDATE
524void
525LTAGE::update(ThreadID tid, Addr branch_pc, bool taken, void* bp_history,
526              bool squashed)
527{
528    assert(bp_history);
529
530    BranchInfo *bi = static_cast<BranchInfo*>(bp_history);
531
532    if (squashed) {
533        // This restores the global history, then update it
534        // and recomputes the folded histories.
535        squash(tid, taken, bp_history);
536        return;
537    }
538
539    int nrand  = random_mt.random<int>(0,3);
540    Addr pc = branch_pc;
541    if (bi->condBranch) {
542        DPRINTF(LTage, "Updating tables for branch:%lx; taken?:%d\n",
543                branch_pc, taken);
544        // first update the loop predictor
545        loopUpdate(pc, taken, bi);
546
547        if (bi->loopPredValid) {
548            if (bi->tagePred != bi->loopPred) {
549                ctrUpdate(loopUseCounter, (bi->loopPred== taken), 7);
550            }
551        }
552
553        // TAGE UPDATE
554        // try to allocate a  new entries only if prediction was wrong
555        bool longest_match_pred = false;
556        bool alloc = (bi->tagePred != taken) && (bi->hitBank < nHistoryTables);
557        if (bi->hitBank > 0) {
558            // Manage the selection between longest matching and alternate
559            // matching for "pseudo"-newly allocated longest matching entry
560             longest_match_pred = bi->longestMatchPred;
561            bool PseudoNewAlloc = bi->pseudoNewAlloc;
562            // an entry is considered as newly allocated if its prediction
563            // counter is weak
564            if (PseudoNewAlloc) {
565                if (longest_match_pred == taken) {
566                    alloc = false;
567                }
568                // if it was delivering the correct prediction, no need to
569                // allocate new entry even if the overall prediction was false
570                if (longest_match_pred != bi->altTaken) {
571                    ctrUpdate(useAltPredForNewlyAllocated,
572                         bi->altTaken == taken, 4);
573                }
574            }
575        }
576
577        if (alloc) {
578            // is there some "unuseful" entry to allocate
579            int8_t min = 1;
580            for (int i = nHistoryTables; i > bi->hitBank; i--) {
581                if (gtable[i][bi->tableIndices[i]].u < min) {
582                    min = gtable[i][bi->tableIndices[i]].u;
583                }
584            }
585
586            // we allocate an entry with a longer history
587            // to  avoid ping-pong, we do not choose systematically the next
588            // entry, but among the 3 next entries
589            int Y = nrand &
590                ((ULL(1) << (nHistoryTables - bi->hitBank - 1)) - 1);
591            int X = bi->hitBank + 1;
592            if (Y & 1) {
593                X++;
594                if (Y & 2)
595                    X++;
596            }
597            // No entry available, forces one to be available
598            if (min > 0) {
599                gtable[X][bi->tableIndices[X]].u = 0;
600            }
601
602
603            //Allocate only  one entry
604            for (int i = X; i <= nHistoryTables; i++) {
605                if ((gtable[i][bi->tableIndices[i]].u == 0)) {
606                    gtable[i][bi->tableIndices[i]].tag = bi->tableTags[i];
607                    gtable[i][bi->tableIndices[i]].ctr = (taken) ? 0 : -1;
608                    gtable[i][bi->tableIndices[i]].u = 0; //?
609                    break;
610                }
611            }
612        }
613        //periodic reset of u: reset is not complete but bit by bit
614        tCounter++;
615        if ((tCounter & ((ULL(1) << logTick) - 1)) == 0) {
616            // reset least significant bit
617            // most significant bit becomes least significant bit
618            for (int i = 1; i <= nHistoryTables; i++) {
619                for (int j = 0; j < (ULL(1) << tagTableSizes[i]); j++) {
620                    gtable[i][j].u = gtable[i][j].u >> 1;
621                }
622            }
623        }
624
625        if (bi->hitBank > 0) {
626            DPRINTF(LTage, "Updating tag table entry (%d,%d) for branch %lx\n",
627                    bi->hitBank, bi->hitBankIndex, branch_pc);
628            ctrUpdate(gtable[bi->hitBank][bi->hitBankIndex].ctr, taken,
629                      tagTableCounterBits);
630            // if the provider entry is not certified to be useful also update
631            // the alternate prediction
632            if (gtable[bi->hitBank][bi->hitBankIndex].u == 0) {
633                if (bi->altBank > 0) {
634                    ctrUpdate(gtable[bi->altBank][bi->altBankIndex].ctr, taken,
635                              tagTableCounterBits);
636                    DPRINTF(LTage, "Updating tag table entry (%d,%d) for"
637                            " branch %lx\n", bi->hitBank, bi->hitBankIndex,
638                            branch_pc);
639                }
640                if (bi->altBank == 0) {
641                    baseUpdate(pc, taken, bi);
642                }
643            }
644
645            // update the u counter
646            if (longest_match_pred != bi->altTaken) {
647                if (longest_match_pred == taken) {
648                    if (gtable[bi->hitBank][bi->hitBankIndex].u < 1) {
649                        gtable[bi->hitBank][bi->hitBankIndex].u++;
650                    }
651                }
652            }
653        } else {
654            baseUpdate(pc, taken, bi);
655        }
656
657        //END PREDICTOR UPDATE
658    }
659    if (!squashed) {
660        delete bi;
661    }
662}
663
664void
665LTAGE::updateHistories(ThreadID tid, Addr branch_pc, bool taken, void* b)
666{
667    BranchInfo* bi = (BranchInfo*)(b);
668    ThreadHistory& tHist = threadHistory[tid];
669    //  UPDATE HISTORIES
670    bool pathbit = ((branch_pc >> instShiftAmt) & 1);
671    //on a squash, return pointers to this and recompute indices.
672    //update user history
673    updateGHist(tHist.gHist, taken, tHist.globalHistory, tHist.ptGhist);
674    tHist.pathHist = (tHist.pathHist << 1) + pathbit;
675    tHist.pathHist = (tHist.pathHist & ((ULL(1) << 16) - 1));
676
677    bi->ptGhist = tHist.ptGhist;
678    bi->pathHist = tHist.pathHist;
679    //prepare next index and tag computations for user branchs
680    for (int i = 1; i <= nHistoryTables; i++)
681    {
682        bi->ci[i]  = tHist.computeIndices[i].comp;
683        bi->ct0[i] = tHist.computeTags[0][i].comp;
684        bi->ct1[i] = tHist.computeTags[1][i].comp;
685        tHist.computeIndices[i].update(tHist.gHist);
686        tHist.computeTags[0][i].update(tHist.gHist);
687        tHist.computeTags[1][i].update(tHist.gHist);
688    }
689    DPRINTF(LTage, "Updating global histories with branch:%lx; taken?:%d, "
690            "path Hist: %x; pointer:%d\n", branch_pc, taken, tHist.pathHist,
691            tHist.ptGhist);
692}
693
694void
695LTAGE::squash(ThreadID tid, bool taken, void *bp_history)
696{
697    BranchInfo* bi = (BranchInfo*)(bp_history);
698    ThreadHistory& tHist = threadHistory[tid];
699    DPRINTF(LTage, "Restoring branch info: %lx; taken? %d; PathHistory:%x, "
700            "pointer:%d\n", bi->branchPC,taken, bi->pathHist, bi->ptGhist);
701    tHist.pathHist = bi->pathHist;
702    tHist.ptGhist = bi->ptGhist;
703    tHist.gHist = &(tHist.globalHistory[tHist.ptGhist]);
704    tHist.gHist[0] = (taken ? 1 : 0);
705    for (int i = 1; i <= nHistoryTables; i++) {
706        tHist.computeIndices[i].comp = bi->ci[i];
707        tHist.computeTags[0][i].comp = bi->ct0[i];
708        tHist.computeTags[1][i].comp = bi->ct1[i];
709        tHist.computeIndices[i].update(tHist.gHist);
710        tHist.computeTags[0][i].update(tHist.gHist);
711        tHist.computeTags[1][i].update(tHist.gHist);
712    }
713
714    if (bi->condBranch) {
715        if (bi->loopHit >= 0) {
716            int idx = bi->loopIndex + bi->loopHit;
717            ltable[idx].currentIterSpec = bi->currentIter;
718        }
719    }
720
721}
722
723void
724LTAGE::squash(ThreadID tid, void *bp_history)
725{
726    BranchInfo* bi = (BranchInfo*)(bp_history);
727    DPRINTF(LTage, "Deleting branch info: %lx\n", bi->branchPC);
728    if (bi->condBranch) {
729        if (bi->loopHit >= 0) {
730            int idx = bi->loopIndex + bi->loopHit;
731            ltable[idx].currentIterSpec = bi->currentIter;
732        }
733    }
734
735    delete bi;
736}
737
738bool
739LTAGE::lookup(ThreadID tid, Addr branch_pc, void* &bp_history)
740{
741    bool retval = predict(tid, branch_pc, true, bp_history);
742
743    DPRINTF(LTage, "Lookup branch: %lx; predict:%d\n", branch_pc, retval);
744    updateHistories(tid, branch_pc, retval, bp_history);
745    assert(threadHistory[tid].gHist ==
746           &threadHistory[tid].globalHistory[threadHistory[tid].ptGhist]);
747
748    return retval;
749}
750
751void
752LTAGE::btbUpdate(ThreadID tid, Addr branch_pc, void* &bp_history)
753{
754    BranchInfo* bi = (BranchInfo*) bp_history;
755    ThreadHistory& tHist = threadHistory[tid];
756    DPRINTF(LTage, "BTB miss resets prediction: %lx\n", branch_pc);
757    assert(tHist.gHist == &tHist.globalHistory[tHist.ptGhist]);
758    tHist.gHist[0] = 0;
759    for (int i = 1; i <= nHistoryTables; i++) {
760        tHist.computeIndices[i].comp = bi->ci[i];
761        tHist.computeTags[0][i].comp = bi->ct0[i];
762        tHist.computeTags[1][i].comp = bi->ct1[i];
763        tHist.computeIndices[i].update(tHist.gHist);
764        tHist.computeTags[0][i].update(tHist.gHist);
765        tHist.computeTags[1][i].update(tHist.gHist);
766    }
767}
768
769void
770LTAGE::uncondBranch(ThreadID tid, Addr br_pc, void* &bp_history)
771{
772    DPRINTF(LTage, "UnConditionalBranch: %lx\n", br_pc);
773    predict(tid, br_pc, false, bp_history);
774    updateHistories(tid, br_pc, true, bp_history);
775    assert(threadHistory[tid].gHist ==
776           &threadHistory[tid].globalHistory[threadHistory[tid].ptGhist]);
777}
778
779LTAGE*
780LTAGEParams::create()
781{
782    return new LTAGE(this);
783}
784