ltage.cc revision 13443:a111cb197897
14120Sgblack@eecs.umich.edu/*
24120Sgblack@eecs.umich.edu * Copyright (c) 2014 The University of Wisconsin
34120Sgblack@eecs.umich.edu *
44120Sgblack@eecs.umich.edu * Copyright (c) 2006 INRIA (Institut National de Recherche en
57087Snate@binkert.org * Informatique et en Automatique  / French National Research Institute
67087Snate@binkert.org * for Computer Science and Applied Mathematics)
77087Snate@binkert.org *
87087Snate@binkert.org * All rights reserved.
97087Snate@binkert.org *
107087Snate@binkert.org * Redistribution and use in source and binary forms, with or without
117087Snate@binkert.org * modification, are permitted provided that the following conditions are
127087Snate@binkert.org * met: redistributions of source code must retain the above copyright
134120Sgblack@eecs.umich.edu * notice, this list of conditions and the following disclaimer;
147087Snate@binkert.org * redistributions in binary form must reproduce the above copyright
157087Snate@binkert.org * notice, this list of conditions and the following disclaimer in the
167087Snate@binkert.org * documentation and/or other materials provided with the distribution;
177087Snate@binkert.org * neither the name of the copyright holders nor the names of its
187087Snate@binkert.org * contributors may be used to endorse or promote products derived from
197087Snate@binkert.org * this software without specific prior written permission.
207087Snate@binkert.org *
217087Snate@binkert.org * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
224120Sgblack@eecs.umich.edu * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
237087Snate@binkert.org * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
244120Sgblack@eecs.umich.edu * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
254120Sgblack@eecs.umich.edu * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
264120Sgblack@eecs.umich.edu * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
274120Sgblack@eecs.umich.edu * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
284120Sgblack@eecs.umich.edu * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
294120Sgblack@eecs.umich.edu * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
304120Sgblack@eecs.umich.edu * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
314120Sgblack@eecs.umich.edu * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
324120Sgblack@eecs.umich.edu *
334120Sgblack@eecs.umich.edu * Authors: Vignyan Reddy, Dibakar Gope and Arthur Perais,
344120Sgblack@eecs.umich.edu * from André Seznec's code.
354120Sgblack@eecs.umich.edu */
364120Sgblack@eecs.umich.edu
374120Sgblack@eecs.umich.edu/* @file
384120Sgblack@eecs.umich.edu * Implementation of a L-TAGE branch predictor
394120Sgblack@eecs.umich.edu */
404120Sgblack@eecs.umich.edu
414120Sgblack@eecs.umich.edu#include "cpu/pred/ltage.hh"
424120Sgblack@eecs.umich.edu
434141Sgblack@eecs.umich.edu#include "base/intmath.hh"
444136Sgblack@eecs.umich.edu#include "base/logging.hh"
456214Snate@binkert.org#include "base/random.hh"
464141Sgblack@eecs.umich.edu#include "base/trace.hh"
474121Sgblack@eecs.umich.edu#include "debug/Fetch.hh"
484120Sgblack@eecs.umich.edu#include "debug/LTage.hh"
494120Sgblack@eecs.umich.edu
504120Sgblack@eecs.umich.eduLTAGE::LTAGE(const LTAGEParams *params)
514121Sgblack@eecs.umich.edu  : BPredUnit(params),
524121Sgblack@eecs.umich.edu    logSizeBiMP(params->logSizeBiMP),
534121Sgblack@eecs.umich.edu    logRatioBiModalHystEntries(params->logRatioBiModalHystEntries),
544121Sgblack@eecs.umich.edu    logSizeTagTables(params->logSizeTagTables),
554121Sgblack@eecs.umich.edu    logSizeLoopPred(params->logSizeLoopPred),
564121Sgblack@eecs.umich.edu    nHistoryTables(params->nHistoryTables),
574121Sgblack@eecs.umich.edu    tagTableCounterBits(params->tagTableCounterBits),
584121Sgblack@eecs.umich.edu    tagTableUBits(params->tagTableUBits),
594121Sgblack@eecs.umich.edu    histBufferSize(params->histBufferSize),
604121Sgblack@eecs.umich.edu    minHist(params->minHist),
614121Sgblack@eecs.umich.edu    maxHist(params->maxHist),
624121Sgblack@eecs.umich.edu    minTagWidth(params->minTagWidth),
634141Sgblack@eecs.umich.edu    loopTableAgeBits(params->loopTableAgeBits),
644141Sgblack@eecs.umich.edu    loopTableConfidenceBits(params->loopTableConfidenceBits),
654141Sgblack@eecs.umich.edu    loopTableTagBits(params->loopTableTagBits),
665127Sgblack@eecs.umich.edu    loopTableIterBits(params->loopTableIterBits),
674141Sgblack@eecs.umich.edu    confidenceThreshold((1 << loopTableConfidenceBits) - 1),
684121Sgblack@eecs.umich.edu    loopTagMask((1 << loopTableTagBits) - 1),
694121Sgblack@eecs.umich.edu    loopNumIterMask((1 << loopTableIterBits) - 1),
704141Sgblack@eecs.umich.edu    threadHistory(params->numThreads)
716974Stjones1@inf.ed.ac.uk{
726974Stjones1@inf.ed.ac.uk    // Current method for periodically resetting the u counter bits only
737623Sgblack@eecs.umich.edu    // works for 1 or 2 bits
749329Sdam.sunwoo@arm.com    // Also make sure that it is not 0
759329Sdam.sunwoo@arm.com    assert(tagTableUBits <= 2 && (tagTableUBits > 0));
769329Sdam.sunwoo@arm.com
779057SAli.Saidi@ARM.com    // we use uint16_t type for these vales, so they cannot be more than
789057SAli.Saidi@ARM.com    // 16 bits
799057SAli.Saidi@ARM.com    assert(loopTableTagBits <= 16);
809057SAli.Saidi@ARM.com    assert(loopTableIterBits <= 16);
819057SAli.Saidi@ARM.com
829057SAli.Saidi@ARM.com    assert(params->histBufferSize > params->maxHist * 2);
839057SAli.Saidi@ARM.com    useAltPredForNewlyAllocated = 0;
849057SAli.Saidi@ARM.com    logTick = 19;
859057SAli.Saidi@ARM.com    tCounter = ULL(1) << (logTick - 1);
869057SAli.Saidi@ARM.com
879057SAli.Saidi@ARM.com    for (auto& history : threadHistory) {
888902Sandreas.hansson@arm.com        history.pathHist = 0;
894120Sgblack@eecs.umich.edu        history.globalHistory = new uint8_t[histBufferSize];
904120Sgblack@eecs.umich.edu        history.gHist = history.globalHistory;
91        memset(history.gHist, 0, histBufferSize);
92        history.ptGhist = 0;
93    }
94
95    histLengths = new int [nHistoryTables+1];
96    histLengths[1] = minHist;
97    histLengths[nHistoryTables] = maxHist;
98
99    for (int i = 2; i <= nHistoryTables; i++) {
100        histLengths[i] = (int) (((double) minHist *
101                    pow ((double) (maxHist) / (double) minHist,
102                        (double) (i - 1) / (double) ((nHistoryTables- 1))))
103                    + 0.5);
104    }
105
106    tagWidths[1] = minTagWidth;
107    tagWidths[2] = minTagWidth;
108    tagWidths[3] = minTagWidth + 1;
109    tagWidths[4] = minTagWidth + 1;
110    tagWidths[5] = minTagWidth + 2;
111    tagWidths[6] = minTagWidth + 3;
112    tagWidths[7] = minTagWidth + 4;
113    tagWidths[8] = minTagWidth + 5;
114    tagWidths[9] = minTagWidth + 5;
115    tagWidths[10] = minTagWidth + 6;
116    tagWidths[11] = minTagWidth + 7;
117    tagWidths[12] = minTagWidth + 8;
118
119    for (int i = 1; i <= 2; i++)
120        tagTableSizes[i] = logSizeTagTables - 1;
121    for (int i = 3; i <= 6; i++)
122        tagTableSizes[i] = logSizeTagTables;
123    for (int i = 7; i <= 10; i++)
124        tagTableSizes[i] = logSizeTagTables - 1;
125    for (int i = 11; i <= 12; i++)
126        tagTableSizes[i] = logSizeTagTables - 2;
127
128    for (auto& history : threadHistory) {
129        history.computeIndices = new FoldedHistory[nHistoryTables+1];
130        history.computeTags[0] = new FoldedHistory[nHistoryTables+1];
131        history.computeTags[1] = new FoldedHistory[nHistoryTables+1];
132
133        for (int i = 1; i <= nHistoryTables; i++) {
134            history.computeIndices[i].init(histLengths[i], (tagTableSizes[i]));
135            history.computeTags[0][i].init(
136                history.computeIndices[i].origLength, tagWidths[i]);
137            history.computeTags[1][i].init(
138                history.computeIndices[i].origLength, tagWidths[i] - 1);
139            DPRINTF(LTage, "HistLength:%d, TTSize:%d, TTTWidth:%d\n",
140                    histLengths[i], tagTableSizes[i], tagWidths[i]);
141        }
142    }
143
144    const uint64_t bimodalTableSize = ULL(1) << logSizeBiMP;
145    btablePrediction.resize(bimodalTableSize, false);
146    btableHysteresis.resize(bimodalTableSize >> logRatioBiModalHystEntries,
147                            true);
148
149    ltable = new LoopEntry[ULL(1) << logSizeLoopPred];
150    gtable = new TageEntry*[nHistoryTables + 1];
151    for (int i = 1; i <= nHistoryTables; i++) {
152        gtable[i] = new TageEntry[1<<(tagTableSizes[i])];
153    }
154
155    tableIndices = new int [nHistoryTables+1];
156    tableTags = new int [nHistoryTables+1];
157
158    loopUseCounter = 0;
159}
160
161int
162LTAGE::bindex(Addr pc_in) const
163{
164    return ((pc_in >> instShiftAmt) & ((ULL(1) << (logSizeBiMP)) - 1));
165}
166
167int
168LTAGE::lindex(Addr pc_in) const
169{
170    return (((pc_in >> instShiftAmt) &
171             ((ULL(1) << (logSizeLoopPred - 2)) - 1)) << 2);
172}
173
174int
175LTAGE::F(int A, int size, int bank) const
176{
177    int A1, A2;
178
179    A = A & ((ULL(1) << size) - 1);
180    A1 = (A & ((ULL(1) << tagTableSizes[bank]) - 1));
181    A2 = (A >> tagTableSizes[bank]);
182    A2 = ((A2 << bank) & ((ULL(1) << tagTableSizes[bank]) - 1))
183       + (A2 >> (tagTableSizes[bank] - bank));
184    A = A1 ^ A2;
185    A = ((A << bank) & ((ULL(1) << tagTableSizes[bank]) - 1))
186      + (A >> (tagTableSizes[bank] - bank));
187    return (A);
188}
189
190
191// gindex computes a full hash of pc, ghist and pathHist
192int
193LTAGE::gindex(ThreadID tid, Addr pc, int bank) const
194{
195    int index;
196    int hlen = (histLengths[bank] > 16) ? 16 : histLengths[bank];
197    index =
198        (pc >> instShiftAmt) ^
199        ((pc >> instShiftAmt) >> ((int) abs(tagTableSizes[bank] - bank) + 1)) ^
200        threadHistory[tid].computeIndices[bank].comp ^
201        F(threadHistory[tid].pathHist, hlen, bank);
202
203    return (index & ((ULL(1) << (tagTableSizes[bank])) - 1));
204}
205
206
207// Tag computation
208uint16_t
209LTAGE::gtag(ThreadID tid, Addr pc, int bank) const
210{
211    int tag = (pc >> instShiftAmt) ^
212              threadHistory[tid].computeTags[0][bank].comp ^
213              (threadHistory[tid].computeTags[1][bank].comp << 1);
214
215    return (tag & ((ULL(1) << tagWidths[bank]) - 1));
216}
217
218
219// Up-down saturating counter
220void
221LTAGE::ctrUpdate(int8_t & ctr, bool taken, int nbits)
222{
223    assert(nbits <= sizeof(int8_t) << 3);
224    if (taken) {
225        if (ctr < ((1 << (nbits - 1)) - 1))
226            ctr++;
227    } else {
228        if (ctr > -(1 << (nbits - 1)))
229            ctr--;
230    }
231}
232
233// Up-down unsigned saturating counter
234void
235LTAGE::unsignedCtrUpdate(uint8_t & ctr, bool up, unsigned nbits)
236{
237    assert(nbits <= sizeof(uint8_t) << 3);
238    if (up) {
239        if (ctr < ((1 << nbits) - 1))
240            ctr++;
241    } else {
242        if (ctr)
243            ctr--;
244    }
245}
246
247// Bimodal prediction
248bool
249LTAGE::getBimodePred(Addr pc, BranchInfo* bi) const
250{
251    return btablePrediction[bi->bimodalIndex];
252}
253
254
255// Update the bimodal predictor: a hysteresis bit is shared among N prediction
256// bits (N = 2 ^ logRatioBiModalHystEntries)
257void
258LTAGE::baseUpdate(Addr pc, bool taken, BranchInfo* bi)
259{
260    int inter = (btablePrediction[bi->bimodalIndex] << 1)
261        + btableHysteresis[bi->bimodalIndex >> logRatioBiModalHystEntries];
262    if (taken) {
263        if (inter < 3)
264            inter++;
265    } else if (inter > 0) {
266        inter--;
267    }
268    const bool pred = inter >> 1;
269    const bool hyst = inter & 1;
270    btablePrediction[bi->bimodalIndex] = pred;
271    btableHysteresis[bi->bimodalIndex >> logRatioBiModalHystEntries] = hyst;
272    DPRINTF(LTage, "Updating branch %lx, pred:%d, hyst:%d\n", pc, pred, hyst);
273}
274
275
276//loop prediction: only used if high confidence
277bool
278LTAGE::getLoop(Addr pc, BranchInfo* bi) const
279{
280    bi->loopHit = -1;
281    bi->loopPredValid = false;
282    bi->loopIndex = lindex(pc);
283    bi->loopTag = ((pc) >> (instShiftAmt + logSizeLoopPred - 2)) & loopTagMask;
284
285    for (int i = 0; i < 4; i++) {
286        if (ltable[bi->loopIndex + i].tag == bi->loopTag) {
287            bi->loopHit = i;
288            bi->loopPredValid =
289                ltable[bi->loopIndex + i].confidence == confidenceThreshold;
290            bi->currentIter = ltable[bi->loopIndex + i].currentIterSpec;
291            if (ltable[bi->loopIndex + i].currentIterSpec + 1 ==
292                ltable[bi->loopIndex + i].numIter) {
293                return !(ltable[bi->loopIndex + i].dir);
294            }else {
295                return (ltable[bi->loopIndex + i].dir);
296            }
297        }
298    }
299    return false;
300}
301
302void
303LTAGE::specLoopUpdate(Addr pc, bool taken, BranchInfo* bi)
304{
305    if (bi->loopHit>=0) {
306        int index = lindex(pc);
307        if (taken != ltable[index].dir) {
308            ltable[index].currentIterSpec = 0;
309        } else {
310            ltable[index].currentIterSpec =
311                (ltable[index].currentIterSpec + 1) & loopNumIterMask;
312        }
313    }
314}
315
316void
317LTAGE::loopUpdate(Addr pc, bool taken, BranchInfo* bi)
318{
319    int idx = bi->loopIndex + bi->loopHit;
320    if (bi->loopHit >= 0) {
321        //already a hit
322        if (bi->loopPredValid) {
323            if (taken != bi->loopPred) {
324                // free the entry
325                ltable[idx].numIter = 0;
326                ltable[idx].age = 0;
327                ltable[idx].confidence = 0;
328                ltable[idx].currentIter = 0;
329                return;
330            } else if (bi->loopPred != bi->tagePred) {
331                DPRINTF(LTage, "Loop Prediction success:%lx\n",pc);
332                unsignedCtrUpdate(ltable[idx].age, true, loopTableAgeBits);
333            }
334        }
335
336        ltable[idx].currentIter =
337            (ltable[idx].currentIter + 1) & loopNumIterMask;
338        if (ltable[idx].currentIter > ltable[idx].numIter) {
339            ltable[idx].confidence = 0;
340            if (ltable[idx].numIter != 0) {
341                // free the entry
342                ltable[idx].numIter = 0;
343                ltable[idx].age = 0;
344                ltable[idx].confidence = 0;
345            }
346        }
347
348        if (taken != ltable[idx].dir) {
349            if (ltable[idx].currentIter == ltable[idx].numIter) {
350                DPRINTF(LTage, "Loop End predicted successfully:%lx\n", pc);
351
352                unsignedCtrUpdate(ltable[idx].confidence, true,
353                                  loopTableConfidenceBits);
354                //just do not predict when the loop count is 1 or 2
355                if (ltable[idx].numIter < 3) {
356                    // free the entry
357                    ltable[idx].dir = taken;
358                    ltable[idx].numIter = 0;
359                    ltable[idx].age = 0;
360                    ltable[idx].confidence = 0;
361                }
362            } else {
363                DPRINTF(LTage, "Loop End predicted incorrectly:%lx\n", pc);
364                if (ltable[idx].numIter == 0) {
365                    // first complete nest;
366                    ltable[idx].confidence = 0;
367                    ltable[idx].numIter = ltable[idx].currentIter;
368                } else {
369                    //not the same number of iterations as last time: free the
370                    //entry
371                    ltable[idx].numIter = 0;
372                    ltable[idx].age = 0;
373                    ltable[idx].confidence = 0;
374                }
375            }
376            ltable[idx].currentIter = 0;
377        }
378
379    } else if (taken) {
380        //try to allocate an entry on taken branch
381        int nrand = random_mt.random<int>();
382        for (int i = 0; i < 4; i++) {
383            int loop_hit = (nrand + i) & 3;
384            idx = bi->loopIndex + loop_hit;
385            if (ltable[idx].age == 0) {
386                DPRINTF(LTage, "Allocating loop pred entry for branch %lx\n",
387                        pc);
388                ltable[idx].dir = !taken;
389                ltable[idx].tag = bi->loopTag;
390                ltable[idx].numIter = 0;
391                ltable[idx].age = (1 << loopTableAgeBits) - 1;
392                ltable[idx].confidence = 0;
393                ltable[idx].currentIter = 1;
394                break;
395
396            }
397            else
398                ltable[idx].age--;
399        }
400    }
401
402}
403
404// shifting the global history:  we manage the history in a big table in order
405// to reduce simulation time
406void
407LTAGE::updateGHist(uint8_t * &h, bool dir, uint8_t * tab, int &pt)
408{
409    if (pt == 0) {
410        DPRINTF(LTage, "Rolling over the histories\n");
411         // Copy beginning of globalHistoryBuffer to end, such that
412         // the last maxHist outcomes are still reachable
413         // through pt[0 .. maxHist - 1].
414         for (int i = 0; i < maxHist; i++)
415             tab[histBufferSize - maxHist + i] = tab[i];
416         pt =  histBufferSize - maxHist;
417         h = &tab[pt];
418    }
419    pt--;
420    h--;
421    h[0] = (dir) ? 1 : 0;
422}
423
424// Get GHR for hashing indirect predictor
425// Build history backwards from pointer in
426// bp_history.
427unsigned
428LTAGE::getGHR(ThreadID tid, void *bp_history) const
429{
430    BranchInfo* bi = static_cast<BranchInfo*>(bp_history);
431    unsigned val = 0;
432    for (unsigned i = 0; i < 32; i++) {
433        // Make sure we don't go out of bounds
434        int gh_offset = bi->ptGhist + i;
435        assert(&(threadHistory[tid].globalHistory[gh_offset]) <
436               threadHistory[tid].globalHistory + histBufferSize);
437        val |= ((threadHistory[tid].globalHistory[gh_offset] & 0x1) << i);
438    }
439
440    return val;
441}
442
443//prediction
444bool
445LTAGE::predict(ThreadID tid, Addr branch_pc, bool cond_branch, void* &b)
446{
447    BranchInfo *bi = new BranchInfo(nHistoryTables+1);
448    b = (void*)(bi);
449    Addr pc = branch_pc;
450    bool pred_taken = true;
451    bi->loopHit = -1;
452
453    if (cond_branch) {
454        // TAGE prediction
455
456        // computes the table addresses and the partial tags
457        for (int i = 1; i <= nHistoryTables; i++) {
458            tableIndices[i] = gindex(tid, pc, i);
459            bi->tableIndices[i] = tableIndices[i];
460            tableTags[i] = gtag(tid, pc, i);
461            bi->tableTags[i] = tableTags[i];
462        }
463
464        bi->bimodalIndex = bindex(pc);
465
466        bi->hitBank = 0;
467        bi->altBank = 0;
468        //Look for the bank with longest matching history
469        for (int i = nHistoryTables; i > 0; i--) {
470            if (gtable[i][tableIndices[i]].tag == tableTags[i]) {
471                bi->hitBank = i;
472                bi->hitBankIndex = tableIndices[bi->hitBank];
473                break;
474            }
475        }
476        //Look for the alternate bank
477        for (int i = bi->hitBank - 1; i > 0; i--) {
478            if (gtable[i][tableIndices[i]].tag == tableTags[i]) {
479                bi->altBank = i;
480                bi->altBankIndex = tableIndices[bi->altBank];
481                break;
482            }
483        }
484        //computes the prediction and the alternate prediction
485        if (bi->hitBank > 0) {
486            if (bi->altBank > 0) {
487                bi->altTaken =
488                    gtable[bi->altBank][tableIndices[bi->altBank]].ctr >= 0;
489            }else {
490                bi->altTaken = getBimodePred(pc, bi);
491            }
492
493            bi->longestMatchPred =
494                gtable[bi->hitBank][tableIndices[bi->hitBank]].ctr >= 0;
495            bi->pseudoNewAlloc =
496                abs(2 * gtable[bi->hitBank][bi->hitBankIndex].ctr + 1) <= 1;
497
498            //if the entry is recognized as a newly allocated entry and
499            //useAltPredForNewlyAllocated is positive use the alternate
500            //prediction
501            if ((useAltPredForNewlyAllocated < 0)
502                   || abs(2 *
503                   gtable[bi->hitBank][tableIndices[bi->hitBank]].ctr + 1) > 1)
504                bi->tagePred = bi->longestMatchPred;
505            else
506                bi->tagePred = bi->altTaken;
507        } else {
508            bi->altTaken = getBimodePred(pc, bi);
509            bi->tagePred = bi->altTaken;
510            bi->longestMatchPred = bi->altTaken;
511        }
512        //end TAGE prediction
513
514        bi->loopPred = getLoop(pc, bi);	// loop prediction
515
516        pred_taken = (((loopUseCounter >= 0) && bi->loopPredValid)) ?
517                     (bi->loopPred): (bi->tagePred);
518        DPRINTF(LTage, "Predict for %lx: taken?:%d, loopTaken?:%d, "
519                "loopValid?:%d, loopUseCounter:%d, tagePred:%d, altPred:%d\n",
520                branch_pc, pred_taken, bi->loopPred, bi->loopPredValid,
521                loopUseCounter, bi->tagePred, bi->altTaken);
522    }
523    bi->branchPC = branch_pc;
524    bi->condBranch = cond_branch;
525    specLoopUpdate(branch_pc, pred_taken, bi);
526    return pred_taken;
527}
528
529// PREDICTOR UPDATE
530void
531LTAGE::update(ThreadID tid, Addr branch_pc, bool taken, void* bp_history,
532              bool squashed)
533{
534    assert(bp_history);
535
536    BranchInfo *bi = static_cast<BranchInfo*>(bp_history);
537
538    if (squashed) {
539        // This restores the global history, then update it
540        // and recomputes the folded histories.
541        squash(tid, taken, bp_history);
542        return;
543    }
544
545    int nrand  = random_mt.random<int>(0,3);
546    Addr pc = branch_pc;
547    if (bi->condBranch) {
548        DPRINTF(LTage, "Updating tables for branch:%lx; taken?:%d\n",
549                branch_pc, taken);
550        // first update the loop predictor
551        loopUpdate(pc, taken, bi);
552
553        if (bi->loopPredValid) {
554            if (bi->tagePred != bi->loopPred) {
555                ctrUpdate(loopUseCounter, (bi->loopPred== taken), 7);
556            }
557        }
558
559        // TAGE UPDATE
560        // try to allocate a  new entries only if prediction was wrong
561        bool longest_match_pred = false;
562        bool alloc = (bi->tagePred != taken) && (bi->hitBank < nHistoryTables);
563        if (bi->hitBank > 0) {
564            // Manage the selection between longest matching and alternate
565            // matching for "pseudo"-newly allocated longest matching entry
566             longest_match_pred = bi->longestMatchPred;
567            bool PseudoNewAlloc = bi->pseudoNewAlloc;
568            // an entry is considered as newly allocated if its prediction
569            // counter is weak
570            if (PseudoNewAlloc) {
571                if (longest_match_pred == taken) {
572                    alloc = false;
573                }
574                // if it was delivering the correct prediction, no need to
575                // allocate new entry even if the overall prediction was false
576                if (longest_match_pred != bi->altTaken) {
577                    ctrUpdate(useAltPredForNewlyAllocated,
578                         bi->altTaken == taken, 4);
579                }
580            }
581        }
582
583        if (alloc) {
584            // is there some "unuseful" entry to allocate
585            uint8_t min = 1;
586            for (int i = nHistoryTables; i > bi->hitBank; i--) {
587                if (gtable[i][bi->tableIndices[i]].u < min) {
588                    min = gtable[i][bi->tableIndices[i]].u;
589                }
590            }
591
592            // we allocate an entry with a longer history
593            // to  avoid ping-pong, we do not choose systematically the next
594            // entry, but among the 3 next entries
595            int Y = nrand &
596                ((ULL(1) << (nHistoryTables - bi->hitBank - 1)) - 1);
597            int X = bi->hitBank + 1;
598            if (Y & 1) {
599                X++;
600                if (Y & 2)
601                    X++;
602            }
603            // No entry available, forces one to be available
604            if (min > 0) {
605                gtable[X][bi->tableIndices[X]].u = 0;
606            }
607
608
609            //Allocate only  one entry
610            for (int i = X; i <= nHistoryTables; i++) {
611                if ((gtable[i][bi->tableIndices[i]].u == 0)) {
612                    gtable[i][bi->tableIndices[i]].tag = bi->tableTags[i];
613                    gtable[i][bi->tableIndices[i]].ctr = (taken) ? 0 : -1;
614                    break;
615                }
616            }
617        }
618        //periodic reset of u: reset is not complete but bit by bit
619        tCounter++;
620        if ((tCounter & ((ULL(1) << logTick) - 1)) == 0) {
621            // reset least significant bit
622            // most significant bit becomes least significant bit
623            for (int i = 1; i <= nHistoryTables; i++) {
624                for (int j = 0; j < (ULL(1) << tagTableSizes[i]); j++) {
625                    gtable[i][j].u = gtable[i][j].u >> 1;
626                }
627            }
628        }
629
630        if (bi->hitBank > 0) {
631            DPRINTF(LTage, "Updating tag table entry (%d,%d) for branch %lx\n",
632                    bi->hitBank, bi->hitBankIndex, branch_pc);
633            ctrUpdate(gtable[bi->hitBank][bi->hitBankIndex].ctr, taken,
634                      tagTableCounterBits);
635            // if the provider entry is not certified to be useful also update
636            // the alternate prediction
637            if (gtable[bi->hitBank][bi->hitBankIndex].u == 0) {
638                if (bi->altBank > 0) {
639                    ctrUpdate(gtable[bi->altBank][bi->altBankIndex].ctr, taken,
640                              tagTableCounterBits);
641                    DPRINTF(LTage, "Updating tag table entry (%d,%d) for"
642                            " branch %lx\n", bi->hitBank, bi->hitBankIndex,
643                            branch_pc);
644                }
645                if (bi->altBank == 0) {
646                    baseUpdate(pc, taken, bi);
647                }
648            }
649
650            // update the u counter
651            if (bi->tagePred != bi->altTaken) {
652                unsignedCtrUpdate(gtable[bi->hitBank][bi->hitBankIndex].u,
653                                  bi->tagePred == taken, tagTableUBits);
654            }
655        } else {
656            baseUpdate(pc, taken, bi);
657        }
658
659        //END PREDICTOR UPDATE
660    }
661    if (!squashed) {
662        delete bi;
663    }
664}
665
666void
667LTAGE::updateHistories(ThreadID tid, Addr branch_pc, bool taken, void* b)
668{
669    BranchInfo* bi = (BranchInfo*)(b);
670    ThreadHistory& tHist = threadHistory[tid];
671    //  UPDATE HISTORIES
672    bool pathbit = ((branch_pc >> instShiftAmt) & 1);
673    //on a squash, return pointers to this and recompute indices.
674    //update user history
675    updateGHist(tHist.gHist, taken, tHist.globalHistory, tHist.ptGhist);
676    tHist.pathHist = (tHist.pathHist << 1) + pathbit;
677    tHist.pathHist = (tHist.pathHist & ((ULL(1) << 16) - 1));
678
679    bi->ptGhist = tHist.ptGhist;
680    bi->pathHist = tHist.pathHist;
681    //prepare next index and tag computations for user branchs
682    for (int i = 1; i <= nHistoryTables; i++)
683    {
684        bi->ci[i]  = tHist.computeIndices[i].comp;
685        bi->ct0[i] = tHist.computeTags[0][i].comp;
686        bi->ct1[i] = tHist.computeTags[1][i].comp;
687        tHist.computeIndices[i].update(tHist.gHist);
688        tHist.computeTags[0][i].update(tHist.gHist);
689        tHist.computeTags[1][i].update(tHist.gHist);
690    }
691    DPRINTF(LTage, "Updating global histories with branch:%lx; taken?:%d, "
692            "path Hist: %x; pointer:%d\n", branch_pc, taken, tHist.pathHist,
693            tHist.ptGhist);
694}
695
696void
697LTAGE::squash(ThreadID tid, bool taken, void *bp_history)
698{
699    BranchInfo* bi = (BranchInfo*)(bp_history);
700    ThreadHistory& tHist = threadHistory[tid];
701    DPRINTF(LTage, "Restoring branch info: %lx; taken? %d; PathHistory:%x, "
702            "pointer:%d\n", bi->branchPC,taken, bi->pathHist, bi->ptGhist);
703    tHist.pathHist = bi->pathHist;
704    tHist.ptGhist = bi->ptGhist;
705    tHist.gHist = &(tHist.globalHistory[tHist.ptGhist]);
706    tHist.gHist[0] = (taken ? 1 : 0);
707    for (int i = 1; i <= nHistoryTables; i++) {
708        tHist.computeIndices[i].comp = bi->ci[i];
709        tHist.computeTags[0][i].comp = bi->ct0[i];
710        tHist.computeTags[1][i].comp = bi->ct1[i];
711        tHist.computeIndices[i].update(tHist.gHist);
712        tHist.computeTags[0][i].update(tHist.gHist);
713        tHist.computeTags[1][i].update(tHist.gHist);
714    }
715
716    if (bi->condBranch) {
717        if (bi->loopHit >= 0) {
718            int idx = bi->loopIndex + bi->loopHit;
719            ltable[idx].currentIterSpec = bi->currentIter;
720        }
721    }
722
723}
724
725void
726LTAGE::squash(ThreadID tid, void *bp_history)
727{
728    BranchInfo* bi = (BranchInfo*)(bp_history);
729    DPRINTF(LTage, "Deleting branch info: %lx\n", bi->branchPC);
730    if (bi->condBranch) {
731        if (bi->loopHit >= 0) {
732            int idx = bi->loopIndex + bi->loopHit;
733            ltable[idx].currentIterSpec = bi->currentIter;
734        }
735    }
736
737    delete bi;
738}
739
740bool
741LTAGE::lookup(ThreadID tid, Addr branch_pc, void* &bp_history)
742{
743    bool retval = predict(tid, branch_pc, true, bp_history);
744
745    DPRINTF(LTage, "Lookup branch: %lx; predict:%d\n", branch_pc, retval);
746    updateHistories(tid, branch_pc, retval, bp_history);
747    assert(threadHistory[tid].gHist ==
748           &threadHistory[tid].globalHistory[threadHistory[tid].ptGhist]);
749
750    return retval;
751}
752
753void
754LTAGE::btbUpdate(ThreadID tid, Addr branch_pc, void* &bp_history)
755{
756    BranchInfo* bi = (BranchInfo*) bp_history;
757    ThreadHistory& tHist = threadHistory[tid];
758    DPRINTF(LTage, "BTB miss resets prediction: %lx\n", branch_pc);
759    assert(tHist.gHist == &tHist.globalHistory[tHist.ptGhist]);
760    tHist.gHist[0] = 0;
761    for (int i = 1; i <= nHistoryTables; i++) {
762        tHist.computeIndices[i].comp = bi->ci[i];
763        tHist.computeTags[0][i].comp = bi->ct0[i];
764        tHist.computeTags[1][i].comp = bi->ct1[i];
765        tHist.computeIndices[i].update(tHist.gHist);
766        tHist.computeTags[0][i].update(tHist.gHist);
767        tHist.computeTags[1][i].update(tHist.gHist);
768    }
769}
770
771void
772LTAGE::uncondBranch(ThreadID tid, Addr br_pc, void* &bp_history)
773{
774    DPRINTF(LTage, "UnConditionalBranch: %lx\n", br_pc);
775    predict(tid, br_pc, false, bp_history);
776    updateHistories(tid, br_pc, true, bp_history);
777    assert(threadHistory[tid].gHist ==
778           &threadHistory[tid].globalHistory[threadHistory[tid].ptGhist]);
779}
780
781LTAGE*
782LTAGEParams::create()
783{
784    return new LTAGE(this);
785}
786