ltage.cc revision 13420:5cb2b90e1cb5
12391SN/A/*
28931Sandreas.hansson@arm.com * Copyright (c) 2014 The University of Wisconsin
38931Sandreas.hansson@arm.com *
48931Sandreas.hansson@arm.com * Copyright (c) 2006 INRIA (Institut National de Recherche en
58931Sandreas.hansson@arm.com * Informatique et en Automatique  / French National Research Institute
68931Sandreas.hansson@arm.com * for Computer Science and Applied Mathematics)
78931Sandreas.hansson@arm.com *
88931Sandreas.hansson@arm.com * All rights reserved.
98931Sandreas.hansson@arm.com *
108931Sandreas.hansson@arm.com * Redistribution and use in source and binary forms, with or without
118931Sandreas.hansson@arm.com * modification, are permitted provided that the following conditions are
128931Sandreas.hansson@arm.com * met: redistributions of source code must retain the above copyright
138931Sandreas.hansson@arm.com * notice, this list of conditions and the following disclaimer;
142391SN/A * redistributions in binary form must reproduce the above copyright
152391SN/A * notice, this list of conditions and the following disclaimer in the
162391SN/A * documentation and/or other materials provided with the distribution;
172391SN/A * neither the name of the copyright holders nor the names of its
182391SN/A * contributors may be used to endorse or promote products derived from
192391SN/A * this software without specific prior written permission.
202391SN/A *
212391SN/A * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
222391SN/A * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
232391SN/A * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
242391SN/A * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
252391SN/A * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
262391SN/A * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
272391SN/A * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
282391SN/A * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
292391SN/A * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
302391SN/A * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
312391SN/A * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
322391SN/A *
332391SN/A * Authors: Vignyan Reddy, Dibakar Gope and Arthur Perais,
342391SN/A * from André Seznec's code.
352391SN/A */
362391SN/A
372391SN/A/* @file
382391SN/A * Implementation of a L-TAGE branch predictor
392665SN/A */
402665SN/A
418931Sandreas.hansson@arm.com#include "cpu/pred/ltage.hh"
422391SN/A
432391SN/A#include "base/intmath.hh"
448931Sandreas.hansson@arm.com#include "base/logging.hh"
458931Sandreas.hansson@arm.com#include "base/random.hh"
468931Sandreas.hansson@arm.com#include "base/trace.hh"
472391SN/A#include "debug/Fetch.hh"
482391SN/A#include "debug/LTage.hh"
498931Sandreas.hansson@arm.com
508931Sandreas.hansson@arm.comLTAGE::LTAGE(const LTAGEParams *params)
512391SN/A  : BPredUnit(params),
522462SN/A    logSizeBiMP(params->logSizeBiMP),
538931Sandreas.hansson@arm.com    logRatioBiModalHystEntries(params->logRatioBiModalHystEntries),
548719SN/A    logSizeTagTables(params->logSizeTagTables),
552462SN/A    logSizeLoopPred(params->logSizeLoopPred),
569053Sdam.sunwoo@arm.com    nHistoryTables(params->nHistoryTables),
579053Sdam.sunwoo@arm.com    tagTableCounterBits(params->tagTableCounterBits),
589053Sdam.sunwoo@arm.com    histBufferSize(params->histBufferSize),
598931Sandreas.hansson@arm.com    minHist(params->minHist),
609293Sandreas.hansson@arm.com    maxHist(params->maxHist),
619293Sandreas.hansson@arm.com    minTagWidth(params->minTagWidth),
629293Sandreas.hansson@arm.com    threadHistory(params->numThreads)
639293Sandreas.hansson@arm.com{
649293Sandreas.hansson@arm.com    assert(params->histBufferSize > params->maxHist * 2);
659293Sandreas.hansson@arm.com    useAltPredForNewlyAllocated = 0;
669293Sandreas.hansson@arm.com    logTick = 19;
679293Sandreas.hansson@arm.com    tCounter = ULL(1) << (logTick - 1);
689293Sandreas.hansson@arm.com
699293Sandreas.hansson@arm.com    for (auto& history : threadHistory) {
709293Sandreas.hansson@arm.com        history.pathHist = 0;
719293Sandreas.hansson@arm.com        history.globalHistory = new uint8_t[histBufferSize];
729293Sandreas.hansson@arm.com        history.gHist = history.globalHistory;
739293Sandreas.hansson@arm.com        memset(history.gHist, 0, histBufferSize);
749293Sandreas.hansson@arm.com        history.ptGhist = 0;
759293Sandreas.hansson@arm.com    }
769293Sandreas.hansson@arm.com
7711005Sandreas.sandberg@arm.com    histLengths = new int [nHistoryTables+1];
789293Sandreas.hansson@arm.com    histLengths[1] = minHist;
799293Sandreas.hansson@arm.com    histLengths[nHistoryTables] = maxHist;
809293Sandreas.hansson@arm.com
819293Sandreas.hansson@arm.com    for (int i = 2; i <= nHistoryTables; i++) {
829293Sandreas.hansson@arm.com        histLengths[i] = (int) (((double) minHist *
839293Sandreas.hansson@arm.com                    pow ((double) (maxHist) / (double) minHist,
849293Sandreas.hansson@arm.com                        (double) (i - 1) / (double) ((nHistoryTables- 1))))
859293Sandreas.hansson@arm.com                    + 0.5);
869293Sandreas.hansson@arm.com    }
879293Sandreas.hansson@arm.com
889293Sandreas.hansson@arm.com    tagWidths[1] = minTagWidth;
899293Sandreas.hansson@arm.com    tagWidths[2] = minTagWidth;
909293Sandreas.hansson@arm.com    tagWidths[3] = minTagWidth + 1;
919293Sandreas.hansson@arm.com    tagWidths[4] = minTagWidth + 1;
929293Sandreas.hansson@arm.com    tagWidths[5] = minTagWidth + 2;
939293Sandreas.hansson@arm.com    tagWidths[6] = minTagWidth + 3;
949293Sandreas.hansson@arm.com    tagWidths[7] = minTagWidth + 4;
959293Sandreas.hansson@arm.com    tagWidths[8] = minTagWidth + 5;
969293Sandreas.hansson@arm.com    tagWidths[9] = minTagWidth + 5;
978931Sandreas.hansson@arm.com    tagWidths[10] = minTagWidth + 6;
988931Sandreas.hansson@arm.com    tagWidths[11] = minTagWidth + 7;
998931Sandreas.hansson@arm.com    tagWidths[12] = minTagWidth + 8;
1008931Sandreas.hansson@arm.com
1018931Sandreas.hansson@arm.com    for (int i = 1; i <= 2; i++)
1028931Sandreas.hansson@arm.com        tagTableSizes[i] = logSizeTagTables - 1;
1038931Sandreas.hansson@arm.com    for (int i = 3; i <= 6; i++)
1042391SN/A        tagTableSizes[i] = logSizeTagTables;
1056107SN/A    for (int i = 7; i <= 10; i++)
1066107SN/A        tagTableSizes[i] = logSizeTagTables - 1;
1078931Sandreas.hansson@arm.com    for (int i = 11; i <= 12; i++)
1089235Sandreas.hansson@arm.com        tagTableSizes[i] = logSizeTagTables - 2;
1092413SN/A
1108931Sandreas.hansson@arm.com    for (auto& history : threadHistory) {
1118931Sandreas.hansson@arm.com        history.computeIndices = new FoldedHistory[nHistoryTables+1];
1122413SN/A        history.computeTags[0] = new FoldedHistory[nHistoryTables+1];
1138931Sandreas.hansson@arm.com        history.computeTags[1] = new FoldedHistory[nHistoryTables+1];
11411614Sdavid.j.hashe@gmail.com
1152413SN/A        for (int i = 1; i <= nHistoryTables; i++) {
1168931Sandreas.hansson@arm.com            history.computeIndices[i].init(histLengths[i], (tagTableSizes[i]));
11711614Sdavid.j.hashe@gmail.com            history.computeTags[0][i].init(
11811614Sdavid.j.hashe@gmail.com                history.computeIndices[i].origLength, tagWidths[i]);
11911614Sdavid.j.hashe@gmail.com            history.computeTags[1][i].init(
12011614Sdavid.j.hashe@gmail.com                history.computeIndices[i].origLength, tagWidths[i] - 1);
1213170SN/A            DPRINTF(LTage, "HistLength:%d, TTSize:%d, TTTWidth:%d\n",
1223170SN/A                    histLengths[i], tagTableSizes[i], tagWidths[i]);
1233170SN/A        }
1243170SN/A    }
1253170SN/A
1263170SN/A    const uint64_t bimodalTableSize = ULL(1) << logSizeBiMP;
1273170SN/A    btablePrediction.resize(bimodalTableSize, false);
1284626SN/A    btableHysteresis.resize(bimodalTableSize >> logRatioBiModalHystEntries,
1293170SN/A                            true);
1303170SN/A
1313170SN/A    ltable = new LoopEntry[ULL(1) << logSizeLoopPred];
1323170SN/A    gtable = new TageEntry*[nHistoryTables + 1];
1334626SN/A    for (int i = 1; i <= nHistoryTables; i++) {
1343170SN/A        gtable[i] = new TageEntry[1<<(tagTableSizes[i])];
1353170SN/A    }
1363170SN/A
1373170SN/A    tableIndices = new int [nHistoryTables+1];
1383170SN/A    tableTags = new int [nHistoryTables+1];
1393170SN/A
1403170SN/A    loopUseCounter = 0;
1413170SN/A}
1424626SN/A
1434626SN/Aint
1443170SN/ALTAGE::bindex(Addr pc_in) const
1453170SN/A{
1466102SN/A    return ((pc_in >> instShiftAmt) & ((ULL(1) << (logSizeBiMP)) - 1));
1476102SN/A}
1484040SN/A
1493170SN/Aint
1506102SN/ALTAGE::lindex(Addr pc_in) const
1513170SN/A{
1523170SN/A    return (((pc_in >> instShiftAmt) &
1534626SN/A             ((ULL(1) << (logSizeLoopPred - 2)) - 1)) << 2);
1543170SN/A}
1553170SN/A
1563170SN/Aint
1578719SN/ALTAGE::F(int A, int size, int bank) const
1589053Sdam.sunwoo@arm.com{
1598719SN/A    int A1, A2;
1609053Sdam.sunwoo@arm.com
1618719SN/A    A = A & ((ULL(1) << size) - 1);
1629053Sdam.sunwoo@arm.com    A1 = (A & ((ULL(1) << tagTableSizes[bank]) - 1));
1638719SN/A    A2 = (A >> tagTableSizes[bank]);
1649053Sdam.sunwoo@arm.com    A2 = ((A2 << bank) & ((ULL(1) << tagTableSizes[bank]) - 1))
1658719SN/A       + (A2 >> (tagTableSizes[bank] - bank));
1669053Sdam.sunwoo@arm.com    A = A1 ^ A2;
1678719SN/A    A = ((A << bank) & ((ULL(1) << tagTableSizes[bank]) - 1))
1689053Sdam.sunwoo@arm.com      + (A >> (tagTableSizes[bank] - bank));
1698719SN/A    return (A);
1708719SN/A}
1718719SN/A
1728719SN/A
1738719SN/A// gindex computes a full hash of pc, ghist and pathHist
1748719SN/Aint
1758719SN/ALTAGE::gindex(ThreadID tid, Addr pc, int bank) const
1768719SN/A{
1778719SN/A    int index;
1789053Sdam.sunwoo@arm.com    int hlen = (histLengths[bank] > 16) ? 16 : histLengths[bank];
1799053Sdam.sunwoo@arm.com    index =
1809053Sdam.sunwoo@arm.com        (pc >> instShiftAmt) ^
1819053Sdam.sunwoo@arm.com        ((pc >> instShiftAmt) >> ((int) abs(tagTableSizes[bank] - bank) + 1)) ^
1829053Sdam.sunwoo@arm.com        threadHistory[tid].computeIndices[bank].comp ^
1839053Sdam.sunwoo@arm.com        F(threadHistory[tid].pathHist, hlen, bank);
1849053Sdam.sunwoo@arm.com
1858931Sandreas.hansson@arm.com    return (index & ((ULL(1) << (tagTableSizes[bank])) - 1));
1868931Sandreas.hansson@arm.com}
1878931Sandreas.hansson@arm.com
1888931Sandreas.hansson@arm.com
1898931Sandreas.hansson@arm.com// Tag computation
1908931Sandreas.hansson@arm.comuint16_t
1918931Sandreas.hansson@arm.comLTAGE::gtag(ThreadID tid, Addr pc, int bank) const
1922391SN/A{
1932391SN/A    int tag = (pc >> instShiftAmt) ^
1948931Sandreas.hansson@arm.com              threadHistory[tid].computeTags[0][bank].comp ^
1958931Sandreas.hansson@arm.com              (threadHistory[tid].computeTags[1][bank].comp << 1);
1968931Sandreas.hansson@arm.com
1978931Sandreas.hansson@arm.com    return (tag & ((ULL(1) << tagWidths[bank]) - 1));
1989293Sandreas.hansson@arm.com}
1999293Sandreas.hansson@arm.com
2009293Sandreas.hansson@arm.com
20110466Sandreas.hansson@arm.com// Up-down saturating counter
20210466Sandreas.hansson@arm.comvoid
20311169Sandreas.hansson@arm.comLTAGE::ctrUpdate(int8_t & ctr, bool taken, int nbits)
20410466Sandreas.hansson@arm.com{
20510466Sandreas.hansson@arm.com    assert(nbits <= sizeof(int8_t) << 3);
2069293Sandreas.hansson@arm.com    if (taken) {
2079293Sandreas.hansson@arm.com        if (ctr < ((1 << (nbits - 1)) - 1))
2089293Sandreas.hansson@arm.com            ctr++;
2099293Sandreas.hansson@arm.com    } else {
2109293Sandreas.hansson@arm.com        if (ctr > -(1 << (nbits - 1)))
2119293Sandreas.hansson@arm.com            ctr--;
2129293Sandreas.hansson@arm.com    }
2139293Sandreas.hansson@arm.com}
2149293Sandreas.hansson@arm.com
2159293Sandreas.hansson@arm.com// Bimodal prediction
2169293Sandreas.hansson@arm.combool
2179293Sandreas.hansson@arm.comLTAGE::getBimodePred(Addr pc, BranchInfo* bi) const
2189293Sandreas.hansson@arm.com{
2199293Sandreas.hansson@arm.com    return btablePrediction[bi->bimodalIndex];
2209293Sandreas.hansson@arm.com}
2219293Sandreas.hansson@arm.com
2229293Sandreas.hansson@arm.com
2239293Sandreas.hansson@arm.com// Update the bimodal predictor: a hysteresis bit is shared among N prediction
2249293Sandreas.hansson@arm.com// bits (N = 2 ^ logRatioBiModalHystEntries)
2259293Sandreas.hansson@arm.comvoid
2269293Sandreas.hansson@arm.comLTAGE::baseUpdate(Addr pc, bool taken, BranchInfo* bi)
2279293Sandreas.hansson@arm.com{
2289293Sandreas.hansson@arm.com    int inter = (btablePrediction[bi->bimodalIndex] << 1)
2299293Sandreas.hansson@arm.com        + btableHysteresis[bi->bimodalIndex >> logRatioBiModalHystEntries];
2309293Sandreas.hansson@arm.com    if (taken) {
2312391SN/A        if (inter < 3)
2329053Sdam.sunwoo@arm.com            inter++;
2339053Sdam.sunwoo@arm.com    } else if (inter > 0) {
2349053Sdam.sunwoo@arm.com        inter--;
2359053Sdam.sunwoo@arm.com    }
2369053Sdam.sunwoo@arm.com    const bool pred = inter >> 1;
2379053Sdam.sunwoo@arm.com    const bool hyst = inter & 1;
2389053Sdam.sunwoo@arm.com    btablePrediction[bi->bimodalIndex] = pred;
2399053Sdam.sunwoo@arm.com    btableHysteresis[bi->bimodalIndex >> logRatioBiModalHystEntries] = hyst;
2409053Sdam.sunwoo@arm.com    DPRINTF(LTage, "Updating branch %lx, pred:%d, hyst:%d\n", pc, pred, hyst);
2419053Sdam.sunwoo@arm.com}
2429053Sdam.sunwoo@arm.com
2439053Sdam.sunwoo@arm.com
2449053Sdam.sunwoo@arm.com//loop prediction: only used if high confidence
2454762SN/Abool
2464762SN/ALTAGE::getLoop(Addr pc, BranchInfo* bi) const
2474762SN/A{
2484762SN/A    bi->loopHit = -1;
2494762SN/A    bi->loopPredValid = false;
2504762SN/A    bi->loopIndex = lindex(pc);
2518931Sandreas.hansson@arm.com    bi->loopTag = ((pc) >> (instShiftAmt + logSizeLoopPred - 2));
2528931Sandreas.hansson@arm.com
2538931Sandreas.hansson@arm.com    for (int i = 0; i < 4; i++) {
2548931Sandreas.hansson@arm.com        if (ltable[bi->loopIndex + i].tag == bi->loopTag) {
2558931Sandreas.hansson@arm.com            bi->loopHit = i;
2569235Sandreas.hansson@arm.com            bi->loopPredValid = (ltable[bi->loopIndex + i].confidence >= 3);
2572391SN/A            bi->currentIter = ltable[bi->loopIndex + i].currentIterSpec;
2588931Sandreas.hansson@arm.com            if (ltable[bi->loopIndex + i].currentIterSpec + 1 ==
2598931Sandreas.hansson@arm.com                ltable[bi->loopIndex + i].numIter) {
2608931Sandreas.hansson@arm.com                return !(ltable[bi->loopIndex + i].dir);
2618931Sandreas.hansson@arm.com            }else {
2628931Sandreas.hansson@arm.com                return (ltable[bi->loopIndex + i].dir);
2639098Sandreas.hansson@arm.com            }
2648923SN/A        }
2658931Sandreas.hansson@arm.com    }
2668931Sandreas.hansson@arm.com    return false;
2678931Sandreas.hansson@arm.com}
2688931Sandreas.hansson@arm.com
2698931Sandreas.hansson@arm.comvoid
2709405Sandreas.hansson@arm.comLTAGE::specLoopUpdate(Addr pc, bool taken, BranchInfo* bi)
2718923SN/A{
2728931Sandreas.hansson@arm.com    if (bi->loopHit>=0) {
2738931Sandreas.hansson@arm.com        int index = lindex(pc);
2748931Sandreas.hansson@arm.com        if (taken != ltable[index].dir) {
2758931Sandreas.hansson@arm.com            ltable[index].currentIterSpec = 0;
2768931Sandreas.hansson@arm.com        } else {
2778931Sandreas.hansson@arm.com            ltable[index].currentIterSpec++;
2788931Sandreas.hansson@arm.com        }
2792391SN/A    }
2808931Sandreas.hansson@arm.com}
2818931Sandreas.hansson@arm.com
2828931Sandreas.hansson@arm.comvoid
2838931Sandreas.hansson@arm.comLTAGE::loopUpdate(Addr pc, bool taken, BranchInfo* bi)
2848931Sandreas.hansson@arm.com{
2858931Sandreas.hansson@arm.com    int idx = bi->loopIndex + bi->loopHit;
2868931Sandreas.hansson@arm.com    if (bi->loopHit >= 0) {
2878931Sandreas.hansson@arm.com        //already a hit
2888931Sandreas.hansson@arm.com        if (bi->loopPredValid) {
28911614Sdavid.j.hashe@gmail.com            if (taken != bi->loopPred) {
29011614Sdavid.j.hashe@gmail.com                // free the entry
29111614Sdavid.j.hashe@gmail.com                ltable[idx].numIter = 0;
29211614Sdavid.j.hashe@gmail.com                ltable[idx].age = 0;
29311614Sdavid.j.hashe@gmail.com                ltable[idx].confidence = 0;
29411614Sdavid.j.hashe@gmail.com                ltable[idx].currentIter = 0;
29511614Sdavid.j.hashe@gmail.com                return;
29611614Sdavid.j.hashe@gmail.com            } else if (bi->loopPred != bi->tagePred) {
2978931Sandreas.hansson@arm.com                DPRINTF(LTage, "Loop Prediction success:%lx\n",pc);
2988931Sandreas.hansson@arm.com                if (ltable[idx].age < 7)
2998931Sandreas.hansson@arm.com                    ltable[idx].age++;
3008931Sandreas.hansson@arm.com            }
3018931Sandreas.hansson@arm.com        }
3028931Sandreas.hansson@arm.com
3038931Sandreas.hansson@arm.com        ltable[idx].currentIter++;
3048931Sandreas.hansson@arm.com        if (ltable[idx].currentIter > ltable[idx].numIter) {
3058931Sandreas.hansson@arm.com            ltable[idx].confidence = 0;
3068931Sandreas.hansson@arm.com            if (ltable[idx].numIter != 0) {
3078931Sandreas.hansson@arm.com                // free the entry
3088931Sandreas.hansson@arm.com                ltable[idx].numIter = 0;
3098931Sandreas.hansson@arm.com                ltable[idx].age = 0;
3108931Sandreas.hansson@arm.com                ltable[idx].confidence = 0;
3118931Sandreas.hansson@arm.com            }
3128931Sandreas.hansson@arm.com        }
3138931Sandreas.hansson@arm.com
3148931Sandreas.hansson@arm.com        if (taken != ltable[idx].dir) {
3158931Sandreas.hansson@arm.com            if (ltable[idx].currentIter == ltable[idx].numIter) {
3168719SN/A                DPRINTF(LTage, "Loop End predicted successfully:%lx\n", pc);
3178719SN/A
31811169Sandreas.hansson@arm.com                if (ltable[idx].confidence < 7) {
3198719SN/A                    ltable[idx].confidence++;
3202391SN/A                }
3212391SN/A                //just do not predict when the loop count is 1 or 2
3228931Sandreas.hansson@arm.com                if (ltable[idx].numIter < 3) {
323                    // free the entry
324                    ltable[idx].dir = taken;
325                    ltable[idx].numIter = 0;
326                    ltable[idx].age = 0;
327                    ltable[idx].confidence = 0;
328                }
329            } else {
330                DPRINTF(LTage, "Loop End predicted incorrectly:%lx\n", pc);
331                if (ltable[idx].numIter == 0) {
332                    // first complete nest;
333                    ltable[idx].confidence = 0;
334                    ltable[idx].numIter = ltable[idx].currentIter;
335                } else {
336                    //not the same number of iterations as last time: free the
337                    //entry
338                    ltable[idx].numIter = 0;
339                    ltable[idx].age = 0;
340                    ltable[idx].confidence = 0;
341                }
342            }
343            ltable[idx].currentIter = 0;
344        }
345
346    } else if (taken) {
347        //try to allocate an entry on taken branch
348        int nrand = random_mt.random<int>();
349        for (int i = 0; i < 4; i++) {
350            int loop_hit = (nrand + i) & 3;
351            idx = bi->loopIndex + loop_hit;
352            if (ltable[idx].age == 0) {
353                DPRINTF(LTage, "Allocating loop pred entry for branch %lx\n",
354                        pc);
355                ltable[idx].dir = !taken;
356                ltable[idx].tag = bi->loopTag;
357                ltable[idx].numIter = 0;
358                ltable[idx].age = 7;
359                ltable[idx].confidence = 0;
360                ltable[idx].currentIter = 1;
361                break;
362
363            }
364            else
365                ltable[idx].age--;
366        }
367    }
368
369}
370
371// shifting the global history:  we manage the history in a big table in order
372// to reduce simulation time
373void
374LTAGE::updateGHist(uint8_t * &h, bool dir, uint8_t * tab, int &pt)
375{
376    if (pt == 0) {
377        DPRINTF(LTage, "Rolling over the histories\n");
378         // Copy beginning of globalHistoryBuffer to end, such that
379         // the last maxHist outcomes are still reachable
380         // through pt[0 .. maxHist - 1].
381         for (int i = 0; i < maxHist; i++)
382             tab[histBufferSize - maxHist + i] = tab[i];
383         pt =  histBufferSize - maxHist;
384         h = &tab[pt];
385    }
386    pt--;
387    h--;
388    h[0] = (dir) ? 1 : 0;
389}
390
391// Get GHR for hashing indirect predictor
392// Build history backwards from pointer in
393// bp_history.
394unsigned
395LTAGE::getGHR(ThreadID tid, void *bp_history) const
396{
397    BranchInfo* bi = static_cast<BranchInfo*>(bp_history);
398    unsigned val = 0;
399    for (unsigned i = 0; i < 32; i++) {
400        // Make sure we don't go out of bounds
401        int gh_offset = bi->ptGhist + i;
402        assert(&(threadHistory[tid].globalHistory[gh_offset]) <
403               threadHistory[tid].globalHistory + histBufferSize);
404        val |= ((threadHistory[tid].globalHistory[gh_offset] & 0x1) << i);
405    }
406
407    return val;
408}
409
410//prediction
411bool
412LTAGE::predict(ThreadID tid, Addr branch_pc, bool cond_branch, void* &b)
413{
414    BranchInfo *bi = new BranchInfo(nHistoryTables+1);
415    b = (void*)(bi);
416    Addr pc = branch_pc;
417    bool pred_taken = true;
418    bi->loopHit = -1;
419
420    if (cond_branch) {
421        // TAGE prediction
422
423        // computes the table addresses and the partial tags
424        for (int i = 1; i <= nHistoryTables; i++) {
425            tableIndices[i] = gindex(tid, pc, i);
426            bi->tableIndices[i] = tableIndices[i];
427            tableTags[i] = gtag(tid, pc, i);
428            bi->tableTags[i] = tableTags[i];
429        }
430
431        bi->bimodalIndex = bindex(pc);
432
433        bi->hitBank = 0;
434        bi->altBank = 0;
435        //Look for the bank with longest matching history
436        for (int i = nHistoryTables; i > 0; i--) {
437            if (gtable[i][tableIndices[i]].tag == tableTags[i]) {
438                bi->hitBank = i;
439                bi->hitBankIndex = tableIndices[bi->hitBank];
440                break;
441            }
442        }
443        //Look for the alternate bank
444        for (int i = bi->hitBank - 1; i > 0; i--) {
445            if (gtable[i][tableIndices[i]].tag == tableTags[i]) {
446                bi->altBank = i;
447                bi->altBankIndex = tableIndices[bi->altBank];
448                break;
449            }
450        }
451        //computes the prediction and the alternate prediction
452        if (bi->hitBank > 0) {
453            if (bi->altBank > 0) {
454                bi->altTaken =
455                    gtable[bi->altBank][tableIndices[bi->altBank]].ctr >= 0;
456            }else {
457                bi->altTaken = getBimodePred(pc, bi);
458            }
459
460            bi->longestMatchPred =
461                gtable[bi->hitBank][tableIndices[bi->hitBank]].ctr >= 0;
462            bi->pseudoNewAlloc =
463                abs(2 * gtable[bi->hitBank][bi->hitBankIndex].ctr + 1) <= 1;
464
465            //if the entry is recognized as a newly allocated entry and
466            //useAltPredForNewlyAllocated is positive use the alternate
467            //prediction
468            if ((useAltPredForNewlyAllocated < 0)
469                   || abs(2 *
470                   gtable[bi->hitBank][tableIndices[bi->hitBank]].ctr + 1) > 1)
471                bi->tagePred = bi->longestMatchPred;
472            else
473                bi->tagePred = bi->altTaken;
474        } else {
475            bi->altTaken = getBimodePred(pc, bi);
476            bi->tagePred = bi->altTaken;
477            bi->longestMatchPred = bi->altTaken;
478        }
479        //end TAGE prediction
480
481        bi->loopPred = getLoop(pc, bi);	// loop prediction
482
483        pred_taken = (((loopUseCounter >= 0) && bi->loopPredValid)) ?
484                     (bi->loopPred): (bi->tagePred);
485        DPRINTF(LTage, "Predict for %lx: taken?:%d, loopTaken?:%d, "
486                "loopValid?:%d, loopUseCounter:%d, tagePred:%d, altPred:%d\n",
487                branch_pc, pred_taken, bi->loopPred, bi->loopPredValid,
488                loopUseCounter, bi->tagePred, bi->altTaken);
489    }
490    bi->branchPC = branch_pc;
491    bi->condBranch = cond_branch;
492    specLoopUpdate(branch_pc, pred_taken, bi);
493    return pred_taken;
494}
495
496// PREDICTOR UPDATE
497void
498LTAGE::update(ThreadID tid, Addr branch_pc, bool taken, void* bp_history,
499              bool squashed)
500{
501    assert(bp_history);
502
503    BranchInfo *bi = static_cast<BranchInfo*>(bp_history);
504
505    if (squashed) {
506        // This restores the global history, then update it
507        // and recomputes the folded histories.
508        squash(tid, taken, bp_history);
509        return;
510    }
511
512    int nrand  = random_mt.random<int>(0,3);
513    Addr pc = branch_pc;
514    if (bi->condBranch) {
515        DPRINTF(LTage, "Updating tables for branch:%lx; taken?:%d\n",
516                branch_pc, taken);
517        // first update the loop predictor
518        loopUpdate(pc, taken, bi);
519
520        if (bi->loopPredValid) {
521            if (bi->tagePred != bi->loopPred) {
522                ctrUpdate(loopUseCounter, (bi->loopPred== taken), 7);
523            }
524        }
525
526        // TAGE UPDATE
527        // try to allocate a  new entries only if prediction was wrong
528        bool longest_match_pred = false;
529        bool alloc = (bi->tagePred != taken) && (bi->hitBank < nHistoryTables);
530        if (bi->hitBank > 0) {
531            // Manage the selection between longest matching and alternate
532            // matching for "pseudo"-newly allocated longest matching entry
533             longest_match_pred = bi->longestMatchPred;
534            bool PseudoNewAlloc = bi->pseudoNewAlloc;
535            // an entry is considered as newly allocated if its prediction
536            // counter is weak
537            if (PseudoNewAlloc) {
538                if (longest_match_pred == taken) {
539                    alloc = false;
540                }
541                // if it was delivering the correct prediction, no need to
542                // allocate new entry even if the overall prediction was false
543                if (longest_match_pred != bi->altTaken) {
544                    ctrUpdate(useAltPredForNewlyAllocated,
545                         bi->altTaken == taken, 4);
546                }
547            }
548        }
549
550        if (alloc) {
551            // is there some "unuseful" entry to allocate
552            int8_t min = 1;
553            for (int i = nHistoryTables; i > bi->hitBank; i--) {
554                if (gtable[i][bi->tableIndices[i]].u < min) {
555                    min = gtable[i][bi->tableIndices[i]].u;
556                }
557            }
558
559            // we allocate an entry with a longer history
560            // to  avoid ping-pong, we do not choose systematically the next
561            // entry, but among the 3 next entries
562            int Y = nrand &
563                ((ULL(1) << (nHistoryTables - bi->hitBank - 1)) - 1);
564            int X = bi->hitBank + 1;
565            if (Y & 1) {
566                X++;
567                if (Y & 2)
568                    X++;
569            }
570            // No entry available, forces one to be available
571            if (min > 0) {
572                gtable[X][bi->tableIndices[X]].u = 0;
573            }
574
575
576            //Allocate only  one entry
577            for (int i = X; i <= nHistoryTables; i++) {
578                if ((gtable[i][bi->tableIndices[i]].u == 0)) {
579                    gtable[i][bi->tableIndices[i]].tag = bi->tableTags[i];
580                    gtable[i][bi->tableIndices[i]].ctr = (taken) ? 0 : -1;
581                    gtable[i][bi->tableIndices[i]].u = 0; //?
582                }
583            }
584        }
585        //periodic reset of u: reset is not complete but bit by bit
586        tCounter++;
587        if ((tCounter & ((ULL(1) << logTick) - 1)) == 0) {
588            // reset least significant bit
589            // most significant bit becomes least significant bit
590            for (int i = 1; i <= nHistoryTables; i++) {
591                for (int j = 0; j < (ULL(1) << tagTableSizes[i]); j++) {
592                    gtable[i][j].u = gtable[i][j].u >> 1;
593                }
594            }
595        }
596
597        if (bi->hitBank > 0) {
598            DPRINTF(LTage, "Updating tag table entry (%d,%d) for branch %lx\n",
599                    bi->hitBank, bi->hitBankIndex, branch_pc);
600            ctrUpdate(gtable[bi->hitBank][bi->hitBankIndex].ctr, taken,
601                      tagTableCounterBits);
602            // if the provider entry is not certified to be useful also update
603            // the alternate prediction
604            if (gtable[bi->hitBank][bi->hitBankIndex].u == 0) {
605                if (bi->altBank > 0) {
606                    ctrUpdate(gtable[bi->altBank][bi->altBankIndex].ctr, taken,
607                              tagTableCounterBits);
608                    DPRINTF(LTage, "Updating tag table entry (%d,%d) for"
609                            " branch %lx\n", bi->hitBank, bi->hitBankIndex,
610                            branch_pc);
611                }
612                if (bi->altBank == 0) {
613                    baseUpdate(pc, taken, bi);
614                }
615            }
616
617            // update the u counter
618            if (longest_match_pred != bi->altTaken) {
619                if (longest_match_pred == taken) {
620                    if (gtable[bi->hitBank][bi->hitBankIndex].u < 1) {
621                        gtable[bi->hitBank][bi->hitBankIndex].u++;
622                    }
623                }
624            }
625        } else {
626            baseUpdate(pc, taken, bi);
627        }
628
629        //END PREDICTOR UPDATE
630    }
631    if (!squashed) {
632        delete bi;
633    }
634}
635
636void
637LTAGE::updateHistories(ThreadID tid, Addr branch_pc, bool taken, void* b)
638{
639    BranchInfo* bi = (BranchInfo*)(b);
640    ThreadHistory& tHist = threadHistory[tid];
641    //  UPDATE HISTORIES
642    bool pathbit = ((branch_pc >> instShiftAmt) & 1);
643    //on a squash, return pointers to this and recompute indices.
644    //update user history
645    updateGHist(tHist.gHist, taken, tHist.globalHistory, tHist.ptGhist);
646    tHist.pathHist = (tHist.pathHist << 1) + pathbit;
647    tHist.pathHist = (tHist.pathHist & ((ULL(1) << 16) - 1));
648
649    bi->ptGhist = tHist.ptGhist;
650    bi->pathHist = tHist.pathHist;
651    //prepare next index and tag computations for user branchs
652    for (int i = 1; i <= nHistoryTables; i++)
653    {
654        bi->ci[i]  = tHist.computeIndices[i].comp;
655        bi->ct0[i] = tHist.computeTags[0][i].comp;
656        bi->ct1[i] = tHist.computeTags[1][i].comp;
657        tHist.computeIndices[i].update(tHist.gHist);
658        tHist.computeTags[0][i].update(tHist.gHist);
659        tHist.computeTags[1][i].update(tHist.gHist);
660    }
661    DPRINTF(LTage, "Updating global histories with branch:%lx; taken?:%d, "
662            "path Hist: %x; pointer:%d\n", branch_pc, taken, tHist.pathHist,
663            tHist.ptGhist);
664}
665
666void
667LTAGE::squash(ThreadID tid, bool taken, void *bp_history)
668{
669    BranchInfo* bi = (BranchInfo*)(bp_history);
670    ThreadHistory& tHist = threadHistory[tid];
671    DPRINTF(LTage, "Restoring branch info: %lx; taken? %d; PathHistory:%x, "
672            "pointer:%d\n", bi->branchPC,taken, bi->pathHist, bi->ptGhist);
673    tHist.pathHist = bi->pathHist;
674    tHist.ptGhist = bi->ptGhist;
675    tHist.gHist = &(tHist.globalHistory[tHist.ptGhist]);
676    tHist.gHist[0] = (taken ? 1 : 0);
677    for (int i = 1; i <= nHistoryTables; i++) {
678        tHist.computeIndices[i].comp = bi->ci[i];
679        tHist.computeTags[0][i].comp = bi->ct0[i];
680        tHist.computeTags[1][i].comp = bi->ct1[i];
681        tHist.computeIndices[i].update(tHist.gHist);
682        tHist.computeTags[0][i].update(tHist.gHist);
683        tHist.computeTags[1][i].update(tHist.gHist);
684    }
685
686    if (bi->condBranch) {
687        if (bi->loopHit >= 0) {
688            int idx = bi->loopIndex + bi->loopHit;
689            ltable[idx].currentIterSpec = bi->currentIter;
690        }
691    }
692
693}
694
695void
696LTAGE::squash(ThreadID tid, void *bp_history)
697{
698    BranchInfo* bi = (BranchInfo*)(bp_history);
699    DPRINTF(LTage, "Deleting branch info: %lx\n", bi->branchPC);
700    if (bi->condBranch) {
701        if (bi->loopHit >= 0) {
702            int idx = bi->loopIndex + bi->loopHit;
703            ltable[idx].currentIterSpec = bi->currentIter;
704        }
705    }
706
707    delete bi;
708}
709
710bool
711LTAGE::lookup(ThreadID tid, Addr branch_pc, void* &bp_history)
712{
713    bool retval = predict(tid, branch_pc, true, bp_history);
714
715    DPRINTF(LTage, "Lookup branch: %lx; predict:%d\n", branch_pc, retval);
716    updateHistories(tid, branch_pc, retval, bp_history);
717    assert(threadHistory[tid].gHist ==
718           &threadHistory[tid].globalHistory[threadHistory[tid].ptGhist]);
719
720    return retval;
721}
722
723void
724LTAGE::btbUpdate(ThreadID tid, Addr branch_pc, void* &bp_history)
725{
726    BranchInfo* bi = (BranchInfo*) bp_history;
727    ThreadHistory& tHist = threadHistory[tid];
728    DPRINTF(LTage, "BTB miss resets prediction: %lx\n", branch_pc);
729    assert(tHist.gHist == &tHist.globalHistory[tHist.ptGhist]);
730    tHist.gHist[0] = 0;
731    for (int i = 1; i <= nHistoryTables; i++) {
732        tHist.computeIndices[i].comp = bi->ci[i];
733        tHist.computeTags[0][i].comp = bi->ct0[i];
734        tHist.computeTags[1][i].comp = bi->ct1[i];
735        tHist.computeIndices[i].update(tHist.gHist);
736        tHist.computeTags[0][i].update(tHist.gHist);
737        tHist.computeTags[1][i].update(tHist.gHist);
738    }
739}
740
741void
742LTAGE::uncondBranch(ThreadID tid, Addr br_pc, void* &bp_history)
743{
744    DPRINTF(LTage, "UnConditionalBranch: %lx\n", br_pc);
745    predict(tid, br_pc, false, bp_history);
746    updateHistories(tid, br_pc, true, bp_history);
747    assert(threadHistory[tid].gHist ==
748           &threadHistory[tid].globalHistory[threadHistory[tid].ptGhist]);
749}
750
751LTAGE*
752LTAGEParams::create()
753{
754    return new LTAGE(this);
755}
756