ltage.cc (13444:26f81be73cb7) ltage.cc (13454:19a5b4fb1f1f)
1/*
2 * Copyright (c) 2014 The University of Wisconsin
3 *
4 * Copyright (c) 2006 INRIA (Institut National de Recherche en
5 * Informatique et en Automatique / French National Research Institute
6 * for Computer Science and Applied Mathematics)
7 *
8 * All rights reserved.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions are
12 * met: redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer;
14 * redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution;
17 * neither the name of the copyright holders nor the names of its
18 * contributors may be used to endorse or promote products derived from
19 * this software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 *
33 * Authors: Vignyan Reddy, Dibakar Gope and Arthur Perais,
34 * from André Seznec's code.
35 */
36
37/* @file
38 * Implementation of a L-TAGE branch predictor
39 */
40
41#include "cpu/pred/ltage.hh"
42
43#include "base/intmath.hh"
44#include "base/logging.hh"
45#include "base/random.hh"
46#include "base/trace.hh"
47#include "debug/Fetch.hh"
48#include "debug/LTage.hh"
49
50LTAGE::LTAGE(const LTAGEParams *params)
1/*
2 * Copyright (c) 2014 The University of Wisconsin
3 *
4 * Copyright (c) 2006 INRIA (Institut National de Recherche en
5 * Informatique et en Automatique / French National Research Institute
6 * for Computer Science and Applied Mathematics)
7 *
8 * All rights reserved.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions are
12 * met: redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer;
14 * redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution;
17 * neither the name of the copyright holders nor the names of its
18 * contributors may be used to endorse or promote products derived from
19 * this software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 *
33 * Authors: Vignyan Reddy, Dibakar Gope and Arthur Perais,
34 * from André Seznec's code.
35 */
36
37/* @file
38 * Implementation of a L-TAGE branch predictor
39 */
40
41#include "cpu/pred/ltage.hh"
42
43#include "base/intmath.hh"
44#include "base/logging.hh"
45#include "base/random.hh"
46#include "base/trace.hh"
47#include "debug/Fetch.hh"
48#include "debug/LTage.hh"
49
50LTAGE::LTAGE(const LTAGEParams *params)
51 : BPredUnit(params),
52 logRatioBiModalHystEntries(params->logRatioBiModalHystEntries),
51 : TAGE(params),
53 logSizeLoopPred(params->logSizeLoopPred),
52 logSizeLoopPred(params->logSizeLoopPred),
54 nHistoryTables(params->nHistoryTables),
55 tagTableCounterBits(params->tagTableCounterBits),
56 tagTableUBits(params->tagTableUBits),
57 histBufferSize(params->histBufferSize),
58 minHist(params->minHist),
59 maxHist(params->maxHist),
60 pathHistBits(params->pathHistBits),
61 loopTableAgeBits(params->loopTableAgeBits),
62 loopTableConfidenceBits(params->loopTableConfidenceBits),
63 loopTableTagBits(params->loopTableTagBits),
64 loopTableIterBits(params->loopTableIterBits),
65 logLoopTableAssoc(params->logLoopTableAssoc),
66 confidenceThreshold((1 << loopTableConfidenceBits) - 1),
67 loopTagMask((1 << loopTableTagBits) - 1),
68 loopNumIterMask((1 << loopTableIterBits) - 1),
53 loopTableAgeBits(params->loopTableAgeBits),
54 loopTableConfidenceBits(params->loopTableConfidenceBits),
55 loopTableTagBits(params->loopTableTagBits),
56 loopTableIterBits(params->loopTableIterBits),
57 logLoopTableAssoc(params->logLoopTableAssoc),
58 confidenceThreshold((1 << loopTableConfidenceBits) - 1),
59 loopTagMask((1 << loopTableTagBits) - 1),
60 loopNumIterMask((1 << loopTableIterBits) - 1),
69 tagTableTagWidths(params->tagTableTagWidths),
70 logTagTableSizes(params->logTagTableSizes),
71 threadHistory(params->numThreads),
72 logUResetPeriod(params->logUResetPeriod),
73 useAltOnNaBits(params->useAltOnNaBits),
61 loopUseCounter(0),
74 withLoopBits(params->withLoopBits)
75{
62 withLoopBits(params->withLoopBits)
63{
76 // Current method for periodically resetting the u counter bits only
77 // works for 1 or 2 bits
78 // Also make sure that it is not 0
79 assert(tagTableUBits <= 2 && (tagTableUBits > 0));
80
81 // we use uint16_t type for these values, so they cannot be more than
82 // 16 bits
83 assert(loopTableTagBits <= 16);
84 assert(loopTableIterBits <= 16);
85
86 assert(logSizeLoopPred >= logLoopTableAssoc);
87
64 // we use uint16_t type for these values, so they cannot be more than
65 // 16 bits
66 assert(loopTableTagBits <= 16);
67 assert(loopTableIterBits <= 16);
68
69 assert(logSizeLoopPred >= logLoopTableAssoc);
70
88 // we use int type for the path history, so it cannot be more than
89 // its size
90 assert(pathHistBits <= (sizeof(int)*8));
91
92 // initialize the counter to half of the period
93 assert(logUResetPeriod != 0);
94 tCounter = ULL(1) << (logUResetPeriod - 1);
95
96 assert(params->histBufferSize > params->maxHist * 2);
97 useAltPredForNewlyAllocated = 0;
98
99 for (auto& history : threadHistory) {
100 history.pathHist = 0;
101 history.globalHistory = new uint8_t[histBufferSize];
102 history.gHist = history.globalHistory;
103 memset(history.gHist, 0, histBufferSize);
104 history.ptGhist = 0;
105 }
106
107 histLengths = new int [nHistoryTables+1];
108 histLengths[1] = minHist;
109 histLengths[nHistoryTables] = maxHist;
110
111 for (int i = 2; i <= nHistoryTables; i++) {
112 histLengths[i] = (int) (((double) minHist *
113 pow ((double) (maxHist) / (double) minHist,
114 (double) (i - 1) / (double) ((nHistoryTables- 1))))
115 + 0.5);
116 }
117
118 assert(tagTableTagWidths.size() == (nHistoryTables+1));
119 assert(logTagTableSizes.size() == (nHistoryTables+1));
120
121 // First entry is for the Bimodal table and it is untagged in this
122 // implementation
123 assert(tagTableTagWidths[0] == 0);
124
125 for (auto& history : threadHistory) {
126 history.computeIndices = new FoldedHistory[nHistoryTables+1];
127 history.computeTags[0] = new FoldedHistory[nHistoryTables+1];
128 history.computeTags[1] = new FoldedHistory[nHistoryTables+1];
129
130 for (int i = 1; i <= nHistoryTables; i++) {
131 history.computeIndices[i].init(
132 histLengths[i], (logTagTableSizes[i]));
133 history.computeTags[0][i].init(
134 history.computeIndices[i].origLength, tagTableTagWidths[i]);
135 history.computeTags[1][i].init(
136 history.computeIndices[i].origLength, tagTableTagWidths[i]-1);
137 DPRINTF(LTage, "HistLength:%d, TTSize:%d, TTTWidth:%d\n",
138 histLengths[i], logTagTableSizes[i], tagTableTagWidths[i]);
139 }
140 }
141
142 const uint64_t bimodalTableSize = ULL(1) << logTagTableSizes[0];
143 btablePrediction.resize(bimodalTableSize, false);
144 btableHysteresis.resize(bimodalTableSize >> logRatioBiModalHystEntries,
145 true);
146
147 ltable = new LoopEntry[ULL(1) << logSizeLoopPred];
71 ltable = new LoopEntry[ULL(1) << logSizeLoopPred];
148 gtable = new TageEntry*[nHistoryTables + 1];
149 for (int i = 1; i <= nHistoryTables; i++) {
150 gtable[i] = new TageEntry[1<<(logTagTableSizes[i])];
151 }
152
153 tableIndices = new int [nHistoryTables+1];
154 tableTags = new int [nHistoryTables+1];
155
156 loopUseCounter = 0;
157}
158
159int
72}
73
74int
160LTAGE::bindex(Addr pc_in) const
161{
162 return ((pc_in >> instShiftAmt) & ((ULL(1) << (logTagTableSizes[0])) - 1));
163}
164
165int
166LTAGE::lindex(Addr pc_in) const
167{
168 // The loop table is implemented as a linear table
169 // If associativity is N (N being 1 << logLoopTableAssoc),
170 // the first N entries are for set 0, the next N entries are for set 1,
171 // and so on.
172 // Thus, this function calculates the set and then it gets left shifted
173 // by logLoopTableAssoc in order to return the index of the first of the
174 // N entries of the set
175 Addr mask = (ULL(1) << (logSizeLoopPred - logLoopTableAssoc)) - 1;
176 return (((pc_in >> instShiftAmt) & mask) << logLoopTableAssoc);
177}
178
75LTAGE::lindex(Addr pc_in) const
76{
77 // The loop table is implemented as a linear table
78 // If associativity is N (N being 1 << logLoopTableAssoc),
79 // the first N entries are for set 0, the next N entries are for set 1,
80 // and so on.
81 // Thus, this function calculates the set and then it gets left shifted
82 // by logLoopTableAssoc in order to return the index of the first of the
83 // N entries of the set
84 Addr mask = (ULL(1) << (logSizeLoopPred - logLoopTableAssoc)) - 1;
85 return (((pc_in >> instShiftAmt) & mask) << logLoopTableAssoc);
86}
87
179int
180LTAGE::F(int A, int size, int bank) const
181{
182 int A1, A2;
183
184 A = A & ((ULL(1) << size) - 1);
185 A1 = (A & ((ULL(1) << logTagTableSizes[bank]) - 1));
186 A2 = (A >> logTagTableSizes[bank]);
187 A2 = ((A2 << bank) & ((ULL(1) << logTagTableSizes[bank]) - 1))
188 + (A2 >> (logTagTableSizes[bank] - bank));
189 A = A1 ^ A2;
190 A = ((A << bank) & ((ULL(1) << logTagTableSizes[bank]) - 1))
191 + (A >> (logTagTableSizes[bank] - bank));
192 return (A);
193}
194
195
196// gindex computes a full hash of pc, ghist and pathHist
197int
198LTAGE::gindex(ThreadID tid, Addr pc, int bank) const
199{
200 int index;
201 int hlen = (histLengths[bank] > pathHistBits) ? pathHistBits :
202 histLengths[bank];
203 const Addr shiftedPc = pc >> instShiftAmt;
204 index =
205 shiftedPc ^
206 (shiftedPc >> ((int) abs(logTagTableSizes[bank] - bank) + 1)) ^
207 threadHistory[tid].computeIndices[bank].comp ^
208 F(threadHistory[tid].pathHist, hlen, bank);
209
210 return (index & ((ULL(1) << (logTagTableSizes[bank])) - 1));
211}
212
213
214// Tag computation
215uint16_t
216LTAGE::gtag(ThreadID tid, Addr pc, int bank) const
217{
218 int tag = (pc >> instShiftAmt) ^
219 threadHistory[tid].computeTags[0][bank].comp ^
220 (threadHistory[tid].computeTags[1][bank].comp << 1);
221
222 return (tag & ((ULL(1) << tagTableTagWidths[bank]) - 1));
223}
224
225
226// Up-down saturating counter
227void
228LTAGE::ctrUpdate(int8_t & ctr, bool taken, int nbits)
229{
230 assert(nbits <= sizeof(int8_t) << 3);
231 if (taken) {
232 if (ctr < ((1 << (nbits - 1)) - 1))
233 ctr++;
234 } else {
235 if (ctr > -(1 << (nbits - 1)))
236 ctr--;
237 }
238}
239
240// Up-down unsigned saturating counter
241void
242LTAGE::unsignedCtrUpdate(uint8_t & ctr, bool up, unsigned nbits)
243{
244 assert(nbits <= sizeof(uint8_t) << 3);
245 if (up) {
246 if (ctr < ((1 << nbits) - 1))
247 ctr++;
248 } else {
249 if (ctr)
250 ctr--;
251 }
252}
253
254// Bimodal prediction
255bool
256LTAGE::getBimodePred(Addr pc, BranchInfo* bi) const
257{
258 return btablePrediction[bi->bimodalIndex];
259}
260
261
262// Update the bimodal predictor: a hysteresis bit is shared among N prediction
263// bits (N = 2 ^ logRatioBiModalHystEntries)
264void
265LTAGE::baseUpdate(Addr pc, bool taken, BranchInfo* bi)
266{
267 int inter = (btablePrediction[bi->bimodalIndex] << 1)
268 + btableHysteresis[bi->bimodalIndex >> logRatioBiModalHystEntries];
269 if (taken) {
270 if (inter < 3)
271 inter++;
272 } else if (inter > 0) {
273 inter--;
274 }
275 const bool pred = inter >> 1;
276 const bool hyst = inter & 1;
277 btablePrediction[bi->bimodalIndex] = pred;
278 btableHysteresis[bi->bimodalIndex >> logRatioBiModalHystEntries] = hyst;
279 DPRINTF(LTage, "Updating branch %lx, pred:%d, hyst:%d\n", pc, pred, hyst);
280}
281
282
283//loop prediction: only used if high confidence
284bool
88//loop prediction: only used if high confidence
89bool
285LTAGE::getLoop(Addr pc, BranchInfo* bi) const
90LTAGE::getLoop(Addr pc, LTageBranchInfo* bi) const
286{
287 bi->loopHit = -1;
288 bi->loopPredValid = false;
289 bi->loopIndex = lindex(pc);
290 unsigned pcShift = instShiftAmt + logSizeLoopPred - logLoopTableAssoc;
291 bi->loopTag = ((pc) >> pcShift) & loopTagMask;
292
293 for (int i = 0; i < (1 << logLoopTableAssoc); i++) {
294 if (ltable[bi->loopIndex + i].tag == bi->loopTag) {
295 bi->loopHit = i;
296 bi->loopPredValid =
297 ltable[bi->loopIndex + i].confidence == confidenceThreshold;
298 bi->currentIter = ltable[bi->loopIndex + i].currentIterSpec;
299 if (ltable[bi->loopIndex + i].currentIterSpec + 1 ==
300 ltable[bi->loopIndex + i].numIter) {
301 return !(ltable[bi->loopIndex + i].dir);
302 }else {
303 return (ltable[bi->loopIndex + i].dir);
304 }
305 }
306 }
307 return false;
308}
309
310void
91{
92 bi->loopHit = -1;
93 bi->loopPredValid = false;
94 bi->loopIndex = lindex(pc);
95 unsigned pcShift = instShiftAmt + logSizeLoopPred - logLoopTableAssoc;
96 bi->loopTag = ((pc) >> pcShift) & loopTagMask;
97
98 for (int i = 0; i < (1 << logLoopTableAssoc); i++) {
99 if (ltable[bi->loopIndex + i].tag == bi->loopTag) {
100 bi->loopHit = i;
101 bi->loopPredValid =
102 ltable[bi->loopIndex + i].confidence == confidenceThreshold;
103 bi->currentIter = ltable[bi->loopIndex + i].currentIterSpec;
104 if (ltable[bi->loopIndex + i].currentIterSpec + 1 ==
105 ltable[bi->loopIndex + i].numIter) {
106 return !(ltable[bi->loopIndex + i].dir);
107 }else {
108 return (ltable[bi->loopIndex + i].dir);
109 }
110 }
111 }
112 return false;
113}
114
115void
311LTAGE::specLoopUpdate(Addr pc, bool taken, BranchInfo* bi)
116LTAGE::specLoopUpdate(Addr pc, bool taken, LTageBranchInfo* bi)
312{
313 if (bi->loopHit>=0) {
314 int index = lindex(pc);
315 if (taken != ltable[index].dir) {
316 ltable[index].currentIterSpec = 0;
317 } else {
318 ltable[index].currentIterSpec =
319 (ltable[index].currentIterSpec + 1) & loopNumIterMask;
320 }
321 }
322}
323
324void
117{
118 if (bi->loopHit>=0) {
119 int index = lindex(pc);
120 if (taken != ltable[index].dir) {
121 ltable[index].currentIterSpec = 0;
122 } else {
123 ltable[index].currentIterSpec =
124 (ltable[index].currentIterSpec + 1) & loopNumIterMask;
125 }
126 }
127}
128
129void
325LTAGE::loopUpdate(Addr pc, bool taken, BranchInfo* bi)
130LTAGE::loopUpdate(Addr pc, bool taken, LTageBranchInfo* bi)
326{
327 int idx = bi->loopIndex + bi->loopHit;
328 if (bi->loopHit >= 0) {
329 //already a hit
330 if (bi->loopPredValid) {
331 if (taken != bi->loopPred) {
332 // free the entry
333 ltable[idx].numIter = 0;
334 ltable[idx].age = 0;
335 ltable[idx].confidence = 0;
336 ltable[idx].currentIter = 0;
337 return;
338 } else if (bi->loopPred != bi->tagePred) {
339 DPRINTF(LTage, "Loop Prediction success:%lx\n",pc);
340 unsignedCtrUpdate(ltable[idx].age, true, loopTableAgeBits);
341 }
342 }
343
344 ltable[idx].currentIter =
345 (ltable[idx].currentIter + 1) & loopNumIterMask;
346 if (ltable[idx].currentIter > ltable[idx].numIter) {
347 ltable[idx].confidence = 0;
348 if (ltable[idx].numIter != 0) {
349 // free the entry
350 ltable[idx].numIter = 0;
351 ltable[idx].age = 0;
352 ltable[idx].confidence = 0;
353 }
354 }
355
356 if (taken != ltable[idx].dir) {
357 if (ltable[idx].currentIter == ltable[idx].numIter) {
358 DPRINTF(LTage, "Loop End predicted successfully:%lx\n", pc);
359
360 unsignedCtrUpdate(ltable[idx].confidence, true,
361 loopTableConfidenceBits);
362 //just do not predict when the loop count is 1 or 2
363 if (ltable[idx].numIter < 3) {
364 // free the entry
365 ltable[idx].dir = taken;
366 ltable[idx].numIter = 0;
367 ltable[idx].age = 0;
368 ltable[idx].confidence = 0;
369 }
370 } else {
371 DPRINTF(LTage, "Loop End predicted incorrectly:%lx\n", pc);
372 if (ltable[idx].numIter == 0) {
373 // first complete nest;
374 ltable[idx].confidence = 0;
375 ltable[idx].numIter = ltable[idx].currentIter;
376 } else {
377 //not the same number of iterations as last time: free the
378 //entry
379 ltable[idx].numIter = 0;
380 ltable[idx].age = 0;
381 ltable[idx].confidence = 0;
382 }
383 }
384 ltable[idx].currentIter = 0;
385 }
386
387 } else if (taken) {
388 //try to allocate an entry on taken branch
389 int nrand = random_mt.random<int>();
390 for (int i = 0; i < (1 << logLoopTableAssoc); i++) {
391 int loop_hit = (nrand + i) & ((1 << logLoopTableAssoc) - 1);
392 idx = bi->loopIndex + loop_hit;
393 if (ltable[idx].age == 0) {
394 DPRINTF(LTage, "Allocating loop pred entry for branch %lx\n",
395 pc);
396 ltable[idx].dir = !taken;
397 ltable[idx].tag = bi->loopTag;
398 ltable[idx].numIter = 0;
399 ltable[idx].age = (1 << loopTableAgeBits) - 1;
400 ltable[idx].confidence = 0;
401 ltable[idx].currentIter = 1;
402 break;
403
404 }
405 else
406 ltable[idx].age--;
407 }
408 }
409
410}
411
131{
132 int idx = bi->loopIndex + bi->loopHit;
133 if (bi->loopHit >= 0) {
134 //already a hit
135 if (bi->loopPredValid) {
136 if (taken != bi->loopPred) {
137 // free the entry
138 ltable[idx].numIter = 0;
139 ltable[idx].age = 0;
140 ltable[idx].confidence = 0;
141 ltable[idx].currentIter = 0;
142 return;
143 } else if (bi->loopPred != bi->tagePred) {
144 DPRINTF(LTage, "Loop Prediction success:%lx\n",pc);
145 unsignedCtrUpdate(ltable[idx].age, true, loopTableAgeBits);
146 }
147 }
148
149 ltable[idx].currentIter =
150 (ltable[idx].currentIter + 1) & loopNumIterMask;
151 if (ltable[idx].currentIter > ltable[idx].numIter) {
152 ltable[idx].confidence = 0;
153 if (ltable[idx].numIter != 0) {
154 // free the entry
155 ltable[idx].numIter = 0;
156 ltable[idx].age = 0;
157 ltable[idx].confidence = 0;
158 }
159 }
160
161 if (taken != ltable[idx].dir) {
162 if (ltable[idx].currentIter == ltable[idx].numIter) {
163 DPRINTF(LTage, "Loop End predicted successfully:%lx\n", pc);
164
165 unsignedCtrUpdate(ltable[idx].confidence, true,
166 loopTableConfidenceBits);
167 //just do not predict when the loop count is 1 or 2
168 if (ltable[idx].numIter < 3) {
169 // free the entry
170 ltable[idx].dir = taken;
171 ltable[idx].numIter = 0;
172 ltable[idx].age = 0;
173 ltable[idx].confidence = 0;
174 }
175 } else {
176 DPRINTF(LTage, "Loop End predicted incorrectly:%lx\n", pc);
177 if (ltable[idx].numIter == 0) {
178 // first complete nest;
179 ltable[idx].confidence = 0;
180 ltable[idx].numIter = ltable[idx].currentIter;
181 } else {
182 //not the same number of iterations as last time: free the
183 //entry
184 ltable[idx].numIter = 0;
185 ltable[idx].age = 0;
186 ltable[idx].confidence = 0;
187 }
188 }
189 ltable[idx].currentIter = 0;
190 }
191
192 } else if (taken) {
193 //try to allocate an entry on taken branch
194 int nrand = random_mt.random<int>();
195 for (int i = 0; i < (1 << logLoopTableAssoc); i++) {
196 int loop_hit = (nrand + i) & ((1 << logLoopTableAssoc) - 1);
197 idx = bi->loopIndex + loop_hit;
198 if (ltable[idx].age == 0) {
199 DPRINTF(LTage, "Allocating loop pred entry for branch %lx\n",
200 pc);
201 ltable[idx].dir = !taken;
202 ltable[idx].tag = bi->loopTag;
203 ltable[idx].numIter = 0;
204 ltable[idx].age = (1 << loopTableAgeBits) - 1;
205 ltable[idx].confidence = 0;
206 ltable[idx].currentIter = 1;
207 break;
208
209 }
210 else
211 ltable[idx].age--;
212 }
213 }
214
215}
216
412// shifting the global history: we manage the history in a big table in order
413// to reduce simulation time
414void
415LTAGE::updateGHist(uint8_t * &h, bool dir, uint8_t * tab, int &pt)
416{
417 if (pt == 0) {
418 DPRINTF(LTage, "Rolling over the histories\n");
419 // Copy beginning of globalHistoryBuffer to end, such that
420 // the last maxHist outcomes are still reachable
421 // through pt[0 .. maxHist - 1].
422 for (int i = 0; i < maxHist; i++)
423 tab[histBufferSize - maxHist + i] = tab[i];
424 pt = histBufferSize - maxHist;
425 h = &tab[pt];
426 }
427 pt--;
428 h--;
429 h[0] = (dir) ? 1 : 0;
430}
431
432// Get GHR for hashing indirect predictor
433// Build history backwards from pointer in
434// bp_history.
435unsigned
436LTAGE::getGHR(ThreadID tid, void *bp_history) const
437{
438 BranchInfo* bi = static_cast<BranchInfo*>(bp_history);
439 unsigned val = 0;
440 for (unsigned i = 0; i < 32; i++) {
441 // Make sure we don't go out of bounds
442 int gh_offset = bi->ptGhist + i;
443 assert(&(threadHistory[tid].globalHistory[gh_offset]) <
444 threadHistory[tid].globalHistory + histBufferSize);
445 val |= ((threadHistory[tid].globalHistory[gh_offset] & 0x1) << i);
446 }
447
448 return val;
449}
450
451//prediction
452bool
453LTAGE::predict(ThreadID tid, Addr branch_pc, bool cond_branch, void* &b)
454{
217//prediction
218bool
219LTAGE::predict(ThreadID tid, Addr branch_pc, bool cond_branch, void* &b)
220{
455 BranchInfo *bi = new BranchInfo(nHistoryTables+1);
221 LTageBranchInfo *bi = new LTageBranchInfo(nHistoryTables+1);
456 b = (void*)(bi);
222 b = (void*)(bi);
457 Addr pc = branch_pc;
458 bool pred_taken = true;
459 bi->loopHit = -1;
460
223
224 bool pred_taken = tagePredict(tid, branch_pc, cond_branch, bi);
225
461 if (cond_branch) {
226 if (cond_branch) {
462 // TAGE prediction
227 bi->loopPred = getLoop(branch_pc, bi); // loop prediction
463
228
464 // computes the table addresses and the partial tags
465 for (int i = 1; i <= nHistoryTables; i++) {
466 tableIndices[i] = gindex(tid, pc, i);
467 bi->tableIndices[i] = tableIndices[i];
468 tableTags[i] = gtag(tid, pc, i);
469 bi->tableTags[i] = tableTags[i];
229 if ((loopUseCounter >= 0) && bi->loopPredValid) {
230 pred_taken = bi->loopPred;
470 }
231 }
471
472 bi->bimodalIndex = bindex(pc);
473
474 bi->hitBank = 0;
475 bi->altBank = 0;
476 //Look for the bank with longest matching history
477 for (int i = nHistoryTables; i > 0; i--) {
478 if (gtable[i][tableIndices[i]].tag == tableTags[i]) {
479 bi->hitBank = i;
480 bi->hitBankIndex = tableIndices[bi->hitBank];
481 break;
482 }
483 }
484 //Look for the alternate bank
485 for (int i = bi->hitBank - 1; i > 0; i--) {
486 if (gtable[i][tableIndices[i]].tag == tableTags[i]) {
487 bi->altBank = i;
488 bi->altBankIndex = tableIndices[bi->altBank];
489 break;
490 }
491 }
492 //computes the prediction and the alternate prediction
493 if (bi->hitBank > 0) {
494 if (bi->altBank > 0) {
495 bi->altTaken =
496 gtable[bi->altBank][tableIndices[bi->altBank]].ctr >= 0;
497 }else {
498 bi->altTaken = getBimodePred(pc, bi);
499 }
500
501 bi->longestMatchPred =
502 gtable[bi->hitBank][tableIndices[bi->hitBank]].ctr >= 0;
503 bi->pseudoNewAlloc =
504 abs(2 * gtable[bi->hitBank][bi->hitBankIndex].ctr + 1) <= 1;
505
506 //if the entry is recognized as a newly allocated entry and
507 //useAltPredForNewlyAllocated is positive use the alternate
508 //prediction
509 if ((useAltPredForNewlyAllocated < 0)
510 || abs(2 *
511 gtable[bi->hitBank][tableIndices[bi->hitBank]].ctr + 1) > 1)
512 bi->tagePred = bi->longestMatchPred;
513 else
514 bi->tagePred = bi->altTaken;
515 } else {
516 bi->altTaken = getBimodePred(pc, bi);
517 bi->tagePred = bi->altTaken;
518 bi->longestMatchPred = bi->altTaken;
519 }
520 //end TAGE prediction
521
522 bi->loopPred = getLoop(pc, bi); // loop prediction
523
524 pred_taken = (((loopUseCounter >= 0) && bi->loopPredValid)) ?
525 (bi->loopPred): (bi->tagePred);
526 DPRINTF(LTage, "Predict for %lx: taken?:%d, loopTaken?:%d, "
527 "loopValid?:%d, loopUseCounter:%d, tagePred:%d, altPred:%d\n",
528 branch_pc, pred_taken, bi->loopPred, bi->loopPredValid,
529 loopUseCounter, bi->tagePred, bi->altTaken);
530 }
232 DPRINTF(LTage, "Predict for %lx: taken?:%d, loopTaken?:%d, "
233 "loopValid?:%d, loopUseCounter:%d, tagePred:%d, altPred:%d\n",
234 branch_pc, pred_taken, bi->loopPred, bi->loopPredValid,
235 loopUseCounter, bi->tagePred, bi->altTaken);
236 }
531 bi->branchPC = branch_pc;
532 bi->condBranch = cond_branch;
237
533 specLoopUpdate(branch_pc, pred_taken, bi);
534 return pred_taken;
535}
536
238 specLoopUpdate(branch_pc, pred_taken, bi);
239 return pred_taken;
240}
241
537// PREDICTOR UPDATE
538void
242void
539LTAGE::update(ThreadID tid, Addr branch_pc, bool taken, void* bp_history,
540 bool squashed)
243LTAGE::condBranchUpdate(Addr branch_pc, bool taken,
244 TageBranchInfo* tage_bi, int nrand)
541{
245{
542 assert(bp_history);
246 LTageBranchInfo* bi = static_cast<LTageBranchInfo*>(tage_bi);
543
247
544 BranchInfo *bi = static_cast<BranchInfo*>(bp_history);
248 // first update the loop predictor
249 loopUpdate(branch_pc, taken, bi);
545
250
546 if (squashed) {
547 // This restores the global history, then update it
548 // and recomputes the folded histories.
549 squash(tid, taken, bp_history);
550 return;
551 }
552
553 int nrand = random_mt.random<int>(0,3);
554 Addr pc = branch_pc;
555 if (bi->condBranch) {
556 DPRINTF(LTage, "Updating tables for branch:%lx; taken?:%d\n",
557 branch_pc, taken);
558 // first update the loop predictor
559 loopUpdate(pc, taken, bi);
560
561 if (bi->loopPredValid) {
562 if (bi->tagePred != bi->loopPred) {
563 ctrUpdate(loopUseCounter,
564 (bi->loopPred == taken),
565 withLoopBits);
566 }
251 if (bi->loopPredValid) {
252 if (bi->tagePred != bi->loopPred) {
253 ctrUpdate(loopUseCounter,
254 (bi->loopPred == taken),
255 withLoopBits);
567 }
256 }
568
569 // TAGE UPDATE
570 // try to allocate a new entries only if prediction was wrong
571 bool longest_match_pred = false;
572 bool alloc = (bi->tagePred != taken) && (bi->hitBank < nHistoryTables);
573 if (bi->hitBank > 0) {
574 // Manage the selection between longest matching and alternate
575 // matching for "pseudo"-newly allocated longest matching entry
576 longest_match_pred = bi->longestMatchPred;
577 bool PseudoNewAlloc = bi->pseudoNewAlloc;
578 // an entry is considered as newly allocated if its prediction
579 // counter is weak
580 if (PseudoNewAlloc) {
581 if (longest_match_pred == taken) {
582 alloc = false;
583 }
584 // if it was delivering the correct prediction, no need to
585 // allocate new entry even if the overall prediction was false
586 if (longest_match_pred != bi->altTaken) {
587 ctrUpdate(useAltPredForNewlyAllocated,
588 bi->altTaken == taken, useAltOnNaBits);
589 }
590 }
591 }
592
593 if (alloc) {
594 // is there some "unuseful" entry to allocate
595 uint8_t min = 1;
596 for (int i = nHistoryTables; i > bi->hitBank; i--) {
597 if (gtable[i][bi->tableIndices[i]].u < min) {
598 min = gtable[i][bi->tableIndices[i]].u;
599 }
600 }
601
602 // we allocate an entry with a longer history
603 // to avoid ping-pong, we do not choose systematically the next
604 // entry, but among the 3 next entries
605 int Y = nrand &
606 ((ULL(1) << (nHistoryTables - bi->hitBank - 1)) - 1);
607 int X = bi->hitBank + 1;
608 if (Y & 1) {
609 X++;
610 if (Y & 2)
611 X++;
612 }
613 // No entry available, forces one to be available
614 if (min > 0) {
615 gtable[X][bi->tableIndices[X]].u = 0;
616 }
617
618
619 //Allocate only one entry
620 for (int i = X; i <= nHistoryTables; i++) {
621 if ((gtable[i][bi->tableIndices[i]].u == 0)) {
622 gtable[i][bi->tableIndices[i]].tag = bi->tableTags[i];
623 gtable[i][bi->tableIndices[i]].ctr = (taken) ? 0 : -1;
624 break;
625 }
626 }
627 }
628 //periodic reset of u: reset is not complete but bit by bit
629 tCounter++;
630 if ((tCounter & ((ULL(1) << logUResetPeriod) - 1)) == 0) {
631 // reset least significant bit
632 // most significant bit becomes least significant bit
633 for (int i = 1; i <= nHistoryTables; i++) {
634 for (int j = 0; j < (ULL(1) << logTagTableSizes[i]); j++) {
635 gtable[i][j].u = gtable[i][j].u >> 1;
636 }
637 }
638 }
639
640 if (bi->hitBank > 0) {
641 DPRINTF(LTage, "Updating tag table entry (%d,%d) for branch %lx\n",
642 bi->hitBank, bi->hitBankIndex, branch_pc);
643 ctrUpdate(gtable[bi->hitBank][bi->hitBankIndex].ctr, taken,
644 tagTableCounterBits);
645 // if the provider entry is not certified to be useful also update
646 // the alternate prediction
647 if (gtable[bi->hitBank][bi->hitBankIndex].u == 0) {
648 if (bi->altBank > 0) {
649 ctrUpdate(gtable[bi->altBank][bi->altBankIndex].ctr, taken,
650 tagTableCounterBits);
651 DPRINTF(LTage, "Updating tag table entry (%d,%d) for"
652 " branch %lx\n", bi->hitBank, bi->hitBankIndex,
653 branch_pc);
654 }
655 if (bi->altBank == 0) {
656 baseUpdate(pc, taken, bi);
657 }
658 }
659
660 // update the u counter
661 if (bi->tagePred != bi->altTaken) {
662 unsignedCtrUpdate(gtable[bi->hitBank][bi->hitBankIndex].u,
663 bi->tagePred == taken, tagTableUBits);
664 }
665 } else {
666 baseUpdate(pc, taken, bi);
667 }
668
669 //END PREDICTOR UPDATE
670 }
257 }
671 if (!squashed) {
672 delete bi;
673 }
674}
675
258
676void
677LTAGE::updateHistories(ThreadID tid, Addr branch_pc, bool taken, void* b)
678{
679 BranchInfo* bi = (BranchInfo*)(b);
680 ThreadHistory& tHist = threadHistory[tid];
681 // UPDATE HISTORIES
682 bool pathbit = ((branch_pc >> instShiftAmt) & 1);
683 //on a squash, return pointers to this and recompute indices.
684 //update user history
685 updateGHist(tHist.gHist, taken, tHist.globalHistory, tHist.ptGhist);
686 tHist.pathHist = (tHist.pathHist << 1) + pathbit;
687 tHist.pathHist = (tHist.pathHist & ((ULL(1) << pathHistBits) - 1));
688
689 bi->ptGhist = tHist.ptGhist;
690 bi->pathHist = tHist.pathHist;
691 //prepare next index and tag computations for user branches
692 for (int i = 1; i <= nHistoryTables; i++)
693 {
694 bi->ci[i] = tHist.computeIndices[i].comp;
695 bi->ct0[i] = tHist.computeTags[0][i].comp;
696 bi->ct1[i] = tHist.computeTags[1][i].comp;
697 tHist.computeIndices[i].update(tHist.gHist);
698 tHist.computeTags[0][i].update(tHist.gHist);
699 tHist.computeTags[1][i].update(tHist.gHist);
700 }
701 DPRINTF(LTage, "Updating global histories with branch:%lx; taken?:%d, "
702 "path Hist: %x; pointer:%d\n", branch_pc, taken, tHist.pathHist,
703 tHist.ptGhist);
259 TAGE::condBranchUpdate(branch_pc, taken, bi, nrand);
704}
705
706void
707LTAGE::squash(ThreadID tid, bool taken, void *bp_history)
708{
260}
261
262void
263LTAGE::squash(ThreadID tid, bool taken, void *bp_history)
264{
709 BranchInfo* bi = (BranchInfo*)(bp_history);
710 ThreadHistory& tHist = threadHistory[tid];
711 DPRINTF(LTage, "Restoring branch info: %lx; taken? %d; PathHistory:%x, "
712 "pointer:%d\n", bi->branchPC,taken, bi->pathHist, bi->ptGhist);
713 tHist.pathHist = bi->pathHist;
714 tHist.ptGhist = bi->ptGhist;
715 tHist.gHist = &(tHist.globalHistory[tHist.ptGhist]);
716 tHist.gHist[0] = (taken ? 1 : 0);
717 for (int i = 1; i <= nHistoryTables; i++) {
718 tHist.computeIndices[i].comp = bi->ci[i];
719 tHist.computeTags[0][i].comp = bi->ct0[i];
720 tHist.computeTags[1][i].comp = bi->ct1[i];
721 tHist.computeIndices[i].update(tHist.gHist);
722 tHist.computeTags[0][i].update(tHist.gHist);
723 tHist.computeTags[1][i].update(tHist.gHist);
724 }
265 TAGE::squash(tid, taken, bp_history);
725
266
267 LTageBranchInfo* bi = (LTageBranchInfo*)(bp_history);
268
726 if (bi->condBranch) {
727 if (bi->loopHit >= 0) {
728 int idx = bi->loopIndex + bi->loopHit;
729 ltable[idx].currentIterSpec = bi->currentIter;
730 }
731 }
269 if (bi->condBranch) {
270 if (bi->loopHit >= 0) {
271 int idx = bi->loopIndex + bi->loopHit;
272 ltable[idx].currentIterSpec = bi->currentIter;
273 }
274 }
732
733}
734
735void
736LTAGE::squash(ThreadID tid, void *bp_history)
737{
275}
276
277void
278LTAGE::squash(ThreadID tid, void *bp_history)
279{
738 BranchInfo* bi = (BranchInfo*)(bp_history);
739 DPRINTF(LTage, "Deleting branch info: %lx\n", bi->branchPC);
280 LTageBranchInfo* bi = (LTageBranchInfo*)(bp_history);
740 if (bi->condBranch) {
741 if (bi->loopHit >= 0) {
742 int idx = bi->loopIndex + bi->loopHit;
743 ltable[idx].currentIterSpec = bi->currentIter;
744 }
745 }
746
281 if (bi->condBranch) {
282 if (bi->loopHit >= 0) {
283 int idx = bi->loopIndex + bi->loopHit;
284 ltable[idx].currentIterSpec = bi->currentIter;
285 }
286 }
287
747 delete bi;
288 TAGE::squash(tid, bp_history);
748}
749
289}
290
750bool
751LTAGE::lookup(ThreadID tid, Addr branch_pc, void* &bp_history)
752{
753 bool retval = predict(tid, branch_pc, true, bp_history);
754
755 DPRINTF(LTage, "Lookup branch: %lx; predict:%d\n", branch_pc, retval);
756 updateHistories(tid, branch_pc, retval, bp_history);
757 assert(threadHistory[tid].gHist ==
758 &threadHistory[tid].globalHistory[threadHistory[tid].ptGhist]);
759
760 return retval;
761}
762
763void
764LTAGE::btbUpdate(ThreadID tid, Addr branch_pc, void* &bp_history)
765{
766 BranchInfo* bi = (BranchInfo*) bp_history;
767 ThreadHistory& tHist = threadHistory[tid];
768 DPRINTF(LTage, "BTB miss resets prediction: %lx\n", branch_pc);
769 assert(tHist.gHist == &tHist.globalHistory[tHist.ptGhist]);
770 tHist.gHist[0] = 0;
771 for (int i = 1; i <= nHistoryTables; i++) {
772 tHist.computeIndices[i].comp = bi->ci[i];
773 tHist.computeTags[0][i].comp = bi->ct0[i];
774 tHist.computeTags[1][i].comp = bi->ct1[i];
775 tHist.computeIndices[i].update(tHist.gHist);
776 tHist.computeTags[0][i].update(tHist.gHist);
777 tHist.computeTags[1][i].update(tHist.gHist);
778 }
779}
780
781void
782LTAGE::uncondBranch(ThreadID tid, Addr br_pc, void* &bp_history)
783{
784 DPRINTF(LTage, "UnConditionalBranch: %lx\n", br_pc);
785 predict(tid, br_pc, false, bp_history);
786 updateHistories(tid, br_pc, true, bp_history);
787 assert(threadHistory[tid].gHist ==
788 &threadHistory[tid].globalHistory[threadHistory[tid].ptGhist]);
789}
790
791LTAGE*
792LTAGEParams::create()
793{
794 return new LTAGE(this);
795}
291LTAGE*
292LTAGEParams::create()
293{
294 return new LTAGE(this);
295}