ltage.cc (13420:5cb2b90e1cb5) ltage.cc (13433:fd8c49bea81f)
1/*
2 * Copyright (c) 2014 The University of Wisconsin
3 *
4 * Copyright (c) 2006 INRIA (Institut National de Recherche en
5 * Informatique et en Automatique / French National Research Institute
6 * for Computer Science and Applied Mathematics)
7 *
8 * All rights reserved.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions are
12 * met: redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer;
14 * redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution;
17 * neither the name of the copyright holders nor the names of its
18 * contributors may be used to endorse or promote products derived from
19 * this software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 *
33 * Authors: Vignyan Reddy, Dibakar Gope and Arthur Perais,
34 * from André Seznec's code.
35 */
36
37/* @file
38 * Implementation of a L-TAGE branch predictor
39 */
40
41#include "cpu/pred/ltage.hh"
42
43#include "base/intmath.hh"
44#include "base/logging.hh"
45#include "base/random.hh"
46#include "base/trace.hh"
47#include "debug/Fetch.hh"
48#include "debug/LTage.hh"
49
50LTAGE::LTAGE(const LTAGEParams *params)
51 : BPredUnit(params),
52 logSizeBiMP(params->logSizeBiMP),
53 logRatioBiModalHystEntries(params->logRatioBiModalHystEntries),
54 logSizeTagTables(params->logSizeTagTables),
55 logSizeLoopPred(params->logSizeLoopPred),
56 nHistoryTables(params->nHistoryTables),
57 tagTableCounterBits(params->tagTableCounterBits),
58 histBufferSize(params->histBufferSize),
59 minHist(params->minHist),
60 maxHist(params->maxHist),
61 minTagWidth(params->minTagWidth),
62 threadHistory(params->numThreads)
63{
64 assert(params->histBufferSize > params->maxHist * 2);
65 useAltPredForNewlyAllocated = 0;
66 logTick = 19;
67 tCounter = ULL(1) << (logTick - 1);
68
69 for (auto& history : threadHistory) {
70 history.pathHist = 0;
71 history.globalHistory = new uint8_t[histBufferSize];
72 history.gHist = history.globalHistory;
73 memset(history.gHist, 0, histBufferSize);
74 history.ptGhist = 0;
75 }
76
77 histLengths = new int [nHistoryTables+1];
78 histLengths[1] = minHist;
79 histLengths[nHistoryTables] = maxHist;
80
81 for (int i = 2; i <= nHistoryTables; i++) {
82 histLengths[i] = (int) (((double) minHist *
83 pow ((double) (maxHist) / (double) minHist,
84 (double) (i - 1) / (double) ((nHistoryTables- 1))))
85 + 0.5);
86 }
87
88 tagWidths[1] = minTagWidth;
89 tagWidths[2] = minTagWidth;
90 tagWidths[3] = minTagWidth + 1;
91 tagWidths[4] = minTagWidth + 1;
92 tagWidths[5] = minTagWidth + 2;
93 tagWidths[6] = minTagWidth + 3;
94 tagWidths[7] = minTagWidth + 4;
95 tagWidths[8] = minTagWidth + 5;
96 tagWidths[9] = minTagWidth + 5;
97 tagWidths[10] = minTagWidth + 6;
98 tagWidths[11] = minTagWidth + 7;
99 tagWidths[12] = minTagWidth + 8;
100
101 for (int i = 1; i <= 2; i++)
102 tagTableSizes[i] = logSizeTagTables - 1;
103 for (int i = 3; i <= 6; i++)
104 tagTableSizes[i] = logSizeTagTables;
105 for (int i = 7; i <= 10; i++)
106 tagTableSizes[i] = logSizeTagTables - 1;
107 for (int i = 11; i <= 12; i++)
108 tagTableSizes[i] = logSizeTagTables - 2;
109
110 for (auto& history : threadHistory) {
111 history.computeIndices = new FoldedHistory[nHistoryTables+1];
112 history.computeTags[0] = new FoldedHistory[nHistoryTables+1];
113 history.computeTags[1] = new FoldedHistory[nHistoryTables+1];
114
115 for (int i = 1; i <= nHistoryTables; i++) {
116 history.computeIndices[i].init(histLengths[i], (tagTableSizes[i]));
117 history.computeTags[0][i].init(
118 history.computeIndices[i].origLength, tagWidths[i]);
119 history.computeTags[1][i].init(
120 history.computeIndices[i].origLength, tagWidths[i] - 1);
121 DPRINTF(LTage, "HistLength:%d, TTSize:%d, TTTWidth:%d\n",
122 histLengths[i], tagTableSizes[i], tagWidths[i]);
123 }
124 }
125
126 const uint64_t bimodalTableSize = ULL(1) << logSizeBiMP;
127 btablePrediction.resize(bimodalTableSize, false);
128 btableHysteresis.resize(bimodalTableSize >> logRatioBiModalHystEntries,
129 true);
130
131 ltable = new LoopEntry[ULL(1) << logSizeLoopPred];
132 gtable = new TageEntry*[nHistoryTables + 1];
133 for (int i = 1; i <= nHistoryTables; i++) {
134 gtable[i] = new TageEntry[1<<(tagTableSizes[i])];
135 }
136
137 tableIndices = new int [nHistoryTables+1];
138 tableTags = new int [nHistoryTables+1];
139
140 loopUseCounter = 0;
141}
142
143int
144LTAGE::bindex(Addr pc_in) const
145{
146 return ((pc_in >> instShiftAmt) & ((ULL(1) << (logSizeBiMP)) - 1));
147}
148
149int
150LTAGE::lindex(Addr pc_in) const
151{
152 return (((pc_in >> instShiftAmt) &
153 ((ULL(1) << (logSizeLoopPred - 2)) - 1)) << 2);
154}
155
156int
157LTAGE::F(int A, int size, int bank) const
158{
159 int A1, A2;
160
161 A = A & ((ULL(1) << size) - 1);
162 A1 = (A & ((ULL(1) << tagTableSizes[bank]) - 1));
163 A2 = (A >> tagTableSizes[bank]);
164 A2 = ((A2 << bank) & ((ULL(1) << tagTableSizes[bank]) - 1))
165 + (A2 >> (tagTableSizes[bank] - bank));
166 A = A1 ^ A2;
167 A = ((A << bank) & ((ULL(1) << tagTableSizes[bank]) - 1))
168 + (A >> (tagTableSizes[bank] - bank));
169 return (A);
170}
171
172
173// gindex computes a full hash of pc, ghist and pathHist
174int
175LTAGE::gindex(ThreadID tid, Addr pc, int bank) const
176{
177 int index;
178 int hlen = (histLengths[bank] > 16) ? 16 : histLengths[bank];
179 index =
180 (pc >> instShiftAmt) ^
181 ((pc >> instShiftAmt) >> ((int) abs(tagTableSizes[bank] - bank) + 1)) ^
182 threadHistory[tid].computeIndices[bank].comp ^
183 F(threadHistory[tid].pathHist, hlen, bank);
184
185 return (index & ((ULL(1) << (tagTableSizes[bank])) - 1));
186}
187
188
189// Tag computation
190uint16_t
191LTAGE::gtag(ThreadID tid, Addr pc, int bank) const
192{
193 int tag = (pc >> instShiftAmt) ^
194 threadHistory[tid].computeTags[0][bank].comp ^
195 (threadHistory[tid].computeTags[1][bank].comp << 1);
196
197 return (tag & ((ULL(1) << tagWidths[bank]) - 1));
198}
199
200
201// Up-down saturating counter
202void
203LTAGE::ctrUpdate(int8_t & ctr, bool taken, int nbits)
204{
205 assert(nbits <= sizeof(int8_t) << 3);
206 if (taken) {
207 if (ctr < ((1 << (nbits - 1)) - 1))
208 ctr++;
209 } else {
210 if (ctr > -(1 << (nbits - 1)))
211 ctr--;
212 }
213}
214
215// Bimodal prediction
216bool
217LTAGE::getBimodePred(Addr pc, BranchInfo* bi) const
218{
219 return btablePrediction[bi->bimodalIndex];
220}
221
222
223// Update the bimodal predictor: a hysteresis bit is shared among N prediction
224// bits (N = 2 ^ logRatioBiModalHystEntries)
225void
226LTAGE::baseUpdate(Addr pc, bool taken, BranchInfo* bi)
227{
228 int inter = (btablePrediction[bi->bimodalIndex] << 1)
229 + btableHysteresis[bi->bimodalIndex >> logRatioBiModalHystEntries];
230 if (taken) {
231 if (inter < 3)
232 inter++;
233 } else if (inter > 0) {
234 inter--;
235 }
236 const bool pred = inter >> 1;
237 const bool hyst = inter & 1;
238 btablePrediction[bi->bimodalIndex] = pred;
239 btableHysteresis[bi->bimodalIndex >> logRatioBiModalHystEntries] = hyst;
240 DPRINTF(LTage, "Updating branch %lx, pred:%d, hyst:%d\n", pc, pred, hyst);
241}
242
243
244//loop prediction: only used if high confidence
245bool
246LTAGE::getLoop(Addr pc, BranchInfo* bi) const
247{
248 bi->loopHit = -1;
249 bi->loopPredValid = false;
250 bi->loopIndex = lindex(pc);
251 bi->loopTag = ((pc) >> (instShiftAmt + logSizeLoopPred - 2));
252
253 for (int i = 0; i < 4; i++) {
254 if (ltable[bi->loopIndex + i].tag == bi->loopTag) {
255 bi->loopHit = i;
256 bi->loopPredValid = (ltable[bi->loopIndex + i].confidence >= 3);
257 bi->currentIter = ltable[bi->loopIndex + i].currentIterSpec;
258 if (ltable[bi->loopIndex + i].currentIterSpec + 1 ==
259 ltable[bi->loopIndex + i].numIter) {
260 return !(ltable[bi->loopIndex + i].dir);
261 }else {
262 return (ltable[bi->loopIndex + i].dir);
263 }
264 }
265 }
266 return false;
267}
268
269void
270LTAGE::specLoopUpdate(Addr pc, bool taken, BranchInfo* bi)
271{
272 if (bi->loopHit>=0) {
273 int index = lindex(pc);
274 if (taken != ltable[index].dir) {
275 ltable[index].currentIterSpec = 0;
276 } else {
277 ltable[index].currentIterSpec++;
278 }
279 }
280}
281
282void
283LTAGE::loopUpdate(Addr pc, bool taken, BranchInfo* bi)
284{
285 int idx = bi->loopIndex + bi->loopHit;
286 if (bi->loopHit >= 0) {
287 //already a hit
288 if (bi->loopPredValid) {
289 if (taken != bi->loopPred) {
290 // free the entry
291 ltable[idx].numIter = 0;
292 ltable[idx].age = 0;
293 ltable[idx].confidence = 0;
294 ltable[idx].currentIter = 0;
295 return;
296 } else if (bi->loopPred != bi->tagePred) {
297 DPRINTF(LTage, "Loop Prediction success:%lx\n",pc);
298 if (ltable[idx].age < 7)
299 ltable[idx].age++;
300 }
301 }
302
303 ltable[idx].currentIter++;
304 if (ltable[idx].currentIter > ltable[idx].numIter) {
305 ltable[idx].confidence = 0;
306 if (ltable[idx].numIter != 0) {
307 // free the entry
308 ltable[idx].numIter = 0;
309 ltable[idx].age = 0;
310 ltable[idx].confidence = 0;
311 }
312 }
313
314 if (taken != ltable[idx].dir) {
315 if (ltable[idx].currentIter == ltable[idx].numIter) {
316 DPRINTF(LTage, "Loop End predicted successfully:%lx\n", pc);
317
318 if (ltable[idx].confidence < 7) {
319 ltable[idx].confidence++;
320 }
321 //just do not predict when the loop count is 1 or 2
322 if (ltable[idx].numIter < 3) {
323 // free the entry
324 ltable[idx].dir = taken;
325 ltable[idx].numIter = 0;
326 ltable[idx].age = 0;
327 ltable[idx].confidence = 0;
328 }
329 } else {
330 DPRINTF(LTage, "Loop End predicted incorrectly:%lx\n", pc);
331 if (ltable[idx].numIter == 0) {
332 // first complete nest;
333 ltable[idx].confidence = 0;
334 ltable[idx].numIter = ltable[idx].currentIter;
335 } else {
336 //not the same number of iterations as last time: free the
337 //entry
338 ltable[idx].numIter = 0;
339 ltable[idx].age = 0;
340 ltable[idx].confidence = 0;
341 }
342 }
343 ltable[idx].currentIter = 0;
344 }
345
346 } else if (taken) {
347 //try to allocate an entry on taken branch
348 int nrand = random_mt.random<int>();
349 for (int i = 0; i < 4; i++) {
350 int loop_hit = (nrand + i) & 3;
351 idx = bi->loopIndex + loop_hit;
352 if (ltable[idx].age == 0) {
353 DPRINTF(LTage, "Allocating loop pred entry for branch %lx\n",
354 pc);
355 ltable[idx].dir = !taken;
356 ltable[idx].tag = bi->loopTag;
357 ltable[idx].numIter = 0;
358 ltable[idx].age = 7;
359 ltable[idx].confidence = 0;
360 ltable[idx].currentIter = 1;
361 break;
362
363 }
364 else
365 ltable[idx].age--;
366 }
367 }
368
369}
370
371// shifting the global history: we manage the history in a big table in order
372// to reduce simulation time
373void
374LTAGE::updateGHist(uint8_t * &h, bool dir, uint8_t * tab, int &pt)
375{
376 if (pt == 0) {
377 DPRINTF(LTage, "Rolling over the histories\n");
378 // Copy beginning of globalHistoryBuffer to end, such that
379 // the last maxHist outcomes are still reachable
380 // through pt[0 .. maxHist - 1].
381 for (int i = 0; i < maxHist; i++)
382 tab[histBufferSize - maxHist + i] = tab[i];
383 pt = histBufferSize - maxHist;
384 h = &tab[pt];
385 }
386 pt--;
387 h--;
388 h[0] = (dir) ? 1 : 0;
389}
390
391// Get GHR for hashing indirect predictor
392// Build history backwards from pointer in
393// bp_history.
394unsigned
395LTAGE::getGHR(ThreadID tid, void *bp_history) const
396{
397 BranchInfo* bi = static_cast<BranchInfo*>(bp_history);
398 unsigned val = 0;
399 for (unsigned i = 0; i < 32; i++) {
400 // Make sure we don't go out of bounds
401 int gh_offset = bi->ptGhist + i;
402 assert(&(threadHistory[tid].globalHistory[gh_offset]) <
403 threadHistory[tid].globalHistory + histBufferSize);
404 val |= ((threadHistory[tid].globalHistory[gh_offset] & 0x1) << i);
405 }
406
407 return val;
408}
409
410//prediction
411bool
412LTAGE::predict(ThreadID tid, Addr branch_pc, bool cond_branch, void* &b)
413{
414 BranchInfo *bi = new BranchInfo(nHistoryTables+1);
415 b = (void*)(bi);
416 Addr pc = branch_pc;
417 bool pred_taken = true;
418 bi->loopHit = -1;
419
420 if (cond_branch) {
421 // TAGE prediction
422
423 // computes the table addresses and the partial tags
424 for (int i = 1; i <= nHistoryTables; i++) {
425 tableIndices[i] = gindex(tid, pc, i);
426 bi->tableIndices[i] = tableIndices[i];
427 tableTags[i] = gtag(tid, pc, i);
428 bi->tableTags[i] = tableTags[i];
429 }
430
431 bi->bimodalIndex = bindex(pc);
432
433 bi->hitBank = 0;
434 bi->altBank = 0;
435 //Look for the bank with longest matching history
436 for (int i = nHistoryTables; i > 0; i--) {
437 if (gtable[i][tableIndices[i]].tag == tableTags[i]) {
438 bi->hitBank = i;
439 bi->hitBankIndex = tableIndices[bi->hitBank];
440 break;
441 }
442 }
443 //Look for the alternate bank
444 for (int i = bi->hitBank - 1; i > 0; i--) {
445 if (gtable[i][tableIndices[i]].tag == tableTags[i]) {
446 bi->altBank = i;
447 bi->altBankIndex = tableIndices[bi->altBank];
448 break;
449 }
450 }
451 //computes the prediction and the alternate prediction
452 if (bi->hitBank > 0) {
453 if (bi->altBank > 0) {
454 bi->altTaken =
455 gtable[bi->altBank][tableIndices[bi->altBank]].ctr >= 0;
456 }else {
457 bi->altTaken = getBimodePred(pc, bi);
458 }
459
460 bi->longestMatchPred =
461 gtable[bi->hitBank][tableIndices[bi->hitBank]].ctr >= 0;
462 bi->pseudoNewAlloc =
463 abs(2 * gtable[bi->hitBank][bi->hitBankIndex].ctr + 1) <= 1;
464
465 //if the entry is recognized as a newly allocated entry and
466 //useAltPredForNewlyAllocated is positive use the alternate
467 //prediction
468 if ((useAltPredForNewlyAllocated < 0)
469 || abs(2 *
470 gtable[bi->hitBank][tableIndices[bi->hitBank]].ctr + 1) > 1)
471 bi->tagePred = bi->longestMatchPred;
472 else
473 bi->tagePred = bi->altTaken;
474 } else {
475 bi->altTaken = getBimodePred(pc, bi);
476 bi->tagePred = bi->altTaken;
477 bi->longestMatchPred = bi->altTaken;
478 }
479 //end TAGE prediction
480
481 bi->loopPred = getLoop(pc, bi); // loop prediction
482
483 pred_taken = (((loopUseCounter >= 0) && bi->loopPredValid)) ?
484 (bi->loopPred): (bi->tagePred);
485 DPRINTF(LTage, "Predict for %lx: taken?:%d, loopTaken?:%d, "
486 "loopValid?:%d, loopUseCounter:%d, tagePred:%d, altPred:%d\n",
487 branch_pc, pred_taken, bi->loopPred, bi->loopPredValid,
488 loopUseCounter, bi->tagePred, bi->altTaken);
489 }
490 bi->branchPC = branch_pc;
491 bi->condBranch = cond_branch;
492 specLoopUpdate(branch_pc, pred_taken, bi);
493 return pred_taken;
494}
495
496// PREDICTOR UPDATE
497void
498LTAGE::update(ThreadID tid, Addr branch_pc, bool taken, void* bp_history,
499 bool squashed)
500{
501 assert(bp_history);
502
503 BranchInfo *bi = static_cast<BranchInfo*>(bp_history);
504
505 if (squashed) {
506 // This restores the global history, then update it
507 // and recomputes the folded histories.
508 squash(tid, taken, bp_history);
509 return;
510 }
511
512 int nrand = random_mt.random<int>(0,3);
513 Addr pc = branch_pc;
514 if (bi->condBranch) {
515 DPRINTF(LTage, "Updating tables for branch:%lx; taken?:%d\n",
516 branch_pc, taken);
517 // first update the loop predictor
518 loopUpdate(pc, taken, bi);
519
520 if (bi->loopPredValid) {
521 if (bi->tagePred != bi->loopPred) {
522 ctrUpdate(loopUseCounter, (bi->loopPred== taken), 7);
523 }
524 }
525
526 // TAGE UPDATE
527 // try to allocate a new entries only if prediction was wrong
528 bool longest_match_pred = false;
529 bool alloc = (bi->tagePred != taken) && (bi->hitBank < nHistoryTables);
530 if (bi->hitBank > 0) {
531 // Manage the selection between longest matching and alternate
532 // matching for "pseudo"-newly allocated longest matching entry
533 longest_match_pred = bi->longestMatchPred;
534 bool PseudoNewAlloc = bi->pseudoNewAlloc;
535 // an entry is considered as newly allocated if its prediction
536 // counter is weak
537 if (PseudoNewAlloc) {
538 if (longest_match_pred == taken) {
539 alloc = false;
540 }
541 // if it was delivering the correct prediction, no need to
542 // allocate new entry even if the overall prediction was false
543 if (longest_match_pred != bi->altTaken) {
544 ctrUpdate(useAltPredForNewlyAllocated,
545 bi->altTaken == taken, 4);
546 }
547 }
548 }
549
550 if (alloc) {
551 // is there some "unuseful" entry to allocate
552 int8_t min = 1;
553 for (int i = nHistoryTables; i > bi->hitBank; i--) {
554 if (gtable[i][bi->tableIndices[i]].u < min) {
555 min = gtable[i][bi->tableIndices[i]].u;
556 }
557 }
558
559 // we allocate an entry with a longer history
560 // to avoid ping-pong, we do not choose systematically the next
561 // entry, but among the 3 next entries
562 int Y = nrand &
563 ((ULL(1) << (nHistoryTables - bi->hitBank - 1)) - 1);
564 int X = bi->hitBank + 1;
565 if (Y & 1) {
566 X++;
567 if (Y & 2)
568 X++;
569 }
570 // No entry available, forces one to be available
571 if (min > 0) {
572 gtable[X][bi->tableIndices[X]].u = 0;
573 }
574
575
576 //Allocate only one entry
577 for (int i = X; i <= nHistoryTables; i++) {
578 if ((gtable[i][bi->tableIndices[i]].u == 0)) {
579 gtable[i][bi->tableIndices[i]].tag = bi->tableTags[i];
580 gtable[i][bi->tableIndices[i]].ctr = (taken) ? 0 : -1;
581 gtable[i][bi->tableIndices[i]].u = 0; //?
1/*
2 * Copyright (c) 2014 The University of Wisconsin
3 *
4 * Copyright (c) 2006 INRIA (Institut National de Recherche en
5 * Informatique et en Automatique / French National Research Institute
6 * for Computer Science and Applied Mathematics)
7 *
8 * All rights reserved.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions are
12 * met: redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer;
14 * redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution;
17 * neither the name of the copyright holders nor the names of its
18 * contributors may be used to endorse or promote products derived from
19 * this software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 *
33 * Authors: Vignyan Reddy, Dibakar Gope and Arthur Perais,
34 * from André Seznec's code.
35 */
36
37/* @file
38 * Implementation of a L-TAGE branch predictor
39 */
40
41#include "cpu/pred/ltage.hh"
42
43#include "base/intmath.hh"
44#include "base/logging.hh"
45#include "base/random.hh"
46#include "base/trace.hh"
47#include "debug/Fetch.hh"
48#include "debug/LTage.hh"
49
50LTAGE::LTAGE(const LTAGEParams *params)
51 : BPredUnit(params),
52 logSizeBiMP(params->logSizeBiMP),
53 logRatioBiModalHystEntries(params->logRatioBiModalHystEntries),
54 logSizeTagTables(params->logSizeTagTables),
55 logSizeLoopPred(params->logSizeLoopPred),
56 nHistoryTables(params->nHistoryTables),
57 tagTableCounterBits(params->tagTableCounterBits),
58 histBufferSize(params->histBufferSize),
59 minHist(params->minHist),
60 maxHist(params->maxHist),
61 minTagWidth(params->minTagWidth),
62 threadHistory(params->numThreads)
63{
64 assert(params->histBufferSize > params->maxHist * 2);
65 useAltPredForNewlyAllocated = 0;
66 logTick = 19;
67 tCounter = ULL(1) << (logTick - 1);
68
69 for (auto& history : threadHistory) {
70 history.pathHist = 0;
71 history.globalHistory = new uint8_t[histBufferSize];
72 history.gHist = history.globalHistory;
73 memset(history.gHist, 0, histBufferSize);
74 history.ptGhist = 0;
75 }
76
77 histLengths = new int [nHistoryTables+1];
78 histLengths[1] = minHist;
79 histLengths[nHistoryTables] = maxHist;
80
81 for (int i = 2; i <= nHistoryTables; i++) {
82 histLengths[i] = (int) (((double) minHist *
83 pow ((double) (maxHist) / (double) minHist,
84 (double) (i - 1) / (double) ((nHistoryTables- 1))))
85 + 0.5);
86 }
87
88 tagWidths[1] = minTagWidth;
89 tagWidths[2] = minTagWidth;
90 tagWidths[3] = minTagWidth + 1;
91 tagWidths[4] = minTagWidth + 1;
92 tagWidths[5] = minTagWidth + 2;
93 tagWidths[6] = minTagWidth + 3;
94 tagWidths[7] = minTagWidth + 4;
95 tagWidths[8] = minTagWidth + 5;
96 tagWidths[9] = minTagWidth + 5;
97 tagWidths[10] = minTagWidth + 6;
98 tagWidths[11] = minTagWidth + 7;
99 tagWidths[12] = minTagWidth + 8;
100
101 for (int i = 1; i <= 2; i++)
102 tagTableSizes[i] = logSizeTagTables - 1;
103 for (int i = 3; i <= 6; i++)
104 tagTableSizes[i] = logSizeTagTables;
105 for (int i = 7; i <= 10; i++)
106 tagTableSizes[i] = logSizeTagTables - 1;
107 for (int i = 11; i <= 12; i++)
108 tagTableSizes[i] = logSizeTagTables - 2;
109
110 for (auto& history : threadHistory) {
111 history.computeIndices = new FoldedHistory[nHistoryTables+1];
112 history.computeTags[0] = new FoldedHistory[nHistoryTables+1];
113 history.computeTags[1] = new FoldedHistory[nHistoryTables+1];
114
115 for (int i = 1; i <= nHistoryTables; i++) {
116 history.computeIndices[i].init(histLengths[i], (tagTableSizes[i]));
117 history.computeTags[0][i].init(
118 history.computeIndices[i].origLength, tagWidths[i]);
119 history.computeTags[1][i].init(
120 history.computeIndices[i].origLength, tagWidths[i] - 1);
121 DPRINTF(LTage, "HistLength:%d, TTSize:%d, TTTWidth:%d\n",
122 histLengths[i], tagTableSizes[i], tagWidths[i]);
123 }
124 }
125
126 const uint64_t bimodalTableSize = ULL(1) << logSizeBiMP;
127 btablePrediction.resize(bimodalTableSize, false);
128 btableHysteresis.resize(bimodalTableSize >> logRatioBiModalHystEntries,
129 true);
130
131 ltable = new LoopEntry[ULL(1) << logSizeLoopPred];
132 gtable = new TageEntry*[nHistoryTables + 1];
133 for (int i = 1; i <= nHistoryTables; i++) {
134 gtable[i] = new TageEntry[1<<(tagTableSizes[i])];
135 }
136
137 tableIndices = new int [nHistoryTables+1];
138 tableTags = new int [nHistoryTables+1];
139
140 loopUseCounter = 0;
141}
142
143int
144LTAGE::bindex(Addr pc_in) const
145{
146 return ((pc_in >> instShiftAmt) & ((ULL(1) << (logSizeBiMP)) - 1));
147}
148
149int
150LTAGE::lindex(Addr pc_in) const
151{
152 return (((pc_in >> instShiftAmt) &
153 ((ULL(1) << (logSizeLoopPred - 2)) - 1)) << 2);
154}
155
156int
157LTAGE::F(int A, int size, int bank) const
158{
159 int A1, A2;
160
161 A = A & ((ULL(1) << size) - 1);
162 A1 = (A & ((ULL(1) << tagTableSizes[bank]) - 1));
163 A2 = (A >> tagTableSizes[bank]);
164 A2 = ((A2 << bank) & ((ULL(1) << tagTableSizes[bank]) - 1))
165 + (A2 >> (tagTableSizes[bank] - bank));
166 A = A1 ^ A2;
167 A = ((A << bank) & ((ULL(1) << tagTableSizes[bank]) - 1))
168 + (A >> (tagTableSizes[bank] - bank));
169 return (A);
170}
171
172
173// gindex computes a full hash of pc, ghist and pathHist
174int
175LTAGE::gindex(ThreadID tid, Addr pc, int bank) const
176{
177 int index;
178 int hlen = (histLengths[bank] > 16) ? 16 : histLengths[bank];
179 index =
180 (pc >> instShiftAmt) ^
181 ((pc >> instShiftAmt) >> ((int) abs(tagTableSizes[bank] - bank) + 1)) ^
182 threadHistory[tid].computeIndices[bank].comp ^
183 F(threadHistory[tid].pathHist, hlen, bank);
184
185 return (index & ((ULL(1) << (tagTableSizes[bank])) - 1));
186}
187
188
189// Tag computation
190uint16_t
191LTAGE::gtag(ThreadID tid, Addr pc, int bank) const
192{
193 int tag = (pc >> instShiftAmt) ^
194 threadHistory[tid].computeTags[0][bank].comp ^
195 (threadHistory[tid].computeTags[1][bank].comp << 1);
196
197 return (tag & ((ULL(1) << tagWidths[bank]) - 1));
198}
199
200
201// Up-down saturating counter
202void
203LTAGE::ctrUpdate(int8_t & ctr, bool taken, int nbits)
204{
205 assert(nbits <= sizeof(int8_t) << 3);
206 if (taken) {
207 if (ctr < ((1 << (nbits - 1)) - 1))
208 ctr++;
209 } else {
210 if (ctr > -(1 << (nbits - 1)))
211 ctr--;
212 }
213}
214
215// Bimodal prediction
216bool
217LTAGE::getBimodePred(Addr pc, BranchInfo* bi) const
218{
219 return btablePrediction[bi->bimodalIndex];
220}
221
222
223// Update the bimodal predictor: a hysteresis bit is shared among N prediction
224// bits (N = 2 ^ logRatioBiModalHystEntries)
225void
226LTAGE::baseUpdate(Addr pc, bool taken, BranchInfo* bi)
227{
228 int inter = (btablePrediction[bi->bimodalIndex] << 1)
229 + btableHysteresis[bi->bimodalIndex >> logRatioBiModalHystEntries];
230 if (taken) {
231 if (inter < 3)
232 inter++;
233 } else if (inter > 0) {
234 inter--;
235 }
236 const bool pred = inter >> 1;
237 const bool hyst = inter & 1;
238 btablePrediction[bi->bimodalIndex] = pred;
239 btableHysteresis[bi->bimodalIndex >> logRatioBiModalHystEntries] = hyst;
240 DPRINTF(LTage, "Updating branch %lx, pred:%d, hyst:%d\n", pc, pred, hyst);
241}
242
243
244//loop prediction: only used if high confidence
245bool
246LTAGE::getLoop(Addr pc, BranchInfo* bi) const
247{
248 bi->loopHit = -1;
249 bi->loopPredValid = false;
250 bi->loopIndex = lindex(pc);
251 bi->loopTag = ((pc) >> (instShiftAmt + logSizeLoopPred - 2));
252
253 for (int i = 0; i < 4; i++) {
254 if (ltable[bi->loopIndex + i].tag == bi->loopTag) {
255 bi->loopHit = i;
256 bi->loopPredValid = (ltable[bi->loopIndex + i].confidence >= 3);
257 bi->currentIter = ltable[bi->loopIndex + i].currentIterSpec;
258 if (ltable[bi->loopIndex + i].currentIterSpec + 1 ==
259 ltable[bi->loopIndex + i].numIter) {
260 return !(ltable[bi->loopIndex + i].dir);
261 }else {
262 return (ltable[bi->loopIndex + i].dir);
263 }
264 }
265 }
266 return false;
267}
268
269void
270LTAGE::specLoopUpdate(Addr pc, bool taken, BranchInfo* bi)
271{
272 if (bi->loopHit>=0) {
273 int index = lindex(pc);
274 if (taken != ltable[index].dir) {
275 ltable[index].currentIterSpec = 0;
276 } else {
277 ltable[index].currentIterSpec++;
278 }
279 }
280}
281
282void
283LTAGE::loopUpdate(Addr pc, bool taken, BranchInfo* bi)
284{
285 int idx = bi->loopIndex + bi->loopHit;
286 if (bi->loopHit >= 0) {
287 //already a hit
288 if (bi->loopPredValid) {
289 if (taken != bi->loopPred) {
290 // free the entry
291 ltable[idx].numIter = 0;
292 ltable[idx].age = 0;
293 ltable[idx].confidence = 0;
294 ltable[idx].currentIter = 0;
295 return;
296 } else if (bi->loopPred != bi->tagePred) {
297 DPRINTF(LTage, "Loop Prediction success:%lx\n",pc);
298 if (ltable[idx].age < 7)
299 ltable[idx].age++;
300 }
301 }
302
303 ltable[idx].currentIter++;
304 if (ltable[idx].currentIter > ltable[idx].numIter) {
305 ltable[idx].confidence = 0;
306 if (ltable[idx].numIter != 0) {
307 // free the entry
308 ltable[idx].numIter = 0;
309 ltable[idx].age = 0;
310 ltable[idx].confidence = 0;
311 }
312 }
313
314 if (taken != ltable[idx].dir) {
315 if (ltable[idx].currentIter == ltable[idx].numIter) {
316 DPRINTF(LTage, "Loop End predicted successfully:%lx\n", pc);
317
318 if (ltable[idx].confidence < 7) {
319 ltable[idx].confidence++;
320 }
321 //just do not predict when the loop count is 1 or 2
322 if (ltable[idx].numIter < 3) {
323 // free the entry
324 ltable[idx].dir = taken;
325 ltable[idx].numIter = 0;
326 ltable[idx].age = 0;
327 ltable[idx].confidence = 0;
328 }
329 } else {
330 DPRINTF(LTage, "Loop End predicted incorrectly:%lx\n", pc);
331 if (ltable[idx].numIter == 0) {
332 // first complete nest;
333 ltable[idx].confidence = 0;
334 ltable[idx].numIter = ltable[idx].currentIter;
335 } else {
336 //not the same number of iterations as last time: free the
337 //entry
338 ltable[idx].numIter = 0;
339 ltable[idx].age = 0;
340 ltable[idx].confidence = 0;
341 }
342 }
343 ltable[idx].currentIter = 0;
344 }
345
346 } else if (taken) {
347 //try to allocate an entry on taken branch
348 int nrand = random_mt.random<int>();
349 for (int i = 0; i < 4; i++) {
350 int loop_hit = (nrand + i) & 3;
351 idx = bi->loopIndex + loop_hit;
352 if (ltable[idx].age == 0) {
353 DPRINTF(LTage, "Allocating loop pred entry for branch %lx\n",
354 pc);
355 ltable[idx].dir = !taken;
356 ltable[idx].tag = bi->loopTag;
357 ltable[idx].numIter = 0;
358 ltable[idx].age = 7;
359 ltable[idx].confidence = 0;
360 ltable[idx].currentIter = 1;
361 break;
362
363 }
364 else
365 ltable[idx].age--;
366 }
367 }
368
369}
370
371// shifting the global history: we manage the history in a big table in order
372// to reduce simulation time
373void
374LTAGE::updateGHist(uint8_t * &h, bool dir, uint8_t * tab, int &pt)
375{
376 if (pt == 0) {
377 DPRINTF(LTage, "Rolling over the histories\n");
378 // Copy beginning of globalHistoryBuffer to end, such that
379 // the last maxHist outcomes are still reachable
380 // through pt[0 .. maxHist - 1].
381 for (int i = 0; i < maxHist; i++)
382 tab[histBufferSize - maxHist + i] = tab[i];
383 pt = histBufferSize - maxHist;
384 h = &tab[pt];
385 }
386 pt--;
387 h--;
388 h[0] = (dir) ? 1 : 0;
389}
390
391// Get GHR for hashing indirect predictor
392// Build history backwards from pointer in
393// bp_history.
394unsigned
395LTAGE::getGHR(ThreadID tid, void *bp_history) const
396{
397 BranchInfo* bi = static_cast<BranchInfo*>(bp_history);
398 unsigned val = 0;
399 for (unsigned i = 0; i < 32; i++) {
400 // Make sure we don't go out of bounds
401 int gh_offset = bi->ptGhist + i;
402 assert(&(threadHistory[tid].globalHistory[gh_offset]) <
403 threadHistory[tid].globalHistory + histBufferSize);
404 val |= ((threadHistory[tid].globalHistory[gh_offset] & 0x1) << i);
405 }
406
407 return val;
408}
409
// Compute the prediction for a branch. For conditional branches this
// runs the full TAGE lookup (tagged tables with bimodal fallback) and
// arbitrates the TAGE result against the loop predictor. Allocates the
// BranchInfo record that is handed back through 'b' and later reclaimed
// by update() or squash().
bool
LTAGE::predict(ThreadID tid, Addr branch_pc, bool cond_branch, void* &b)
{
    // Ownership of bi passes to the caller via b; update()/squash()
    // must eventually delete it.
    BranchInfo *bi = new BranchInfo(nHistoryTables+1);
    b = (void*)(bi);
    Addr pc = branch_pc;
    // Unconditional branches are reported taken by default.
    bool pred_taken = true;
    bi->loopHit = -1;

    if (cond_branch) {
        // TAGE prediction

        // computes the table addresses and the partial tags
        for (int i = 1; i <= nHistoryTables; i++) {
            tableIndices[i] = gindex(tid, pc, i);
            bi->tableIndices[i] = tableIndices[i];
            tableTags[i] = gtag(tid, pc, i);
            bi->tableTags[i] = tableTags[i];
        }

        bi->bimodalIndex = bindex(pc);

        // Bank 0 means "no tagged component hit" (bimodal fallback).
        bi->hitBank = 0;
        bi->altBank = 0;
        // Look for the bank with longest matching history (scan from the
        // table with the longest geometric history down).
        for (int i = nHistoryTables; i > 0; i--) {
            if (gtable[i][tableIndices[i]].tag == tableTags[i]) {
                bi->hitBank = i;
                bi->hitBankIndex = tableIndices[bi->hitBank];
                break;
            }
        }
        // Look for the alternate bank: the next-longest matching history
        // below the provider.
        for (int i = bi->hitBank - 1; i > 0; i--) {
            if (gtable[i][tableIndices[i]].tag == tableTags[i]) {
                bi->altBank = i;
                bi->altBankIndex = tableIndices[bi->altBank];
                break;
            }
        }
        // computes the prediction and the alternate prediction
        if (bi->hitBank > 0) {
            if (bi->altBank > 0) {
                // Sign of the counter gives the direction.
                bi->altTaken =
                    gtable[bi->altBank][tableIndices[bi->altBank]].ctr >= 0;
            }else {
                // No alternate tagged hit: fall back to bimodal.
                bi->altTaken = getBimodePred(pc, bi);
            }

            bi->longestMatchPred =
                gtable[bi->hitBank][tableIndices[bi->hitBank]].ctr >= 0;
            // |2*ctr + 1| <= 1 detects a weak (-1 or 0) counter, i.e. an
            // entry that behaves like a fresh allocation.
            bi->pseudoNewAlloc =
                abs(2 * gtable[bi->hitBank][bi->hitBankIndex].ctr + 1) <= 1;

            //if the entry is recognized as a newly allocated entry and
            //useAltPredForNewlyAllocated is positive use the alternate
            //prediction
            if ((useAltPredForNewlyAllocated < 0)
                 || abs(2 *
                 gtable[bi->hitBank][tableIndices[bi->hitBank]].ctr + 1) > 1)
                bi->tagePred = bi->longestMatchPred;
            else
                bi->tagePred = bi->altTaken;
        } else {
            // No tagged component hit at all: bimodal provides both the
            // prediction and the "alternate" prediction.
            bi->altTaken = getBimodePred(pc, bi);
            bi->tagePred = bi->altTaken;
            bi->longestMatchPred = bi->altTaken;
        }
        //end TAGE prediction

        bi->loopPred = getLoop(pc, bi); // loop prediction

        // The loop predictor overrides TAGE only when its chooser counter
        // is non-negative and its entry is valid.
        pred_taken = (((loopUseCounter >= 0) && bi->loopPredValid)) ?
            (bi->loopPred): (bi->tagePred);
        DPRINTF(LTage, "Predict for %lx: taken?:%d, loopTaken?:%d, "
                "loopValid?:%d, loopUseCounter:%d, tagePred:%d, altPred:%d\n",
                branch_pc, pred_taken, bi->loopPred, bi->loopPredValid,
                loopUseCounter, bi->tagePred, bi->altTaken);
    }
    bi->branchPC = branch_pc;
    bi->condBranch = cond_branch;
    // Speculatively advance the loop predictor's iteration count.
    specLoopUpdate(branch_pc, pred_taken, bi);
    return pred_taken;
}
495
496// PREDICTOR UPDATE
497void
498LTAGE::update(ThreadID tid, Addr branch_pc, bool taken, void* bp_history,
499 bool squashed)
500{
501 assert(bp_history);
502
503 BranchInfo *bi = static_cast<BranchInfo*>(bp_history);
504
505 if (squashed) {
506 // This restores the global history, then update it
507 // and recomputes the folded histories.
508 squash(tid, taken, bp_history);
509 return;
510 }
511
512 int nrand = random_mt.random<int>(0,3);
513 Addr pc = branch_pc;
514 if (bi->condBranch) {
515 DPRINTF(LTage, "Updating tables for branch:%lx; taken?:%d\n",
516 branch_pc, taken);
517 // first update the loop predictor
518 loopUpdate(pc, taken, bi);
519
520 if (bi->loopPredValid) {
521 if (bi->tagePred != bi->loopPred) {
522 ctrUpdate(loopUseCounter, (bi->loopPred== taken), 7);
523 }
524 }
525
526 // TAGE UPDATE
527 // try to allocate a new entries only if prediction was wrong
528 bool longest_match_pred = false;
529 bool alloc = (bi->tagePred != taken) && (bi->hitBank < nHistoryTables);
530 if (bi->hitBank > 0) {
531 // Manage the selection between longest matching and alternate
532 // matching for "pseudo"-newly allocated longest matching entry
533 longest_match_pred = bi->longestMatchPred;
534 bool PseudoNewAlloc = bi->pseudoNewAlloc;
535 // an entry is considered as newly allocated if its prediction
536 // counter is weak
537 if (PseudoNewAlloc) {
538 if (longest_match_pred == taken) {
539 alloc = false;
540 }
541 // if it was delivering the correct prediction, no need to
542 // allocate new entry even if the overall prediction was false
543 if (longest_match_pred != bi->altTaken) {
544 ctrUpdate(useAltPredForNewlyAllocated,
545 bi->altTaken == taken, 4);
546 }
547 }
548 }
549
550 if (alloc) {
551 // is there some "unuseful" entry to allocate
552 int8_t min = 1;
553 for (int i = nHistoryTables; i > bi->hitBank; i--) {
554 if (gtable[i][bi->tableIndices[i]].u < min) {
555 min = gtable[i][bi->tableIndices[i]].u;
556 }
557 }
558
559 // we allocate an entry with a longer history
560 // to avoid ping-pong, we do not choose systematically the next
561 // entry, but among the 3 next entries
562 int Y = nrand &
563 ((ULL(1) << (nHistoryTables - bi->hitBank - 1)) - 1);
564 int X = bi->hitBank + 1;
565 if (Y & 1) {
566 X++;
567 if (Y & 2)
568 X++;
569 }
570 // No entry available, forces one to be available
571 if (min > 0) {
572 gtable[X][bi->tableIndices[X]].u = 0;
573 }
574
575
576 //Allocate only one entry
577 for (int i = X; i <= nHistoryTables; i++) {
578 if ((gtable[i][bi->tableIndices[i]].u == 0)) {
579 gtable[i][bi->tableIndices[i]].tag = bi->tableTags[i];
580 gtable[i][bi->tableIndices[i]].ctr = (taken) ? 0 : -1;
581 gtable[i][bi->tableIndices[i]].u = 0; //?
582 break;
582 }
583 }
584 }
585 //periodic reset of u: reset is not complete but bit by bit
586 tCounter++;
587 if ((tCounter & ((ULL(1) << logTick) - 1)) == 0) {
588 // reset least significant bit
589 // most significant bit becomes least significant bit
590 for (int i = 1; i <= nHistoryTables; i++) {
591 for (int j = 0; j < (ULL(1) << tagTableSizes[i]); j++) {
592 gtable[i][j].u = gtable[i][j].u >> 1;
593 }
594 }
595 }
596
597 if (bi->hitBank > 0) {
598 DPRINTF(LTage, "Updating tag table entry (%d,%d) for branch %lx\n",
599 bi->hitBank, bi->hitBankIndex, branch_pc);
600 ctrUpdate(gtable[bi->hitBank][bi->hitBankIndex].ctr, taken,
601 tagTableCounterBits);
602 // if the provider entry is not certified to be useful also update
603 // the alternate prediction
604 if (gtable[bi->hitBank][bi->hitBankIndex].u == 0) {
605 if (bi->altBank > 0) {
606 ctrUpdate(gtable[bi->altBank][bi->altBankIndex].ctr, taken,
607 tagTableCounterBits);
608 DPRINTF(LTage, "Updating tag table entry (%d,%d) for"
609 " branch %lx\n", bi->hitBank, bi->hitBankIndex,
610 branch_pc);
611 }
612 if (bi->altBank == 0) {
613 baseUpdate(pc, taken, bi);
614 }
615 }
616
617 // update the u counter
618 if (longest_match_pred != bi->altTaken) {
619 if (longest_match_pred == taken) {
620 if (gtable[bi->hitBank][bi->hitBankIndex].u < 1) {
621 gtable[bi->hitBank][bi->hitBankIndex].u++;
622 }
623 }
624 }
625 } else {
626 baseUpdate(pc, taken, bi);
627 }
628
629 //END PREDICTOR UPDATE
630 }
631 if (!squashed) {
632 delete bi;
633 }
634}
635
636void
637LTAGE::updateHistories(ThreadID tid, Addr branch_pc, bool taken, void* b)
638{
639 BranchInfo* bi = (BranchInfo*)(b);
640 ThreadHistory& tHist = threadHistory[tid];
641 // UPDATE HISTORIES
642 bool pathbit = ((branch_pc >> instShiftAmt) & 1);
643 //on a squash, return pointers to this and recompute indices.
644 //update user history
645 updateGHist(tHist.gHist, taken, tHist.globalHistory, tHist.ptGhist);
646 tHist.pathHist = (tHist.pathHist << 1) + pathbit;
647 tHist.pathHist = (tHist.pathHist & ((ULL(1) << 16) - 1));
648
649 bi->ptGhist = tHist.ptGhist;
650 bi->pathHist = tHist.pathHist;
651 //prepare next index and tag computations for user branchs
652 for (int i = 1; i <= nHistoryTables; i++)
653 {
654 bi->ci[i] = tHist.computeIndices[i].comp;
655 bi->ct0[i] = tHist.computeTags[0][i].comp;
656 bi->ct1[i] = tHist.computeTags[1][i].comp;
657 tHist.computeIndices[i].update(tHist.gHist);
658 tHist.computeTags[0][i].update(tHist.gHist);
659 tHist.computeTags[1][i].update(tHist.gHist);
660 }
661 DPRINTF(LTage, "Updating global histories with branch:%lx; taken?:%d, "
662 "path Hist: %x; pointer:%d\n", branch_pc, taken, tHist.pathHist,
663 tHist.ptGhist);
664}
665
// Restore the checkpointed history state after a misprediction, then
// re-apply the branch with its actual (resolved) outcome.
void
LTAGE::squash(ThreadID tid, bool taken, void *bp_history)
{
    BranchInfo* bi = (BranchInfo*)(bp_history);
    ThreadHistory& tHist = threadHistory[tid];
    DPRINTF(LTage, "Restoring branch info: %lx; taken? %d; PathHistory:%x, "
            "pointer:%d\n", bi->branchPC,taken, bi->pathHist, bi->ptGhist);
    // Roll the path history and the global-history pointer back to the
    // values checkpointed when this branch was predicted.
    tHist.pathHist = bi->pathHist;
    tHist.ptGhist = bi->ptGhist;
    tHist.gHist = &(tHist.globalHistory[tHist.ptGhist]);
    // Overwrite the speculative outcome bit with the resolved direction.
    tHist.gHist[0] = (taken ? 1 : 0);
    // Restore the folded histories from their checkpoints, then advance
    // them with the corrected history bit.
    for (int i = 1; i <= nHistoryTables; i++) {
        tHist.computeIndices[i].comp = bi->ci[i];
        tHist.computeTags[0][i].comp = bi->ct0[i];
        tHist.computeTags[1][i].comp = bi->ct1[i];
        tHist.computeIndices[i].update(tHist.gHist);
        tHist.computeTags[0][i].update(tHist.gHist);
        tHist.computeTags[1][i].update(tHist.gHist);
    }

    // Roll the loop predictor's speculative iteration counter back to
    // the value checkpointed for this branch.
    if (bi->condBranch) {
        if (bi->loopHit >= 0) {
            int idx = bi->loopIndex + bi->loopHit;
            ltable[idx].currentIterSpec = bi->currentIter;
        }
    }

}
694
695void
696LTAGE::squash(ThreadID tid, void *bp_history)
697{
698 BranchInfo* bi = (BranchInfo*)(bp_history);
699 DPRINTF(LTage, "Deleting branch info: %lx\n", bi->branchPC);
700 if (bi->condBranch) {
701 if (bi->loopHit >= 0) {
702 int idx = bi->loopIndex + bi->loopHit;
703 ltable[idx].currentIterSpec = bi->currentIter;
704 }
705 }
706
707 delete bi;
708}
709
710bool
711LTAGE::lookup(ThreadID tid, Addr branch_pc, void* &bp_history)
712{
713 bool retval = predict(tid, branch_pc, true, bp_history);
714
715 DPRINTF(LTage, "Lookup branch: %lx; predict:%d\n", branch_pc, retval);
716 updateHistories(tid, branch_pc, retval, bp_history);
717 assert(threadHistory[tid].gHist ==
718 &threadHistory[tid].globalHistory[threadHistory[tid].ptGhist]);
719
720 return retval;
721}
722
723void
724LTAGE::btbUpdate(ThreadID tid, Addr branch_pc, void* &bp_history)
725{
726 BranchInfo* bi = (BranchInfo*) bp_history;
727 ThreadHistory& tHist = threadHistory[tid];
728 DPRINTF(LTage, "BTB miss resets prediction: %lx\n", branch_pc);
729 assert(tHist.gHist == &tHist.globalHistory[tHist.ptGhist]);
730 tHist.gHist[0] = 0;
731 for (int i = 1; i <= nHistoryTables; i++) {
732 tHist.computeIndices[i].comp = bi->ci[i];
733 tHist.computeTags[0][i].comp = bi->ct0[i];
734 tHist.computeTags[1][i].comp = bi->ct1[i];
735 tHist.computeIndices[i].update(tHist.gHist);
736 tHist.computeTags[0][i].update(tHist.gHist);
737 tHist.computeTags[1][i].update(tHist.gHist);
738 }
739}
740
// Unconditional branch: allocate the branch info without consulting
// the TAGE tables (cond_branch == false) and record a taken outcome in
// the global/path histories.
void
LTAGE::uncondBranch(ThreadID tid, Addr br_pc, void* &bp_history)
{
    DPRINTF(LTage, "UnConditionalBranch: %lx\n", br_pc);
    predict(tid, br_pc, false, bp_history);
    updateHistories(tid, br_pc, true, bp_history);
    // Sanity check: history pointer and head reference stay in sync.
    assert(threadHistory[tid].gHist ==
           &threadHistory[tid].globalHistory[threadHistory[tid].ptGhist]);
}
750
// Factory hook invoked by the generated params object to instantiate
// the LTAGE predictor from its configuration.
LTAGE*
LTAGEParams::create()
{
    return new LTAGE(this);
}
583 }
584 }
585 }
586 //periodic reset of u: reset is not complete but bit by bit
587 tCounter++;
588 if ((tCounter & ((ULL(1) << logTick) - 1)) == 0) {
589 // reset least significant bit
590 // most significant bit becomes least significant bit
591 for (int i = 1; i <= nHistoryTables; i++) {
592 for (int j = 0; j < (ULL(1) << tagTableSizes[i]); j++) {
593 gtable[i][j].u = gtable[i][j].u >> 1;
594 }
595 }
596 }
597
598 if (bi->hitBank > 0) {
599 DPRINTF(LTage, "Updating tag table entry (%d,%d) for branch %lx\n",
600 bi->hitBank, bi->hitBankIndex, branch_pc);
601 ctrUpdate(gtable[bi->hitBank][bi->hitBankIndex].ctr, taken,
602 tagTableCounterBits);
603 // if the provider entry is not certified to be useful also update
604 // the alternate prediction
605 if (gtable[bi->hitBank][bi->hitBankIndex].u == 0) {
606 if (bi->altBank > 0) {
607 ctrUpdate(gtable[bi->altBank][bi->altBankIndex].ctr, taken,
608 tagTableCounterBits);
609 DPRINTF(LTage, "Updating tag table entry (%d,%d) for"
610 " branch %lx\n", bi->hitBank, bi->hitBankIndex,
611 branch_pc);
612 }
613 if (bi->altBank == 0) {
614 baseUpdate(pc, taken, bi);
615 }
616 }
617
618 // update the u counter
619 if (longest_match_pred != bi->altTaken) {
620 if (longest_match_pred == taken) {
621 if (gtable[bi->hitBank][bi->hitBankIndex].u < 1) {
622 gtable[bi->hitBank][bi->hitBankIndex].u++;
623 }
624 }
625 }
626 } else {
627 baseUpdate(pc, taken, bi);
628 }
629
630 //END PREDICTOR UPDATE
631 }
632 if (!squashed) {
633 delete bi;
634 }
635}
636
637void
638LTAGE::updateHistories(ThreadID tid, Addr branch_pc, bool taken, void* b)
639{
640 BranchInfo* bi = (BranchInfo*)(b);
641 ThreadHistory& tHist = threadHistory[tid];
642 // UPDATE HISTORIES
643 bool pathbit = ((branch_pc >> instShiftAmt) & 1);
644 //on a squash, return pointers to this and recompute indices.
645 //update user history
646 updateGHist(tHist.gHist, taken, tHist.globalHistory, tHist.ptGhist);
647 tHist.pathHist = (tHist.pathHist << 1) + pathbit;
648 tHist.pathHist = (tHist.pathHist & ((ULL(1) << 16) - 1));
649
650 bi->ptGhist = tHist.ptGhist;
651 bi->pathHist = tHist.pathHist;
652 //prepare next index and tag computations for user branchs
653 for (int i = 1; i <= nHistoryTables; i++)
654 {
655 bi->ci[i] = tHist.computeIndices[i].comp;
656 bi->ct0[i] = tHist.computeTags[0][i].comp;
657 bi->ct1[i] = tHist.computeTags[1][i].comp;
658 tHist.computeIndices[i].update(tHist.gHist);
659 tHist.computeTags[0][i].update(tHist.gHist);
660 tHist.computeTags[1][i].update(tHist.gHist);
661 }
662 DPRINTF(LTage, "Updating global histories with branch:%lx; taken?:%d, "
663 "path Hist: %x; pointer:%d\n", branch_pc, taken, tHist.pathHist,
664 tHist.ptGhist);
665}
666
// Restore the checkpointed history state after a misprediction, then
// re-apply the branch with its actual (resolved) outcome.
void
LTAGE::squash(ThreadID tid, bool taken, void *bp_history)
{
    BranchInfo* bi = (BranchInfo*)(bp_history);
    ThreadHistory& tHist = threadHistory[tid];
    DPRINTF(LTage, "Restoring branch info: %lx; taken? %d; PathHistory:%x, "
            "pointer:%d\n", bi->branchPC,taken, bi->pathHist, bi->ptGhist);
    // Roll the path history and the global-history pointer back to the
    // values checkpointed when this branch was predicted.
    tHist.pathHist = bi->pathHist;
    tHist.ptGhist = bi->ptGhist;
    tHist.gHist = &(tHist.globalHistory[tHist.ptGhist]);
    // Overwrite the speculative outcome bit with the resolved direction.
    tHist.gHist[0] = (taken ? 1 : 0);
    // Restore the folded histories from their checkpoints, then advance
    // them with the corrected history bit.
    for (int i = 1; i <= nHistoryTables; i++) {
        tHist.computeIndices[i].comp = bi->ci[i];
        tHist.computeTags[0][i].comp = bi->ct0[i];
        tHist.computeTags[1][i].comp = bi->ct1[i];
        tHist.computeIndices[i].update(tHist.gHist);
        tHist.computeTags[0][i].update(tHist.gHist);
        tHist.computeTags[1][i].update(tHist.gHist);
    }

    // Roll the loop predictor's speculative iteration counter back to
    // the value checkpointed for this branch.
    if (bi->condBranch) {
        if (bi->loopHit >= 0) {
            int idx = bi->loopIndex + bi->loopHit;
            ltable[idx].currentIterSpec = bi->currentIter;
        }
    }

}
695
696void
697LTAGE::squash(ThreadID tid, void *bp_history)
698{
699 BranchInfo* bi = (BranchInfo*)(bp_history);
700 DPRINTF(LTage, "Deleting branch info: %lx\n", bi->branchPC);
701 if (bi->condBranch) {
702 if (bi->loopHit >= 0) {
703 int idx = bi->loopIndex + bi->loopHit;
704 ltable[idx].currentIterSpec = bi->currentIter;
705 }
706 }
707
708 delete bi;
709}
710
711bool
712LTAGE::lookup(ThreadID tid, Addr branch_pc, void* &bp_history)
713{
714 bool retval = predict(tid, branch_pc, true, bp_history);
715
716 DPRINTF(LTage, "Lookup branch: %lx; predict:%d\n", branch_pc, retval);
717 updateHistories(tid, branch_pc, retval, bp_history);
718 assert(threadHistory[tid].gHist ==
719 &threadHistory[tid].globalHistory[threadHistory[tid].ptGhist]);
720
721 return retval;
722}
723
724void
725LTAGE::btbUpdate(ThreadID tid, Addr branch_pc, void* &bp_history)
726{
727 BranchInfo* bi = (BranchInfo*) bp_history;
728 ThreadHistory& tHist = threadHistory[tid];
729 DPRINTF(LTage, "BTB miss resets prediction: %lx\n", branch_pc);
730 assert(tHist.gHist == &tHist.globalHistory[tHist.ptGhist]);
731 tHist.gHist[0] = 0;
732 for (int i = 1; i <= nHistoryTables; i++) {
733 tHist.computeIndices[i].comp = bi->ci[i];
734 tHist.computeTags[0][i].comp = bi->ct0[i];
735 tHist.computeTags[1][i].comp = bi->ct1[i];
736 tHist.computeIndices[i].update(tHist.gHist);
737 tHist.computeTags[0][i].update(tHist.gHist);
738 tHist.computeTags[1][i].update(tHist.gHist);
739 }
740}
741
// Unconditional branch: allocate the branch info without consulting
// the TAGE tables (cond_branch == false) and record a taken outcome in
// the global/path histories.
void
LTAGE::uncondBranch(ThreadID tid, Addr br_pc, void* &bp_history)
{
    DPRINTF(LTage, "UnConditionalBranch: %lx\n", br_pc);
    predict(tid, br_pc, false, bp_history);
    updateHistories(tid, br_pc, true, bp_history);
    // Sanity check: history pointer and head reference stay in sync.
    assert(threadHistory[tid].gHist ==
           &threadHistory[tid].globalHistory[threadHistory[tid].ptGhist]);
}
751
// Factory hook invoked by the generated params object to instantiate
// the LTAGE predictor from its configuration.
LTAGE*
LTAGEParams::create()
{
    return new LTAGE(this);
}