// NOTE(review): garbled diff-viewer paste — this file interleaves two
// revisions of gem5's cpu/pred/ltage.cc (11784:00fd5dce5e7e and
// 12334:e0ab29a34764); viewer line numbers are fused onto each line.
1/*
2 * Copyright (c) 2014 The University of Wisconsin
3 *
4 * Copyright (c) 2006 INRIA (Institut National de Recherche en
5 * Informatique et en Automatique / French National Research Institute
6 * for Computer Science and Applied Mathematics)
7 *
8 * All rights reserved.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions are
12 * met: redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer;
14 * redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution;
17 * neither the name of the copyright holders nor the names of its
18 * contributors may be used to endorse or promote products derived from
19 * this software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 *
33 * Authors: Vignyan Reddy, Dibakar Gope and Arthur Perais,
34 * from André Seznec's code.
35 */
36
37/* @file
38 * Implementation of a L-TAGE branch predictor
39 */
40
41#include "cpu/pred/ltage.hh"
42
43#include "base/intmath.hh"
1/*
2 * Copyright (c) 2014 The University of Wisconsin
3 *
4 * Copyright (c) 2006 INRIA (Institut National de Recherche en
5 * Informatique et en Automatique / French National Research Institute
6 * for Computer Science and Applied Mathematics)
7 *
8 * All rights reserved.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions are
12 * met: redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer;
14 * redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution;
17 * neither the name of the copyright holders nor the names of its
18 * contributors may be used to endorse or promote products derived from
19 * this software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 *
33 * Authors: Vignyan Reddy, Dibakar Gope and Arthur Perais,
34 * from André Seznec's code.
35 */
36
37/* @file
38 * Implementation of a L-TAGE branch predictor
39 */
40
41#include "cpu/pred/ltage.hh"
42
43#include "base/intmath.hh"
44#include "base/misc.hh"
44#include "base/logging.hh"
45#include "base/random.hh"
46#include "base/trace.hh"
47#include "debug/Fetch.hh"
48#include "debug/LTage.hh"
49
50LTAGE::LTAGE(const LTAGEParams *params)
51 : BPredUnit(params),
52 logSizeBiMP(params->logSizeBiMP),
53 logSizeTagTables(params->logSizeTagTables),
54 logSizeLoopPred(params->logSizeLoopPred),
55 nHistoryTables(params->nHistoryTables),
56 tagTableCounterBits(params->tagTableCounterBits),
57 histBufferSize(params->histBufferSize),
58 minHist(params->minHist),
59 maxHist(params->maxHist),
60 minTagWidth(params->minTagWidth),
61 threadHistory(params->numThreads)
62{
63 assert(params->histBufferSize > params->maxHist * 2);
64 useAltPredForNewlyAllocated = 0;
65 logTick = 19;
66 tCounter = ULL(1) << (logTick - 1);
67
68 for (auto& history : threadHistory) {
69 history.pathHist = 0;
70 history.globalHistory = new uint8_t[histBufferSize];
71 history.gHist = history.globalHistory;
72 memset(history.gHist, 0, histBufferSize);
73 history.ptGhist = 0;
74 }
75
76 histLengths = new int [nHistoryTables+1];
77 histLengths[1] = minHist;
78 histLengths[nHistoryTables] = maxHist;
79
80 for (int i = 2; i <= nHistoryTables; i++) {
81 histLengths[i] = (int) (((double) minHist *
82 pow ((double) (maxHist) / (double) minHist,
83 (double) (i - 1) / (double) ((nHistoryTables- 1))))
84 + 0.5);
85 }
86
87 tagWidths[1] = minTagWidth;
88 tagWidths[2] = minTagWidth;
89 tagWidths[3] = minTagWidth + 1;
90 tagWidths[4] = minTagWidth + 1;
91 tagWidths[5] = minTagWidth + 2;
92 tagWidths[6] = minTagWidth + 3;
93 tagWidths[7] = minTagWidth + 4;
94 tagWidths[8] = minTagWidth + 5;
95 tagWidths[9] = minTagWidth + 5;
96 tagWidths[10] = minTagWidth + 6;
97 tagWidths[11] = minTagWidth + 7;
98 tagWidths[12] = minTagWidth + 8;
99
100 for (int i = 1; i <= 2; i++)
101 tagTableSizes[i] = logSizeTagTables - 1;
102 for (int i = 3; i <= 6; i++)
103 tagTableSizes[i] = logSizeTagTables;
104 for (int i = 7; i <= 10; i++)
105 tagTableSizes[i] = logSizeTagTables - 1;
106 for (int i = 11; i <= 12; i++)
107 tagTableSizes[i] = logSizeTagTables - 2;
108
109 for (auto& history : threadHistory) {
110 history.computeIndices = new FoldedHistory[nHistoryTables+1];
111 history.computeTags[0] = new FoldedHistory[nHistoryTables+1];
112 history.computeTags[1] = new FoldedHistory[nHistoryTables+1];
113
114 for (int i = 1; i <= nHistoryTables; i++) {
115 history.computeIndices[i].init(histLengths[i], (tagTableSizes[i]));
116 history.computeTags[0][i].init(
117 history.computeIndices[i].origLength, tagWidths[i]);
118 history.computeTags[1][i].init(
119 history.computeIndices[i].origLength, tagWidths[i] - 1);
120 DPRINTF(LTage, "HistLength:%d, TTSize:%d, TTTWidth:%d\n",
121 histLengths[i], tagTableSizes[i], tagWidths[i]);
122 }
123 }
124
125 btable = new BimodalEntry[ULL(1) << logSizeBiMP];
126 ltable = new LoopEntry[ULL(1) << logSizeLoopPred];
127 gtable = new TageEntry*[nHistoryTables + 1];
128 for (int i = 1; i <= nHistoryTables; i++) {
129 gtable[i] = new TageEntry[1<<(tagTableSizes[i])];
130 }
131
132 tableIndices = new int [nHistoryTables+1];
133 tableTags = new int [nHistoryTables+1];
134
135 loopUseCounter = 0;
136}
137
138int
139LTAGE::bindex(Addr pc_in) const
140{
141 return ((pc_in) & ((ULL(1) << (logSizeBiMP)) - 1));
142}
143
144int
145LTAGE::lindex(Addr pc_in) const
146{
147 return (((pc_in) & ((ULL(1) << (logSizeLoopPred - 2)) - 1)) << 2);
148}
149
150int
151LTAGE::F(int A, int size, int bank) const
152{
153 int A1, A2;
154
155 A = A & ((ULL(1) << size) - 1);
156 A1 = (A & ((ULL(1) << tagTableSizes[bank]) - 1));
157 A2 = (A >> tagTableSizes[bank]);
158 A2 = ((A2 << bank) & ((ULL(1) << tagTableSizes[bank]) - 1))
159 + (A2 >> (tagTableSizes[bank] - bank));
160 A = A1 ^ A2;
161 A = ((A << bank) & ((ULL(1) << tagTableSizes[bank]) - 1))
162 + (A >> (tagTableSizes[bank] - bank));
163 return (A);
164}
165
166
167// gindex computes a full hash of pc, ghist and pathHist
168int
169LTAGE::gindex(ThreadID tid, Addr pc, int bank) const
170{
171 int index;
172 int hlen = (histLengths[bank] > 16) ? 16 : histLengths[bank];
173 index =
174 (pc) ^ ((pc) >> ((int) abs(tagTableSizes[bank] - bank) + 1)) ^
175 threadHistory[tid].computeIndices[bank].comp ^
176 F(threadHistory[tid].pathHist, hlen, bank);
177
178 return (index & ((ULL(1) << (tagTableSizes[bank])) - 1));
179}
180
181
182// Tag computation
183uint16_t
184LTAGE::gtag(ThreadID tid, Addr pc, int bank) const
185{
186 int tag = (pc) ^ threadHistory[tid].computeTags[0][bank].comp
187 ^ (threadHistory[tid].computeTags[1][bank].comp << 1);
188
189 return (tag & ((ULL(1) << tagWidths[bank]) - 1));
190}
191
192
193// Up-down saturating counter
194void
195LTAGE::ctrUpdate(int8_t & ctr, bool taken, int nbits)
196{
197 assert(nbits <= sizeof(int8_t) << 3);
198 if (taken) {
199 if (ctr < ((1 << (nbits - 1)) - 1))
200 ctr++;
201 } else {
202 if (ctr > -(1 << (nbits - 1)))
203 ctr--;
204 }
205}
206
207// Bimodal prediction
208bool
209LTAGE::getBimodePred(Addr pc, BranchInfo* bi) const
210{
211 return (btable[bi->bimodalIndex].pred > 0);
212}
213
214
215// Update the bimodal predictor: a hysteresis bit is shared among 4 prediction
216// bits
217void
218LTAGE::baseUpdate(Addr pc, bool taken, BranchInfo* bi)
219{
220 int inter = (btable[bi->bimodalIndex].pred << 1)
221 + btable[bi->bimodalIndex ].hyst;
222 if (taken) {
223 if (inter < 3)
224 inter++;
225 } else if (inter > 0) {
226 inter--;
227 }
228 btable[bi->bimodalIndex].pred = inter >> 1;
229 btable[bi->bimodalIndex].hyst = (inter & 1);
230 DPRINTF(LTage, "Updating branch %lx, pred:%d, hyst:%d\n",
231 pc, btable[bi->bimodalIndex].pred,btable[bi->bimodalIndex].hyst);
232}
233
234
235//loop prediction: only used if high confidence
236bool
237LTAGE::getLoop(Addr pc, BranchInfo* bi) const
238{
239 bi->loopHit = -1;
240 bi->loopPredValid = false;
241 bi->loopIndex = lindex(pc);
242 bi->loopTag = ((pc) >> (logSizeLoopPred - 2));
243
244 for (int i = 0; i < 4; i++) {
245 if (ltable[bi->loopIndex + i].tag == bi->loopTag) {
246 bi->loopHit = i;
247 bi->loopPredValid = (ltable[bi->loopIndex + i].confidence >= 3);
248 bi->currentIter = ltable[bi->loopIndex + i].currentIterSpec;
249 if (ltable[bi->loopIndex + i].currentIterSpec + 1 ==
250 ltable[bi->loopIndex + i].numIter) {
251 return !(ltable[bi->loopIndex + i].dir);
252 }else {
253 return (ltable[bi->loopIndex + i].dir);
254 }
255 }
256 }
257 return false;
258}
259
260void
261LTAGE::specLoopUpdate(Addr pc, bool taken, BranchInfo* bi)
262{
263 if (bi->loopHit>=0) {
264 int index = lindex(pc);
265 if (taken != ltable[index].dir) {
266 ltable[index].currentIterSpec = 0;
267 } else {
268 ltable[index].currentIterSpec++;
269 }
270 }
271}
272
273void
274LTAGE::loopUpdate(Addr pc, bool taken, BranchInfo* bi)
275{
276 int idx = bi->loopIndex + bi->loopHit;
277 if (bi->loopHit >= 0) {
278 //already a hit
279 if (bi->loopPredValid) {
280 if (taken != bi->loopPred) {
281 // free the entry
282 ltable[idx].numIter = 0;
283 ltable[idx].age = 0;
284 ltable[idx].confidence = 0;
285 ltable[idx].currentIter = 0;
286 return;
287 } else if (bi->loopPred != bi->tagePred) {
288 DPRINTF(LTage, "Loop Prediction success:%lx\n",pc);
289 if (ltable[idx].age < 7)
290 ltable[idx].age++;
291 }
292 }
293
294 ltable[idx].currentIter++;
295 if (ltable[idx].currentIter > ltable[idx].numIter) {
296 ltable[idx].confidence = 0;
297 if (ltable[idx].numIter != 0) {
298 // free the entry
299 ltable[idx].numIter = 0;
300 ltable[idx].age = 0;
301 ltable[idx].confidence = 0;
302 }
303 }
304
305 if (taken != ltable[idx].dir) {
306 if (ltable[idx].currentIter == ltable[idx].numIter) {
307 DPRINTF(LTage, "Loop End predicted successfully:%lx\n", pc);
308
309 if (ltable[idx].confidence < 7) {
310 ltable[idx].confidence++;
311 }
312 //just do not predict when the loop count is 1 or 2
313 if (ltable[idx].numIter < 3) {
314 // free the entry
315 ltable[idx].dir = taken;
316 ltable[idx].numIter = 0;
317 ltable[idx].age = 0;
318 ltable[idx].confidence = 0;
319 }
320 } else {
321 DPRINTF(LTage, "Loop End predicted incorrectly:%lx\n", pc);
322 if (ltable[idx].numIter == 0) {
323 // first complete nest;
324 ltable[idx].confidence = 0;
325 ltable[idx].numIter = ltable[idx].currentIter;
326 } else {
327 //not the same number of iterations as last time: free the
328 //entry
329 ltable[idx].numIter = 0;
330 ltable[idx].age = 0;
331 ltable[idx].confidence = 0;
332 }
333 }
334 ltable[idx].currentIter = 0;
335 }
336
337 } else if (taken) {
338 //try to allocate an entry on taken branch
339 int nrand = random_mt.random<int>();
340 for (int i = 0; i < 4; i++) {
341 int loop_hit = (nrand + i) & 3;
342 idx = bi->loopIndex + loop_hit;
343 if (ltable[idx].age == 0) {
344 DPRINTF(LTage, "Allocating loop pred entry for branch %lx\n",
345 pc);
346 ltable[idx].dir = !taken;
347 ltable[idx].tag = bi->loopTag;
348 ltable[idx].numIter = 0;
349 ltable[idx].age = 7;
350 ltable[idx].confidence = 0;
351 ltable[idx].currentIter = 1;
352 break;
353
354 }
355 else
356 ltable[idx].age--;
357 }
358 }
359
360}
361
362// shifting the global history: we manage the history in a big table in order
363// to reduce simulation time
364void
365LTAGE::updateGHist(uint8_t * &h, bool dir, uint8_t * tab, int &pt)
366{
367 if (pt == 0) {
368 DPRINTF(LTage, "Rolling over the histories\n");
369 // Copy beginning of globalHistoryBuffer to end, such that
370 // the last maxHist outcomes are still reachable
371 // through pt[0 .. maxHist - 1].
372 for (int i = 0; i < maxHist; i++)
373 tab[histBufferSize - maxHist + i] = tab[i];
374 pt = histBufferSize - maxHist;
375 h = &tab[pt];
376 }
377 pt--;
378 h--;
379 h[0] = (dir) ? 1 : 0;
380}
381
382// Get GHR for hashing indirect predictor
383// Build history backwards from pointer in
384// bp_history.
385unsigned
386LTAGE::getGHR(ThreadID tid, void *bp_history) const
387{
388 BranchInfo* bi = static_cast<BranchInfo*>(bp_history);
389 unsigned val = 0;
390 for (unsigned i = 0; i < 32; i++) {
391 // Make sure we don't go out of bounds
392 int gh_offset = bi->ptGhist + i;
393 assert(&(threadHistory[tid].globalHistory[gh_offset]) <
394 threadHistory[tid].globalHistory + histBufferSize);
395 val |= ((threadHistory[tid].globalHistory[gh_offset] & 0x1) << i);
396 }
397
398 return val;
399}
400
401//prediction
402bool
403LTAGE::predict(ThreadID tid, Addr branch_pc, bool cond_branch, void* &b)
404{
405 BranchInfo *bi = new BranchInfo(nHistoryTables+1);
406 b = (void*)(bi);
407 Addr pc = branch_pc;
408 bool pred_taken = true;
409 bi->loopHit = -1;
410
411 if (cond_branch) {
412 // TAGE prediction
413
414 // computes the table addresses and the partial tags
415 for (int i = 1; i <= nHistoryTables; i++) {
416 tableIndices[i] = gindex(tid, pc, i);
417 bi->tableIndices[i] = tableIndices[i];
418 tableTags[i] = gtag(tid, pc, i);
419 bi->tableTags[i] = tableTags[i];
420 }
421
422 bi->bimodalIndex = bindex(pc);
423
424 bi->hitBank = 0;
425 bi->altBank = 0;
426 //Look for the bank with longest matching history
427 for (int i = nHistoryTables; i > 0; i--) {
428 if (gtable[i][tableIndices[i]].tag == tableTags[i]) {
429 bi->hitBank = i;
430 bi->hitBankIndex = tableIndices[bi->hitBank];
431 break;
432 }
433 }
434 //Look for the alternate bank
435 for (int i = bi->hitBank - 1; i > 0; i--) {
436 if (gtable[i][tableIndices[i]].tag == tableTags[i]) {
437 bi->altBank = i;
438 bi->altBankIndex = tableIndices[bi->altBank];
439 break;
440 }
441 }
442 //computes the prediction and the alternate prediction
443 if (bi->hitBank > 0) {
444 if (bi->altBank > 0) {
445 bi->altTaken =
446 gtable[bi->altBank][tableIndices[bi->altBank]].ctr >= 0;
447 }else {
448 bi->altTaken = getBimodePred(pc, bi);
449 }
450
451 bi->longestMatchPred =
452 gtable[bi->hitBank][tableIndices[bi->hitBank]].ctr >= 0;
453 bi->pseudoNewAlloc =
454 abs(2 * gtable[bi->hitBank][bi->hitBankIndex].ctr + 1) <= 1;
455
456 //if the entry is recognized as a newly allocated entry and
457 //useAltPredForNewlyAllocated is positive use the alternate
458 //prediction
459 if ((useAltPredForNewlyAllocated < 0)
460 || abs(2 *
461 gtable[bi->hitBank][tableIndices[bi->hitBank]].ctr + 1) > 1)
462 bi->tagePred = bi->longestMatchPred;
463 else
464 bi->tagePred = bi->altTaken;
465 } else {
466 bi->altTaken = getBimodePred(pc, bi);
467 bi->tagePred = bi->altTaken;
468 bi->longestMatchPred = bi->altTaken;
469 }
470 //end TAGE prediction
471
472 bi->loopPred = getLoop(pc, bi); // loop prediction
473
474 pred_taken = (((loopUseCounter >= 0) && bi->loopPredValid)) ?
475 (bi->loopPred): (bi->tagePred);
476 DPRINTF(LTage, "Predict for %lx: taken?:%d, loopTaken?:%d, "
477 "loopValid?:%d, loopUseCounter:%d, tagePred:%d, altPred:%d\n",
478 branch_pc, pred_taken, bi->loopPred, bi->loopPredValid,
479 loopUseCounter, bi->tagePred, bi->altTaken);
480 }
481 bi->branchPC = branch_pc;
482 bi->condBranch = cond_branch;
483 specLoopUpdate(branch_pc, pred_taken, bi);
484 return pred_taken;
485}
486
487// PREDICTOR UPDATE
488void
489LTAGE::update(ThreadID tid, Addr branch_pc, bool taken, void* bp_history,
490 bool squashed)
491{
492 assert(bp_history);
493
494 BranchInfo *bi = static_cast<BranchInfo*>(bp_history);
495
496 if (squashed) {
497 // This restores the global history, then update it
498 // and recomputes the folded histories.
499 squash(tid, taken, bp_history);
500 return;
501 }
502
503 int nrand = random_mt.random<int>(0,3);
504 Addr pc = branch_pc;
505 if (bi->condBranch) {
506 DPRINTF(LTage, "Updating tables for branch:%lx; taken?:%d\n",
507 branch_pc, taken);
508 // first update the loop predictor
509 loopUpdate(pc, taken, bi);
510
511 if (bi->loopPredValid) {
512 if (bi->tagePred != bi->loopPred) {
513 ctrUpdate(loopUseCounter, (bi->loopPred== taken), 7);
514 }
515 }
516
517 // TAGE UPDATE
518 // try to allocate a new entries only if prediction was wrong
519 bool longest_match_pred = false;
520 bool alloc = (bi->tagePred != taken) && (bi->hitBank < nHistoryTables);
521 if (bi->hitBank > 0) {
522 // Manage the selection between longest matching and alternate
523 // matching for "pseudo"-newly allocated longest matching entry
524 longest_match_pred = bi->longestMatchPred;
525 bool PseudoNewAlloc = bi->pseudoNewAlloc;
526 // an entry is considered as newly allocated if its prediction
527 // counter is weak
528 if (PseudoNewAlloc) {
529 if (longest_match_pred == taken) {
530 alloc = false;
531 }
532 // if it was delivering the correct prediction, no need to
533 // allocate new entry even if the overall prediction was false
534 if (longest_match_pred != bi->altTaken) {
535 ctrUpdate(useAltPredForNewlyAllocated,
536 bi->altTaken == taken, 4);
537 }
538 }
539 }
540
541 if (alloc) {
542 // is there some "unuseful" entry to allocate
543 int8_t min = 1;
544 for (int i = nHistoryTables; i > bi->hitBank; i--) {
545 if (gtable[i][bi->tableIndices[i]].u < min) {
546 min = gtable[i][bi->tableIndices[i]].u;
547 }
548 }
549
550 // we allocate an entry with a longer history
551 // to avoid ping-pong, we do not choose systematically the next
552 // entry, but among the 3 next entries
553 int Y = nrand &
554 ((ULL(1) << (nHistoryTables - bi->hitBank - 1)) - 1);
555 int X = bi->hitBank + 1;
556 if (Y & 1) {
557 X++;
558 if (Y & 2)
559 X++;
560 }
561 // No entry available, forces one to be available
562 if (min > 0) {
563 gtable[X][bi->tableIndices[X]].u = 0;
564 }
565
566
567 //Allocate only one entry
568 for (int i = X; i <= nHistoryTables; i++) {
569 if ((gtable[i][bi->tableIndices[i]].u == 0)) {
570 gtable[i][bi->tableIndices[i]].tag = bi->tableTags[i];
571 gtable[i][bi->tableIndices[i]].ctr = (taken) ? 0 : -1;
572 gtable[i][bi->tableIndices[i]].u = 0; //?
573 }
574 }
575 }
576 //periodic reset of u: reset is not complete but bit by bit
577 tCounter++;
578 if ((tCounter & ((ULL(1) << logTick) - 1)) == 0) {
579 // reset least significant bit
580 // most significant bit becomes least significant bit
581 for (int i = 1; i <= nHistoryTables; i++) {
582 for (int j = 0; j < (ULL(1) << tagTableSizes[i]); j++) {
583 gtable[i][j].u = gtable[i][j].u >> 1;
584 }
585 }
586 }
587
588 if (bi->hitBank > 0) {
589 DPRINTF(LTage, "Updating tag table entry (%d,%d) for branch %lx\n",
590 bi->hitBank, bi->hitBankIndex, branch_pc);
591 ctrUpdate(gtable[bi->hitBank][bi->hitBankIndex].ctr, taken,
592 tagTableCounterBits);
593 // if the provider entry is not certified to be useful also update
594 // the alternate prediction
595 if (gtable[bi->hitBank][bi->hitBankIndex].u == 0) {
596 if (bi->altBank > 0) {
597 ctrUpdate(gtable[bi->altBank][bi->altBankIndex].ctr, taken,
598 tagTableCounterBits);
599 DPRINTF(LTage, "Updating tag table entry (%d,%d) for"
600 " branch %lx\n", bi->hitBank, bi->hitBankIndex,
601 branch_pc);
602 }
603 if (bi->altBank == 0) {
604 baseUpdate(pc, taken, bi);
605 }
606 }
607
608 // update the u counter
609 if (longest_match_pred != bi->altTaken) {
610 if (longest_match_pred == taken) {
611 if (gtable[bi->hitBank][bi->hitBankIndex].u < 1) {
612 gtable[bi->hitBank][bi->hitBankIndex].u++;
613 }
614 }
615 }
616 } else {
617 baseUpdate(pc, taken, bi);
618 }
619
620 //END PREDICTOR UPDATE
621 }
622 if (!squashed) {
623 delete bi;
624 }
625}
626
627void
628LTAGE::updateHistories(ThreadID tid, Addr branch_pc, bool taken, void* b)
629{
630 BranchInfo* bi = (BranchInfo*)(b);
631 ThreadHistory& tHist = threadHistory[tid];
632 // UPDATE HISTORIES
633 bool pathbit = ((branch_pc) & 1);
634 //on a squash, return pointers to this and recompute indices.
635 //update user history
636 updateGHist(tHist.gHist, taken, tHist.globalHistory, tHist.ptGhist);
637 tHist.pathHist = (tHist.pathHist << 1) + pathbit;
638 tHist.pathHist = (tHist.pathHist & ((ULL(1) << 16) - 1));
639
640 bi->ptGhist = tHist.ptGhist;
641 bi->pathHist = tHist.pathHist;
642 //prepare next index and tag computations for user branchs
643 for (int i = 1; i <= nHistoryTables; i++)
644 {
645 bi->ci[i] = tHist.computeIndices[i].comp;
646 bi->ct0[i] = tHist.computeTags[0][i].comp;
647 bi->ct1[i] = tHist.computeTags[1][i].comp;
648 tHist.computeIndices[i].update(tHist.gHist);
649 tHist.computeTags[0][i].update(tHist.gHist);
650 tHist.computeTags[1][i].update(tHist.gHist);
651 }
652 DPRINTF(LTage, "Updating global histories with branch:%lx; taken?:%d, "
653 "path Hist: %x; pointer:%d\n", branch_pc, taken, tHist.pathHist,
654 tHist.ptGhist);
655}
656
657void
658LTAGE::squash(ThreadID tid, bool taken, void *bp_history)
659{
660 BranchInfo* bi = (BranchInfo*)(bp_history);
661 ThreadHistory& tHist = threadHistory[tid];
662 DPRINTF(LTage, "Restoring branch info: %lx; taken? %d; PathHistory:%x, "
663 "pointer:%d\n", bi->branchPC,taken, bi->pathHist, bi->ptGhist);
664 tHist.pathHist = bi->pathHist;
665 tHist.ptGhist = bi->ptGhist;
666 tHist.gHist = &(tHist.globalHistory[tHist.ptGhist]);
667 tHist.gHist[0] = (taken ? 1 : 0);
668 for (int i = 1; i <= nHistoryTables; i++) {
669 tHist.computeIndices[i].comp = bi->ci[i];
670 tHist.computeTags[0][i].comp = bi->ct0[i];
671 tHist.computeTags[1][i].comp = bi->ct1[i];
672 tHist.computeIndices[i].update(tHist.gHist);
673 tHist.computeTags[0][i].update(tHist.gHist);
674 tHist.computeTags[1][i].update(tHist.gHist);
675 }
676
677 if (bi->condBranch) {
678 if (bi->loopHit >= 0) {
679 int idx = bi->loopIndex + bi->loopHit;
680 ltable[idx].currentIterSpec = bi->currentIter;
681 }
682 }
683
684}
685
686void
687LTAGE::squash(ThreadID tid, void *bp_history)
688{
689 BranchInfo* bi = (BranchInfo*)(bp_history);
690 DPRINTF(LTage, "Deleting branch info: %lx\n", bi->branchPC);
691 if (bi->condBranch) {
692 if (bi->loopHit >= 0) {
693 int idx = bi->loopIndex + bi->loopHit;
694 ltable[idx].currentIterSpec = bi->currentIter;
695 }
696 }
697
698 delete bi;
699}
700
701bool
702LTAGE::lookup(ThreadID tid, Addr branch_pc, void* &bp_history)
703{
704 bool retval = predict(tid, branch_pc, true, bp_history);
705
706 DPRINTF(LTage, "Lookup branch: %lx; predict:%d\n", branch_pc, retval);
707 updateHistories(tid, branch_pc, retval, bp_history);
708 assert(threadHistory[tid].gHist ==
709 &threadHistory[tid].globalHistory[threadHistory[tid].ptGhist]);
710
711 return retval;
712}
713
714void
715LTAGE::btbUpdate(ThreadID tid, Addr branch_pc, void* &bp_history)
716{
717 BranchInfo* bi = (BranchInfo*) bp_history;
718 ThreadHistory& tHist = threadHistory[tid];
719 DPRINTF(LTage, "BTB miss resets prediction: %lx\n", branch_pc);
720 assert(tHist.gHist == &tHist.globalHistory[tHist.ptGhist]);
721 tHist.gHist[0] = 0;
722 for (int i = 1; i <= nHistoryTables; i++) {
723 tHist.computeIndices[i].comp = bi->ci[i];
724 tHist.computeTags[0][i].comp = bi->ct0[i];
725 tHist.computeTags[1][i].comp = bi->ct1[i];
726 tHist.computeIndices[i].update(tHist.gHist);
727 tHist.computeTags[0][i].update(tHist.gHist);
728 tHist.computeTags[1][i].update(tHist.gHist);
729 }
730}
731
732void
733LTAGE::uncondBranch(ThreadID tid, Addr br_pc, void* &bp_history)
734{
735 DPRINTF(LTage, "UnConditionalBranch: %lx\n", br_pc);
736 predict(tid, br_pc, false, bp_history);
737 updateHistories(tid, br_pc, true, bp_history);
738 assert(threadHistory[tid].gHist ==
739 &threadHistory[tid].globalHistory[threadHistory[tid].ptGhist]);
740}
741
742LTAGE*
743LTAGEParams::create()
744{
745 return new LTAGE(this);
746}
45#include "base/random.hh"
46#include "base/trace.hh"
47#include "debug/Fetch.hh"
48#include "debug/LTage.hh"
49
50LTAGE::LTAGE(const LTAGEParams *params)
51 : BPredUnit(params),
52 logSizeBiMP(params->logSizeBiMP),
53 logSizeTagTables(params->logSizeTagTables),
54 logSizeLoopPred(params->logSizeLoopPred),
55 nHistoryTables(params->nHistoryTables),
56 tagTableCounterBits(params->tagTableCounterBits),
57 histBufferSize(params->histBufferSize),
58 minHist(params->minHist),
59 maxHist(params->maxHist),
60 minTagWidth(params->minTagWidth),
61 threadHistory(params->numThreads)
62{
63 assert(params->histBufferSize > params->maxHist * 2);
64 useAltPredForNewlyAllocated = 0;
65 logTick = 19;
66 tCounter = ULL(1) << (logTick - 1);
67
68 for (auto& history : threadHistory) {
69 history.pathHist = 0;
70 history.globalHistory = new uint8_t[histBufferSize];
71 history.gHist = history.globalHistory;
72 memset(history.gHist, 0, histBufferSize);
73 history.ptGhist = 0;
74 }
75
76 histLengths = new int [nHistoryTables+1];
77 histLengths[1] = minHist;
78 histLengths[nHistoryTables] = maxHist;
79
80 for (int i = 2; i <= nHistoryTables; i++) {
81 histLengths[i] = (int) (((double) minHist *
82 pow ((double) (maxHist) / (double) minHist,
83 (double) (i - 1) / (double) ((nHistoryTables- 1))))
84 + 0.5);
85 }
86
87 tagWidths[1] = minTagWidth;
88 tagWidths[2] = minTagWidth;
89 tagWidths[3] = minTagWidth + 1;
90 tagWidths[4] = minTagWidth + 1;
91 tagWidths[5] = minTagWidth + 2;
92 tagWidths[6] = minTagWidth + 3;
93 tagWidths[7] = minTagWidth + 4;
94 tagWidths[8] = minTagWidth + 5;
95 tagWidths[9] = minTagWidth + 5;
96 tagWidths[10] = minTagWidth + 6;
97 tagWidths[11] = minTagWidth + 7;
98 tagWidths[12] = minTagWidth + 8;
99
100 for (int i = 1; i <= 2; i++)
101 tagTableSizes[i] = logSizeTagTables - 1;
102 for (int i = 3; i <= 6; i++)
103 tagTableSizes[i] = logSizeTagTables;
104 for (int i = 7; i <= 10; i++)
105 tagTableSizes[i] = logSizeTagTables - 1;
106 for (int i = 11; i <= 12; i++)
107 tagTableSizes[i] = logSizeTagTables - 2;
108
109 for (auto& history : threadHistory) {
110 history.computeIndices = new FoldedHistory[nHistoryTables+1];
111 history.computeTags[0] = new FoldedHistory[nHistoryTables+1];
112 history.computeTags[1] = new FoldedHistory[nHistoryTables+1];
113
114 for (int i = 1; i <= nHistoryTables; i++) {
115 history.computeIndices[i].init(histLengths[i], (tagTableSizes[i]));
116 history.computeTags[0][i].init(
117 history.computeIndices[i].origLength, tagWidths[i]);
118 history.computeTags[1][i].init(
119 history.computeIndices[i].origLength, tagWidths[i] - 1);
120 DPRINTF(LTage, "HistLength:%d, TTSize:%d, TTTWidth:%d\n",
121 histLengths[i], tagTableSizes[i], tagWidths[i]);
122 }
123 }
124
125 btable = new BimodalEntry[ULL(1) << logSizeBiMP];
126 ltable = new LoopEntry[ULL(1) << logSizeLoopPred];
127 gtable = new TageEntry*[nHistoryTables + 1];
128 for (int i = 1; i <= nHistoryTables; i++) {
129 gtable[i] = new TageEntry[1<<(tagTableSizes[i])];
130 }
131
132 tableIndices = new int [nHistoryTables+1];
133 tableTags = new int [nHistoryTables+1];
134
135 loopUseCounter = 0;
136}
137
138int
139LTAGE::bindex(Addr pc_in) const
140{
141 return ((pc_in) & ((ULL(1) << (logSizeBiMP)) - 1));
142}
143
144int
145LTAGE::lindex(Addr pc_in) const
146{
147 return (((pc_in) & ((ULL(1) << (logSizeLoopPred - 2)) - 1)) << 2);
148}
149
150int
151LTAGE::F(int A, int size, int bank) const
152{
153 int A1, A2;
154
155 A = A & ((ULL(1) << size) - 1);
156 A1 = (A & ((ULL(1) << tagTableSizes[bank]) - 1));
157 A2 = (A >> tagTableSizes[bank]);
158 A2 = ((A2 << bank) & ((ULL(1) << tagTableSizes[bank]) - 1))
159 + (A2 >> (tagTableSizes[bank] - bank));
160 A = A1 ^ A2;
161 A = ((A << bank) & ((ULL(1) << tagTableSizes[bank]) - 1))
162 + (A >> (tagTableSizes[bank] - bank));
163 return (A);
164}
165
166
167// gindex computes a full hash of pc, ghist and pathHist
168int
169LTAGE::gindex(ThreadID tid, Addr pc, int bank) const
170{
171 int index;
172 int hlen = (histLengths[bank] > 16) ? 16 : histLengths[bank];
173 index =
174 (pc) ^ ((pc) >> ((int) abs(tagTableSizes[bank] - bank) + 1)) ^
175 threadHistory[tid].computeIndices[bank].comp ^
176 F(threadHistory[tid].pathHist, hlen, bank);
177
178 return (index & ((ULL(1) << (tagTableSizes[bank])) - 1));
179}
180
181
182// Tag computation
183uint16_t
184LTAGE::gtag(ThreadID tid, Addr pc, int bank) const
185{
186 int tag = (pc) ^ threadHistory[tid].computeTags[0][bank].comp
187 ^ (threadHistory[tid].computeTags[1][bank].comp << 1);
188
189 return (tag & ((ULL(1) << tagWidths[bank]) - 1));
190}
191
192
193// Up-down saturating counter
194void
195LTAGE::ctrUpdate(int8_t & ctr, bool taken, int nbits)
196{
197 assert(nbits <= sizeof(int8_t) << 3);
198 if (taken) {
199 if (ctr < ((1 << (nbits - 1)) - 1))
200 ctr++;
201 } else {
202 if (ctr > -(1 << (nbits - 1)))
203 ctr--;
204 }
205}
206
207// Bimodal prediction
208bool
209LTAGE::getBimodePred(Addr pc, BranchInfo* bi) const
210{
211 return (btable[bi->bimodalIndex].pred > 0);
212}
213
214
215// Update the bimodal predictor: a hysteresis bit is shared among 4 prediction
216// bits
217void
218LTAGE::baseUpdate(Addr pc, bool taken, BranchInfo* bi)
219{
220 int inter = (btable[bi->bimodalIndex].pred << 1)
221 + btable[bi->bimodalIndex ].hyst;
222 if (taken) {
223 if (inter < 3)
224 inter++;
225 } else if (inter > 0) {
226 inter--;
227 }
228 btable[bi->bimodalIndex].pred = inter >> 1;
229 btable[bi->bimodalIndex].hyst = (inter & 1);
230 DPRINTF(LTage, "Updating branch %lx, pred:%d, hyst:%d\n",
231 pc, btable[bi->bimodalIndex].pred,btable[bi->bimodalIndex].hyst);
232}
233
234
235//loop prediction: only used if high confidence
236bool
237LTAGE::getLoop(Addr pc, BranchInfo* bi) const
238{
239 bi->loopHit = -1;
240 bi->loopPredValid = false;
241 bi->loopIndex = lindex(pc);
242 bi->loopTag = ((pc) >> (logSizeLoopPred - 2));
243
244 for (int i = 0; i < 4; i++) {
245 if (ltable[bi->loopIndex + i].tag == bi->loopTag) {
246 bi->loopHit = i;
247 bi->loopPredValid = (ltable[bi->loopIndex + i].confidence >= 3);
248 bi->currentIter = ltable[bi->loopIndex + i].currentIterSpec;
249 if (ltable[bi->loopIndex + i].currentIterSpec + 1 ==
250 ltable[bi->loopIndex + i].numIter) {
251 return !(ltable[bi->loopIndex + i].dir);
252 }else {
253 return (ltable[bi->loopIndex + i].dir);
254 }
255 }
256 }
257 return false;
258}
259
260void
261LTAGE::specLoopUpdate(Addr pc, bool taken, BranchInfo* bi)
262{
263 if (bi->loopHit>=0) {
264 int index = lindex(pc);
265 if (taken != ltable[index].dir) {
266 ltable[index].currentIterSpec = 0;
267 } else {
268 ltable[index].currentIterSpec++;
269 }
270 }
271}
272
// Non-speculative update of the loop predictor with the resolved
// outcome of a conditional branch.
void
LTAGE::loopUpdate(Addr pc, bool taken, BranchInfo* bi)
{
    // Entry that matched in getLoop(); only meaningful when
    // bi->loopHit >= 0.
    int idx = bi->loopIndex + bi->loopHit;
    if (bi->loopHit >= 0) {
        //already a hit
        if (bi->loopPredValid) {
            if (taken != bi->loopPred) {
                // free the entry
                ltable[idx].numIter = 0;
                ltable[idx].age = 0;
                ltable[idx].confidence = 0;
                ltable[idx].currentIter = 0;
                return;
            } else if (bi->loopPred != bi->tagePred) {
                // The loop predictor disagreed with TAGE and was right:
                // age the entry up to protect it from replacement.
                DPRINTF(LTage, "Loop Prediction success:%lx\n",pc);
                if (ltable[idx].age < 7)
                    ltable[idx].age++;
            }
        }

        ltable[idx].currentIter++;
        if (ltable[idx].currentIter > ltable[idx].numIter) {
            // More iterations than the recorded trip count: the stored
            // count is stale, so drop confidence in it.
            ltable[idx].confidence = 0;
            if (ltable[idx].numIter != 0) {
                // free the entry
                ltable[idx].numIter = 0;
                ltable[idx].age = 0;
                ltable[idx].confidence = 0;
            }
        }

        if (taken != ltable[idx].dir) {
            // The branch just went against the stored direction, i.e.
            // the loop exited: check whether the trip count matched.
            if (ltable[idx].currentIter == ltable[idx].numIter) {
                DPRINTF(LTage, "Loop End predicted successfully:%lx\n", pc);

                if (ltable[idx].confidence < 7) {
                    ltable[idx].confidence++;
                }
                //just do not predict when the loop count is 1 or 2
                if (ltable[idx].numIter < 3) {
                    // free the entry
                    ltable[idx].dir = taken;
                    ltable[idx].numIter = 0;
                    ltable[idx].age = 0;
                    ltable[idx].confidence = 0;
                }
            } else {
                DPRINTF(LTage, "Loop End predicted incorrectly:%lx\n", pc);
                if (ltable[idx].numIter == 0) {
                    // first complete nest;
                    // record the just-observed trip count, still with
                    // zero confidence.
                    ltable[idx].confidence = 0;
                    ltable[idx].numIter = ltable[idx].currentIter;
                } else {
                    //not the same number of iterations as last time: free the
                    //entry
                    ltable[idx].numIter = 0;
                    ltable[idx].age = 0;
                    ltable[idx].confidence = 0;
                }
            }
            ltable[idx].currentIter = 0;
        }

    } else if (taken) {
        //try to allocate an entry on taken branch
        // Start at a random way to avoid pathological replacement
        // (ping-pong) within the 4-way set.
        int nrand = random_mt.random<int>();
        for (int i = 0; i < 4; i++) {
            int loop_hit = (nrand + i) & 3;
            idx = bi->loopIndex + loop_hit;
            if (ltable[idx].age == 0) {
                DPRINTF(LTage, "Allocating loop pred entry for branch %lx\n",
                        pc);
                // New entry: stored direction is the opposite of the
                // (taken) outcome just seen; max age shields it while it
                // warms up, zero confidence keeps it from predicting yet.
                ltable[idx].dir = !taken;
                ltable[idx].tag = bi->loopTag;
                ltable[idx].numIter = 0;
                ltable[idx].age = 7;
                ltable[idx].confidence = 0;
                ltable[idx].currentIter = 1;
                break;

            }
            else
                ltable[idx].age--;
        }
    }

}
361
362// shifting the global history: we manage the history in a big table in order
363// to reduce simulation time
364void
365LTAGE::updateGHist(uint8_t * &h, bool dir, uint8_t * tab, int &pt)
366{
367 if (pt == 0) {
368 DPRINTF(LTage, "Rolling over the histories\n");
369 // Copy beginning of globalHistoryBuffer to end, such that
370 // the last maxHist outcomes are still reachable
371 // through pt[0 .. maxHist - 1].
372 for (int i = 0; i < maxHist; i++)
373 tab[histBufferSize - maxHist + i] = tab[i];
374 pt = histBufferSize - maxHist;
375 h = &tab[pt];
376 }
377 pt--;
378 h--;
379 h[0] = (dir) ? 1 : 0;
380}
381
382// Get GHR for hashing indirect predictor
383// Build history backwards from pointer in
384// bp_history.
385unsigned
386LTAGE::getGHR(ThreadID tid, void *bp_history) const
387{
388 BranchInfo* bi = static_cast<BranchInfo*>(bp_history);
389 unsigned val = 0;
390 for (unsigned i = 0; i < 32; i++) {
391 // Make sure we don't go out of bounds
392 int gh_offset = bi->ptGhist + i;
393 assert(&(threadHistory[tid].globalHistory[gh_offset]) <
394 threadHistory[tid].globalHistory + histBufferSize);
395 val |= ((threadHistory[tid].globalHistory[gh_offset] & 0x1) << i);
396 }
397
398 return val;
399}
400
//prediction
bool
LTAGE::predict(ThreadID tid, Addr branch_pc, bool cond_branch, void* &b)
{
    // Heap-allocated per-branch record; ownership is handed to the
    // caller through b and released later in update() or squash().
    BranchInfo *bi = new BranchInfo(nHistoryTables+1);
    b = (void*)(bi);
    Addr pc = branch_pc;
    bool pred_taken = true; // unconditional branches stay predicted taken
    bi->loopHit = -1;

    if (cond_branch) {
        // TAGE prediction

        // computes the table addresses and the partial tags
        for (int i = 1; i <= nHistoryTables; i++) {
            tableIndices[i] = gindex(tid, pc, i);
            bi->tableIndices[i] = tableIndices[i];
            tableTags[i] = gtag(tid, pc, i);
            bi->tableTags[i] = tableTags[i];
        }

        bi->bimodalIndex = bindex(pc);

        bi->hitBank = 0;
        bi->altBank = 0;
        //Look for the bank with longest matching history
        for (int i = nHistoryTables; i > 0; i--) {
            if (gtable[i][tableIndices[i]].tag == tableTags[i]) {
                bi->hitBank = i;
                bi->hitBankIndex = tableIndices[bi->hitBank];
                break;
            }
        }
        //Look for the alternate bank
        // (next longest matching history below the provider)
        for (int i = bi->hitBank - 1; i > 0; i--) {
            if (gtable[i][tableIndices[i]].tag == tableTags[i]) {
                bi->altBank = i;
                bi->altBankIndex = tableIndices[bi->altBank];
                break;
            }
        }
        //computes the prediction and the alternate prediction
        if (bi->hitBank > 0) {
            if (bi->altBank > 0) {
                bi->altTaken =
                    gtable[bi->altBank][tableIndices[bi->altBank]].ctr >= 0;
            }else {
                // No alternate tagged hit: the bimodal table provides
                // the alternate prediction.
                bi->altTaken = getBimodePred(pc, bi);
            }

            bi->longestMatchPred =
                gtable[bi->hitBank][tableIndices[bi->hitBank]].ctr >= 0;
            // A weak counter (|2*ctr + 1| <= 1) marks a (pseudo-)newly
            // allocated entry.
            bi->pseudoNewAlloc =
                abs(2 * gtable[bi->hitBank][bi->hitBankIndex].ctr + 1) <= 1;

            //if the entry is recognized as a newly allocated entry and
            //useAltPredForNewlyAllocated is positive use the alternate
            //prediction
            if ((useAltPredForNewlyAllocated < 0)
                || abs(2 *
                   gtable[bi->hitBank][tableIndices[bi->hitBank]].ctr + 1) > 1)
                bi->tagePred = bi->longestMatchPred;
            else
                bi->tagePred = bi->altTaken;
        } else {
            // No tagged component hit at all: fall back to bimodal.
            bi->altTaken = getBimodePred(pc, bi);
            bi->tagePred = bi->altTaken;
            bi->longestMatchPred = bi->altTaken;
        }
        //end TAGE prediction

        bi->loopPred = getLoop(pc, bi); // loop prediction

        // The loop prediction overrides TAGE only when the loop entry
        // is confident and the chooser (loopUseCounter) favors it.
        pred_taken = (((loopUseCounter >= 0) && bi->loopPredValid)) ?
                     (bi->loopPred): (bi->tagePred);
        DPRINTF(LTage, "Predict for %lx: taken?:%d, loopTaken?:%d, "
                "loopValid?:%d, loopUseCounter:%d, tagePred:%d, altPred:%d\n",
                branch_pc, pred_taken, bi->loopPred, bi->loopPredValid,
                loopUseCounter, bi->tagePred, bi->altTaken);
    }
    bi->branchPC = branch_pc;
    bi->condBranch = cond_branch;
    // Speculatively advance the loop predictor's iteration counter.
    specLoopUpdate(branch_pc, pred_taken, bi);
    return pred_taken;
}
486
487// PREDICTOR UPDATE
488void
489LTAGE::update(ThreadID tid, Addr branch_pc, bool taken, void* bp_history,
490 bool squashed)
491{
492 assert(bp_history);
493
494 BranchInfo *bi = static_cast<BranchInfo*>(bp_history);
495
496 if (squashed) {
497 // This restores the global history, then update it
498 // and recomputes the folded histories.
499 squash(tid, taken, bp_history);
500 return;
501 }
502
503 int nrand = random_mt.random<int>(0,3);
504 Addr pc = branch_pc;
505 if (bi->condBranch) {
506 DPRINTF(LTage, "Updating tables for branch:%lx; taken?:%d\n",
507 branch_pc, taken);
508 // first update the loop predictor
509 loopUpdate(pc, taken, bi);
510
511 if (bi->loopPredValid) {
512 if (bi->tagePred != bi->loopPred) {
513 ctrUpdate(loopUseCounter, (bi->loopPred== taken), 7);
514 }
515 }
516
517 // TAGE UPDATE
518 // try to allocate a new entries only if prediction was wrong
519 bool longest_match_pred = false;
520 bool alloc = (bi->tagePred != taken) && (bi->hitBank < nHistoryTables);
521 if (bi->hitBank > 0) {
522 // Manage the selection between longest matching and alternate
523 // matching for "pseudo"-newly allocated longest matching entry
524 longest_match_pred = bi->longestMatchPred;
525 bool PseudoNewAlloc = bi->pseudoNewAlloc;
526 // an entry is considered as newly allocated if its prediction
527 // counter is weak
528 if (PseudoNewAlloc) {
529 if (longest_match_pred == taken) {
530 alloc = false;
531 }
532 // if it was delivering the correct prediction, no need to
533 // allocate new entry even if the overall prediction was false
534 if (longest_match_pred != bi->altTaken) {
535 ctrUpdate(useAltPredForNewlyAllocated,
536 bi->altTaken == taken, 4);
537 }
538 }
539 }
540
541 if (alloc) {
542 // is there some "unuseful" entry to allocate
543 int8_t min = 1;
544 for (int i = nHistoryTables; i > bi->hitBank; i--) {
545 if (gtable[i][bi->tableIndices[i]].u < min) {
546 min = gtable[i][bi->tableIndices[i]].u;
547 }
548 }
549
550 // we allocate an entry with a longer history
551 // to avoid ping-pong, we do not choose systematically the next
552 // entry, but among the 3 next entries
553 int Y = nrand &
554 ((ULL(1) << (nHistoryTables - bi->hitBank - 1)) - 1);
555 int X = bi->hitBank + 1;
556 if (Y & 1) {
557 X++;
558 if (Y & 2)
559 X++;
560 }
561 // No entry available, forces one to be available
562 if (min > 0) {
563 gtable[X][bi->tableIndices[X]].u = 0;
564 }
565
566
567 //Allocate only one entry
568 for (int i = X; i <= nHistoryTables; i++) {
569 if ((gtable[i][bi->tableIndices[i]].u == 0)) {
570 gtable[i][bi->tableIndices[i]].tag = bi->tableTags[i];
571 gtable[i][bi->tableIndices[i]].ctr = (taken) ? 0 : -1;
572 gtable[i][bi->tableIndices[i]].u = 0; //?
573 }
574 }
575 }
576 //periodic reset of u: reset is not complete but bit by bit
577 tCounter++;
578 if ((tCounter & ((ULL(1) << logTick) - 1)) == 0) {
579 // reset least significant bit
580 // most significant bit becomes least significant bit
581 for (int i = 1; i <= nHistoryTables; i++) {
582 for (int j = 0; j < (ULL(1) << tagTableSizes[i]); j++) {
583 gtable[i][j].u = gtable[i][j].u >> 1;
584 }
585 }
586 }
587
588 if (bi->hitBank > 0) {
589 DPRINTF(LTage, "Updating tag table entry (%d,%d) for branch %lx\n",
590 bi->hitBank, bi->hitBankIndex, branch_pc);
591 ctrUpdate(gtable[bi->hitBank][bi->hitBankIndex].ctr, taken,
592 tagTableCounterBits);
593 // if the provider entry is not certified to be useful also update
594 // the alternate prediction
595 if (gtable[bi->hitBank][bi->hitBankIndex].u == 0) {
596 if (bi->altBank > 0) {
597 ctrUpdate(gtable[bi->altBank][bi->altBankIndex].ctr, taken,
598 tagTableCounterBits);
599 DPRINTF(LTage, "Updating tag table entry (%d,%d) for"
600 " branch %lx\n", bi->hitBank, bi->hitBankIndex,
601 branch_pc);
602 }
603 if (bi->altBank == 0) {
604 baseUpdate(pc, taken, bi);
605 }
606 }
607
608 // update the u counter
609 if (longest_match_pred != bi->altTaken) {
610 if (longest_match_pred == taken) {
611 if (gtable[bi->hitBank][bi->hitBankIndex].u < 1) {
612 gtable[bi->hitBank][bi->hitBankIndex].u++;
613 }
614 }
615 }
616 } else {
617 baseUpdate(pc, taken, bi);
618 }
619
620 //END PREDICTOR UPDATE
621 }
622 if (!squashed) {
623 delete bi;
624 }
625}
626
627void
628LTAGE::updateHistories(ThreadID tid, Addr branch_pc, bool taken, void* b)
629{
630 BranchInfo* bi = (BranchInfo*)(b);
631 ThreadHistory& tHist = threadHistory[tid];
632 // UPDATE HISTORIES
633 bool pathbit = ((branch_pc) & 1);
634 //on a squash, return pointers to this and recompute indices.
635 //update user history
636 updateGHist(tHist.gHist, taken, tHist.globalHistory, tHist.ptGhist);
637 tHist.pathHist = (tHist.pathHist << 1) + pathbit;
638 tHist.pathHist = (tHist.pathHist & ((ULL(1) << 16) - 1));
639
640 bi->ptGhist = tHist.ptGhist;
641 bi->pathHist = tHist.pathHist;
642 //prepare next index and tag computations for user branchs
643 for (int i = 1; i <= nHistoryTables; i++)
644 {
645 bi->ci[i] = tHist.computeIndices[i].comp;
646 bi->ct0[i] = tHist.computeTags[0][i].comp;
647 bi->ct1[i] = tHist.computeTags[1][i].comp;
648 tHist.computeIndices[i].update(tHist.gHist);
649 tHist.computeTags[0][i].update(tHist.gHist);
650 tHist.computeTags[1][i].update(tHist.gHist);
651 }
652 DPRINTF(LTage, "Updating global histories with branch:%lx; taken?:%d, "
653 "path Hist: %x; pointer:%d\n", branch_pc, taken, tHist.pathHist,
654 tHist.ptGhist);
655}
656
657void
658LTAGE::squash(ThreadID tid, bool taken, void *bp_history)
659{
660 BranchInfo* bi = (BranchInfo*)(bp_history);
661 ThreadHistory& tHist = threadHistory[tid];
662 DPRINTF(LTage, "Restoring branch info: %lx; taken? %d; PathHistory:%x, "
663 "pointer:%d\n", bi->branchPC,taken, bi->pathHist, bi->ptGhist);
664 tHist.pathHist = bi->pathHist;
665 tHist.ptGhist = bi->ptGhist;
666 tHist.gHist = &(tHist.globalHistory[tHist.ptGhist]);
667 tHist.gHist[0] = (taken ? 1 : 0);
668 for (int i = 1; i <= nHistoryTables; i++) {
669 tHist.computeIndices[i].comp = bi->ci[i];
670 tHist.computeTags[0][i].comp = bi->ct0[i];
671 tHist.computeTags[1][i].comp = bi->ct1[i];
672 tHist.computeIndices[i].update(tHist.gHist);
673 tHist.computeTags[0][i].update(tHist.gHist);
674 tHist.computeTags[1][i].update(tHist.gHist);
675 }
676
677 if (bi->condBranch) {
678 if (bi->loopHit >= 0) {
679 int idx = bi->loopIndex + bi->loopHit;
680 ltable[idx].currentIterSpec = bi->currentIter;
681 }
682 }
683
684}
685
686void
687LTAGE::squash(ThreadID tid, void *bp_history)
688{
689 BranchInfo* bi = (BranchInfo*)(bp_history);
690 DPRINTF(LTage, "Deleting branch info: %lx\n", bi->branchPC);
691 if (bi->condBranch) {
692 if (bi->loopHit >= 0) {
693 int idx = bi->loopIndex + bi->loopHit;
694 ltable[idx].currentIterSpec = bi->currentIter;
695 }
696 }
697
698 delete bi;
699}
700
701bool
702LTAGE::lookup(ThreadID tid, Addr branch_pc, void* &bp_history)
703{
704 bool retval = predict(tid, branch_pc, true, bp_history);
705
706 DPRINTF(LTage, "Lookup branch: %lx; predict:%d\n", branch_pc, retval);
707 updateHistories(tid, branch_pc, retval, bp_history);
708 assert(threadHistory[tid].gHist ==
709 &threadHistory[tid].globalHistory[threadHistory[tid].ptGhist]);
710
711 return retval;
712}
713
714void
715LTAGE::btbUpdate(ThreadID tid, Addr branch_pc, void* &bp_history)
716{
717 BranchInfo* bi = (BranchInfo*) bp_history;
718 ThreadHistory& tHist = threadHistory[tid];
719 DPRINTF(LTage, "BTB miss resets prediction: %lx\n", branch_pc);
720 assert(tHist.gHist == &tHist.globalHistory[tHist.ptGhist]);
721 tHist.gHist[0] = 0;
722 for (int i = 1; i <= nHistoryTables; i++) {
723 tHist.computeIndices[i].comp = bi->ci[i];
724 tHist.computeTags[0][i].comp = bi->ct0[i];
725 tHist.computeTags[1][i].comp = bi->ct1[i];
726 tHist.computeIndices[i].update(tHist.gHist);
727 tHist.computeTags[0][i].update(tHist.gHist);
728 tHist.computeTags[1][i].update(tHist.gHist);
729 }
730}
731
// Unconditional branch: allocate the per-branch record (cond_branch =
// false skips the TAGE/loop lookup) and push a taken outcome into the
// histories.
void
LTAGE::uncondBranch(ThreadID tid, Addr br_pc, void* &bp_history)
{
    DPRINTF(LTage, "UnConditionalBranch: %lx\n", br_pc);
    predict(tid, br_pc, false, bp_history);
    updateHistories(tid, br_pc, true, bp_history);
    // The history pointer and the cached head must stay in sync.
    assert(threadHistory[tid].gHist ==
           &threadHistory[tid].globalHistory[threadHistory[tid].ptGhist]);
}
741
// Factory hook used by the gem5 params framework to build the predictor
// from its Python-configured parameters.
LTAGE*
LTAGEParams::create()
{
    return new LTAGE(this);
}