ltage.cc (13442:5314c50529a5) ltage.cc (13443:a111cb197897)
1/*
2 * Copyright (c) 2014 The University of Wisconsin
3 *
4 * Copyright (c) 2006 INRIA (Institut National de Recherche en
5 * Informatique et en Automatique / French National Research Institute
6 * for Computer Science and Applied Mathematics)
7 *
8 * All rights reserved.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions are
12 * met: redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer;
14 * redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution;
17 * neither the name of the copyright holders nor the names of its
18 * contributors may be used to endorse or promote products derived from
19 * this software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 *
33 * Authors: Vignyan Reddy, Dibakar Gope and Arthur Perais,
34 * from André Seznec's code.
35 */
36
37/* @file
38 * Implementation of a L-TAGE branch predictor
39 */
40
41#include "cpu/pred/ltage.hh"
42
43#include "base/intmath.hh"
44#include "base/logging.hh"
45#include "base/random.hh"
46#include "base/trace.hh"
47#include "debug/Fetch.hh"
48#include "debug/LTage.hh"
49
50LTAGE::LTAGE(const LTAGEParams *params)
51 : BPredUnit(params),
52 logSizeBiMP(params->logSizeBiMP),
53 logRatioBiModalHystEntries(params->logRatioBiModalHystEntries),
54 logSizeTagTables(params->logSizeTagTables),
55 logSizeLoopPred(params->logSizeLoopPred),
56 nHistoryTables(params->nHistoryTables),
57 tagTableCounterBits(params->tagTableCounterBits),
1/*
2 * Copyright (c) 2014 The University of Wisconsin
3 *
4 * Copyright (c) 2006 INRIA (Institut National de Recherche en
5 * Informatique et en Automatique / French National Research Institute
6 * for Computer Science and Applied Mathematics)
7 *
8 * All rights reserved.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions are
12 * met: redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer;
14 * redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution;
17 * neither the name of the copyright holders nor the names of its
18 * contributors may be used to endorse or promote products derived from
19 * this software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 *
33 * Authors: Vignyan Reddy, Dibakar Gope and Arthur Perais,
34 * from André Seznec's code.
35 */
36
37/* @file
38 * Implementation of a L-TAGE branch predictor
39 */
40
41#include "cpu/pred/ltage.hh"
42
43#include "base/intmath.hh"
44#include "base/logging.hh"
45#include "base/random.hh"
46#include "base/trace.hh"
47#include "debug/Fetch.hh"
48#include "debug/LTage.hh"
49
50LTAGE::LTAGE(const LTAGEParams *params)
51 : BPredUnit(params),
52 logSizeBiMP(params->logSizeBiMP),
53 logRatioBiModalHystEntries(params->logRatioBiModalHystEntries),
54 logSizeTagTables(params->logSizeTagTables),
55 logSizeLoopPred(params->logSizeLoopPred),
56 nHistoryTables(params->nHistoryTables),
57 tagTableCounterBits(params->tagTableCounterBits),
58 tagTableUBits(params->tagTableUBits),
58 histBufferSize(params->histBufferSize),
59 minHist(params->minHist),
60 maxHist(params->maxHist),
61 minTagWidth(params->minTagWidth),
62 loopTableAgeBits(params->loopTableAgeBits),
63 loopTableConfidenceBits(params->loopTableConfidenceBits),
64 loopTableTagBits(params->loopTableTagBits),
65 loopTableIterBits(params->loopTableIterBits),
66 confidenceThreshold((1 << loopTableConfidenceBits) - 1),
67 loopTagMask((1 << loopTableTagBits) - 1),
68 loopNumIterMask((1 << loopTableIterBits) - 1),
69 threadHistory(params->numThreads)
70{
59 histBufferSize(params->histBufferSize),
60 minHist(params->minHist),
61 maxHist(params->maxHist),
62 minTagWidth(params->minTagWidth),
63 loopTableAgeBits(params->loopTableAgeBits),
64 loopTableConfidenceBits(params->loopTableConfidenceBits),
65 loopTableTagBits(params->loopTableTagBits),
66 loopTableIterBits(params->loopTableIterBits),
67 confidenceThreshold((1 << loopTableConfidenceBits) - 1),
68 loopTagMask((1 << loopTableTagBits) - 1),
69 loopNumIterMask((1 << loopTableIterBits) - 1),
70 threadHistory(params->numThreads)
71{
72 // Current method for periodically resetting the u counter bits only
73 // works for 1 or 2 bits
74 // Also make sure that it is not 0
75 assert(tagTableUBits <= 2 && (tagTableUBits > 0));
76
71 // we use uint16_t type for these vales, so they cannot be more than
72 // 16 bits
73 assert(loopTableTagBits <= 16);
74 assert(loopTableIterBits <= 16);
75
76 assert(params->histBufferSize > params->maxHist * 2);
77 useAltPredForNewlyAllocated = 0;
78 logTick = 19;
79 tCounter = ULL(1) << (logTick - 1);
80
81 for (auto& history : threadHistory) {
82 history.pathHist = 0;
83 history.globalHistory = new uint8_t[histBufferSize];
84 history.gHist = history.globalHistory;
85 memset(history.gHist, 0, histBufferSize);
86 history.ptGhist = 0;
87 }
88
89 histLengths = new int [nHistoryTables+1];
90 histLengths[1] = minHist;
91 histLengths[nHistoryTables] = maxHist;
92
93 for (int i = 2; i <= nHistoryTables; i++) {
94 histLengths[i] = (int) (((double) minHist *
95 pow ((double) (maxHist) / (double) minHist,
96 (double) (i - 1) / (double) ((nHistoryTables- 1))))
97 + 0.5);
98 }
99
100 tagWidths[1] = minTagWidth;
101 tagWidths[2] = minTagWidth;
102 tagWidths[3] = minTagWidth + 1;
103 tagWidths[4] = minTagWidth + 1;
104 tagWidths[5] = minTagWidth + 2;
105 tagWidths[6] = minTagWidth + 3;
106 tagWidths[7] = minTagWidth + 4;
107 tagWidths[8] = minTagWidth + 5;
108 tagWidths[9] = minTagWidth + 5;
109 tagWidths[10] = minTagWidth + 6;
110 tagWidths[11] = minTagWidth + 7;
111 tagWidths[12] = minTagWidth + 8;
112
113 for (int i = 1; i <= 2; i++)
114 tagTableSizes[i] = logSizeTagTables - 1;
115 for (int i = 3; i <= 6; i++)
116 tagTableSizes[i] = logSizeTagTables;
117 for (int i = 7; i <= 10; i++)
118 tagTableSizes[i] = logSizeTagTables - 1;
119 for (int i = 11; i <= 12; i++)
120 tagTableSizes[i] = logSizeTagTables - 2;
121
122 for (auto& history : threadHistory) {
123 history.computeIndices = new FoldedHistory[nHistoryTables+1];
124 history.computeTags[0] = new FoldedHistory[nHistoryTables+1];
125 history.computeTags[1] = new FoldedHistory[nHistoryTables+1];
126
127 for (int i = 1; i <= nHistoryTables; i++) {
128 history.computeIndices[i].init(histLengths[i], (tagTableSizes[i]));
129 history.computeTags[0][i].init(
130 history.computeIndices[i].origLength, tagWidths[i]);
131 history.computeTags[1][i].init(
132 history.computeIndices[i].origLength, tagWidths[i] - 1);
133 DPRINTF(LTage, "HistLength:%d, TTSize:%d, TTTWidth:%d\n",
134 histLengths[i], tagTableSizes[i], tagWidths[i]);
135 }
136 }
137
138 const uint64_t bimodalTableSize = ULL(1) << logSizeBiMP;
139 btablePrediction.resize(bimodalTableSize, false);
140 btableHysteresis.resize(bimodalTableSize >> logRatioBiModalHystEntries,
141 true);
142
143 ltable = new LoopEntry[ULL(1) << logSizeLoopPred];
144 gtable = new TageEntry*[nHistoryTables + 1];
145 for (int i = 1; i <= nHistoryTables; i++) {
146 gtable[i] = new TageEntry[1<<(tagTableSizes[i])];
147 }
148
149 tableIndices = new int [nHistoryTables+1];
150 tableTags = new int [nHistoryTables+1];
151
152 loopUseCounter = 0;
153}
154
155int
156LTAGE::bindex(Addr pc_in) const
157{
158 return ((pc_in >> instShiftAmt) & ((ULL(1) << (logSizeBiMP)) - 1));
159}
160
161int
162LTAGE::lindex(Addr pc_in) const
163{
164 return (((pc_in >> instShiftAmt) &
165 ((ULL(1) << (logSizeLoopPred - 2)) - 1)) << 2);
166}
167
168int
169LTAGE::F(int A, int size, int bank) const
170{
171 int A1, A2;
172
173 A = A & ((ULL(1) << size) - 1);
174 A1 = (A & ((ULL(1) << tagTableSizes[bank]) - 1));
175 A2 = (A >> tagTableSizes[bank]);
176 A2 = ((A2 << bank) & ((ULL(1) << tagTableSizes[bank]) - 1))
177 + (A2 >> (tagTableSizes[bank] - bank));
178 A = A1 ^ A2;
179 A = ((A << bank) & ((ULL(1) << tagTableSizes[bank]) - 1))
180 + (A >> (tagTableSizes[bank] - bank));
181 return (A);
182}
183
184
185// gindex computes a full hash of pc, ghist and pathHist
186int
187LTAGE::gindex(ThreadID tid, Addr pc, int bank) const
188{
189 int index;
190 int hlen = (histLengths[bank] > 16) ? 16 : histLengths[bank];
191 index =
192 (pc >> instShiftAmt) ^
193 ((pc >> instShiftAmt) >> ((int) abs(tagTableSizes[bank] - bank) + 1)) ^
194 threadHistory[tid].computeIndices[bank].comp ^
195 F(threadHistory[tid].pathHist, hlen, bank);
196
197 return (index & ((ULL(1) << (tagTableSizes[bank])) - 1));
198}
199
200
201// Tag computation
202uint16_t
203LTAGE::gtag(ThreadID tid, Addr pc, int bank) const
204{
205 int tag = (pc >> instShiftAmt) ^
206 threadHistory[tid].computeTags[0][bank].comp ^
207 (threadHistory[tid].computeTags[1][bank].comp << 1);
208
209 return (tag & ((ULL(1) << tagWidths[bank]) - 1));
210}
211
212
213// Up-down saturating counter
214void
215LTAGE::ctrUpdate(int8_t & ctr, bool taken, int nbits)
216{
217 assert(nbits <= sizeof(int8_t) << 3);
218 if (taken) {
219 if (ctr < ((1 << (nbits - 1)) - 1))
220 ctr++;
221 } else {
222 if (ctr > -(1 << (nbits - 1)))
223 ctr--;
224 }
225}
226
227// Up-down unsigned saturating counter
228void
229LTAGE::unsignedCtrUpdate(uint8_t & ctr, bool up, unsigned nbits)
230{
231 assert(nbits <= sizeof(uint8_t) << 3);
232 if (up) {
233 if (ctr < ((1 << nbits) - 1))
234 ctr++;
235 } else {
236 if (ctr)
237 ctr--;
238 }
239}
240
241// Bimodal prediction
242bool
243LTAGE::getBimodePred(Addr pc, BranchInfo* bi) const
244{
245 return btablePrediction[bi->bimodalIndex];
246}
247
248
249// Update the bimodal predictor: a hysteresis bit is shared among N prediction
250// bits (N = 2 ^ logRatioBiModalHystEntries)
251void
252LTAGE::baseUpdate(Addr pc, bool taken, BranchInfo* bi)
253{
254 int inter = (btablePrediction[bi->bimodalIndex] << 1)
255 + btableHysteresis[bi->bimodalIndex >> logRatioBiModalHystEntries];
256 if (taken) {
257 if (inter < 3)
258 inter++;
259 } else if (inter > 0) {
260 inter--;
261 }
262 const bool pred = inter >> 1;
263 const bool hyst = inter & 1;
264 btablePrediction[bi->bimodalIndex] = pred;
265 btableHysteresis[bi->bimodalIndex >> logRatioBiModalHystEntries] = hyst;
266 DPRINTF(LTage, "Updating branch %lx, pred:%d, hyst:%d\n", pc, pred, hyst);
267}
268
269
270//loop prediction: only used if high confidence
271bool
272LTAGE::getLoop(Addr pc, BranchInfo* bi) const
273{
274 bi->loopHit = -1;
275 bi->loopPredValid = false;
276 bi->loopIndex = lindex(pc);
277 bi->loopTag = ((pc) >> (instShiftAmt + logSizeLoopPred - 2)) & loopTagMask;
278
279 for (int i = 0; i < 4; i++) {
280 if (ltable[bi->loopIndex + i].tag == bi->loopTag) {
281 bi->loopHit = i;
282 bi->loopPredValid =
283 ltable[bi->loopIndex + i].confidence == confidenceThreshold;
284 bi->currentIter = ltable[bi->loopIndex + i].currentIterSpec;
285 if (ltable[bi->loopIndex + i].currentIterSpec + 1 ==
286 ltable[bi->loopIndex + i].numIter) {
287 return !(ltable[bi->loopIndex + i].dir);
288 }else {
289 return (ltable[bi->loopIndex + i].dir);
290 }
291 }
292 }
293 return false;
294}
295
296void
297LTAGE::specLoopUpdate(Addr pc, bool taken, BranchInfo* bi)
298{
299 if (bi->loopHit>=0) {
300 int index = lindex(pc);
301 if (taken != ltable[index].dir) {
302 ltable[index].currentIterSpec = 0;
303 } else {
304 ltable[index].currentIterSpec =
305 (ltable[index].currentIterSpec + 1) & loopNumIterMask;
306 }
307 }
308}
309
310void
311LTAGE::loopUpdate(Addr pc, bool taken, BranchInfo* bi)
312{
313 int idx = bi->loopIndex + bi->loopHit;
314 if (bi->loopHit >= 0) {
315 //already a hit
316 if (bi->loopPredValid) {
317 if (taken != bi->loopPred) {
318 // free the entry
319 ltable[idx].numIter = 0;
320 ltable[idx].age = 0;
321 ltable[idx].confidence = 0;
322 ltable[idx].currentIter = 0;
323 return;
324 } else if (bi->loopPred != bi->tagePred) {
325 DPRINTF(LTage, "Loop Prediction success:%lx\n",pc);
326 unsignedCtrUpdate(ltable[idx].age, true, loopTableAgeBits);
327 }
328 }
329
330 ltable[idx].currentIter =
331 (ltable[idx].currentIter + 1) & loopNumIterMask;
332 if (ltable[idx].currentIter > ltable[idx].numIter) {
333 ltable[idx].confidence = 0;
334 if (ltable[idx].numIter != 0) {
335 // free the entry
336 ltable[idx].numIter = 0;
337 ltable[idx].age = 0;
338 ltable[idx].confidence = 0;
339 }
340 }
341
342 if (taken != ltable[idx].dir) {
343 if (ltable[idx].currentIter == ltable[idx].numIter) {
344 DPRINTF(LTage, "Loop End predicted successfully:%lx\n", pc);
345
346 unsignedCtrUpdate(ltable[idx].confidence, true,
347 loopTableConfidenceBits);
348 //just do not predict when the loop count is 1 or 2
349 if (ltable[idx].numIter < 3) {
350 // free the entry
351 ltable[idx].dir = taken;
352 ltable[idx].numIter = 0;
353 ltable[idx].age = 0;
354 ltable[idx].confidence = 0;
355 }
356 } else {
357 DPRINTF(LTage, "Loop End predicted incorrectly:%lx\n", pc);
358 if (ltable[idx].numIter == 0) {
359 // first complete nest;
360 ltable[idx].confidence = 0;
361 ltable[idx].numIter = ltable[idx].currentIter;
362 } else {
363 //not the same number of iterations as last time: free the
364 //entry
365 ltable[idx].numIter = 0;
366 ltable[idx].age = 0;
367 ltable[idx].confidence = 0;
368 }
369 }
370 ltable[idx].currentIter = 0;
371 }
372
373 } else if (taken) {
374 //try to allocate an entry on taken branch
375 int nrand = random_mt.random<int>();
376 for (int i = 0; i < 4; i++) {
377 int loop_hit = (nrand + i) & 3;
378 idx = bi->loopIndex + loop_hit;
379 if (ltable[idx].age == 0) {
380 DPRINTF(LTage, "Allocating loop pred entry for branch %lx\n",
381 pc);
382 ltable[idx].dir = !taken;
383 ltable[idx].tag = bi->loopTag;
384 ltable[idx].numIter = 0;
385 ltable[idx].age = (1 << loopTableAgeBits) - 1;
386 ltable[idx].confidence = 0;
387 ltable[idx].currentIter = 1;
388 break;
389
390 }
391 else
392 ltable[idx].age--;
393 }
394 }
395
396}
397
398// shifting the global history: we manage the history in a big table in order
399// to reduce simulation time
400void
401LTAGE::updateGHist(uint8_t * &h, bool dir, uint8_t * tab, int &pt)
402{
403 if (pt == 0) {
404 DPRINTF(LTage, "Rolling over the histories\n");
405 // Copy beginning of globalHistoryBuffer to end, such that
406 // the last maxHist outcomes are still reachable
407 // through pt[0 .. maxHist - 1].
408 for (int i = 0; i < maxHist; i++)
409 tab[histBufferSize - maxHist + i] = tab[i];
410 pt = histBufferSize - maxHist;
411 h = &tab[pt];
412 }
413 pt--;
414 h--;
415 h[0] = (dir) ? 1 : 0;
416}
417
418// Get GHR for hashing indirect predictor
419// Build history backwards from pointer in
420// bp_history.
421unsigned
422LTAGE::getGHR(ThreadID tid, void *bp_history) const
423{
424 BranchInfo* bi = static_cast<BranchInfo*>(bp_history);
425 unsigned val = 0;
426 for (unsigned i = 0; i < 32; i++) {
427 // Make sure we don't go out of bounds
428 int gh_offset = bi->ptGhist + i;
429 assert(&(threadHistory[tid].globalHistory[gh_offset]) <
430 threadHistory[tid].globalHistory + histBufferSize);
431 val |= ((threadHistory[tid].globalHistory[gh_offset] & 0x1) << i);
432 }
433
434 return val;
435}
436
437//prediction
438bool
439LTAGE::predict(ThreadID tid, Addr branch_pc, bool cond_branch, void* &b)
440{
441 BranchInfo *bi = new BranchInfo(nHistoryTables+1);
442 b = (void*)(bi);
443 Addr pc = branch_pc;
444 bool pred_taken = true;
445 bi->loopHit = -1;
446
447 if (cond_branch) {
448 // TAGE prediction
449
450 // computes the table addresses and the partial tags
451 for (int i = 1; i <= nHistoryTables; i++) {
452 tableIndices[i] = gindex(tid, pc, i);
453 bi->tableIndices[i] = tableIndices[i];
454 tableTags[i] = gtag(tid, pc, i);
455 bi->tableTags[i] = tableTags[i];
456 }
457
458 bi->bimodalIndex = bindex(pc);
459
460 bi->hitBank = 0;
461 bi->altBank = 0;
462 //Look for the bank with longest matching history
463 for (int i = nHistoryTables; i > 0; i--) {
464 if (gtable[i][tableIndices[i]].tag == tableTags[i]) {
465 bi->hitBank = i;
466 bi->hitBankIndex = tableIndices[bi->hitBank];
467 break;
468 }
469 }
470 //Look for the alternate bank
471 for (int i = bi->hitBank - 1; i > 0; i--) {
472 if (gtable[i][tableIndices[i]].tag == tableTags[i]) {
473 bi->altBank = i;
474 bi->altBankIndex = tableIndices[bi->altBank];
475 break;
476 }
477 }
478 //computes the prediction and the alternate prediction
479 if (bi->hitBank > 0) {
480 if (bi->altBank > 0) {
481 bi->altTaken =
482 gtable[bi->altBank][tableIndices[bi->altBank]].ctr >= 0;
483 }else {
484 bi->altTaken = getBimodePred(pc, bi);
485 }
486
487 bi->longestMatchPred =
488 gtable[bi->hitBank][tableIndices[bi->hitBank]].ctr >= 0;
489 bi->pseudoNewAlloc =
490 abs(2 * gtable[bi->hitBank][bi->hitBankIndex].ctr + 1) <= 1;
491
492 //if the entry is recognized as a newly allocated entry and
493 //useAltPredForNewlyAllocated is positive use the alternate
494 //prediction
495 if ((useAltPredForNewlyAllocated < 0)
496 || abs(2 *
497 gtable[bi->hitBank][tableIndices[bi->hitBank]].ctr + 1) > 1)
498 bi->tagePred = bi->longestMatchPred;
499 else
500 bi->tagePred = bi->altTaken;
501 } else {
502 bi->altTaken = getBimodePred(pc, bi);
503 bi->tagePred = bi->altTaken;
504 bi->longestMatchPred = bi->altTaken;
505 }
506 //end TAGE prediction
507
508 bi->loopPred = getLoop(pc, bi); // loop prediction
509
510 pred_taken = (((loopUseCounter >= 0) && bi->loopPredValid)) ?
511 (bi->loopPred): (bi->tagePred);
512 DPRINTF(LTage, "Predict for %lx: taken?:%d, loopTaken?:%d, "
513 "loopValid?:%d, loopUseCounter:%d, tagePred:%d, altPred:%d\n",
514 branch_pc, pred_taken, bi->loopPred, bi->loopPredValid,
515 loopUseCounter, bi->tagePred, bi->altTaken);
516 }
517 bi->branchPC = branch_pc;
518 bi->condBranch = cond_branch;
519 specLoopUpdate(branch_pc, pred_taken, bi);
520 return pred_taken;
521}
522
523// PREDICTOR UPDATE
524void
525LTAGE::update(ThreadID tid, Addr branch_pc, bool taken, void* bp_history,
526 bool squashed)
527{
528 assert(bp_history);
529
530 BranchInfo *bi = static_cast<BranchInfo*>(bp_history);
531
532 if (squashed) {
533 // This restores the global history, then update it
534 // and recomputes the folded histories.
535 squash(tid, taken, bp_history);
536 return;
537 }
538
539 int nrand = random_mt.random<int>(0,3);
540 Addr pc = branch_pc;
541 if (bi->condBranch) {
542 DPRINTF(LTage, "Updating tables for branch:%lx; taken?:%d\n",
543 branch_pc, taken);
544 // first update the loop predictor
545 loopUpdate(pc, taken, bi);
546
547 if (bi->loopPredValid) {
548 if (bi->tagePred != bi->loopPred) {
549 ctrUpdate(loopUseCounter, (bi->loopPred== taken), 7);
550 }
551 }
552
553 // TAGE UPDATE
554 // try to allocate a new entries only if prediction was wrong
555 bool longest_match_pred = false;
556 bool alloc = (bi->tagePred != taken) && (bi->hitBank < nHistoryTables);
557 if (bi->hitBank > 0) {
558 // Manage the selection between longest matching and alternate
559 // matching for "pseudo"-newly allocated longest matching entry
560 longest_match_pred = bi->longestMatchPred;
561 bool PseudoNewAlloc = bi->pseudoNewAlloc;
562 // an entry is considered as newly allocated if its prediction
563 // counter is weak
564 if (PseudoNewAlloc) {
565 if (longest_match_pred == taken) {
566 alloc = false;
567 }
568 // if it was delivering the correct prediction, no need to
569 // allocate new entry even if the overall prediction was false
570 if (longest_match_pred != bi->altTaken) {
571 ctrUpdate(useAltPredForNewlyAllocated,
572 bi->altTaken == taken, 4);
573 }
574 }
575 }
576
577 if (alloc) {
578 // is there some "unuseful" entry to allocate
77 // we use uint16_t type for these vales, so they cannot be more than
78 // 16 bits
79 assert(loopTableTagBits <= 16);
80 assert(loopTableIterBits <= 16);
81
82 assert(params->histBufferSize > params->maxHist * 2);
83 useAltPredForNewlyAllocated = 0;
84 logTick = 19;
85 tCounter = ULL(1) << (logTick - 1);
86
87 for (auto& history : threadHistory) {
88 history.pathHist = 0;
89 history.globalHistory = new uint8_t[histBufferSize];
90 history.gHist = history.globalHistory;
91 memset(history.gHist, 0, histBufferSize);
92 history.ptGhist = 0;
93 }
94
95 histLengths = new int [nHistoryTables+1];
96 histLengths[1] = minHist;
97 histLengths[nHistoryTables] = maxHist;
98
99 for (int i = 2; i <= nHistoryTables; i++) {
100 histLengths[i] = (int) (((double) minHist *
101 pow ((double) (maxHist) / (double) minHist,
102 (double) (i - 1) / (double) ((nHistoryTables- 1))))
103 + 0.5);
104 }
105
106 tagWidths[1] = minTagWidth;
107 tagWidths[2] = minTagWidth;
108 tagWidths[3] = minTagWidth + 1;
109 tagWidths[4] = minTagWidth + 1;
110 tagWidths[5] = minTagWidth + 2;
111 tagWidths[6] = minTagWidth + 3;
112 tagWidths[7] = minTagWidth + 4;
113 tagWidths[8] = minTagWidth + 5;
114 tagWidths[9] = minTagWidth + 5;
115 tagWidths[10] = minTagWidth + 6;
116 tagWidths[11] = minTagWidth + 7;
117 tagWidths[12] = minTagWidth + 8;
118
119 for (int i = 1; i <= 2; i++)
120 tagTableSizes[i] = logSizeTagTables - 1;
121 for (int i = 3; i <= 6; i++)
122 tagTableSizes[i] = logSizeTagTables;
123 for (int i = 7; i <= 10; i++)
124 tagTableSizes[i] = logSizeTagTables - 1;
125 for (int i = 11; i <= 12; i++)
126 tagTableSizes[i] = logSizeTagTables - 2;
127
128 for (auto& history : threadHistory) {
129 history.computeIndices = new FoldedHistory[nHistoryTables+1];
130 history.computeTags[0] = new FoldedHistory[nHistoryTables+1];
131 history.computeTags[1] = new FoldedHistory[nHistoryTables+1];
132
133 for (int i = 1; i <= nHistoryTables; i++) {
134 history.computeIndices[i].init(histLengths[i], (tagTableSizes[i]));
135 history.computeTags[0][i].init(
136 history.computeIndices[i].origLength, tagWidths[i]);
137 history.computeTags[1][i].init(
138 history.computeIndices[i].origLength, tagWidths[i] - 1);
139 DPRINTF(LTage, "HistLength:%d, TTSize:%d, TTTWidth:%d\n",
140 histLengths[i], tagTableSizes[i], tagWidths[i]);
141 }
142 }
143
144 const uint64_t bimodalTableSize = ULL(1) << logSizeBiMP;
145 btablePrediction.resize(bimodalTableSize, false);
146 btableHysteresis.resize(bimodalTableSize >> logRatioBiModalHystEntries,
147 true);
148
149 ltable = new LoopEntry[ULL(1) << logSizeLoopPred];
150 gtable = new TageEntry*[nHistoryTables + 1];
151 for (int i = 1; i <= nHistoryTables; i++) {
152 gtable[i] = new TageEntry[1<<(tagTableSizes[i])];
153 }
154
155 tableIndices = new int [nHistoryTables+1];
156 tableTags = new int [nHistoryTables+1];
157
158 loopUseCounter = 0;
159}
160
161int
162LTAGE::bindex(Addr pc_in) const
163{
164 return ((pc_in >> instShiftAmt) & ((ULL(1) << (logSizeBiMP)) - 1));
165}
166
167int
168LTAGE::lindex(Addr pc_in) const
169{
170 return (((pc_in >> instShiftAmt) &
171 ((ULL(1) << (logSizeLoopPred - 2)) - 1)) << 2);
172}
173
174int
175LTAGE::F(int A, int size, int bank) const
176{
177 int A1, A2;
178
179 A = A & ((ULL(1) << size) - 1);
180 A1 = (A & ((ULL(1) << tagTableSizes[bank]) - 1));
181 A2 = (A >> tagTableSizes[bank]);
182 A2 = ((A2 << bank) & ((ULL(1) << tagTableSizes[bank]) - 1))
183 + (A2 >> (tagTableSizes[bank] - bank));
184 A = A1 ^ A2;
185 A = ((A << bank) & ((ULL(1) << tagTableSizes[bank]) - 1))
186 + (A >> (tagTableSizes[bank] - bank));
187 return (A);
188}
189
190
191// gindex computes a full hash of pc, ghist and pathHist
192int
193LTAGE::gindex(ThreadID tid, Addr pc, int bank) const
194{
195 int index;
196 int hlen = (histLengths[bank] > 16) ? 16 : histLengths[bank];
197 index =
198 (pc >> instShiftAmt) ^
199 ((pc >> instShiftAmt) >> ((int) abs(tagTableSizes[bank] - bank) + 1)) ^
200 threadHistory[tid].computeIndices[bank].comp ^
201 F(threadHistory[tid].pathHist, hlen, bank);
202
203 return (index & ((ULL(1) << (tagTableSizes[bank])) - 1));
204}
205
206
207// Tag computation
208uint16_t
209LTAGE::gtag(ThreadID tid, Addr pc, int bank) const
210{
211 int tag = (pc >> instShiftAmt) ^
212 threadHistory[tid].computeTags[0][bank].comp ^
213 (threadHistory[tid].computeTags[1][bank].comp << 1);
214
215 return (tag & ((ULL(1) << tagWidths[bank]) - 1));
216}
217
218
219// Up-down saturating counter
220void
221LTAGE::ctrUpdate(int8_t & ctr, bool taken, int nbits)
222{
223 assert(nbits <= sizeof(int8_t) << 3);
224 if (taken) {
225 if (ctr < ((1 << (nbits - 1)) - 1))
226 ctr++;
227 } else {
228 if (ctr > -(1 << (nbits - 1)))
229 ctr--;
230 }
231}
232
233// Up-down unsigned saturating counter
234void
235LTAGE::unsignedCtrUpdate(uint8_t & ctr, bool up, unsigned nbits)
236{
237 assert(nbits <= sizeof(uint8_t) << 3);
238 if (up) {
239 if (ctr < ((1 << nbits) - 1))
240 ctr++;
241 } else {
242 if (ctr)
243 ctr--;
244 }
245}
246
247// Bimodal prediction
248bool
249LTAGE::getBimodePred(Addr pc, BranchInfo* bi) const
250{
251 return btablePrediction[bi->bimodalIndex];
252}
253
254
255// Update the bimodal predictor: a hysteresis bit is shared among N prediction
256// bits (N = 2 ^ logRatioBiModalHystEntries)
257void
258LTAGE::baseUpdate(Addr pc, bool taken, BranchInfo* bi)
259{
260 int inter = (btablePrediction[bi->bimodalIndex] << 1)
261 + btableHysteresis[bi->bimodalIndex >> logRatioBiModalHystEntries];
262 if (taken) {
263 if (inter < 3)
264 inter++;
265 } else if (inter > 0) {
266 inter--;
267 }
268 const bool pred = inter >> 1;
269 const bool hyst = inter & 1;
270 btablePrediction[bi->bimodalIndex] = pred;
271 btableHysteresis[bi->bimodalIndex >> logRatioBiModalHystEntries] = hyst;
272 DPRINTF(LTage, "Updating branch %lx, pred:%d, hyst:%d\n", pc, pred, hyst);
273}
274
275
276//loop prediction: only used if high confidence
277bool
278LTAGE::getLoop(Addr pc, BranchInfo* bi) const
279{
280 bi->loopHit = -1;
281 bi->loopPredValid = false;
282 bi->loopIndex = lindex(pc);
283 bi->loopTag = ((pc) >> (instShiftAmt + logSizeLoopPred - 2)) & loopTagMask;
284
285 for (int i = 0; i < 4; i++) {
286 if (ltable[bi->loopIndex + i].tag == bi->loopTag) {
287 bi->loopHit = i;
288 bi->loopPredValid =
289 ltable[bi->loopIndex + i].confidence == confidenceThreshold;
290 bi->currentIter = ltable[bi->loopIndex + i].currentIterSpec;
291 if (ltable[bi->loopIndex + i].currentIterSpec + 1 ==
292 ltable[bi->loopIndex + i].numIter) {
293 return !(ltable[bi->loopIndex + i].dir);
294 }else {
295 return (ltable[bi->loopIndex + i].dir);
296 }
297 }
298 }
299 return false;
300}
301
302void
303LTAGE::specLoopUpdate(Addr pc, bool taken, BranchInfo* bi)
304{
305 if (bi->loopHit>=0) {
306 int index = lindex(pc);
307 if (taken != ltable[index].dir) {
308 ltable[index].currentIterSpec = 0;
309 } else {
310 ltable[index].currentIterSpec =
311 (ltable[index].currentIterSpec + 1) & loopNumIterMask;
312 }
313 }
314}
315
316void
317LTAGE::loopUpdate(Addr pc, bool taken, BranchInfo* bi)
318{
319 int idx = bi->loopIndex + bi->loopHit;
320 if (bi->loopHit >= 0) {
321 //already a hit
322 if (bi->loopPredValid) {
323 if (taken != bi->loopPred) {
324 // free the entry
325 ltable[idx].numIter = 0;
326 ltable[idx].age = 0;
327 ltable[idx].confidence = 0;
328 ltable[idx].currentIter = 0;
329 return;
330 } else if (bi->loopPred != bi->tagePred) {
331 DPRINTF(LTage, "Loop Prediction success:%lx\n",pc);
332 unsignedCtrUpdate(ltable[idx].age, true, loopTableAgeBits);
333 }
334 }
335
336 ltable[idx].currentIter =
337 (ltable[idx].currentIter + 1) & loopNumIterMask;
338 if (ltable[idx].currentIter > ltable[idx].numIter) {
339 ltable[idx].confidence = 0;
340 if (ltable[idx].numIter != 0) {
341 // free the entry
342 ltable[idx].numIter = 0;
343 ltable[idx].age = 0;
344 ltable[idx].confidence = 0;
345 }
346 }
347
348 if (taken != ltable[idx].dir) {
349 if (ltable[idx].currentIter == ltable[idx].numIter) {
350 DPRINTF(LTage, "Loop End predicted successfully:%lx\n", pc);
351
352 unsignedCtrUpdate(ltable[idx].confidence, true,
353 loopTableConfidenceBits);
354 //just do not predict when the loop count is 1 or 2
355 if (ltable[idx].numIter < 3) {
356 // free the entry
357 ltable[idx].dir = taken;
358 ltable[idx].numIter = 0;
359 ltable[idx].age = 0;
360 ltable[idx].confidence = 0;
361 }
362 } else {
363 DPRINTF(LTage, "Loop End predicted incorrectly:%lx\n", pc);
364 if (ltable[idx].numIter == 0) {
365 // first complete nest;
366 ltable[idx].confidence = 0;
367 ltable[idx].numIter = ltable[idx].currentIter;
368 } else {
369 //not the same number of iterations as last time: free the
370 //entry
371 ltable[idx].numIter = 0;
372 ltable[idx].age = 0;
373 ltable[idx].confidence = 0;
374 }
375 }
376 ltable[idx].currentIter = 0;
377 }
378
379 } else if (taken) {
380 //try to allocate an entry on taken branch
381 int nrand = random_mt.random<int>();
382 for (int i = 0; i < 4; i++) {
383 int loop_hit = (nrand + i) & 3;
384 idx = bi->loopIndex + loop_hit;
385 if (ltable[idx].age == 0) {
386 DPRINTF(LTage, "Allocating loop pred entry for branch %lx\n",
387 pc);
388 ltable[idx].dir = !taken;
389 ltable[idx].tag = bi->loopTag;
390 ltable[idx].numIter = 0;
391 ltable[idx].age = (1 << loopTableAgeBits) - 1;
392 ltable[idx].confidence = 0;
393 ltable[idx].currentIter = 1;
394 break;
395
396 }
397 else
398 ltable[idx].age--;
399 }
400 }
401
402}
403
404// shifting the global history: we manage the history in a big table in order
405// to reduce simulation time
406void
407LTAGE::updateGHist(uint8_t * &h, bool dir, uint8_t * tab, int &pt)
408{
409 if (pt == 0) {
410 DPRINTF(LTage, "Rolling over the histories\n");
411 // Copy beginning of globalHistoryBuffer to end, such that
412 // the last maxHist outcomes are still reachable
413 // through pt[0 .. maxHist - 1].
414 for (int i = 0; i < maxHist; i++)
415 tab[histBufferSize - maxHist + i] = tab[i];
416 pt = histBufferSize - maxHist;
417 h = &tab[pt];
418 }
419 pt--;
420 h--;
421 h[0] = (dir) ? 1 : 0;
422}
423
424// Get GHR for hashing indirect predictor
425// Build history backwards from pointer in
426// bp_history.
427unsigned
428LTAGE::getGHR(ThreadID tid, void *bp_history) const
429{
430 BranchInfo* bi = static_cast<BranchInfo*>(bp_history);
431 unsigned val = 0;
432 for (unsigned i = 0; i < 32; i++) {
433 // Make sure we don't go out of bounds
434 int gh_offset = bi->ptGhist + i;
435 assert(&(threadHistory[tid].globalHistory[gh_offset]) <
436 threadHistory[tid].globalHistory + histBufferSize);
437 val |= ((threadHistory[tid].globalHistory[gh_offset] & 0x1) << i);
438 }
439
440 return val;
441}
442
//prediction
// Compute a prediction for branch_pc. Allocates a BranchInfo record
// (returned through b) holding the indices/tags and intermediate
// predictions needed later by update()/squash(). Unconditional branches
// skip the tables and default to taken.
bool
LTAGE::predict(ThreadID tid, Addr branch_pc, bool cond_branch, void* &b)
{
    BranchInfo *bi = new BranchInfo(nHistoryTables+1);
    b = (void*)(bi);
    Addr pc = branch_pc;
    bool pred_taken = true;
    bi->loopHit = -1;

    if (cond_branch) {
        // TAGE prediction

        // computes the table addresses and the partial tags
        for (int i = 1; i <= nHistoryTables; i++) {
            tableIndices[i] = gindex(tid, pc, i);
            bi->tableIndices[i] = tableIndices[i];
            tableTags[i] = gtag(tid, pc, i);
            bi->tableTags[i] = tableTags[i];
        }

        bi->bimodalIndex = bindex(pc);

        bi->hitBank = 0;
        bi->altBank = 0;
        //Look for the bank with longest matching history
        // (higher table number = longer history; stop at first tag hit)
        for (int i = nHistoryTables; i > 0; i--) {
            if (gtable[i][tableIndices[i]].tag == tableTags[i]) {
                bi->hitBank = i;
                bi->hitBankIndex = tableIndices[bi->hitBank];
                break;
            }
        }
        //Look for the alternate bank
        // (next-longest matching history below the provider)
        for (int i = bi->hitBank - 1; i > 0; i--) {
            if (gtable[i][tableIndices[i]].tag == tableTags[i]) {
                bi->altBank = i;
                bi->altBankIndex = tableIndices[bi->altBank];
                break;
            }
        }
        //computes the prediction and the alternate prediction
        if (bi->hitBank > 0) {
            // Alternate prediction: from the alt bank if one matched,
            // otherwise from the bimodal base predictor.
            if (bi->altBank > 0) {
                bi->altTaken =
                    gtable[bi->altBank][tableIndices[bi->altBank]].ctr >= 0;
            }else {
                bi->altTaken = getBimodePred(pc, bi);
            }

            bi->longestMatchPred =
                gtable[bi->hitBank][tableIndices[bi->hitBank]].ctr >= 0;
            // |2*ctr + 1| <= 1 detects a weak (+0/-1) counter, i.e. a
            // (pseudo-)newly allocated entry.
            bi->pseudoNewAlloc =
                abs(2 * gtable[bi->hitBank][bi->hitBankIndex].ctr + 1) <= 1;

            //if the entry is recognized as a newly allocated entry and
            //useAltPredForNewlyAllocated is positive use the alternate
            //prediction
            if ((useAltPredForNewlyAllocated < 0)
                || abs(2 *
                gtable[bi->hitBank][tableIndices[bi->hitBank]].ctr + 1) > 1)
                bi->tagePred = bi->longestMatchPred;
            else
                bi->tagePred = bi->altTaken;
        } else {
            // No tagged component matched: fall back to bimodal.
            bi->altTaken = getBimodePred(pc, bi);
            bi->tagePred = bi->altTaken;
            bi->longestMatchPred = bi->altTaken;
        }
        //end TAGE prediction

        bi->loopPred = getLoop(pc, bi); // loop prediction

        // The loop predictor overrides TAGE only when its chooser counter
        // is non-negative and this branch has a confident loop entry.
        pred_taken = (((loopUseCounter >= 0) && bi->loopPredValid)) ?
            (bi->loopPred): (bi->tagePred);
        DPRINTF(LTage, "Predict for %lx: taken?:%d, loopTaken?:%d, "
                "loopValid?:%d, loopUseCounter:%d, tagePred:%d, altPred:%d\n",
                branch_pc, pred_taken, bi->loopPred, bi->loopPredValid,
                loopUseCounter, bi->tagePred, bi->altTaken);
    }
    bi->branchPC = branch_pc;
    bi->condBranch = cond_branch;
    // Speculatively advance the loop predictor's iteration count.
    specLoopUpdate(branch_pc, pred_taken, bi);
    return pred_taken;
}
528
529// PREDICTOR UPDATE
530void
531LTAGE::update(ThreadID tid, Addr branch_pc, bool taken, void* bp_history,
532 bool squashed)
533{
534 assert(bp_history);
535
536 BranchInfo *bi = static_cast<BranchInfo*>(bp_history);
537
538 if (squashed) {
539 // This restores the global history, then update it
540 // and recomputes the folded histories.
541 squash(tid, taken, bp_history);
542 return;
543 }
544
545 int nrand = random_mt.random<int>(0,3);
546 Addr pc = branch_pc;
547 if (bi->condBranch) {
548 DPRINTF(LTage, "Updating tables for branch:%lx; taken?:%d\n",
549 branch_pc, taken);
550 // first update the loop predictor
551 loopUpdate(pc, taken, bi);
552
553 if (bi->loopPredValid) {
554 if (bi->tagePred != bi->loopPred) {
555 ctrUpdate(loopUseCounter, (bi->loopPred== taken), 7);
556 }
557 }
558
559 // TAGE UPDATE
560 // try to allocate a new entries only if prediction was wrong
561 bool longest_match_pred = false;
562 bool alloc = (bi->tagePred != taken) && (bi->hitBank < nHistoryTables);
563 if (bi->hitBank > 0) {
564 // Manage the selection between longest matching and alternate
565 // matching for "pseudo"-newly allocated longest matching entry
566 longest_match_pred = bi->longestMatchPred;
567 bool PseudoNewAlloc = bi->pseudoNewAlloc;
568 // an entry is considered as newly allocated if its prediction
569 // counter is weak
570 if (PseudoNewAlloc) {
571 if (longest_match_pred == taken) {
572 alloc = false;
573 }
574 // if it was delivering the correct prediction, no need to
575 // allocate new entry even if the overall prediction was false
576 if (longest_match_pred != bi->altTaken) {
577 ctrUpdate(useAltPredForNewlyAllocated,
578 bi->altTaken == taken, 4);
579 }
580 }
581 }
582
583 if (alloc) {
584 // is there some "unuseful" entry to allocate
579 int8_t min = 1;
585 uint8_t min = 1;
580 for (int i = nHistoryTables; i > bi->hitBank; i--) {
581 if (gtable[i][bi->tableIndices[i]].u < min) {
582 min = gtable[i][bi->tableIndices[i]].u;
583 }
584 }
585
586 // we allocate an entry with a longer history
587 // to avoid ping-pong, we do not choose systematically the next
588 // entry, but among the 3 next entries
589 int Y = nrand &
590 ((ULL(1) << (nHistoryTables - bi->hitBank - 1)) - 1);
591 int X = bi->hitBank + 1;
592 if (Y & 1) {
593 X++;
594 if (Y & 2)
595 X++;
596 }
597 // No entry available, forces one to be available
598 if (min > 0) {
599 gtable[X][bi->tableIndices[X]].u = 0;
600 }
601
602
603 //Allocate only one entry
604 for (int i = X; i <= nHistoryTables; i++) {
605 if ((gtable[i][bi->tableIndices[i]].u == 0)) {
606 gtable[i][bi->tableIndices[i]].tag = bi->tableTags[i];
607 gtable[i][bi->tableIndices[i]].ctr = (taken) ? 0 : -1;
586 for (int i = nHistoryTables; i > bi->hitBank; i--) {
587 if (gtable[i][bi->tableIndices[i]].u < min) {
588 min = gtable[i][bi->tableIndices[i]].u;
589 }
590 }
591
592 // we allocate an entry with a longer history
593 // to avoid ping-pong, we do not choose systematically the next
594 // entry, but among the 3 next entries
595 int Y = nrand &
596 ((ULL(1) << (nHistoryTables - bi->hitBank - 1)) - 1);
597 int X = bi->hitBank + 1;
598 if (Y & 1) {
599 X++;
600 if (Y & 2)
601 X++;
602 }
603 // No entry available, forces one to be available
604 if (min > 0) {
605 gtable[X][bi->tableIndices[X]].u = 0;
606 }
607
608
609 //Allocate only one entry
610 for (int i = X; i <= nHistoryTables; i++) {
611 if ((gtable[i][bi->tableIndices[i]].u == 0)) {
612 gtable[i][bi->tableIndices[i]].tag = bi->tableTags[i];
613 gtable[i][bi->tableIndices[i]].ctr = (taken) ? 0 : -1;
608 gtable[i][bi->tableIndices[i]].u = 0; //?
609 break;
610 }
611 }
612 }
613 //periodic reset of u: reset is not complete but bit by bit
614 tCounter++;
615 if ((tCounter & ((ULL(1) << logTick) - 1)) == 0) {
616 // reset least significant bit
617 // most significant bit becomes least significant bit
618 for (int i = 1; i <= nHistoryTables; i++) {
619 for (int j = 0; j < (ULL(1) << tagTableSizes[i]); j++) {
620 gtable[i][j].u = gtable[i][j].u >> 1;
621 }
622 }
623 }
624
625 if (bi->hitBank > 0) {
626 DPRINTF(LTage, "Updating tag table entry (%d,%d) for branch %lx\n",
627 bi->hitBank, bi->hitBankIndex, branch_pc);
628 ctrUpdate(gtable[bi->hitBank][bi->hitBankIndex].ctr, taken,
629 tagTableCounterBits);
630 // if the provider entry is not certified to be useful also update
631 // the alternate prediction
632 if (gtable[bi->hitBank][bi->hitBankIndex].u == 0) {
633 if (bi->altBank > 0) {
634 ctrUpdate(gtable[bi->altBank][bi->altBankIndex].ctr, taken,
635 tagTableCounterBits);
636 DPRINTF(LTage, "Updating tag table entry (%d,%d) for"
637 " branch %lx\n", bi->hitBank, bi->hitBankIndex,
638 branch_pc);
639 }
640 if (bi->altBank == 0) {
641 baseUpdate(pc, taken, bi);
642 }
643 }
644
645 // update the u counter
614 break;
615 }
616 }
617 }
618 //periodic reset of u: reset is not complete but bit by bit
619 tCounter++;
620 if ((tCounter & ((ULL(1) << logTick) - 1)) == 0) {
621 // reset least significant bit
622 // most significant bit becomes least significant bit
623 for (int i = 1; i <= nHistoryTables; i++) {
624 for (int j = 0; j < (ULL(1) << tagTableSizes[i]); j++) {
625 gtable[i][j].u = gtable[i][j].u >> 1;
626 }
627 }
628 }
629
630 if (bi->hitBank > 0) {
631 DPRINTF(LTage, "Updating tag table entry (%d,%d) for branch %lx\n",
632 bi->hitBank, bi->hitBankIndex, branch_pc);
633 ctrUpdate(gtable[bi->hitBank][bi->hitBankIndex].ctr, taken,
634 tagTableCounterBits);
635 // if the provider entry is not certified to be useful also update
636 // the alternate prediction
637 if (gtable[bi->hitBank][bi->hitBankIndex].u == 0) {
638 if (bi->altBank > 0) {
639 ctrUpdate(gtable[bi->altBank][bi->altBankIndex].ctr, taken,
640 tagTableCounterBits);
641 DPRINTF(LTage, "Updating tag table entry (%d,%d) for"
642 " branch %lx\n", bi->hitBank, bi->hitBankIndex,
643 branch_pc);
644 }
645 if (bi->altBank == 0) {
646 baseUpdate(pc, taken, bi);
647 }
648 }
649
650 // update the u counter
646 if (longest_match_pred != bi->altTaken) {
647 if (longest_match_pred == taken) {
648 if (gtable[bi->hitBank][bi->hitBankIndex].u < 1) {
649 gtable[bi->hitBank][bi->hitBankIndex].u++;
650 }
651 }
651 if (bi->tagePred != bi->altTaken) {
652 unsignedCtrUpdate(gtable[bi->hitBank][bi->hitBankIndex].u,
653 bi->tagePred == taken, tagTableUBits);
652 }
653 } else {
654 baseUpdate(pc, taken, bi);
655 }
656
657 //END PREDICTOR UPDATE
658 }
659 if (!squashed) {
660 delete bi;
661 }
662}
663
664void
665LTAGE::updateHistories(ThreadID tid, Addr branch_pc, bool taken, void* b)
666{
667 BranchInfo* bi = (BranchInfo*)(b);
668 ThreadHistory& tHist = threadHistory[tid];
669 // UPDATE HISTORIES
670 bool pathbit = ((branch_pc >> instShiftAmt) & 1);
671 //on a squash, return pointers to this and recompute indices.
672 //update user history
673 updateGHist(tHist.gHist, taken, tHist.globalHistory, tHist.ptGhist);
674 tHist.pathHist = (tHist.pathHist << 1) + pathbit;
675 tHist.pathHist = (tHist.pathHist & ((ULL(1) << 16) - 1));
676
677 bi->ptGhist = tHist.ptGhist;
678 bi->pathHist = tHist.pathHist;
679 //prepare next index and tag computations for user branchs
680 for (int i = 1; i <= nHistoryTables; i++)
681 {
682 bi->ci[i] = tHist.computeIndices[i].comp;
683 bi->ct0[i] = tHist.computeTags[0][i].comp;
684 bi->ct1[i] = tHist.computeTags[1][i].comp;
685 tHist.computeIndices[i].update(tHist.gHist);
686 tHist.computeTags[0][i].update(tHist.gHist);
687 tHist.computeTags[1][i].update(tHist.gHist);
688 }
689 DPRINTF(LTage, "Updating global histories with branch:%lx; taken?:%d, "
690 "path Hist: %x; pointer:%d\n", branch_pc, taken, tHist.pathHist,
691 tHist.ptGhist);
692}
693
694void
695LTAGE::squash(ThreadID tid, bool taken, void *bp_history)
696{
697 BranchInfo* bi = (BranchInfo*)(bp_history);
698 ThreadHistory& tHist = threadHistory[tid];
699 DPRINTF(LTage, "Restoring branch info: %lx; taken? %d; PathHistory:%x, "
700 "pointer:%d\n", bi->branchPC,taken, bi->pathHist, bi->ptGhist);
701 tHist.pathHist = bi->pathHist;
702 tHist.ptGhist = bi->ptGhist;
703 tHist.gHist = &(tHist.globalHistory[tHist.ptGhist]);
704 tHist.gHist[0] = (taken ? 1 : 0);
705 for (int i = 1; i <= nHistoryTables; i++) {
706 tHist.computeIndices[i].comp = bi->ci[i];
707 tHist.computeTags[0][i].comp = bi->ct0[i];
708 tHist.computeTags[1][i].comp = bi->ct1[i];
709 tHist.computeIndices[i].update(tHist.gHist);
710 tHist.computeTags[0][i].update(tHist.gHist);
711 tHist.computeTags[1][i].update(tHist.gHist);
712 }
713
714 if (bi->condBranch) {
715 if (bi->loopHit >= 0) {
716 int idx = bi->loopIndex + bi->loopHit;
717 ltable[idx].currentIterSpec = bi->currentIter;
718 }
719 }
720
721}
722
723void
724LTAGE::squash(ThreadID tid, void *bp_history)
725{
726 BranchInfo* bi = (BranchInfo*)(bp_history);
727 DPRINTF(LTage, "Deleting branch info: %lx\n", bi->branchPC);
728 if (bi->condBranch) {
729 if (bi->loopHit >= 0) {
730 int idx = bi->loopIndex + bi->loopHit;
731 ltable[idx].currentIterSpec = bi->currentIter;
732 }
733 }
734
735 delete bi;
736}
737
738bool
739LTAGE::lookup(ThreadID tid, Addr branch_pc, void* &bp_history)
740{
741 bool retval = predict(tid, branch_pc, true, bp_history);
742
743 DPRINTF(LTage, "Lookup branch: %lx; predict:%d\n", branch_pc, retval);
744 updateHistories(tid, branch_pc, retval, bp_history);
745 assert(threadHistory[tid].gHist ==
746 &threadHistory[tid].globalHistory[threadHistory[tid].ptGhist]);
747
748 return retval;
749}
750
751void
752LTAGE::btbUpdate(ThreadID tid, Addr branch_pc, void* &bp_history)
753{
754 BranchInfo* bi = (BranchInfo*) bp_history;
755 ThreadHistory& tHist = threadHistory[tid];
756 DPRINTF(LTage, "BTB miss resets prediction: %lx\n", branch_pc);
757 assert(tHist.gHist == &tHist.globalHistory[tHist.ptGhist]);
758 tHist.gHist[0] = 0;
759 for (int i = 1; i <= nHistoryTables; i++) {
760 tHist.computeIndices[i].comp = bi->ci[i];
761 tHist.computeTags[0][i].comp = bi->ct0[i];
762 tHist.computeTags[1][i].comp = bi->ct1[i];
763 tHist.computeIndices[i].update(tHist.gHist);
764 tHist.computeTags[0][i].update(tHist.gHist);
765 tHist.computeTags[1][i].update(tHist.gHist);
766 }
767}
768
769void
770LTAGE::uncondBranch(ThreadID tid, Addr br_pc, void* &bp_history)
771{
772 DPRINTF(LTage, "UnConditionalBranch: %lx\n", br_pc);
773 predict(tid, br_pc, false, bp_history);
774 updateHistories(tid, br_pc, true, bp_history);
775 assert(threadHistory[tid].gHist ==
776 &threadHistory[tid].globalHistory[threadHistory[tid].ptGhist]);
777}
778
779LTAGE*
780LTAGEParams::create()
781{
782 return new LTAGE(this);
783}
654 }
655 } else {
656 baseUpdate(pc, taken, bi);
657 }
658
659 //END PREDICTOR UPDATE
660 }
661 if (!squashed) {
662 delete bi;
663 }
664}
665
666void
667LTAGE::updateHistories(ThreadID tid, Addr branch_pc, bool taken, void* b)
668{
669 BranchInfo* bi = (BranchInfo*)(b);
670 ThreadHistory& tHist = threadHistory[tid];
671 // UPDATE HISTORIES
672 bool pathbit = ((branch_pc >> instShiftAmt) & 1);
673 //on a squash, return pointers to this and recompute indices.
674 //update user history
675 updateGHist(tHist.gHist, taken, tHist.globalHistory, tHist.ptGhist);
676 tHist.pathHist = (tHist.pathHist << 1) + pathbit;
677 tHist.pathHist = (tHist.pathHist & ((ULL(1) << 16) - 1));
678
679 bi->ptGhist = tHist.ptGhist;
680 bi->pathHist = tHist.pathHist;
681 //prepare next index and tag computations for user branchs
682 for (int i = 1; i <= nHistoryTables; i++)
683 {
684 bi->ci[i] = tHist.computeIndices[i].comp;
685 bi->ct0[i] = tHist.computeTags[0][i].comp;
686 bi->ct1[i] = tHist.computeTags[1][i].comp;
687 tHist.computeIndices[i].update(tHist.gHist);
688 tHist.computeTags[0][i].update(tHist.gHist);
689 tHist.computeTags[1][i].update(tHist.gHist);
690 }
691 DPRINTF(LTage, "Updating global histories with branch:%lx; taken?:%d, "
692 "path Hist: %x; pointer:%d\n", branch_pc, taken, tHist.pathHist,
693 tHist.ptGhist);
694}
695
696void
697LTAGE::squash(ThreadID tid, bool taken, void *bp_history)
698{
699 BranchInfo* bi = (BranchInfo*)(bp_history);
700 ThreadHistory& tHist = threadHistory[tid];
701 DPRINTF(LTage, "Restoring branch info: %lx; taken? %d; PathHistory:%x, "
702 "pointer:%d\n", bi->branchPC,taken, bi->pathHist, bi->ptGhist);
703 tHist.pathHist = bi->pathHist;
704 tHist.ptGhist = bi->ptGhist;
705 tHist.gHist = &(tHist.globalHistory[tHist.ptGhist]);
706 tHist.gHist[0] = (taken ? 1 : 0);
707 for (int i = 1; i <= nHistoryTables; i++) {
708 tHist.computeIndices[i].comp = bi->ci[i];
709 tHist.computeTags[0][i].comp = bi->ct0[i];
710 tHist.computeTags[1][i].comp = bi->ct1[i];
711 tHist.computeIndices[i].update(tHist.gHist);
712 tHist.computeTags[0][i].update(tHist.gHist);
713 tHist.computeTags[1][i].update(tHist.gHist);
714 }
715
716 if (bi->condBranch) {
717 if (bi->loopHit >= 0) {
718 int idx = bi->loopIndex + bi->loopHit;
719 ltable[idx].currentIterSpec = bi->currentIter;
720 }
721 }
722
723}
724
725void
726LTAGE::squash(ThreadID tid, void *bp_history)
727{
728 BranchInfo* bi = (BranchInfo*)(bp_history);
729 DPRINTF(LTage, "Deleting branch info: %lx\n", bi->branchPC);
730 if (bi->condBranch) {
731 if (bi->loopHit >= 0) {
732 int idx = bi->loopIndex + bi->loopHit;
733 ltable[idx].currentIterSpec = bi->currentIter;
734 }
735 }
736
737 delete bi;
738}
739
740bool
741LTAGE::lookup(ThreadID tid, Addr branch_pc, void* &bp_history)
742{
743 bool retval = predict(tid, branch_pc, true, bp_history);
744
745 DPRINTF(LTage, "Lookup branch: %lx; predict:%d\n", branch_pc, retval);
746 updateHistories(tid, branch_pc, retval, bp_history);
747 assert(threadHistory[tid].gHist ==
748 &threadHistory[tid].globalHistory[threadHistory[tid].ptGhist]);
749
750 return retval;
751}
752
753void
754LTAGE::btbUpdate(ThreadID tid, Addr branch_pc, void* &bp_history)
755{
756 BranchInfo* bi = (BranchInfo*) bp_history;
757 ThreadHistory& tHist = threadHistory[tid];
758 DPRINTF(LTage, "BTB miss resets prediction: %lx\n", branch_pc);
759 assert(tHist.gHist == &tHist.globalHistory[tHist.ptGhist]);
760 tHist.gHist[0] = 0;
761 for (int i = 1; i <= nHistoryTables; i++) {
762 tHist.computeIndices[i].comp = bi->ci[i];
763 tHist.computeTags[0][i].comp = bi->ct0[i];
764 tHist.computeTags[1][i].comp = bi->ct1[i];
765 tHist.computeIndices[i].update(tHist.gHist);
766 tHist.computeTags[0][i].update(tHist.gHist);
767 tHist.computeTags[1][i].update(tHist.gHist);
768 }
769}
770
// Record an unconditional branch: predict() is called with
// cond_branch=false (so the tables are not consulted and a BranchInfo
// record is still allocated), then the always-taken outcome is pushed
// into the histories.
void
LTAGE::uncondBranch(ThreadID tid, Addr br_pc, void* &bp_history)
{
    DPRINTF(LTage, "UnConditionalBranch: %lx\n", br_pc);
    predict(tid, br_pc, false, bp_history);
    updateHistories(tid, br_pc, true, bp_history);
    // Sanity: the history cursor and pointer must stay in sync.
    assert(threadHistory[tid].gHist ==
           &threadHistory[tid].globalHistory[threadHistory[tid].ptGhist]);
}
780
// Factory hook: instantiate the L-TAGE predictor from its parameter
// object (gem5's standard SimObject creation pattern).
LTAGE*
LTAGEParams::create()
{
    return new LTAGE(this);
}