1/*
2 * Copyright 2019 Texas A&M University
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * 1. Redistributions of source code must retain the above copyright notice,
8 *    this list of conditions and the following disclaimer.
9 *
10 * 2. Redistributions in binary form must reproduce the above copyright notice,
11 *    this list of conditions and the following disclaimer in the documentation
12 *    and/or other materials provided with the distribution.
13 *
14 * 3. Neither the name of the copyright holder nor the names of its
15 *    contributors may be used to endorse or promote products derived from this
16 *    software without specific prior written permission.
17 *
18 *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 *  HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 *  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 *  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 *
30 *  Author: Daniel A. Jiménez
31 *  Adapted to gem5 by: Javier Bueno Hedo
32 *
33 */
34
35 /*
36  * Multiperspective Perceptron Predictor (by Daniel A. Jiménez)
37  */
38
39#include "cpu/pred/multiperspective_perceptron.hh"
40
41#include "base/random.hh"
42#include "debug/Branch.hh"
43
44int
45MultiperspectivePerceptron::xlat[] =
46    {1,3,4,5,7,8,9,11,12,14,15,17,19,21,23,25,27,29,32,34,37,41,45,49,53,58,63,
47     69,76,85,94,106,};
48int
49MultiperspectivePerceptron::xlat4[] =
50    {0,4,5,7,9,11,12,14,16,17,19,22,28,33,39,45,};
51
52MultiperspectivePerceptron::ThreadData::ThreadData(int num_filters,
53        int n_local_histories, int local_history_length, int assoc,
54        const std::vector<std::vector<int>> &blurrypath_bits, int path_length,
55        int ghist_length, int block_size,
56        const std::vector<std::vector<std::vector<bool>>> &acyclic_bits,
57        const std::vector<int> &modhist_indices,
58        const std::vector<int> &modhist_lengths,
59        const std::vector<int> &modpath_indices,
60        const std::vector<int> &modpath_lengths,
61        const std::vector<int> &table_sizes, int n_sign_bits)
62      : filterTable(num_filters), acyclic_histories(acyclic_bits.size()),
63        acyclic2_histories(acyclic_bits.size()),
64        blurrypath_histories(blurrypath_bits.size()),
65        ghist_words(ghist_length/block_size+1, 0),
66        path_history(path_length, 0), imli_counter(4,0),
67        localHistories(n_local_histories, local_history_length),
68        recency_stack(assoc), last_ghist_bit(false), occupancy(0)
69{
70    for (int i = 0; i < blurrypath_bits.size(); i+= 1) {
71        blurrypath_histories[i].resize(blurrypath_bits[i].size());
72    }
73
74    for (int i = 0; i < acyclic_bits.size(); i += 1) {
75        acyclic_histories[i].resize(acyclic_bits[i].size());
76        acyclic2_histories[i].resize(acyclic_bits[i].size());
77    }
78
79    int max_modhist_idx = -1;
80    for (auto &elem : modhist_indices) {
81        max_modhist_idx = (max_modhist_idx < elem) ? elem : max_modhist_idx;
82    }
83    if (max_modhist_idx >= 0) {
84        mod_histories.resize(max_modhist_idx + 1);
85    }
86    for (int i = 0; i < modhist_lengths.size(); i+= 1) {
87        mod_histories[modhist_indices[i]].resize(modhist_lengths[i]);
88    }
89
90    int max_modpath_idx = -1;
91    for (auto &elem : modpath_indices) {
92        max_modpath_idx = (max_modpath_idx < elem) ? elem : max_modpath_idx;
93    }
94    if (max_modpath_idx >= 0) {
95        modpath_histories.resize(max_modpath_idx + 1);
96    }
97    for (int i = 0; i < modpath_lengths.size(); i+= 1) {
98        modpath_histories[modpath_indices[i]].resize(modpath_lengths[i]);
99    }
100
101    for (int i = 0; i < table_sizes.size(); i += 1) {
102        mpreds.push_back(0);
103        tables.push_back(std::vector<short int>(table_sizes[i]));
104        sign_bits.push_back(std::vector<std::array<bool, 2>>(table_sizes[i]));
105        for (int j = 0; j < table_sizes[i]; j += 1) {
106            for (int k = 0; k < n_sign_bits; k += 1) {
107                sign_bits[i][j][k] = (i & 1) | (k & 1);
108            }
109        }
110    }
111}
112
113MultiperspectivePerceptron::MultiperspectivePerceptron(
114    const MultiperspectivePerceptronParams *p) : BPredUnit(p),
115    blockSize(p->block_size), pcshift(p->pcshift), threshold(p->threshold),
116    bias0(p->bias0), bias1(p->bias1), biasmostly0(p->biasmostly0),
117    biasmostly1(p->biasmostly1), nbest(p->nbest), tunebits(p->tunebits),
118    hshift(p->hshift), imli_mask1(p->imli_mask1), imli_mask4(p->imli_mask4),
119    recencypos_mask(p->recencypos_mask), fudge(p->fudge),
120    n_sign_bits(p->n_sign_bits), pcbit(p->pcbit), decay(p->decay),
121    record_mask(p->record_mask), hash_taken(p->hash_taken),
122    tuneonly(p->tuneonly), extra_rounds(p->extra_rounds), speed(p->speed),
123    budgetbits(p->budgetbits), speculative_update(p->speculative_update),
124    threadData(p->numThreads, nullptr), doing_local(false),
125    doing_recency(false), assoc(0), ghist_length(p->initial_ghist_length),
126    modghist_length(1), path_length(1), thresholdCounter(0),
127    theta(p->initial_theta), extrabits(0), imli_counter_bits(4),
128    modhist_indices(), modhist_lengths(), modpath_indices(), modpath_lengths()
129{
130    fatal_if(speculative_update, "Speculative update not implemented");
131}
132
133void
134MultiperspectivePerceptron::setExtraBits(int bits)
135{
136    extrabits = bits;
137}
138
139void
140MultiperspectivePerceptron::init()
141{
142    createSpecs();
143
144    for (auto &spec : specs) {
145        // initial assignation of values
146        table_sizes.push_back(spec->size);
147    }
148
149    // Update bit requirements and runtime values
150    for (auto &spec : specs) {
151        spec->setBitRequirements();
152    }
153    const MultiperspectivePerceptronParams *p =
154        static_cast<const MultiperspectivePerceptronParams *>(params());
155
156    computeBits(p->num_filter_entries, p->num_local_histories,
157                p->local_history_length, p->ignore_path_size);
158
159    for (int i = 0; i < threadData.size(); i += 1) {
160        threadData[i] = new ThreadData(p->num_filter_entries,
161                                       p->num_local_histories,
162                                       p->local_history_length, assoc,
163                                       blurrypath_bits, path_length,
164                                       ghist_length, blockSize, acyclic_bits,
165                                       modhist_indices, modhist_lengths,
166                                       modpath_indices, modpath_lengths,
167                                       table_sizes, n_sign_bits);
168    }
169}
170
171void
172MultiperspectivePerceptron::computeBits(int num_filter_entries,
173        int nlocal_histories, int local_history_length, bool ignore_path_size)
174{
175    int totalbits = extrabits;
176    for (auto &imli_bits : imli_counter_bits) {
177        totalbits += imli_bits;
178    }
179    totalbits += ghist_length;
180    if (!ignore_path_size) {
181        totalbits += path_length * 16;
182    }
183    totalbits += (threshold >= 0) ? (tunebits * specs.size()) : 0;
184    for (auto &len : modhist_lengths) {
185        totalbits += len;
186    }
187    if (!ignore_path_size) {
188        for (auto &len : modpath_lengths) {
189            totalbits += 16 * len;
190        }
191    }
192    totalbits += doing_local ? (nlocal_histories * local_history_length) : 0;
193    totalbits += doing_recency ? (assoc * 16) : 0;
194
195    for (auto &bv : blurrypath_bits) {
196        for (auto &bve : bv) {
197            totalbits += bve;
198        }
199    }
200    totalbits += num_filter_entries * 2;
201
202    for (auto &abi : acyclic_bits) {
203        for (auto &abj : abi) {
204            for (auto abk : abj) {
205                totalbits += abk;
206            }
207        }
208    }
209
210    int remaining = budgetbits - totalbits;
211
212    // count the tables that have already been assigned sizes
213    int num_sized = 0;
214    for (int i = 0; i < specs.size(); i +=1) {
215        if (table_sizes[i] != 0) {
216            int sz = table_sizes[i] * (specs[i]->width + (n_sign_bits - 1));
217            totalbits += sz;
218            remaining -= sz;
219            num_sized += 1;
220        }
221    }
222
223    // whatever is left, we divide among the rest of the tables
224    int table_size_bits = (remaining / (specs.size()-num_sized));
225    for (int i = 0; i < specs.size(); i += 1) {
226        // if a table doesn't have a size yet, give it one and count those bits
227        if (!table_sizes[i]) {
228            int my_table_size = table_size_bits /
229                (specs[i]->width + (n_sign_bits - 1)); // extra sign bits
230            table_sizes[i] = my_table_size;
231            totalbits += my_table_size * (specs[i]->width + (n_sign_bits - 1));
232        }
233    }
234
235    DPRINTF(Branch, "%d bits of metadata so far, %d left out of "
236            "%d total budget\n", totalbits, remaining, budgetbits);
237    DPRINTF(Branch, "table size is %d bits, %d entries for 5 bit, %d entries "
238            "for 6 bit\n", table_size_bits,
239            table_size_bits / (5 + (n_sign_bits - 1)),
240            table_size_bits / (6 + (n_sign_bits - 1)));
241    DPRINTF(Branch, "%d total bits (%0.2fKB)\n", totalbits,
242            totalbits / 8192.0);
243}
244
245void
246MultiperspectivePerceptron::findBest(ThreadID tid,
247                                     std::vector<int> &best_preds) const
248{
249    if (threshold < 0) {
250        return;
251    }
252    struct BestPair {
253        int index;
254        int mpreds;
255        bool operator<(BestPair const &bp) const
256        {
257            return mpreds < bp.mpreds;
258        }
259    };
260    std::vector<BestPair> pairs(best_preds.size());
261    for (int i = 0; i < best_preds.size(); i += 1) {
262        pairs[i].index = i;
263        pairs[i].mpreds = threadData[tid]->mpreds[i];
264    }
265    std::sort(pairs.begin(), pairs.end());
266    for (int i = 0; i < (std::min(nbest, (int) best_preds.size())); i += 1) {
267        best_preds[i] = pairs[i].index;
268    }
269}
270
271unsigned int
272MultiperspectivePerceptron::getIndex(ThreadID tid, const MPPBranchInfo &bi,
273                                     const HistorySpec &spec, int index) const
274{
275    unsigned int g = spec.getHash(tid, bi.getPC(), bi.getPC2(), index);
276    unsigned long long int h = g;
277     // shift the hash from the feature to xor with the hashed PC
278    if (hshift < 0) {
279        h <<= -hshift;
280        h ^= bi.getPC2();
281    } else {
282        h <<= hshift;
283        h ^= bi.getHPC();
284    }
285    // xor in the imli counter(s) and/or recency position based on the masks
286    if ((1ull<<index) & imli_mask1) {
287        h ^= threadData[tid]->imli_counter[0];
288    }
289    if ((1ull<<index) & imli_mask4) {
290        h ^= threadData[tid]->imli_counter[3];
291    }
292    if (doing_recency) {
293        if ((1ull<<index) & recencypos_mask) {
294            h ^= RECENCYPOS::hash(threadData[tid]->recency_stack, table_sizes,
295                    bi.getPC2(), 31, index);
296        }
297    }
298    h %= table_sizes[index];
299    return h;
300}
301
302int
303MultiperspectivePerceptron::computeOutput(ThreadID tid, MPPBranchInfo &bi)
304{
305    // list of best predictors
306    std::vector<int> best_preds(specs.size(), -1);
307
308    // initialize sum
309    bi.yout = 0;
310
311    // bias the prediction by whether the local history is
312    // one of four distinctive patterns
313    int lhist = threadData[tid]->localHistories[bi.getPC()];
314    int history_len = threadData[tid]->localHistories.getLocalHistoryLength();
315    if (lhist == 0) {
316        bi.yout = bias0;
317    } else if (lhist == ((1<<history_len)-1)) {
318        bi.yout = bias1;
319    } else if (lhist == (1<<(history_len-1))) {
320        bi.yout = biasmostly0;
321    } else if (lhist == ((1<<(history_len-1))-1)) {
322        bi.yout = biasmostly1;
323    }
324    // find the best subset of features to use in case of a low-confidence
325    // branch
326    findBest(tid, best_preds);
327
328    // begin computation of the sum for low-confidence branch
329    int bestval = 0;
330
331    for (int i = 0; i < specs.size(); i += 1) {
332        HistorySpec const &spec = *specs[i];
333        // get the hash to index the table
334        unsigned int hashed_idx = getIndex(tid, bi, spec, i);
335        // add the weight; first get the weight's magnitude
336        int counter = threadData[tid]->tables[i][hashed_idx];
337        // get the sign
338        bool sign =
339          threadData[tid]->sign_bits[i][hashed_idx][bi.getHPC() % n_sign_bits];
340        // apply the transfer function and multiply by a coefficient
341        int weight = spec.coeff * ((spec.width == 5) ?
342                                   xlat4[counter] : xlat[counter]);
343        // apply the sign
344        int val = sign ? -weight : weight;
345        // add the value
346        bi.yout += val;
347        // if this is one of those good features, add the value to bestval
348        if (threshold >= 0) {
349            for (int j = 0;
350                 j < std::min(nbest, (int) best_preds.size());
351                 j += 1)
352            {
353                if (best_preds[j] == i) {
354                    bestval += val;
355                    break;
356                }
357            }
358        }
359    }
360    // apply a fudge factor to affect when training is triggered
361    bi.yout *= fudge;
362    return bestval;
363}
364
365void
366MultiperspectivePerceptron::satIncDec(bool taken, bool &sign, int &counter,
367                                      int max_weight) const
368{
369    if (taken) {
370        // increment sign/magnitude
371        if (sign) {
372            // go toward 0 away from negative max weight
373            if (counter == 0) {
374                sign = false; // moved to positive 0
375            } else {
376                counter -= 1;
377            }
378        } else {
379            // go toward max weight away from 0
380            if (counter < max_weight) {
381                counter += 1;
382            }
383        }
384    } else {
385        // decrement sign/magnitude
386        if (sign) {
387            // go toward negative max weight down from 0
388            if (counter < max_weight) {
389                counter += 1;
390            }
391        } else {
392            // go toward 0 away from max weight
393            if (counter == 0) {
394                sign = true; // negative 0
395            } else {
396                counter -= 1;
397            }
398        }
399    }
400}
401
402void
403MultiperspectivePerceptron::train(ThreadID tid, MPPBranchInfo &bi, bool taken)
404{
405    std::vector<std::vector<short int>> &tables = threadData[tid]->tables;
406    std::vector<std::vector<std::array<bool, 2>>> &sign_bits =
407            threadData[tid]->sign_bits;
408    std::vector<int> &mpreds = threadData[tid]->mpreds;
409    // was the prediction correct?
410    bool correct = (bi.yout >= 1) == taken;
411    // what is the magnitude of yout?
412    int abs_yout = abs(bi.yout);
413    // keep track of mispredictions per table
414    if (threshold >= 0) if (!tuneonly || (abs_yout <= threshold)) {
415        bool halve = false;
416
417        // for each table, figure out if there was a misprediction
418        for (int i = 0; i < specs.size(); i += 1) {
419            HistorySpec const &spec = *specs[i];
420            // get the hash to index the table
421            unsigned int hashed_idx = getIndex(tid, bi, spec, i);
422            bool sign = sign_bits[i][hashed_idx][bi.getHPC() % n_sign_bits];
423            int counter = tables[i][hashed_idx];
424            int weight = spec.coeff * ((spec.width == 5) ?
425                                       xlat4[counter] : xlat[counter]);
426            if (sign) weight = -weight;
427            bool pred = weight >= 1;
428            if (pred != taken) {
429                mpreds[i] += 1;
430                if (mpreds[i] == (1 << tunebits) - 1) {
431                    halve = true;
432                }
433            }
434        }
435        // if we reach the maximum counter value, halve all the counters
436        if (halve) {
437            for (int i = 0; i < specs.size(); i += 1) {
438                mpreds[i] /= 2;
439            }
440        }
441    }
442    // if the branch was predicted incorrectly or the correct
443    // prediction was weak, update the weights
444    bool do_train = !correct || (abs_yout <= theta);
445    if (!do_train) return;
446
447    // adaptive theta training, adapted from O-GEHL
448    if (!correct) {
449        thresholdCounter += 1;
450        if (thresholdCounter >= speed) {
451            theta += 1;
452            thresholdCounter = 0;
453        }
454    }
455    if (correct && abs_yout < theta) {
456        thresholdCounter -= 1;
457        if (thresholdCounter <= -speed) {
458            theta -= 1;
459            thresholdCounter = 0;
460        }
461    }
462
463    // train the weights, computing what the value of yout
464    // would have been if these updates had been applied before
465    int newyout = 0;
466    for (int i = 0; i < specs.size(); i += 1) {
467        HistorySpec const &spec = *specs[i];
468        // get the magnitude
469        unsigned int hashed_idx = getIndex(tid, bi, spec, i);
470        int counter = tables[i][hashed_idx];
471        // get the sign
472        bool sign = sign_bits[i][hashed_idx][bi.getHPC() % n_sign_bits];
473        // increment/decrement if taken/not taken
474        satIncDec(taken, sign, counter, (1 << (spec.width - 1)) - 1);
475        // update the magnitude and sign
476        tables[i][hashed_idx] = counter;
477        sign_bits[i][hashed_idx][bi.getHPC() % n_sign_bits] = sign;
478        int weight = ((spec.width == 5) ? xlat4[counter] : xlat[counter]);
479        // update the new version of yout
480        if (sign) {
481            newyout -= weight;
482        } else {
483            newyout += weight;
484        }
485    }
486
487    // if the prediction still would have been incorrect even
488    // with the updated weights, update some more weights to
489    // try to fix the problem
490    if ((newyout >= 1) != taken) {
491        if (extra_rounds != -1) {
492            int round_counter = 0;
493            bool found;
494            do {
495                // udpate a random weight
496                int besti = -1;
497                int nrand = random_mt.random<int>() % specs.size();
498                int pout;
499                found = false;
500                for (int j = 0; j < specs.size(); j += 1) {
501                    int i = (nrand + j) % specs.size();
502                    HistorySpec const &spec = *specs[i];
503                    unsigned int hashed_idx = getIndex(tid, bi, spec, i);
504                    int counter = tables[i][hashed_idx];
505                    bool sign =
506                        sign_bits[i][hashed_idx][bi.getHPC() % n_sign_bits];
507                    int weight = ((spec.width == 5) ?
508                            xlat4[counter] : xlat[counter]);
509                    int signed_weight = sign ? -weight : weight;
510                    pout = newyout - signed_weight;
511                    if ((pout >= 1) == taken) {
512                        // we have found a weight that if we blow
513                        // it away will help!
514                        besti = i;
515                        break;
516                    }
517                }
518                if (besti != -1) {
519                    int i = besti;
520                    HistorySpec const &spec = *specs[i];
521                    unsigned int hashed_idx = getIndex(tid, bi, spec, i);
522                    int counter = tables[i][hashed_idx];
523                    bool sign =
524                        sign_bits[i][hashed_idx][bi.getHPC() % n_sign_bits];
525                    if (counter > 1) {
526                        counter--;
527                        tables[i][hashed_idx] = counter;
528                    }
529                    int weight = ((spec.width == 5) ?
530                            xlat4[counter] : xlat[counter]);
531                    int signed_weight = sign ? -weight : weight;
532                    int out = pout + signed_weight;
533                    round_counter += 1;
534                    if ((out >= 1) != taken) {
535                        found = true;
536                    }
537                }
538            } while (found && round_counter < extra_rounds);
539        }
540    }
541}
542
543void
544MultiperspectivePerceptron::uncondBranch(ThreadID tid, Addr pc,
545                                         void * &bp_history)
546{
547    MPPBranchInfo *bi = new MPPBranchInfo(pc, pcshift, false);
548    std::vector<unsigned int> &ghist_words = threadData[tid]->ghist_words;
549
550    bp_history = (void *)bi;
551    unsigned short int pc2 = pc >> 2;
552    bool ab = !(pc & (1<<pcbit));
553    for (int i = 0; i < ghist_length / blockSize + 1; i += 1) {
554        bool ab_new = (ghist_words[i] >> (blockSize - 1)) & 1;
555        ghist_words[i] <<= 1;
556        ghist_words[i] |= ab;
557        ghist_words[i] &= (1 << blockSize) - 1;
558        ab = ab_new;
559    }
560    memmove(&threadData[tid]->path_history[1],
561            &threadData[tid]->path_history[0],
562            sizeof(unsigned short int) * (path_length - 1));
563    threadData[tid]->path_history[0] = pc2;
564}
565
566bool
567MultiperspectivePerceptron::lookup(ThreadID tid, Addr instPC,
568                                   void * &bp_history)
569{
570    MPPBranchInfo *bi = new MPPBranchInfo(instPC, pcshift, true);
571    bp_history = (void *)bi;
572
573    bool use_static = false;
574
575    if (!threadData[tid]->filterTable.empty()) {
576        unsigned int findex =
577            bi->getHashFilter(threadData[tid]->last_ghist_bit) %
578            threadData[tid]->filterTable.size();
579        FilterEntry &f = threadData[tid]->filterTable[findex];
580        if (f.alwaysNotTakenSoFar()) {
581            bi->filtered = true;
582            bi->prediction = false;
583            return false;
584        } else if (f.alwaysTakenSoFar()) {
585            bi->filtered = true;
586            bi->prediction = true;
587            return true;
588        }
589        if (f.neverSeen()) {
590            use_static = true;
591        }
592    }
593
594    int bestval = computeOutput(tid, *bi);
595    if (use_static) {
596        bi->prediction = false;
597    } else {
598        if (abs(bi->yout) <= threshold) {
599            bi->prediction = (bestval >= 1);
600        } else {
601            bi->prediction = (bi->yout >= 1);
602        }
603    }
604
605    return bi->prediction;
606}
607
608void
609MultiperspectivePerceptron::update(ThreadID tid, Addr instPC, bool taken,
610                                   void *bp_history, bool squashed,
611                                   const StaticInstPtr & inst,
612                                   Addr corrTarget)
613{
614    assert(bp_history);
615    MPPBranchInfo *bi = static_cast<MPPBranchInfo*>(bp_history);
616    assert(corrTarget != MaxAddr);
617    if (squashed) {
618        //delete bi;
619        return;
620    }
621
622    if (bi->isUnconditional()) {
623        delete bi;
624        return;
625    }
626
627    bool do_train = true;
628
629    if (!threadData[tid]->filterTable.empty()) {
630        int findex = bi->getHashFilter(threadData[tid]->last_ghist_bit) %
631                                       threadData[tid]->filterTable.size();
632        FilterEntry &f = threadData[tid]->filterTable[findex];
633
634        // compute this first, so we don't not train on the
635        // first time a branch is seen.
636        bool transition = false;
637        if (f.alwaysNotTakenSoFar() || f.alwaysTakenSoFar()) {
638            do_train = false;
639        }
640        if (taken) {
641            if (f.alwaysNotTakenSoFar()) {
642                transition = true;
643            }
644            f.seenTaken = true;
645        } else {
646            if (f.alwaysTakenSoFar()) {
647                transition = true;
648            }
649            f.seenUntaken = true;
650        }
651        // is this the first time time the branch has gone both ways?
652        if (transition) {
653            threadData[tid]->occupancy += 1;
654        }
655        // for every new dynamic branch, when there ar
656        // more than 'decay' number of branches in the
657        // filter, blow a random filter entry away
658        if (decay && transition &&
659            ((threadData[tid]->occupancy > decay) || (decay == 1))) {
660            int rnd = random_mt.random<int>() %
661                      threadData[tid]->filterTable.size();
662            FilterEntry &frand = threadData[tid]->filterTable[rnd];
663            if (frand.seenTaken && frand.seenUntaken) {
664                threadData[tid]->occupancy -= 1;
665            }
666            frand.seenTaken = false;
667            frand.seenUntaken = false;
668        }
669    }
670
671    if (do_train) {
672        train(tid, *bi, taken);
673    }
674
675#define RECORD_FILTERED_IMLI      1
676#define RECORD_FILTERED_GHIST     2
677#define RECORD_FILTERED_PATH      4
678#define RECORD_FILTERED_ACYCLIC   8
679#define RECORD_FILTERED_MOD      16
680#define RECORD_FILTERED_BLURRY   32
681// should never record a filtered local branch - duh!
682#define RECORD_FILTERED_LOCAL    64
683#define RECORD_FILTERED_RECENCY 128
684
685    // four different styles of IMLI
686    if (!bi->filtered || (record_mask & RECORD_FILTERED_IMLI)) {
687        unsigned int target = corrTarget;
688        if (target < bi->getPC()) {
689            if (taken) {
690                threadData[tid]->imli_counter[0] += 1;
691            } else {
692                threadData[tid]->imli_counter[0] = 0;
693            }
694            if (!taken) {
695                threadData[tid]->imli_counter[1] += 1;
696            } else {
697                threadData[tid]->imli_counter[1] = 0;
698            }
699        } else {
700            if (taken) {
701                threadData[tid]->imli_counter[2] += 1;
702            } else {
703                threadData[tid]->imli_counter[2] = 0;
704            }
705            if (!taken) {
706                threadData[tid]->imli_counter[3] += 1;
707            } else {
708                threadData[tid]->imli_counter[3] = 0;
709            }
710        }
711    }
712
713    bool hashed_taken = hash_taken ? (taken ^ !!(bi->getPC() & (1<<pcbit)))
714                                   : taken;
715    // record into ghist
716    if (!bi->filtered || (record_mask & RECORD_FILTERED_GHIST)) {
717        bool ab = hashed_taken;
718        assert(threadData[tid]->ghist_words.size() > 0);
719        for (int i = 0; i < ghist_length / blockSize + 1; i += 1) {
720            unsigned int a = threadData[tid]->ghist_words[i];
721            bool ab_new = (a >> (blockSize - 1)) & 1;
722            a <<= 1;
723            a |= ab;
724            ab = ab_new;
725            a &= (1 << blockSize) - 1;
726            threadData[tid]->ghist_words[i] = a;
727        }
728    }
729
730    // record into path history
731    if (!bi->filtered || (record_mask & RECORD_FILTERED_PATH)) {
732        assert(threadData[tid]->path_history.size() > 0);
733        memmove(&threadData[tid]->path_history[1],
734                &threadData[tid]->path_history[0],
735                sizeof(unsigned short int) * (path_length - 1));
736        threadData[tid]->path_history[0] = bi->getPC2();
737    }
738
739    // record into acyclic history
740    if (!bi->filtered || (record_mask & RECORD_FILTERED_ACYCLIC)) {
741        threadData[tid]->updateAcyclic(hashed_taken, bi->getHPC());
742    }
743
744    // record into modulo path history
745    if (!bi->filtered || (record_mask & RECORD_FILTERED_MOD)) {
746        for (int ii = 0; ii < modpath_indices.size(); ii += 1) {
747            int i = modpath_indices[ii];
748            if (bi->getHPC() % (i + 2) == 0) {
749                memmove(&threadData[tid]->modpath_histories[i][1],
750                        &threadData[tid]->modpath_histories[i][0],
751                        sizeof(unsigned short int) * (modpath_lengths[ii]-1));
752                threadData[tid]->modpath_histories[i][0] = bi->getPC2();
753            }
754        }
755    }
756
757    // update blurry history
758    if (!bi->filtered || (record_mask & RECORD_FILTERED_BLURRY)) {
759        std::vector<std::vector<unsigned int>> &blurrypath_histories =
760             threadData[tid]->blurrypath_histories;
761
762        for (int i = 0; i < blurrypath_histories.size(); i += 1)
763        {
764            if (blurrypath_histories[i].size() > 0) {
765                unsigned int z = bi->getPC() >> i;
766                if (blurrypath_histories[i][0] != z) {
767                    memmove(&blurrypath_histories[i][1],
768                            &blurrypath_histories[i][0],
769                            sizeof(unsigned int) *
770                            (blurrypath_histories[i].size() - 1));
771                    blurrypath_histories[i][0] = z;
772                }
773            }
774        }
775    }
776
777    // record into modulo pattern history
778    if (!bi->filtered || (record_mask & RECORD_FILTERED_MOD)) {
779        for (int ii = 0; ii < modhist_indices.size(); ii += 1) {
780            int i = modhist_indices[ii];
781            if (bi->getHPC() % (i + 2) == 0) {
782                for (int j = modhist_lengths[ii] - 1; j > 0; j -= 1) {
783                    threadData[tid]->mod_histories[i][j] =
784                        threadData[tid]->mod_histories[i][j-1];
785                }
786                threadData[tid]->mod_histories[i][0] = hashed_taken;
787            }
788        }
789    }
790
791    // insert this PC into the recency stack
792    if (doing_recency) {
793        if (!bi->filtered || (record_mask & RECORD_FILTERED_RECENCY)) {
794            threadData[tid]->insertRecency(bi->getPC2(), assoc);
795        }
796    }
797
798    // record into a local history
799    if (!bi->filtered || (record_mask & RECORD_FILTERED_LOCAL)) {
800        threadData[tid]->localHistories.update(bi->getPC(), hashed_taken);
801    }
802
803    // update last ghist bit, used to index filter
804    threadData[tid]->last_ghist_bit = taken;
805
806    delete bi;
807}
808
809void
810MultiperspectivePerceptron::btbUpdate(ThreadID tid, Addr branch_pc,
811                                      void* &bp_history)
812{
813}
814
815void
816MultiperspectivePerceptron::squash(ThreadID tid, void *bp_history)
817{
818    assert(bp_history);
819    MPPBranchInfo *bi = static_cast<MPPBranchInfo*>(bp_history);
820    delete bi;
821}
822