1/**
2 * Copyright (c) 2018 Metempsy Technology Consulting
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 *
28 * Authors: Ivan Pizarro
29 */
30
31#include "mem/cache/prefetch/bop.hh"
32
33#include "debug/HWPrefetch.hh"
34#include "params/BOPPrefetcher.hh"
35
36BOPPrefetcher::BOPPrefetcher(const BOPPrefetcherParams *p)
37    : QueuedPrefetcher(p),
38      scoreMax(p->score_max), roundMax(p->round_max),
39      badScore(p->bad_score), rrEntries(p->rr_size),
40      tagMask((1 << p->tag_bits) - 1),
41      delayQueueEnabled(p->delay_queue_enable),
42      delayQueueSize(p->delay_queue_size),
43      delayTicks(cyclesToTicks(p->delay_queue_cycles)),
44      delayQueueEvent([this]{ delayQueueEventWrapper(); }, name()),
45      issuePrefetchRequests(false), bestOffset(1), phaseBestOffset(0),
46      bestScore(0), round(0)
47{
48    if (!isPowerOf2(rrEntries)) {
49        fatal("%s: number of RR entries is not power of 2\n", name());
50    }
51    if (!isPowerOf2(blkSize)) {
52        fatal("%s: cache line size is not power of 2\n", name());
53    }
54    if (!(p->negative_offsets_enable && (p->offset_list_size % 2 == 0))) {
55        fatal("%s: negative offsets enabled with odd offset list size\n",
56              name());
57    }
58
59    rrLeft.resize(rrEntries);
60    rrRight.resize(rrEntries);
61
62    // Following the paper implementation, a list with the specified number
63    // of offsets which are of the form 2^i * 3^j * 5^k with i,j,k >= 0
64    const int factors[] = { 2, 3, 5 };
65    unsigned int i = 0;
66    int64_t offset_i = 1;
67
68    while (i < p->offset_list_size)
69    {
70        int64_t offset = offset_i;
71
72        for (int n : factors) {
73            while ((offset % n) == 0) {
74                offset /= n;
75            }
76        }
77
78        if (offset == 1) {
79            offsetsList.push_back(OffsetListEntry(offset_i, 0));
80            i++;
81            // If we want to use negative offsets, add also the negative value
82            // of the offset just calculated
83            if (p->negative_offsets_enable)  {
84                offsetsList.push_back(OffsetListEntry(-offset_i, 0));
85                i++;
86            }
87        }
88
89        offset_i++;
90    }
91
92    offsetsListIterator = offsetsList.begin();
93}
94
95void
96BOPPrefetcher::delayQueueEventWrapper()
97{
98    while (!delayQueue.empty() &&
99            delayQueue.front().processTick <= curTick())
100    {
101        Addr addr_x = delayQueue.front().baseAddr;
102        insertIntoRR(addr_x, RRWay::Left);
103        delayQueue.pop_front();
104    }
105
106    // Schedule an event for the next element if there is one
107    if (!delayQueue.empty()) {
108        schedule(delayQueueEvent, delayQueue.front().processTick);
109    }
110}
111
112unsigned int
113BOPPrefetcher::hash(Addr addr, unsigned int way) const
114{
115    Addr hash1 = addr >> way;
116    Addr hash2 = hash1 >> floorLog2(rrEntries);
117    return (hash1 ^ hash2) & (Addr)(rrEntries - 1);
118}
119
120void
121BOPPrefetcher::insertIntoRR(Addr addr, unsigned int way)
122{
123    switch (way) {
124        case RRWay::Left:
125            rrLeft[hash(addr, RRWay::Left)] = addr;
126            break;
127        case RRWay::Right:
128            rrRight[hash(addr, RRWay::Right)] = addr;
129            break;
130    }
131}
132
133void
134BOPPrefetcher::insertIntoDelayQueue(Addr x)
135{
136    if (delayQueue.size() == delayQueueSize) {
137        return;
138    }
139
140    // Add the address to the delay queue and schedule an event to process
141    // it after the specified delay cycles
142    Tick process_tick = curTick() + delayTicks;
143
144    delayQueue.push_back(DelayQueueEntry(x, process_tick));
145
146    if (!delayQueueEvent.scheduled()) {
147        schedule(delayQueueEvent, process_tick);
148    }
149}
150
151void
152BOPPrefetcher::resetScores()
153{
154    for (auto& it : offsetsList) {
155        it.second = 0;
156    }
157}
158
159inline Addr
160BOPPrefetcher::tag(Addr addr) const
161{
162    return (addr >> blkSize) & tagMask;
163}
164
165bool
166BOPPrefetcher::testRR(Addr addr) const
167{
168    for (auto& it : rrLeft) {
169        if (it == addr) {
170            return true;
171        }
172    }
173
174    for (auto& it : rrRight) {
175        if (it == addr) {
176            return true;
177        }
178    }
179
180    return false;
181}
182
183void
184BOPPrefetcher::bestOffsetLearning(Addr x)
185{
186    Addr offset_addr = (*offsetsListIterator).first;
187    Addr lookup_addr = x - offset_addr;
188
189    // There was a hit in the RR table, increment the score for this offset
190    if (testRR(lookup_addr)) {
191        DPRINTF(HWPrefetch, "Address %#lx found in the RR table\n", x);
192        (*offsetsListIterator).second++;
193        if ((*offsetsListIterator).second > bestScore) {
194            bestScore = (*offsetsListIterator).second;
195            phaseBestOffset = (*offsetsListIterator).first;
196            DPRINTF(HWPrefetch, "New best score is %lu\n", bestScore);
197        }
198    }
199
200    offsetsListIterator++;
201
202    // All the offsets in the list were visited meaning that a learning
203    // phase finished. Check if
204    if (offsetsListIterator == offsetsList.end()) {
205        offsetsListIterator = offsetsList.begin();
206        round++;
207
208        // Check if the best offset must be updated if:
209        // (1) One of the scores equals SCORE_MAX
210        // (2) The number of rounds equals ROUND_MAX
211        if ((bestScore >= scoreMax) || (round == roundMax)) {
212            bestOffset = phaseBestOffset;
213            round = 0;
214            bestScore = 0;
215            phaseBestOffset = 0;
216            resetScores();
217            issuePrefetchRequests = true;
218        } else if (phaseBestOffset <= badScore) {
219            issuePrefetchRequests = false;
220        }
221    }
222}
223
224void
225BOPPrefetcher::calculatePrefetch(const PrefetchInfo &pfi,
226        std::vector<AddrPriority> &addresses)
227{
228    Addr addr = pfi.getAddr();
229    Addr tag_x = tag(addr);
230
231    if (delayQueueEnabled) {
232        insertIntoDelayQueue(tag_x);
233    } else {
234        insertIntoRR(tag_x, RRWay::Left);
235    }
236
237    // Go through the nth offset and update the score, the best score and the
238    // current best offset if a better one is found
239    bestOffsetLearning(tag_x);
240
241    // This prefetcher is a degree 1 prefetch, so it will only generate one
242    // prefetch at most per access
243    if (issuePrefetchRequests) {
244        Addr prefetch_addr = addr + (bestOffset << lBlkSize);
245        addresses.push_back(AddrPriority(prefetch_addr, 0));
246        DPRINTF(HWPrefetch, "Generated prefetch %#lx\n", prefetch_addr);
247    }
248}
249
250void
251BOPPrefetcher::notifyFill(const PacketPtr& pkt)
252{
253    // Only insert into the RR right way if it's the pkt is a HWP
254    if (!pkt->cmd.isHWPrefetch()) return;
255
256    Addr tag_y = tag(pkt->getAddr());
257
258    if (issuePrefetchRequests) {
259        insertIntoRR(tag_y - bestOffset, RRWay::Right);
260    }
261}
262
263BOPPrefetcher*
264BOPPrefetcherParams::create()
265{
266   return new BOPPrefetcher(this);
267}
268