1/*
2 * Copyright (c) 2014-2015 ARM Limited
3 * All rights reserved
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder.  You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions are
16 * met: redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer;
18 * redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution;
21 * neither the name of the copyright holders nor the names of its
22 * contributors may be used to endorse or promote products derived from
23 * this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 *
37 * Authors: Mitch Hayenga
38 */
39
40#include "mem/cache/prefetch/queued.hh"
41
42#include <cassert>
43
44#include "arch/generic/tlb.hh"
45#include "base/logging.hh"
46#include "base/trace.hh"
47#include "debug/HWPrefetch.hh"
48#include "mem/cache/base.hh"
49#include "mem/request.hh"
50#include "params/QueuedPrefetcher.hh"
51
52void
53QueuedPrefetcher::DeferredPacket::createPkt(Addr paddr, unsigned blk_size,
54                                            MasterID mid, bool tag_prefetch,
55                                            Tick t) {
56    /* Create a prefetch memory request */
57    RequestPtr req = std::make_shared<Request>(paddr, blk_size, 0, mid);
58
59    if (pfInfo.isSecure()) {
60        req->setFlags(Request::SECURE);
61    }
62    req->taskId(ContextSwitchTaskId::Prefetcher);
63    pkt = new Packet(req, MemCmd::HardPFReq);
64    pkt->allocate();
65    if (tag_prefetch && pfInfo.hasPC()) {
66        // Tag prefetch packet with  accessing pc
67        pkt->req->setPC(pfInfo.getPC());
68    }
69    tick = t;
70}
71
72void
73QueuedPrefetcher::DeferredPacket::startTranslation(BaseTLB *tlb)
74{
75    assert(translationRequest != nullptr);
76    if (!ongoingTranslation) {
77        ongoingTranslation = true;
78        // Prefetchers only operate in Timing mode
79        tlb->translateTiming(translationRequest, tc, this, BaseTLB::Read);
80    }
81}
82
83void
84QueuedPrefetcher::DeferredPacket::finish(const Fault &fault,
85    const RequestPtr &req, ThreadContext *tc, BaseTLB::Mode mode)
86{
87    assert(ongoingTranslation);
88    ongoingTranslation = false;
89    bool failed = (fault != NoFault);
90    owner->translationComplete(this, failed);
91}
92
93QueuedPrefetcher::QueuedPrefetcher(const QueuedPrefetcherParams *p)
94    : BasePrefetcher(p), queueSize(p->queue_size),
95      missingTranslationQueueSize(
96        p->max_prefetch_requests_with_pending_translation),
97      latency(p->latency), queueSquash(p->queue_squash),
98      queueFilter(p->queue_filter), cacheSnoop(p->cache_snoop),
99      tagPrefetch(p->tag_prefetch),
100      throttleControlPct(p->throttle_control_percentage)
101{
102}
103
104QueuedPrefetcher::~QueuedPrefetcher()
105{
106    // Delete the queued prefetch packets
107    for (DeferredPacket &p : pfq) {
108        delete p.pkt;
109    }
110}
111
112size_t
113QueuedPrefetcher::getMaxPermittedPrefetches(size_t total) const
114{
115    /**
116     * Throttle generated prefetches based in the accuracy of the prefetcher.
117     * Accuracy is computed based in the ratio of useful prefetches with
118     * respect to the number of issued prefetches.
119     *
120     * The throttleControlPct controls how many of the candidate addresses
121     * generated by the prefetcher will be finally turned into prefetch
122     * requests
123     * - If set to 100, all candidates can be discarded (one request
124     *   will always be allowed to be generated)
125     * - Setting it to 0 will disable the throttle control, so requests are
126     *   created for all candidates
127     * - If set to 60, 40% of candidates will generate a request, and the
128     *   remaining 60% will be generated depending on the current accuracy
129     */
130
131    size_t max_pfs = total;
132    if (total > 0 && issuedPrefetches > 0) {
133        size_t throttle_pfs = (total * throttleControlPct) / 100;
134        size_t min_pfs = (total - throttle_pfs) == 0 ?
135            1 : (total - throttle_pfs);
136        max_pfs = min_pfs + (total - min_pfs) *
137            usefulPrefetches / issuedPrefetches;
138    }
139    return max_pfs;
140}
141
142void
143QueuedPrefetcher::notify(const PacketPtr &pkt, const PrefetchInfo &pfi)
144{
145    Addr blk_addr = blockAddress(pfi.getAddr());
146    bool is_secure = pfi.isSecure();
147
148    // Squash queued prefetches if demand miss to same line
149    if (queueSquash) {
150        auto itr = pfq.begin();
151        while (itr != pfq.end()) {
152            if (itr->pfInfo.getAddr() == blk_addr &&
153                itr->pfInfo.isSecure() == is_secure) {
154                delete itr->pkt;
155                itr = pfq.erase(itr);
156            } else {
157                ++itr;
158            }
159        }
160    }
161
162    // Calculate prefetches given this access
163    std::vector<AddrPriority> addresses;
164    calculatePrefetch(pfi, addresses);
165
166    // Get the maximu number of prefetches that we are allowed to generate
167    size_t max_pfs = getMaxPermittedPrefetches(addresses.size());
168
169    // Queue up generated prefetches
170    size_t num_pfs = 0;
171    for (AddrPriority& addr_prio : addresses) {
172
173        // Block align prefetch address
174        addr_prio.first = blockAddress(addr_prio.first);
175
176        if (!samePage(addr_prio.first, pfi.getAddr())) {
177            pfSpanPage += 1;
178        }
179
180        bool can_cross_page = (tlb != nullptr);
181        if (can_cross_page || samePage(addr_prio.first, pfi.getAddr())) {
182            PrefetchInfo new_pfi(pfi,addr_prio.first);
183            pfIdentified++;
184            DPRINTF(HWPrefetch, "Found a pf candidate addr: %#x, "
185                    "inserting into prefetch queue.\n", new_pfi.getAddr());
186            // Create and insert the request
187            insert(pkt, new_pfi, addr_prio.second);
188            num_pfs += 1;
189            if (num_pfs == max_pfs) {
190                break;
191            }
192        } else {
193            DPRINTF(HWPrefetch, "Ignoring page crossing prefetch.\n");
194        }
195    }
196}
197
198PacketPtr
199QueuedPrefetcher::getPacket()
200{
201    DPRINTF(HWPrefetch, "Requesting a prefetch to issue.\n");
202
203    if (pfq.empty()) {
204        // If the queue is empty, attempt first to fill it with requests
205        // from the queue of missing translations
206        processMissingTranslations(queueSize);
207    }
208
209    if (pfq.empty()) {
210        DPRINTF(HWPrefetch, "No hardware prefetches available.\n");
211        return nullptr;
212    }
213
214    PacketPtr pkt = pfq.front().pkt;
215    pfq.pop_front();
216
217    pfIssued++;
218    issuedPrefetches += 1;
219    assert(pkt != nullptr);
220    DPRINTF(HWPrefetch, "Generating prefetch for %#x.\n", pkt->getAddr());
221
222    processMissingTranslations(queueSize - pfq.size());
223    return pkt;
224}
225
226void
227QueuedPrefetcher::regStats()
228{
229    BasePrefetcher::regStats();
230
231    pfIdentified
232        .name(name() + ".pfIdentified")
233        .desc("number of prefetch candidates identified");
234
235    pfBufferHit
236        .name(name() + ".pfBufferHit")
237        .desc("number of redundant prefetches already in prefetch queue");
238
239    pfInCache
240        .name(name() + ".pfInCache")
241        .desc("number of redundant prefetches already in cache/mshr dropped");
242
243    pfRemovedFull
244        .name(name() + ".pfRemovedFull")
245        .desc("number of prefetches dropped due to prefetch queue size");
246
247    pfSpanPage
248        .name(name() + ".pfSpanPage")
249        .desc("number of prefetches that crossed the page");
250}
251
252
253void
254QueuedPrefetcher::processMissingTranslations(unsigned max)
255{
256    unsigned count = 0;
257    iterator it = pfqMissingTranslation.begin();
258    while (it != pfqMissingTranslation.end() && count < max) {
259        DeferredPacket &dp = *it;
260        // Increase the iterator first because dp.startTranslation can end up
261        // calling finishTranslation, which will erase "it"
262        it++;
263        dp.startTranslation(tlb);
264        count += 1;
265    }
266}
267
268void
269QueuedPrefetcher::translationComplete(DeferredPacket *dp, bool failed)
270{
271    auto it = pfqMissingTranslation.begin();
272    while (it != pfqMissingTranslation.end()) {
273        if (&(*it) == dp) {
274            break;
275        }
276        it++;
277    }
278    assert(it != pfqMissingTranslation.end());
279    if (!failed) {
280        DPRINTF(HWPrefetch, "%s Translation of vaddr %#x succeeded: "
281                "paddr %#x \n", tlb->name(),
282                it->translationRequest->getVaddr(),
283                it->translationRequest->getPaddr());
284        Addr target_paddr = it->translationRequest->getPaddr();
285        // check if this prefetch is already redundant
286        if (cacheSnoop && (inCache(target_paddr, it->pfInfo.isSecure()) ||
287                    inMissQueue(target_paddr, it->pfInfo.isSecure()))) {
288            pfInCache++;
289            DPRINTF(HWPrefetch, "Dropping redundant in "
290                    "cache/MSHR prefetch addr:%#x\n", target_paddr);
291        } else {
292            Tick pf_time = curTick() + clockPeriod() * latency;
293            it->createPkt(it->translationRequest->getPaddr(), blkSize,
294                    masterId, tagPrefetch, pf_time);
295            addToQueue(pfq, *it);
296        }
297    } else {
298        DPRINTF(HWPrefetch, "%s Translation of vaddr %#x failed, dropping "
299                "prefetch request %#x \n", tlb->name(),
300                it->translationRequest->getVaddr());
301    }
302    pfqMissingTranslation.erase(it);
303}
304
305bool
306QueuedPrefetcher::alreadyInQueue(std::list<DeferredPacket> &queue,
307                                 const PrefetchInfo &pfi, int32_t priority)
308{
309    bool found = false;
310    iterator it;
311    for (it = queue.begin(); it != queue.end() && !found; it++) {
312        found = it->pfInfo.sameAddr(pfi);
313    }
314
315    /* If the address is already in the queue, update priority and leave */
316    if (it != queue.end()) {
317        pfBufferHit++;
318        if (it->priority < priority) {
319            /* Update priority value and position in the queue */
320            it->priority = priority;
321            iterator prev = it;
322            while (prev != queue.begin()) {
323                prev--;
324                /* If the packet has higher priority, swap */
325                if (*it > *prev) {
326                    std::swap(*it, *prev);
327                    it = prev;
328                }
329            }
330            DPRINTF(HWPrefetch, "Prefetch addr already in "
331                "prefetch queue, priority updated\n");
332        } else {
333            DPRINTF(HWPrefetch, "Prefetch addr already in "
334                "prefetch queue\n");
335        }
336    }
337    return found;
338}
339
340RequestPtr
341QueuedPrefetcher::createPrefetchRequest(Addr addr, PrefetchInfo const &pfi,
342                                        PacketPtr pkt)
343{
344    RequestPtr translation_req = std::make_shared<Request>(pkt->req->getAsid(),
345            addr, blkSize, pkt->req->getFlags(), masterId, pfi.getPC(),
346            pkt->req->contextId());
347    translation_req->setFlags(Request::PREFETCH);
348    return translation_req;
349}
350
351void
352QueuedPrefetcher::insert(const PacketPtr &pkt, PrefetchInfo &new_pfi,
353                         int32_t priority)
354{
355    if (queueFilter) {
356        if (alreadyInQueue(pfq, new_pfi, priority)) {
357            return;
358        }
359        if (alreadyInQueue(pfqMissingTranslation, new_pfi, priority)) {
360            return;
361        }
362    }
363
364    /*
365     * Physical address computation
366     * if the prefetch is within the same page
367     *   using VA: add the computed stride to the original PA
368     *   using PA: no actions needed
369     * if we are page crossing
370     *   using VA: Create a translaion request and enqueue the corresponding
371     *       deferred packet to the queue of pending translations
372     *   using PA: use the provided VA to obtain the target VA, then attempt to
373     *     translate the resulting address
374     */
375
376    Addr orig_addr = useVirtualAddresses ?
377        pkt->req->getVaddr() : pkt->req->getPaddr();
378    bool positive_stride = new_pfi.getAddr() >= orig_addr;
379    Addr stride = positive_stride ?
380        (new_pfi.getAddr() - orig_addr) : (orig_addr - new_pfi.getAddr());
381
382    Addr target_paddr;
383    bool has_target_pa = false;
384    RequestPtr translation_req = nullptr;
385    if (samePage(orig_addr, new_pfi.getAddr())) {
386        if (useVirtualAddresses) {
387            // if we trained with virtual addresses,
388            // compute the target PA using the original PA and adding the
389            // prefetch stride (difference between target VA and original VA)
390            target_paddr = positive_stride ? (pkt->req->getPaddr() + stride) :
391                (pkt->req->getPaddr() - stride);
392        } else {
393            target_paddr = new_pfi.getAddr();
394        }
395        has_target_pa = true;
396    } else {
397        // Page crossing reference
398
399        // ContextID is needed for translation
400        if (!pkt->req->hasContextId()) {
401            return;
402        }
403        if (useVirtualAddresses) {
404            has_target_pa = false;
405            translation_req = createPrefetchRequest(new_pfi.getAddr(), new_pfi,
406                                                    pkt);
407        } else if (pkt->req->hasVaddr()) {
408            has_target_pa = false;
409            // Compute the target VA using req->getVaddr + stride
410            Addr target_vaddr = positive_stride ?
411                (pkt->req->getVaddr() + stride) :
412                (pkt->req->getVaddr() - stride);
413            translation_req = createPrefetchRequest(target_vaddr, new_pfi,
414                                                    pkt);
415        } else {
416            // Using PA for training but the request does not have a VA,
417            // unable to process this page crossing prefetch.
418            return;
419        }
420    }
421    if (has_target_pa && cacheSnoop &&
422            (inCache(target_paddr, new_pfi.isSecure()) ||
423            inMissQueue(target_paddr, new_pfi.isSecure()))) {
424        pfInCache++;
425        DPRINTF(HWPrefetch, "Dropping redundant in "
426                "cache/MSHR prefetch addr:%#x\n", target_paddr);
427        return;
428    }
429
430    /* Create the packet and find the spot to insert it */
431    DeferredPacket dpp(this, new_pfi, 0, priority);
432    if (has_target_pa) {
433        Tick pf_time = curTick() + clockPeriod() * latency;
434        dpp.createPkt(target_paddr, blkSize, masterId, tagPrefetch, pf_time);
435        DPRINTF(HWPrefetch, "Prefetch queued. "
436                "addr:%#x priority: %3d tick:%lld.\n",
437                new_pfi.getAddr(), priority, pf_time);
438        addToQueue(pfq, dpp);
439    } else {
440        // Add the translation request and try to resolve it later
441        dpp.setTranslationRequest(translation_req);
442        dpp.tc = cache->system->getThreadContext(translation_req->contextId());
443        DPRINTF(HWPrefetch, "Prefetch queued with no translation. "
444                "addr:%#x priority: %3d\n", new_pfi.getAddr(), priority);
445        addToQueue(pfqMissingTranslation, dpp);
446    }
447}
448
449void
450QueuedPrefetcher::addToQueue(std::list<DeferredPacket> &queue,
451                             DeferredPacket &dpp)
452{
453    /* Verify prefetch buffer space for request */
454    if (queue.size() == queueSize) {
455        pfRemovedFull++;
456        /* Lowest priority packet */
457        iterator it = queue.end();
458        panic_if (it == queue.begin(),
459            "Prefetch queue is both full and empty!");
460        --it;
461        /* Look for oldest in that level of priority */
462        panic_if (it == queue.begin(),
463            "Prefetch queue is full with 1 element!");
464        iterator prev = it;
465        bool cont = true;
466        /* While not at the head of the queue */
467        while (cont && prev != queue.begin()) {
468            prev--;
469            /* While at the same level of priority */
470            cont = prev->priority == it->priority;
471            if (cont)
472                /* update pointer */
473                it = prev;
474        }
475        DPRINTF(HWPrefetch, "Prefetch queue full, removing lowest priority "
476                            "oldest packet, addr: %#x\n",it->pfInfo.getAddr());
477        delete it->pkt;
478        queue.erase(it);
479    }
480
481    if (queue.size() == 0) {
482        queue.emplace_back(dpp);
483    } else {
484        iterator it = queue.end();
485        do {
486            --it;
487        } while (it != queue.begin() && dpp > *it);
488        /* If we reach the head, we have to see if the new element is new head
489         * or not */
490        if (it == queue.begin() && dpp <= *it)
491            it++;
492        queue.insert(it, dpp);
493    }
494}
495