// base.cc, revision 13859
/*
 * Copyright (c) 2012-2013, 2018 ARM Limited
 * All rights reserved.
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder.  You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Copyright (c) 2003-2005 The Regents of The University of Michigan
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Erik Hallnor
 *          Nikos Nikoleris
 */

/**
 * @file
 * Definition of BaseCache functions.
 */

#include "mem/cache/base.hh"

#include "base/compiler.hh"
#include "base/logging.hh"
#include "debug/Cache.hh"
#include "debug/CachePort.hh"
#include "debug/CacheRepl.hh"
#include "debug/CacheVerbose.hh"
#include "mem/cache/mshr.hh"
#include "mem/cache/prefetch/base.hh"
#include "mem/cache/queue_entry.hh"
#include "params/BaseCache.hh"
#include "params/WriteAllocator.hh"
#include "sim/core.hh"

class BaseMasterPort;
class BaseSlavePort;

using namespace std;
68
69BaseCache::CacheSlavePort::CacheSlavePort(const std::string &_name,
70                                          BaseCache *_cache,
71                                          const std::string &_label)
72    : QueuedSlavePort(_name, _cache, queue),
73      queue(*_cache, *this, true, _label),
74      blocked(false), mustSendRetry(false),
75      sendRetryEvent([this]{ processSendRetry(); }, _name)
76{
77}
78
79BaseCache::BaseCache(const BaseCacheParams *p, unsigned blk_size)
80    : MemObject(p),
      cpuSidePort(p->name + ".cpu_side", this, "CpuSidePort"),
82      memSidePort(p->name + ".mem_side", this, "MemSidePort"),
83      mshrQueue("MSHRs", p->mshrs, 0, p->demand_mshr_reserve), // see below
84      writeBuffer("write buffer", p->write_buffers, p->mshrs), // see below
85      tags(p->tags),
86      prefetcher(p->prefetcher),
87      writeAllocator(p->write_allocator),
88      writebackClean(p->writeback_clean),
89      tempBlockWriteback(nullptr),
90      writebackTempBlockAtomicEvent([this]{ writebackTempBlockAtomic(); },
91                                    name(), false,
92                                    EventBase::Delayed_Writeback_Pri),
93      blkSize(blk_size),
94      lookupLatency(p->tag_latency),
95      dataLatency(p->data_latency),
96      forwardLatency(p->tag_latency),
97      fillLatency(p->data_latency),
98      responseLatency(p->response_latency),
99      sequentialAccess(p->sequential_access),
100      numTarget(p->tgts_per_mshr),
101      forwardSnoops(true),
102      clusivity(p->clusivity),
103      isReadOnly(p->is_read_only),
104      blocked(0),
105      order(0),
106      noTargetMSHR(nullptr),
107      missCount(p->max_miss_count),
108      addrRanges(p->addr_ranges.begin(), p->addr_ranges.end()),
109      system(p->system)
110{
111    // the MSHR queue has no reserve entries as we check the MSHR
112    // queue on every single allocation, whereas the write queue has
113    // as many reserve entries as we have MSHRs, since every MSHR may
114    // eventually require a writeback, and we do not check the write
115    // buffer before committing to an MSHR
116
117    // forward snoops is overridden in init() once we can query
118    // whether the connected master is actually snooping or not
119
120    tempBlock = new TempCacheBlk(blkSize);
121
122    tags->tagsInit();
123    if (prefetcher)
124        prefetcher->setCache(this);
125}
126
127BaseCache::~BaseCache()
128{
129    delete tempBlock;
130}
131
132void
133BaseCache::CacheSlavePort::setBlocked()
134{
135    assert(!blocked);
136    DPRINTF(CachePort, "Port is blocking new requests\n");
137    blocked = true;
138    // if we already scheduled a retry in this cycle, but it has not yet
139    // happened, cancel it
140    if (sendRetryEvent.scheduled()) {
141        owner.deschedule(sendRetryEvent);
142        DPRINTF(CachePort, "Port descheduled retry\n");
143        mustSendRetry = true;
144    }
145}
146
147void
148BaseCache::CacheSlavePort::clearBlocked()
149{
150    assert(blocked);
151    DPRINTF(CachePort, "Port is accepting new requests\n");
152    blocked = false;
153    if (mustSendRetry) {
154        // @TODO: need to find a better time (next cycle?)
155        owner.schedule(sendRetryEvent, curTick() + 1);
156    }
157}
158
159void
160BaseCache::CacheSlavePort::processSendRetry()
161{
162    DPRINTF(CachePort, "Port is sending retry\n");
163
164    // reset the flag and call retry
165    mustSendRetry = false;
166    sendRetryReq();
167}
168
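// Regenerate the full block address: blocks owned by the tag arrays can
// reconstruct their address from the tags, whereas the single tempBlock
// stores its address directly.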
169Addr
170BaseCache::regenerateBlkAddr(CacheBlk* blk)
171{
172    if (blk != tempBlock) {
173        return tags->regenerateBlkAddr(blk);
174    } else {
175        return tempBlock->getAddr();
176    }
177}
178
179void
180BaseCache::init()
181{
182    if (!cpuSidePort.isConnected() || !memSidePort.isConnected())
183        fatal("Cache ports on %s are not connected\n", name());
184    cpuSidePort.sendRangeChange();
185    forwardSnoops = cpuSidePort.isSnooping();
186}
187
188Port &
189BaseCache::getPort(const std::string &if_name, PortID idx)
190{
191    if (if_name == "mem_side") {
192        return memSidePort;
193    } else if (if_name == "cpu_side") {
194        return cpuSidePort;
    } else {
196        return MemObject::getPort(if_name, idx);
197    }
198}
199
200bool
201BaseCache::inRange(Addr addr) const
202{
203    for (const auto& r : addrRanges) {
204        if (r.contains(addr)) {
205            return true;
        }
207    }
208    return false;
209}
210
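// Handle a timing request that hit in the cache: either turn the packet
// into a response and schedule it on the CPU-side port, or, if no
// response is needed (e.g. writebacks and CleanEvicts absorbed here),
// queue the packet for deletion.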
211void
212BaseCache::handleTimingReqHit(PacketPtr pkt, CacheBlk *blk, Tick request_time)
213{
214    if (pkt->needsResponse()) {
215        // These delays should have been consumed by now
216        assert(pkt->headerDelay == 0);
217        assert(pkt->payloadDelay == 0);
218
219        pkt->makeTimingResponse();
220
        // In this case request_time takes into account the delay of
        // the xbar, if any, and just lat, neglecting responseLatency;
        // hit latency is modelled simply as the value of lat set by
        // access(), which calls the calculateAccessLatency() function.
226        cpuSidePort.schedTimingResp(pkt, request_time);
227    } else {
228        DPRINTF(Cache, "%s satisfied %s, no response needed\n", __func__,
229                pkt->print());
230
231        // queue the packet for deletion, as the sending cache is
232        // still relying on it; if the block is found in access(),
233        // CleanEvict and Writeback messages will be deleted
234        // here as well
235        pendingDelete.reset(pkt);
236    }
237}
238
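// Handle a timing request that missed in the cache: update the write
// allocator mode for cacheable writes, then either coalesce the packet
// into an existing MSHR, sink or forward the special cases (CleanEvict,
// WriteClean, evictions), or allocate a new MSHR or write buffer entry.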
239void
240BaseCache::handleTimingReqMiss(PacketPtr pkt, MSHR *mshr, CacheBlk *blk,
241                               Tick forward_time, Tick request_time)
242{
243    if (writeAllocator &&
244        pkt && pkt->isWrite() && !pkt->req->isUncacheable()) {
245        writeAllocator->updateMode(pkt->getAddr(), pkt->getSize(),
246                                   pkt->getBlockAddr(blkSize));
247    }
248
249    if (mshr) {
250        /// MSHR hit
251        /// @note writebacks will be checked in getNextMSHR()
252        /// for any conflicting requests to the same block
253
254        //@todo remove hw_pf here
255
256        // Coalesce unless it was a software prefetch (see above).
257        if (pkt) {
258            assert(!pkt->isWriteback());
259            // CleanEvicts corresponding to blocks which have
260            // outstanding requests in MSHRs are simply sunk here
261            if (pkt->cmd == MemCmd::CleanEvict) {
262                pendingDelete.reset(pkt);
263            } else if (pkt->cmd == MemCmd::WriteClean) {
264                // A WriteClean should never coalesce with any
265                // outstanding cache maintenance requests.
266
267                // We use forward_time here because there is an
268                // uncached memory write, forwarded to WriteBuffer.
269                allocateWriteBuffer(pkt, forward_time);
270            } else {
271                DPRINTF(Cache, "%s coalescing MSHR for %s\n", __func__,
272                        pkt->print());
273
274                assert(pkt->req->masterId() < system->maxMasters());
275                mshr_hits[pkt->cmdToIndex()][pkt->req->masterId()]++;
276
                // We use forward_time here because it is the same for
                // all new targets: we have multiple requests for the
                // same address, and forward_time covers the latency to
                // allocate an internal buffer, to schedule an event to
                // the queued port, and the additional delay of the xbar.
284                mshr->allocateTarget(pkt, forward_time, order++,
285                                     allocOnFill(pkt->cmd));
286                if (mshr->getNumTargets() == numTarget) {
287                    noTargetMSHR = mshr;
288                    setBlocked(Blocked_NoTargets);
289                    // need to be careful with this... if this mshr isn't
290                    // ready yet (i.e. time > curTick()), we don't want to
291                    // move it ahead of mshrs that are ready
292                    // mshrQueue.moveToFront(mshr);
293                }
294            }
295        }
296    } else {
297        // no MSHR
298        assert(pkt->req->masterId() < system->maxMasters());
299        mshr_misses[pkt->cmdToIndex()][pkt->req->masterId()]++;
300
301        if (pkt->isEviction() || pkt->cmd == MemCmd::WriteClean) {
            // We use forward_time here because there is a
            // writeback or writeclean, forwarded to the WriteBuffer.
304            allocateWriteBuffer(pkt, forward_time);
305        } else {
306            if (blk && blk->isValid()) {
307                // If we have a write miss to a valid block, we
308                // need to mark the block non-readable.  Otherwise
309                // if we allow reads while there's an outstanding
310                // write miss, the read could return stale data
311                // out of the cache block... a more aggressive
312                // system could detect the overlap (if any) and
313                // forward data out of the MSHRs, but we don't do
314                // that yet.  Note that we do need to leave the
315                // block valid so that it stays in the cache, in
316                // case we get an upgrade response (and hence no
317                // new data) when the write miss completes.
318                // As long as CPUs do proper store/load forwarding
319                // internally, and have a sufficiently weak memory
320                // model, this is probably unnecessary, but at some
321                // point it must have seemed like we needed it...
322                assert((pkt->needsWritable() && !blk->isWritable()) ||
323                       pkt->req->isCacheMaintenance());
324                blk->status &= ~BlkReadable;
325            }
326            // Here we are using forward_time, modelling the latency of
327            // a miss (outbound) just as forwardLatency, neglecting the
328            // lookupLatency component.
329            allocateMissBuffer(pkt, forward_time);
330        }
331    }
332}
333
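// Entry point for timing-mode requests from the CPU side: perform the
// tags access, issue any resulting writebacks, and dispatch to the hit
// or miss handler, notifying the hit/miss probe points and possibly
// scheduling the next prefetch.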
334void
335BaseCache::recvTimingReq(PacketPtr pkt)
336{
337    // anything that is merely forwarded pays for the forward latency and
338    // the delay provided by the crossbar
339    Tick forward_time = clockEdge(forwardLatency) + pkt->headerDelay;
340
341    Cycles lat;
342    CacheBlk *blk = nullptr;
343    bool satisfied = false;
344    {
345        PacketList writebacks;
346        // Note that lat is passed by reference here. The function
347        // access() will set the lat value.
348        satisfied = access(pkt, blk, lat, writebacks);
349
350        // After the evicted blocks are selected, they must be forwarded
351        // to the write buffer to ensure they logically precede anything
352        // happening below
353        doWritebacks(writebacks, clockEdge(lat + forwardLatency));
354    }
355
    // Here we charge the headerDelay, which takes into account the
    // latencies of the bus, if the packet comes from it.
    // The latency charged is just the value set by the access() function.
    // In case of a hit we are neglecting response latency.
    // In case of a miss we are neglecting forward latency.
361    Tick request_time = clockEdge(lat);
362    // Here we reset the timing of the packet.
363    pkt->headerDelay = pkt->payloadDelay = 0;
364
365    if (satisfied) {
        // notify before anything else as handleTimingReqHit below might
        // turn the packet into a response
368        ppHit->notify(pkt);
369
370        if (prefetcher && blk && blk->wasPrefetched()) {
371            blk->status &= ~BlkHWPrefetched;
372        }
373
374        handleTimingReqHit(pkt, blk, request_time);
375    } else {
376        handleTimingReqMiss(pkt, blk, forward_time, request_time);
377
378        ppMiss->notify(pkt);
379    }
380
381    if (prefetcher) {
382        // track time of availability of next prefetch, if any
383        Tick next_pf_time = prefetcher->nextPrefetchReadyTime();
384        if (next_pf_time != MaxTick) {
385            schedMemSideSendEvent(next_pf_time);
386        }
387    }
388}
389
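// Respond to an uncacheable write: charge the response latency plus any
// remaining header/payload delay and send the response back up.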
390void
391BaseCache::handleUncacheableWriteResp(PacketPtr pkt)
392{
393    Tick completion_time = clockEdge(responseLatency) +
394        pkt->headerDelay + pkt->payloadDelay;
395
396    // Reset the bus additional time as it is now accounted for
397    pkt->headerDelay = pkt->payloadDelay = 0;
398
399    cpuSidePort.schedTimingResp(pkt, completion_time);
400}
401
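// Handle a timing-mode response from the memory side: account for miss
// latency, fill the block if required, service the MSHR targets, and
// either promote deferred targets or deallocate the MSHR, sending out
// any writebacks generated along the way.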
402void
403BaseCache::recvTimingResp(PacketPtr pkt)
404{
405    assert(pkt->isResponse());
406
407    // all header delay should be paid for by the crossbar, unless
408    // this is a prefetch response from above
409    panic_if(pkt->headerDelay != 0 && pkt->cmd != MemCmd::HardPFResp,
410             "%s saw a non-zero packet delay\n", name());
411
412    const bool is_error = pkt->isError();
413
414    if (is_error) {
415        DPRINTF(Cache, "%s: Cache received %s with error\n", __func__,
416                pkt->print());
417    }
418
419    DPRINTF(Cache, "%s: Handling response %s\n", __func__,
420            pkt->print());
421
422    // if this is a write, we should be looking at an uncacheable
423    // write
424    if (pkt->isWrite()) {
425        assert(pkt->req->isUncacheable());
426        handleUncacheableWriteResp(pkt);
427        return;
428    }
429
430    // we have dealt with any (uncacheable) writes above, from here on
431    // we know we are dealing with an MSHR due to a miss or a prefetch
432    MSHR *mshr = dynamic_cast<MSHR*>(pkt->popSenderState());
433    assert(mshr);
434
435    if (mshr == noTargetMSHR) {
436        // we always clear at least one target
437        clearBlocked(Blocked_NoTargets);
438        noTargetMSHR = nullptr;
439    }
440
441    // Initial target is used just for stats
442    QueueEntry::Target *initial_tgt = mshr->getTarget();
443    int stats_cmd_idx = initial_tgt->pkt->cmdToIndex();
444    Tick miss_latency = curTick() - initial_tgt->recvTime;
445
446    if (pkt->req->isUncacheable()) {
447        assert(pkt->req->masterId() < system->maxMasters());
448        mshr_uncacheable_lat[stats_cmd_idx][pkt->req->masterId()] +=
449            miss_latency;
450    } else {
451        assert(pkt->req->masterId() < system->maxMasters());
452        mshr_miss_latency[stats_cmd_idx][pkt->req->masterId()] +=
453            miss_latency;
454    }
455
456    PacketList writebacks;
457
458    bool is_fill = !mshr->isForward &&
459        (pkt->isRead() || pkt->cmd == MemCmd::UpgradeResp ||
460         mshr->wasWholeLineWrite);
461
462    // make sure that if the mshr was due to a whole line write then
463    // the response is an invalidation
464    assert(!mshr->wasWholeLineWrite || pkt->isInvalidate());
465
466    CacheBlk *blk = tags->findBlock(pkt->getAddr(), pkt->isSecure());
467
468    if (is_fill && !is_error) {
469        DPRINTF(Cache, "Block for addr %#llx being updated in Cache\n",
470                pkt->getAddr());
471
472        const bool allocate = (writeAllocator && mshr->wasWholeLineWrite) ?
473            writeAllocator->allocate() : mshr->allocOnFill();
474        blk = handleFill(pkt, blk, writebacks, allocate);
475        assert(blk != nullptr);
476        ppFill->notify(pkt);
477    }
478
479    if (blk && blk->isValid() && pkt->isClean() && !pkt->isInvalidate()) {
        // The block was marked as not readable while there was a pending
        // cache maintenance operation; restore its flag.
482        blk->status |= BlkReadable;
483
484        // This was a cache clean operation (without invalidate)
485        // and we have a copy of the block already. Since there
486        // is no invalidation, we can promote targets that don't
487        // require a writable copy
488        mshr->promoteReadable();
489    }
490
491    if (blk && blk->isWritable() && !pkt->req->isCacheInvalidate()) {
492        // If at this point the referenced block is writable and the
493        // response is not a cache invalidate, we promote targets that
        // were deferred as we couldn't guarantee a writable copy
495        mshr->promoteWritable();
496    }
497
498    serviceMSHRTargets(mshr, pkt, blk);
499
500    if (mshr->promoteDeferredTargets()) {
501        // avoid later read getting stale data while write miss is
        // outstanding... see comment in timingAccess()
503        if (blk) {
504            blk->status &= ~BlkReadable;
505        }
506        mshrQueue.markPending(mshr);
507        schedMemSideSendEvent(clockEdge() + pkt->payloadDelay);
508    } else {
509        // while we deallocate an mshr from the queue we still have to
510        // check the isFull condition before and after as we might
511        // have been using the reserved entries already
512        const bool was_full = mshrQueue.isFull();
513        mshrQueue.deallocate(mshr);
514        if (was_full && !mshrQueue.isFull()) {
515            clearBlocked(Blocked_NoMSHRs);
516        }
517
518        // Request the bus for a prefetch if this deallocation freed enough
519        // MSHRs for a prefetch to take place
520        if (prefetcher && mshrQueue.canPrefetch()) {
521            Tick next_pf_time = std::max(prefetcher->nextPrefetchReadyTime(),
522                                         clockEdge());
523            if (next_pf_time != MaxTick)
524                schedMemSideSendEvent(next_pf_time);
525        }
526    }
527
    // if we used the temp block, check to see if it's valid and then
    // clear it out
529    if (blk == tempBlock && tempBlock->isValid()) {
530        evictBlock(blk, writebacks);
531    }
532
533    const Tick forward_time = clockEdge(forwardLatency) + pkt->headerDelay;
534    // copy writebacks to write buffer
535    doWritebacks(writebacks, forward_time);
536
537    DPRINTF(CacheVerbose, "%s: Leaving with %s\n", __func__, pkt->print());
538    delete pkt;
539}
540
541
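// Atomic-mode access: perform the access inline, handle any cache clean
// operations and misses immediately, and return the total latency in
// ticks.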
542Tick
543BaseCache::recvAtomic(PacketPtr pkt)
544{
545    // should assert here that there are no outstanding MSHRs or
546    // writebacks... that would mean that someone used an atomic
547    // access in timing mode
548
    // We use lookupLatency here because it is used to specify the latency
    // of the cache access.
551    Cycles lat = lookupLatency;
552
553    CacheBlk *blk = nullptr;
554    PacketList writebacks;
555    bool satisfied = access(pkt, blk, lat, writebacks);
556
557    if (pkt->isClean() && blk && blk->isDirty()) {
        // A cache clean operation is looking for a dirty
        // block. If a dirty block is encountered, a WriteClean
        // will update any copies on the path to memory
        // up to the point of reference.
562        DPRINTF(CacheVerbose, "%s: packet %s found block: %s\n",
563                __func__, pkt->print(), blk->print());
564        PacketPtr wb_pkt = writecleanBlk(blk, pkt->req->getDest(), pkt->id);
565        writebacks.push_back(wb_pkt);
566        pkt->setSatisfied();
567    }
568
569    // handle writebacks resulting from the access here to ensure they
570    // logically precede anything happening below
571    doWritebacksAtomic(writebacks);
572    assert(writebacks.empty());
573
574    if (!satisfied) {
575        lat += handleAtomicReqMiss(pkt, blk, writebacks);
576    }
577
578    // Note that we don't invoke the prefetcher at all in atomic mode.
579    // It's not clear how to do it properly, particularly for
580    // prefetchers that aggressively generate prefetch candidates and
581    // rely on bandwidth contention to throttle them; these will tend
582    // to pollute the cache in atomic mode since there is no bandwidth
583    // contention.  If we ever do want to enable prefetching in atomic
584    // mode, though, this is the place to do it... see timingAccess()
585    // for an example (though we'd want to issue the prefetch(es)
586    // immediately rather than calling requestMemSideBus() as we do
587    // there).
588
589    // do any writebacks resulting from the response handling
590    doWritebacksAtomic(writebacks);
591
    // if we used the temp block, check to see if it's valid and if so
    // clear it out, but only do so after the call to recvAtomic is
    // finished so that any downstream observers (such as a snoop
    // filter) first see the fill, and only then see the eviction
596    if (blk == tempBlock && tempBlock->isValid()) {
597        // the atomic CPU calls recvAtomic for fetch and load/store
        // sequentially, and we may already have a tempBlock
599        // writeback from the fetch that we have not yet sent
600        if (tempBlockWriteback) {
            // if that is the case, write the previous one back, and
602            // do not schedule any new event
603            writebackTempBlockAtomic();
604        } else {
605            // the writeback/clean eviction happens after the call to
606            // recvAtomic has finished (but before any successive
607            // calls), so that the response handling from the fill is
608            // allowed to happen first
609            schedule(writebackTempBlockAtomicEvent, curTick());
610        }
611
612        tempBlockWriteback = evictBlock(blk);
613    }
614
615    if (pkt->needsResponse()) {
616        pkt->makeAtomicResponse();
617    }
618
619    return lat * clockPeriod();
620}
621
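// Functional (debug) access: try to satisfy the packet from the block,
// the ports, the MSHRs or the write buffer, and forward it on if it is
// not done here.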
622void
623BaseCache::functionalAccess(PacketPtr pkt, bool from_cpu_side)
624{
625    Addr blk_addr = pkt->getBlockAddr(blkSize);
626    bool is_secure = pkt->isSecure();
627    CacheBlk *blk = tags->findBlock(pkt->getAddr(), is_secure);
628    MSHR *mshr = mshrQueue.findMatch(blk_addr, is_secure);
629
630    pkt->pushLabel(name());
631
632    CacheBlkPrintWrapper cbpw(blk);
633
634    // Note that just because an L2/L3 has valid data doesn't mean an
635    // L1 doesn't have a more up-to-date modified copy that still
636    // needs to be found.  As a result we always update the request if
637    // we have it, but only declare it satisfied if we are the owner.
638
639    // see if we have data at all (owned or otherwise)
640    bool have_data = blk && blk->isValid()
641        && pkt->trySatisfyFunctional(&cbpw, blk_addr, is_secure, blkSize,
642                                     blk->data);
643
644    // data we have is dirty if marked as such or if we have an
645    // in-service MSHR that is pending a modified line
646    bool have_dirty =
647        have_data && (blk->isDirty() ||
648                      (mshr && mshr->inService && mshr->isPendingModified()));
649
650    bool done = have_dirty ||
651        cpuSidePort.trySatisfyFunctional(pkt) ||
652        mshrQueue.trySatisfyFunctional(pkt, blk_addr) ||
653        writeBuffer.trySatisfyFunctional(pkt, blk_addr) ||
654        memSidePort.trySatisfyFunctional(pkt);
655
656    DPRINTF(CacheVerbose, "%s: %s %s%s%s\n", __func__,  pkt->print(),
657            (blk && blk->isValid()) ? "valid " : "",
658            have_data ? "data " : "", done ? "done " : "");
659
660    // We're leaving the cache, so pop cache->name() label
661    pkt->popLabel();
662
663    if (done) {
664        pkt->makeResponse();
665    } else {
666        // if it came as a request from the CPU side then make sure it
667        // continues towards the memory side
668        if (from_cpu_side) {
669            memSidePort.sendFunctional(pkt);
670        } else if (cpuSidePort.isSnooping()) {
671            // if it came from the memory side, it must be a snoop request
672            // and we should only forward it if we are forwarding snoops
673            cpuSidePort.sendFunctionalSnoop(pkt);
674        }
675    }
676}
677
678
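// Perform a (conditional) swap: the packet's write value is exchanged
// with the block contents; for conditional swaps the block is only
// overwritten if its current contents match the 32- or 64-bit condition
// value supplied with the request.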
679void
680BaseCache::cmpAndSwap(CacheBlk *blk, PacketPtr pkt)
681{
682    assert(pkt->isRequest());
683
684    uint64_t overwrite_val;
685    bool overwrite_mem;
686    uint64_t condition_val64;
687    uint32_t condition_val32;
688
689    int offset = pkt->getOffset(blkSize);
690    uint8_t *blk_data = blk->data + offset;
691
692    assert(sizeof(uint64_t) >= pkt->getSize());
693
694    overwrite_mem = true;
695    // keep a copy of our possible write value, and copy what is at the
696    // memory address into the packet
697    pkt->writeData((uint8_t *)&overwrite_val);
698    pkt->setData(blk_data);
699
700    if (pkt->req->isCondSwap()) {
701        if (pkt->getSize() == sizeof(uint64_t)) {
702            condition_val64 = pkt->req->getExtraData();
703            overwrite_mem = !std::memcmp(&condition_val64, blk_data,
704                                         sizeof(uint64_t));
705        } else if (pkt->getSize() == sizeof(uint32_t)) {
706            condition_val32 = (uint32_t)pkt->req->getExtraData();
707            overwrite_mem = !std::memcmp(&condition_val32, blk_data,
708                                         sizeof(uint32_t));
709        } else
710            panic("Invalid size for conditional read/write\n");
711    }
712
713    if (overwrite_mem) {
714        std::memcpy(blk_data, &overwrite_val, pkt->getSize());
715        blk->status |= BlkDirty;
716    }
717}
718
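// Pick the next queue entry to send to the memory side, arbitrating
// between the MSHR queue, the write buffer and, if both are idle, the
// prefetcher.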
719QueueEntry*
720BaseCache::getNextQueueEntry()
721{
722    // Check both MSHR queue and write buffer for potential requests,
723    // note that null does not mean there is no request, it could
724    // simply be that it is not ready
725    MSHR *miss_mshr  = mshrQueue.getNext();
726    WriteQueueEntry *wq_entry = writeBuffer.getNext();
727
728    // If we got a write buffer request ready, first priority is a
729    // full write buffer, otherwise we favour the miss requests
730    if (wq_entry && (writeBuffer.isFull() || !miss_mshr)) {
731        // need to search MSHR queue for conflicting earlier miss.
732        MSHR *conflict_mshr =
733            mshrQueue.findPending(wq_entry->blkAddr,
734                                  wq_entry->isSecure);
735
736        if (conflict_mshr && conflict_mshr->order < wq_entry->order) {
737            // Service misses in order until conflict is cleared.
738            return conflict_mshr;
739
740            // @todo Note that we ignore the ready time of the conflict here
741        }
742
743        // No conflicts; issue write
744        return wq_entry;
745    } else if (miss_mshr) {
746        // need to check for conflicting earlier writeback
747        WriteQueueEntry *conflict_mshr =
748            writeBuffer.findPending(miss_mshr->blkAddr,
749                                    miss_mshr->isSecure);
750        if (conflict_mshr) {
751            // not sure why we don't check order here... it was in the
752            // original code but commented out.
753
            // The only way this happens is if we are
            // doing a write and we didn't have permissions,
            // then subsequently saw a writeback (owned got evicted).
            // We need to make sure to perform the writeback first
            // to preserve the dirty data, then we can issue the write.
759
760            // should we return wq_entry here instead?  I.e. do we
761            // have to flush writes in order?  I don't think so... not
762            // for Alpha anyway.  Maybe for x86?
763            return conflict_mshr;
764
765            // @todo Note that we ignore the ready time of the conflict here
766        }
767
768        // No conflicts; issue read
769        return miss_mshr;
770    }
771
772    // fall through... no pending requests.  Try a prefetch.
773    assert(!miss_mshr && !wq_entry);
774    if (prefetcher && mshrQueue.canPrefetch()) {
775        // If we have a miss queue slot, we can try a prefetch
776        PacketPtr pkt = prefetcher->getPacket();
777        if (pkt) {
778            Addr pf_addr = pkt->getBlockAddr(blkSize);
779            if (!tags->findBlock(pf_addr, pkt->isSecure()) &&
780                !mshrQueue.findMatch(pf_addr, pkt->isSecure()) &&
781                !writeBuffer.findMatch(pf_addr, pkt->isSecure())) {
782                // Update statistic on number of prefetches issued
783                // (hwpf_mshr_misses)
784                assert(pkt->req->masterId() < system->maxMasters());
785                mshr_misses[pkt->cmdToIndex()][pkt->req->masterId()]++;
786
787                // allocate an MSHR and return it, note
788                // that we send the packet straight away, so do not
789                // schedule the send
790                return allocateMissBuffer(pkt, curTick(), false);
791            } else {
792                // free the request and packet
793                delete pkt;
794            }
795        }
796    }
797
798    return nullptr;
799}
800
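// Satisfy a request using the data in the given (valid) block, handling
// atomic/swap operations, writes, reads, upgrades, cleans and
// invalidations.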
801void
802BaseCache::satisfyRequest(PacketPtr pkt, CacheBlk *blk, bool, bool)
803{
804    assert(pkt->isRequest());
805
806    assert(blk && blk->isValid());
807    // Occasionally this is not true... if we are a lower-level cache
808    // satisfying a string of Read and ReadEx requests from
809    // upper-level caches, a Read will mark the block as shared but we
810    // can satisfy a following ReadEx anyway since we can rely on the
811    // Read requester(s) to have buffered the ReadEx snoop and to
812    // invalidate their blocks after receiving them.
813    // assert(!pkt->needsWritable() || blk->isWritable());
814    assert(pkt->getOffset(blkSize) + pkt->getSize() <= blkSize);
815
816    // Check RMW operations first since both isRead() and
817    // isWrite() will be true for them
818    if (pkt->cmd == MemCmd::SwapReq) {
819        if (pkt->isAtomicOp()) {
820            // extract data from cache and save it into the data field in
821            // the packet as a return value from this atomic op
822            int offset = tags->extractBlkOffset(pkt->getAddr());
823            uint8_t *blk_data = blk->data + offset;
824            pkt->setData(blk_data);
825
826            // execute AMO operation
827            (*(pkt->getAtomicOp()))(blk_data);
828
829            // set block status to dirty
830            blk->status |= BlkDirty;
831        } else {
832            cmpAndSwap(blk, pkt);
833        }
834    } else if (pkt->isWrite()) {
835        // we have the block in a writable state and can go ahead,
        // note that the line may also be considered writable in
837        // downstream caches along the path to memory, but always
838        // Exclusive, and never Modified
839        assert(blk->isWritable());
840        // Write or WriteLine at the first cache with block in writable state
841        if (blk->checkWrite(pkt)) {
842            pkt->writeDataToBlock(blk->data, blkSize);
843        }
844        // Always mark the line as dirty (and thus transition to the
845        // Modified state) even if we are a failed StoreCond so we
846        // supply data to any snoops that have appended themselves to
847        // this cache before knowing the store will fail.
848        blk->status |= BlkDirty;
849        DPRINTF(CacheVerbose, "%s for %s (write)\n", __func__, pkt->print());
850    } else if (pkt->isRead()) {
851        if (pkt->isLLSC()) {
852            blk->trackLoadLocked(pkt);
853        }
854
855        // all read responses have a data payload
856        assert(pkt->hasRespData());
857        pkt->setDataFromBlock(blk->data, blkSize);
858    } else if (pkt->isUpgrade()) {
859        // sanity check
860        assert(!pkt->hasSharers());
861
862        if (blk->isDirty()) {
863            // we were in the Owned state, and a cache above us that
864            // has the line in Shared state needs to be made aware
865            // that the data it already has is in fact dirty
866            pkt->setCacheResponding();
867            blk->status &= ~BlkDirty;
868        }
869    } else if (pkt->isClean()) {
870        blk->status &= ~BlkDirty;
871    } else {
872        assert(pkt->isInvalidate());
873        invalidateBlock(blk);
874        DPRINTF(CacheVerbose, "%s for %s (invalidation)\n", __func__,
875                pkt->print());
876    }
877}
878
879/////////////////////////////////////////////////////
880//
881// Access path: requests coming in from the CPU side
882//
883/////////////////////////////////////////////////////
884Cycles
885BaseCache::calculateTagOnlyLatency(const uint32_t delay,
886                                   const Cycles lookup_lat) const
887{
888    // A tag-only access has to wait for the packet to arrive in order to
889    // perform the tag lookup.
890    return ticksToCycles(delay) + lookup_lat;
891}
892
893Cycles
894BaseCache::calculateAccessLatency(const CacheBlk* blk, const uint32_t delay,
895                                  const Cycles lookup_lat) const
896{
897    Cycles lat(0);
898
899    if (blk != nullptr) {
        // As soon as the access arrives, sequential accesses first access
        // the tags, then the data entry. For parallel accesses the
        // latency is dictated by the slower of the tag and data latencies.
903        if (sequentialAccess) {
904            lat = ticksToCycles(delay) + lookup_lat + dataLatency;
905        } else {
906            lat = ticksToCycles(delay) + std::max(lookup_lat, dataLatency);
907        }
908
909        // Check if the block to be accessed is available. If not, apply the
910        // access latency on top of when the block is ready to be accessed.
911        const Tick tick = curTick() + delay;
912        const Tick when_ready = blk->getWhenReady();
913        if (when_ready > tick &&
914            ticksToCycles(when_ready - tick) > lat) {
915            lat += ticksToCycles(when_ready - tick);
916        }
917    } else {
918        // In case of a miss, we neglect the data access in a parallel
919        // configuration (i.e., the data access will be stopped as soon as
920        // we find out it is a miss), and use the tag-only latency.
921        lat = calculateTagOnlyLatency(delay, lookup_lat);
922    }
923
924    return lat;
925}
926
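// Perform the cache lookup for a request: sets blk and lat, may append
// writebacks, and returns true if the request could be satisfied by
// this cache.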
927bool
928BaseCache::access(PacketPtr pkt, CacheBlk *&blk, Cycles &lat,
929                  PacketList &writebacks)
930{
931    // sanity check
932    assert(pkt->isRequest());
933
934    chatty_assert(!(isReadOnly && pkt->isWrite()),
935                  "Should never see a write in a read-only cache %s\n",
936                  name());
937
938    // Access block in the tags
939    Cycles tag_latency(0);
940    blk = tags->accessBlock(pkt->getAddr(), pkt->isSecure(), tag_latency);
941
942    DPRINTF(Cache, "%s for %s %s\n", __func__, pkt->print(),
943            blk ? "hit " + blk->print() : "miss");
944
945    if (pkt->req->isCacheMaintenance()) {
946        // A cache maintenance operation is always forwarded to the
947        // memory below even if the block is found in dirty state.
948
949        // We defer any changes to the state of the block until we
950        // create and mark as in service the mshr for the downstream
951        // packet.
952
953        // Calculate access latency on top of when the packet arrives. This
954        // takes into account the bus delay.
955        lat = calculateTagOnlyLatency(pkt->headerDelay, tag_latency);
956
957        return false;
958    }
959
960    if (pkt->isEviction()) {
        // We check for the presence of the block in caches above before
        // issuing a Writeback or CleanEvict to the write buffer. Therefore
        // the only possible case is a CleanEvict packet coming from above
        // and encountering a Writeback generated by this cache that is
        // waiting in the write buffer. Cases of upper-level peer caches
        // generating CleanEvict and Writeback, or simply CleanEvict and
        // CleanEvict, almost simultaneously will be caught by snoops sent
        // out by the crossbar.
969        WriteQueueEntry *wb_entry = writeBuffer.findMatch(pkt->getAddr(),
970                                                          pkt->isSecure());
971        if (wb_entry) {
972            assert(wb_entry->getNumTargets() == 1);
973            PacketPtr wbPkt = wb_entry->getTarget()->pkt;
974            assert(wbPkt->isWriteback());
975
976            if (pkt->isCleanEviction()) {
                // The CleanEvict and WritebackClean snoop into other
978                // peer caches of the same level while traversing the
979                // crossbar. If a copy of the block is found, the
980                // packet is deleted in the crossbar. Hence, none of
981                // the other upper level caches connected to this
982                // cache have the block, so we can clear the
983                // BLOCK_CACHED flag in the Writeback if set and
984                // discard the CleanEvict by returning true.
985                wbPkt->clearBlockCached();
986
987                // A clean evict does not need to access the data array
988                lat = calculateTagOnlyLatency(pkt->headerDelay, tag_latency);
989
990                return true;
991            } else {
992                assert(pkt->cmd == MemCmd::WritebackDirty);
993                // Dirty writeback from above trumps our clean
994                // writeback... discard here
995                // Note: markInService will remove entry from writeback buffer.
996                markInService(wb_entry);
997                delete wbPkt;
998            }
999        }
1000    }
1001
1002    // Writeback handling is special case.  We can write the block into
1003    // the cache without having a writeable copy (or any copy at all).
1004    if (pkt->isWriteback()) {
1005        assert(blkSize == pkt->getSize());
1006
        // we could get a clean writeback while we have outstanding
        // accesses to a block; do the simple thing for now and drop
        // the clean writeback so that we do not upset any
        // ordering/decisions about ownership already taken
1011        if (pkt->cmd == MemCmd::WritebackClean &&
1012            mshrQueue.findMatch(pkt->getAddr(), pkt->isSecure())) {
1013            DPRINTF(Cache, "Clean writeback %#llx to block with MSHR, "
1014                    "dropping\n", pkt->getAddr());
1015
1016            // A writeback searches for the block, then writes the data.
1017            // As the writeback is being dropped, the data is not touched,
1018            // and we just had to wait for the time to find a match in the
            // MSHR. For now, assume an MSHR queue search takes as long
            // as a tag lookup, for simplicity.
1021            lat = calculateTagOnlyLatency(pkt->headerDelay, tag_latency);
1022
1023            return true;
1024        }
1025
1026        if (!blk) {
1027            // need to do a replacement
1028            blk = allocateBlock(pkt, writebacks);
1029            if (!blk) {
1030                // no replaceable block available: give up, fwd to next level.
1031                incMissCount(pkt);
1032
1033                // A writeback searches for the block, then writes the data.
1034                // As the block could not be found, it was a tag-only access.
1035                lat = calculateTagOnlyLatency(pkt->headerDelay, tag_latency);
1036
1037                return false;
1038            }
1039
1040            blk->status |= BlkReadable;
1041        }
1042        // only mark the block dirty if we got a writeback command,
1043        // and leave it as is for a clean writeback
1044        if (pkt->cmd == MemCmd::WritebackDirty) {
1045            // TODO: the coherent cache can assert(!blk->isDirty());
1046            blk->status |= BlkDirty;
1047        }
        // if the packet does not have sharers, it is passing
        // writable, and we got the writeback in Modified or Exclusive
        // state; if not, we are in the Owned or Shared state
1051        if (!pkt->hasSharers()) {
1052            blk->status |= BlkWritable;
1053        }
1054        // nothing else to do; writeback doesn't expect response
1055        assert(!pkt->needsResponse());
1056        pkt->writeDataToBlock(blk->data, blkSize);
1057        DPRINTF(Cache, "%s new state is %s\n", __func__, blk->print());
1058        incHitCount(pkt);
1059
1060        // A writeback searches for the block, then writes the data
1061        lat = calculateAccessLatency(blk, pkt->headerDelay, tag_latency);
1062
1063        // When the packet metadata arrives, the tag lookup will be done while
1064        // the payload is arriving. Then the block will be ready to access as
1065        // soon as the fill is done
1066        blk->setWhenReady(clockEdge(fillLatency) + pkt->headerDelay +
1067            std::max(cyclesToTicks(tag_latency), (uint64_t)pkt->payloadDelay));
1068
1069        return true;
1070    } else if (pkt->cmd == MemCmd::CleanEvict) {
1071        // A CleanEvict does not need to access the data array
1072        lat = calculateTagOnlyLatency(pkt->headerDelay, tag_latency);
1073
1074        if (blk) {
1075            // Found the block in the tags, need to stop CleanEvict from
1076            // propagating further down the hierarchy. Returning true will
1077            // treat the CleanEvict like a satisfied write request and delete
1078            // it.
1079            return true;
1080        }
1081        // We didn't find the block here, propagate the CleanEvict further
1082        // down the memory hierarchy. Returning false will treat the CleanEvict
1083        // like a Writeback which could not find a replaceable block so has to
1084        // go to next level.
1085        return false;
1086    } else if (pkt->cmd == MemCmd::WriteClean) {
1087        // WriteClean handling is a special case. We can allocate a
1088        // block directly if it doesn't exist and we can update the
1089        // block immediately. The WriteClean transfers the ownership
1090        // of the block as well.
1091        assert(blkSize == pkt->getSize());
1092
1093        if (!blk) {
1094            if (pkt->writeThrough()) {
1095                // A writeback searches for the block, then writes the data.
1096                // As the block could not be found, it was a tag-only access.
1097                lat = calculateTagOnlyLatency(pkt->headerDelay, tag_latency);
1098
1099                // if this is a write through packet, we don't try to
1100                // allocate if the block is not present
1101                return false;
1102            } else {
1103                // a writeback that misses needs to allocate a new block
1104                blk = allocateBlock(pkt, writebacks);
1105                if (!blk) {
1106                    // no replaceable block available: give up, fwd to
1107                    // next level.
1108                    incMissCount(pkt);
1109
1110                    // A writeback searches for the block, then writes the
1111                    // data. As the block could not be found, it was a tag-only
1112                    // access.
1113                    lat = calculateTagOnlyLatency(pkt->headerDelay,
1114                                                  tag_latency);
1115
1116                    return false;
1117                }
1118
1119                blk->status |= BlkReadable;
1120            }
1121        }
1122
1123        // at this point either this is a writeback or a write-through
1124        // write clean operation and the block is already in this
1125        // cache, we need to update the data and the block flags
1126        assert(blk);
1127        // TODO: the coherent cache can assert(!blk->isDirty());
1128        if (!pkt->writeThrough()) {
1129            blk->status |= BlkDirty;
1130        }
1131        // nothing else to do; writeback doesn't expect response
1132        assert(!pkt->needsResponse());
1133        pkt->writeDataToBlock(blk->data, blkSize);
1134        DPRINTF(Cache, "%s new state is %s\n", __func__, blk->print());
1135
1136        incHitCount(pkt);
1137
1138        // A writeback searches for the block, then writes the data
1139        lat = calculateAccessLatency(blk, pkt->headerDelay, tag_latency);
1140
1141        // When the packet metadata arrives, the tag lookup will be done while
1142        // the payload is arriving. Then the block will be ready to access as
1143        // soon as the fill is done
1144        blk->setWhenReady(clockEdge(fillLatency) + pkt->headerDelay +
1145            std::max(cyclesToTicks(tag_latency), (uint64_t)pkt->payloadDelay));
1146
        // if this is a write-through packet it will be sent to the
        // cache below
1149        return !pkt->writeThrough();
1150    } else if (blk && (pkt->needsWritable() ? blk->isWritable() :
1151                       blk->isReadable())) {
1152        // OK to satisfy access
1153        incHitCount(pkt);
1154
1155        // Calculate access latency based on the need to access the data array
1156        if (pkt->isRead() || pkt->isWrite()) {
1157            lat = calculateAccessLatency(blk, pkt->headerDelay, tag_latency);
1158        } else {
1159            lat = calculateTagOnlyLatency(pkt->headerDelay, tag_latency);
1160        }
1161
1162        satisfyRequest(pkt, blk);
1163        maintainClusivity(pkt->fromCache(), blk);
1164
1165        return true;
1166    }
1167
1168    // Can't satisfy access normally... either no block (blk == nullptr)
1169    // or have block but need writable
1170
1171    incMissCount(pkt);
1172
1173    lat = calculateAccessLatency(blk, pkt->headerDelay, tag_latency);
1174
1175    if (!blk && pkt->isLLSC() && pkt->isWrite()) {
1176        // complete miss on store conditional... just give up now
1177        pkt->req->setExtraData(0);
1178        return true;
1179    }
1180
1181    return false;
1182}
1183
1184void
1185BaseCache::maintainClusivity(bool from_cache, CacheBlk *blk)
1186{
1187    if (from_cache && blk && blk->isValid() && !blk->isDirty() &&
1188        clusivity == Enums::mostly_excl) {
1189        // if we have responded to a cache, and our block is still
1190        // valid, but not dirty, and this cache is mostly exclusive
1191        // with respect to the cache above, drop the block
1192        invalidateBlock(blk);
1193    }
1194}
1195
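// Populate a block with the data from a response, allocating a new
// block (or falling back to tempBlock) if needed, and update its
// coherence state based on the hasSharers/cacheResponding flags.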
1196CacheBlk*
1197BaseCache::handleFill(PacketPtr pkt, CacheBlk *blk, PacketList &writebacks,
1198                      bool allocate)
1199{
1200    assert(pkt->isResponse());
1201    Addr addr = pkt->getAddr();
1202    bool is_secure = pkt->isSecure();
1203#if TRACING_ON
1204    CacheBlk::State old_state = blk ? blk->status : 0;
1205#endif
1206
1207    // When handling a fill, we should have no writes to this line.
1208    assert(addr == pkt->getBlockAddr(blkSize));
1209    assert(!writeBuffer.findMatch(addr, is_secure));
1210
1211    if (!blk) {
1212        // better have read new data...
1213        assert(pkt->hasData() || pkt->cmd == MemCmd::InvalidateResp);
1214
1215        // need to do a replacement if allocating, otherwise we stick
1216        // with the temporary storage
1217        blk = allocate ? allocateBlock(pkt, writebacks) : nullptr;
1218
1219        if (!blk) {
1220            // No replaceable block or a mostly exclusive
1221            // cache... just use temporary storage to complete the
1222            // current request and then get rid of it
1223            blk = tempBlock;
1224            tempBlock->insert(addr, is_secure);
1225            DPRINTF(Cache, "using temp block for %#llx (%s)\n", addr,
1226                    is_secure ? "s" : "ns");
1227        }
1228    } else {
1229        // existing block... probably an upgrade
1230        // don't clear block status... if block is already dirty we
1231        // don't want to lose that
1232    }
1233
1234    // Block is guaranteed to be valid at this point
1235    assert(blk->isValid());
1236    assert(blk->isSecure() == is_secure);
1237    assert(regenerateBlkAddr(blk) == addr);
1238
1239    blk->status |= BlkReadable;
1240
1241    // sanity check for whole-line writes, which should always be
1242    // marked as writable as part of the fill, and then later marked
1243    // dirty as part of satisfyRequest
1244    if (pkt->cmd == MemCmd::InvalidateResp) {
1245        assert(!pkt->hasSharers());
1246    }
1247
1248    // here we deal with setting the appropriate state of the line,
1249    // and we start by looking at the hasSharers flag, and ignore the
1250    // cacheResponding flag (normally signalling dirty data) if the
1251    // packet has sharers, thus the line is never allocated as Owned
1252    // (dirty but not writable), and always ends up being either
1253    // Shared, Exclusive or Modified, see Packet::setCacheResponding
1254    // for more details
1255    if (!pkt->hasSharers()) {
1256        // we could get a writable line from memory (rather than a
1257        // cache) even in a read-only cache, note that we set this bit
1258        // even for a read-only cache, possibly revisit this decision
1259        blk->status |= BlkWritable;
1260
1261        // check if we got this via cache-to-cache transfer (i.e., from a
1262        // cache that had the block in Modified or Owned state)
1263        if (pkt->cacheResponding()) {
1264            // we got the block in Modified state, and invalidated the
            // owner's copy
1266            blk->status |= BlkDirty;
1267
1268            chatty_assert(!isReadOnly, "Should never see dirty snoop response "
1269                          "in read-only cache %s\n", name());
1270        }
1271    }
1272
1273    DPRINTF(Cache, "Block addr %#llx (%s) moving from state %x to %s\n",
1274            addr, is_secure ? "s" : "ns", old_state, blk->print());
1275
1276    // if we got new data, copy it in (checking for a read response
1277    // and a response that has data is the same in the end)
1278    if (pkt->isRead()) {
1279        // sanity checks
1280        assert(pkt->hasData());
1281        assert(pkt->getSize() == blkSize);
1282
1283        pkt->writeDataToBlock(blk->data, blkSize);
1284    }
1285    // The block will be ready when the payload arrives and the fill is done
1286    blk->setWhenReady(clockEdge(fillLatency) + pkt->headerDelay +
1287                      pkt->payloadDelay);
1288
1289    return blk;
1290}
1291
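// Find and evict a victim block for the given packet's address; returns
// nullptr if no victim is available or a candidate is in a transient
// (MSHR-pending) state.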
1292CacheBlk*
1293BaseCache::allocateBlock(const PacketPtr pkt, PacketList &writebacks)
1294{
1295    // Get address
1296    const Addr addr = pkt->getAddr();
1297
1298    // Get secure bit
1299    const bool is_secure = pkt->isSecure();
1300
1301    // Find replacement victim
1302    std::vector<CacheBlk*> evict_blks;
1303    CacheBlk *victim = tags->findVictim(addr, is_secure, evict_blks);
1304
1305    // It is valid to return nullptr if there is no victim
1306    if (!victim)
1307        return nullptr;
1308
1309    // Print victim block's information
1310    DPRINTF(CacheRepl, "Replacement victim: %s\n", victim->print());
1311
1312    // Check for transient state allocations. If any of the entries listed
1313    // for eviction has a transient state, the allocation fails
1314    for (const auto& blk : evict_blks) {
1315        if (blk->isValid()) {
1316            Addr repl_addr = regenerateBlkAddr(blk);
1317            MSHR *repl_mshr = mshrQueue.findMatch(repl_addr, blk->isSecure());
1318            if (repl_mshr) {
1319                // must be an outstanding upgrade or clean request
1320                // on a block we're about to replace...
1321                assert((!blk->isWritable() && repl_mshr->needsWritable()) ||
1322                       repl_mshr->isCleaning());
1323
1324                // too hard to replace block with transient state
1325                // allocation failed, block not inserted
1326                return nullptr;
1327            }
1328        }
1329    }
1330
1331    // The victim will be replaced by a new entry, so increase the replacement
1332    // counter if a valid block is being replaced
1333    if (victim->isValid()) {
1334        DPRINTF(Cache, "replacement: replacing %#llx (%s) with %#llx "
1335                "(%s): %s\n", regenerateBlkAddr(victim),
1336                victim->isSecure() ? "s" : "ns",
1337                addr, is_secure ? "s" : "ns",
1338                victim->isDirty() ? "writeback" : "clean");
1339
1340        replacements++;
1341    }
1342
1343    // Evict valid blocks associated to this victim block
1344    for (const auto& blk : evict_blks) {
1345        if (blk->isValid()) {
1346            if (blk->wasPrefetched()) {
1347                unusedPrefetches++;
1348            }
1349
1350            evictBlock(blk, writebacks);
1351        }
1352    }
1353
1354    // Insert new block at victimized entry
1355    tags->insertBlock(pkt, victim);
1356
1357    return victim;
1358}
1359
1360void
1361BaseCache::invalidateBlock(CacheBlk *blk)
1362{
1363    // If handling a block present in the Tags, let it do its invalidation
1364    // process, which will update stats and invalidate the block itself
1365    if (blk != tempBlock) {
1366        tags->invalidate(blk);
1367    } else {
1368        tempBlock->invalidate();
1369    }
1370}
1371
1372void
1373BaseCache::evictBlock(CacheBlk *blk, PacketList &writebacks)
1374{
1375    PacketPtr pkt = evictBlock(blk);
1376    if (pkt) {
1377        writebacks.push_back(pkt);
1378    }
1379}
1380
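// Create a writeback packet (dirty or clean) for the given block,
// downgrading our copy to non-writable and clearing its dirty bit.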
1381PacketPtr
1382BaseCache::writebackBlk(CacheBlk *blk)
1383{
1384    chatty_assert(!isReadOnly || writebackClean,
1385                  "Writeback from read-only cache");
1386    assert(blk && blk->isValid() && (blk->isDirty() || writebackClean));
1387
1388    writebacks[Request::wbMasterId]++;
1389
1390    RequestPtr req = std::make_shared<Request>(
1391        regenerateBlkAddr(blk), blkSize, 0, Request::wbMasterId);
1392
1393    if (blk->isSecure())
1394        req->setFlags(Request::SECURE);
1395
1396    req->taskId(blk->task_id);
1397
1398    PacketPtr pkt =
1399        new Packet(req, blk->isDirty() ?
1400                   MemCmd::WritebackDirty : MemCmd::WritebackClean);
1401
1402    DPRINTF(Cache, "Create Writeback %s writable: %d, dirty: %d\n",
1403            pkt->print(), blk->isWritable(), blk->isDirty());
1404
1405    if (blk->isWritable()) {
1406        // not asserting sharers means we pass the block on in Modified
1407        // state; mark our own copy non-writable
1408        blk->status &= ~BlkWritable;
1409    } else {
1410        // we are in the Owned state, tell the receiver
1411        pkt->setHasSharers();
1412    }
1413
1414    // make sure the block is not marked dirty
1415    blk->status &= ~BlkDirty;
1416
1417    pkt->allocate();
1418    pkt->setDataFromBlock(blk->data, blkSize);
1419
1420    return pkt;
1421}
1422
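// Create a WriteClean packet for the given block, optionally flagged as
// write-through towards the given destination; the block's dirty bit is
// cleared and a writable copy is downgraded.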
1423PacketPtr
1424BaseCache::writecleanBlk(CacheBlk *blk, Request::Flags dest, PacketId id)
1425{
1426    RequestPtr req = std::make_shared<Request>(
1427        regenerateBlkAddr(blk), blkSize, 0, Request::wbMasterId);
1428
1429    if (blk->isSecure()) {
1430        req->setFlags(Request::SECURE);
1431    }
1432    req->taskId(blk->task_id);
1433
1434    PacketPtr pkt = new Packet(req, MemCmd::WriteClean, blkSize, id);
1435
1436    if (dest) {
1437        req->setFlags(dest);
1438        pkt->setWriteThrough();
1439    }
1440
1441    DPRINTF(Cache, "Create %s writable: %d, dirty: %d\n", pkt->print(),
1442            blk->isWritable(), blk->isDirty());
1443
1444    if (blk->isWritable()) {
1445        // not asserting sharers means we pass the block on in Modified
1446        // state; mark our own copy non-writable
1447        blk->status &= ~BlkWritable;
1448    } else {
1449        // we are in the Owned state, tell the receiver
1450        pkt->setHasSharers();
1451    }
1452
1453    // make sure the block is not marked dirty
1454    blk->status &= ~BlkDirty;
1455
1456    pkt->allocate();
1457    pkt->setDataFromBlock(blk->data, blkSize);
1458
1459    return pkt;
1460}
1461
1462
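// Functionally write back every dirty block in the cache to memory.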
1463void
1464BaseCache::memWriteback()
1465{
1466    tags->forEachBlk([this](CacheBlk &blk) { writebackVisitor(blk); });
1467}
1468
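// Invalidate every block in the cache; blocks are expected to have been
// written back first.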
1469void
1470BaseCache::memInvalidate()
1471{
1472    tags->forEachBlk([this](CacheBlk &blk) { invalidateVisitor(blk); });
1473}
1474
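// Check whether the cache currently holds any dirty block.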
1475bool
1476BaseCache::isDirty() const
1477{
1478    return tags->anyBlk([](CacheBlk &blk) { return blk.isDirty(); });
1479}
1480
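// True if a write allocator is present and reports that writes are
// currently being coalesced.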
1481bool
1482BaseCache::coalesce() const
1483{
1484    return writeAllocator && writeAllocator->coalesce();
1485}
1486
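// If the visited block is dirty, functionally write it back to memory
// and clear its dirty bit.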
1487void
1488BaseCache::writebackVisitor(CacheBlk &blk)
1489{
1490    if (blk.isDirty()) {
1491        assert(blk.isValid());
1492
1493        RequestPtr request = std::make_shared<Request>(
1494            regenerateBlkAddr(&blk), blkSize, 0, Request::funcMasterId);
1495
1496        request->taskId(blk.task_id);
1497        if (blk.isSecure()) {
1498            request->setFlags(Request::SECURE);
1499        }
1500
1501        Packet packet(request, MemCmd::WriteReq);
1502        packet.dataStatic(blk.data);
1503
1504        memSidePort.sendFunctional(&packet);
1505
1506        blk.status &= ~BlkDirty;
1507    }
1508}
1509
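// Invalidate the visited block; warn (once) if it is still dirty.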
1510void
1511BaseCache::invalidateVisitor(CacheBlk &blk)
1512{
1513    if (blk.isDirty())
1514        warn_once("Invalidating dirty cache lines. " \
1515                  "Expect things to break.\n");
1516
1517    if (blk.isValid()) {
1518        assert(!blk.isDirty());
1519        invalidateBlock(&blk);
1520    }
1521}
1522
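// Earliest tick at which the MSHR queue, the write buffer or (if MSHRs
// are available) the prefetcher has something ready to send.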
1523Tick
1524BaseCache::nextQueueReadyTime() const
1525{
1526    Tick nextReady = std::min(mshrQueue.nextReadyTime(),
1527                              writeBuffer.nextReadyTime());
1528
1529    // Don't signal prefetch ready time if no MSHRs are available;
1530    // it will be signalled once enough MSHRs are deallocated
1531    if (prefetcher && mshrQueue.canPrefetch()) {
1532        nextReady = std::min(nextReady,
1533                             prefetcher->nextPrefetchReadyTime());
1534    }
1535
1536    return nextReady;
1537}
1538
1539
1540bool
1541BaseCache::sendMSHRQueuePacket(MSHR* mshr)
1542{
1543    assert(mshr);
1544
1545    // use request from 1st target
1546    PacketPtr tgt_pkt = mshr->getTarget()->pkt;
1547
1548    DPRINTF(Cache, "%s: MSHR %s\n", __func__, tgt_pkt->print());
1549
1550    // if the cache is in write coalescing mode or (additionally) in
1551    // no allocation mode, and we have a write packet with an MSHR
1552    // that is not a whole-line write (due to incompatible flags etc),
1553    // then reset the write mode
1554    if (writeAllocator && writeAllocator->coalesce() && tgt_pkt->isWrite()) {
1555        if (!mshr->isWholeLineWrite()) {
1556            // if we are currently write coalescing, hold on to the
1557            // MSHR for as many extra cycles as we need to completely
1558            // write a cache line
1559            if (writeAllocator->delay(mshr->blkAddr)) {
1560                Tick delay = blkSize / tgt_pkt->getSize() * clockPeriod();
1561                DPRINTF(CacheVerbose, "Delaying pkt %s %llu ticks to allow "
1562                        "for write coalescing\n", tgt_pkt->print(), delay);
1563                mshrQueue.delay(mshr, delay);
1564                return false;
1565            } else {
1566                writeAllocator->reset();
1567            }
1568        } else {
1569            writeAllocator->resetDelay(mshr->blkAddr);
1570        }
1571    }
1572
1573    CacheBlk *blk = tags->findBlock(mshr->blkAddr, mshr->isSecure);
1574
1575    // either a prefetch that is not present upstream, or a normal
1576    // MSHR request; proceed to get the packet to send downstream
1577    PacketPtr pkt = createMissPacket(tgt_pkt, blk, mshr->needsWritable(),
1578                                     mshr->isWholeLineWrite());
1579
1580    mshr->isForward = (pkt == nullptr);
1581
1582    if (mshr->isForward) {
1583        // not a cache block request, but a response is expected
1584        // make copy of current packet to forward, keep current
1585        // copy for response handling
1586        pkt = new Packet(tgt_pkt, false, true);
1587        assert(!pkt->isWrite());
1588    }
1589
1590    // play it safe and append (rather than set) the sender state,
1591    // as forwarded packets may already have existing state
1592    pkt->pushSenderState(mshr);
1593
1594    if (pkt->isClean() && blk && blk->isDirty()) {
1595        // A cache clean operation is looking for a dirty block. Mark
1596        // the packet so that the destination xbar can determine that
1597        // there will be a follow-up write packet as well.
1598        pkt->setSatisfied();
1599    }
1600
1601    if (!memSidePort.sendTimingReq(pkt)) {
1602        // we are awaiting a retry, but we
1603        // delete the packet and will be creating a new packet
1604        // when we get the opportunity
1605        delete pkt;
1606
1607        // note that we have now masked any requestBus and
1608        // schedSendEvent (we will wait for a retry before
1609        // doing anything), and this is so even if we do not
1610        // care about this packet and might override it before
1611        // it gets retried
1612        return true;
1613    } else {
1614        // As part of the call to sendTimingReq the packet is
1615        // forwarded to all neighbouring caches (and any caches
1616        // above them) as a snoop. Thus at this point we know if
1617        // any of the neighbouring caches are responding, and if
1618        // so, we know it is dirty, and we can determine if it is
1619        // being passed as Modified, making our MSHR the ordering
1620        // point
1621        bool pending_modified_resp = !pkt->hasSharers() &&
1622            pkt->cacheResponding();
1623        markInService(mshr, pending_modified_resp);
1624
1625        if (pkt->isClean() && blk && blk->isDirty()) {
1626            // A cache clean operation is looking for a dirty
1627            // block. If a dirty block is encountered, a WriteClean
1628            // will update any copies on the path to memory down to
1629            // the point of reference.
1630            DPRINTF(CacheVerbose, "%s: packet %s found block: %s\n",
1631                    __func__, pkt->print(), blk->print());
1632            PacketPtr wb_pkt = writecleanBlk(blk, pkt->req->getDest(),
1633                                             pkt->id);
1634            PacketList writebacks;
1635            writebacks.push_back(wb_pkt);
1636            doWritebacks(writebacks, 0);
1637        }
1638
1639        return false;
1640    }
1641}
1642
1643bool
1644BaseCache::sendWriteQueuePacket(WriteQueueEntry* wq_entry)
1645{
1646    assert(wq_entry);
1647
1648    // always a single target for write queue entries
1649    PacketPtr tgt_pkt = wq_entry->getTarget()->pkt;
1650
1651    DPRINTF(Cache, "%s: write %s\n", __func__, tgt_pkt->print());
1652
1653    // forward as is, both for evictions and uncacheable writes
1654    if (!memSidePort.sendTimingReq(tgt_pkt)) {
1655        // note that we have now masked any requestBus and
1656        // schedSendEvent (we will wait for a retry before
1657        // doing anything), and this is so even if we do not
1658        // care about this packet and might override it before
1659        // it gets retried
1660        return true;
1661    } else {
1662        markInService(wq_entry);
1663        return false;
1664    }
1665}
1666
1667void
1668BaseCache::serialize(CheckpointOut &cp) const
1669{
1670    bool dirty(isDirty());
1671
1672    if (dirty) {
1673        warn("*** The cache still contains dirty data. ***\n");
1674        warn("    Make sure to drain the system using the correct flags.\n");
1675        warn("    This checkpoint will not restore correctly " \
1676             "and dirty data in the cache will be lost!\n");
1677    }
1678
1679    // Since we don't checkpoint the data in the cache, any dirty data
1680    // will be lost when restoring from a checkpoint of a system that
1681    // wasn't drained properly. Flag the checkpoint as invalid if the
1682    // cache contains dirty data.
1683    bool bad_checkpoint(dirty);
1684    SERIALIZE_SCALAR(bad_checkpoint);
1685}
1686
1687void
1688BaseCache::unserialize(CheckpointIn &cp)
1689{
1690    bool bad_checkpoint;
1691    UNSERIALIZE_SCALAR(bad_checkpoint);
1692    if (bad_checkpoint) {
1693        fatal("Restoring from checkpoints with dirty caches is not "
1694              "supported in the classic memory system. Please remove any "
1695              "caches or drain them properly before taking checkpoints.\n");
1696    }
1697}
1698
1699void
1700BaseCache::regStats()
1701{
1702    MemObject::regStats();
1703
1704    using namespace Stats;
1705
1706    // Hit statistics
1707    for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
1708        MemCmd cmd(access_idx);
1709        const string &cstr = cmd.toString();
1710
1711        hits[access_idx]
1712            .init(system->maxMasters())
1713            .name(name() + "." + cstr + "_hits")
1714            .desc("number of " + cstr + " hits")
1715            .flags(total | nozero | nonan)
1716            ;
1717        for (int i = 0; i < system->maxMasters(); i++) {
1718            hits[access_idx].subname(i, system->getMasterName(i));
1719        }
1720    }
1721
1722// These macros make it easier to sum the right subset of commands and
1723// to change the subset of commands that are considered "demand" vs
1724// "non-demand"
1725#define SUM_DEMAND(s) \
1726    (s[MemCmd::ReadReq] + s[MemCmd::WriteReq] + s[MemCmd::WriteLineReq] + \
1727     s[MemCmd::ReadExReq] + s[MemCmd::ReadCleanReq] + s[MemCmd::ReadSharedReq])
1728
1729// should writebacks be included here?  prior code was inconsistent...
1730#define SUM_NON_DEMAND(s) \
1731    (s[MemCmd::SoftPFReq] + s[MemCmd::HardPFReq] + s[MemCmd::SoftPFExReq])
1732
1733    demandHits
1734        .name(name() + ".demand_hits")
1735        .desc("number of demand (read+write) hits")
1736        .flags(total | nozero | nonan)
1737        ;
1738    demandHits = SUM_DEMAND(hits);
1739    for (int i = 0; i < system->maxMasters(); i++) {
1740        demandHits.subname(i, system->getMasterName(i));
1741    }
1742
1743    overallHits
1744        .name(name() + ".overall_hits")
1745        .desc("number of overall hits")
1746        .flags(total | nozero | nonan)
1747        ;
1748    overallHits = demandHits + SUM_NON_DEMAND(hits);
1749    for (int i = 0; i < system->maxMasters(); i++) {
1750        overallHits.subname(i, system->getMasterName(i));
1751    }
1752
1753    // Miss statistics
1754    for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
1755        MemCmd cmd(access_idx);
1756        const string &cstr = cmd.toString();
1757
1758        misses[access_idx]
1759            .init(system->maxMasters())
1760            .name(name() + "." + cstr + "_misses")
1761            .desc("number of " + cstr + " misses")
1762            .flags(total | nozero | nonan)
1763            ;
1764        for (int i = 0; i < system->maxMasters(); i++) {
1765            misses[access_idx].subname(i, system->getMasterName(i));
1766        }
1767    }
1768
1769    demandMisses
1770        .name(name() + ".demand_misses")
1771        .desc("number of demand (read+write) misses")
1772        .flags(total | nozero | nonan)
1773        ;
1774    demandMisses = SUM_DEMAND(misses);
1775    for (int i = 0; i < system->maxMasters(); i++) {
1776        demandMisses.subname(i, system->getMasterName(i));
1777    }
1778
1779    overallMisses
1780        .name(name() + ".overall_misses")
1781        .desc("number of overall misses")
1782        .flags(total | nozero | nonan)
1783        ;
1784    overallMisses = demandMisses + SUM_NON_DEMAND(misses);
1785    for (int i = 0; i < system->maxMasters(); i++) {
1786        overallMisses.subname(i, system->getMasterName(i));
1787    }
1788
1789    // Miss latency statistics
1790    for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
1791        MemCmd cmd(access_idx);
1792        const string &cstr = cmd.toString();
1793
1794        missLatency[access_idx]
1795            .init(system->maxMasters())
1796            .name(name() + "." + cstr + "_miss_latency")
1797            .desc("number of " + cstr + " miss cycles")
1798            .flags(total | nozero | nonan)
1799            ;
1800        for (int i = 0; i < system->maxMasters(); i++) {
1801            missLatency[access_idx].subname(i, system->getMasterName(i));
1802        }
1803    }
1804
1805    demandMissLatency
1806        .name(name() + ".demand_miss_latency")
1807        .desc("number of demand (read+write) miss cycles")
1808        .flags(total | nozero | nonan)
1809        ;
1810    demandMissLatency = SUM_DEMAND(missLatency);
1811    for (int i = 0; i < system->maxMasters(); i++) {
1812        demandMissLatency.subname(i, system->getMasterName(i));
1813    }
1814
1815    overallMissLatency
1816        .name(name() + ".overall_miss_latency")
1817        .desc("number of overall miss cycles")
1818        .flags(total | nozero | nonan)
1819        ;
1820    overallMissLatency = demandMissLatency + SUM_NON_DEMAND(missLatency);
1821    for (int i = 0; i < system->maxMasters(); i++) {
1822        overallMissLatency.subname(i, system->getMasterName(i));
1823    }
1824
1825    // access formulas
1826    for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
1827        MemCmd cmd(access_idx);
1828        const string &cstr = cmd.toString();
1829
1830        accesses[access_idx]
1831            .name(name() + "." + cstr + "_accesses")
1832            .desc("number of " + cstr + " accesses(hits+misses)")
1833            .flags(total | nozero | nonan)
1834            ;
1835        accesses[access_idx] = hits[access_idx] + misses[access_idx];
1836
1837        for (int i = 0; i < system->maxMasters(); i++) {
1838            accesses[access_idx].subname(i, system->getMasterName(i));
1839        }
1840    }
1841
1842    demandAccesses
1843        .name(name() + ".demand_accesses")
1844        .desc("number of demand (read+write) accesses")
1845        .flags(total | nozero | nonan)
1846        ;
1847    demandAccesses = demandHits + demandMisses;
1848    for (int i = 0; i < system->maxMasters(); i++) {
1849        demandAccesses.subname(i, system->getMasterName(i));
1850    }
1851
1852    overallAccesses
1853        .name(name() + ".overall_accesses")
1854        .desc("number of overall (read+write) accesses")
1855        .flags(total | nozero | nonan)
1856        ;
1857    overallAccesses = overallHits + overallMisses;
1858    for (int i = 0; i < system->maxMasters(); i++) {
1859        overallAccesses.subname(i, system->getMasterName(i));
1860    }
1861
1862    // miss rate formulas
1863    for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
1864        MemCmd cmd(access_idx);
1865        const string &cstr = cmd.toString();
1866
1867        missRate[access_idx]
1868            .name(name() + "." + cstr + "_miss_rate")
1869            .desc("miss rate for " + cstr + " accesses")
1870            .flags(total | nozero | nonan)
1871            ;
1872        missRate[access_idx] = misses[access_idx] / accesses[access_idx];
1873
1874        for (int i = 0; i < system->maxMasters(); i++) {
1875            missRate[access_idx].subname(i, system->getMasterName(i));
1876        }
1877    }
1878
1879    demandMissRate
1880        .name(name() + ".demand_miss_rate")
1881        .desc("miss rate for demand accesses")
1882        .flags(total | nozero | nonan)
1883        ;
1884    demandMissRate = demandMisses / demandAccesses;
1885    for (int i = 0; i < system->maxMasters(); i++) {
1886        demandMissRate.subname(i, system->getMasterName(i));
1887    }
1888
1889    overallMissRate
1890        .name(name() + ".overall_miss_rate")
1891        .desc("miss rate for overall accesses")
1892        .flags(total | nozero | nonan)
1893        ;
1894    overallMissRate = overallMisses / overallAccesses;
1895    for (int i = 0; i < system->maxMasters(); i++) {
1896        overallMissRate.subname(i, system->getMasterName(i));
1897    }
1898
1899    // miss latency formulas
1900    for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
1901        MemCmd cmd(access_idx);
1902        const string &cstr = cmd.toString();
1903
1904        avgMissLatency[access_idx]
1905            .name(name() + "." + cstr + "_avg_miss_latency")
1906            .desc("average " + cstr + " miss latency")
1907            .flags(total | nozero | nonan)
1908            ;
1909        avgMissLatency[access_idx] =
1910            missLatency[access_idx] / misses[access_idx];
1911
1912        for (int i = 0; i < system->maxMasters(); i++) {
1913            avgMissLatency[access_idx].subname(i, system->getMasterName(i));
1914        }
1915    }
1916
1917    demandAvgMissLatency
1918        .name(name() + ".demand_avg_miss_latency")
1919        .desc("average overall miss latency")
1920        .flags(total | nozero | nonan)
1921        ;
1922    demandAvgMissLatency = demandMissLatency / demandMisses;
1923    for (int i = 0; i < system->maxMasters(); i++) {
1924        demandAvgMissLatency.subname(i, system->getMasterName(i));
1925    }
1926
1927    overallAvgMissLatency
1928        .name(name() + ".overall_avg_miss_latency")
1929        .desc("average overall miss latency")
1930        .flags(total | nozero | nonan)
1931        ;
1932    overallAvgMissLatency = overallMissLatency / overallMisses;
1933    for (int i = 0; i < system->maxMasters(); i++) {
1934        overallAvgMissLatency.subname(i, system->getMasterName(i));
1935    }
1936
1937    blocked_cycles.init(NUM_BLOCKED_CAUSES);
1938    blocked_cycles
1939        .name(name() + ".blocked_cycles")
1940        .desc("number of cycles access was blocked")
1941        .subname(Blocked_NoMSHRs, "no_mshrs")
1942        .subname(Blocked_NoTargets, "no_targets")
1943        ;
1944
1945
1946    blocked_causes.init(NUM_BLOCKED_CAUSES);
1947    blocked_causes
1948        .name(name() + ".blocked")
1949        .desc("number of cycles access was blocked")
1950        .subname(Blocked_NoMSHRs, "no_mshrs")
1951        .subname(Blocked_NoTargets, "no_targets")
1952        ;
1953
1954    avg_blocked
1955        .name(name() + ".avg_blocked_cycles")
1956        .desc("average number of cycles each access was blocked")
1957        .subname(Blocked_NoMSHRs, "no_mshrs")
1958        .subname(Blocked_NoTargets, "no_targets")
1959        ;
1960
1961    avg_blocked = blocked_cycles / blocked_causes;
1962
1963    unusedPrefetches
1964        .name(name() + ".unused_prefetches")
1965        .desc("number of HardPF blocks evicted w/o reference")
1966        .flags(nozero)
1967        ;
1968
1969    writebacks
1970        .init(system->maxMasters())
1971        .name(name() + ".writebacks")
1972        .desc("number of writebacks")
1973        .flags(total | nozero | nonan)
1974        ;
1975    for (int i = 0; i < system->maxMasters(); i++) {
1976        writebacks.subname(i, system->getMasterName(i));
1977    }
1978
1979    // MSHR statistics
1980    // MSHR hit statistics
1981    for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
1982        MemCmd cmd(access_idx);
1983        const string &cstr = cmd.toString();
1984
1985        mshr_hits[access_idx]
1986            .init(system->maxMasters())
1987            .name(name() + "." + cstr + "_mshr_hits")
1988            .desc("number of " + cstr + " MSHR hits")
1989            .flags(total | nozero | nonan)
1990            ;
1991        for (int i = 0; i < system->maxMasters(); i++) {
1992            mshr_hits[access_idx].subname(i, system->getMasterName(i));
1993        }
1994    }
1995
1996    demandMshrHits
1997        .name(name() + ".demand_mshr_hits")
1998        .desc("number of demand (read+write) MSHR hits")
1999        .flags(total | nozero | nonan)
2000        ;
2001    demandMshrHits = SUM_DEMAND(mshr_hits);
2002    for (int i = 0; i < system->maxMasters(); i++) {
2003        demandMshrHits.subname(i, system->getMasterName(i));
2004    }
2005
2006    overallMshrHits
2007        .name(name() + ".overall_mshr_hits")
2008        .desc("number of overall MSHR hits")
2009        .flags(total | nozero | nonan)
2010        ;
2011    overallMshrHits = demandMshrHits + SUM_NON_DEMAND(mshr_hits);
2012    for (int i = 0; i < system->maxMasters(); i++) {
2013        overallMshrHits.subname(i, system->getMasterName(i));
2014    }
2015
2016    // MSHR miss statistics
2017    for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
2018        MemCmd cmd(access_idx);
2019        const string &cstr = cmd.toString();
2020
2021        mshr_misses[access_idx]
2022            .init(system->maxMasters())
2023            .name(name() + "." + cstr + "_mshr_misses")
2024            .desc("number of " + cstr + " MSHR misses")
2025            .flags(total | nozero | nonan)
2026            ;
2027        for (int i = 0; i < system->maxMasters(); i++) {
2028            mshr_misses[access_idx].subname(i, system->getMasterName(i));
2029        }
2030    }
2031
2032    demandMshrMisses
2033        .name(name() + ".demand_mshr_misses")
2034        .desc("number of demand (read+write) MSHR misses")
2035        .flags(total | nozero | nonan)
2036        ;
2037    demandMshrMisses = SUM_DEMAND(mshr_misses);
2038    for (int i = 0; i < system->maxMasters(); i++) {
2039        demandMshrMisses.subname(i, system->getMasterName(i));
2040    }
2041
2042    overallMshrMisses
2043        .name(name() + ".overall_mshr_misses")
2044        .desc("number of overall MSHR misses")
2045        .flags(total | nozero | nonan)
2046        ;
2047    overallMshrMisses = demandMshrMisses + SUM_NON_DEMAND(mshr_misses);
2048    for (int i = 0; i < system->maxMasters(); i++) {
2049        overallMshrMisses.subname(i, system->getMasterName(i));
2050    }
2051
2052    // MSHR miss latency statistics
2053    for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
2054        MemCmd cmd(access_idx);
2055        const string &cstr = cmd.toString();
2056
2057        mshr_miss_latency[access_idx]
2058            .init(system->maxMasters())
2059            .name(name() + "." + cstr + "_mshr_miss_latency")
2060            .desc("number of " + cstr + " MSHR miss cycles")
2061            .flags(total | nozero | nonan)
2062            ;
2063        for (int i = 0; i < system->maxMasters(); i++) {
2064            mshr_miss_latency[access_idx].subname(i, system->getMasterName(i));
2065        }
2066    }
2067
2068    demandMshrMissLatency
2069        .name(name() + ".demand_mshr_miss_latency")
2070        .desc("number of demand (read+write) MSHR miss cycles")
2071        .flags(total | nozero | nonan)
2072        ;
2073    demandMshrMissLatency = SUM_DEMAND(mshr_miss_latency);
2074    for (int i = 0; i < system->maxMasters(); i++) {
2075        demandMshrMissLatency.subname(i, system->getMasterName(i));
2076    }
2077
2078    overallMshrMissLatency
2079        .name(name() + ".overall_mshr_miss_latency")
2080        .desc("number of overall MSHR miss cycles")
2081        .flags(total | nozero | nonan)
2082        ;
2083    overallMshrMissLatency =
2084        demandMshrMissLatency + SUM_NON_DEMAND(mshr_miss_latency);
2085    for (int i = 0; i < system->maxMasters(); i++) {
2086        overallMshrMissLatency.subname(i, system->getMasterName(i));
2087    }
2088
2089    // MSHR uncacheable statistics
2090    for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
2091        MemCmd cmd(access_idx);
2092        const string &cstr = cmd.toString();
2093
2094        mshr_uncacheable[access_idx]
2095            .init(system->maxMasters())
2096            .name(name() + "." + cstr + "_mshr_uncacheable")
2097            .desc("number of " + cstr + " MSHR uncacheable")
2098            .flags(total | nozero | nonan)
2099            ;
2100        for (int i = 0; i < system->maxMasters(); i++) {
2101            mshr_uncacheable[access_idx].subname(i, system->getMasterName(i));
2102        }
2103    }
2104
2105    overallMshrUncacheable
2106        .name(name() + ".overall_mshr_uncacheable_misses")
2107        .desc("number of overall MSHR uncacheable misses")
2108        .flags(total | nozero | nonan)
2109        ;
2110    overallMshrUncacheable =
2111        SUM_DEMAND(mshr_uncacheable) + SUM_NON_DEMAND(mshr_uncacheable);
2112    for (int i = 0; i < system->maxMasters(); i++) {
2113        overallMshrUncacheable.subname(i, system->getMasterName(i));
2114    }
2115
2116    // MSHR uncacheable latency statistics
2117    for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
2118        MemCmd cmd(access_idx);
2119        const string &cstr = cmd.toString();
2120
2121        mshr_uncacheable_lat[access_idx]
2122            .init(system->maxMasters())
2123            .name(name() + "." + cstr + "_mshr_uncacheable_latency")
2124            .desc("number of " + cstr + " MSHR uncacheable cycles")
2125            .flags(total | nozero | nonan)
2126            ;
2127        for (int i = 0; i < system->maxMasters(); i++) {
2128            mshr_uncacheable_lat[access_idx].subname(
2129                i, system->getMasterName(i));
2130        }
2131    }
2132
2133    overallMshrUncacheableLatency
2134        .name(name() + ".overall_mshr_uncacheable_latency")
2135        .desc("number of overall MSHR uncacheable cycles")
2136        .flags(total | nozero | nonan)
2137        ;
2138    overallMshrUncacheableLatency =
2139        SUM_DEMAND(mshr_uncacheable_lat) +
2140        SUM_NON_DEMAND(mshr_uncacheable_lat);
2141    for (int i = 0; i < system->maxMasters(); i++) {
2142        overallMshrUncacheableLatency.subname(i, system->getMasterName(i));
2143    }
2144
2145#if 0
2146    // MSHR access formulas
2147    for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
2148        MemCmd cmd(access_idx);
2149        const string &cstr = cmd.toString();
2150
2151        mshrAccesses[access_idx]
2152            .name(name() + "." + cstr + "_mshr_accesses")
2153            .desc("number of " + cstr + " mshr accesses(hits+misses)")
2154            .flags(total | nozero | nonan)
2155            ;
2156        mshrAccesses[access_idx] =
2157            mshr_hits[access_idx] + mshr_misses[access_idx]
2158            + mshr_uncacheable[access_idx];
2159    }
2160
2161    demandMshrAccesses
2162        .name(name() + ".demand_mshr_accesses")
2163        .desc("number of demand (read+write) mshr accesses")
2164        .flags(total | nozero | nonan)
2165        ;
2166    demandMshrAccesses = demandMshrHits + demandMshrMisses;
2167
2168    overallMshrAccesses
2169        .name(name() + ".overall_mshr_accesses")
2170        .desc("number of overall (read+write) mshr accesses")
2171        .flags(total | nozero | nonan)
2172        ;
2173    overallMshrAccesses = overallMshrHits + overallMshrMisses
2174        + overallMshrUncacheable;
2175#endif
2176
2177    // MSHR miss rate formulas
2178    for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
2179        MemCmd cmd(access_idx);
2180        const string &cstr = cmd.toString();
2181
2182        mshrMissRate[access_idx]
2183            .name(name() + "." + cstr + "_mshr_miss_rate")
2184            .desc("mshr miss rate for " + cstr + " accesses")
2185            .flags(total | nozero | nonan)
2186            ;
2187        mshrMissRate[access_idx] =
2188            mshr_misses[access_idx] / accesses[access_idx];
2189
2190        for (int i = 0; i < system->maxMasters(); i++) {
2191            mshrMissRate[access_idx].subname(i, system->getMasterName(i));
2192        }
2193    }
2194
2195    demandMshrMissRate
2196        .name(name() + ".demand_mshr_miss_rate")
2197        .desc("mshr miss rate for demand accesses")
2198        .flags(total | nozero | nonan)
2199        ;
2200    demandMshrMissRate = demandMshrMisses / demandAccesses;
2201    for (int i = 0; i < system->maxMasters(); i++) {
2202        demandMshrMissRate.subname(i, system->getMasterName(i));
2203    }
2204
2205    overallMshrMissRate
2206        .name(name() + ".overall_mshr_miss_rate")
2207        .desc("mshr miss rate for overall accesses")
2208        .flags(total | nozero | nonan)
2209        ;
2210    overallMshrMissRate = overallMshrMisses / overallAccesses;
2211    for (int i = 0; i < system->maxMasters(); i++) {
2212        overallMshrMissRate.subname(i, system->getMasterName(i));
2213    }
2214
2215    // mshrMiss latency formulas
2216    for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
2217        MemCmd cmd(access_idx);
2218        const string &cstr = cmd.toString();
2219
2220        avgMshrMissLatency[access_idx]
2221            .name(name() + "." + cstr + "_avg_mshr_miss_latency")
2222            .desc("average " + cstr + " mshr miss latency")
2223            .flags(total | nozero | nonan)
2224            ;
2225        avgMshrMissLatency[access_idx] =
2226            mshr_miss_latency[access_idx] / mshr_misses[access_idx];
2227
2228        for (int i = 0; i < system->maxMasters(); i++) {
2229            avgMshrMissLatency[access_idx].subname(
2230                i, system->getMasterName(i));
2231        }
2232    }
2233
2234    demandAvgMshrMissLatency
2235        .name(name() + ".demand_avg_mshr_miss_latency")
2236        .desc("average overall mshr miss latency")
2237        .flags(total | nozero | nonan)
2238        ;
2239    demandAvgMshrMissLatency = demandMshrMissLatency / demandMshrMisses;
2240    for (int i = 0; i < system->maxMasters(); i++) {
2241        demandAvgMshrMissLatency.subname(i, system->getMasterName(i));
2242    }
2243
2244    overallAvgMshrMissLatency
2245        .name(name() + ".overall_avg_mshr_miss_latency")
2246        .desc("average overall mshr miss latency")
2247        .flags(total | nozero | nonan)
2248        ;
2249    overallAvgMshrMissLatency = overallMshrMissLatency / overallMshrMisses;
2250    for (int i = 0; i < system->maxMasters(); i++) {
2251        overallAvgMshrMissLatency.subname(i, system->getMasterName(i));
2252    }
2253
2254    // mshrUncacheable latency formulas
2255    for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
2256        MemCmd cmd(access_idx);
2257        const string &cstr = cmd.toString();
2258
2259        avgMshrUncacheableLatency[access_idx]
2260            .name(name() + "." + cstr + "_avg_mshr_uncacheable_latency")
2261            .desc("average " + cstr + " mshr uncacheable latency")
2262            .flags(total | nozero | nonan)
2263            ;
2264        avgMshrUncacheableLatency[access_idx] =
2265            mshr_uncacheable_lat[access_idx] / mshr_uncacheable[access_idx];
2266
2267        for (int i = 0; i < system->maxMasters(); i++) {
2268            avgMshrUncacheableLatency[access_idx].subname(
2269                i, system->getMasterName(i));
2270        }
2271    }
2272
2273    overallAvgMshrUncacheableLatency
2274        .name(name() + ".overall_avg_mshr_uncacheable_latency")
2275        .desc("average overall mshr uncacheable latency")
2276        .flags(total | nozero | nonan)
2277        ;
2278    overallAvgMshrUncacheableLatency =
2279        overallMshrUncacheableLatency / overallMshrUncacheable;
2280    for (int i = 0; i < system->maxMasters(); i++) {
2281        overallAvgMshrUncacheableLatency.subname(i, system->getMasterName(i));
2282    }
2283
2284    replacements
2285        .name(name() + ".replacements")
2286        .desc("number of replacements")
2287        ;
2288}
2289
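// Register the Hit, Miss and Fill probe points exposed by the cache.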
2290void
2291BaseCache::regProbePoints()
2292{
2293    ppHit = new ProbePointArg<PacketPtr>(this->getProbeManager(), "Hit");
2294    ppMiss = new ProbePointArg<PacketPtr>(this->getProbeManager(), "Miss");
2295    ppFill = new ProbePointArg<PacketPtr>(this->getProbeManager(), "Fill");
2296}
2297
2298///////////////
2299//
2300// CpuSidePort
2301//
2302///////////////
2303bool
2304BaseCache::CpuSidePort::recvTimingSnoopResp(PacketPtr pkt)
2305{
2306    // Snoops shouldn't happen when bypassing caches
2307    assert(!cache->system->bypassCaches());
2308
2309    assert(pkt->isResponse());
2310
2311    // Express snoop responses from master to slave, e.g., from L1 to L2
2312    cache->recvTimingSnoopResp(pkt);
2313    return true;
2314}
2315
2316
2317bool
2318BaseCache::CpuSidePort::tryTiming(PacketPtr pkt)
2319{
2320    if (cache->system->bypassCaches() || pkt->isExpressSnoop()) {
2321        // always let express snoop packets through even if blocked
2322        return true;
2323    } else if (blocked || mustSendRetry) {
2324        // either already committed to send a retry, or blocked
2325        mustSendRetry = true;
2326        return false;
2327    }
2328    mustSendRetry = false;
2329    return true;
2330}
2331
2332bool
2333BaseCache::CpuSidePort::recvTimingReq(PacketPtr pkt)
2334{
2335    assert(pkt->isRequest());
2336
2337    if (cache->system->bypassCaches()) {
2338        // Just forward the packet if caches are disabled.
2339        // @todo This should really enqueue the packet rather
2340        bool M5_VAR_USED success = cache->memSidePort.sendTimingReq(pkt);
2341        assert(success);
2342        return true;
2343    } else if (tryTiming(pkt)) {
2344        cache->recvTimingReq(pkt);
2345        return true;
2346    }
2347    return false;
2348}
2349
2350Tick
2351BaseCache::CpuSidePort::recvAtomic(PacketPtr pkt)
2352{
2353    if (cache->system->bypassCaches()) {
2354        // Forward the request if the system is in cache bypass mode.
2355        return cache->memSidePort.sendAtomic(pkt);
2356    } else {
2357        return cache->recvAtomic(pkt);
2358    }
2359}
2360
2361void
2362BaseCache::CpuSidePort::recvFunctional(PacketPtr pkt)
2363{
2364    if (cache->system->bypassCaches()) {
2365        // The cache should be flushed if we are in cache bypass mode,
2366        // so we don't need to check if we need to update anything.
2367        cache->memSidePort.sendFunctional(pkt);
2368        return;
2369    }
2370
2371    // functional request
2372    cache->functionalAccess(pkt, true);
2373}
2374
2375AddrRangeList
2376BaseCache::CpuSidePort::getAddrRanges() const
2377{
2378    return cache->getAddrRanges();
2379}
2380
2381
2382BaseCache::
2383CpuSidePort::CpuSidePort(const std::string &_name, BaseCache *_cache,
2384                         const std::string &_label)
2385    : CacheSlavePort(_name, _cache, _label), cache(_cache)
2386{
2387}
2388
2389///////////////
2390//
2391// MemSidePort
2392//
2393///////////////
2394bool
2395BaseCache::MemSidePort::recvTimingResp(PacketPtr pkt)
2396{
2397    cache->recvTimingResp(pkt);
2398    return true;
2399}
2400
2401// Express snooping requests to memside port
2402void
2403BaseCache::MemSidePort::recvTimingSnoopReq(PacketPtr pkt)
2404{
2405    // Snoops shouldn't happen when bypassing caches
2406    assert(!cache->system->bypassCaches());
2407
2408    // handle snooping requests
2409    cache->recvTimingSnoopReq(pkt);
2410}
2411
2412Tick
2413BaseCache::MemSidePort::recvAtomicSnoop(PacketPtr pkt)
2414{
2415    // Snoops shouldn't happen when bypassing caches
2416    assert(!cache->system->bypassCaches());
2417
2418    return cache->recvAtomicSnoop(pkt);
2419}
2420
2421void
2422BaseCache::MemSidePort::recvFunctionalSnoop(PacketPtr pkt)
2423{
2424    // Snoops shouldn't happen when bypassing caches
2425    assert(!cache->system->bypassCaches());
2426
2427    // functional snoop (note that in contrast to atomic we don't have
2428    // a specific functionalSnoop method, as they have the same
2429    // behaviour regardless)
2430    cache->functionalAccess(pkt, false);
2431}
2432
2433void
2434BaseCache::CacheReqPacketQueue::sendDeferredPacket()
2435{
2436    // sanity check
2437    assert(!waitingOnRetry);
2438
2439    // there should never be any deferred request packets in the
2440    // queue; instead we rely on the cache to provide the packets
2441    // from the MSHR queue or write queue
2442    assert(deferredPacketReadyTime() == MaxTick);
2443
2444    // check for request packets (requests & writebacks)
2445    QueueEntry* entry = cache.getNextQueueEntry();
2446
2447    if (!entry) {
2448        // can happen if e.g. we attempt a writeback and fail, but
2449        // before the retry, the writeback is eliminated because
2450        // we snoop another cache's ReadEx.
2451    } else {
2452        // let our snoop responses go first if there are responses to
2453        // the same addresses
2454        if (checkConflictingSnoop(entry->blkAddr)) {
2455            return;
2456        }
2457        waitingOnRetry = entry->sendPacket(cache);
2458    }
2459
2460    // if we succeeded and are not waiting for a retry, schedule the
2461    // next send considering when the next queue is ready; note that
2462    // snoop responses have their own packet queue and thus schedule
2463    // their own events
2464    if (!waitingOnRetry) {
2465        schedSendEvent(cache.nextQueueReadyTime());
2466    }
2467}
2468
2469BaseCache::MemSidePort::MemSidePort(const std::string &_name,
2470                                    BaseCache *_cache,
2471                                    const std::string &_label)
2472    : CacheMasterPort(_name, _cache, _reqQueue, _snoopRespQueue),
2473      _reqQueue(*_cache, *this, _snoopRespQueue, _label),
2474      _snoopRespQueue(*_cache, *this, true, _label), cache(_cache)
2475{
2476}
2477
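// Track sequential write streams: a write that continues where the
// previous one ended grows byteCount and moves the allocator from
// allocate to coalesce and eventually to no-allocate once the
// respective byte-count thresholds are crossed, while a non-contiguous
// write restarts the tracking in allocate mode.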
2478void
2479WriteAllocator::updateMode(Addr write_addr, unsigned write_size,
2480                           Addr blk_addr)
2481{
2482    // check if we are continuing where the last write ended
2483    if (nextAddr == write_addr) {
2484        delayCtr[blk_addr] = delayThreshold;
2485        // stop if we have already saturated
2486        if (mode != WriteMode::NO_ALLOCATE) {
2487            byteCount += write_size;
2488            // switch to coalescing mode if we have passed the lower
2489            // threshold
2490            if (mode == WriteMode::ALLOCATE &&
2491                byteCount > coalesceLimit) {
2492                mode = WriteMode::COALESCE;
2493                DPRINTF(Cache, "Switched to write coalescing\n");
2494            } else if (mode == WriteMode::COALESCE &&
2495                       byteCount > noAllocateLimit) {
2496                // and switch to non-allocating mode if we pass the
2497                // upper threshold
2498                mode = WriteMode::NO_ALLOCATE;
2499                DPRINTF(Cache, "Switched to write-no-allocate\n");
2500            }
2501        }
2502    } else {
2503        // we did not see a write matching the previous one, start
2504        // over again
2505        byteCount = write_size;
2506        mode = WriteMode::ALLOCATE;
2507        resetDelay(blk_addr);
2508    }
2509    nextAddr = write_addr + write_size;
2510}
2511
2512WriteAllocator*
2513WriteAllocatorParams::create()
2514{
2515    return new WriteAllocator(this);
2516}
2517