base.hh revision 13416
1/*
2 * Copyright (c) 2012-2013, 2015-2016, 2018 ARM Limited
3 * All rights reserved.
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder.  You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Copyright (c) 2003-2005 The Regents of The University of Michigan
15 * All rights reserved.
16 *
17 * Redistribution and use in source and binary forms, with or without
18 * modification, are permitted provided that the following conditions are
19 * met: redistributions of source code must retain the above copyright
20 * notice, this list of conditions and the following disclaimer;
21 * redistributions in binary form must reproduce the above copyright
22 * notice, this list of conditions and the following disclaimer in the
23 * documentation and/or other materials provided with the distribution;
24 * neither the name of the copyright holders nor the names of its
25 * contributors may be used to endorse or promote products derived from
26 * this software without specific prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
29 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
31 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
32 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
33 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
34 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
35 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
36 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
37 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
38 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
39 *
40 * Authors: Erik Hallnor
41 *          Steve Reinhardt
42 *          Ron Dreslinski
43 *          Andreas Hansson
44 *          Nikos Nikoleris
45 */
46
47/**
48 * @file
49 * Declares a basic cache interface BaseCache.
50 */
51
52#ifndef __MEM_CACHE_BASE_HH__
53#define __MEM_CACHE_BASE_HH__
54
55#include <cassert>
56#include <cstdint>
57#include <string>
#include <unordered_map>
58
59#include "base/addr_range.hh"
60#include "base/statistics.hh"
61#include "base/trace.hh"
62#include "base/types.hh"
63#include "debug/Cache.hh"
64#include "debug/CachePort.hh"
65#include "enums/Clusivity.hh"
66#include "mem/cache/cache_blk.hh"
67#include "mem/cache/mshr_queue.hh"
68#include "mem/cache/tags/base.hh"
69#include "mem/cache/write_queue.hh"
70#include "mem/cache/write_queue_entry.hh"
71#include "mem/mem_object.hh"
72#include "mem/packet.hh"
73#include "mem/packet_queue.hh"
74#include "mem/qport.hh"
75#include "mem/request.hh"
76#include "params/WriteAllocator.hh"
77#include "sim/eventq.hh"
78#include "sim/probe/probe.hh"
79#include "sim/serialize.hh"
80#include "sim/sim_exit.hh"
81#include "sim/system.hh"
82
83class BaseMasterPort;
84class BasePrefetcher;
85class BaseSlavePort;
86class MSHR;
87class MasterPort;
88class QueueEntry;
89struct BaseCacheParams;
90
91/**
92 * A basic cache interface. Implements some common functions for speed.
93 */
94class BaseCache : public MemObject
95{
96  protected:
97    /**
98     * Indexes to enumerate the MSHR queues.
99     */
100    enum MSHRQueueIndex {
101        MSHRQueue_MSHRs,
102        MSHRQueue_WriteBuffer
103    };
104
105  public:
106    /**
107     * Reasons for caches to be blocked.
108     */
109    enum BlockedCause {
110        Blocked_NoMSHRs = MSHRQueue_MSHRs,
111        Blocked_NoWBBuffers = MSHRQueue_WriteBuffer,
112        Blocked_NoTargets,
113        NUM_BLOCKED_CAUSES
114    };
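    // Note that the first two blocked causes share their values with the
    // MSHRQueueIndex entries above, so a full MSHR queue or write buffer
    // can be cast directly to the corresponding blocked cause (see
    // allocateMissBuffer and allocateWriteBuffer below).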
115
116  protected:
117
118    /**
119     * A cache master port is used for the memory-side port of the
120     * cache, and in addition to the basic timing port that only sends
121     * response packets through a transmit list, it also offers the
122     * ability to schedule and send request packets (requests &
123     * writebacks). The send event is scheduled through schedSendEvent,
124     * and the sendDeferredPacket of the timing port is modified to
125     * consider both the transmit list and the requests from the MSHR.
126     */
127    class CacheMasterPort : public QueuedMasterPort
128    {
129
130      public:
131
132        /**
133         * Schedule a send of a request packet (from the MSHR). Note
134         * that we could already have a retry outstanding.
135         */
136        void schedSendEvent(Tick time)
137        {
138            DPRINTF(CachePort, "Scheduling send event at %llu\n", time);
139            reqQueue.schedSendEvent(time);
140        }
141
142      protected:
143
144        CacheMasterPort(const std::string &_name, BaseCache *_cache,
145                        ReqPacketQueue &_reqQueue,
146                        SnoopRespPacketQueue &_snoopRespQueue) :
147            QueuedMasterPort(_name, _cache, _reqQueue, _snoopRespQueue)
148        { }
149
150        /**
151         * Memory-side port always snoops.
152         *
153         * @return always true
154         */
155        virtual bool isSnooping() const { return true; }
156    };
157
158    /**
159     * Override the default behaviour of sendDeferredPacket to enable
160     * the memory-side cache port to also send requests based on the
161     * current MSHR status. This queue has a pointer to our specific
162     * cache implementation and is used by the MemSidePort.
163     */
164    class CacheReqPacketQueue : public ReqPacketQueue
165    {
166
167      protected:
168
169        BaseCache &cache;
170        SnoopRespPacketQueue &snoopRespQueue;
171
172      public:
173
174        CacheReqPacketQueue(BaseCache &cache, MasterPort &port,
175                            SnoopRespPacketQueue &snoop_resp_queue,
176                            const std::string &label) :
177            ReqPacketQueue(cache, port, label), cache(cache),
178            snoopRespQueue(snoop_resp_queue) { }
179
180        /**
181         * Override the normal sendDeferredPacket and do not only
182         * consider the transmit list (used for responses), but also
183         * requests.
184         */
185        virtual void sendDeferredPacket();
186
187        /**
188         * Check if there is a conflicting snoop response about to be
189         * sent out, and if so simply stall any requests, and schedule
190         * a send event at the same time as the next snoop response is
191         * being sent out.
192         */
193        bool checkConflictingSnoop(Addr addr)
194        {
195            if (snoopRespQueue.hasAddr(addr)) {
196                DPRINTF(CachePort, "Waiting for snoop response to be "
197                        "sent\n");
198                Tick when = snoopRespQueue.deferredPacketReadyTime();
199                schedSendEvent(when);
200                return true;
201            }
202            return false;
203        }
204    };
205
206
207    /**
208     * The memory-side port extends the base cache master port with
209     * access functions for functional, atomic and timing snoops.
210     */
211    class MemSidePort : public CacheMasterPort
212    {
213      private:
214
215        /** The cache-specific queue. */
216        CacheReqPacketQueue _reqQueue;
217
218        SnoopRespPacketQueue _snoopRespQueue;
219
220        // a pointer to our specific cache implementation
221        BaseCache *cache;
222
223      protected:
224
225        virtual void recvTimingSnoopReq(PacketPtr pkt);
226
227        virtual bool recvTimingResp(PacketPtr pkt);
228
229        virtual Tick recvAtomicSnoop(PacketPtr pkt);
230
231        virtual void recvFunctionalSnoop(PacketPtr pkt);
232
233      public:
234
235        MemSidePort(const std::string &_name, BaseCache *_cache,
236                    const std::string &_label);
237    };
238
239    /**
240     * A cache slave port is used for the CPU-side port of the cache,
241     * and it is basically a simple timing port that uses a transmit
242     * list for responses to the CPU (or connected master). In
243     * addition, it has the functionality to block the port for
244     * incoming requests. If blocked, the port will issue a retry once
245     * unblocked.
246     */
247    class CacheSlavePort : public QueuedSlavePort
248    {
249
250      public:
251
252        /** Do not accept any new requests. */
253        void setBlocked();
254
255        /** Return to normal operation and accept new requests. */
256        void clearBlocked();
257
258        bool isBlocked() const { return blocked; }
259
260      protected:
261
262        CacheSlavePort(const std::string &_name, BaseCache *_cache,
263                       const std::string &_label);
264
265        /** A normal packet queue used to store responses. */
266        RespPacketQueue queue;
267
268        bool blocked;
269
270        bool mustSendRetry;
271
272      private:
273
274        void processSendRetry();
275
276        EventFunctionWrapper sendRetryEvent;
277
278    };
279
280    /**
281     * The CPU-side port extends the base cache slave port with access
282     * functions for functional, atomic and timing requests.
283     */
284    class CpuSidePort : public CacheSlavePort
285    {
286      private:
287
288        // a pointer to our specific cache implementation
289        BaseCache *cache;
290
291      protected:
292        virtual bool recvTimingSnoopResp(PacketPtr pkt) override;
293
294        virtual bool tryTiming(PacketPtr pkt) override;
295
296        virtual bool recvTimingReq(PacketPtr pkt) override;
297
298        virtual Tick recvAtomic(PacketPtr pkt) override;
299
300        virtual void recvFunctional(PacketPtr pkt) override;
301
302        virtual AddrRangeList getAddrRanges() const override;
303
304      public:
305
306        CpuSidePort(const std::string &_name, BaseCache *_cache,
307                    const std::string &_label);
308
309    };
310
311    CpuSidePort cpuSidePort;
312    MemSidePort memSidePort;
313
314  protected:
315
316    /** Miss status registers */
317    MSHRQueue mshrQueue;
318
319    /** Write/writeback buffer */
320    WriteQueue writeBuffer;
321
322    /** Tag and data Storage */
323    BaseTags *tags;
324
325    /** Prefetcher */
326    BasePrefetcher *prefetcher;
327
328    /** To probe when a cache hit occurs */
329    ProbePointArg<PacketPtr> *ppHit;
330
331    /** To probe when a cache miss occurs */
332    ProbePointArg<PacketPtr> *ppMiss;
333
334    /**
335     * The writeAllocator drives optimizations for streaming writes.
336     * It first determines whether a WriteReq MSHR should be delayed,
337     * thus ensuring that we wait longer in cases when we are write
338     * coalescing and allowing all the bytes of the line to be written
339     * before the MSHR packet is sent downstream. This works in unison
340     * with the tracking in the MSHR to check if the entire line is
341     * written. The write mode also affects the behaviour on filling
342     * any whole-line writes. Normally the cache allocates the line
343     * when receiving the InvalidateResp, but after seeing enough
344     * consecutive lines we switch to using the tempBlock, and thus
345     * end up not allocating the line, and instead turning the
346     * whole-line write into a writeback straight away.
347     */
348    WriteAllocator * const writeAllocator;
349
350    /**
351     * Temporary cache block for occasional transitory use.  We use
352     * the tempBlock to fill when allocation fails (e.g., when there
353     * is an outstanding request that accesses the victim block) or
354     * when we want to avoid allocation (e.g., exclusive caches)
355     */
356    TempCacheBlk *tempBlock;
357
358    /**
359     * Upstream caches need this packet until true is returned, so
360     * hold it for deletion until a subsequent call
361     */
362    std::unique_ptr<Packet> pendingDelete;
363
364    /**
365     * Mark a request as in service (sent downstream in the memory
366     * system), effectively making this MSHR the ordering point.
367     */
368    void markInService(MSHR *mshr, bool pending_modified_resp)
369    {
370        bool wasFull = mshrQueue.isFull();
371        mshrQueue.markInService(mshr, pending_modified_resp);
372
373        if (wasFull && !mshrQueue.isFull()) {
374            clearBlocked(Blocked_NoMSHRs);
375        }
376    }
377
378    void markInService(WriteQueueEntry *entry)
379    {
380        bool wasFull = writeBuffer.isFull();
381        writeBuffer.markInService(entry);
382
383        if (wasFull && !writeBuffer.isFull()) {
384            clearBlocked(Blocked_NoWBBuffers);
385        }
386    }
387
388    /**
389     * Determine whether we should allocate on a fill or not. If this
390     * cache is mostly inclusive with regards to the upstream cache(s)
391     * we always allocate (for any non-forwarded and cacheable
392     * requests). In the case of a mostly exclusive cache, we allocate
393     * on fill if the packet did not come from a cache, that is if we
394     * are dealing with a whole-line write (which behaves much like a
395     * writeback), the original target packet came from a non-caching
396     * source, or we are performing a prefetch or an LLSC operation.
397     *
398     * @param cmd Command of the incoming requesting packet
399     * @return Whether we should allocate on the fill
400     */
401    inline bool allocOnFill(MemCmd cmd) const
402    {
403        return clusivity == Enums::mostly_incl ||
404            cmd == MemCmd::WriteLineReq ||
405            cmd == MemCmd::ReadReq ||
406            cmd == MemCmd::WriteReq ||
407            cmd.isPrefetch() ||
408            cmd.isLLSC();
409    }
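    // Illustrative outcomes of allocOnFill, assuming a mostly-exclusive
    // clusivity setting (with mostly_incl any cacheable fill allocates):
    //
    //   allocOnFill(MemCmd::WriteLineReq); // true: whole-line write
    //   allocOnFill(MemCmd::ReadReq);      // true: non-caching source
    //   allocOnFill(MemCmd::ReadExReq);    // false: request came from a cache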
410
411    /**
412     * Regenerate block address using tags.
413     * Block address regeneration depends on whether we're using a temporary
414     * block or not.
415     *
416     * @param blk The block whose address is to be regenerated.
417     * @return The block's address.
418     */
419    Addr regenerateBlkAddr(CacheBlk* blk);
420
421    /**
422     * Does all the processing necessary to perform the provided request.
423     * @param pkt The memory request to perform.
424     * @param blk The cache block to be updated.
425     * @param lat The latency of the access.
426     * @param writebacks List for any writebacks that need to be performed.
427     * @return Boolean indicating whether the request was satisfied.
428     */
429    virtual bool access(PacketPtr pkt, CacheBlk *&blk, Cycles &lat,
430                        PacketList &writebacks);
431
432    /*
433     * Handle a timing request that hit in the cache
434     *
435     * @param pkt The request packet
436     * @param blk The referenced block
437     * @param request_time The tick at which the block lookup is complete
438     */
439    virtual void handleTimingReqHit(PacketPtr pkt, CacheBlk *blk,
440                                    Tick request_time);
441
442    /*
443     * Handle a timing request that missed in the cache
444     *
445     * Implementation specific handling for different cache
446     * implementations
447     *
448     * @param pkt The request packet
449     * @param blk The referenced block
450     * @param forward_time The tick at which we can process dependent requests
451     * @param request_time The tick at which the block lookup is complete
452     */
453    virtual void handleTimingReqMiss(PacketPtr pkt, CacheBlk *blk,
454                                     Tick forward_time,
455                                     Tick request_time) = 0;
456
457    /*
458     * Handle a timing request that missed in the cache
459     *
460     * Common functionality across different cache implementations
461     *
462     * @param pkt The request packet
463     * @param blk The referenced block
464     * @param mshr Any existing mshr for the referenced cache block
465     * @param forward_time The tick at which we can process dependent requests
466     * @param request_time The tick at which the block lookup is complete
467     */
468    void handleTimingReqMiss(PacketPtr pkt, MSHR *mshr, CacheBlk *blk,
469                             Tick forward_time, Tick request_time);
470
471    /**
472     * Performs the access specified by the request.
473     * @param pkt The request to perform.
474     */
475    virtual void recvTimingReq(PacketPtr pkt);
476
477    /**
478     * Handling the special case of uncacheable write responses to
479     * make recvTimingResp less cluttered.
480     */
481    void handleUncacheableWriteResp(PacketPtr pkt);
482
483    /**
484     * Service non-deferred MSHR targets using the received response
485     *
486     * Iterates through the list of targets that can be serviced with
487     * the current response. Any writebacks that need to be performed
488     * must be appended to the writebacks parameter.
489     *
490     * @param mshr The MSHR that corresponds to the response
491     * @param pkt The response packet
492     * @param blk The referenced block
493     * @param writebacks List of writebacks that need to be performed
494     */
495    virtual void serviceMSHRTargets(MSHR *mshr, const PacketPtr pkt,
496                                    CacheBlk *blk, PacketList& writebacks) = 0;
497
498    /**
499     * Handles a response (cache line fill/write ack) from the bus.
500     * @param pkt The response packet
501     */
502    virtual void recvTimingResp(PacketPtr pkt);
503
504    /**
505     * Snoops bus transactions to maintain coherence.
506     * @param pkt The current bus transaction.
507     */
508    virtual void recvTimingSnoopReq(PacketPtr pkt) = 0;
509
510    /**
511     * Handle a snoop response.
512     * @param pkt Snoop response packet
513     */
514    virtual void recvTimingSnoopResp(PacketPtr pkt) = 0;
515
516    /**
517     * Handle a request in atomic mode that missed in this cache
518     *
519     * Creates a downstream request, sends it to the memory below and
520     * handles the response. As we are in atomic mode all operations
521     * are performed immediately.
522     *
523     * @param pkt The packet with the requests
524     * @param blk The referenced block
525     * @param writebacks A list with packets for any performed writebacks
526     * @return Cycles for handling the request
527     */
528    virtual Cycles handleAtomicReqMiss(PacketPtr pkt, CacheBlk *&blk,
529                                       PacketList &writebacks) = 0;
530
531    /**
532     * Performs the access specified by the request.
533     * @param pkt The request to perform.
534     * @return The number of ticks required for the access.
535     */
536    virtual Tick recvAtomic(PacketPtr pkt);
537
538    /**
539     * Snoop for the provided request in the cache and return the estimated
540     * time taken.
541     * @param pkt The memory request to snoop
542     * @return The number of ticks required for the snoop.
543     */
544    virtual Tick recvAtomicSnoop(PacketPtr pkt) = 0;
545
546    /**
547     * Performs the access specified by the request.
548     *
549     * @param pkt The request to perform.
550     * @param from_cpu_side True if the access is from the CPU-side port
551     */
552    virtual void functionalAccess(PacketPtr pkt, bool from_cpu_side);
553
554    /**
555     * Handle doing the Compare and Swap function for SPARC.
556     */
557    void cmpAndSwap(CacheBlk *blk, PacketPtr pkt);
558
559    /**
560     * Return the next queue entry to service, either a pending miss
561     * from the MSHR queue, a buffered write from the write buffer, or
562     * something from the prefetcher. This function is responsible
563     * for prioritizing among those sources on the fly.
564     */
565    QueueEntry* getNextQueueEntry();
566
567    /**
568     * Insert writebacks into the write buffer
569     */
570    virtual void doWritebacks(PacketList& writebacks, Tick forward_time) = 0;
571
572    /**
573     * Send writebacks down the memory hierarchy in atomic mode
574     */
575    virtual void doWritebacksAtomic(PacketList& writebacks) = 0;
576
577    /**
578     * Create an appropriate downstream bus request packet.
579     *
580     * Creates a new packet with the request to be sent to the memory
581     * below, or nullptr if the current request in cpu_pkt should just
582     * be forwarded on.
583     *
584     * @param cpu_pkt The miss packet that needs to be satisfied.
585     * @param blk The referenced block, can be nullptr.
586     * @param needs_writable Indicates that the block must be writable
587     * even if the request in cpu_pkt doesn't indicate that.
588     * @param is_whole_line_write True if there are writes for the
589     * whole line
590     * @return The packet to be sent to the memory below
591     */
592    virtual PacketPtr createMissPacket(PacketPtr cpu_pkt, CacheBlk *blk,
593                                       bool needs_writable,
594                                       bool is_whole_line_write) const = 0;
595
596    /**
597     * Determine if clean lines should be written back or not. In
598     * cases where a downstream cache is mostly inclusive we likely
599     * want it to act as a victim cache also for lines that have not
600     * been modified. Hence, we cannot simply drop the line (or send a
601     * clean evict), but rather need to send the actual data.
602     */
603    const bool writebackClean;
604
605    /**
606     * Writebacks from the tempBlock, generated on the response path
607     * in atomic mode, must happen after the call to recvAtomic has
608     * finished (for the right ordering of the packets). We therefore
609     * need to hold on to the packets, and have a method and an event
610     * to send them.
611     */
612    PacketPtr tempBlockWriteback;
613
614    /**
615     * Send the outstanding tempBlock writeback. To be called after
616     * recvAtomic finishes in cases where the block we filled is in
617     * fact the tempBlock, and now needs to be written back.
618     */
619    void writebackTempBlockAtomic() {
620        assert(tempBlockWriteback != nullptr);
621        PacketList writebacks{tempBlockWriteback};
622        doWritebacksAtomic(writebacks);
623        tempBlockWriteback = nullptr;
624    }
625
626    /**
627     * An event to writeback the tempBlock after recvAtomic
628     * finishes. To avoid other calls to recvAtomic getting in
629     * between, we create this event with a higher priority.
630     */
631    EventFunctionWrapper writebackTempBlockAtomicEvent;
632
633    /**
634     * Perform any necessary updates to the block and perform any data
635     * exchange between the packet and the block. The flags of the
636     * packet are also set accordingly.
637     *
638     * @param pkt Request packet from upstream that hit a block
639     * @param blk Cache block that the packet hit
640     * @param deferred_response Whether this request originally missed
641     * @param pending_downgrade Whether the writable flag is to be removed
642     */
643    virtual void satisfyRequest(PacketPtr pkt, CacheBlk *blk,
644                                bool deferred_response = false,
645                                bool pending_downgrade = false);
646
647    /**
648     * Maintain the clusivity of this cache by potentially
649     * invalidating a block. This method works in conjunction with
650     * satisfyRequest, but is separate to allow us to handle all MSHR
651     * targets before potentially dropping a block.
652     *
653     * @param from_cache Whether we have dealt with a packet from a cache
654     * @param blk The block that should potentially be dropped
655     */
656    void maintainClusivity(bool from_cache, CacheBlk *blk);
657
658    /**
659     * Handle a fill operation caused by a received packet.
660     *
661     * Populates a cache block and handles all outstanding requests for the
662     * satisfied fill request. This version takes two memory requests. One
663     * contains the fill data, the other is an optional target to satisfy.
664     * Note that the reason we return a list of writebacks rather than
665     * inserting them directly in the write buffer is that this function
666     * is called by both atomic and timing-mode accesses, and in atomic
667     * mode we don't mess with the write buffer (we just perform the
668     * writebacks atomically once the original request is complete).
669     *
670     * @param pkt The memory request with the fill data.
671     * @param blk The cache block if it already exists.
672     * @param writebacks List for any writebacks that need to be performed.
673     * @param allocate Whether to allocate a block or use the temp block
674     * @return Pointer to the new cache block.
675     */
676    CacheBlk *handleFill(PacketPtr pkt, CacheBlk *blk,
677                         PacketList &writebacks, bool allocate);
678
679    /**
680     * Allocate a new block and perform any necessary writebacks
681     *
682     * Find a victim block and if necessary prepare writebacks for any
683     * existing data. May return nullptr if there are no replaceable
684     * blocks. If a replaceable block is found, it inserts the new block in
685     * its place. The new block, however, is not set as valid yet.
686     *
687     * @param pkt Packet holding the address to update
688     * @param writebacks A list of writeback packets for the evicted blocks
689     * @return the allocated block
690     */
691    CacheBlk *allocateBlock(const PacketPtr pkt, PacketList &writebacks);
692    /**
693     * Evict a cache block.
694     *
695     * Performs a writeback if necessary and invalidates the block
696     *
697     * @param blk Block to invalidate
698     * @return A packet with the writeback, can be nullptr
699     */
700    M5_NODISCARD virtual PacketPtr evictBlock(CacheBlk *blk) = 0;
701
702    /**
703     * Evict a cache block.
704     *
705     * Performs a writeback if necessary and invalidates the block
706     *
707     * @param blk Block to invalidate
708     * @param writebacks Return a list of packets with writebacks
709     */
710    void evictBlock(CacheBlk *blk, PacketList &writebacks);
711
712    /**
713     * Invalidate a cache block.
714     *
715     * @param blk Block to invalidate
716     */
717    void invalidateBlock(CacheBlk *blk);
718
719    /**
720     * Create a writeback request for the given block.
721     *
722     * @param blk The block to writeback.
723     * @return The writeback request for the block.
724     */
725    PacketPtr writebackBlk(CacheBlk *blk);
726
727    /**
728     * Create a writeclean request for the given block.
729     *
730     * Creates a request that writes the block to the cache below
731     * without evicting the block from the current cache.
732     *
733     * @param blk The block to write clean.
734     * @param dest The destination of the write clean operation.
735     * @param id Use the given packet id for the write clean operation.
736     * @return The generated write clean packet.
737     */
738    PacketPtr writecleanBlk(CacheBlk *blk, Request::Flags dest, PacketId id);
739
740    /**
741     * Write back dirty blocks in the cache using functional accesses.
742     */
743    virtual void memWriteback() override;
744
745    /**
746     * Invalidates all blocks in the cache.
747     *
748     * @warn Dirty cache lines will not be written back to
749     * memory. Make sure to call memWriteback() first if you
750     * want them written back to memory.
751     */
752    virtual void memInvalidate() override;
753
754    /**
755     * Determine if there are any dirty blocks in the cache.
756     *
757     * @return true if at least one block is dirty, false otherwise.
758     */
759    bool isDirty() const;
760
761    /**
762     * Determine if an address is in the ranges covered by this
763     * cache. This is useful to filter snoops.
764     *
765     * @param addr Address to check against
766     *
767     * @return If the address in question is in range
768     */
769    bool inRange(Addr addr) const;
770
771    /**
772     * Find next request ready time from among possible sources.
773     */
774    Tick nextQueueReadyTime() const;
775
776    /** Block size of this cache */
777    const unsigned blkSize;
778
779    /**
780     * The latency of tag lookup of a cache. It occurs when there is
781     * an access to the cache.
782     */
783    const Cycles lookupLatency;
784
785    /**
786     * The latency of data access of a cache. It occurs when there is
787     * an access to the cache.
788     */
789    const Cycles dataLatency;
790
791    /**
792     * This is the forward latency of the cache. It occurs when there
793     * is a cache miss and a request is forwarded downstream, in
794     * particular an outbound miss.
795     */
796    const Cycles forwardLatency;
797
798    /** The latency to fill a cache block */
799    const Cycles fillLatency;
800
801    /**
802     * The latency of sending a response to its upper level cache/core on
803     * a linefill. The responseLatency parameter captures this
804     * latency.
805     */
806    const Cycles responseLatency;
807
808    /** The number of targets for each MSHR. */
809    const int numTarget;
810
811    /** Do we forward snoops from mem side port through to cpu side port? */
812    bool forwardSnoops;
813
814    /**
815     * Clusivity with respect to the upstream cache, determining if we
816     * fill into both this cache and the cache above on a miss. Note
817     * that we currently do not support strict clusivity policies.
818     */
819    const Enums::Clusivity clusivity;
820
821    /**
822     * Is this cache read only, for example the instruction cache, or
823     * table-walker cache. A cache that is read only should never see
824     * any writes, and should never get any dirty data (and hence
825     * never have to do any writebacks).
826     */
827    const bool isReadOnly;
828
829    /**
830     * Bit vector of the blocking reasons for the access path.
831     * @sa #BlockedCause
832     */
833    uint8_t blocked;
834
835    /** Increasing order number assigned to each incoming request. */
836    uint64_t order;
837
838    /** Stores time the cache blocked for statistics. */
839    Cycles blockedCycle;
840
841    /** Pointer to the MSHR that has no targets. */
842    MSHR *noTargetMSHR;
843
844    /** The number of misses to trigger an exit event. */
845    Counter missCount;
846
847    /**
848     * The address range to which the cache responds on the CPU side.
849     * Normally this is all possible memory addresses. */
850    const AddrRangeList addrRanges;
851
852  public:
853    /** System we are currently operating in. */
854    System *system;
855
856    // Statistics
857    /**
858     * @addtogroup CacheStatistics
859     * @{
860     */
861
862    /** Number of hits per thread for each type of command.
863        @sa Packet::Command */
864    Stats::Vector hits[MemCmd::NUM_MEM_CMDS];
865    /** Number of hits for demand accesses. */
866    Stats::Formula demandHits;
867    /** Number of hits for all accesses. */
868    Stats::Formula overallHits;
869
870    /** Number of misses per thread for each type of command.
871        @sa Packet::Command */
872    Stats::Vector misses[MemCmd::NUM_MEM_CMDS];
873    /** Number of misses for demand accesses. */
874    Stats::Formula demandMisses;
875    /** Number of misses for all accesses. */
876    Stats::Formula overallMisses;
877
878    /**
879     * Total number of cycles per thread/command spent waiting for a miss.
880     * Used to calculate the average miss latency.
881     */
882    Stats::Vector missLatency[MemCmd::NUM_MEM_CMDS];
883    /** Total number of cycles spent waiting for demand misses. */
884    Stats::Formula demandMissLatency;
885    /** Total number of cycles spent waiting for all misses. */
886    Stats::Formula overallMissLatency;
887
888    /** The number of accesses per command and thread. */
889    Stats::Formula accesses[MemCmd::NUM_MEM_CMDS];
890    /** The number of demand accesses. */
891    Stats::Formula demandAccesses;
892    /** The number of overall accesses. */
893    Stats::Formula overallAccesses;
894
895    /** The miss rate per command and thread. */
896    Stats::Formula missRate[MemCmd::NUM_MEM_CMDS];
897    /** The miss rate of all demand accesses. */
898    Stats::Formula demandMissRate;
899    /** The miss rate for all accesses. */
900    Stats::Formula overallMissRate;
901
902    /** The average miss latency per command and thread. */
903    Stats::Formula avgMissLatency[MemCmd::NUM_MEM_CMDS];
904    /** The average miss latency for demand misses. */
905    Stats::Formula demandAvgMissLatency;
906    /** The average miss latency for all misses. */
907    Stats::Formula overallAvgMissLatency;
908
909    /** The total number of cycles blocked for each blocked cause. */
910    Stats::Vector blocked_cycles;
911    /** The number of times this cache blocked for each blocked cause. */
912    Stats::Vector blocked_causes;
913
914    /** The average number of cycles blocked for each blocked cause. */
915    Stats::Formula avg_blocked;
916
917    /** The number of times a HW-prefetched block is evicted w/o reference. */
918    Stats::Scalar unusedPrefetches;
919
920    /** Number of blocks written back per thread. */
921    Stats::Vector writebacks;
922
923    /** Number of misses that hit in the MSHRs per command and thread. */
924    Stats::Vector mshr_hits[MemCmd::NUM_MEM_CMDS];
925    /** Demand misses that hit in the MSHRs. */
926    Stats::Formula demandMshrHits;
927    /** Total number of misses that hit in the MSHRs. */
928    Stats::Formula overallMshrHits;
929
930    /** Number of misses that miss in the MSHRs, per command and thread. */
931    Stats::Vector mshr_misses[MemCmd::NUM_MEM_CMDS];
932    /** Demand misses that miss in the MSHRs. */
933    Stats::Formula demandMshrMisses;
934    /** Total number of misses that miss in the MSHRs. */
935    Stats::Formula overallMshrMisses;
936
937    /** Number of uncacheable accesses, per command and thread. */
938    Stats::Vector mshr_uncacheable[MemCmd::NUM_MEM_CMDS];
939    /** Total number of uncacheable accesses. */
940    Stats::Formula overallMshrUncacheable;
941
942    /** Total cycle latency of each MSHR miss, per command and thread. */
943    Stats::Vector mshr_miss_latency[MemCmd::NUM_MEM_CMDS];
944    /** Total cycle latency of demand MSHR misses. */
945    Stats::Formula demandMshrMissLatency;
946    /** Total cycle latency of overall MSHR misses. */
947    Stats::Formula overallMshrMissLatency;
948
949    /** Total cycle latency of each uncacheable access, per command and thread. */
950    Stats::Vector mshr_uncacheable_lat[MemCmd::NUM_MEM_CMDS];
951    /** Total cycle latency of overall uncacheable MSHR accesses. */
952    Stats::Formula overallMshrUncacheableLatency;
953
954#if 0
955    /** The total number of MSHR accesses per command and thread. */
956    Stats::Formula mshrAccesses[MemCmd::NUM_MEM_CMDS];
957    /** The total number of demand MSHR accesses. */
958    Stats::Formula demandMshrAccesses;
959    /** The total number of MSHR accesses. */
960    Stats::Formula overallMshrAccesses;
961#endif
962
963    /** The miss rate in the MSHRs per command and thread. */
964    Stats::Formula mshrMissRate[MemCmd::NUM_MEM_CMDS];
965    /** The demand miss rate in the MSHRs. */
966    Stats::Formula demandMshrMissRate;
967    /** The overall miss rate in the MSHRs. */
968    Stats::Formula overallMshrMissRate;
969
970    /** The average latency of an MSHR miss, per command and thread. */
971    Stats::Formula avgMshrMissLatency[MemCmd::NUM_MEM_CMDS];
972    /** The average latency of a demand MSHR miss. */
973    Stats::Formula demandAvgMshrMissLatency;
974    /** The average overall latency of an MSHR miss. */
975    Stats::Formula overallAvgMshrMissLatency;
976
977    /** The average latency of an uncacheable access, per command and thread. */
978    Stats::Formula avgMshrUncacheableLatency[MemCmd::NUM_MEM_CMDS];
979    /** The average overall latency of an uncacheable MSHR access. */
980    Stats::Formula overallAvgMshrUncacheableLatency;
981
982    /** Number of replacements of valid blocks. */
983    Stats::Scalar replacements;
984
985    /**
986     * @}
987     */
988
989    /**
990     * Register stats for this object.
991     */
992    void regStats() override;
993
994    /** Registers probes. */
995    void regProbePoints() override;
996
997  public:
998    BaseCache(const BaseCacheParams *p, unsigned blk_size);
999    ~BaseCache();
1000
1001    void init() override;
1002
1003    BaseMasterPort &getMasterPort(const std::string &if_name,
1004                                  PortID idx = InvalidPortID) override;
1005    BaseSlavePort &getSlavePort(const std::string &if_name,
1006                                PortID idx = InvalidPortID) override;
1007
1008    /**
1009     * Query block size of a cache.
1010     * @return  The block size
1011     */
1012    unsigned
1013    getBlockSize() const
1014    {
1015        return blkSize;
1016    }
1017
1018    const AddrRangeList &getAddrRanges() const { return addrRanges; }
1019
1020    MSHR *allocateMissBuffer(PacketPtr pkt, Tick time, bool sched_send = true)
1021    {
1022        MSHR *mshr = mshrQueue.allocate(pkt->getBlockAddr(blkSize), blkSize,
1023                                        pkt, time, order++,
1024                                        allocOnFill(pkt->cmd));
1025
1026        if (mshrQueue.isFull()) {
1027            setBlocked((BlockedCause)MSHRQueue_MSHRs);
1028        }
1029
1030        if (sched_send) {
1031            // schedule the send
1032            schedMemSideSendEvent(time);
1033        }
1034
1035        return mshr;
1036    }
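    // Sketch of a typical miss-handling call site (names illustrative):
    //
    //   MSHR *mshr = allocateMissBuffer(pkt, forward_time);
    //
    // If this allocation fills the MSHR queue the cache blocks with
    // Blocked_NoMSHRs, and markInService() above lifts the block once an
    // entry has been sent downstream.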
1037
1038    void allocateWriteBuffer(PacketPtr pkt, Tick time)
1039    {
1040        // should only see writes or clean evicts here
1041        assert(pkt->isWrite() || pkt->cmd == MemCmd::CleanEvict);
1042
1043        Addr blk_addr = pkt->getBlockAddr(blkSize);
1044
1045        WriteQueueEntry *wq_entry =
1046            writeBuffer.findMatch(blk_addr, pkt->isSecure());
1047        if (wq_entry && !wq_entry->inService) {
1048            DPRINTF(Cache, "Potential to merge writeback %s", pkt->print());
1049        }
1050
1051        writeBuffer.allocate(blk_addr, blkSize, pkt, time, order++);
1052
1053        if (writeBuffer.isFull()) {
1054            setBlocked((BlockedCause)MSHRQueue_WriteBuffer);
1055        }
1056
1057        // schedule the send
1058        schedMemSideSendEvent(time);
1059    }
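    // The write buffer mirrors the MSHR queue: filling it blocks the cache
    // with Blocked_NoWBBuffers until markInService() frees an entry again.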
1060
1061    /**
1062     * Returns true if the cache is blocked for accesses.
1063     */
1064    bool isBlocked() const
1065    {
1066        return blocked != 0;
1067    }
1068
1069    /**
1070     * Marks the access path of the cache as blocked for the given cause. This
1071     * also sets the blocked flag in the slave interface.
1072     * @param cause The reason for the cache blocking.
1073     */
1074    void setBlocked(BlockedCause cause)
1075    {
1076        uint8_t flag = 1 << cause;
1077        if (blocked == 0) {
1078            blocked_causes[cause]++;
1079            blockedCycle = curCycle();
1080            cpuSidePort.setBlocked();
1081        }
1082        blocked |= flag;
1083        DPRINTF(Cache,"Blocking for cause %d, mask=%d\n", cause, blocked);
1084    }
1085
1086    /**
1087     * Marks the cache as unblocked for the given cause. This also clears the
1088     * blocked flags in the appropriate interfaces.
1089     * @param cause The newly unblocked cause.
1090     * @warning Calling this function can cause a blocked request on the bus to
1091     * access the cache. The cache must be in a state to handle that request.
1092     */
1093    void clearBlocked(BlockedCause cause)
1094    {
1095        uint8_t flag = 1 << cause;
1096        blocked &= ~flag;
1097        DPRINTF(Cache,"Unblocking for cause %d, mask=%d\n", cause, blocked);
1098        if (blocked == 0) {
1099            blocked_cycles[cause] += curCycle() - blockedCycle;
1100            cpuSidePort.clearBlocked();
1101        }
1102    }
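    // Illustrative blocking sequence: several causes can be active at once,
    // and the CPU-side port only unblocks when all of them are cleared.
    //
    //   setBlocked(Blocked_NoMSHRs);     // blocked == 0b001, port blocked
    //   setBlocked(Blocked_NoTargets);   // blocked == 0b101
    //   clearBlocked(Blocked_NoMSHRs);   // blocked == 0b100, still blocked
    //   clearBlocked(Blocked_NoTargets); // blocked == 0, retry sent upstream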
1103
1104    /**
1105     * Schedule a send event for the memory-side port. If already
1106     * scheduled, this may reschedule the event at an earlier
1107     * time. When the specified time is reached, the port is free to
1108     * send either a response, a request, or a prefetch request.
1109     *
1110     * @param time The time when to attempt sending a packet.
1111     */
1112    void schedMemSideSendEvent(Tick time)
1113    {
1114        memSidePort.schedSendEvent(time);
1115    }
1116
1117    bool inCache(Addr addr, bool is_secure) const {
1118        return tags->findBlock(addr, is_secure);
1119    }
1120
1121    bool inMissQueue(Addr addr, bool is_secure) const {
1122        return mshrQueue.findMatch(addr, is_secure);
1123    }
1124
1125    void incMissCount(PacketPtr pkt)
1126    {
1127        assert(pkt->req->masterId() < system->maxMasters());
1128        misses[pkt->cmdToIndex()][pkt->req->masterId()]++;
1129        pkt->req->incAccessDepth();
1130        if (missCount) {
1131            --missCount;
1132            if (missCount == 0)
1133                exitSimLoop("A cache reached the maximum miss count");
1134        }
1135    }
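    // Note: a non-zero missCount bounds the simulation; once that many
    // misses have been observed the simulation loop exits via exitSimLoop().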
1136    void incHitCount(PacketPtr pkt)
1137    {
1138        assert(pkt->req->masterId() < system->maxMasters());
1139        hits[pkt->cmdToIndex()][pkt->req->masterId()]++;
1140
1141    }
1142
1143    /**
1144     * Checks if the cache is coalescing writes
1145     *
1146     * @return True if the cache is coalescing writes
1147     */
1148    bool coalesce() const;
1149
1150
1151    /**
1152     * Cache block visitor that writes back dirty cache blocks using
1153     * functional writes.
1154     */
1155    void writebackVisitor(CacheBlk &blk);
1156
1157    /**
1158     * Cache block visitor that invalidates all blocks in the cache.
1159     *
1160     * @warn Dirty cache lines will not be written back to memory.
1161     */
1162    void invalidateVisitor(CacheBlk &blk);
1163
1164    /**
1165     * Take an MSHR, turn it into a suitable downstream packet, and
1166     * send it out. This construct allows a queue entry to choose a suitable
1167     * approach based on its type.
1168     *
1169     * @param mshr The MSHR to turn into a packet and send
1170     * @return True if the port is waiting for a retry
1171     */
1172    virtual bool sendMSHRQueuePacket(MSHR* mshr);
1173
1174    /**
1175     * Similar to sendMSHRQueuePacket, but for a write-queue entry
1176     * instead. Create the packet, and send it, and if successful also
1177     * mark the entry in service.
1178     *
1179     * @param wq_entry The write-queue entry to turn into a packet and send
1180     * @return True if the port is waiting for a retry
1181     */
1182    bool sendWriteQueuePacket(WriteQueueEntry* wq_entry);
1183
1184    /**
1185     * Serialize the state of the caches
1186     *
1187     * We currently don't support checkpointing cache state, so this panics.
1188     */
1189    void serialize(CheckpointOut &cp) const override;
1190    void unserialize(CheckpointIn &cp) override;
1191};
1192
1193/**
1194 * The write allocator inspects write packets and detects streaming
1195 * patterns. The write allocator supports a single stream where writes
1196 * are expected to access consecutive locations and keeps track of
1197 * the size of the area covered by the consecutive writes in byteCount.
1198 *
1199 * 1) When byteCount has surpassed the coalesceLimit, the mode
1200 * switches from ALLOCATE to COALESCE where writes should be delayed
1201 * until the whole block is written, at which point a single packet
1202 * (whole line write) can service them.
1203 *
1204 * 2) When byteCount has also exceeded the noAllocateLimit (whole
1205 * line) we switch to NO_ALLOCATE, where writes should not allocate in
1206 * the cache but rather send a whole line write to the memory below.
1207 */
1208class WriteAllocator : public SimObject {
1209  public:
1210    WriteAllocator(const WriteAllocatorParams *p) :
1211        SimObject(p),
1212        coalesceLimit(p->coalesce_limit * p->block_size),
1213        noAllocateLimit(p->no_allocate_limit * p->block_size),
1214        delayThreshold(p->delay_threshold)
1215    {
1216        reset();
1217    }
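    // Resulting thresholds, assuming illustrative parameter values: with
    // block_size = 64, coalesce_limit = 2 and no_allocate_limit = 12, the
    // coalesceLimit is 128 bytes and the noAllocateLimit is 768 bytes, so a
    // contiguous write stream switches to COALESCE after 128 tracked bytes
    // and to NO_ALLOCATE after 768.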
1218
1219    /**
1220     * Should writes be coalesced? This is true if the mode is set to
1221     * COALESCE or NO_ALLOCATE.
1222     *
1223     * @return True if the cache should coalesce writes.
1224     */
1225    bool coalesce() const {
1226        return mode != WriteMode::ALLOCATE;
1227    }
1228
1229    /**
1230     * Should writes allocate?
1231     *
1232     * @return True if the cache should allocate for writes.
1233     */
1234    bool allocate() const {
1235        return mode != WriteMode::NO_ALLOCATE;
1236    }
1237
1238    /**
1239     * Reset the write allocator state, meaning that it allocates for
1240     * writes and has not recorded any information about qualifying
1241     * writes that might trigger a switch to coalescing and later no
1242     * allocation.
1243     */
1244    void reset() {
1245        mode = WriteMode::ALLOCATE;
1246        byteCount = 0;
1247        nextAddr = 0;
1248    }
1249
1250    /**
1251     * Check whether we need to delay the current write.
1252     *
1253     * @param blk_addr The block address the packet writes to
1254     * @return true if the current packet should be delayed
1255     */
1256    bool delay(Addr blk_addr) {
1257        if (delayCtr[blk_addr] > 0) {
1258            --delayCtr[blk_addr];
1259            return true;
1260        } else {
1261            return false;
1262        }
1263    }
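    // For illustration: each call consumes one credit from delayCtr for the
    // block; once the counter reaches zero, delay() returns false and the
    // WriteReq MSHR is allowed to be sent downstream.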
1264
1265    /**
1266     * Clear delay counter for the input block
1267     *
1268     * @param blk_addr The accessed cache block
1269     */
1270    void resetDelay(Addr blk_addr) {
1271        delayCtr.erase(blk_addr);
1272    }
1273
1274    /**
1275     * Update the write mode based on the current write
1276     * packet. This method compares the packet's address with any
1277     * current stream, and updates the tracking and the mode
1278     * accordingly.
1279     *
1280     * @param write_addr Start address of the write request
1281     * @param write_size Size of the write request
1282     * @param blk_addr The block address that this packet writes to
1283     */
1284    void updateMode(Addr write_addr, unsigned write_size, Addr blk_addr);
1285
1286  private:
1287    /**
1288     * The current mode for write coalescing and allocation, either
1289     * normal operation (ALLOCATE), write coalescing (COALESCE), or
1290     * write coalescing without allocation (NO_ALLOCATE).
1291     */
1292    enum class WriteMode : char {
1293        ALLOCATE,
1294        COALESCE,
1295        NO_ALLOCATE,
1296    };
1297    WriteMode mode;
1298
1299    /** Address to match writes against to detect streams. */
1300    Addr nextAddr;
1301
1302    /**
1303     * Bytes written contiguously. Saturating once we no longer
1304     * allocate.
1305     */
1306    uint32_t byteCount;
1307
1308    /**
1309     * Limits for when to switch between the different write modes.
1310     */
1311    const uint32_t coalesceLimit;
1312    const uint32_t noAllocateLimit;
1313    /**
1314     * The number of times the allocator will delay a WriteReq MSHR.
1315     */
1316    const uint32_t delayThreshold;
1317
1318    /**
1319     * Keep track of the number of times the allocator has delayed a
1320     * WriteReq MSHR.
1321     */
1322    std::unordered_map<Addr, Counter> delayCtr;
1323};
1324
1325#endif //__MEM_CACHE_BASE_HH__
1326