// lsq_unit.hh revision 13590:d7e018859709
/*
 * Copyright (c) 2012-2014,2017-2018 ARM Limited
 * All rights reserved
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder.  You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Copyright (c) 2004-2006 The Regents of The University of Michigan
 * Copyright (c) 2013 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Kevin Lim
 *          Korey Sewell
 */

#ifndef __CPU_O3_LSQ_UNIT_HH__
#define __CPU_O3_LSQ_UNIT_HH__

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <map>
#include <memory>
#include <queue>
#include <string>

#include "arch/generic/debugfaults.hh"
#include "arch/isa_traits.hh"
#include "arch/locked_mem.hh"
#include "arch/mmapped_ipr.hh"
#include "base/circular_queue.hh"
#include "config/the_isa.hh"
#include "cpu/inst_seq.hh"
#include "cpu/timebuf.hh"
#include "debug/LSQUnit.hh"
#include "mem/packet.hh"
#include "mem/port.hh"

struct DerivO3CPUParams;

67/**
68 * Class that implements the actual LQ and SQ for each specific
69 * thread.  Both are circular queues; load entries are freed upon
70 * committing, while store entries are freed once they writeback. The
71 * LSQUnit tracks if there are memory ordering violations, and also
72 * detects partial load to store forwarding cases (a store only has
73 * part of a load's data) that requires the load to wait until the
74 * store writes back. In the former case it holds onto the instruction
75 * until the dependence unit looks at it, and in the latter it stalls
76 * the LSQ until the store writes back. At that point the load is
77 * replayed.
78 */
79template <class Impl>
80class LSQUnit
81{
82  public:
83    typedef typename Impl::O3CPU O3CPU;
84    typedef typename Impl::DynInstPtr DynInstPtr;
85    typedef typename Impl::CPUPol::IEW IEW;
86    typedef typename Impl::CPUPol::LSQ LSQ;
87    typedef typename Impl::CPUPol::IssueStruct IssueStruct;
88
89    using LSQSenderState = typename LSQ::LSQSenderState;
90    using LSQRequest = typename Impl::CPUPol::LSQ::LSQRequest;
91  private:
92    class LSQEntry
93    {
94      private:
95        /** The instruction. */
96        DynInstPtr inst;
97        /** The request. */
98        LSQRequest* req;
99        /** The size of the operation. */
100        uint8_t _size;
101        /** Valid entry. */
102        bool _valid;
103      public:
104        /** Constructs an empty store queue entry. */
105        LSQEntry()
106            : inst(nullptr), req(nullptr), _size(0), _valid(false)
107        {
108        }
109
110        ~LSQEntry()
111        {
112            inst = nullptr;
113            if (req != nullptr) {
114                req->freeLSQEntry();
115                req = nullptr;
116            }
117        }
118
119        void
120        clear()
121        {
122            inst = nullptr;
123            if (req != nullptr) {
124                req->freeLSQEntry();
125            }
126            req = nullptr;
127            _valid = false;
128            _size = 0;
129        }
130
131        void
132        set(const DynInstPtr& inst)
133        {
134            assert(!_valid);
135            this->inst = inst;
136            _valid = true;
137            _size = 0;
138        }
139        LSQRequest* request() { return req; }
140        void setRequest(LSQRequest* r) { req = r; }
141        bool hasRequest() { return req != nullptr; }
142        /** Member accessors. */
143        /** @{ */
144        bool valid() const { return _valid; }
145        uint8_t& size() { return _size; }
146        const uint8_t& size() const { return _size; }
147        const DynInstPtr& instruction() const { return inst; }
148        /** @} */
149    };
150
151    class SQEntry : public LSQEntry
152    {
153      private:
154        /** The store data. */
155        char _data[64];  // TODO: 64 should become a parameter
156        /** Whether or not the store can writeback. */
157        bool _canWB;
158        /** Whether or not the store is committed. */
159        bool _committed;
160        /** Whether or not the store is completed. */
161        bool _completed;
162        /** Does this request write all zeros and thus doesn't
163         * have any data attached to it. Used for cache block zero
164         * style instructs (ARM DC ZVA; ALPHA WH64)
165         */
166        bool _isAllZeros;
167      public:
168        static constexpr size_t DataSize = sizeof(_data);
169        /** Constructs an empty store queue entry. */
170        SQEntry()
171            : _canWB(false), _committed(false), _completed(false),
172              _isAllZeros(false)
173        {
174            std::memset(_data, 0, DataSize);
175        }
176
177        ~SQEntry()
178        {
179        }
180
181        void
182        set(const DynInstPtr& inst)
183        {
184            LSQEntry::set(inst);
185        }
186
187        void
188        clear()
189        {
190            LSQEntry::clear();
191            _canWB = _completed = _committed = _isAllZeros = false;
192        }
193        /** Member accessors. */
194        /** @{ */
195        bool& canWB() { return _canWB; }
196        const bool& canWB() const { return _canWB; }
197        bool& completed() { return _completed; }
198        const bool& completed() const { return _completed; }
199        bool& committed() { return _committed; }
200        const bool& committed() const { return _committed; }
201        bool& isAllZeros() { return _isAllZeros; }
202        const bool& isAllZeros() const { return _isAllZeros; }
203        char* data() { return _data; }
204        const char* data() const { return _data; }
205        /** @} */
206    };
207    using LQEntry = LSQEntry;
208
209  public:
210    using LoadQueue = CircularQueue<LQEntry>;
211    using StoreQueue = CircularQueue<SQEntry>;
212
213  public:
214    /** Constructs an LSQ unit. init() must be called prior to use. */
215    LSQUnit(uint32_t lqEntries, uint32_t sqEntries);
216
217    /** We cannot copy LSQUnit because it has stats for which copy
218     * contructor is deleted explicitly. However, STL vector requires
219     * a valid copy constructor for the base type at compile time.
220     */
221    LSQUnit(const LSQUnit &l) { panic("LSQUnit is not copy-able"); }
222
223    /** Initializes the LSQ unit with the specified number of entries. */
224    void init(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params,
225            LSQ *lsq_ptr, unsigned id);
226
227    /** Returns the name of the LSQ unit. */
228    std::string name() const;
229
230    /** Registers statistics. */
231    void regStats();
232
233    /** Sets the pointer to the dcache port. */
234    void setDcachePort(MasterPort *dcache_port);
235
236    /** Perform sanity checks after a drain. */
237    void drainSanityCheck() const;
238
239    /** Takes over from another CPU's thread. */
240    void takeOverFrom();
241
242    /** Inserts an instruction. */
243    void insert(const DynInstPtr &inst);
244    /** Inserts a load instruction. */
245    void insertLoad(const DynInstPtr &load_inst);
246    /** Inserts a store instruction. */
247    void insertStore(const DynInstPtr &store_inst);
248
249    /** Check for ordering violations in the LSQ. For a store squash if we
250     * ever find a conflicting load. For a load, only squash if we
251     * an external snoop invalidate has been seen for that load address
252     * @param load_idx index to start checking at
253     * @param inst the instruction to check
254     */
255    Fault checkViolations(typename LoadQueue::iterator& loadIt,
256            const DynInstPtr& inst);
257
258    /** Check if an incoming invalidate hits in the lsq on a load
259     * that might have issued out of order wrt another load beacuse
260     * of the intermediate invalidate.
261     */
262    void checkSnoop(PacketPtr pkt);
263
264    /** Executes a load instruction. */
265    Fault executeLoad(const DynInstPtr &inst);
266
267    Fault executeLoad(int lq_idx) { panic("Not implemented"); return NoFault; }
268    /** Executes a store instruction. */
269    Fault executeStore(const DynInstPtr &inst);
270
271    /** Commits the head load. */
272    void commitLoad();
273    /** Commits loads older than a specific sequence number. */
274    void commitLoads(InstSeqNum &youngest_inst);
275
276    /** Commits stores older than a specific sequence number. */
277    void commitStores(InstSeqNum &youngest_inst);
278
279    /** Writes back stores. */
280    void writebackStores();
281
282    /** Completes the data access that has been returned from the
283     * memory system. */
284    void completeDataAccess(PacketPtr pkt);
285
286    /** Squashes all instructions younger than a specific sequence number. */
287    void squash(const InstSeqNum &squashed_num);
288
289    /** Returns if there is a memory ordering violation. Value is reset upon
290     * call to getMemDepViolator().
291     */
292    bool violation() { return memDepViolator; }
293
294    /** Returns the memory ordering violator. */
295    DynInstPtr getMemDepViolator();
296
297    /** Returns the number of free LQ entries. */
298    unsigned numFreeLoadEntries();
299
300    /** Returns the number of free SQ entries. */
301    unsigned numFreeStoreEntries();
302
303    /** Returns the number of loads in the LQ. */
304    int numLoads() { return loads; }
305
306    /** Returns the number of stores in the SQ. */
307    int numStores() { return stores; }
308
309    /** Returns if either the LQ or SQ is full. */
310    bool isFull() { return lqFull() || sqFull(); }
311
312    /** Returns if both the LQ and SQ are empty. */
313    bool isEmpty() const { return lqEmpty() && sqEmpty(); }
314
315    /** Returns if the LQ is full. */
316    bool lqFull() { return loadQueue.full(); }
317
318    /** Returns if the SQ is full. */
319    bool sqFull() { return storeQueue.full(); }
320
321    /** Returns if the LQ is empty. */
322    bool lqEmpty() const { return loads == 0; }
323
324    /** Returns if the SQ is empty. */
325    bool sqEmpty() const { return stores == 0; }
326
327    /** Returns the number of instructions in the LSQ. */
328    unsigned getCount() { return loads + stores; }
329
330    /** Returns if there are any stores to writeback. */
331    bool hasStoresToWB() { return storesToWB; }
332
333    /** Returns the number of stores to writeback. */
334    int numStoresToWB() { return storesToWB; }
335
336    /** Returns if the LSQ unit will writeback on this cycle. */
337    bool
338    willWB()
339    {
340        return storeWBIt.dereferenceable() &&
341                        storeWBIt->valid() &&
342                        storeWBIt->canWB() &&
343                        !storeWBIt->completed() &&
344                        !isStoreBlocked;
345    }
346
347    /** Handles doing the retry. */
348    void recvRetry();
349
350    unsigned int cacheLineSize();
351  private:
352    /** Reset the LSQ state */
353    void resetState();
354
355    /** Writes back the instruction, sending it to IEW. */
356    void writeback(const DynInstPtr &inst, PacketPtr pkt);
357
358    /** Try to finish a previously blocked write back attempt */
359    void writebackBlockedStore();
360
361    /** Completes the store at the specified index. */
362    void completeStore(typename StoreQueue::iterator store_idx);
363
364    /** Handles completing the send of a store to memory. */
365    void storePostSend();
366
367  public:
368    /** Attempts to send a packet to the cache.
369     * Check if there are ports available. Return true if
370     * there are, false if there are not.
371     */
372    bool trySendPacket(bool isLoad, PacketPtr data_pkt);
373
374
375    /** Debugging function to dump instructions in the LSQ. */
376    void dumpInsts() const;
377
378    /** Schedule event for the cpu. */
379    void schedule(Event& ev, Tick when) { cpu->schedule(ev, when); }
380
381    BaseTLB* dTLB() { return cpu->dtb; }
382
383  private:
384    /** Pointer to the CPU. */
385    O3CPU *cpu;
386
387    /** Pointer to the IEW stage. */
388    IEW *iewStage;
389
390    /** Pointer to the LSQ. */
391    LSQ *lsq;
392
393    /** Pointer to the dcache port.  Used only for sending. */
394    MasterPort *dcachePort;
395
396    /** Particularisation of the LSQSenderState to the LQ. */
397    class LQSenderState : public LSQSenderState
398    {
399        using LSQSenderState::alive;
400      public:
401        LQSenderState(typename LoadQueue::iterator idx_)
402            : LSQSenderState(idx_->request(), true), idx(idx_) { }
403
404        /** The LQ index of the instruction. */
405        typename LoadQueue::iterator idx;
406        //virtual LSQRequest* request() { return idx->request(); }
407        virtual void
408        complete()
409        {
410            //if (alive())
411            //  idx->request()->senderState(nullptr);
412        }
413    };
414
415    /** Particularisation of the LSQSenderState to the SQ. */
416    class SQSenderState : public LSQSenderState
417    {
418        using LSQSenderState::alive;
419      public:
420        SQSenderState(typename StoreQueue::iterator idx_)
421            : LSQSenderState(idx_->request(), false), idx(idx_) { }
422        /** The SQ index of the instruction. */
423        typename StoreQueue::iterator idx;
424        //virtual LSQRequest* request() { return idx->request(); }
425        virtual void
426        complete()
427        {
428            //if (alive())
429            //   idx->request()->senderState(nullptr);
430        }
431    };
432
433    /** Writeback event, specifically for when stores forward data to loads. */
434    class WritebackEvent : public Event
435    {
436      public:
437        /** Constructs a writeback event. */
438        WritebackEvent(const DynInstPtr &_inst, PacketPtr pkt,
439                LSQUnit *lsq_ptr);
440
441        /** Processes the writeback event. */
442        void process();
443
444        /** Returns the description of this event. */
445        const char *description() const;
446
447      private:
448        /** Instruction whose results are being written back. */
449        DynInstPtr inst;
450
451        /** The packet that would have been sent to memory. */
452        PacketPtr pkt;
453
454        /** The pointer to the LSQ unit that issued the store. */
455        LSQUnit<Impl> *lsqPtr;
456    };
457
458  public:
459    /**
460     * Handles writing back and completing the load or store that has
461     * returned from memory.
462     *
463     * @param pkt Response packet from the memory sub-system
464     */
465    bool recvTimingResp(PacketPtr pkt);
466
467  private:
468    /** The LSQUnit thread id. */
469    ThreadID lsqID;
470  public:
471    /** The store queue. */
472    CircularQueue<SQEntry> storeQueue;
473
474    /** The load queue. */
475    LoadQueue loadQueue;
476
477  private:
478    /** The number of places to shift addresses in the LSQ before checking
479     * for dependency violations
480     */
481    unsigned depCheckShift;
482
483    /** Should loads be checked for dependency issues */
484    bool checkLoads;
485
486    /** The number of load instructions in the LQ. */
487    int loads;
488    /** The number of store instructions in the SQ. */
489    int stores;
490    /** The number of store instructions in the SQ waiting to writeback. */
491    int storesToWB;
492
493    /** The index of the first instruction that may be ready to be
494     * written back, and has not yet been written back.
495     */
496    typename StoreQueue::iterator storeWBIt;
497
498    /** Address Mask for a cache block (e.g. ~(cache_block_size-1)) */
499    Addr cacheBlockMask;
500
501    /** Wire to read information from the issue stage time queue. */
502    typename TimeBuffer<IssueStruct>::wire fromIssue;
503
504    /** Whether or not the LSQ is stalled. */
505    bool stalled;
506    /** The store that causes the stall due to partial store to load
507     * forwarding.
508     */
509    InstSeqNum stallingStoreIsn;
510    /** The index of the above store. */
511    int stallingLoadIdx;
512
513    /** The packet that needs to be retried. */
514    PacketPtr retryPkt;
515
516    /** Whehter or not a store is blocked due to the memory system. */
517    bool isStoreBlocked;
518
519    /** Whether or not a store is in flight. */
520    bool storeInFlight;
521
522    /** The oldest load that caused a memory ordering violation. */
523    DynInstPtr memDepViolator;
524
525    /** Whether or not there is a packet that couldn't be sent because of
526     * a lack of cache ports. */
527    bool hasPendingRequest;
528
529    /** The packet that is pending free cache ports. */
530    LSQRequest* pendingRequest;
531
532    /** Flag for memory model. */
533    bool needsTSO;
534
535    // Will also need how many read/write ports the Dcache has.  Or keep track
536    // of that in stage that is one level up, and only call executeLoad/Store
537    // the appropriate number of times.
538    /** Total number of loads forwaded from LSQ stores. */
539    Stats::Scalar lsqForwLoads;
540
541    /** Total number of loads ignored due to invalid addresses. */
542    Stats::Scalar invAddrLoads;
543
544    /** Total number of squashed loads. */
545    Stats::Scalar lsqSquashedLoads;
546
547    /** Total number of responses from the memory system that are
548     * ignored due to the instruction already being squashed. */
549    Stats::Scalar lsqIgnoredResponses;
550
551    /** Tota number of memory ordering violations. */
552    Stats::Scalar lsqMemOrderViolation;
553
554    /** Total number of squashed stores. */
555    Stats::Scalar lsqSquashedStores;
556
557    /** Total number of software prefetches ignored due to invalid addresses. */
558    Stats::Scalar invAddrSwpfs;
559
560    /** Ready loads blocked due to partial store-forwarding. */
561    Stats::Scalar lsqBlockedLoads;
562
563    /** Number of loads that were rescheduled. */
564    Stats::Scalar lsqRescheduledLoads;
565
566    /** Number of times the LSQ is blocked due to the cache. */
567    Stats::Scalar lsqCacheBlocked;
568
569  public:
570    /** Executes the load at the given index. */
571    Fault read(LSQRequest *req, int load_idx);
572
573    /** Executes the store at the given index. */
574    Fault write(LSQRequest *req, uint8_t *data, int store_idx);
575
576    /** Returns the index of the head load instruction. */
577    int getLoadHead() { return loadQueue.head(); }
578
579    /** Returns the sequence number of the head load instruction. */
580    InstSeqNum
581    getLoadHeadSeqNum()
582    {
583        return loadQueue.front().valid()
584            ? loadQueue.front().instruction()->seqNum
585            : 0;
586    }
587
588    /** Returns the index of the head store instruction. */
589    int getStoreHead() { return storeQueue.head(); }
590    /** Returns the sequence number of the head store instruction. */
591    InstSeqNum
592    getStoreHeadSeqNum()
593    {
594        return storeQueue.front().valid()
595            ? storeQueue.front().instruction()->seqNum
596            : 0;
597    }
598
599    /** Returns whether or not the LSQ unit is stalled. */
600    bool isStalled()  { return stalled; }
601  public:
602    typedef typename CircularQueue<LQEntry>::iterator LQIterator;
603    typedef typename CircularQueue<SQEntry>::iterator SQIterator;
604    typedef CircularQueue<LQEntry> LQueue;
605    typedef CircularQueue<SQEntry> SQueue;
606};
607
608template <class Impl>
609Fault
610LSQUnit<Impl>::read(LSQRequest *req, int load_idx)
611{
612    LQEntry& load_req = loadQueue[load_idx];
613    const DynInstPtr& load_inst = load_req.instruction();
614
615    load_req.setRequest(req);
616    assert(load_inst);
617
618    assert(!load_inst->isExecuted());
619
620    // Make sure this isn't a strictly ordered load
621    // A bit of a hackish way to get strictly ordered accesses to work
622    // only if they're at the head of the LSQ and are ready to commit
623    // (at the head of the ROB too).
624
625    if (req->mainRequest()->isStrictlyOrdered() &&
626        (load_idx != loadQueue.head() || !load_inst->isAtCommit())) {
627        // Tell IQ/mem dep unit that this instruction will need to be
628        // rescheduled eventually
629        iewStage->rescheduleMemInst(load_inst);
630        load_inst->clearIssued();
631        load_inst->effAddrValid(false);
632        ++lsqRescheduledLoads;
633        DPRINTF(LSQUnit, "Strictly ordered load [sn:%lli] PC %s\n",
634                load_inst->seqNum, load_inst->pcState());
635
636        // Must delete request now that it wasn't handed off to
637        // memory.  This is quite ugly.  @todo: Figure out the proper
638        // place to really handle request deletes.
639        load_req.setRequest(nullptr);
640        req->discard();
641        return std::make_shared<GenericISA::M5PanicFault>(
642            "Strictly ordered load [sn:%llx] PC %s\n",
643            load_inst->seqNum, load_inst->pcState());
644    }
645
646    DPRINTF(LSQUnit, "Read called, load idx: %i, store idx: %i, "
647            "storeHead: %i addr: %#x%s\n",
648            load_idx - 1, load_inst->sqIt._idx, storeQueue.head() - 1,
649            req->mainRequest()->getPaddr(), req->isSplit() ? " split" : "");
650
651    if (req->mainRequest()->isLLSC()) {
652        // Disable recording the result temporarily.  Writing to misc
653        // regs normally updates the result, but this is not the
654        // desired behavior when handling store conditionals.
655        load_inst->recordResult(false);
656        TheISA::handleLockedRead(load_inst.get(), req->mainRequest());
657        load_inst->recordResult(true);
658    }
659
660    if (req->mainRequest()->isMmappedIpr()) {
661        assert(!load_inst->memData);
662        load_inst->memData = new uint8_t[64];
663
664        ThreadContext *thread = cpu->tcBase(lsqID);
665        PacketPtr main_pkt = new Packet(req->mainRequest(), MemCmd::ReadReq);
666
667        Cycles delay = req->handleIprRead(thread, main_pkt);
668
669        WritebackEvent *wb = new WritebackEvent(load_inst, main_pkt, this);
670        cpu->schedule(wb, cpu->clockEdge(delay));
671        return NoFault;
672    }
673
674    // Check the SQ for any previous stores that might lead to forwarding
675    auto store_it = load_inst->sqIt;
676    assert (store_it >= storeWBIt);
677    // End once we've reached the top of the LSQ
678    while (store_it != storeWBIt) {
679        // Move the index to one younger
680        store_it--;
681        assert(store_it->valid());
682        assert(store_it->instruction()->seqNum < load_inst->seqNum);
683        int store_size = store_it->size();
684
685        // Cache maintenance instructions go down via the store
686        // path but they carry no data and they shouldn't be
687        // considered for forwarding
688        if (store_size != 0 && !store_it->instruction()->strictlyOrdered() &&
689            !(store_it->request()->mainRequest() &&
690              store_it->request()->mainRequest()->isCacheMaintenance())) {
691            assert(store_it->instruction()->effAddrValid());
692
693            // Check if the store data is within the lower and upper bounds of
694            // addresses that the request needs.
695            auto req_s = req->mainRequest()->getVaddr();
696            auto req_e = req_s + req->mainRequest()->getSize();
697            auto st_s = store_it->instruction()->effAddr;
698            auto st_e = st_s + store_size;
699
700            bool store_has_lower_limit = req_s >= st_s;
701            bool store_has_upper_limit = req_e <= st_e;
702            bool lower_load_has_store_part = req_s < st_e;
703            bool upper_load_has_store_part = req_e > st_s;
704
705            // If the store's data has all of the data needed and the load
706            // isn't LLSC then
707            // we can forward.
708            if (store_has_lower_limit && store_has_upper_limit &&
709                !req->mainRequest()->isLLSC()) {
710
711                // Get shift amount for offset into the store's data.
712                int shift_amt = req->mainRequest()->getVaddr() -
713                    store_it->instruction()->effAddr;
714
715                // Allocate memory if this is the first time a load is issued.
716                if (!load_inst->memData) {
717                    load_inst->memData =
718                        new uint8_t[req->mainRequest()->getSize()];
719                }
720                if (store_it->isAllZeros())
721                    memset(load_inst->memData, 0,
722                            req->mainRequest()->getSize());
723                else
724                    memcpy(load_inst->memData,
725                        store_it->data() + shift_amt,
726                        req->mainRequest()->getSize());
727
728                DPRINTF(LSQUnit, "Forwarding from store idx %i to load to "
729                        "addr %#x\n", store_it._idx,
730                        req->mainRequest()->getVaddr());
731
732                PacketPtr data_pkt = new Packet(req->mainRequest(),
733                        MemCmd::ReadReq);
734                data_pkt->dataStatic(load_inst->memData);
735
736                if (req->isAnyOutstandingRequest()) {
737                    assert(req->_numOutstandingPackets > 0);
738                    // There are memory requests packets in flight already.
739                    // This may happen if the store was not complete the
740                    // first time this load got executed. Signal the senderSate
741                    // that response packets should be discarded.
742                    req->discardSenderState();
743                }
744
745                WritebackEvent *wb = new WritebackEvent(load_inst, data_pkt,
746                        this);
747
748                // We'll say this has a 1 cycle load-store forwarding latency
749                // for now.
750                // @todo: Need to make this a parameter.
751                cpu->schedule(wb, curTick());
752
753                // Don't need to do anything special for split loads.
754                ++lsqForwLoads;
755
756                return NoFault;
757            } else if (
758                (!req->mainRequest()->isLLSC() &&
759                 ((store_has_lower_limit && lower_load_has_store_part) ||
760                  (store_has_upper_limit && upper_load_has_store_part) ||
761                  (lower_load_has_store_part && upper_load_has_store_part))) ||
762                (req->mainRequest()->isLLSC() &&
763                 ((store_has_lower_limit || upper_load_has_store_part) &&
764                  (store_has_upper_limit || lower_load_has_store_part)))) {
765                // This is the partial store-load forwarding case where a store
766                // has only part of the load's data and the load isn't LLSC or
767                // the load is LLSC and the store has all or part of the load's
768                // data
769
770                // If it's already been written back, then don't worry about
771                // stalling on it.
772                if (store_it->completed()) {
773                    panic("Should not check one of these");
774                    continue;
775                }
776
777                // Must stall load and force it to retry, so long as it's the
778                // oldest load that needs to do so.
779                if (!stalled ||
780                    (stalled &&
781                     load_inst->seqNum <
782                     loadQueue[stallingLoadIdx].instruction()->seqNum)) {
783                    stalled = true;
784                    stallingStoreIsn = store_it->instruction()->seqNum;
785                    stallingLoadIdx = load_idx;
786                }
787
788                // Tell IQ/mem dep unit that this instruction will need to be
789                // rescheduled eventually
790                iewStage->rescheduleMemInst(load_inst);
791                load_inst->clearIssued();
792                load_inst->effAddrValid(false);
793                ++lsqRescheduledLoads;
794
795                // Do not generate a writeback event as this instruction is not
796                // complete.
797                DPRINTF(LSQUnit, "Load-store forwarding mis-match. "
798                        "Store idx %i to load addr %#x\n",
799                        store_it._idx, req->mainRequest()->getVaddr());
800
801                // Must discard the request.
802                req->discard();
803                load_req.setRequest(nullptr);
804                return NoFault;
805            }
806        }
807    }
808
809    // If there's no forwarding case, then go access memory
810    DPRINTF(LSQUnit, "Doing memory access for inst [sn:%lli] PC %s\n",
811            load_inst->seqNum, load_inst->pcState());
812
813    // Allocate memory if this is the first time a load is issued.
814    if (!load_inst->memData) {
815        load_inst->memData = new uint8_t[req->mainRequest()->getSize()];
816    }
817
818    // For now, load throughput is constrained by the number of
819    // load FUs only, and loads do not consume a cache port (only
820    // stores do).
821    // @todo We should account for cache port contention
822    // and arbitrate between loads and stores.
823
824    // if we the cache is not blocked, do cache access
825    if (req->senderState() == nullptr) {
826        LQSenderState *state = new LQSenderState(
827                loadQueue.getIterator(load_idx));
828        state->isLoad = true;
829        state->inst = load_inst;
830        state->isSplit = req->isSplit();
831        req->senderState(state);
832    }
833    req->buildPackets();
834    req->sendPacketToCache();
835    if (!req->isSent())
836        iewStage->blockMemInst(load_inst);
837
838    return NoFault;
839}
840
841template <class Impl>
842Fault
843LSQUnit<Impl>::write(LSQRequest *req, uint8_t *data, int store_idx)
844{
845    assert(storeQueue[store_idx].valid());
846
847    DPRINTF(LSQUnit, "Doing write to store idx %i, addr %#x | storeHead:%i "
848            "[sn:%i]\n",
849            store_idx - 1, req->request()->getPaddr(), storeQueue.head() - 1,
850            storeQueue[store_idx].instruction()->seqNum);
851
852    storeQueue[store_idx].setRequest(req);
853    unsigned size = req->_size;
854    storeQueue[store_idx].size() = size;
855    bool store_no_data =
856        req->mainRequest()->getFlags() & Request::STORE_NO_DATA;
857    storeQueue[store_idx].isAllZeros() = store_no_data;
858    assert(size <= SQEntry::DataSize || store_no_data);
859
860    if (!(req->request()->getFlags() & Request::CACHE_BLOCK_ZERO) &&
861        !req->request()->isCacheMaintenance())
862        memcpy(storeQueue[store_idx].data(), data, size);
863
864    // This function only writes the data to the store queue, so no fault
865    // can happen here.
866    return NoFault;
867}
868
869#endif // __CPU_O3_LSQ_UNIT_HH__
870