// lsq_unit.hh revision 8230
/*
 * Copyright (c) 2004-2006 The Regents of The University of Michigan
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Kevin Lim
 *          Korey Sewell
 */

#ifndef __CPU_O3_LSQ_UNIT_HH__
#define __CPU_O3_LSQ_UNIT_HH__

#include <algorithm>
#include <cstring>
#include <map>
#include <queue>
#include <string>
#include <vector>

#include "arch/faults.hh"
#include "arch/locked_mem.hh"
#include "base/fast_alloc.hh"
#include "base/hashmap.hh"
#include "config/full_system.hh"
#include "config/the_isa.hh"
#include "cpu/inst_seq.hh"
#include "cpu/timebuf.hh"
#include "mem/packet.hh"
#include "mem/port.hh"

class DerivO3CPUParams;

53/**
54 * Class that implements the actual LQ and SQ for each specific
55 * thread.  Both are circular queues; load entries are freed upon
56 * committing, while store entries are freed once they writeback. The
57 * LSQUnit tracks if there are memory ordering violations, and also
58 * detects partial load to store forwarding cases (a store only has
59 * part of a load's data) that requires the load to wait until the
60 * store writes back. In the former case it holds onto the instruction
61 * until the dependence unit looks at it, and in the latter it stalls
62 * the LSQ until the store writes back. At that point the load is
63 * replayed.
64 */
65template <class Impl>
66class LSQUnit {
67  public:
68    typedef typename Impl::O3CPU O3CPU;
69    typedef typename Impl::DynInstPtr DynInstPtr;
70    typedef typename Impl::CPUPol::IEW IEW;
71    typedef typename Impl::CPUPol::LSQ LSQ;
72    typedef typename Impl::CPUPol::IssueStruct IssueStruct;
73
74  public:
75    /** Constructs an LSQ unit. init() must be called prior to use. */
76    LSQUnit();
77
78    /** Initializes the LSQ unit with the specified number of entries. */
79    void init(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params,
80            LSQ *lsq_ptr, unsigned maxLQEntries, unsigned maxSQEntries,
81            unsigned id);
82
83    /** Returns the name of the LSQ unit. */
84    std::string name() const;
85
86    /** Registers statistics. */
87    void regStats();
88
89    /** Sets the pointer to the dcache port. */
90    void setDcachePort(Port *dcache_port);
91
92    /** Switches out LSQ unit. */
93    void switchOut();
94
95    /** Takes over from another CPU's thread. */
96    void takeOverFrom();
97
98    /** Returns if the LSQ is switched out. */
99    bool isSwitchedOut() { return switchedOut; }
100
101    /** Ticks the LSQ unit, which in this case only resets the number of
102     * used cache ports.
103     * @todo: Move the number of used ports up to the LSQ level so it can
104     * be shared by all LSQ units.
105     */
106    void tick() { usedPorts = 0; }
107
108    /** Inserts an instruction. */
109    void insert(DynInstPtr &inst);
110    /** Inserts a load instruction. */
111    void insertLoad(DynInstPtr &load_inst);
112    /** Inserts a store instruction. */
113    void insertStore(DynInstPtr &store_inst);
114
115    /** Check for ordering violations in the LSQ
116     * @param load_idx index to start checking at
117     * @param inst the instruction to check
118     */
119    Fault checkViolations(int load_idx, DynInstPtr &inst);
120
121    /** Executes a load instruction. */
122    Fault executeLoad(DynInstPtr &inst);
123
124    Fault executeLoad(int lq_idx) { panic("Not implemented"); return NoFault; }
125    /** Executes a store instruction. */
126    Fault executeStore(DynInstPtr &inst);
127
128    /** Commits the head load. */
129    void commitLoad();
130    /** Commits loads older than a specific sequence number. */
131    void commitLoads(InstSeqNum &youngest_inst);
132
133    /** Commits stores older than a specific sequence number. */
134    void commitStores(InstSeqNum &youngest_inst);
135
136    /** Writes back stores. */
137    void writebackStores();
138
139    /** Completes the data access that has been returned from the
140     * memory system. */
141    void completeDataAccess(PacketPtr pkt);
142
143    /** Clears all the entries in the LQ. */
144    void clearLQ();
145
146    /** Clears all the entries in the SQ. */
147    void clearSQ();
148
149    /** Resizes the LQ to a given size. */
150    void resizeLQ(unsigned size);
151
152    /** Resizes the SQ to a given size. */
153    void resizeSQ(unsigned size);
154
155    /** Squashes all instructions younger than a specific sequence number. */
156    void squash(const InstSeqNum &squashed_num);
157
158    /** Returns if there is a memory ordering violation. Value is reset upon
159     * call to getMemDepViolator().
160     */
161    bool violation() { return memDepViolator; }
162
163    /** Returns the memory ordering violator. */
164    DynInstPtr getMemDepViolator();
165
166    /** Returns if a load became blocked due to the memory system. */
167    bool loadBlocked()
168    { return isLoadBlocked; }
169
170    /** Clears the signal that a load became blocked. */
171    void clearLoadBlocked()
172    { isLoadBlocked = false; }
173
174    /** Returns if the blocked load was handled. */
175    bool isLoadBlockedHandled()
176    { return loadBlockedHandled; }
177
178    /** Records the blocked load as being handled. */
179    void setLoadBlockedHandled()
180    { loadBlockedHandled = true; }
181
182    /** Returns the number of free entries (min of free LQ and SQ entries). */
183    unsigned numFreeEntries();
184
185    /** Returns the number of loads ready to execute. */
186    int numLoadsReady();
187
188    /** Returns the number of loads in the LQ. */
189    int numLoads() { return loads; }
190
191    /** Returns the number of stores in the SQ. */
192    int numStores() { return stores; }
193
194    /** Returns if either the LQ or SQ is full. */
195    bool isFull() { return lqFull() || sqFull(); }
196
197    /** Returns if the LQ is full. */
198    bool lqFull() { return loads >= (LQEntries - 1); }
199
200    /** Returns if the SQ is full. */
201    bool sqFull() { return stores >= (SQEntries - 1); }
202
203    /** Returns the number of instructions in the LSQ. */
204    unsigned getCount() { return loads + stores; }
205
206    /** Returns if there are any stores to writeback. */
207    bool hasStoresToWB() { return storesToWB; }
208
209    /** Returns the number of stores to writeback. */
210    int numStoresToWB() { return storesToWB; }
211
212    /** Returns if the LSQ unit will writeback on this cycle. */
213    bool willWB() { return storeQueue[storeWBIdx].canWB &&
214                        !storeQueue[storeWBIdx].completed &&
215                        !isStoreBlocked; }
216
217    /** Handles doing the retry. */
218    void recvRetry();
219
220  private:
221    /** Writes back the instruction, sending it to IEW. */
222    void writeback(DynInstPtr &inst, PacketPtr pkt);
223
224    /** Writes back a store that couldn't be completed the previous cycle. */
225    void writebackPendingStore();
226
227    /** Handles completing the send of a store to memory. */
228    void storePostSend(PacketPtr pkt);
229
230    /** Completes the store at the specified index. */
231    void completeStore(int store_idx);
232
233    /** Attempts to send a store to the cache. */
234    bool sendStore(PacketPtr data_pkt);
235
236    /** Increments the given store index (circular queue). */
237    inline void incrStIdx(int &store_idx);
238    /** Decrements the given store index (circular queue). */
239    inline void decrStIdx(int &store_idx);
240    /** Increments the given load index (circular queue). */
241    inline void incrLdIdx(int &load_idx);
242    /** Decrements the given load index (circular queue). */
243    inline void decrLdIdx(int &load_idx);
244
245  public:
246    /** Debugging function to dump instructions in the LSQ. */
247    void dumpInsts();
248
249  private:
250    /** Pointer to the CPU. */
251    O3CPU *cpu;
252
253    /** Pointer to the IEW stage. */
254    IEW *iewStage;
255
256    /** Pointer to the LSQ. */
257    LSQ *lsq;
258
259    /** Pointer to the dcache port.  Used only for sending. */
260    Port *dcachePort;
261
262    /** Derived class to hold any sender state the LSQ needs. */
263    class LSQSenderState : public Packet::SenderState, public FastAlloc
264    {
265      public:
266        /** Default constructor. */
267        LSQSenderState()
268            : noWB(false), isSplit(false), pktToSend(false), outstanding(1),
269              mainPkt(NULL), pendingPacket(NULL)
270        { }
271
272        /** Instruction who initiated the access to memory. */
273        DynInstPtr inst;
274        /** Whether or not it is a load. */
275        bool isLoad;
276        /** The LQ/SQ index of the instruction. */
277        int idx;
278        /** Whether or not the instruction will need to writeback. */
279        bool noWB;
280        /** Whether or not this access is split in two. */
281        bool isSplit;
282        /** Whether or not there is a packet that needs sending. */
283        bool pktToSend;
284        /** Number of outstanding packets to complete. */
285        int outstanding;
286        /** The main packet from a split load, used during writeback. */
287        PacketPtr mainPkt;
288        /** A second packet from a split store that needs sending. */
289        PacketPtr pendingPacket;
290
291        /** Completes a packet and returns whether the access is finished. */
292        inline bool complete() { return --outstanding == 0; }
293    };
294
295    /** Writeback event, specifically for when stores forward data to loads. */
296    class WritebackEvent : public Event {
297      public:
298        /** Constructs a writeback event. */
299        WritebackEvent(DynInstPtr &_inst, PacketPtr pkt, LSQUnit *lsq_ptr);
300
301        /** Processes the writeback event. */
302        void process();
303
304        /** Returns the description of this event. */
305        const char *description() const;
306
307      private:
308        /** Instruction whose results are being written back. */
309        DynInstPtr inst;
310
311        /** The packet that would have been sent to memory. */
312        PacketPtr pkt;
313
314        /** The pointer to the LSQ unit that issued the store. */
315        LSQUnit<Impl> *lsqPtr;
316    };
317
318  public:
319    struct SQEntry {
320        /** Constructs an empty store queue entry. */
321        SQEntry()
322            : inst(NULL), req(NULL), size(0),
323              canWB(0), committed(0), completed(0)
324        {
325            std::memset(data, 0, sizeof(data));
326        }
327
328        /** Constructs a store queue entry for a given instruction. */
329        SQEntry(DynInstPtr &_inst)
330            : inst(_inst), req(NULL), sreqLow(NULL), sreqHigh(NULL), size(0),
331              isSplit(0), canWB(0), committed(0), completed(0)
332        {
333            std::memset(data, 0, sizeof(data));
334        }
335
336        /** The store instruction. */
337        DynInstPtr inst;
338        /** The request for the store. */
339        RequestPtr req;
340        /** The split requests for the store. */
341        RequestPtr sreqLow;
342        RequestPtr sreqHigh;
343        /** The size of the store. */
344        int size;
345        /** The store data. */
346        char data[16];
347        /** Whether or not the store is split into two requests. */
348        bool isSplit;
349        /** Whether or not the store can writeback. */
350        bool canWB;
351        /** Whether or not the store is committed. */
352        bool committed;
353        /** Whether or not the store is completed. */
354        bool completed;
355    };
356
357  private:
358    /** The LSQUnit thread id. */
359    ThreadID lsqID;
360
361    /** The store queue. */
362    std::vector<SQEntry> storeQueue;
363
364    /** The load queue. */
365    std::vector<DynInstPtr> loadQueue;
366
367    /** The number of LQ entries, plus a sentinel entry (circular queue).
368     *  @todo: Consider having var that records the true number of LQ entries.
369     */
370    unsigned LQEntries;
371    /** The number of SQ entries, plus a sentinel entry (circular queue).
372     *  @todo: Consider having var that records the true number of SQ entries.
373     */
374    unsigned SQEntries;
375
376    /** The number of places to shift addresses in the LSQ before checking
377     * for dependency violations
378     */
379    unsigned depCheckShift;
380
381    /** Should loads be checked for dependency issues */
382    bool checkLoads;
383
384    /** The number of load instructions in the LQ. */
385    int loads;
386    /** The number of store instructions in the SQ. */
387    int stores;
388    /** The number of store instructions in the SQ waiting to writeback. */
389    int storesToWB;
390
391    /** The index of the head instruction in the LQ. */
392    int loadHead;
393    /** The index of the tail instruction in the LQ. */
394    int loadTail;
395
396    /** The index of the head instruction in the SQ. */
397    int storeHead;
398    /** The index of the first instruction that may be ready to be
399     * written back, and has not yet been written back.
400     */
401    int storeWBIdx;
402    /** The index of the tail instruction in the SQ. */
403    int storeTail;
404
405    /// @todo Consider moving to a more advanced model with write vs read ports
406    /** The number of cache ports available each cycle. */
407    int cachePorts;
408
409    /** The number of used cache ports in this cycle. */
410    int usedPorts;
411
412    /** Is the LSQ switched out. */
413    bool switchedOut;
414
415    //list<InstSeqNum> mshrSeqNums;
416
417    /** Wire to read information from the issue stage time queue. */
418    typename TimeBuffer<IssueStruct>::wire fromIssue;
419
420    /** Whether or not the LSQ is stalled. */
421    bool stalled;
422    /** The store that causes the stall due to partial store to load
423     * forwarding.
424     */
425    InstSeqNum stallingStoreIsn;
426    /** The index of the above store. */
427    int stallingLoadIdx;
428
429    /** The packet that needs to be retried. */
430    PacketPtr retryPkt;
431
432    /** Whehter or not a store is blocked due to the memory system. */
433    bool isStoreBlocked;
434
435    /** Whether or not a load is blocked due to the memory system. */
436    bool isLoadBlocked;
437
438    /** Has the blocked load been handled. */
439    bool loadBlockedHandled;
440
441    /** The sequence number of the blocked load. */
442    InstSeqNum blockedLoadSeqNum;
443
444    /** The oldest load that caused a memory ordering violation. */
445    DynInstPtr memDepViolator;
446
447    /** Whether or not there is a packet that couldn't be sent because of
448     * a lack of cache ports. */
449    bool hasPendingPkt;
450
451    /** The packet that is pending free cache ports. */
452    PacketPtr pendingPkt;
453
454    // Will also need how many read/write ports the Dcache has.  Or keep track
455    // of that in stage that is one level up, and only call executeLoad/Store
456    // the appropriate number of times.
457    /** Total number of loads forwaded from LSQ stores. */
458    Stats::Scalar lsqForwLoads;
459
460    /** Total number of loads ignored due to invalid addresses. */
461    Stats::Scalar invAddrLoads;
462
463    /** Total number of squashed loads. */
464    Stats::Scalar lsqSquashedLoads;
465
466    /** Total number of responses from the memory system that are
467     * ignored due to the instruction already being squashed. */
468    Stats::Scalar lsqIgnoredResponses;
469
470    /** Tota number of memory ordering violations. */
471    Stats::Scalar lsqMemOrderViolation;
472
473    /** Total number of squashed stores. */
474    Stats::Scalar lsqSquashedStores;
475
476    /** Total number of software prefetches ignored due to invalid addresses. */
477    Stats::Scalar invAddrSwpfs;
478
479    /** Ready loads blocked due to partial store-forwarding. */
480    Stats::Scalar lsqBlockedLoads;
481
482    /** Number of loads that were rescheduled. */
483    Stats::Scalar lsqRescheduledLoads;
484
485    /** Number of times the LSQ is blocked due to the cache. */
486    Stats::Scalar lsqCacheBlocked;
487
488  public:
489    /** Executes the load at the given index. */
490    Fault read(Request *req, Request *sreqLow, Request *sreqHigh,
491               uint8_t *data, int load_idx);
492
493    /** Executes the store at the given index. */
494    Fault write(Request *req, Request *sreqLow, Request *sreqHigh,
495                uint8_t *data, int store_idx);
496
497    /** Returns the index of the head load instruction. */
498    int getLoadHead() { return loadHead; }
499    /** Returns the sequence number of the head load instruction. */
500    InstSeqNum getLoadHeadSeqNum()
501    {
502        if (loadQueue[loadHead]) {
503            return loadQueue[loadHead]->seqNum;
504        } else {
505            return 0;
506        }
507
508    }
509
510    /** Returns the index of the head store instruction. */
511    int getStoreHead() { return storeHead; }
512    /** Returns the sequence number of the head store instruction. */
513    InstSeqNum getStoreHeadSeqNum()
514    {
515        if (storeQueue[storeHead].inst) {
516            return storeQueue[storeHead].inst->seqNum;
517        } else {
518            return 0;
519        }
520
521    }
522
523    /** Returns whether or not the LSQ unit is stalled. */
524    bool isStalled()  { return stalled; }
525};
526
527template <class Impl>
528Fault
529LSQUnit<Impl>::read(Request *req, Request *sreqLow, Request *sreqHigh,
530                    uint8_t *data, int load_idx)
531{
532    DynInstPtr load_inst = loadQueue[load_idx];
533
534    assert(load_inst);
535
536    assert(!load_inst->isExecuted());
537
538    // Make sure this isn't an uncacheable access
539    // A bit of a hackish way to get uncached accesses to work only if they're
540    // at the head of the LSQ and are ready to commit (at the head of the ROB
541    // too).
542    if (req->isUncacheable() &&
543        (load_idx != loadHead || !load_inst->isAtCommit())) {
544        iewStage->rescheduleMemInst(load_inst);
545        ++lsqRescheduledLoads;
546        DPRINTF(LSQUnit, "Uncachable load [sn:%lli] PC %s\n",
547                load_inst->seqNum, load_inst->pcState());
548
549        // Must delete request now that it wasn't handed off to
550        // memory.  This is quite ugly.  @todo: Figure out the proper
551        // place to really handle request deletes.
552        delete req;
553        if (TheISA::HasUnalignedMemAcc && sreqLow) {
554            delete sreqLow;
555            delete sreqHigh;
556        }
557        return TheISA::genMachineCheckFault();
558    }
559
560    // Check the SQ for any previous stores that might lead to forwarding
561    int store_idx = load_inst->sqIdx;
562
563    int store_size = 0;
564
565    DPRINTF(LSQUnit, "Read called, load idx: %i, store idx: %i, "
566            "storeHead: %i addr: %#x%s\n",
567            load_idx, store_idx, storeHead, req->getPaddr(),
568            sreqLow ? " split" : "");
569
570    if (req->isLLSC()) {
571        assert(!sreqLow);
572        // Disable recording the result temporarily.  Writing to misc
573        // regs normally updates the result, but this is not the
574        // desired behavior when handling store conditionals.
575        load_inst->recordResult = false;
576        TheISA::handleLockedRead(load_inst.get(), req);
577        load_inst->recordResult = true;
578    }
579
580    while (store_idx != -1) {
581        // End once we've reached the top of the LSQ
582        if (store_idx == storeWBIdx) {
583            break;
584        }
585
586        // Move the index to one younger
587        if (--store_idx < 0)
588            store_idx += SQEntries;
589
590        assert(storeQueue[store_idx].inst);
591
592        store_size = storeQueue[store_idx].size;
593
594        if (store_size == 0)
595            continue;
596        else if (storeQueue[store_idx].inst->uncacheable())
597            continue;
598
599        assert(storeQueue[store_idx].inst->effAddrValid);
600
601        // Check if the store data is within the lower and upper bounds of
602        // addresses that the request needs.
603        bool store_has_lower_limit =
604            req->getVaddr() >= storeQueue[store_idx].inst->effAddr;
605        bool store_has_upper_limit =
606            (req->getVaddr() + req->getSize()) <=
607            (storeQueue[store_idx].inst->effAddr + store_size);
608        bool lower_load_has_store_part =
609            req->getVaddr() < (storeQueue[store_idx].inst->effAddr +
610                           store_size);
611        bool upper_load_has_store_part =
612            (req->getVaddr() + req->getSize()) >
613            storeQueue[store_idx].inst->effAddr;
614
615        // If the store's data has all of the data needed, we can forward.
616        if ((store_has_lower_limit && store_has_upper_limit)) {
617            // Get shift amount for offset into the store's data.
618            int shift_amt = req->getVaddr() & (store_size - 1);
619
620            memcpy(data, storeQueue[store_idx].data + shift_amt,
621                   req->getSize());
622
623            assert(!load_inst->memData);
624            load_inst->memData = new uint8_t[64];
625
626            memcpy(load_inst->memData,
627                    storeQueue[store_idx].data + shift_amt, req->getSize());
628
629            DPRINTF(LSQUnit, "Forwarding from store idx %i to load to "
630                    "addr %#x, data %#x\n",
631                    store_idx, req->getVaddr(), data);
632
633            PacketPtr data_pkt = new Packet(req, MemCmd::ReadReq,
634                                            Packet::Broadcast);
635            data_pkt->dataStatic(load_inst->memData);
636
637            WritebackEvent *wb = new WritebackEvent(load_inst, data_pkt, this);
638
639            // We'll say this has a 1 cycle load-store forwarding latency
640            // for now.
641            // @todo: Need to make this a parameter.
642            cpu->schedule(wb, curTick());
643
644            // Don't need to do anything special for split loads.
645            if (TheISA::HasUnalignedMemAcc && sreqLow) {
646                delete sreqLow;
647                delete sreqHigh;
648            }
649
650            ++lsqForwLoads;
651            return NoFault;
652        } else if ((store_has_lower_limit && lower_load_has_store_part) ||
653                   (store_has_upper_limit && upper_load_has_store_part) ||
654                   (lower_load_has_store_part && upper_load_has_store_part)) {
655            // This is the partial store-load forwarding case where a store
656            // has only part of the load's data.
657
658            // If it's already been written back, then don't worry about
659            // stalling on it.
660            if (storeQueue[store_idx].completed) {
661                panic("Should not check one of these");
662                continue;
663            }
664
665            // Must stall load and force it to retry, so long as it's the oldest
666            // load that needs to do so.
667            if (!stalled ||
668                (stalled &&
669                 load_inst->seqNum <
670                 loadQueue[stallingLoadIdx]->seqNum)) {
671                stalled = true;
672                stallingStoreIsn = storeQueue[store_idx].inst->seqNum;
673                stallingLoadIdx = load_idx;
674            }
675
676            // Tell IQ/mem dep unit that this instruction will need to be
677            // rescheduled eventually
678            iewStage->rescheduleMemInst(load_inst);
679            iewStage->decrWb(load_inst->seqNum);
680            load_inst->clearIssued();
681            ++lsqRescheduledLoads;
682
683            // Do not generate a writeback event as this instruction is not
684            // complete.
685            DPRINTF(LSQUnit, "Load-store forwarding mis-match. "
686                    "Store idx %i to load addr %#x\n",
687                    store_idx, req->getVaddr());
688
689            // Must delete request now that it wasn't handed off to
690            // memory.  This is quite ugly.  @todo: Figure out the
691            // proper place to really handle request deletes.
692            delete req;
693            if (TheISA::HasUnalignedMemAcc && sreqLow) {
694                delete sreqLow;
695                delete sreqHigh;
696            }
697
698            return NoFault;
699        }
700    }
701
702    // If there's no forwarding case, then go access memory
703    DPRINTF(LSQUnit, "Doing memory access for inst [sn:%lli] PC %s\n",
704            load_inst->seqNum, load_inst->pcState());
705
706    assert(!load_inst->memData);
707    load_inst->memData = new uint8_t[64];
708
709    ++usedPorts;
710
711    // if we the cache is not blocked, do cache access
712    bool completedFirst = false;
713    if (!lsq->cacheBlocked()) {
714        MemCmd command =
715            req->isLLSC() ? MemCmd::LoadLockedReq : MemCmd::ReadReq;
716        PacketPtr data_pkt = new Packet(req, command, Packet::Broadcast);
717        PacketPtr fst_data_pkt = NULL;
718        PacketPtr snd_data_pkt = NULL;
719
720        data_pkt->dataStatic(load_inst->memData);
721
722        LSQSenderState *state = new LSQSenderState;
723        state->isLoad = true;
724        state->idx = load_idx;
725        state->inst = load_inst;
726        data_pkt->senderState = state;
727
728        if (!TheISA::HasUnalignedMemAcc || !sreqLow) {
729
730            // Point the first packet at the main data packet.
731            fst_data_pkt = data_pkt;
732        } else {
733
734            // Create the split packets.
735            fst_data_pkt = new Packet(sreqLow, command, Packet::Broadcast);
736            snd_data_pkt = new Packet(sreqHigh, command, Packet::Broadcast);
737
738            fst_data_pkt->dataStatic(load_inst->memData);
739            snd_data_pkt->dataStatic(load_inst->memData + sreqLow->getSize());
740
741            fst_data_pkt->senderState = state;
742            snd_data_pkt->senderState = state;
743
744            state->isSplit = true;
745            state->outstanding = 2;
746            state->mainPkt = data_pkt;
747        }
748
749        if (!dcachePort->sendTiming(fst_data_pkt)) {
750            // Delete state and data packet because a load retry
751            // initiates a pipeline restart; it does not retry.
752            delete state;
753            delete data_pkt->req;
754            delete data_pkt;
755            if (TheISA::HasUnalignedMemAcc && sreqLow) {
756                delete fst_data_pkt->req;
757                delete fst_data_pkt;
758                delete snd_data_pkt->req;
759                delete snd_data_pkt;
760                sreqLow = NULL;
761                sreqHigh = NULL;
762            }
763
764            req = NULL;
765
766            // If the access didn't succeed, tell the LSQ by setting
767            // the retry thread id.
768            lsq->setRetryTid(lsqID);
769        } else if (TheISA::HasUnalignedMemAcc && sreqLow) {
770            completedFirst = true;
771
772            // The first packet was sent without problems, so send this one
773            // too. If there is a problem with this packet then the whole
774            // load will be squashed, so indicate this to the state object.
775            // The first packet will return in completeDataAccess and be
776            // handled there.
777            ++usedPorts;
778            if (!dcachePort->sendTiming(snd_data_pkt)) {
779
780                // The main packet will be deleted in completeDataAccess.
781                delete snd_data_pkt->req;
782                delete snd_data_pkt;
783
784                state->complete();
785
786                req = NULL;
787                sreqHigh = NULL;
788
789                lsq->setRetryTid(lsqID);
790            }
791        }
792    }
793
794    // If the cache was blocked, or has become blocked due to the access,
795    // handle it.
796    if (lsq->cacheBlocked()) {
797        if (req)
798            delete req;
799        if (TheISA::HasUnalignedMemAcc && sreqLow && !completedFirst) {
800            delete sreqLow;
801            delete sreqHigh;
802        }
803
804        ++lsqCacheBlocked;
805
806        iewStage->decrWb(load_inst->seqNum);
807        // There's an older load that's already going to squash.
808        if (isLoadBlocked && blockedLoadSeqNum < load_inst->seqNum)
809            return NoFault;
810
811        // Record that the load was blocked due to memory.  This
812        // load will squash all instructions after it, be
813        // refetched, and re-executed.
814        isLoadBlocked = true;
815        loadBlockedHandled = false;
816        blockedLoadSeqNum = load_inst->seqNum;
817        // No fault occurred, even though the interface is blocked.
818        return NoFault;
819    }
820
821    return NoFault;
822}
823
824template <class Impl>
825Fault
826LSQUnit<Impl>::write(Request *req, Request *sreqLow, Request *sreqHigh,
827                     uint8_t *data, int store_idx)
828{
829    assert(storeQueue[store_idx].inst);
830
831    DPRINTF(LSQUnit, "Doing write to store idx %i, addr %#x data %#x"
832            " | storeHead:%i [sn:%i]\n",
833            store_idx, req->getPaddr(), data, storeHead,
834            storeQueue[store_idx].inst->seqNum);
835
836    storeQueue[store_idx].req = req;
837    storeQueue[store_idx].sreqLow = sreqLow;
838    storeQueue[store_idx].sreqHigh = sreqHigh;
839    unsigned size = req->getSize();
840    storeQueue[store_idx].size = size;
841    assert(size <= sizeof(storeQueue[store_idx].data));
842
843    // Split stores can only occur in ISAs with unaligned memory accesses.  If
844    // a store request has been split, sreqLow and sreqHigh will be non-null.
845    if (TheISA::HasUnalignedMemAcc && sreqLow) {
846        storeQueue[store_idx].isSplit = true;
847    }
848
849    memcpy(storeQueue[store_idx].data, data, size);
850
851    // This function only writes the data to the store queue, so no fault
852    // can happen here.
853    return NoFault;
854}

#endif // __CPU_O3_LSQ_UNIT_HH__