lsq_unit.hh revision 2329
1/*
2 * Copyright (c) 2004-2006 The Regents of The University of Michigan
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#ifndef __CPU_O3_LSQ_UNIT_HH__
30#define __CPU_O3_LSQ_UNIT_HH__
31
32#include <algorithm>
33#include <map>
34#include <queue>
35
36#include "arch/faults.hh"
37#include "config/full_system.hh"
38#include "base/hashmap.hh"
39#include "cpu/inst_seq.hh"
40#include "mem/mem_interface.hh"
41//#include "mem/page_table.hh"
42//#include "sim/debug.hh"
43//#include "sim/sim_object.hh"
44
45/**
46 * Class that implements the actual LQ and SQ for each specific
47 * thread.  Both are circular queues; load entries are freed upon
48 * committing, while store entries are freed once they writeback. The
49 * LSQUnit tracks if there are memory ordering violations, and also
50 * detects partial load to store forwarding cases (a store only has
51 * part of a load's data) that requires the load to wait until the
52 * store writes back. In the former case it holds onto the instruction
53 * until the dependence unit looks at it, and in the latter it stalls
54 * the LSQ until the store writes back. At that point the load is
55 * replayed.
56 */
57template <class Impl>
58class LSQUnit {
59  protected:
60    typedef TheISA::IntReg IntReg;
61  public:
62    typedef typename Impl::Params Params;
63    typedef typename Impl::FullCPU FullCPU;
64    typedef typename Impl::DynInstPtr DynInstPtr;
65    typedef typename Impl::CPUPol::IEW IEW;
66    typedef typename Impl::CPUPol::IssueStruct IssueStruct;
67
68  private:
69    class StoreCompletionEvent : public Event {
70      public:
71        /** Constructs a store completion event. */
72        StoreCompletionEvent(int store_idx, Event *wb_event, LSQUnit *lsq_ptr);
73
74        /** Processes the store completion event. */
75        void process();
76
77        /** Returns the description of this event. */
78        const char *description();
79
80        /** The writeback event for the store.  Needed for store
81         * conditionals.
82         */
83        Event *wbEvent;
84
85      private:
86        /** The store index of the store being written back. */
87        int storeIdx;
88      private:
89        /** The pointer to the LSQ unit that issued the store. */
90        LSQUnit<Impl> *lsqPtr;
91    };
92
93  public:
94    /** Constructs an LSQ unit. init() must be called prior to use. */
95    LSQUnit();
96
97    /** Initializes the LSQ unit with the specified number of entries. */
98    void init(Params *params, unsigned maxLQEntries,
99              unsigned maxSQEntries, unsigned id);
100
101    /** Returns the name of the LSQ unit. */
102    std::string name() const;
103
104    /** Sets the CPU pointer. */
105    void setCPU(FullCPU *cpu_ptr)
106    { cpu = cpu_ptr; }
107
108    /** Sets the IEW stage pointer. */
109    void setIEW(IEW *iew_ptr)
110    { iewStage = iew_ptr; }
111
112    /** Sets the page table pointer. */
113//    void setPageTable(PageTable *pt_ptr);
114
115    void switchOut();
116
117    void takeOverFrom();
118
119    bool isSwitchedOut() { return switchedOut; }
120
121    /** Ticks the LSQ unit, which in this case only resets the number of
122     * used cache ports.
123     * @todo: Move the number of used ports up to the LSQ level so it can
124     * be shared by all LSQ units.
125     */
126    void tick() { usedPorts = 0; }
127
128    /** Inserts an instruction. */
129    void insert(DynInstPtr &inst);
130    /** Inserts a load instruction. */
131    void insertLoad(DynInstPtr &load_inst);
132    /** Inserts a store instruction. */
133    void insertStore(DynInstPtr &store_inst);
134
135    /** Executes a load instruction. */
136    Fault executeLoad(DynInstPtr &inst);
137
138    Fault executeLoad(int lq_idx) { panic("Not implemented"); return NoFault; }
139    /** Executes a store instruction. */
140    Fault executeStore(DynInstPtr &inst);
141
142    /** Commits the head load. */
143    void commitLoad();
144    /** Commits loads older than a specific sequence number. */
145    void commitLoads(InstSeqNum &youngest_inst);
146
147    /** Commits stores older than a specific sequence number. */
148    void commitStores(InstSeqNum &youngest_inst);
149
150    /** Writes back stores. */
151    void writebackStores();
152
153    // @todo: Include stats in the LSQ unit.
154    //void regStats();
155
156    /** Clears all the entries in the LQ. */
157    void clearLQ();
158
159    /** Clears all the entries in the SQ. */
160    void clearSQ();
161
162    /** Resizes the LQ to a given size. */
163    void resizeLQ(unsigned size);
164
165    /** Resizes the SQ to a given size. */
166    void resizeSQ(unsigned size);
167
168    /** Squashes all instructions younger than a specific sequence number. */
169    void squash(const InstSeqNum &squashed_num);
170
171    /** Returns if there is a memory ordering violation. Value is reset upon
172     * call to getMemDepViolator().
173     */
174    bool violation() { return memDepViolator; }
175
176    /** Returns the memory ordering violator. */
177    DynInstPtr getMemDepViolator();
178
179    /** Returns if a load became blocked due to the memory system. */
180    bool loadBlocked()
181    { return isLoadBlocked; }
182
183    void clearLoadBlocked()
184    { isLoadBlocked = false; }
185
186    bool isLoadBlockedHandled()
187    { return loadBlockedHandled; }
188
189    void setLoadBlockedHandled()
190    { loadBlockedHandled = true; }
191
192    /** Returns the number of free entries (min of free LQ and SQ entries). */
193    unsigned numFreeEntries();
194
195    /** Returns the number of loads ready to execute. */
196    int numLoadsReady();
197
198    /** Returns the number of loads in the LQ. */
199    int numLoads() { return loads; }
200
201    /** Returns the number of stores in the SQ. */
202    int numStores() { return stores; }
203
204    /** Returns if either the LQ or SQ is full. */
205    bool isFull() { return lqFull() || sqFull(); }
206
207    /** Returns if the LQ is full. */
208    bool lqFull() { return loads >= (LQEntries - 1); }
209
210    /** Returns if the SQ is full. */
211    bool sqFull() { return stores >= (SQEntries - 1); }
212
213    /** Returns the number of instructions in the LSQ. */
214    unsigned getCount() { return loads + stores; }
215
216    /** Returns if there are any stores to writeback. */
217    bool hasStoresToWB() { return storesToWB; }
218
219    /** Returns the number of stores to writeback. */
220    int numStoresToWB() { return storesToWB; }
221
222    /** Returns if the LSQ unit will writeback on this cycle. */
223    bool willWB() { return storeQueue[storeWBIdx].canWB &&
224                        !storeQueue[storeWBIdx].completed &&
225                        !dcacheInterface->isBlocked(); }
226
227  private:
228    /** Completes the store at the specified index. */
229    void completeStore(int store_idx);
230
231    /** Increments the given store index (circular queue). */
232    inline void incrStIdx(int &store_idx);
233    /** Decrements the given store index (circular queue). */
234    inline void decrStIdx(int &store_idx);
235    /** Increments the given load index (circular queue). */
236    inline void incrLdIdx(int &load_idx);
237    /** Decrements the given load index (circular queue). */
238    inline void decrLdIdx(int &load_idx);
239
240  public:
241    /** Debugging function to dump instructions in the LSQ. */
242    void dumpInsts();
243
244  private:
245    /** Pointer to the CPU. */
246    FullCPU *cpu;
247
248    /** Pointer to the IEW stage. */
249    IEW *iewStage;
250
251    /** Pointer to the D-cache. */
252    MemInterface *dcacheInterface;
253
254    /** Pointer to the page table. */
255//    PageTable *pTable;
256
257  public:
258    struct SQEntry {
259        /** Constructs an empty store queue entry. */
260        SQEntry()
261            : inst(NULL), req(NULL), size(0), data(0),
262              canWB(0), committed(0), completed(0)
263        { }
264
265        /** Constructs a store queue entry for a given instruction. */
266        SQEntry(DynInstPtr &_inst)
267            : inst(_inst), req(NULL), size(0), data(0),
268              canWB(0), committed(0), completed(0)
269        { }
270
271        /** The store instruction. */
272        DynInstPtr inst;
273        /** The memory request for the store. */
274        MemReqPtr req;
275        /** The size of the store. */
276        int size;
277        /** The store data. */
278        IntReg data;
279        /** Whether or not the store can writeback. */
280        bool canWB;
281        /** Whether or not the store is committed. */
282        bool committed;
283        /** Whether or not the store is completed. */
284        bool completed;
285    };
286
287  private:
288    /** The LSQUnit thread id. */
289    unsigned lsqID;
290
291    /** The store queue. */
292    std::vector<SQEntry> storeQueue;
293
294    /** The load queue. */
295    std::vector<DynInstPtr> loadQueue;
296
297    /** The number of LQ entries, plus a sentinel entry (circular queue).
298     *  @todo: Consider having var that records the true number of LQ entries.
299     */
300    unsigned LQEntries;
301    /** The number of SQ entries, plus a sentinel entry (circular queue).
302     *  @todo: Consider having var that records the true number of SQ entries.
303     */
304    unsigned SQEntries;
305
306    /** The number of load instructions in the LQ. */
307    int loads;
308    /** The number of store instructions in the SQ. */
309    int stores;
310    /** The number of store instructions in the SQ waiting to writeback. */
311    int storesToWB;
312
313    /** The index of the head instruction in the LQ. */
314    int loadHead;
315    /** The index of the tail instruction in the LQ. */
316    int loadTail;
317
318    /** The index of the head instruction in the SQ. */
319    int storeHead;
320    /** The index of the first instruction that may be ready to be
321     * written back, and has not yet been written back.
322     */
323    int storeWBIdx;
324    /** The index of the tail instruction in the SQ. */
325    int storeTail;
326
327    /// @todo Consider moving to a more advanced model with write vs read ports
328    /** The number of cache ports available each cycle. */
329    int cachePorts;
330
331    /** The number of used cache ports in this cycle. */
332    int usedPorts;
333
334    bool switchedOut;
335
336    //list<InstSeqNum> mshrSeqNums;
337
338    /** Wire to read information from the issue stage time queue. */
339    typename TimeBuffer<IssueStruct>::wire fromIssue;
340
341    /** Whether or not the LSQ is stalled. */
342    bool stalled;
343    /** The store that causes the stall due to partial store to load
344     * forwarding.
345     */
346    InstSeqNum stallingStoreIsn;
347    /** The index of the above store. */
348    int stallingLoadIdx;
349
350    /** Whether or not a load is blocked due to the memory system. */
351    bool isLoadBlocked;
352
353    bool loadBlockedHandled;
354
355    InstSeqNum blockedLoadSeqNum;
356
357    /** The oldest load that caused a memory ordering violation. */
358    DynInstPtr memDepViolator;
359
360    // Will also need how many read/write ports the Dcache has.  Or keep track
361    // of that in stage that is one level up, and only call executeLoad/Store
362    // the appropriate number of times.
363/*
364    // total number of loads forwaded from LSQ stores
365    Stats::Vector<> lsq_forw_loads;
366
367    // total number of loads ignored due to invalid addresses
368    Stats::Vector<> inv_addr_loads;
369
370    // total number of software prefetches ignored due to invalid addresses
371    Stats::Vector<> inv_addr_swpfs;
372
373    // total non-speculative bogus addresses seen (debug var)
374    Counter sim_invalid_addrs;
375    Stats::Vector<> fu_busy;  //cumulative fu busy
376
377    // ready loads blocked due to memory disambiguation
378    Stats::Vector<> lsq_blocked_loads;
379
380    Stats::Scalar<> lsqInversion;
381*/
382  public:
383    /** Executes the load at the given index. */
384    template <class T>
385    Fault read(MemReqPtr &req, T &data, int load_idx);
386
387    /** Executes the store at the given index. */
388    template <class T>
389    Fault write(MemReqPtr &req, T &data, int store_idx);
390
391    /** Returns the index of the head load instruction. */
392    int getLoadHead() { return loadHead; }
393    /** Returns the sequence number of the head load instruction. */
394    InstSeqNum getLoadHeadSeqNum()
395    {
396        if (loadQueue[loadHead]) {
397            return loadQueue[loadHead]->seqNum;
398        } else {
399            return 0;
400        }
401
402    }
403
404    /** Returns the index of the head store instruction. */
405    int getStoreHead() { return storeHead; }
406    /** Returns the sequence number of the head store instruction. */
407    InstSeqNum getStoreHeadSeqNum()
408    {
409        if (storeQueue[storeHead].inst) {
410            return storeQueue[storeHead].inst->seqNum;
411        } else {
412            return 0;
413        }
414
415    }
416
417    /** Returns whether or not the LSQ unit is stalled. */
418    bool isStalled()  { return stalled; }
419};
420
421template <class Impl>
422template <class T>
423Fault
424LSQUnit<Impl>::read(MemReqPtr &req, T &data, int load_idx)
425{
426    assert(loadQueue[load_idx]);
427
428    assert(!loadQueue[load_idx]->isExecuted());
429
430    // Make sure this isn't an uncacheable access
431    // A bit of a hackish way to get uncached accesses to work only if they're
432    // at the head of the LSQ and are ready to commit (at the head of the ROB
433    // too).
434    if (req->flags & UNCACHEABLE &&
435        (load_idx != loadHead || !loadQueue[load_idx]->reachedCommit)) {
436        iewStage->rescheduleMemInst(loadQueue[load_idx]);
437        return TheISA::genMachineCheckFault();
438    }
439
440    // Check the SQ for any previous stores that might lead to forwarding
441    int store_idx = loadQueue[load_idx]->sqIdx;
442
443    int store_size = 0;
444
445    DPRINTF(LSQUnit, "Read called, load idx: %i, store idx: %i, "
446            "storeHead: %i addr: %#x\n",
447            load_idx, store_idx, storeHead, req->paddr);
448
449#if 0
450    if (req->flags & LOCKED) {
451        cpu->lockAddr = req->paddr;
452        cpu->lockFlag = true;
453    }
454#endif
455            req->cmd = Read;
456            assert(!req->completionEvent);
457            req->completionEvent = NULL;
458            req->time = curTick;
459
460    while (store_idx != -1) {
461        // End once we've reached the top of the LSQ
462        if (store_idx == storeWBIdx) {
463            break;
464        }
465
466        // Move the index to one younger
467        if (--store_idx < 0)
468            store_idx += SQEntries;
469
470        assert(storeQueue[store_idx].inst);
471
472        store_size = storeQueue[store_idx].size;
473
474        if (store_size == 0)
475            continue;
476
477        // Check if the store data is within the lower and upper bounds of
478        // addresses that the request needs.
479        bool store_has_lower_limit =
480            req->vaddr >= storeQueue[store_idx].inst->effAddr;
481        bool store_has_upper_limit =
482            (req->vaddr + req->size) <= (storeQueue[store_idx].inst->effAddr +
483                                         store_size);
484        bool lower_load_has_store_part =
485            req->vaddr < (storeQueue[store_idx].inst->effAddr +
486                           store_size);
487        bool upper_load_has_store_part =
488            (req->vaddr + req->size) > storeQueue[store_idx].inst->effAddr;
489
490        // If the store's data has all of the data needed, we can forward.
491        if (store_has_lower_limit && store_has_upper_limit) {
492            // Get shift amount for offset into the store's data.
493            int shift_amt = req->vaddr & (store_size - 1);
494            // @todo: Magic number, assumes byte addressing
495            shift_amt = shift_amt << 3;
496
497            // Cast this to type T?
498            data = storeQueue[store_idx].data >> shift_amt;
499
500            assert(!req->data);
501            req->data = new uint8_t[64];
502
503            memcpy(req->data, &data, req->size);
504
505            DPRINTF(LSQUnit, "Forwarding from store idx %i to load to "
506                    "addr %#x, data %#x\n",
507                    store_idx, req->vaddr, *(req->data));
508
509            typename IEW::LdWritebackEvent *wb =
510                new typename IEW::LdWritebackEvent(loadQueue[load_idx],
511                                                   iewStage);
512
513            // We'll say this has a 1 cycle load-store forwarding latency
514            // for now.
515            // @todo: Need to make this a parameter.
516            wb->schedule(curTick);
517
518            // Should keep track of stat for forwarded data
519            return NoFault;
520        } else if ((store_has_lower_limit && lower_load_has_store_part) ||
521                   (store_has_upper_limit && upper_load_has_store_part) ||
522                   (lower_load_has_store_part && upper_load_has_store_part)) {
523            // This is the partial store-load forwarding case where a store
524            // has only part of the load's data.
525
526            // If it's already been written back, then don't worry about
527            // stalling on it.
528            if (storeQueue[store_idx].completed) {
529                continue;
530            }
531
532            // Must stall load and force it to retry, so long as it's the oldest
533            // load that needs to do so.
534            if (!stalled ||
535                (stalled &&
536                 loadQueue[load_idx]->seqNum <
537                 loadQueue[stallingLoadIdx]->seqNum)) {
538                stalled = true;
539                stallingStoreIsn = storeQueue[store_idx].inst->seqNum;
540                stallingLoadIdx = load_idx;
541            }
542
543            // Tell IQ/mem dep unit that this instruction will need to be
544            // rescheduled eventually
545            iewStage->rescheduleMemInst(loadQueue[load_idx]);
546
547            // Do not generate a writeback event as this instruction is not
548            // complete.
549            DPRINTF(LSQUnit, "Load-store forwarding mis-match. "
550                    "Store idx %i to load addr %#x\n",
551                    store_idx, req->vaddr);
552
553            return NoFault;
554        }
555    }
556
557    // If there's no forwarding case, then go access memory
558    DynInstPtr inst = loadQueue[load_idx];
559
560    DPRINTF(LSQUnit, "Doing functional access for inst [sn:%lli] PC %#x\n",
561            loadQueue[load_idx]->seqNum, loadQueue[load_idx]->readPC());
562
563    assert(!req->data);
564    req->data = new uint8_t[64];
565    Fault fault = cpu->read(req, data);
566    memcpy(req->data, &data, sizeof(T));
567
568    ++usedPorts;
569
570    // if we have a cache, do cache access too
571    if (fault == NoFault && dcacheInterface) {
572        if (dcacheInterface->isBlocked()) {
573            // There's an older load that's already going to squash.
574            if (isLoadBlocked && blockedLoadSeqNum < inst->seqNum)
575                return NoFault;
576
577            // Record that the load was blocked due to memory.  This
578            // load will squash all instructions after it, be
579            // refetched, and re-executed.
580            isLoadBlocked = true;
581            loadBlockedHandled = false;
582            blockedLoadSeqNum = inst->seqNum;
583            // No fault occurred, even though the interface is blocked.
584            return NoFault;
585        }
586
587        DPRINTF(LSQUnit, "Doing timing access for inst PC %#x\n",
588                loadQueue[load_idx]->readPC());
589
590        assert(!req->completionEvent);
591        req->completionEvent =
592            new typename IEW::LdWritebackEvent(loadQueue[load_idx], iewStage);
593        MemAccessResult result = dcacheInterface->access(req);
594
595        assert(dcacheInterface->doEvents());
596
597        if (result != MA_HIT) {
598            DPRINTF(LSQUnit, "LSQUnit: D-cache miss!\n");
599            DPRINTF(Activity, "Activity: ld accessing mem miss [sn:%lli]\n",
600                    inst->seqNum);
601        } else {
602            DPRINTF(LSQUnit, "LSQUnit: D-cache hit!\n");
603            DPRINTF(Activity, "Activity: ld accessing mem hit [sn:%lli]\n",
604                    inst->seqNum);
605        }
606    }
607
608    return fault;
609}
610
611template <class Impl>
612template <class T>
613Fault
614LSQUnit<Impl>::write(MemReqPtr &req, T &data, int store_idx)
615{
616    assert(storeQueue[store_idx].inst);
617
618    DPRINTF(LSQUnit, "Doing write to store idx %i, addr %#x data %#x"
619            " | storeHead:%i [sn:%i]\n",
620            store_idx, req->paddr, data, storeHead,
621            storeQueue[store_idx].inst->seqNum);
622
623    storeQueue[store_idx].req = req;
624    storeQueue[store_idx].size = sizeof(T);
625    storeQueue[store_idx].data = data;
626
627    // This function only writes the data to the store queue, so no fault
628    // can happen here.
629    return NoFault;
630}
631
632#endif // __CPU_O3_LSQ_UNIT_HH__
633