lsq_unit.hh revision 4395
1/*
2 * Copyright (c) 2004-2006 The Regents of The University of Michigan
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 *
28 * Authors: Kevin Lim
29 *          Korey Sewell
30 */
31
32#ifndef __CPU_O3_LSQ_UNIT_HH__
33#define __CPU_O3_LSQ_UNIT_HH__
34
35#include <algorithm>
36#include <cstring>
37#include <map>
38#include <queue>
39
40#include "arch/faults.hh"
41#include "arch/locked_mem.hh"
42#include "config/full_system.hh"
43#include "base/hashmap.hh"
44#include "cpu/inst_seq.hh"
45#include "mem/packet.hh"
46#include "mem/port.hh"
47
48/**
49 * Class that implements the actual LQ and SQ for each specific
50 * thread.  Both are circular queues; load entries are freed upon
51 * committing, while store entries are freed once they writeback. The
52 * LSQUnit tracks if there are memory ordering violations, and also
53 * detects partial load to store forwarding cases (a store only has
54 * part of a load's data) that requires the load to wait until the
55 * store writes back. In the former case it holds onto the instruction
56 * until the dependence unit looks at it, and in the latter it stalls
57 * the LSQ until the store writes back. At that point the load is
58 * replayed.
59 */
60template <class Impl>
61class LSQUnit {
62  protected:
63    typedef TheISA::IntReg IntReg;
64  public:
65    typedef typename Impl::Params Params;
66    typedef typename Impl::O3CPU O3CPU;
67    typedef typename Impl::DynInstPtr DynInstPtr;
68    typedef typename Impl::CPUPol::IEW IEW;
69    typedef typename Impl::CPUPol::LSQ LSQ;
70    typedef typename Impl::CPUPol::IssueStruct IssueStruct;
71
72  public:
73    /** Constructs an LSQ unit. init() must be called prior to use. */
74    LSQUnit();
75
76    /** Initializes the LSQ unit with the specified number of entries. */
77    void init(O3CPU *cpu_ptr, IEW *iew_ptr, Params *params, LSQ *lsq_ptr,
78              unsigned maxLQEntries, unsigned maxSQEntries, unsigned id);
79
80    /** Returns the name of the LSQ unit. */
81    std::string name() const;
82
83    /** Registers statistics. */
84    void regStats();
85
86    /** Sets the pointer to the dcache port. */
87    void setDcachePort(Port *dcache_port);
88
89    /** Switches out LSQ unit. */
90    void switchOut();
91
92    /** Takes over from another CPU's thread. */
93    void takeOverFrom();
94
95    /** Returns if the LSQ is switched out. */
96    bool isSwitchedOut() { return switchedOut; }
97
98    /** Ticks the LSQ unit, which in this case only resets the number of
99     * used cache ports.
100     * @todo: Move the number of used ports up to the LSQ level so it can
101     * be shared by all LSQ units.
102     */
103    void tick() { usedPorts = 0; }
104
105    /** Inserts an instruction. */
106    void insert(DynInstPtr &inst);
107    /** Inserts a load instruction. */
108    void insertLoad(DynInstPtr &load_inst);
109    /** Inserts a store instruction. */
110    void insertStore(DynInstPtr &store_inst);
111
112    /** Executes a load instruction. */
113    Fault executeLoad(DynInstPtr &inst);
114
115    Fault executeLoad(int lq_idx) { panic("Not implemented"); return NoFault; }
116    /** Executes a store instruction. */
117    Fault executeStore(DynInstPtr &inst);
118
119    /** Commits the head load. */
120    void commitLoad();
121    /** Commits loads older than a specific sequence number. */
122    void commitLoads(InstSeqNum &youngest_inst);
123
124    /** Commits stores older than a specific sequence number. */
125    void commitStores(InstSeqNum &youngest_inst);
126
127    /** Writes back stores. */
128    void writebackStores();
129
130    /** Completes the data access that has been returned from the
131     * memory system. */
132    void completeDataAccess(PacketPtr pkt);
133
134    /** Clears all the entries in the LQ. */
135    void clearLQ();
136
137    /** Clears all the entries in the SQ. */
138    void clearSQ();
139
140    /** Resizes the LQ to a given size. */
141    void resizeLQ(unsigned size);
142
143    /** Resizes the SQ to a given size. */
144    void resizeSQ(unsigned size);
145
146    /** Squashes all instructions younger than a specific sequence number. */
147    void squash(const InstSeqNum &squashed_num);
148
149    /** Returns if there is a memory ordering violation. Value is reset upon
150     * call to getMemDepViolator().
151     */
152    bool violation() { return memDepViolator; }
153
154    /** Returns the memory ordering violator. */
155    DynInstPtr getMemDepViolator();
156
157    /** Returns if a load became blocked due to the memory system. */
158    bool loadBlocked()
159    { return isLoadBlocked; }
160
161    /** Clears the signal that a load became blocked. */
162    void clearLoadBlocked()
163    { isLoadBlocked = false; }
164
165    /** Returns if the blocked load was handled. */
166    bool isLoadBlockedHandled()
167    { return loadBlockedHandled; }
168
169    /** Records the blocked load as being handled. */
170    void setLoadBlockedHandled()
171    { loadBlockedHandled = true; }
172
173    /** Returns the number of free entries (min of free LQ and SQ entries). */
174    unsigned numFreeEntries();
175
176    /** Returns the number of loads ready to execute. */
177    int numLoadsReady();
178
179    /** Returns the number of loads in the LQ. */
180    int numLoads() { return loads; }
181
182    /** Returns the number of stores in the SQ. */
183    int numStores() { return stores; }
184
185    /** Returns if either the LQ or SQ is full. */
186    bool isFull() { return lqFull() || sqFull(); }
187
188    /** Returns if the LQ is full. */
189    bool lqFull() { return loads >= (LQEntries - 1); }
190
191    /** Returns if the SQ is full. */
192    bool sqFull() { return stores >= (SQEntries - 1); }
193
194    /** Returns the number of instructions in the LSQ. */
195    unsigned getCount() { return loads + stores; }
196
197    /** Returns if there are any stores to writeback. */
198    bool hasStoresToWB() { return storesToWB; }
199
200    /** Returns the number of stores to writeback. */
201    int numStoresToWB() { return storesToWB; }
202
203    /** Returns if the LSQ unit will writeback on this cycle. */
204    bool willWB() { return storeQueue[storeWBIdx].canWB &&
205                        !storeQueue[storeWBIdx].completed &&
206                        !isStoreBlocked; }
207
208    /** Handles doing the retry. */
209    void recvRetry();
210
211  private:
212    /** Writes back the instruction, sending it to IEW. */
213    void writeback(DynInstPtr &inst, PacketPtr pkt);
214
215    /** Handles completing the send of a store to memory. */
216    void storePostSend(PacketPtr pkt);
217
218    /** Completes the store at the specified index. */
219    void completeStore(int store_idx);
220
221    /** Increments the given store index (circular queue). */
222    inline void incrStIdx(int &store_idx);
223    /** Decrements the given store index (circular queue). */
224    inline void decrStIdx(int &store_idx);
225    /** Increments the given load index (circular queue). */
226    inline void incrLdIdx(int &load_idx);
227    /** Decrements the given load index (circular queue). */
228    inline void decrLdIdx(int &load_idx);
229
230  public:
231    /** Debugging function to dump instructions in the LSQ. */
232    void dumpInsts();
233
234  private:
235    /** Pointer to the CPU. */
236    O3CPU *cpu;
237
238    /** Pointer to the IEW stage. */
239    IEW *iewStage;
240
241    /** Pointer to the LSQ. */
242    LSQ *lsq;
243
244    /** Pointer to the dcache port.  Used only for sending. */
245    Port *dcachePort;
246
247    /** Derived class to hold any sender state the LSQ needs. */
248    class LSQSenderState : public Packet::SenderState
249    {
250      public:
251        /** Default constructor. */
252        LSQSenderState()
253            : noWB(false)
254        { }
255
256        /** Instruction who initiated the access to memory. */
257        DynInstPtr inst;
258        /** Whether or not it is a load. */
259        bool isLoad;
260        /** The LQ/SQ index of the instruction. */
261        int idx;
262        /** Whether or not the instruction will need to writeback. */
263        bool noWB;
264    };
265
266    /** Writeback event, specifically for when stores forward data to loads. */
267    class WritebackEvent : public Event {
268      public:
269        /** Constructs a writeback event. */
270        WritebackEvent(DynInstPtr &_inst, PacketPtr pkt, LSQUnit *lsq_ptr);
271
272        /** Processes the writeback event. */
273        void process();
274
275        /** Returns the description of this event. */
276        const char *description();
277
278      private:
279        /** Instruction whose results are being written back. */
280        DynInstPtr inst;
281
282        /** The packet that would have been sent to memory. */
283        PacketPtr pkt;
284
285        /** The pointer to the LSQ unit that issued the store. */
286        LSQUnit<Impl> *lsqPtr;
287    };
288
289  public:
290    struct SQEntry {
291        /** Constructs an empty store queue entry. */
292        SQEntry()
293            : inst(NULL), req(NULL), size(0),
294              canWB(0), committed(0), completed(0)
295        {
296            std::memset(data, 0, sizeof(data));
297        }
298
299        /** Constructs a store queue entry for a given instruction. */
300        SQEntry(DynInstPtr &_inst)
301            : inst(_inst), req(NULL), size(0),
302              canWB(0), committed(0), completed(0)
303        {
304            std::memset(data, 0, sizeof(data));
305        }
306
307        /** The store instruction. */
308        DynInstPtr inst;
309        /** The request for the store. */
310        RequestPtr req;
311        /** The size of the store. */
312        int size;
313        /** The store data. */
314        char data[sizeof(IntReg)];
315        /** Whether or not the store can writeback. */
316        bool canWB;
317        /** Whether or not the store is committed. */
318        bool committed;
319        /** Whether or not the store is completed. */
320        bool completed;
321    };
322
323  private:
324    /** The LSQUnit thread id. */
325    unsigned lsqID;
326
327    /** The store queue. */
328    std::vector<SQEntry> storeQueue;
329
330    /** The load queue. */
331    std::vector<DynInstPtr> loadQueue;
332
333    /** The number of LQ entries, plus a sentinel entry (circular queue).
334     *  @todo: Consider having var that records the true number of LQ entries.
335     */
336    unsigned LQEntries;
337    /** The number of SQ entries, plus a sentinel entry (circular queue).
338     *  @todo: Consider having var that records the true number of SQ entries.
339     */
340    unsigned SQEntries;
341
342    /** The number of load instructions in the LQ. */
343    int loads;
344    /** The number of store instructions in the SQ. */
345    int stores;
346    /** The number of store instructions in the SQ waiting to writeback. */
347    int storesToWB;
348
349    /** The index of the head instruction in the LQ. */
350    int loadHead;
351    /** The index of the tail instruction in the LQ. */
352    int loadTail;
353
354    /** The index of the head instruction in the SQ. */
355    int storeHead;
356    /** The index of the first instruction that may be ready to be
357     * written back, and has not yet been written back.
358     */
359    int storeWBIdx;
360    /** The index of the tail instruction in the SQ. */
361    int storeTail;
362
363    /// @todo Consider moving to a more advanced model with write vs read ports
364    /** The number of cache ports available each cycle. */
365    int cachePorts;
366
367    /** The number of used cache ports in this cycle. */
368    int usedPorts;
369
370    /** Is the LSQ switched out. */
371    bool switchedOut;
372
373    //list<InstSeqNum> mshrSeqNums;
374
375    /** Wire to read information from the issue stage time queue. */
376    typename TimeBuffer<IssueStruct>::wire fromIssue;
377
378    /** Whether or not the LSQ is stalled. */
379    bool stalled;
380    /** The store that causes the stall due to partial store to load
381     * forwarding.
382     */
383    InstSeqNum stallingStoreIsn;
384    /** The index of the above store. */
385    int stallingLoadIdx;
386
387    /** The packet that needs to be retried. */
388    PacketPtr retryPkt;
389
390    /** Whehter or not a store is blocked due to the memory system. */
391    bool isStoreBlocked;
392
393    /** Whether or not a load is blocked due to the memory system. */
394    bool isLoadBlocked;
395
396    /** Has the blocked load been handled. */
397    bool loadBlockedHandled;
398
399    /** The sequence number of the blocked load. */
400    InstSeqNum blockedLoadSeqNum;
401
402    /** The oldest load that caused a memory ordering violation. */
403    DynInstPtr memDepViolator;
404
405    // Will also need how many read/write ports the Dcache has.  Or keep track
406    // of that in stage that is one level up, and only call executeLoad/Store
407    // the appropriate number of times.
408    /** Total number of loads forwaded from LSQ stores. */
409    Stats::Scalar<> lsqForwLoads;
410
411    /** Total number of loads ignored due to invalid addresses. */
412    Stats::Scalar<> invAddrLoads;
413
414    /** Total number of squashed loads. */
415    Stats::Scalar<> lsqSquashedLoads;
416
417    /** Total number of responses from the memory system that are
418     * ignored due to the instruction already being squashed. */
419    Stats::Scalar<> lsqIgnoredResponses;
420
421    /** Tota number of memory ordering violations. */
422    Stats::Scalar<> lsqMemOrderViolation;
423
424    /** Total number of squashed stores. */
425    Stats::Scalar<> lsqSquashedStores;
426
427    /** Total number of software prefetches ignored due to invalid addresses. */
428    Stats::Scalar<> invAddrSwpfs;
429
430    /** Ready loads blocked due to partial store-forwarding. */
431    Stats::Scalar<> lsqBlockedLoads;
432
433    /** Number of loads that were rescheduled. */
434    Stats::Scalar<> lsqRescheduledLoads;
435
436    /** Number of times the LSQ is blocked due to the cache. */
437    Stats::Scalar<> lsqCacheBlocked;
438
439  public:
440    /** Executes the load at the given index. */
441    template <class T>
442    Fault read(Request *req, T &data, int load_idx);
443
444    /** Executes the store at the given index. */
445    template <class T>
446    Fault write(Request *req, T &data, int store_idx);
447
448    /** Returns the index of the head load instruction. */
449    int getLoadHead() { return loadHead; }
450    /** Returns the sequence number of the head load instruction. */
451    InstSeqNum getLoadHeadSeqNum()
452    {
453        if (loadQueue[loadHead]) {
454            return loadQueue[loadHead]->seqNum;
455        } else {
456            return 0;
457        }
458
459    }
460
461    /** Returns the index of the head store instruction. */
462    int getStoreHead() { return storeHead; }
463    /** Returns the sequence number of the head store instruction. */
464    InstSeqNum getStoreHeadSeqNum()
465    {
466        if (storeQueue[storeHead].inst) {
467            return storeQueue[storeHead].inst->seqNum;
468        } else {
469            return 0;
470        }
471
472    }
473
474    /** Returns whether or not the LSQ unit is stalled. */
475    bool isStalled()  { return stalled; }
476};
477
478template <class Impl>
479template <class T>
480Fault
481LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
482{
483    DynInstPtr load_inst = loadQueue[load_idx];
484
485    assert(load_inst);
486
487    assert(!load_inst->isExecuted());
488
489    // Make sure this isn't an uncacheable access
490    // A bit of a hackish way to get uncached accesses to work only if they're
491    // at the head of the LSQ and are ready to commit (at the head of the ROB
492    // too).
493    if (req->isUncacheable() &&
494        (load_idx != loadHead || !load_inst->isAtCommit())) {
495        iewStage->rescheduleMemInst(load_inst);
496        ++lsqRescheduledLoads;
497
498        // Must delete request now that it wasn't handed off to
499        // memory.  This is quite ugly.  @todo: Figure out the proper
500        // place to really handle request deletes.
501        delete req;
502        return TheISA::genMachineCheckFault();
503    }
504
505    // Check the SQ for any previous stores that might lead to forwarding
506    int store_idx = load_inst->sqIdx;
507
508    int store_size = 0;
509
510    DPRINTF(LSQUnit, "Read called, load idx: %i, store idx: %i, "
511            "storeHead: %i addr: %#x\n",
512            load_idx, store_idx, storeHead, req->getPaddr());
513
514    if (req->isLocked()) {
515        // Disable recording the result temporarily.  Writing to misc
516        // regs normally updates the result, but this is not the
517        // desired behavior when handling store conditionals.
518        load_inst->recordResult = false;
519        TheISA::handleLockedRead(load_inst.get(), req);
520        load_inst->recordResult = true;
521    }
522
523    while (store_idx != -1) {
524        // End once we've reached the top of the LSQ
525        if (store_idx == storeWBIdx) {
526            break;
527        }
528
529        // Move the index to one younger
530        if (--store_idx < 0)
531            store_idx += SQEntries;
532
533        assert(storeQueue[store_idx].inst);
534
535        store_size = storeQueue[store_idx].size;
536
537        if (store_size == 0)
538            continue;
539        else if (storeQueue[store_idx].inst->uncacheable())
540            continue;
541
542        assert(storeQueue[store_idx].inst->effAddrValid);
543
544        // Check if the store data is within the lower and upper bounds of
545        // addresses that the request needs.
546        bool store_has_lower_limit =
547            req->getVaddr() >= storeQueue[store_idx].inst->effAddr;
548        bool store_has_upper_limit =
549            (req->getVaddr() + req->getSize()) <=
550            (storeQueue[store_idx].inst->effAddr + store_size);
551        bool lower_load_has_store_part =
552            req->getVaddr() < (storeQueue[store_idx].inst->effAddr +
553                           store_size);
554        bool upper_load_has_store_part =
555            (req->getVaddr() + req->getSize()) >
556            storeQueue[store_idx].inst->effAddr;
557
558        // If the store's data has all of the data needed, we can forward.
559        if ((store_has_lower_limit && store_has_upper_limit)) {
560            // Get shift amount for offset into the store's data.
561            int shift_amt = req->getVaddr() & (store_size - 1);
562
563            memcpy(&data, storeQueue[store_idx].data + shift_amt, sizeof(T));
564
565            assert(!load_inst->memData);
566            load_inst->memData = new uint8_t[64];
567
568            memcpy(load_inst->memData,
569                    storeQueue[store_idx].data + shift_amt, req->getSize());
570
571            DPRINTF(LSQUnit, "Forwarding from store idx %i to load to "
572                    "addr %#x, data %#x\n",
573                    store_idx, req->getVaddr(), data);
574
575            PacketPtr data_pkt = new Packet(req, MemCmd::ReadReq,
576                                            Packet::Broadcast);
577            data_pkt->dataStatic(load_inst->memData);
578
579            WritebackEvent *wb = new WritebackEvent(load_inst, data_pkt, this);
580
581            // We'll say this has a 1 cycle load-store forwarding latency
582            // for now.
583            // @todo: Need to make this a parameter.
584            wb->schedule(curTick);
585
586            ++lsqForwLoads;
587            return NoFault;
588        } else if ((store_has_lower_limit && lower_load_has_store_part) ||
589                   (store_has_upper_limit && upper_load_has_store_part) ||
590                   (lower_load_has_store_part && upper_load_has_store_part)) {
591            // This is the partial store-load forwarding case where a store
592            // has only part of the load's data.
593
594            // If it's already been written back, then don't worry about
595            // stalling on it.
596            if (storeQueue[store_idx].completed) {
597                panic("Should not check one of these");
598                continue;
599            }
600
601            // Must stall load and force it to retry, so long as it's the oldest
602            // load that needs to do so.
603            if (!stalled ||
604                (stalled &&
605                 load_inst->seqNum <
606                 loadQueue[stallingLoadIdx]->seqNum)) {
607                stalled = true;
608                stallingStoreIsn = storeQueue[store_idx].inst->seqNum;
609                stallingLoadIdx = load_idx;
610            }
611
612            // Tell IQ/mem dep unit that this instruction will need to be
613            // rescheduled eventually
614            iewStage->rescheduleMemInst(load_inst);
615            iewStage->decrWb(load_inst->seqNum);
616            load_inst->clearIssued();
617            ++lsqRescheduledLoads;
618
619            // Do not generate a writeback event as this instruction is not
620            // complete.
621            DPRINTF(LSQUnit, "Load-store forwarding mis-match. "
622                    "Store idx %i to load addr %#x\n",
623                    store_idx, req->getVaddr());
624
625            // Must delete request now that it wasn't handed off to
626            // memory.  This is quite ugly.  @todo: Figure out the
627            // proper place to really handle request deletes.
628            delete req;
629
630            return NoFault;
631        }
632    }
633
634    // If there's no forwarding case, then go access memory
635    DPRINTF(LSQUnit, "Doing memory access for inst [sn:%lli] PC %#x\n",
636            load_inst->seqNum, load_inst->readPC());
637
638    assert(!load_inst->memData);
639    load_inst->memData = new uint8_t[64];
640
641    ++usedPorts;
642
643    // if we the cache is not blocked, do cache access
644    if (!lsq->cacheBlocked()) {
645        PacketPtr data_pkt =
646            new Packet(req, MemCmd::ReadReq, Packet::Broadcast);
647        data_pkt->dataStatic(load_inst->memData);
648
649        LSQSenderState *state = new LSQSenderState;
650        state->isLoad = true;
651        state->idx = load_idx;
652        state->inst = load_inst;
653        data_pkt->senderState = state;
654
655        if (!dcachePort->sendTiming(data_pkt)) {
656            Packet::Result result = data_pkt->result;
657
658            // Delete state and data packet because a load retry
659            // initiates a pipeline restart; it does not retry.
660            delete state;
661            delete data_pkt->req;
662            delete data_pkt;
663
664            req = NULL;
665
666            if (result == Packet::BadAddress) {
667                return TheISA::genMachineCheckFault();
668            }
669
670            // If the access didn't succeed, tell the LSQ by setting
671            // the retry thread id.
672            lsq->setRetryTid(lsqID);
673        }
674    }
675
676    // If the cache was blocked, or has become blocked due to the access,
677    // handle it.
678    if (lsq->cacheBlocked()) {
679        if (req)
680            delete req;
681
682        ++lsqCacheBlocked;
683
684        iewStage->decrWb(load_inst->seqNum);
685        // There's an older load that's already going to squash.
686        if (isLoadBlocked && blockedLoadSeqNum < load_inst->seqNum)
687            return NoFault;
688
689        // Record that the load was blocked due to memory.  This
690        // load will squash all instructions after it, be
691        // refetched, and re-executed.
692        isLoadBlocked = true;
693        loadBlockedHandled = false;
694        blockedLoadSeqNum = load_inst->seqNum;
695        // No fault occurred, even though the interface is blocked.
696        return NoFault;
697    }
698
699    return NoFault;
700}
701
702template <class Impl>
703template <class T>
704Fault
705LSQUnit<Impl>::write(Request *req, T &data, int store_idx)
706{
707    assert(storeQueue[store_idx].inst);
708
709    DPRINTF(LSQUnit, "Doing write to store idx %i, addr %#x data %#x"
710            " | storeHead:%i [sn:%i]\n",
711            store_idx, req->getPaddr(), data, storeHead,
712            storeQueue[store_idx].inst->seqNum);
713
714    storeQueue[store_idx].req = req;
715    storeQueue[store_idx].size = sizeof(T);
716    assert(sizeof(T) <= sizeof(storeQueue[store_idx].data));
717
718    T gData = htog(data);
719    memcpy(storeQueue[store_idx].data, &gData, sizeof(T));
720
721    // This function only writes the data to the store queue, so no fault
722    // can happen here.
723    return NoFault;
724}
725
726#endif // __CPU_O3_LSQ_UNIT_HH__
727