lsq_impl.hh revision 14111:14c05f862590
/*
 * Copyright (c) 2011-2012, 2014, 2017-2018 ARM Limited
 * Copyright (c) 2013 Advanced Micro Devices, Inc.
 * All rights reserved
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder.  You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Copyright (c) 2005-2006 The Regents of The University of Michigan
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Korey Sewell
 */

#ifndef __CPU_O3_LSQ_IMPL_HH__
#define __CPU_O3_LSQ_IMPL_HH__

#include <algorithm>
#include <list>
#include <string>

#include "base/logging.hh"
#include "cpu/o3/lsq.hh"
#include "debug/Drain.hh"
#include "debug/Fetch.hh"
#include "debug/LSQ.hh"
#include "debug/Writeback.hh"
#include "params/DerivO3CPU.hh"

using namespace std;

template <class Impl>
LSQ<Impl>::LSQ(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params)
    : cpu(cpu_ptr), iewStage(iew_ptr),
      _cacheBlocked(false),
      cacheStorePorts(params->cacheStorePorts), usedStorePorts(0),
      cacheLoadPorts(params->cacheLoadPorts), usedLoadPorts(0),
      lsqPolicy(params->smtLSQPolicy),
      LQEntries(params->LQEntries),
      SQEntries(params->SQEntries),
      maxLQEntries(maxLSQAllocation(lsqPolicy, LQEntries, params->numThreads,
                  params->smtLSQThreshold)),
      maxSQEntries(maxLSQAllocation(lsqPolicy, SQEntries, params->numThreads,
                  params->smtLSQThreshold)),
      numThreads(params->numThreads)
{
    assert(numThreads > 0 && numThreads <= Impl::MaxThreads);

    //**********************************************/
    //************ Handle SMT Parameters ***********/
    //**********************************************/

    /* Run SMT policy checks. */
    if (lsqPolicy == SMTQueuePolicy::Dynamic) {
        DPRINTF(LSQ, "LSQ sharing policy set to Dynamic\n");
    } else if (lsqPolicy == SMTQueuePolicy::Partitioned) {
        DPRINTF(Fetch, "LSQ sharing policy set to Partitioned: "
                "%i entries per LQ | %i entries per SQ\n",
                maxLQEntries, maxSQEntries);
    } else if (lsqPolicy == SMTQueuePolicy::Threshold) {

        assert(params->smtLSQThreshold > params->LQEntries);
        assert(params->smtLSQThreshold > params->SQEntries);

        DPRINTF(LSQ, "LSQ sharing policy set to Threshold: "
                "%i entries per LQ | %i entries per SQ\n",
                maxLQEntries, maxSQEntries);
    } else {
        panic("Invalid LSQ sharing policy. Options are: Dynamic, "
              "Partitioned, Threshold");
    }

    thread.reserve(numThreads);
    for (ThreadID tid = 0; tid < numThreads; tid++) {
        thread.emplace_back(maxLQEntries, maxSQEntries);
        thread[tid].init(cpu, iew_ptr, params, this, tid);
        thread[tid].setDcachePort(&cpu_ptr->getDataPort());
    }
}


template<class Impl>
std::string
LSQ<Impl>::name() const
{
    return iewStage->name() + ".lsq";
}

template<class Impl>
void
LSQ<Impl>::regStats()
{
    // Register the per-thread LSQ unit stats.
    for (ThreadID tid = 0; tid < numThreads; tid++) {
        thread[tid].regStats();
    }
}

template<class Impl>
void
LSQ<Impl>::setActiveThreads(list<ThreadID> *at_ptr)
{
    activeThreads = at_ptr;
    assert(activeThreads != 0);
}

template <class Impl>
void
LSQ<Impl>::drainSanityCheck() const
{
    assert(isDrained());

    for (ThreadID tid = 0; tid < numThreads; tid++)
        thread[tid].drainSanityCheck();
}

template <class Impl>
bool
LSQ<Impl>::isDrained() const
{
    bool drained(true);

    if (!lqEmpty()) {
        DPRINTF(Drain, "Not drained, LQ not empty.\n");
        drained = false;
    }

    if (!sqEmpty()) {
        DPRINTF(Drain, "Not drained, SQ not empty.\n");
        drained = false;
    }

    return drained;
}

template <class Impl>
void
LSQ<Impl>::takeOverFrom()
{
    usedStorePorts = 0;
    _cacheBlocked = false;

    for (ThreadID tid = 0; tid < numThreads; tid++) {
        thread[tid].takeOverFrom();
    }
}

template <class Impl>
void
LSQ<Impl>::tick()
{
    // Re-issue loads which got blocked on the per-cycle load ports limit.
    if (usedLoadPorts == cacheLoadPorts && !_cacheBlocked)
        iewStage->cacheUnblocked();

    usedLoadPorts = 0;
    usedStorePorts = 0;
}

template<class Impl>
bool
LSQ<Impl>::cacheBlocked() const
{
    return _cacheBlocked;
}

template<class Impl>
void
LSQ<Impl>::cacheBlocked(bool v)
{
    _cacheBlocked = v;
}

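/*
 * Per-cycle cache port accounting: cachePortAvailable() reports whether a
 * load or store port is still free this cycle, and cachePortBusy() claims
 * one. The counters are reset every cycle in tick().
 */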
template<class Impl>
bool
LSQ<Impl>::cachePortAvailable(bool is_load) const
{
    bool ret;
    if (is_load) {
        ret = usedLoadPorts < cacheLoadPorts;
    } else {
        ret = usedStorePorts < cacheStorePorts;
    }
    return ret;
}

template<class Impl>
void
LSQ<Impl>::cachePortBusy(bool is_load)
{
    assert(cachePortAvailable(is_load));
    if (is_load) {
        usedLoadPorts++;
    } else {
        usedStorePorts++;
    }
}

template<class Impl>
void
LSQ<Impl>::insertLoad(const DynInstPtr &load_inst)
{
    ThreadID tid = load_inst->threadNumber;

    thread[tid].insertLoad(load_inst);
}

template<class Impl>
void
LSQ<Impl>::insertStore(const DynInstPtr &store_inst)
{
    ThreadID tid = store_inst->threadNumber;

    thread[tid].insertStore(store_inst);
}

template<class Impl>
Fault
LSQ<Impl>::executeLoad(const DynInstPtr &inst)
{
    ThreadID tid = inst->threadNumber;

    return thread[tid].executeLoad(inst);
}

template<class Impl>
Fault
LSQ<Impl>::executeStore(const DynInstPtr &inst)
{
    ThreadID tid = inst->threadNumber;

    return thread[tid].executeStore(inst);
}

template<class Impl>
void
LSQ<Impl>::writebackStores()
{
    list<ThreadID>::iterator threads = activeThreads->begin();
    list<ThreadID>::iterator end = activeThreads->end();

    while (threads != end) {
        ThreadID tid = *threads++;

        if (numStoresToWB(tid) > 0) {
            DPRINTF(Writeback, "[tid:%i] Writing back stores. %i stores "
                "available for Writeback.\n", tid, numStoresToWB(tid));
        }

        thread[tid].writebackStores();
    }
}

template<class Impl>
bool
LSQ<Impl>::violation()
{
    /* Returns true if any active thread has detected a memory-ordering
     * violation. */
    list<ThreadID>::iterator threads = activeThreads->begin();
    list<ThreadID>::iterator end = activeThreads->end();

    while (threads != end) {
        ThreadID tid = *threads++;

        if (thread[tid].violation())
            return true;
    }

    return false;
}

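/*
 * Called when the D-cache signals that it can accept requests again. Mark
 * the cache as unblocked and let every active thread's LSQ unit retry its
 * blocked access.
 */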
template <class Impl>
void
LSQ<Impl>::recvReqRetry()
{
    iewStage->cacheUnblocked();
    cacheBlocked(false);

    for (ThreadID tid : *activeThreads) {
        thread[tid].recvRetry();
    }
}

template <class Impl>
void
LSQ<Impl>::completeDataAccess(PacketPtr pkt)
{
    auto senderState = dynamic_cast<LSQSenderState*>(pkt->senderState);
    thread[cpu->contextToThread(senderState->contextId())]
        .completeDataAccess(pkt);
}

template <class Impl>
bool
LSQ<Impl>::recvTimingResp(PacketPtr pkt)
{
    if (pkt->isError())
        DPRINTF(LSQ, "Got error packet back for address: %#X\n",
                pkt->getAddr());

    auto senderState = dynamic_cast<LSQSenderState*>(pkt->senderState);
    panic_if(!senderState, "Got packet back with unknown sender state\n");

    thread[cpu->contextToThread(senderState->contextId())].recvTimingResp(pkt);

    if (pkt->isInvalidate()) {
        // This response also contains an invalidate; e.g. this can be the case
        // if cmd is ReadRespWithInvalidate.
        //
        // The calling order between completeDataAccess and checkSnoop matters.
        // By calling checkSnoop after completeDataAccess, we ensure that the
        // fault set by checkSnoop is not lost. Calling writeback (more
        // specifically inst->completeAcc) in completeDataAccess overwrites
        // fault, and in case this instruction requires squashing (as
        // determined by checkSnoop), the ReExec fault set by checkSnoop would
        // be lost otherwise.

        DPRINTF(LSQ, "received invalidation with response for addr:%#x\n",
                pkt->getAddr());

        for (ThreadID tid = 0; tid < numThreads; tid++) {
            thread[tid].checkSnoop(pkt);
        }
    }
    // Update the LSQRequest state (this may delete the request)
    senderState->request()->packetReplied();

    return true;
}

template <class Impl>
void
LSQ<Impl>::recvTimingSnoopReq(PacketPtr pkt)
{
    DPRINTF(LSQ, "received pkt for addr:%#x %s\n", pkt->getAddr(),
            pkt->cmdString());

    // must be a snoop
    if (pkt->isInvalidate()) {
        DPRINTF(LSQ, "received invalidation for addr:%#x\n",
                pkt->getAddr());
        for (ThreadID tid = 0; tid < numThreads; tid++) {
            thread[tid].checkSnoop(pkt);
        }
    }
}

template<class Impl>
int
LSQ<Impl>::getCount()
{
    unsigned total = 0;

    list<ThreadID>::iterator threads = activeThreads->begin();
    list<ThreadID>::iterator end = activeThreads->end();

    while (threads != end) {
        ThreadID tid = *threads++;

        total += getCount(tid);
    }

    return total;
}

template<class Impl>
int
LSQ<Impl>::numLoads()
{
    unsigned total = 0;

    list<ThreadID>::iterator threads = activeThreads->begin();
    list<ThreadID>::iterator end = activeThreads->end();

    while (threads != end) {
        ThreadID tid = *threads++;

        total += numLoads(tid);
    }

    return total;
}

template<class Impl>
int
LSQ<Impl>::numStores()
{
    unsigned total = 0;

    list<ThreadID>::iterator threads = activeThreads->begin();
    list<ThreadID>::iterator end = activeThreads->end();

    while (threads != end) {
        ThreadID tid = *threads++;

        total += thread[tid].numStores();
    }

    return total;
}

template<class Impl>
unsigned
LSQ<Impl>::numFreeLoadEntries()
{
    unsigned total = 0;

    list<ThreadID>::iterator threads = activeThreads->begin();
    list<ThreadID>::iterator end = activeThreads->end();

    while (threads != end) {
        ThreadID tid = *threads++;

        total += thread[tid].numFreeLoadEntries();
    }

    return total;
}

template<class Impl>
unsigned
LSQ<Impl>::numFreeStoreEntries()
{
    unsigned total = 0;

    list<ThreadID>::iterator threads = activeThreads->begin();
    list<ThreadID>::iterator end = activeThreads->end();

    while (threads != end) {
        ThreadID tid = *threads++;

        total += thread[tid].numFreeStoreEntries();
    }

    return total;
}

template<class Impl>
unsigned
LSQ<Impl>::numFreeLoadEntries(ThreadID tid)
{
    return thread[tid].numFreeLoadEntries();
}

template<class Impl>
unsigned
LSQ<Impl>::numFreeStoreEntries(ThreadID tid)
{
    return thread[tid].numFreeStoreEntries();
}

template<class Impl>
bool
LSQ<Impl>::isFull()
{
    list<ThreadID>::iterator threads = activeThreads->begin();
    list<ThreadID>::iterator end = activeThreads->end();

    while (threads != end) {
        ThreadID tid = *threads++;

        if (!(thread[tid].lqFull() || thread[tid].sqFull()))
            return false;
    }

    return true;
}

template<class Impl>
bool
LSQ<Impl>::isFull(ThreadID tid)
{
    //@todo: Change to calculate all entries for the dynamic policy.
    if (lsqPolicy == SMTQueuePolicy::Dynamic)
        return isFull();
    else
        return thread[tid].lqFull() || thread[tid].sqFull();
}

template<class Impl>
bool
LSQ<Impl>::isEmpty() const
{
    return lqEmpty() && sqEmpty();
}

template<class Impl>
bool
LSQ<Impl>::lqEmpty() const
{
    list<ThreadID>::const_iterator threads = activeThreads->begin();
    list<ThreadID>::const_iterator end = activeThreads->end();

    while (threads != end) {
        ThreadID tid = *threads++;

        if (!thread[tid].lqEmpty())
            return false;
    }

    return true;
}

template<class Impl>
bool
LSQ<Impl>::sqEmpty() const
{
    list<ThreadID>::const_iterator threads = activeThreads->begin();
    list<ThreadID>::const_iterator end = activeThreads->end();

    while (threads != end) {
        ThreadID tid = *threads++;

        if (!thread[tid].sqEmpty())
            return false;
    }

    return true;
}

template<class Impl>
bool
LSQ<Impl>::lqFull()
{
    list<ThreadID>::iterator threads = activeThreads->begin();
    list<ThreadID>::iterator end = activeThreads->end();

    while (threads != end) {
        ThreadID tid = *threads++;

        if (!thread[tid].lqFull())
            return false;
    }

    return true;
}

template<class Impl>
bool
LSQ<Impl>::lqFull(ThreadID tid)
{
    //@todo: Change to calculate all entries for the dynamic policy.
    if (lsqPolicy == SMTQueuePolicy::Dynamic)
        return lqFull();
    else
        return thread[tid].lqFull();
}

template<class Impl>
bool
LSQ<Impl>::sqFull()
{
    list<ThreadID>::iterator threads = activeThreads->begin();
    list<ThreadID>::iterator end = activeThreads->end();

    while (threads != end) {
        ThreadID tid = *threads++;

        if (!sqFull(tid))
            return false;
    }

    return true;
}

template<class Impl>
bool
LSQ<Impl>::sqFull(ThreadID tid)
{
    //@todo: Change to calculate all entries for the dynamic policy.
    if (lsqPolicy == SMTQueuePolicy::Dynamic)
        return sqFull();
    else
        return thread[tid].sqFull();
}

template<class Impl>
bool
LSQ<Impl>::isStalled()
{
    list<ThreadID>::iterator threads = activeThreads->begin();
    list<ThreadID>::iterator end = activeThreads->end();

    while (threads != end) {
        ThreadID tid = *threads++;

        if (!thread[tid].isStalled())
            return false;
    }

    return true;
}

template<class Impl>
bool
LSQ<Impl>::isStalled(ThreadID tid)
{
    if (lsqPolicy == SMTQueuePolicy::Dynamic)
        return isStalled();
    else
        return thread[tid].isStalled();
}

template<class Impl>
bool
LSQ<Impl>::hasStoresToWB()
{
    list<ThreadID>::iterator threads = activeThreads->begin();
    list<ThreadID>::iterator end = activeThreads->end();

    while (threads != end) {
        ThreadID tid = *threads++;

        if (hasStoresToWB(tid))
            return true;
    }

    return false;
}

template<class Impl>
bool
LSQ<Impl>::willWB()
{
    list<ThreadID>::iterator threads = activeThreads->begin();
    list<ThreadID>::iterator end = activeThreads->end();

    while (threads != end) {
        ThreadID tid = *threads++;

        if (willWB(tid))
            return true;
    }

    return false;
}

template<class Impl>
void
LSQ<Impl>::dumpInsts() const
{
    list<ThreadID>::const_iterator threads = activeThreads->begin();
    list<ThreadID>::const_iterator end = activeThreads->end();

    while (threads != end) {
        ThreadID tid = *threads++;

        thread[tid].dumpInsts();
    }
}

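/*
 * Common entry point for memory instructions: allocate a single or split
 * request depending on whether the access crosses a cache line, start (or
 * resume) address translation, and, once translation has completed, hand
 * the access to the thread's LSQ unit via cpu->read() or cpu->write().
 */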
template<class Impl>
Fault
LSQ<Impl>::pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data,
                       unsigned int size, Addr addr, Request::Flags flags,
                       uint64_t *res, AtomicOpFunctor *amo_op,
                       const std::vector<bool>& byteEnable)
{
    // The incoming request can be a load, a store, or an atomic. An atomic
    // request carries a pointer to its atomic memory operation.
    bool isAtomic M5_VAR_USED = !isLoad && amo_op;

    ThreadID tid = cpu->contextToThread(inst->contextId());
    auto cacheLineSize = cpu->cacheLineSize();
    bool needs_burst = transferNeedsBurst(addr, size, cacheLineSize);
    LSQRequest* req = nullptr;

    // Atomic requests that access data across a cache line boundary are
    // currently not allowed since the cache does not guarantee that the
    // corresponding atomic memory operation is executed atomically across
    // the line boundary. For ISAs such as x86 that support cross-cache-line
    // atomic instructions, the cache would need to be modified to perform an
    // atomic update to both cache lines. For now, such cross-line updates are
    // not supported.
    assert(!isAtomic || (isAtomic && !needs_burst));

    if (inst->translationStarted()) {
        req = inst->savedReq;
        assert(req);
    } else {
        if (needs_burst) {
            req = new SplitDataRequest(&thread[tid], inst, isLoad, addr,
                    size, flags, data, res);
        } else {
            req = new SingleDataRequest(&thread[tid], inst, isLoad, addr,
                    size, flags, data, res, amo_op);
        }
        assert(req);
        if (!byteEnable.empty()) {
            req->_byteEnable = byteEnable;
        }
        inst->setRequest();
        req->taskId(cpu->taskId());

        // There might be a fault from a previous execution attempt if this is
        // a strictly ordered load.
        inst->getFault() = NoFault;

        req->initiateTranslation();
    }

    /* This is the place where instructions get the effAddr. */
    if (req->isTranslationComplete()) {
        if (req->isMemAccessRequired()) {
            inst->effAddr = req->getVaddr();
            inst->effSize = size;
            inst->effAddrValid(true);

            if (cpu->checker) {
                inst->reqToVerify = std::make_shared<Request>(*req->request());
            }
            Fault fault;
            if (isLoad)
                fault = cpu->read(req, inst->lqIdx);
            else
                fault = cpu->write(req, data, inst->sqIdx);
            // inst->getFault() may hold the first fault of a multi-access
            // split request at this point. Overwrite it only if we got
            // another type of fault (e.g. re-exec).
            if (fault != NoFault)
                inst->getFault() = fault;
        } else if (isLoad) {
            inst->setMemAccPredicate(false);
            // Commit will have to clean up whatever happened.  Set this
            // instruction as executed.
            inst->setExecuted();
        }
    }

    if (inst->traceData)
        inst->traceData->setMem(addr, size, flags);

    return inst->getFault();
}

template<class Impl>
void
LSQ<Impl>::SingleDataRequest::finish(const Fault &fault, const RequestPtr &req,
        ThreadContext* tc, BaseTLB::Mode mode)
{
    _fault.push_back(fault);
    numInTranslationFragments = 0;
    numTranslatedFragments = 1;
    /* If the instruction has been squashed, let the request know
     * as it may have to self-destruct. */
    if (_inst->isSquashed()) {
        this->squashTranslation();
    } else {
        _inst->strictlyOrdered(req->isStrictlyOrdered());

        flags.set(Flag::TranslationFinished);
        if (fault == NoFault) {
            _inst->physEffAddr = req->getPaddr();
            _inst->memReqFlags = req->getFlags();
            if (req->isCondSwap()) {
                assert(_res);
                req->setExtraData(*_res);
            }
            setState(State::Request);
        } else {
            setState(State::Fault);
        }

        LSQRequest::_inst->fault = fault;
        LSQRequest::_inst->translationCompleted(true);
    }
}

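/*
 * Translation callback for one fragment of a split request. Faults are
 * recorded per fragment; only when every fragment has translated do we
 * decide whether the whole access can proceed, takes a partial fault, or
 * faults outright.
 */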
template<class Impl>
void
LSQ<Impl>::SplitDataRequest::finish(const Fault &fault, const RequestPtr &req,
        ThreadContext* tc, BaseTLB::Mode mode)
{
    int i;
    for (i = 0; i < _requests.size() && _requests[i] != req; i++);
    assert(i < _requests.size());
    _fault[i] = fault;

    numInTranslationFragments--;
    numTranslatedFragments++;

    if (fault == NoFault)
        mainReq->setFlags(req->getFlags());

    if (numTranslatedFragments == _requests.size()) {
        if (_inst->isSquashed()) {
            this->squashTranslation();
        } else {
            _inst->strictlyOrdered(mainReq->isStrictlyOrdered());
            flags.set(Flag::TranslationFinished);
            _inst->translationCompleted(true);

            // Find the first fragment that faulted, if any.
            for (i = 0; i < _fault.size() && _fault[i] == NoFault; i++);
            if (i > 0) {
                _inst->physEffAddr = request(0)->getPaddr();
                _inst->memReqFlags = mainReq->getFlags();
                if (mainReq->isCondSwap()) {
                    assert(i == _fault.size());
                    assert(_res);
                    mainReq->setExtraData(*_res);
                }
                if (i == _fault.size()) {
                    _inst->fault = NoFault;
                    setState(State::Request);
                } else {
                    _inst->fault = _fault[i];
                    setState(State::PartialFault);
                }
            } else {
                _inst->fault = _fault[0];
                setState(State::Fault);
            }
        }
    }
}

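/*
 * Single (non-split) requests generate at most one fragment. If addRequest
 * adds no fragment (e.g. a byte enable mask that disables the whole access),
 * the instruction's memory access predicate is cleared instead.
 */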
template<class Impl>
void
LSQ<Impl>::SingleDataRequest::initiateTranslation()
{
    assert(_requests.size() == 0);

    this->addRequest(_addr, _size, _byteEnable);

    if (_requests.size() > 0) {
        _requests.back()->setReqInstSeqNum(_inst->seqNum);
        _requests.back()->taskId(_taskId);
        _inst->translationStarted(true);
        setState(State::Translation);
        flags.set(Flag::TranslationStarted);

        _inst->savedReq = this;
        sendFragmentToTranslation(0);
    } else {
        _inst->setMemAccPredicate(false);
    }
}

template<class Impl>
PacketPtr
LSQ<Impl>::SplitDataRequest::mainPacket()
{
    return _mainPacket;
}

template<class Impl>
RequestPtr
LSQ<Impl>::SplitDataRequest::mainRequest()
{
    return mainReq;
}

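/*
 * A split request covers an access that crosses a cache line boundary. The
 * access is broken into a possibly unaligned head fragment up to the next
 * line boundary, zero or more full-line fragments, and a tail fragment for
 * any remaining bytes; mainReq carries the aggregate flags and the virtual
 * address of the whole access.
 */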
template<class Impl>
void
LSQ<Impl>::SplitDataRequest::initiateTranslation()
{
    auto cacheLineSize = _port.cacheLineSize();
    Addr base_addr = _addr;
    Addr next_addr = addrBlockAlign(_addr + cacheLineSize, cacheLineSize);
    Addr final_addr = addrBlockAlign(_addr + _size, cacheLineSize);
    uint32_t size_so_far = 0;

    mainReq = std::make_shared<Request>(_inst->getASID(), base_addr,
                _size, _flags, _inst->masterId(),
                _inst->instAddr(), _inst->contextId());
    if (!_byteEnable.empty()) {
        mainReq->setByteEnable(_byteEnable);
    }

    // Paddr is not used in mainReq. However, we will accumulate the flags
    // from the sub-requests into mainReq by calling setFlags() in finish().
    // setFlags() assumes that paddr is set, so mark the paddr as valid here
    // to avoid a potential assert in setFlags() when we call it from
    // finish().
    mainReq->setPaddr(0);

    /* Get the prefix, possibly unaligned. */
    if (_byteEnable.empty()) {
        this->addRequest(base_addr, next_addr - base_addr, _byteEnable);
    } else {
        auto it_start = _byteEnable.begin();
        auto it_end = _byteEnable.begin() + (next_addr - base_addr);
        this->addRequest(base_addr, next_addr - base_addr,
                         std::vector<bool>(it_start, it_end));
    }
    size_so_far = next_addr - base_addr;

    /* We are block aligned now, reading whole blocks. */
    base_addr = next_addr;
    while (base_addr != final_addr) {
        if (_byteEnable.empty()) {
            this->addRequest(base_addr, cacheLineSize, _byteEnable);
        } else {
            auto it_start = _byteEnable.begin() + size_so_far;
            auto it_end = _byteEnable.begin() + size_so_far + cacheLineSize;
            this->addRequest(base_addr, cacheLineSize,
                             std::vector<bool>(it_start, it_end));
        }
        size_so_far += cacheLineSize;
        base_addr += cacheLineSize;
    }

    /* Deal with the tail. */
    if (size_so_far < _size) {
        if (_byteEnable.empty()) {
            this->addRequest(base_addr, _size - size_so_far, _byteEnable);
        } else {
            auto it_start = _byteEnable.begin() + size_so_far;
            auto it_end = _byteEnable.end();
            this->addRequest(base_addr, _size - size_so_far,
                             std::vector<bool>(it_start, it_end));
        }
    }

    if (_requests.size() > 0) {
        /* Set up the requests and send them to translation. */
        for (auto& r: _requests) {
            r->setReqInstSeqNum(_inst->seqNum);
            r->taskId(_taskId);
        }

        _inst->translationStarted(true);
        setState(State::Translation);
        flags.set(Flag::TranslationStarted);
        this->_inst->savedReq = this;
        numInTranslationFragments = 0;
        numTranslatedFragments = 0;
        _fault.resize(_requests.size());

        for (uint32_t i = 0; i < _requests.size(); i++) {
            sendFragmentToTranslation(i);
        }
    } else {
        _inst->setMemAccPredicate(false);
    }
}

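/* Hand fragment i to the data TLB for timing translation; finish() is
 * invoked as the callback when the translation completes. */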
template<class Impl>
void
LSQ<Impl>::LSQRequest::sendFragmentToTranslation(int i)
{
    numInTranslationFragments++;
    _port.dTLB()->translateTiming(
            this->request(i),
            this->_inst->thread->getTC(), this,
            this->isLoad() ? BaseTLB::Read : BaseTLB::Write);
}

template<class Impl>
bool
LSQ<Impl>::SingleDataRequest::recvTimingResp(PacketPtr pkt)
{
    assert(_numOutstandingPackets == 1);
    auto state = dynamic_cast<LSQSenderState*>(pkt->senderState);
    flags.set(Flag::Complete);
    state->outstanding--;
    assert(pkt == _packets.front());
    _port.completeDataAccess(pkt);
    return true;
}

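/*
 * Count responses for the individual fragments; once the last one arrives,
 * build a single response packet on mainReq and pass it to the LSQ unit so
 * the instruction sees one combined access.
 */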
template<class Impl>
bool
LSQ<Impl>::SplitDataRequest::recvTimingResp(PacketPtr pkt)
{
    auto state = dynamic_cast<LSQSenderState*>(pkt->senderState);
    uint32_t pktIdx = 0;
    while (pktIdx < _packets.size() && pkt != _packets[pktIdx])
        pktIdx++;
    assert(pktIdx < _packets.size());
    numReceivedPackets++;
    state->outstanding--;
    if (numReceivedPackets == _packets.size()) {
        flags.set(Flag::Complete);
        /* Assemble packets. */
        PacketPtr resp = isLoad()
            ? Packet::createRead(mainReq)
            : Packet::createWrite(mainReq);
        if (isLoad())
            resp->dataStatic(_inst->memData);
        else
            resp->dataStatic(_data);
        resp->senderState = _senderState;
        _port.completeDataAccess(resp);
        delete resp;
    }
    return true;
}

template<class Impl>
void
LSQ<Impl>::SingleDataRequest::buildPackets()
{
    assert(_senderState);
    /* Retries do not create new packets. */
    if (_packets.size() == 0) {
        _packets.push_back(
                isLoad()
                    ? Packet::createRead(request())
                    : Packet::createWrite(request()));
        _packets.back()->dataStatic(_inst->memData);
        _packets.back()->senderState = _senderState;
    }
    assert(_packets.size() == 1);
}

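/*
 * Build one packet per translated fragment (plus a main packet for loads
 * that collects the full data). Load fragments point directly into the
 * instruction's data buffer at the right offset; store fragments get their
 * own copy of the corresponding bytes.
 */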
template<class Impl>
void
LSQ<Impl>::SplitDataRequest::buildPackets()
{
    /* Each fragment's data offset is computed relative to the base virtual
     * address of the whole access. */
    Addr base_address = _addr;

    if (_packets.size() == 0) {
        /* First attempt: create the packets. Retries reuse them. */
        if (isLoad()) {
            _mainPacket = Packet::createRead(mainReq);
            _mainPacket->dataStatic(_inst->memData);
        }
        for (int i = 0; i < _requests.size() && _fault[i] == NoFault; i++) {
            RequestPtr r = _requests[i];
            PacketPtr pkt = isLoad() ? Packet::createRead(r)
                                     : Packet::createWrite(r);
            ptrdiff_t offset = r->getVaddr() - base_address;
            if (isLoad()) {
                pkt->dataStatic(_inst->memData + offset);
            } else {
                uint8_t* req_data = new uint8_t[r->getSize()];
                std::memcpy(req_data,
                        _inst->memData + offset,
                        r->getSize());
                pkt->dataDynamic(req_data);
            }
            pkt->senderState = _senderState;
            _packets.push_back(pkt);
        }
    }
    assert(_packets.size() > 0);
}

template<class Impl>
void
LSQ<Impl>::SingleDataRequest::sendPacketToCache()
{
    assert(_numOutstandingPackets == 0);
    if (lsqUnit()->trySendPacket(isLoad(), _packets.at(0)))
        _numOutstandingPackets = 1;
}

template<class Impl>
void
LSQ<Impl>::SplitDataRequest::sendPacketToCache()
{
    /* Try to send the packets. */
    while (numReceivedPackets + _numOutstandingPackets < _packets.size() &&
            lsqUnit()->trySendPacket(isLoad(),
                _packets.at(numReceivedPackets + _numOutstandingPackets))) {
        _numOutstandingPackets++;
    }
}

template<class Impl>
void
LSQ<Impl>::SingleDataRequest::handleIprWrite(ThreadContext *thread,
                                             PacketPtr pkt)
{
    TheISA::handleIprWrite(thread, pkt);
}

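/*
 * IPR (internal processor register) accesses bypass the cache. For split
 * requests, the main packet's data is written out fragment by fragment,
 * using a temporary packet per fragment.
 */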
template<class Impl>
void
LSQ<Impl>::SplitDataRequest::handleIprWrite(ThreadContext *thread,
                                            PacketPtr mainPkt)
{
    unsigned offset = 0;
    for (auto r: _requests) {
        PacketPtr pkt = new Packet(r, MemCmd::WriteReq);
        pkt->dataStatic(mainPkt->getPtr<uint8_t>() + offset);
        TheISA::handleIprWrite(thread, pkt);
        offset += r->getSize();
        delete pkt;
    }
}

template<class Impl>
Cycles
LSQ<Impl>::SingleDataRequest::handleIprRead(ThreadContext *thread,
                                            PacketPtr pkt)
{
    return TheISA::handleIprRead(thread, pkt);
}

template<class Impl>
Cycles
LSQ<Impl>::SplitDataRequest::handleIprRead(ThreadContext *thread,
                                           PacketPtr mainPkt)
{
    Cycles delay(0);
    unsigned offset = 0;

    for (auto r: _requests) {
        PacketPtr pkt = new Packet(r, MemCmd::ReadReq);
        pkt->dataStatic(mainPkt->getPtr<uint8_t>() + offset);
        Cycles d = TheISA::handleIprRead(thread, pkt);
        if (d > delay)
            delay = d;
        offset += r->getSize();
        delete pkt;
    }
    return delay;
}

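/* Returns true if any physical address touched by this request falls within
 * the cache block identified by blockAddr and blockMask. */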
template<class Impl>
bool
LSQ<Impl>::SingleDataRequest::isCacheBlockHit(Addr blockAddr, Addr blockMask)
{
    return ((LSQRequest::_requests[0]->getPaddr() & blockMask) == blockAddr);
}

template<class Impl>
bool
LSQ<Impl>::SplitDataRequest::isCacheBlockHit(Addr blockAddr, Addr blockMask)
{
    bool is_hit = false;
    for (auto &r: _requests) {
        if ((r->getPaddr() & blockMask) == blockAddr) {
            is_hit = true;
            break;
        }
    }
    return is_hit;
}

#endif // __CPU_O3_LSQ_IMPL_HH__