1/*
2 * Copyright (c) 2011-2012, 2014, 2017-2018 ARM Limited
3 * Copyright (c) 2013 Advanced Micro Devices, Inc.
4 * All rights reserved
5 *
6 * The license below extends only to copyright in the software and shall
7 * not be construed as granting a license to any other intellectual
8 * property including but not limited to intellectual property relating
9 * to a hardware implementation of the functionality of the software
10 * licensed hereunder.  You may use the software subject to the license
11 * terms below provided that you ensure that this notice is replicated
12 * unmodified and in its entirety in all distributions of the software,
13 * modified or unmodified, in source code or in binary form.
14 *
15 * Copyright (c) 2005-2006 The Regents of The University of Michigan
16 * All rights reserved.
17 *
18 * Redistribution and use in source and binary forms, with or without
19 * modification, are permitted provided that the following conditions are
20 * met: redistributions of source code must retain the above copyright
21 * notice, this list of conditions and the following disclaimer;
22 * redistributions in binary form must reproduce the above copyright
23 * notice, this list of conditions and the following disclaimer in the
24 * documentation and/or other materials provided with the distribution;
25 * neither the name of the copyright holders nor the names of its
26 * contributors may be used to endorse or promote products derived from
27 * this software without specific prior written permission.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
30 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
31 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
32 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
33 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
34 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
35 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
36 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
37 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
38 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
39 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40 *
41 * Authors: Korey Sewell
42 */
43
44#ifndef __CPU_O3_LSQ_IMPL_HH__
45#define __CPU_O3_LSQ_IMPL_HH__
46
47#include <algorithm>
48#include <list>
49#include <string>
50
51#include "base/logging.hh"
52#include "cpu/o3/cpu.hh"
53#include "cpu/o3/lsq.hh"
54#include "debug/Drain.hh"
55#include "debug/Fetch.hh"
56#include "debug/LSQ.hh"
57#include "debug/Writeback.hh"
58#include "params/DerivO3CPU.hh"
59
60using namespace std;
61
62template <class Impl>
63LSQ<Impl>::LSQ(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params)
64    : cpu(cpu_ptr), iewStage(iew_ptr),
65      _cacheBlocked(false),
66      cacheStorePorts(params->cacheStorePorts), usedStorePorts(0),
67      cacheLoadPorts(params->cacheLoadPorts), usedLoadPorts(0),
68      lsqPolicy(params->smtLSQPolicy),
69      LQEntries(params->LQEntries),
70      SQEntries(params->SQEntries),
71      maxLQEntries(maxLSQAllocation(lsqPolicy, LQEntries, params->numThreads,
72                  params->smtLSQThreshold)),
73      maxSQEntries(maxLSQAllocation(lsqPolicy, SQEntries, params->numThreads,
74                  params->smtLSQThreshold)),
75      dcachePort(this, cpu_ptr),
76      numThreads(params->numThreads)
77{
78    assert(numThreads > 0 && numThreads <= Impl::MaxThreads);
79
80    //**********************************************/
81    //************ Handle SMT Parameters ***********/
82    //**********************************************/
83
84    /* Run SMT olicy checks. */
85        if (lsqPolicy == SMTQueuePolicy::Dynamic) {
86        DPRINTF(LSQ, "LSQ sharing policy set to Dynamic\n");
87    } else if (lsqPolicy == SMTQueuePolicy::Partitioned) {
88        DPRINTF(Fetch, "LSQ sharing policy set to Partitioned: "
89                "%i entries per LQ | %i entries per SQ\n",
90                maxLQEntries,maxSQEntries);
91    } else if (lsqPolicy == SMTQueuePolicy::Threshold) {
92
93        assert(params->smtLSQThreshold > params->LQEntries);
94        assert(params->smtLSQThreshold > params->SQEntries);
95
96        DPRINTF(LSQ, "LSQ sharing policy set to Threshold: "
97                "%i entries per LQ | %i entries per SQ\n",
98                maxLQEntries,maxSQEntries);
99    } else {
100        panic("Invalid LSQ sharing policy. Options are: Dynamic, "
101                    "Partitioned, Threshold");
102    }
103
104    thread.reserve(numThreads);
105    for (ThreadID tid = 0; tid < numThreads; tid++) {
106        thread.emplace_back(maxLQEntries, maxSQEntries);
107        thread[tid].init(cpu, iew_ptr, params, this, tid);
108        thread[tid].setDcachePort(&dcachePort);
109    }
110}
111
112
113template<class Impl>
114std::string
115LSQ<Impl>::name() const
116{
117    return iewStage->name() + ".lsq";
118}
119
120template<class Impl>
121void
122LSQ<Impl>::regStats()
123{
124    //Initialize LSQs
125    for (ThreadID tid = 0; tid < numThreads; tid++) {
126        thread[tid].regStats();
127    }
128}
129
130template<class Impl>
131void
132LSQ<Impl>::setActiveThreads(list<ThreadID> *at_ptr)
133{
134    activeThreads = at_ptr;
135    assert(activeThreads != 0);
136}
137
138template <class Impl>
139void
140LSQ<Impl>::drainSanityCheck() const
141{
142    assert(isDrained());
143
144    for (ThreadID tid = 0; tid < numThreads; tid++)
145        thread[tid].drainSanityCheck();
146}
147
148template <class Impl>
149bool
150LSQ<Impl>::isDrained() const
151{
152    bool drained(true);
153
154    if (!lqEmpty()) {
155        DPRINTF(Drain, "Not drained, LQ not empty.\n");
156        drained = false;
157    }
158
159    if (!sqEmpty()) {
160        DPRINTF(Drain, "Not drained, SQ not empty.\n");
161        drained = false;
162    }
163
164    return drained;
165}
166
167template <class Impl>
168void
169LSQ<Impl>::takeOverFrom()
170{
171    usedStorePorts = 0;
172    _cacheBlocked = false;
173
174    for (ThreadID tid = 0; tid < numThreads; tid++) {
175        thread[tid].takeOverFrom();
176    }
177}
178
179template <class Impl>
180void
181LSQ<Impl>::tick()
182{
183    // Re-issue loads which got blocked on the per-cycle load ports limit.
184    if (usedLoadPorts == cacheLoadPorts && !_cacheBlocked)
185        iewStage->cacheUnblocked();
186
187    usedLoadPorts = 0;
188    usedStorePorts = 0;
189}
190
191template<class Impl>
192bool
193LSQ<Impl>::cacheBlocked() const
194{
195    return _cacheBlocked;
196}
197
198template<class Impl>
199void
200LSQ<Impl>::cacheBlocked(bool v)
201{
202    _cacheBlocked = v;
203}
204
205template<class Impl>
206bool
207LSQ<Impl>::cachePortAvailable(bool is_load) const
208{
209    bool ret;
210    if (is_load) {
211        ret  = usedLoadPorts < cacheLoadPorts;
212    } else {
213        ret  = usedStorePorts < cacheStorePorts;
214    }
215    return ret;
216}
217
218template<class Impl>
219void
220LSQ<Impl>::cachePortBusy(bool is_load)
221{
222    assert(cachePortAvailable(is_load));
223    if (is_load) {
224        usedLoadPorts++;
225    } else {
226        usedStorePorts++;
227    }
228}
229
230template<class Impl>
231void
232LSQ<Impl>::insertLoad(const DynInstPtr &load_inst)
233{
234    ThreadID tid = load_inst->threadNumber;
235
236    thread[tid].insertLoad(load_inst);
237}
238
239template<class Impl>
240void
241LSQ<Impl>::insertStore(const DynInstPtr &store_inst)
242{
243    ThreadID tid = store_inst->threadNumber;
244
245    thread[tid].insertStore(store_inst);
246}
247
248template<class Impl>
249Fault
250LSQ<Impl>::executeLoad(const DynInstPtr &inst)
251{
252    ThreadID tid = inst->threadNumber;
253
254    return thread[tid].executeLoad(inst);
255}
256
257template<class Impl>
258Fault
259LSQ<Impl>::executeStore(const DynInstPtr &inst)
260{
261    ThreadID tid = inst->threadNumber;
262
263    return thread[tid].executeStore(inst);
264}
265
266template<class Impl>
267void
268LSQ<Impl>::writebackStores()
269{
270    list<ThreadID>::iterator threads = activeThreads->begin();
271    list<ThreadID>::iterator end = activeThreads->end();
272
273    while (threads != end) {
274        ThreadID tid = *threads++;
275
276        if (numStoresToWB(tid) > 0) {
277            DPRINTF(Writeback,"[tid:%i] Writing back stores. %i stores "
278                "available for Writeback.\n", tid, numStoresToWB(tid));
279        }
280
281        thread[tid].writebackStores();
282    }
283}
284
285template<class Impl>
286bool
287LSQ<Impl>::violation()
288{
289    /* Answers: Does Anybody Have a Violation?*/
290    list<ThreadID>::iterator threads = activeThreads->begin();
291    list<ThreadID>::iterator end = activeThreads->end();
292
293    while (threads != end) {
294        ThreadID tid = *threads++;
295
296        if (thread[tid].violation())
297            return true;
298    }
299
300    return false;
301}
302
303template <class Impl>
304void
305LSQ<Impl>::recvReqRetry()
306{
307    iewStage->cacheUnblocked();
308    cacheBlocked(false);
309
310    for (ThreadID tid : *activeThreads) {
311        thread[tid].recvRetry();
312    }
313}
314
315template <class Impl>
316void
317LSQ<Impl>::completeDataAccess(PacketPtr pkt)
318{
319    auto senderState = dynamic_cast<LSQSenderState*>(pkt->senderState);
320    thread[cpu->contextToThread(senderState->contextId())]
321        .completeDataAccess(pkt);
322}
323
template <class Impl>
bool
LSQ<Impl>::recvTimingResp(PacketPtr pkt)
{
    if (pkt->isError())
        DPRINTF(LSQ, "Got error packet back for address: %#X\n",
                pkt->getAddr());

    // The sender state tells us which context (and hence which thread's
    // LSQ unit) this response belongs to.
    auto senderState = dynamic_cast<LSQSenderState*>(pkt->senderState);
    panic_if(!senderState, "Got packet back with unknown sender state\n");

    thread[cpu->contextToThread(senderState->contextId())].recvTimingResp(pkt);

    if (pkt->isInvalidate()) {
        // This response also contains an invalidate; e.g. this can be the case
        // if cmd is ReadRespWithInvalidate.
        //
        // The calling order between completeDataAccess and checkSnoop matters.
        // By calling checkSnoop after completeDataAccess, we ensure that the
        // fault set by checkSnoop is not lost. Calling writeback (more
        // specifically inst->completeAcc) in completeDataAccess overwrites
        // fault, and in case this instruction requires squashing (as
        // determined by checkSnoop), the ReExec fault set by checkSnoop would
        // be lost otherwise.

        DPRINTF(LSQ, "received invalidation with response for addr:%#x\n",
                pkt->getAddr());

        // Every thread must check its in-flight loads against the
        // invalidated address, not just the responding thread.
        for (ThreadID tid = 0; tid < numThreads; tid++) {
            thread[tid].checkSnoop(pkt);
        }
    }
    // Update the LSQRequest state (this may delete the request)
    senderState->request()->packetReplied();

    return true;
}
361
362template <class Impl>
363void
364LSQ<Impl>::recvTimingSnoopReq(PacketPtr pkt)
365{
366    DPRINTF(LSQ, "received pkt for addr:%#x %s\n", pkt->getAddr(),
367            pkt->cmdString());
368
369    // must be a snoop
370    if (pkt->isInvalidate()) {
371        DPRINTF(LSQ, "received invalidation for addr:%#x\n",
372                pkt->getAddr());
373        for (ThreadID tid = 0; tid < numThreads; tid++) {
374            thread[tid].checkSnoop(pkt);
375        }
376    }
377}
378
379template<class Impl>
380int
381LSQ<Impl>::getCount()
382{
383    unsigned total = 0;
384
385    list<ThreadID>::iterator threads = activeThreads->begin();
386    list<ThreadID>::iterator end = activeThreads->end();
387
388    while (threads != end) {
389        ThreadID tid = *threads++;
390
391        total += getCount(tid);
392    }
393
394    return total;
395}
396
397template<class Impl>
398int
399LSQ<Impl>::numLoads()
400{
401    unsigned total = 0;
402
403    list<ThreadID>::iterator threads = activeThreads->begin();
404    list<ThreadID>::iterator end = activeThreads->end();
405
406    while (threads != end) {
407        ThreadID tid = *threads++;
408
409        total += numLoads(tid);
410    }
411
412    return total;
413}
414
415template<class Impl>
416int
417LSQ<Impl>::numStores()
418{
419    unsigned total = 0;
420
421    list<ThreadID>::iterator threads = activeThreads->begin();
422    list<ThreadID>::iterator end = activeThreads->end();
423
424    while (threads != end) {
425        ThreadID tid = *threads++;
426
427        total += thread[tid].numStores();
428    }
429
430    return total;
431}
432
433template<class Impl>
434unsigned
435LSQ<Impl>::numFreeLoadEntries()
436{
437    unsigned total = 0;
438
439    list<ThreadID>::iterator threads = activeThreads->begin();
440    list<ThreadID>::iterator end = activeThreads->end();
441
442    while (threads != end) {
443        ThreadID tid = *threads++;
444
445        total += thread[tid].numFreeLoadEntries();
446    }
447
448    return total;
449}
450
451template<class Impl>
452unsigned
453LSQ<Impl>::numFreeStoreEntries()
454{
455    unsigned total = 0;
456
457    list<ThreadID>::iterator threads = activeThreads->begin();
458    list<ThreadID>::iterator end = activeThreads->end();
459
460    while (threads != end) {
461        ThreadID tid = *threads++;
462
463        total += thread[tid].numFreeStoreEntries();
464    }
465
466    return total;
467}
468
469template<class Impl>
470unsigned
471LSQ<Impl>::numFreeLoadEntries(ThreadID tid)
472{
473        return thread[tid].numFreeLoadEntries();
474}
475
476template<class Impl>
477unsigned
478LSQ<Impl>::numFreeStoreEntries(ThreadID tid)
479{
480        return thread[tid].numFreeStoreEntries();
481}
482
483template<class Impl>
484bool
485LSQ<Impl>::isFull()
486{
487    list<ThreadID>::iterator threads = activeThreads->begin();
488    list<ThreadID>::iterator end = activeThreads->end();
489
490    while (threads != end) {
491        ThreadID tid = *threads++;
492
493        if (!(thread[tid].lqFull() || thread[tid].sqFull()))
494            return false;
495    }
496
497    return true;
498}
499
500template<class Impl>
501bool
502LSQ<Impl>::isFull(ThreadID tid)
503{
504    //@todo: Change to Calculate All Entries for
505    //Dynamic Policy
506    if (lsqPolicy == SMTQueuePolicy::Dynamic)
507        return isFull();
508    else
509        return thread[tid].lqFull() || thread[tid].sqFull();
510}
511
512template<class Impl>
513bool
514LSQ<Impl>::isEmpty() const
515{
516    return lqEmpty() && sqEmpty();
517}
518
519template<class Impl>
520bool
521LSQ<Impl>::lqEmpty() const
522{
523    list<ThreadID>::const_iterator threads = activeThreads->begin();
524    list<ThreadID>::const_iterator end = activeThreads->end();
525
526    while (threads != end) {
527        ThreadID tid = *threads++;
528
529        if (!thread[tid].lqEmpty())
530            return false;
531    }
532
533    return true;
534}
535
536template<class Impl>
537bool
538LSQ<Impl>::sqEmpty() const
539{
540    list<ThreadID>::const_iterator threads = activeThreads->begin();
541    list<ThreadID>::const_iterator end = activeThreads->end();
542
543    while (threads != end) {
544        ThreadID tid = *threads++;
545
546        if (!thread[tid].sqEmpty())
547            return false;
548    }
549
550    return true;
551}
552
553template<class Impl>
554bool
555LSQ<Impl>::lqFull()
556{
557    list<ThreadID>::iterator threads = activeThreads->begin();
558    list<ThreadID>::iterator end = activeThreads->end();
559
560    while (threads != end) {
561        ThreadID tid = *threads++;
562
563        if (!thread[tid].lqFull())
564            return false;
565    }
566
567    return true;
568}
569
570template<class Impl>
571bool
572LSQ<Impl>::lqFull(ThreadID tid)
573{
574    //@todo: Change to Calculate All Entries for
575    //Dynamic Policy
576    if (lsqPolicy == SMTQueuePolicy::Dynamic)
577        return lqFull();
578    else
579        return thread[tid].lqFull();
580}
581
582template<class Impl>
583bool
584LSQ<Impl>::sqFull()
585{
586    list<ThreadID>::iterator threads = activeThreads->begin();
587    list<ThreadID>::iterator end = activeThreads->end();
588
589    while (threads != end) {
590        ThreadID tid = *threads++;
591
592        if (!sqFull(tid))
593            return false;
594    }
595
596    return true;
597}
598
599template<class Impl>
600bool
601LSQ<Impl>::sqFull(ThreadID tid)
602{
603     //@todo: Change to Calculate All Entries for
604    //Dynamic Policy
605    if (lsqPolicy == SMTQueuePolicy::Dynamic)
606        return sqFull();
607    else
608        return thread[tid].sqFull();
609}
610
611template<class Impl>
612bool
613LSQ<Impl>::isStalled()
614{
615    list<ThreadID>::iterator threads = activeThreads->begin();
616    list<ThreadID>::iterator end = activeThreads->end();
617
618    while (threads != end) {
619        ThreadID tid = *threads++;
620
621        if (!thread[tid].isStalled())
622            return false;
623    }
624
625    return true;
626}
627
628template<class Impl>
629bool
630LSQ<Impl>::isStalled(ThreadID tid)
631{
632    if (lsqPolicy == SMTQueuePolicy::Dynamic)
633        return isStalled();
634    else
635        return thread[tid].isStalled();
636}
637
638template<class Impl>
639bool
640LSQ<Impl>::hasStoresToWB()
641{
642    list<ThreadID>::iterator threads = activeThreads->begin();
643    list<ThreadID>::iterator end = activeThreads->end();
644
645    while (threads != end) {
646        ThreadID tid = *threads++;
647
648        if (hasStoresToWB(tid))
649            return true;
650    }
651
652    return false;
653}
654
655template<class Impl>
656bool
657LSQ<Impl>::willWB()
658{
659    list<ThreadID>::iterator threads = activeThreads->begin();
660    list<ThreadID>::iterator end = activeThreads->end();
661
662    while (threads != end) {
663        ThreadID tid = *threads++;
664
665        if (willWB(tid))
666            return true;
667    }
668
669    return false;
670}
671
672template<class Impl>
673void
674LSQ<Impl>::dumpInsts() const
675{
676    list<ThreadID>::const_iterator threads = activeThreads->begin();
677    list<ThreadID>::const_iterator end = activeThreads->end();
678
679    while (threads != end) {
680        ThreadID tid = *threads++;
681
682        thread[tid].dumpInsts();
683    }
684}
685
/**
 * Create (or reuse) the LSQRequest for a memory instruction, start its
 * translation, and — if translation already completed — perform the
 * actual read/write through the CPU.  Returns the instruction's fault.
 */
template<class Impl>
Fault
LSQ<Impl>::pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data,
                       unsigned int size, Addr addr, Request::Flags flags,
                       uint64_t *res, AtomicOpFunctorPtr amo_op,
                       const std::vector<bool>& byteEnable)
{
    // This incoming request can be either load, store or atomic.
    // Atomic request has a corresponding pointer to its atomic memory
    // operation
    bool isAtomic M5_VAR_USED = !isLoad && amo_op;

    ThreadID tid = cpu->contextToThread(inst->contextId());
    auto cacheLineSize = cpu->cacheLineSize();
    bool needs_burst = transferNeedsBurst(addr, size, cacheLineSize);
    LSQRequest* req = nullptr;

    // Atomic requests that access data across cache line boundary are
    // currently not allowed since the cache does not guarantee corresponding
    // atomic memory operations to be executed atomically across a cache line.
    // For ISAs such as x86 that supports cross-cache-line atomic instructions,
    // the cache needs to be modified to perform atomic update to both cache
    // lines. For now, such cross-line update is not supported.
    assert(!isAtomic || (isAtomic && !needs_burst));

    if (inst->translationStarted()) {
        // A previous execution attempt already built this request (it is
        // stashed on the instruction by initiateTranslation); reuse it.
        req = inst->savedReq;
        assert(req);
    } else {
        // Accesses that cross a cache-line boundary are split into
        // multiple sub-requests; everything else is a single request.
        if (needs_burst) {
            req = new SplitDataRequest(&thread[tid], inst, isLoad, addr,
                    size, flags, data, res);
        } else {
            req = new SingleDataRequest(&thread[tid], inst, isLoad, addr,
                    size, flags, data, res, std::move(amo_op));
        }
        assert(req);
        if (!byteEnable.empty()) {
            req->_byteEnable = byteEnable;
        }
        inst->setRequest();
        req->taskId(cpu->taskId());

        // There might be fault from a previous execution attempt if this is
        // a strictly ordered load
        inst->getFault() = NoFault;

        req->initiateTranslation();
    }

    /* This is the place where instructions get the effAddr. */
    if (req->isTranslationComplete()) {
        if (req->isMemAccessRequired()) {
            inst->effAddr = req->getVaddr();
            inst->effSize = size;
            inst->effAddrValid(true);

            // The checker CPU verifies the access against its own copy of
            // the request.
            if (cpu->checker) {
                inst->reqToVerify = std::make_shared<Request>(*req->request());
            }
            Fault fault;
            if (isLoad)
                fault = cpu->read(req, inst->lqIdx);
            else
                fault = cpu->write(req, data, inst->sqIdx);
            // inst->getFault() may have the first-fault of a
            // multi-access split request at this point.
            // Overwrite that only if we got another type of fault
            // (e.g. re-exec).
            if (fault != NoFault)
                inst->getFault() = fault;
        } else if (isLoad) {
            inst->setMemAccPredicate(false);
            // Commit will have to clean up whatever happened.  Set this
            // instruction as executed.
            inst->setExecuted();
        }
    }

    if (inst->traceData)
        inst->traceData->setMem(addr, size, flags);

    return inst->getFault();
}
770
/**
 * TLB callback for the single-fragment case: record the fault, copy the
 * translated physical address/flags onto the instruction, and move the
 * request to the Request or Fault state.
 */
template<class Impl>
void
LSQ<Impl>::SingleDataRequest::finish(const Fault &fault, const RequestPtr &req,
        ThreadContext* tc, BaseTLB::Mode mode)
{
    _fault.push_back(fault);
    // The one and only fragment is now out of the TLB.
    numInTranslationFragments = 0;
    numTranslatedFragments = 1;
    /* If the instruction has been squashed, let the request know
     * as it may have to self-destruct. */
    if (_inst->isSquashed()) {
        this->squashTranslation();
    } else {
        _inst->strictlyOrdered(req->isStrictlyOrdered());

        flags.set(Flag::TranslationFinished);
        if (fault == NoFault) {
            // Translation succeeded: publish paddr and request flags on
            // the instruction before the access is issued.
            _inst->physEffAddr = req->getPaddr();
            _inst->memReqFlags = req->getFlags();
            if (req->isCondSwap()) {
                // Conditional swaps carry the compare value through the
                // request's extra data.
                assert(_res);
                req->setExtraData(*_res);
            }
            setState(State::Request);
        } else {
            setState(State::Fault);
        }

        LSQRequest::_inst->fault = fault;
        LSQRequest::_inst->translationCompleted(true);
    }
}
803
/**
 * TLB callback for one fragment of a split access.  Bookkeeping only
 * until the last fragment translates; then the aggregate result
 * (NoFault / PartialFault / Fault) is determined by the position of the
 * first faulting fragment.
 */
template<class Impl>
void
LSQ<Impl>::SplitDataRequest::finish(const Fault &fault, const RequestPtr &req,
        ThreadContext* tc, BaseTLB::Mode mode)
{
    // Identify which fragment this translation belongs to.
    int i;
    for (i = 0; i < _requests.size() && _requests[i] != req; i++);
    assert(i < _requests.size());
    _fault[i] = fault;

    numInTranslationFragments--;
    numTranslatedFragments++;

    // Accumulate this fragment's flags into the main (whole-access)
    // request.
    if (fault == NoFault)
        mainReq->setFlags(req->getFlags());

    // Only act once every fragment has come back from the TLB.
    if (numTranslatedFragments == _requests.size()) {
        if (_inst->isSquashed()) {
            this->squashTranslation();
        } else {
            _inst->strictlyOrdered(mainReq->isStrictlyOrdered());
            flags.set(Flag::TranslationFinished);
            _inst->translationCompleted(true);

            // Scan for the first faulting fragment: after the loop, i is
            // the index of the first fault, or _fault.size() if none.
            for (i = 0; i < _fault.size() && _fault[i] == NoFault; i++);
            if (i > 0) {
                // At least the leading fragment translated successfully.
                _inst->physEffAddr = request(0)->getPaddr();
                _inst->memReqFlags = mainReq->getFlags();
                if (mainReq->isCondSwap()) {
                    // Cond-swap requires that no fragment faulted.
                    assert (i == _fault.size());
                    assert(_res);
                    mainReq->setExtraData(*_res);
                }
                if (i == _fault.size()) {
                    _inst->fault = NoFault;
                    setState(State::Request);
                } else {
                  // A later fragment faulted; keep the leading successful
                  // fragments usable via the PartialFault state.
                  _inst->fault = _fault[i];
                  setState(State::PartialFault);
                }
            } else {
                // The very first fragment faulted.
                _inst->fault = _fault[0];
                setState(State::Fault);
            }
        }

    }
}
852
/**
 * Build the single memory request for this access and send it to the
 * TLB for timing translation.
 */
template<class Impl>
void
LSQ<Impl>::SingleDataRequest::initiateTranslation()
{
    assert(_requests.size() == 0);

    // addRequest may decline to create a request — presumably when the
    // byte-enable mask disables every byte; confirm in addRequest.
    this->addRequest(_addr, _size, _byteEnable);

    if (_requests.size() > 0) {
        _requests.back()->setReqInstSeqNum(_inst->seqNum);
        _requests.back()->taskId(_taskId);
        _inst->translationStarted(true);
        setState(State::Translation);
        flags.set(Flag::TranslationStarted);

        // Stash this request on the instruction so a replay can reuse it
        // (see LSQ::pushRequest).
        _inst->savedReq = this;
        sendFragmentToTranslation(0);
    } else {
        // No request was created: the memory access is predicated off.
        _inst->setMemAccPredicate(false);
    }
}
874
875template<class Impl>
876PacketPtr
877LSQ<Impl>::SplitDataRequest::mainPacket()
878{
879    return _mainPacket;
880}
881
882template<class Impl>
883RequestPtr
884LSQ<Impl>::SplitDataRequest::mainRequest()
885{
886    return mainReq;
887}
888
/**
 * Split the access into cache-line-sized fragments (an unaligned head,
 * zero or more whole lines, and a possible tail), build the main
 * whole-access request, and send every fragment to the TLB.
 */
template<class Impl>
void
LSQ<Impl>::SplitDataRequest::initiateTranslation()
{
    auto cacheLineSize = _port.cacheLineSize();
    Addr base_addr = _addr;
    // First cache-line boundary at or after the start of the access.
    Addr next_addr = addrBlockAlign(_addr + cacheLineSize, cacheLineSize);
    // Line-aligned address covering the end of the access.
    Addr final_addr = addrBlockAlign(_addr + _size, cacheLineSize);
    uint32_t size_so_far = 0;

    // The main request describes the whole access; fragments carry the
    // actual traffic.
    mainReq = std::make_shared<Request>(_inst->getASID(), base_addr,
                _size, _flags, _inst->masterId(),
                _inst->instAddr(), _inst->contextId());
    if (!_byteEnable.empty()) {
        mainReq->setByteEnable(_byteEnable);
    }

    // Paddr is not used in mainReq. However, we will accumulate the flags
    // from the sub requests into mainReq by calling setFlags() in finish().
    // setFlags() assumes that paddr is set so flip the paddr valid bit here to
    // avoid a potential assert in setFlags() when we call it from  finish().
    mainReq->setPaddr(0);

    /* Get the pre-fix, possibly unaligned. */
    if (_byteEnable.empty()) {
        this->addRequest(base_addr, next_addr - base_addr, _byteEnable);
    } else {
        // Slice the byte-enable mask so each fragment sees only its own
        // portion.
        auto it_start = _byteEnable.begin();
        auto it_end = _byteEnable.begin() + (next_addr - base_addr);
        this->addRequest(base_addr, next_addr - base_addr,
                         std::vector<bool>(it_start, it_end));
    }
    size_so_far = next_addr - base_addr;

    /* We are block aligned now, reading whole blocks. */
    base_addr = next_addr;
    while (base_addr != final_addr) {
        if (_byteEnable.empty()) {
            this->addRequest(base_addr, cacheLineSize, _byteEnable);
        } else {
            auto it_start = _byteEnable.begin() + size_so_far;
            auto it_end = _byteEnable.begin() + size_so_far + cacheLineSize;
            this->addRequest(base_addr, cacheLineSize,
                             std::vector<bool>(it_start, it_end));
        }
        size_so_far += cacheLineSize;
        base_addr += cacheLineSize;
    }

    /* Deal with the tail. */
    if (size_so_far < _size) {
        if (_byteEnable.empty()) {
            this->addRequest(base_addr, _size - size_so_far, _byteEnable);
        } else {
            auto it_start = _byteEnable.begin() + size_so_far;
            auto it_end = _byteEnable.end();
            this->addRequest(base_addr, _size - size_so_far,
                             std::vector<bool>(it_start, it_end));
        }
    }

    if (_requests.size() > 0) {
        /* Setup the requests and send them to translation. */
        for (auto& r: _requests) {
            r->setReqInstSeqNum(_inst->seqNum);
            r->taskId(_taskId);
        }

        _inst->translationStarted(true);
        setState(State::Translation);
        flags.set(Flag::TranslationStarted);
        // Stash this request on the instruction so a replay can reuse it
        // (see LSQ::pushRequest).
        this->_inst->savedReq = this;
        numInTranslationFragments = 0;
        numTranslatedFragments = 0;
        _fault.resize(_requests.size());

        for (uint32_t i = 0; i < _requests.size(); i++) {
            sendFragmentToTranslation(i);
        }
    } else {
        // No fragment was created — presumably the byte-enable mask
        // disabled everything (confirm in addRequest); the access is
        // predicated off.
        _inst->setMemAccPredicate(false);
    }
}
972
973template<class Impl>
974void
975LSQ<Impl>::LSQRequest::sendFragmentToTranslation(int i)
976{
977    numInTranslationFragments++;
978    _port.dTLB()->translateTiming(
979            this->request(i),
980            this->_inst->thread->getTC(), this,
981            this->isLoad() ? BaseTLB::Read : BaseTLB::Write);
982}
983
984template<class Impl>
985bool
986LSQ<Impl>::SingleDataRequest::recvTimingResp(PacketPtr pkt)
987{
988    assert(_numOutstandingPackets == 1);
989    auto state = dynamic_cast<LSQSenderState*>(pkt->senderState);
990    flags.set(Flag::Complete);
991    state->outstanding--;
992    assert(pkt == _packets.front());
993    _port.completeDataAccess(pkt);
994    return true;
995}
996
/**
 * Handle the response for one fragment of a split access.  When the last
 * fragment arrives, a synthetic whole-access response packet is built
 * and delivered to the LSQ unit.
 */
template<class Impl>
bool
LSQ<Impl>::SplitDataRequest::recvTimingResp(PacketPtr pkt)
{
    auto state = dynamic_cast<LSQSenderState*>(pkt->senderState);
    // Locate which fragment packet this response answers.
    uint32_t pktIdx = 0;
    while (pktIdx < _packets.size() && pkt != _packets[pktIdx])
        pktIdx++;
    assert(pktIdx < _packets.size());
    numReceivedPackets++;
    state->outstanding--;
    if (numReceivedPackets == _packets.size()) {
        flags.set(Flag::Complete);
        /* Assemble packets. */
        // Loads deliver into the instruction's memData buffer; stores
        // reuse the staged write data.
        PacketPtr resp = isLoad()
            ? Packet::createRead(mainReq)
            : Packet::createWrite(mainReq);
        if (isLoad())
            resp->dataStatic(_inst->memData);
        else
            resp->dataStatic(_data);
        resp->senderState = _senderState;
        _port.completeDataAccess(resp);
        // The assembled packet existed only to signal completion.
        delete resp;
    }
    return true;
}
1024
1025template<class Impl>
1026void
1027LSQ<Impl>::SingleDataRequest::buildPackets()
1028{
1029    assert(_senderState);
1030    /* Retries do not create new packets. */
1031    if (_packets.size() == 0) {
1032        _packets.push_back(
1033                isLoad()
1034                    ?  Packet::createRead(request())
1035                    :  Packet::createWrite(request()));
1036        _packets.back()->dataStatic(_inst->memData);
1037        _packets.back()->senderState = _senderState;
1038    }
1039    assert(_packets.size() == 1);
1040}
1041
/**
 * Build one packet per successfully translated fragment (plus, for
 * loads, a main packet covering the whole access).  Retries reuse the
 * packets built on the first attempt.
 */
template<class Impl>
void
LSQ<Impl>::SplitDataRequest::buildPackets()
{
    /* Extra data?? */
    Addr base_address = _addr;

    if (_packets.size() == 0) {
        /* New stuff */
        if (isLoad()) {
            // Fragments read into slices of the same memData buffer the
            // main packet points at.
            _mainPacket = Packet::createRead(mainReq);
            _mainPacket->dataStatic(_inst->memData);
        }
        // Stop at the first faulting fragment; only the leading
        // fault-free fragments get packets.
        for (int i = 0; i < _requests.size() && _fault[i] == NoFault; i++) {
            RequestPtr r = _requests[i];
            PacketPtr pkt = isLoad() ? Packet::createRead(r)
                                     : Packet::createWrite(r);
            // Byte offset of this fragment within the whole access.
            ptrdiff_t offset = r->getVaddr() - base_address;
            if (isLoad()) {
                pkt->dataStatic(_inst->memData + offset);
            } else {
                // Stores copy their slice into a packet-owned buffer
                // (freed by the packet via dataDynamic).
                uint8_t* req_data = new uint8_t[r->getSize()];
                std::memcpy(req_data,
                        _inst->memData + offset,
                        r->getSize());
                pkt->dataDynamic(req_data);
            }
            pkt->senderState = _senderState;
            _packets.push_back(pkt);
        }
    }
    assert(_packets.size() > 0);
}
1075
1076template<class Impl>
1077void
1078LSQ<Impl>::SingleDataRequest::sendPacketToCache()
1079{
1080    assert(_numOutstandingPackets == 0);
1081    if (lsqUnit()->trySendPacket(isLoad(), _packets.at(0)))
1082        _numOutstandingPackets = 1;
1083}
1084
1085template<class Impl>
1086void
1087LSQ<Impl>::SplitDataRequest::sendPacketToCache()
1088{
1089    /* Try to send the packets. */
1090    while (numReceivedPackets + _numOutstandingPackets < _packets.size() &&
1091            lsqUnit()->trySendPacket(isLoad(),
1092                _packets.at(numReceivedPackets + _numOutstandingPackets))) {
1093        _numOutstandingPackets++;
1094    }
1095}
1096
1097template<class Impl>
1098void
1099LSQ<Impl>::SingleDataRequest::handleIprWrite(ThreadContext *thread,
1100                                             PacketPtr pkt)
1101{
1102    TheISA::handleIprWrite(thread, pkt);
1103}
1104
1105template<class Impl>
1106void
1107LSQ<Impl>::SplitDataRequest::handleIprWrite(ThreadContext *thread,
1108                                            PacketPtr mainPkt)
1109{
1110    unsigned offset = 0;
1111    for (auto r: _requests) {
1112        PacketPtr pkt = new Packet(r, MemCmd::WriteReq);
1113        pkt->dataStatic(mainPkt->getPtr<uint8_t>() + offset);
1114        TheISA::handleIprWrite(thread, pkt);
1115        offset += r->getSize();
1116        delete pkt;
1117    }
1118}
1119
1120template<class Impl>
1121Cycles
1122LSQ<Impl>::SingleDataRequest::handleIprRead(ThreadContext *thread,
1123                                            PacketPtr pkt)
1124{
1125    return TheISA::handleIprRead(thread, pkt);
1126}
1127
1128template<class Impl>
1129Cycles
1130LSQ<Impl>::SplitDataRequest::handleIprRead(ThreadContext *thread,
1131                                           PacketPtr mainPkt)
1132{
1133    Cycles delay(0);
1134    unsigned offset = 0;
1135
1136    for (auto r: _requests) {
1137        PacketPtr pkt = new Packet(r, MemCmd::ReadReq);
1138        pkt->dataStatic(mainPkt->getPtr<uint8_t>() + offset);
1139        Cycles d = TheISA::handleIprRead(thread, pkt);
1140        if (d > delay)
1141            delay = d;
1142        offset += r->getSize();
1143        delete pkt;
1144    }
1145    return delay;
1146}
1147
1148template<class Impl>
1149bool
1150LSQ<Impl>::SingleDataRequest::isCacheBlockHit(Addr blockAddr, Addr blockMask)
1151{
1152    return ( (LSQRequest::_requests[0]->getPaddr() & blockMask) == blockAddr);
1153}
1154
1155template<class Impl>
1156bool
1157LSQ<Impl>::SplitDataRequest::isCacheBlockHit(Addr blockAddr, Addr blockMask)
1158{
1159    bool is_hit = false;
1160    for (auto &r: _requests) {
1161        if ((r->getPaddr() & blockMask) == blockAddr) {
1162            is_hit = true;
1163            break;
1164        }
1165    }
1166    return is_hit;
1167}
1168
1169template <class Impl>
1170bool
1171LSQ<Impl>::DcachePort::recvTimingResp(PacketPtr pkt)
1172{
1173    return lsq->recvTimingResp(pkt);
1174}
1175
1176template <class Impl>
1177void
1178LSQ<Impl>::DcachePort::recvTimingSnoopReq(PacketPtr pkt)
1179{
1180    for (ThreadID tid = 0; tid < cpu->numThreads; tid++) {
1181        if (cpu->getCpuAddrMonitor(tid)->doMonitor(pkt)) {
1182            cpu->wakeup(tid);
1183        }
1184    }
1185    lsq->recvTimingSnoopReq(pkt);
1186}
1187
1188template <class Impl>
1189void
1190LSQ<Impl>::DcachePort::recvReqRetry()
1191{
1192    lsq->recvReqRetry();
1193}
1194
1195#endif//__CPU_O3_LSQ_IMPL_HH__
1196