1/*
2 * Copyright (c) 2011-2012, 2014, 2017-2018 ARM Limited
3 * Copyright (c) 2013 Advanced Micro Devices, Inc.
4 * All rights reserved
5 *
6 * The license below extends only to copyright in the software and shall
7 * not be construed as granting a license to any other intellectual
8 * property including but not limited to intellectual property relating
9 * to a hardware implementation of the functionality of the software
10 * licensed hereunder. You may use the software subject to the license
11 * terms below provided that you ensure that this notice is replicated
12 * unmodified and in its entirety in all distributions of the software,
13 * modified or unmodified, in source code or in binary form.
14 *
15 * Copyright (c) 2005-2006 The Regents of The University of Michigan
16 * All rights reserved.
17 *
18 * Redistribution and use in source and binary forms, with or without
19 * modification, are permitted provided that the following conditions are
20 * met: redistributions of source code must retain the above copyright
21 * notice, this list of conditions and the following disclaimer;
22 * redistributions in binary form must reproduce the above copyright
23 * notice, this list of conditions and the following disclaimer in the
24 * documentation and/or other materials provided with the distribution;
25 * neither the name of the copyright holders nor the names of its
26 * contributors may be used to endorse or promote products derived from
27 * this software without specific prior written permission.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
30 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
31 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
32 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
33 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
34 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
35 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
36 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
37 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
38 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
39 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40 *
41 * Authors: Korey Sewell
42 */
43
44#ifndef __CPU_O3_LSQ_IMPL_HH__
45#define __CPU_O3_LSQ_IMPL_HH__
46
47#include <algorithm>
48#include <list>
49#include <string>
50
51#include "base/logging.hh"
52#include "cpu/o3/cpu.hh"
53#include "cpu/o3/lsq.hh"
54#include "debug/Drain.hh"
55#include "debug/Fetch.hh"
56#include "debug/LSQ.hh"
57#include "debug/Writeback.hh"
58#include "params/DerivO3CPU.hh"
59
60using namespace std;
61
template <class Impl>
LSQ<Impl>::LSQ(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params)
    : cpu(cpu_ptr), iewStage(iew_ptr),
      _cacheBlocked(false),
      cacheStorePorts(params->cacheStorePorts), usedStorePorts(0),
      cacheLoadPorts(params->cacheLoadPorts), usedLoadPorts(0),
      lsqPolicy(params->smtLSQPolicy),
      LQEntries(params->LQEntries),
      SQEntries(params->SQEntries),
      // Per-thread LQ/SQ capacity depends on the SMT sharing policy:
      // maxLSQAllocation() partitions or thresholds the raw entry counts.
      maxLQEntries(maxLSQAllocation(lsqPolicy, LQEntries, params->numThreads,
                  params->smtLSQThreshold)),
      maxSQEntries(maxLSQAllocation(lsqPolicy, SQEntries, params->numThreads,
                  params->smtLSQThreshold)),
      dcachePort(this, cpu_ptr),
      numThreads(params->numThreads)
{
    assert(numThreads > 0 && numThreads <= Impl::MaxThreads);

    //**********************************************/
    //************ Handle SMT Parameters ***********/
    //**********************************************/

    /* Run SMT policy checks. */
    if (lsqPolicy == SMTQueuePolicy::Dynamic) {
        DPRINTF(LSQ, "LSQ sharing policy set to Dynamic\n");
    } else if (lsqPolicy == SMTQueuePolicy::Partitioned) {
        DPRINTF(Fetch, "LSQ sharing policy set to Partitioned: "
                "%i entries per LQ | %i entries per SQ\n",
                maxLQEntries,maxSQEntries);
    } else if (lsqPolicy == SMTQueuePolicy::Threshold) {

        // Threshold policy only makes sense if the per-thread cap exceeds
        // the raw queue sizes.
        assert(params->smtLSQThreshold > params->LQEntries);
        assert(params->smtLSQThreshold > params->SQEntries);

        DPRINTF(LSQ, "LSQ sharing policy set to Threshold: "
                "%i entries per LQ | %i entries per SQ\n",
                maxLQEntries,maxSQEntries);
    } else {
        panic("Invalid LSQ sharing policy. Options are: Dynamic, "
                "Partitioned, Threshold");
    }

    // Construct and wire up one LSQUnit per hardware thread.
    thread.reserve(numThreads);
    for (ThreadID tid = 0; tid < numThreads; tid++) {
        thread.emplace_back(maxLQEntries, maxSQEntries);
        thread[tid].init(cpu, iew_ptr, params, this, tid);
        thread[tid].setDcachePort(&dcachePort);
    }
}
111
112
113template<class Impl>
114std::string
115LSQ<Impl>::name() const
116{
117 return iewStage->name() + ".lsq";
118}
119
120template<class Impl>
121void
122LSQ<Impl>::regStats()
123{
124 //Initialize LSQs
125 for (ThreadID tid = 0; tid < numThreads; tid++) {
126 thread[tid].regStats();
127 }
128}
129
130template<class Impl>
131void
132LSQ<Impl>::setActiveThreads(list<ThreadID> *at_ptr)
133{
134 activeThreads = at_ptr;
135 assert(activeThreads != 0);
136}
137
138template <class Impl>
139void
140LSQ<Impl>::drainSanityCheck() const
141{
142 assert(isDrained());
143
144 for (ThreadID tid = 0; tid < numThreads; tid++)
145 thread[tid].drainSanityCheck();
146}
147
148template <class Impl>
149bool
150LSQ<Impl>::isDrained() const
151{
152 bool drained(true);
153
154 if (!lqEmpty()) {
155 DPRINTF(Drain, "Not drained, LQ not empty.\n");
156 drained = false;
157 }
158
159 if (!sqEmpty()) {
160 DPRINTF(Drain, "Not drained, SQ not empty.\n");
161 drained = false;
162 }
163
164 return drained;
165}
166
167template <class Impl>
168void
169LSQ<Impl>::takeOverFrom()
170{
171 usedStorePorts = 0;
172 _cacheBlocked = false;
173
174 for (ThreadID tid = 0; tid < numThreads; tid++) {
175 thread[tid].takeOverFrom();
176 }
177}
178
179template <class Impl>
180void
181LSQ<Impl>::tick()
182{
183 // Re-issue loads which got blocked on the per-cycle load ports limit.
184 if (usedLoadPorts == cacheLoadPorts && !_cacheBlocked)
185 iewStage->cacheUnblocked();
186
187 usedLoadPorts = 0;
188 usedStorePorts = 0;
189}
190
191template<class Impl>
192bool
193LSQ<Impl>::cacheBlocked() const
194{
195 return _cacheBlocked;
196}
197
198template<class Impl>
199void
200LSQ<Impl>::cacheBlocked(bool v)
201{
202 _cacheBlocked = v;
203}
204
205template<class Impl>
206bool
207LSQ<Impl>::cachePortAvailable(bool is_load) const
208{
209 bool ret;
210 if (is_load) {
211 ret = usedLoadPorts < cacheLoadPorts;
212 } else {
213 ret = usedStorePorts < cacheStorePorts;
214 }
215 return ret;
216}
217
218template<class Impl>
219void
220LSQ<Impl>::cachePortBusy(bool is_load)
221{
222 assert(cachePortAvailable(is_load));
223 if (is_load) {
224 usedLoadPorts++;
225 } else {
226 usedStorePorts++;
227 }
228}
229
230template<class Impl>
231void
232LSQ<Impl>::insertLoad(const DynInstPtr &load_inst)
233{
234 ThreadID tid = load_inst->threadNumber;
235
236 thread[tid].insertLoad(load_inst);
237}
238
239template<class Impl>
240void
241LSQ<Impl>::insertStore(const DynInstPtr &store_inst)
242{
243 ThreadID tid = store_inst->threadNumber;
244
245 thread[tid].insertStore(store_inst);
246}
247
248template<class Impl>
249Fault
250LSQ<Impl>::executeLoad(const DynInstPtr &inst)
251{
252 ThreadID tid = inst->threadNumber;
253
254 return thread[tid].executeLoad(inst);
255}
256
257template<class Impl>
258Fault
259LSQ<Impl>::executeStore(const DynInstPtr &inst)
260{
261 ThreadID tid = inst->threadNumber;
262
263 return thread[tid].executeStore(inst);
264}
265
266template<class Impl>
267void
268LSQ<Impl>::writebackStores()
269{
270 list<ThreadID>::iterator threads = activeThreads->begin();
271 list<ThreadID>::iterator end = activeThreads->end();
272
273 while (threads != end) {
274 ThreadID tid = *threads++;
275
276 if (numStoresToWB(tid) > 0) {
277 DPRINTF(Writeback,"[tid:%i] Writing back stores. %i stores "
278 "available for Writeback.\n", tid, numStoresToWB(tid));
279 }
280
281 thread[tid].writebackStores();
282 }
283}
284
285template<class Impl>
286bool
287LSQ<Impl>::violation()
288{
289 /* Answers: Does Anybody Have a Violation?*/
290 list<ThreadID>::iterator threads = activeThreads->begin();
291 list<ThreadID>::iterator end = activeThreads->end();
292
293 while (threads != end) {
294 ThreadID tid = *threads++;
295
296 if (thread[tid].violation())
297 return true;
298 }
299
300 return false;
301}
302
303template <class Impl>
304void
305LSQ<Impl>::recvReqRetry()
306{
307 iewStage->cacheUnblocked();
308 cacheBlocked(false);
309
310 for (ThreadID tid : *activeThreads) {
311 thread[tid].recvRetry();
312 }
313}
314
315template <class Impl>
316void
317LSQ<Impl>::completeDataAccess(PacketPtr pkt)
318{
319 auto senderState = dynamic_cast<LSQSenderState*>(pkt->senderState);
320 thread[cpu->contextToThread(senderState->contextId())]
321 .completeDataAccess(pkt);
322}
323
template <class Impl>
bool
LSQ<Impl>::recvTimingResp(PacketPtr pkt)
{
    // Handle a timing response from the dcache: deliver it to the issuing
    // thread's unit, propagate any piggy-backed invalidation as a snoop to
    // every thread, then notify the LSQRequest that its packet is done.
    if (pkt->isError())
        DPRINTF(LSQ, "Got error packet back for address: %#X\n",
                pkt->getAddr());

    auto senderState = dynamic_cast<LSQSenderState*>(pkt->senderState);
    panic_if(!senderState, "Got packet back with unknown sender state\n");

    thread[cpu->contextToThread(senderState->contextId())].recvTimingResp(pkt);

    if (pkt->isInvalidate()) {
        // This response also contains an invalidate; e.g. this can be the case
        // if cmd is ReadRespWithInvalidate.
        //
        // The calling order between completeDataAccess and checkSnoop matters.
        // By calling checkSnoop after completeDataAccess, we ensure that the
        // fault set by checkSnoop is not lost. Calling writeback (more
        // specifically inst->completeAcc) in completeDataAccess overwrites
        // fault, and in case this instruction requires squashing (as
        // determined by checkSnoop), the ReExec fault set by checkSnoop would
        // be lost otherwise.

        DPRINTF(LSQ, "received invalidation with response for addr:%#x\n",
                pkt->getAddr());

        // Snoop every thread, not just the one that issued the access.
        for (ThreadID tid = 0; tid < numThreads; tid++) {
            thread[tid].checkSnoop(pkt);
        }
    }
    // Update the LSQRequest state (this may delete the request)
    senderState->request()->packetReplied();

    return true;
}
361
362template <class Impl>
363void
364LSQ<Impl>::recvTimingSnoopReq(PacketPtr pkt)
365{
366 DPRINTF(LSQ, "received pkt for addr:%#x %s\n", pkt->getAddr(),
367 pkt->cmdString());
368
369 // must be a snoop
370 if (pkt->isInvalidate()) {
371 DPRINTF(LSQ, "received invalidation for addr:%#x\n",
372 pkt->getAddr());
373 for (ThreadID tid = 0; tid < numThreads; tid++) {
374 thread[tid].checkSnoop(pkt);
375 }
376 }
377}
378
379template<class Impl>
380int
381LSQ<Impl>::getCount()
382{
383 unsigned total = 0;
384
385 list<ThreadID>::iterator threads = activeThreads->begin();
386 list<ThreadID>::iterator end = activeThreads->end();
387
388 while (threads != end) {
389 ThreadID tid = *threads++;
390
391 total += getCount(tid);
392 }
393
394 return total;
395}
396
397template<class Impl>
398int
399LSQ<Impl>::numLoads()
400{
401 unsigned total = 0;
402
403 list<ThreadID>::iterator threads = activeThreads->begin();
404 list<ThreadID>::iterator end = activeThreads->end();
405
406 while (threads != end) {
407 ThreadID tid = *threads++;
408
409 total += numLoads(tid);
410 }
411
412 return total;
413}
414
415template<class Impl>
416int
417LSQ<Impl>::numStores()
418{
419 unsigned total = 0;
420
421 list<ThreadID>::iterator threads = activeThreads->begin();
422 list<ThreadID>::iterator end = activeThreads->end();
423
424 while (threads != end) {
425 ThreadID tid = *threads++;
426
427 total += thread[tid].numStores();
428 }
429
430 return total;
431}
432
433template<class Impl>
434unsigned
435LSQ<Impl>::numFreeLoadEntries()
436{
437 unsigned total = 0;
438
439 list<ThreadID>::iterator threads = activeThreads->begin();
440 list<ThreadID>::iterator end = activeThreads->end();
441
442 while (threads != end) {
443 ThreadID tid = *threads++;
444
445 total += thread[tid].numFreeLoadEntries();
446 }
447
448 return total;
449}
450
451template<class Impl>
452unsigned
453LSQ<Impl>::numFreeStoreEntries()
454{
455 unsigned total = 0;
456
457 list<ThreadID>::iterator threads = activeThreads->begin();
458 list<ThreadID>::iterator end = activeThreads->end();
459
460 while (threads != end) {
461 ThreadID tid = *threads++;
462
463 total += thread[tid].numFreeStoreEntries();
464 }
465
466 return total;
467}
468
469template<class Impl>
470unsigned
471LSQ<Impl>::numFreeLoadEntries(ThreadID tid)
472{
473 return thread[tid].numFreeLoadEntries();
474}
475
476template<class Impl>
477unsigned
478LSQ<Impl>::numFreeStoreEntries(ThreadID tid)
479{
480 return thread[tid].numFreeStoreEntries();
481}
482
483template<class Impl>
484bool
485LSQ<Impl>::isFull()
486{
487 list<ThreadID>::iterator threads = activeThreads->begin();
488 list<ThreadID>::iterator end = activeThreads->end();
489
490 while (threads != end) {
491 ThreadID tid = *threads++;
492
493 if (!(thread[tid].lqFull() || thread[tid].sqFull()))
494 return false;
495 }
496
497 return true;
498}
499
500template<class Impl>
501bool
502LSQ<Impl>::isFull(ThreadID tid)
503{
504 //@todo: Change to Calculate All Entries for
505 //Dynamic Policy
506 if (lsqPolicy == SMTQueuePolicy::Dynamic)
507 return isFull();
508 else
509 return thread[tid].lqFull() || thread[tid].sqFull();
510}
511
512template<class Impl>
513bool
514LSQ<Impl>::isEmpty() const
515{
516 return lqEmpty() && sqEmpty();
517}
518
519template<class Impl>
520bool
521LSQ<Impl>::lqEmpty() const
522{
523 list<ThreadID>::const_iterator threads = activeThreads->begin();
524 list<ThreadID>::const_iterator end = activeThreads->end();
525
526 while (threads != end) {
527 ThreadID tid = *threads++;
528
529 if (!thread[tid].lqEmpty())
530 return false;
531 }
532
533 return true;
534}
535
536template<class Impl>
537bool
538LSQ<Impl>::sqEmpty() const
539{
540 list<ThreadID>::const_iterator threads = activeThreads->begin();
541 list<ThreadID>::const_iterator end = activeThreads->end();
542
543 while (threads != end) {
544 ThreadID tid = *threads++;
545
546 if (!thread[tid].sqEmpty())
547 return false;
548 }
549
550 return true;
551}
552
553template<class Impl>
554bool
555LSQ<Impl>::lqFull()
556{
557 list<ThreadID>::iterator threads = activeThreads->begin();
558 list<ThreadID>::iterator end = activeThreads->end();
559
560 while (threads != end) {
561 ThreadID tid = *threads++;
562
563 if (!thread[tid].lqFull())
564 return false;
565 }
566
567 return true;
568}
569
570template<class Impl>
571bool
572LSQ<Impl>::lqFull(ThreadID tid)
573{
574 //@todo: Change to Calculate All Entries for
575 //Dynamic Policy
576 if (lsqPolicy == SMTQueuePolicy::Dynamic)
577 return lqFull();
578 else
579 return thread[tid].lqFull();
580}
581
582template<class Impl>
583bool
584LSQ<Impl>::sqFull()
585{
586 list<ThreadID>::iterator threads = activeThreads->begin();
587 list<ThreadID>::iterator end = activeThreads->end();
588
589 while (threads != end) {
590 ThreadID tid = *threads++;
591
592 if (!sqFull(tid))
593 return false;
594 }
595
596 return true;
597}
598
599template<class Impl>
600bool
601LSQ<Impl>::sqFull(ThreadID tid)
602{
603 //@todo: Change to Calculate All Entries for
604 //Dynamic Policy
605 if (lsqPolicy == SMTQueuePolicy::Dynamic)
606 return sqFull();
607 else
608 return thread[tid].sqFull();
609}
610
611template<class Impl>
612bool
613LSQ<Impl>::isStalled()
614{
615 list<ThreadID>::iterator threads = activeThreads->begin();
616 list<ThreadID>::iterator end = activeThreads->end();
617
618 while (threads != end) {
619 ThreadID tid = *threads++;
620
621 if (!thread[tid].isStalled())
622 return false;
623 }
624
625 return true;
626}
627
628template<class Impl>
629bool
630LSQ<Impl>::isStalled(ThreadID tid)
631{
632 if (lsqPolicy == SMTQueuePolicy::Dynamic)
633 return isStalled();
634 else
635 return thread[tid].isStalled();
636}
637
638template<class Impl>
639bool
640LSQ<Impl>::hasStoresToWB()
641{
642 list<ThreadID>::iterator threads = activeThreads->begin();
643 list<ThreadID>::iterator end = activeThreads->end();
644
645 while (threads != end) {
646 ThreadID tid = *threads++;
647
648 if (hasStoresToWB(tid))
649 return true;
650 }
651
652 return false;
653}
654
655template<class Impl>
656bool
657LSQ<Impl>::willWB()
658{
659 list<ThreadID>::iterator threads = activeThreads->begin();
660 list<ThreadID>::iterator end = activeThreads->end();
661
662 while (threads != end) {
663 ThreadID tid = *threads++;
664
665 if (willWB(tid))
666 return true;
667 }
668
669 return false;
670}
671
672template<class Impl>
673void
674LSQ<Impl>::dumpInsts() const
675{
676 list<ThreadID>::const_iterator threads = activeThreads->begin();
677 list<ThreadID>::const_iterator end = activeThreads->end();
678
679 while (threads != end) {
680 ThreadID tid = *threads++;
681
682 thread[tid].dumpInsts();
683 }
684}
685
686template<class Impl>
687Fault
688LSQ<Impl>::pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data,
689 unsigned int size, Addr addr, Request::Flags flags,
690 uint64_t *res, AtomicOpFunctor *amo_op,
690 uint64_t *res, AtomicOpFunctorPtr amo_op,
691 const std::vector<bool>& byteEnable)
692{
693 // This comming request can be either load, store or atomic.
694 // Atomic request has a corresponding pointer to its atomic memory
695 // operation
696 bool isAtomic M5_VAR_USED = !isLoad && amo_op;
697
698 ThreadID tid = cpu->contextToThread(inst->contextId());
699 auto cacheLineSize = cpu->cacheLineSize();
700 bool needs_burst = transferNeedsBurst(addr, size, cacheLineSize);
701 LSQRequest* req = nullptr;
702
703 // Atomic requests that access data across cache line boundary are
704 // currently not allowed since the cache does not guarantee corresponding
705 // atomic memory operations to be executed atomically across a cache line.
706 // For ISAs such as x86 that supports cross-cache-line atomic instructions,
707 // the cache needs to be modified to perform atomic update to both cache
708 // lines. For now, such cross-line update is not supported.
709 assert(!isAtomic || (isAtomic && !needs_burst));
710
711 if (inst->translationStarted()) {
712 req = inst->savedReq;
713 assert(req);
714 } else {
715 if (needs_burst) {
716 req = new SplitDataRequest(&thread[tid], inst, isLoad, addr,
717 size, flags, data, res);
718 } else {
719 req = new SingleDataRequest(&thread[tid], inst, isLoad, addr,
720 size, flags, data, res, amo_op);
720 size, flags, data, res, std::move(amo_op));
721 }
722 assert(req);
723 if (!byteEnable.empty()) {
724 req->_byteEnable = byteEnable;
725 }
726 inst->setRequest();
727 req->taskId(cpu->taskId());
728
729 // There might be fault from a previous execution attempt if this is
730 // a strictly ordered load
731 inst->getFault() = NoFault;
732
733 req->initiateTranslation();
734 }
735
736 /* This is the place were instructions get the effAddr. */
737 if (req->isTranslationComplete()) {
738 if (req->isMemAccessRequired()) {
739 inst->effAddr = req->getVaddr();
740 inst->effSize = size;
741 inst->effAddrValid(true);
742
743 if (cpu->checker) {
744 inst->reqToVerify = std::make_shared<Request>(*req->request());
745 }
746 Fault fault;
747 if (isLoad)
748 fault = cpu->read(req, inst->lqIdx);
749 else
750 fault = cpu->write(req, data, inst->sqIdx);
751 // inst->getFault() may have the first-fault of a
752 // multi-access split request at this point.
753 // Overwrite that only if we got another type of fault
754 // (e.g. re-exec).
755 if (fault != NoFault)
756 inst->getFault() = fault;
757 } else if (isLoad) {
758 inst->setMemAccPredicate(false);
759 // Commit will have to clean up whatever happened. Set this
760 // instruction as executed.
761 inst->setExecuted();
762 }
763 }
764
765 if (inst->traceData)
766 inst->traceData->setMem(addr, size, flags);
767
768 return inst->getFault();
769}
770
template<class Impl>
void
LSQ<Impl>::SingleDataRequest::finish(const Fault &fault, const RequestPtr &req,
        ThreadContext* tc, BaseTLB::Mode mode)
{
    // Translation callback for a single (non-split) access: there is
    // exactly one fragment, so record its fault and move the request to
    // the next state.
    _fault.push_back(fault);
    numInTranslationFragments = 0;
    numTranslatedFragments = 1;
    /* If the instruction has been squashed, let the request know
     * as it may have to self-destruct. */
    if (_inst->isSquashed()) {
        this->squashTranslation();
    } else {
        _inst->strictlyOrdered(req->isStrictlyOrdered());

        flags.set(Flag::TranslationFinished);
        if (fault == NoFault) {
            // Successful translation: publish the physical address and
            // request flags to the instruction.
            _inst->physEffAddr = req->getPaddr();
            _inst->memReqFlags = req->getFlags();
            if (req->isCondSwap()) {
                // Conditional-swap requests carry their compare value in
                // the request's extra data.
                assert(_res);
                req->setExtraData(*_res);
            }
            setState(State::Request);
        } else {
            setState(State::Fault);
        }

        LSQRequest::_inst->fault = fault;
        LSQRequest::_inst->translationCompleted(true);
    }
}
803
template<class Impl>
void
LSQ<Impl>::SplitDataRequest::finish(const Fault &fault, const RequestPtr &req,
        ThreadContext* tc, BaseTLB::Mode mode)
{
    // Translation callback for one fragment of a split access. Locate the
    // fragment this response belongs to and record its fault.
    int i;
    for (i = 0; i < _requests.size() && _requests[i] != req; i++);
    assert(i < _requests.size());
    _fault[i] = fault;

    numInTranslationFragments--;
    numTranslatedFragments++;

    // Accumulate the fragment's flags into the main (whole-access) request.
    if (fault == NoFault)
        mainReq->setFlags(req->getFlags());

    // Only act once every fragment has finished translating.
    if (numTranslatedFragments == _requests.size()) {
        if (_inst->isSquashed()) {
            this->squashTranslation();
        } else {
            _inst->strictlyOrdered(mainReq->isStrictlyOrdered());
            flags.set(Flag::TranslationFinished);
            _inst->translationCompleted(true);

            // After this loop, i is the index of the first faulting
            // fragment, or _fault.size() if none faulted.
            for (i = 0; i < _fault.size() && _fault[i] == NoFault; i++);
            if (i > 0) {
                // At least the first fragment translated cleanly, so the
                // instruction gets a valid physical address.
                _inst->physEffAddr = request(0)->getPaddr();
                _inst->memReqFlags = mainReq->getFlags();
                if (mainReq->isCondSwap()) {
                    // CondSwap is only legal when every fragment
                    // translated (no partial fault).
                    assert (i == _fault.size());
                    assert(_res);
                    mainReq->setExtraData(*_res);
                }
                if (i == _fault.size()) {
                    // All fragments translated: proceed to the request.
                    _inst->fault = NoFault;
                    setState(State::Request);
                } else {
                    // A later fragment faulted: partial fault.
                    _inst->fault = _fault[i];
                    setState(State::PartialFault);
                }
            } else {
                // The very first fragment faulted.
                _inst->fault = _fault[0];
                setState(State::Fault);
            }
        }

    }
}
852
template<class Impl>
void
LSQ<Impl>::SingleDataRequest::initiateTranslation()
{
    // Build the single underlying Request and send it to the TLB. If
    // addRequest() produced no request (fully disabled byte enable), the
    // access is predicated off instead.
    assert(_requests.size() == 0);

    this->addRequest(_addr, _size, _byteEnable);

    if (_requests.size() > 0) {
        _requests.back()->setReqInstSeqNum(_inst->seqNum);
        _requests.back()->taskId(_taskId);
        _inst->translationStarted(true);
        setState(State::Translation);
        flags.set(Flag::TranslationStarted);

        // Save the request on the instruction so a re-execution attempt
        // can reuse it.
        _inst->savedReq = this;
        sendFragmentToTranslation(0);
    } else {
        // No bytes enabled: the memory access is not required.
        _inst->setMemAccPredicate(false);
    }
}
874
875template<class Impl>
876PacketPtr
877LSQ<Impl>::SplitDataRequest::mainPacket()
878{
879 return _mainPacket;
880}
881
882template<class Impl>
883RequestPtr
884LSQ<Impl>::SplitDataRequest::mainRequest()
885{
886 return mainReq;
887}
888
template<class Impl>
void
LSQ<Impl>::SplitDataRequest::initiateTranslation()
{
    // Break a line-crossing access into cache-line-sized fragments:
    // an (possibly unaligned) prefix, zero or more whole lines, and a
    // tail, then send every fragment to translation.
    auto cacheLineSize = _port.cacheLineSize();
    Addr base_addr = _addr;
    Addr next_addr = addrBlockAlign(_addr + cacheLineSize, cacheLineSize);
    Addr final_addr = addrBlockAlign(_addr + _size, cacheLineSize);
    uint32_t size_so_far = 0;

    mainReq = std::make_shared<Request>(_inst->getASID(), base_addr,
                _size, _flags, _inst->masterId(),
                _inst->instAddr(), _inst->contextId());
    if (!_byteEnable.empty()) {
        mainReq->setByteEnable(_byteEnable);
    }

    // Paddr is not used in mainReq. However, we will accumulate the flags
    // from the sub requests into mainReq by calling setFlags() in finish().
    // setFlags() assumes that paddr is set so flip the paddr valid bit here to
    // avoid a potential assert in setFlags() when we call it from finish().
    mainReq->setPaddr(0);

    /* Get the pre-fix, possibly unaligned. */
    if (_byteEnable.empty()) {
        this->addRequest(base_addr, next_addr - base_addr, _byteEnable);
    } else {
        // Slice the matching portion of the byte-enable mask for this
        // fragment.
        auto it_start = _byteEnable.begin();
        auto it_end = _byteEnable.begin() + (next_addr - base_addr);
        this->addRequest(base_addr, next_addr - base_addr,
                std::vector<bool>(it_start, it_end));
    }
    size_so_far = next_addr - base_addr;

    /* We are block aligned now, reading whole blocks. */
    base_addr = next_addr;
    while (base_addr != final_addr) {
        if (_byteEnable.empty()) {
            this->addRequest(base_addr, cacheLineSize, _byteEnable);
        } else {
            auto it_start = _byteEnable.begin() + size_so_far;
            auto it_end = _byteEnable.begin() + size_so_far + cacheLineSize;
            this->addRequest(base_addr, cacheLineSize,
                    std::vector<bool>(it_start, it_end));
        }
        size_so_far += cacheLineSize;
        base_addr += cacheLineSize;
    }

    /* Deal with the tail. */
    if (size_so_far < _size) {
        if (_byteEnable.empty()) {
            this->addRequest(base_addr, _size - size_so_far, _byteEnable);
        } else {
            auto it_start = _byteEnable.begin() + size_so_far;
            auto it_end = _byteEnable.end();
            this->addRequest(base_addr, _size - size_so_far,
                    std::vector<bool>(it_start, it_end));
        }
    }

    if (_requests.size() > 0) {
        /* Setup the requests and send them to translation. */
        for (auto& r: _requests) {
            r->setReqInstSeqNum(_inst->seqNum);
            r->taskId(_taskId);
        }

        _inst->translationStarted(true);
        setState(State::Translation);
        flags.set(Flag::TranslationStarted);
        // Save this request on the instruction so re-execution can reuse it.
        this->_inst->savedReq = this;
        numInTranslationFragments = 0;
        numTranslatedFragments = 0;
        _fault.resize(_requests.size());

        for (uint32_t i = 0; i < _requests.size(); i++) {
            sendFragmentToTranslation(i);
        }
    } else {
        // Every fragment was predicated off: no memory access required.
        _inst->setMemAccPredicate(false);
    }
}
972
973template<class Impl>
974void
975LSQ<Impl>::LSQRequest::sendFragmentToTranslation(int i)
976{
977 numInTranslationFragments++;
978 _port.dTLB()->translateTiming(
979 this->request(i),
980 this->_inst->thread->getTC(), this,
981 this->isLoad() ? BaseTLB::Read : BaseTLB::Write);
982}
983
template<class Impl>
bool
LSQ<Impl>::SingleDataRequest::recvTimingResp(PacketPtr pkt)
{
    // A single-fragment request has exactly one outstanding packet; its
    // arrival completes the whole access.
    assert(_numOutstandingPackets == 1);
    auto state = dynamic_cast<LSQSenderState*>(pkt->senderState);
    flags.set(Flag::Complete);
    state->outstanding--;
    assert(pkt == _packets.front());
    _port.completeDataAccess(pkt);
    return true;
}
996
template<class Impl>
bool
LSQ<Impl>::SplitDataRequest::recvTimingResp(PacketPtr pkt)
{
    // One fragment of a split access has come back. Track it; once all
    // fragments are in, assemble a single response packet for the main
    // request and complete the access with it.
    auto state = dynamic_cast<LSQSenderState*>(pkt->senderState);
    // Identify which of our packets this response corresponds to.
    uint32_t pktIdx = 0;
    while (pktIdx < _packets.size() && pkt != _packets[pktIdx])
        pktIdx++;
    assert(pktIdx < _packets.size());
    numReceivedPackets++;
    state->outstanding--;
    if (numReceivedPackets == _packets.size()) {
        flags.set(Flag::Complete);
        /* Assemble packets. */
        PacketPtr resp = isLoad()
            ? Packet::createRead(mainReq)
            : Packet::createWrite(mainReq);
        // For loads the instruction's memData buffer already holds the
        // gathered fragment data (fragments wrote at their offsets).
        if (isLoad())
            resp->dataStatic(_inst->memData);
        else
            resp->dataStatic(_data);
        resp->senderState = _senderState;
        _port.completeDataAccess(resp);
        // The assembled packet is temporary; free it after completion.
        delete resp;
    }
    return true;
}
1024
1025template<class Impl>
1026void
1027LSQ<Impl>::SingleDataRequest::buildPackets()
1028{
1029 assert(_senderState);
1030 /* Retries do not create new packets. */
1031 if (_packets.size() == 0) {
1032 _packets.push_back(
1033 isLoad()
1034 ? Packet::createRead(request())
1035 : Packet::createWrite(request()));
1036 _packets.back()->dataStatic(_inst->memData);
1037 _packets.back()->senderState = _senderState;
1038 }
1039 assert(_packets.size() == 1);
1040}
1041
template<class Impl>
void
LSQ<Impl>::SplitDataRequest::buildPackets()
{
    /* Extra data?? */
    // Build one packet per fragment (retries reuse the existing packets).
    // Fragments address slices of the instruction's data buffer at their
    // virtual-address offset from the base of the access.
    Addr base_address = _addr;

    if (_packets.size() == 0) {
        /* New stuff */
        if (isLoad()) {
            // The main packet collects the full load result.
            _mainPacket = Packet::createRead(mainReq);
            _mainPacket->dataStatic(_inst->memData);
        }
        // Stop at the first faulting fragment; its packet is never built.
        for (int i = 0; i < _requests.size() && _fault[i] == NoFault; i++) {
            RequestPtr r = _requests[i];
            PacketPtr pkt = isLoad() ? Packet::createRead(r)
                                     : Packet::createWrite(r);
            ptrdiff_t offset = r->getVaddr() - base_address;
            if (isLoad()) {
                pkt->dataStatic(_inst->memData + offset);
            } else {
                // Stores copy their slice into a packet-owned buffer so
                // the fragment can outlive the instruction's buffer.
                uint8_t* req_data = new uint8_t[r->getSize()];
                std::memcpy(req_data,
                        _inst->memData + offset,
                        r->getSize());
                pkt->dataDynamic(req_data);
            }
            pkt->senderState = _senderState;
            _packets.push_back(pkt);
        }
    }
    assert(_packets.size() > 0);
}
1075
1076template<class Impl>
1077void
1078LSQ<Impl>::SingleDataRequest::sendPacketToCache()
1079{
1080 assert(_numOutstandingPackets == 0);
1081 if (lsqUnit()->trySendPacket(isLoad(), _packets.at(0)))
1082 _numOutstandingPackets = 1;
1083}
1084
template<class Impl>
void
LSQ<Impl>::SplitDataRequest::sendPacketToCache()
{
    /* Try to send the packets. */
    // Issue fragments in order starting after those already received or
    // outstanding; stop at the first one the LSQ unit cannot send (e.g.
    // no cache port available) and resume on a later attempt.
    while (numReceivedPackets + _numOutstandingPackets < _packets.size() &&
            lsqUnit()->trySendPacket(isLoad(),
                _packets.at(numReceivedPackets + _numOutstandingPackets))) {
        _numOutstandingPackets++;
    }
}
1096
1097template<class Impl>
1098void
1099LSQ<Impl>::SingleDataRequest::handleIprWrite(ThreadContext *thread,
1100 PacketPtr pkt)
1101{
1102 TheISA::handleIprWrite(thread, pkt);
1103}
1104
1105template<class Impl>
1106void
1107LSQ<Impl>::SplitDataRequest::handleIprWrite(ThreadContext *thread,
1108 PacketPtr mainPkt)
1109{
1110 unsigned offset = 0;
1111 for (auto r: _requests) {
1112 PacketPtr pkt = new Packet(r, MemCmd::WriteReq);
1113 pkt->dataStatic(mainPkt->getPtr<uint8_t>() + offset);
1114 TheISA::handleIprWrite(thread, pkt);
1115 offset += r->getSize();
1116 delete pkt;
1117 }
1118}
1119
1120template<class Impl>
1121Cycles
1122LSQ<Impl>::SingleDataRequest::handleIprRead(ThreadContext *thread,
1123 PacketPtr pkt)
1124{
1125 return TheISA::handleIprRead(thread, pkt);
1126}
1127
1128template<class Impl>
1129Cycles
1130LSQ<Impl>::SplitDataRequest::handleIprRead(ThreadContext *thread,
1131 PacketPtr mainPkt)
1132{
1133 Cycles delay(0);
1134 unsigned offset = 0;
1135
1136 for (auto r: _requests) {
1137 PacketPtr pkt = new Packet(r, MemCmd::ReadReq);
1138 pkt->dataStatic(mainPkt->getPtr<uint8_t>() + offset);
1139 Cycles d = TheISA::handleIprRead(thread, pkt);
1140 if (d > delay)
1141 delay = d;
1142 offset += r->getSize();
1143 delete pkt;
1144 }
1145 return delay;
1146}
1147
1148template<class Impl>
1149bool
1150LSQ<Impl>::SingleDataRequest::isCacheBlockHit(Addr blockAddr, Addr blockMask)
1151{
1152 return ( (LSQRequest::_requests[0]->getPaddr() & blockMask) == blockAddr);
1153}
1154
1155template<class Impl>
1156bool
1157LSQ<Impl>::SplitDataRequest::isCacheBlockHit(Addr blockAddr, Addr blockMask)
1158{
1159 bool is_hit = false;
1160 for (auto &r: _requests) {
1161 if ((r->getPaddr() & blockMask) == blockAddr) {
1162 is_hit = true;
1163 break;
1164 }
1165 }
1166 return is_hit;
1167}
1168
1169template <class Impl>
1170bool
1171LSQ<Impl>::DcachePort::recvTimingResp(PacketPtr pkt)
1172{
1173 return lsq->recvTimingResp(pkt);
1174}
1175
1176template <class Impl>
1177void
1178LSQ<Impl>::DcachePort::recvTimingSnoopReq(PacketPtr pkt)
1179{
1180 for (ThreadID tid = 0; tid < cpu->numThreads; tid++) {
1181 if (cpu->getCpuAddrMonitor(tid)->doMonitor(pkt)) {
1182 cpu->wakeup(tid);
1183 }
1184 }
1185 lsq->recvTimingSnoopReq(pkt);
1186}
1187
1188template <class Impl>
1189void
1190LSQ<Impl>::DcachePort::recvReqRetry()
1191{
1192 lsq->recvReqRetry();
1193}
1194
1195#endif//__CPU_O3_LSQ_IMPL_HH__