lsq_impl.hh (13954:2f400a5f2627) lsq_impl.hh (14080:4472576445e7)
1/*
2 * Copyright (c) 2011-2012, 2014, 2017-2018 ARM Limited
3 * Copyright (c) 2013 Advanced Micro Devices, Inc.
4 * All rights reserved
5 *
6 * The license below extends only to copyright in the software and shall
7 * not be construed as granting a license to any other intellectual
8 * property including but not limited to intellectual property relating
9 * to a hardware implementation of the functionality of the software
10 * licensed hereunder. You may use the software subject to the license
11 * terms below provided that you ensure that this notice is replicated
12 * unmodified and in its entirety in all distributions of the software,
13 * modified or unmodified, in source code or in binary form.
14 *
15 * Copyright (c) 2005-2006 The Regents of The University of Michigan
16 * All rights reserved.
17 *
18 * Redistribution and use in source and binary forms, with or without
19 * modification, are permitted provided that the following conditions are
20 * met: redistributions of source code must retain the above copyright
21 * notice, this list of conditions and the following disclaimer;
22 * redistributions in binary form must reproduce the above copyright
23 * notice, this list of conditions and the following disclaimer in the
24 * documentation and/or other materials provided with the distribution;
25 * neither the name of the copyright holders nor the names of its
26 * contributors may be used to endorse or promote products derived from
27 * this software without specific prior written permission.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
30 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
31 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
32 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
33 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
34 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
35 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
36 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
37 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
38 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
39 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40 *
41 * Authors: Korey Sewell
42 */
43
44#ifndef __CPU_O3_LSQ_IMPL_HH__
45#define __CPU_O3_LSQ_IMPL_HH__
46
47#include <algorithm>
48#include <list>
49#include <string>
50
51#include "base/logging.hh"
52#include "cpu/o3/lsq.hh"
53#include "debug/Drain.hh"
54#include "debug/Fetch.hh"
55#include "debug/LSQ.hh"
56#include "debug/Writeback.hh"
57#include "params/DerivO3CPU.hh"
58
59using namespace std;
60
61template <class Impl>
62LSQ<Impl>::LSQ(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params)
63 : cpu(cpu_ptr), iewStage(iew_ptr),
64 _cacheBlocked(false),
65 cacheStorePorts(params->cacheStorePorts), usedStorePorts(0),
66 cacheLoadPorts(params->cacheLoadPorts), usedLoadPorts(0),
67 lsqPolicy(params->smtLSQPolicy),
68 LQEntries(params->LQEntries),
69 SQEntries(params->SQEntries),
70 maxLQEntries(maxLSQAllocation(lsqPolicy, LQEntries, params->numThreads,
71 params->smtLSQThreshold)),
72 maxSQEntries(maxLSQAllocation(lsqPolicy, SQEntries, params->numThreads,
73 params->smtLSQThreshold)),
74 numThreads(params->numThreads)
75{
76 assert(numThreads > 0 && numThreads <= Impl::MaxThreads);
77
78 //**********************************************/
79 //************ Handle SMT Parameters ***********/
80 //**********************************************/
81
82 /* Run SMT olicy checks. */
83 if (lsqPolicy == SMTQueuePolicy::Dynamic) {
84 DPRINTF(LSQ, "LSQ sharing policy set to Dynamic\n");
85 } else if (lsqPolicy == SMTQueuePolicy::Partitioned) {
86 DPRINTF(Fetch, "LSQ sharing policy set to Partitioned: "
87 "%i entries per LQ | %i entries per SQ\n",
88 maxLQEntries,maxSQEntries);
89 } else if (lsqPolicy == SMTQueuePolicy::Threshold) {
90
91 assert(params->smtLSQThreshold > params->LQEntries);
92 assert(params->smtLSQThreshold > params->SQEntries);
93
94 DPRINTF(LSQ, "LSQ sharing policy set to Threshold: "
95 "%i entries per LQ | %i entries per SQ\n",
96 maxLQEntries,maxSQEntries);
97 } else {
98 panic("Invalid LSQ sharing policy. Options are: Dynamic, "
99 "Partitioned, Threshold");
100 }
101
102 thread.reserve(numThreads);
103 for (ThreadID tid = 0; tid < numThreads; tid++) {
104 thread.emplace_back(maxLQEntries, maxSQEntries);
105 thread[tid].init(cpu, iew_ptr, params, this, tid);
106 thread[tid].setDcachePort(&cpu_ptr->getDataPort());
107 }
108}
109
110
111template<class Impl>
112std::string
113LSQ<Impl>::name() const
114{
115 return iewStage->name() + ".lsq";
116}
117
118template<class Impl>
119void
120LSQ<Impl>::regStats()
121{
122 //Initialize LSQs
123 for (ThreadID tid = 0; tid < numThreads; tid++) {
124 thread[tid].regStats();
125 }
126}
127
128template<class Impl>
129void
130LSQ<Impl>::setActiveThreads(list<ThreadID> *at_ptr)
131{
132 activeThreads = at_ptr;
133 assert(activeThreads != 0);
134}
135
136template <class Impl>
137void
138LSQ<Impl>::drainSanityCheck() const
139{
140 assert(isDrained());
141
142 for (ThreadID tid = 0; tid < numThreads; tid++)
143 thread[tid].drainSanityCheck();
144}
145
146template <class Impl>
147bool
148LSQ<Impl>::isDrained() const
149{
150 bool drained(true);
151
152 if (!lqEmpty()) {
153 DPRINTF(Drain, "Not drained, LQ not empty.\n");
154 drained = false;
155 }
156
157 if (!sqEmpty()) {
158 DPRINTF(Drain, "Not drained, SQ not empty.\n");
159 drained = false;
160 }
161
162 return drained;
163}
164
165template <class Impl>
166void
167LSQ<Impl>::takeOverFrom()
168{
169 usedStorePorts = 0;
170 _cacheBlocked = false;
171
172 for (ThreadID tid = 0; tid < numThreads; tid++) {
173 thread[tid].takeOverFrom();
174 }
175}
176
template <class Impl>
void
LSQ<Impl>::tick()
{
    // Re-issue loads which got blocked on the per-cycle load ports limit.
    // If all load ports were consumed last cycle but the cache itself is
    // not blocked, notify IEW so deferred loads can be retried; a truly
    // blocked cache will instead signal via recvReqRetry().
    if (usedLoadPorts == cacheLoadPorts && !_cacheBlocked)
        iewStage->cacheUnblocked();

    // Port usage is accounted per cycle; start the new cycle fresh.
    usedLoadPorts = 0;
    usedStorePorts = 0;
}
188
template<class Impl>
bool
LSQ<Impl>::cacheBlocked() const
{
    // True while the D-cache has refused a request and we are waiting
    // for a retry (cleared in recvReqRetry / takeOverFrom).
    return _cacheBlocked;
}
195
template<class Impl>
void
LSQ<Impl>::cacheBlocked(bool v)
{
    // Setter for the cache-blocked flag; no side effects beyond the flag.
    _cacheBlocked = v;
}
202
203template<class Impl>
204bool
205LSQ<Impl>::cachePortAvailable(bool is_load) const
206{
207 bool ret;
208 if (is_load) {
209 ret = usedLoadPorts < cacheLoadPorts;
210 } else {
211 ret = usedStorePorts < cacheStorePorts;
212 }
213 return ret;
214}
215
216template<class Impl>
217void
218LSQ<Impl>::cachePortBusy(bool is_load)
219{
220 assert(cachePortAvailable(is_load));
221 if (is_load) {
222 usedLoadPorts++;
223 } else {
224 usedStorePorts++;
225 }
226}
227
228template<class Impl>
229void
230LSQ<Impl>::insertLoad(const DynInstPtr &load_inst)
231{
232 ThreadID tid = load_inst->threadNumber;
233
234 thread[tid].insertLoad(load_inst);
235}
236
237template<class Impl>
238void
239LSQ<Impl>::insertStore(const DynInstPtr &store_inst)
240{
241 ThreadID tid = store_inst->threadNumber;
242
243 thread[tid].insertStore(store_inst);
244}
245
246template<class Impl>
247Fault
248LSQ<Impl>::executeLoad(const DynInstPtr &inst)
249{
250 ThreadID tid = inst->threadNumber;
251
252 return thread[tid].executeLoad(inst);
253}
254
255template<class Impl>
256Fault
257LSQ<Impl>::executeStore(const DynInstPtr &inst)
258{
259 ThreadID tid = inst->threadNumber;
260
261 return thread[tid].executeStore(inst);
262}
263
264template<class Impl>
265void
266LSQ<Impl>::writebackStores()
267{
268 list<ThreadID>::iterator threads = activeThreads->begin();
269 list<ThreadID>::iterator end = activeThreads->end();
270
271 while (threads != end) {
272 ThreadID tid = *threads++;
273
274 if (numStoresToWB(tid) > 0) {
275 DPRINTF(Writeback,"[tid:%i] Writing back stores. %i stores "
276 "available for Writeback.\n", tid, numStoresToWB(tid));
277 }
278
279 thread[tid].writebackStores();
280 }
281}
282
283template<class Impl>
284bool
285LSQ<Impl>::violation()
286{
287 /* Answers: Does Anybody Have a Violation?*/
288 list<ThreadID>::iterator threads = activeThreads->begin();
289 list<ThreadID>::iterator end = activeThreads->end();
290
291 while (threads != end) {
292 ThreadID tid = *threads++;
293
294 if (thread[tid].violation())
295 return true;
296 }
297
298 return false;
299}
300
template <class Impl>
void
LSQ<Impl>::recvReqRetry()
{
    // The D-cache signalled it can accept requests again.  Tell IEW first
    // so blocked instructions can be rescheduled, clear our blocked flag,
    // then let each active thread retry its outstanding access.
    iewStage->cacheUnblocked();
    cacheBlocked(false);

    for (ThreadID tid : *activeThreads) {
        thread[tid].recvRetry();
    }
}
312
313template <class Impl>
314void
315LSQ<Impl>::completeDataAccess(PacketPtr pkt)
316{
317 auto senderState = dynamic_cast<LSQSenderState*>(pkt->senderState);
318 thread[cpu->contextToThread(senderState->contextId())]
319 .completeDataAccess(pkt);
320}
321
template <class Impl>
bool
LSQ<Impl>::recvTimingResp(PacketPtr pkt)
{
    if (pkt->isError())
        DPRINTF(LSQ, "Got error packet back for address: %#X\n",
                pkt->getAddr());

    // The sender state identifies which LSQ request (and thread) this
    // response belongs to; a response we cannot attribute is fatal.
    auto senderState = dynamic_cast<LSQSenderState*>(pkt->senderState);
    panic_if(!senderState, "Got packet back with unknown sender state\n");

    thread[cpu->contextToThread(senderState->contextId())].recvTimingResp(pkt);

    if (pkt->isInvalidate()) {
        // This response also contains an invalidate; e.g. this can be the case
        // if cmd is ReadRespWithInvalidate.
        //
        // The calling order between completeDataAccess and checkSnoop matters.
        // By calling checkSnoop after completeDataAccess, we ensure that the
        // fault set by checkSnoop is not lost. Calling writeback (more
        // specifically inst->completeAcc) in completeDataAccess overwrites
        // fault, and in case this instruction requires squashing (as
        // determined by checkSnoop), the ReExec fault set by checkSnoop would
        // be lost otherwise.

        DPRINTF(LSQ, "received invalidation with response for addr:%#x\n",
                pkt->getAddr());

        // The invalidation must be observed by every thread's queues,
        // not only the thread that owns this response.
        for (ThreadID tid = 0; tid < numThreads; tid++) {
            thread[tid].checkSnoop(pkt);
        }
    }
    // Update the LSQRequest state (this may delete the request) -- do not
    // touch senderState or the request after this call.
    senderState->request()->packetReplied();

    return true;
}
359
360template <class Impl>
361void
362LSQ<Impl>::recvTimingSnoopReq(PacketPtr pkt)
363{
364 DPRINTF(LSQ, "received pkt for addr:%#x %s\n", pkt->getAddr(),
365 pkt->cmdString());
366
367 // must be a snoop
368 if (pkt->isInvalidate()) {
369 DPRINTF(LSQ, "received invalidation for addr:%#x\n",
370 pkt->getAddr());
371 for (ThreadID tid = 0; tid < numThreads; tid++) {
372 thread[tid].checkSnoop(pkt);
373 }
374 }
375}
376
377template<class Impl>
378int
379LSQ<Impl>::getCount()
380{
381 unsigned total = 0;
382
383 list<ThreadID>::iterator threads = activeThreads->begin();
384 list<ThreadID>::iterator end = activeThreads->end();
385
386 while (threads != end) {
387 ThreadID tid = *threads++;
388
389 total += getCount(tid);
390 }
391
392 return total;
393}
394
395template<class Impl>
396int
397LSQ<Impl>::numLoads()
398{
399 unsigned total = 0;
400
401 list<ThreadID>::iterator threads = activeThreads->begin();
402 list<ThreadID>::iterator end = activeThreads->end();
403
404 while (threads != end) {
405 ThreadID tid = *threads++;
406
407 total += numLoads(tid);
408 }
409
410 return total;
411}
412
413template<class Impl>
414int
415LSQ<Impl>::numStores()
416{
417 unsigned total = 0;
418
419 list<ThreadID>::iterator threads = activeThreads->begin();
420 list<ThreadID>::iterator end = activeThreads->end();
421
422 while (threads != end) {
423 ThreadID tid = *threads++;
424
425 total += thread[tid].numStores();
426 }
427
428 return total;
429}
430
431template<class Impl>
432unsigned
433LSQ<Impl>::numFreeLoadEntries()
434{
435 unsigned total = 0;
436
437 list<ThreadID>::iterator threads = activeThreads->begin();
438 list<ThreadID>::iterator end = activeThreads->end();
439
440 while (threads != end) {
441 ThreadID tid = *threads++;
442
443 total += thread[tid].numFreeLoadEntries();
444 }
445
446 return total;
447}
448
449template<class Impl>
450unsigned
451LSQ<Impl>::numFreeStoreEntries()
452{
453 unsigned total = 0;
454
455 list<ThreadID>::iterator threads = activeThreads->begin();
456 list<ThreadID>::iterator end = activeThreads->end();
457
458 while (threads != end) {
459 ThreadID tid = *threads++;
460
461 total += thread[tid].numFreeStoreEntries();
462 }
463
464 return total;
465}
466
template<class Impl>
unsigned
LSQ<Impl>::numFreeLoadEntries(ThreadID tid)
{
    // Free load-queue slots for one specific thread.
    return thread[tid].numFreeLoadEntries();
}
473
template<class Impl>
unsigned
LSQ<Impl>::numFreeStoreEntries(ThreadID tid)
{
    // Free store-queue slots for one specific thread.
    return thread[tid].numFreeStoreEntries();
}
480
481template<class Impl>
482bool
483LSQ<Impl>::isFull()
484{
485 list<ThreadID>::iterator threads = activeThreads->begin();
486 list<ThreadID>::iterator end = activeThreads->end();
487
488 while (threads != end) {
489 ThreadID tid = *threads++;
490
491 if (!(thread[tid].lqFull() || thread[tid].sqFull()))
492 return false;
493 }
494
495 return true;
496}
497
498template<class Impl>
499bool
500LSQ<Impl>::isFull(ThreadID tid)
501{
502 //@todo: Change to Calculate All Entries for
503 //Dynamic Policy
504 if (lsqPolicy == SMTQueuePolicy::Dynamic)
505 return isFull();
506 else
507 return thread[tid].lqFull() || thread[tid].sqFull();
508}
509
template<class Impl>
bool
LSQ<Impl>::isEmpty() const
{
    // Empty only when both the load and the store queues hold nothing.
    return lqEmpty() && sqEmpty();
}
516
517template<class Impl>
518bool
519LSQ<Impl>::lqEmpty() const
520{
521 list<ThreadID>::const_iterator threads = activeThreads->begin();
522 list<ThreadID>::const_iterator end = activeThreads->end();
523
524 while (threads != end) {
525 ThreadID tid = *threads++;
526
527 if (!thread[tid].lqEmpty())
528 return false;
529 }
530
531 return true;
532}
533
534template<class Impl>
535bool
536LSQ<Impl>::sqEmpty() const
537{
538 list<ThreadID>::const_iterator threads = activeThreads->begin();
539 list<ThreadID>::const_iterator end = activeThreads->end();
540
541 while (threads != end) {
542 ThreadID tid = *threads++;
543
544 if (!thread[tid].sqEmpty())
545 return false;
546 }
547
548 return true;
549}
550
551template<class Impl>
552bool
553LSQ<Impl>::lqFull()
554{
555 list<ThreadID>::iterator threads = activeThreads->begin();
556 list<ThreadID>::iterator end = activeThreads->end();
557
558 while (threads != end) {
559 ThreadID tid = *threads++;
560
561 if (!thread[tid].lqFull())
562 return false;
563 }
564
565 return true;
566}
567
568template<class Impl>
569bool
570LSQ<Impl>::lqFull(ThreadID tid)
571{
572 //@todo: Change to Calculate All Entries for
573 //Dynamic Policy
574 if (lsqPolicy == SMTQueuePolicy::Dynamic)
575 return lqFull();
576 else
577 return thread[tid].lqFull();
578}
579
580template<class Impl>
581bool
582LSQ<Impl>::sqFull()
583{
584 list<ThreadID>::iterator threads = activeThreads->begin();
585 list<ThreadID>::iterator end = activeThreads->end();
586
587 while (threads != end) {
588 ThreadID tid = *threads++;
589
590 if (!sqFull(tid))
591 return false;
592 }
593
594 return true;
595}
596
template<class Impl>
bool
LSQ<Impl>::sqFull(ThreadID tid)
{
    //@todo: Change to Calculate All Entries for
    //Dynamic Policy
    // Under the dynamic policy entries are shared between threads, so
    // per-thread fullness is defined as global fullness.
    if (lsqPolicy == SMTQueuePolicy::Dynamic)
        return sqFull();
    else
        return thread[tid].sqFull();
}
608
609template<class Impl>
610bool
611LSQ<Impl>::isStalled()
612{
613 list<ThreadID>::iterator threads = activeThreads->begin();
614 list<ThreadID>::iterator end = activeThreads->end();
615
616 while (threads != end) {
617 ThreadID tid = *threads++;
618
619 if (!thread[tid].isStalled())
620 return false;
621 }
622
623 return true;
624}
625
626template<class Impl>
627bool
628LSQ<Impl>::isStalled(ThreadID tid)
629{
630 if (lsqPolicy == SMTQueuePolicy::Dynamic)
631 return isStalled();
632 else
633 return thread[tid].isStalled();
634}
635
636template<class Impl>
637bool
638LSQ<Impl>::hasStoresToWB()
639{
640 list<ThreadID>::iterator threads = activeThreads->begin();
641 list<ThreadID>::iterator end = activeThreads->end();
642
643 while (threads != end) {
644 ThreadID tid = *threads++;
645
646 if (hasStoresToWB(tid))
647 return true;
648 }
649
650 return false;
651}
652
653template<class Impl>
654bool
655LSQ<Impl>::willWB()
656{
657 list<ThreadID>::iterator threads = activeThreads->begin();
658 list<ThreadID>::iterator end = activeThreads->end();
659
660 while (threads != end) {
661 ThreadID tid = *threads++;
662
663 if (willWB(tid))
664 return true;
665 }
666
667 return false;
668}
669
670template<class Impl>
671void
672LSQ<Impl>::dumpInsts() const
673{
674 list<ThreadID>::const_iterator threads = activeThreads->begin();
675 list<ThreadID>::const_iterator end = activeThreads->end();
676
677 while (threads != end) {
678 ThreadID tid = *threads++;
679
680 thread[tid].dumpInsts();
681 }
682}
683
684template<class Impl>
685Fault
686LSQ<Impl>::pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data,
687 unsigned int size, Addr addr, Request::Flags flags,
688 uint64_t *res, AtomicOpFunctor *amo_op,
689 const std::vector<bool>& byteEnable)
690{
691 // This comming request can be either load, store or atomic.
692 // Atomic request has a corresponding pointer to its atomic memory
693 // operation
694 bool isAtomic M5_VAR_USED = !isLoad && amo_op;
695
696 ThreadID tid = cpu->contextToThread(inst->contextId());
697 auto cacheLineSize = cpu->cacheLineSize();
698 bool needs_burst = transferNeedsBurst(addr, size, cacheLineSize);
699 LSQRequest* req = nullptr;
700
701 // Atomic requests that access data across cache line boundary are
702 // currently not allowed since the cache does not guarantee corresponding
703 // atomic memory operations to be executed atomically across a cache line.
704 // For ISAs such as x86 that supports cross-cache-line atomic instructions,
705 // the cache needs to be modified to perform atomic update to both cache
706 // lines. For now, such cross-line update is not supported.
707 assert(!isAtomic || (isAtomic && !needs_burst));
708
709 if (inst->translationStarted()) {
710 req = inst->savedReq;
711 assert(req);
712 } else {
713 if (needs_burst) {
714 req = new SplitDataRequest(&thread[tid], inst, isLoad, addr,
715 size, flags, data, res);
716 } else {
717 req = new SingleDataRequest(&thread[tid], inst, isLoad, addr,
718 size, flags, data, res, amo_op);
719 }
720 assert(req);
721 if (!byteEnable.empty()) {
722 req->_byteEnable = byteEnable;
723 }
724 inst->setRequest();
725 req->taskId(cpu->taskId());
726
1/*
2 * Copyright (c) 2011-2012, 2014, 2017-2018 ARM Limited
3 * Copyright (c) 2013 Advanced Micro Devices, Inc.
4 * All rights reserved
5 *
6 * The license below extends only to copyright in the software and shall
7 * not be construed as granting a license to any other intellectual
8 * property including but not limited to intellectual property relating
9 * to a hardware implementation of the functionality of the software
10 * licensed hereunder. You may use the software subject to the license
11 * terms below provided that you ensure that this notice is replicated
12 * unmodified and in its entirety in all distributions of the software,
13 * modified or unmodified, in source code or in binary form.
14 *
15 * Copyright (c) 2005-2006 The Regents of The University of Michigan
16 * All rights reserved.
17 *
18 * Redistribution and use in source and binary forms, with or without
19 * modification, are permitted provided that the following conditions are
20 * met: redistributions of source code must retain the above copyright
21 * notice, this list of conditions and the following disclaimer;
22 * redistributions in binary form must reproduce the above copyright
23 * notice, this list of conditions and the following disclaimer in the
24 * documentation and/or other materials provided with the distribution;
25 * neither the name of the copyright holders nor the names of its
26 * contributors may be used to endorse or promote products derived from
27 * this software without specific prior written permission.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
30 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
31 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
32 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
33 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
34 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
35 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
36 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
37 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
38 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
39 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40 *
41 * Authors: Korey Sewell
42 */
43
44#ifndef __CPU_O3_LSQ_IMPL_HH__
45#define __CPU_O3_LSQ_IMPL_HH__
46
47#include <algorithm>
48#include <list>
49#include <string>
50
51#include "base/logging.hh"
52#include "cpu/o3/lsq.hh"
53#include "debug/Drain.hh"
54#include "debug/Fetch.hh"
55#include "debug/LSQ.hh"
56#include "debug/Writeback.hh"
57#include "params/DerivO3CPU.hh"
58
59using namespace std;
60
61template <class Impl>
62LSQ<Impl>::LSQ(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params)
63 : cpu(cpu_ptr), iewStage(iew_ptr),
64 _cacheBlocked(false),
65 cacheStorePorts(params->cacheStorePorts), usedStorePorts(0),
66 cacheLoadPorts(params->cacheLoadPorts), usedLoadPorts(0),
67 lsqPolicy(params->smtLSQPolicy),
68 LQEntries(params->LQEntries),
69 SQEntries(params->SQEntries),
70 maxLQEntries(maxLSQAllocation(lsqPolicy, LQEntries, params->numThreads,
71 params->smtLSQThreshold)),
72 maxSQEntries(maxLSQAllocation(lsqPolicy, SQEntries, params->numThreads,
73 params->smtLSQThreshold)),
74 numThreads(params->numThreads)
75{
76 assert(numThreads > 0 && numThreads <= Impl::MaxThreads);
77
78 //**********************************************/
79 //************ Handle SMT Parameters ***********/
80 //**********************************************/
81
82 /* Run SMT olicy checks. */
83 if (lsqPolicy == SMTQueuePolicy::Dynamic) {
84 DPRINTF(LSQ, "LSQ sharing policy set to Dynamic\n");
85 } else if (lsqPolicy == SMTQueuePolicy::Partitioned) {
86 DPRINTF(Fetch, "LSQ sharing policy set to Partitioned: "
87 "%i entries per LQ | %i entries per SQ\n",
88 maxLQEntries,maxSQEntries);
89 } else if (lsqPolicy == SMTQueuePolicy::Threshold) {
90
91 assert(params->smtLSQThreshold > params->LQEntries);
92 assert(params->smtLSQThreshold > params->SQEntries);
93
94 DPRINTF(LSQ, "LSQ sharing policy set to Threshold: "
95 "%i entries per LQ | %i entries per SQ\n",
96 maxLQEntries,maxSQEntries);
97 } else {
98 panic("Invalid LSQ sharing policy. Options are: Dynamic, "
99 "Partitioned, Threshold");
100 }
101
102 thread.reserve(numThreads);
103 for (ThreadID tid = 0; tid < numThreads; tid++) {
104 thread.emplace_back(maxLQEntries, maxSQEntries);
105 thread[tid].init(cpu, iew_ptr, params, this, tid);
106 thread[tid].setDcachePort(&cpu_ptr->getDataPort());
107 }
108}
109
110
111template<class Impl>
112std::string
113LSQ<Impl>::name() const
114{
115 return iewStage->name() + ".lsq";
116}
117
118template<class Impl>
119void
120LSQ<Impl>::regStats()
121{
122 //Initialize LSQs
123 for (ThreadID tid = 0; tid < numThreads; tid++) {
124 thread[tid].regStats();
125 }
126}
127
128template<class Impl>
129void
130LSQ<Impl>::setActiveThreads(list<ThreadID> *at_ptr)
131{
132 activeThreads = at_ptr;
133 assert(activeThreads != 0);
134}
135
136template <class Impl>
137void
138LSQ<Impl>::drainSanityCheck() const
139{
140 assert(isDrained());
141
142 for (ThreadID tid = 0; tid < numThreads; tid++)
143 thread[tid].drainSanityCheck();
144}
145
146template <class Impl>
147bool
148LSQ<Impl>::isDrained() const
149{
150 bool drained(true);
151
152 if (!lqEmpty()) {
153 DPRINTF(Drain, "Not drained, LQ not empty.\n");
154 drained = false;
155 }
156
157 if (!sqEmpty()) {
158 DPRINTF(Drain, "Not drained, SQ not empty.\n");
159 drained = false;
160 }
161
162 return drained;
163}
164
165template <class Impl>
166void
167LSQ<Impl>::takeOverFrom()
168{
169 usedStorePorts = 0;
170 _cacheBlocked = false;
171
172 for (ThreadID tid = 0; tid < numThreads; tid++) {
173 thread[tid].takeOverFrom();
174 }
175}
176
177template <class Impl>
178void
179LSQ<Impl>::tick()
180{
181 // Re-issue loads which got blocked on the per-cycle load ports limit.
182 if (usedLoadPorts == cacheLoadPorts && !_cacheBlocked)
183 iewStage->cacheUnblocked();
184
185 usedLoadPorts = 0;
186 usedStorePorts = 0;
187}
188
189template<class Impl>
190bool
191LSQ<Impl>::cacheBlocked() const
192{
193 return _cacheBlocked;
194}
195
196template<class Impl>
197void
198LSQ<Impl>::cacheBlocked(bool v)
199{
200 _cacheBlocked = v;
201}
202
203template<class Impl>
204bool
205LSQ<Impl>::cachePortAvailable(bool is_load) const
206{
207 bool ret;
208 if (is_load) {
209 ret = usedLoadPorts < cacheLoadPorts;
210 } else {
211 ret = usedStorePorts < cacheStorePorts;
212 }
213 return ret;
214}
215
216template<class Impl>
217void
218LSQ<Impl>::cachePortBusy(bool is_load)
219{
220 assert(cachePortAvailable(is_load));
221 if (is_load) {
222 usedLoadPorts++;
223 } else {
224 usedStorePorts++;
225 }
226}
227
228template<class Impl>
229void
230LSQ<Impl>::insertLoad(const DynInstPtr &load_inst)
231{
232 ThreadID tid = load_inst->threadNumber;
233
234 thread[tid].insertLoad(load_inst);
235}
236
237template<class Impl>
238void
239LSQ<Impl>::insertStore(const DynInstPtr &store_inst)
240{
241 ThreadID tid = store_inst->threadNumber;
242
243 thread[tid].insertStore(store_inst);
244}
245
246template<class Impl>
247Fault
248LSQ<Impl>::executeLoad(const DynInstPtr &inst)
249{
250 ThreadID tid = inst->threadNumber;
251
252 return thread[tid].executeLoad(inst);
253}
254
255template<class Impl>
256Fault
257LSQ<Impl>::executeStore(const DynInstPtr &inst)
258{
259 ThreadID tid = inst->threadNumber;
260
261 return thread[tid].executeStore(inst);
262}
263
264template<class Impl>
265void
266LSQ<Impl>::writebackStores()
267{
268 list<ThreadID>::iterator threads = activeThreads->begin();
269 list<ThreadID>::iterator end = activeThreads->end();
270
271 while (threads != end) {
272 ThreadID tid = *threads++;
273
274 if (numStoresToWB(tid) > 0) {
275 DPRINTF(Writeback,"[tid:%i] Writing back stores. %i stores "
276 "available for Writeback.\n", tid, numStoresToWB(tid));
277 }
278
279 thread[tid].writebackStores();
280 }
281}
282
283template<class Impl>
284bool
285LSQ<Impl>::violation()
286{
287 /* Answers: Does Anybody Have a Violation?*/
288 list<ThreadID>::iterator threads = activeThreads->begin();
289 list<ThreadID>::iterator end = activeThreads->end();
290
291 while (threads != end) {
292 ThreadID tid = *threads++;
293
294 if (thread[tid].violation())
295 return true;
296 }
297
298 return false;
299}
300
301template <class Impl>
302void
303LSQ<Impl>::recvReqRetry()
304{
305 iewStage->cacheUnblocked();
306 cacheBlocked(false);
307
308 for (ThreadID tid : *activeThreads) {
309 thread[tid].recvRetry();
310 }
311}
312
313template <class Impl>
314void
315LSQ<Impl>::completeDataAccess(PacketPtr pkt)
316{
317 auto senderState = dynamic_cast<LSQSenderState*>(pkt->senderState);
318 thread[cpu->contextToThread(senderState->contextId())]
319 .completeDataAccess(pkt);
320}
321
322template <class Impl>
323bool
324LSQ<Impl>::recvTimingResp(PacketPtr pkt)
325{
326 if (pkt->isError())
327 DPRINTF(LSQ, "Got error packet back for address: %#X\n",
328 pkt->getAddr());
329
330 auto senderState = dynamic_cast<LSQSenderState*>(pkt->senderState);
331 panic_if(!senderState, "Got packet back with unknown sender state\n");
332
333 thread[cpu->contextToThread(senderState->contextId())].recvTimingResp(pkt);
334
335 if (pkt->isInvalidate()) {
336 // This response also contains an invalidate; e.g. this can be the case
337 // if cmd is ReadRespWithInvalidate.
338 //
339 // The calling order between completeDataAccess and checkSnoop matters.
340 // By calling checkSnoop after completeDataAccess, we ensure that the
341 // fault set by checkSnoop is not lost. Calling writeback (more
342 // specifically inst->completeAcc) in completeDataAccess overwrites
343 // fault, and in case this instruction requires squashing (as
344 // determined by checkSnoop), the ReExec fault set by checkSnoop would
345 // be lost otherwise.
346
347 DPRINTF(LSQ, "received invalidation with response for addr:%#x\n",
348 pkt->getAddr());
349
350 for (ThreadID tid = 0; tid < numThreads; tid++) {
351 thread[tid].checkSnoop(pkt);
352 }
353 }
354 // Update the LSQRequest state (this may delete the request)
355 senderState->request()->packetReplied();
356
357 return true;
358}
359
360template <class Impl>
361void
362LSQ<Impl>::recvTimingSnoopReq(PacketPtr pkt)
363{
364 DPRINTF(LSQ, "received pkt for addr:%#x %s\n", pkt->getAddr(),
365 pkt->cmdString());
366
367 // must be a snoop
368 if (pkt->isInvalidate()) {
369 DPRINTF(LSQ, "received invalidation for addr:%#x\n",
370 pkt->getAddr());
371 for (ThreadID tid = 0; tid < numThreads; tid++) {
372 thread[tid].checkSnoop(pkt);
373 }
374 }
375}
376
377template<class Impl>
378int
379LSQ<Impl>::getCount()
380{
381 unsigned total = 0;
382
383 list<ThreadID>::iterator threads = activeThreads->begin();
384 list<ThreadID>::iterator end = activeThreads->end();
385
386 while (threads != end) {
387 ThreadID tid = *threads++;
388
389 total += getCount(tid);
390 }
391
392 return total;
393}
394
395template<class Impl>
396int
397LSQ<Impl>::numLoads()
398{
399 unsigned total = 0;
400
401 list<ThreadID>::iterator threads = activeThreads->begin();
402 list<ThreadID>::iterator end = activeThreads->end();
403
404 while (threads != end) {
405 ThreadID tid = *threads++;
406
407 total += numLoads(tid);
408 }
409
410 return total;
411}
412
413template<class Impl>
414int
415LSQ<Impl>::numStores()
416{
417 unsigned total = 0;
418
419 list<ThreadID>::iterator threads = activeThreads->begin();
420 list<ThreadID>::iterator end = activeThreads->end();
421
422 while (threads != end) {
423 ThreadID tid = *threads++;
424
425 total += thread[tid].numStores();
426 }
427
428 return total;
429}
430
431template<class Impl>
432unsigned
433LSQ<Impl>::numFreeLoadEntries()
434{
435 unsigned total = 0;
436
437 list<ThreadID>::iterator threads = activeThreads->begin();
438 list<ThreadID>::iterator end = activeThreads->end();
439
440 while (threads != end) {
441 ThreadID tid = *threads++;
442
443 total += thread[tid].numFreeLoadEntries();
444 }
445
446 return total;
447}
448
449template<class Impl>
450unsigned
451LSQ<Impl>::numFreeStoreEntries()
452{
453 unsigned total = 0;
454
455 list<ThreadID>::iterator threads = activeThreads->begin();
456 list<ThreadID>::iterator end = activeThreads->end();
457
458 while (threads != end) {
459 ThreadID tid = *threads++;
460
461 total += thread[tid].numFreeStoreEntries();
462 }
463
464 return total;
465}
466
467template<class Impl>
468unsigned
469LSQ<Impl>::numFreeLoadEntries(ThreadID tid)
470{
471 return thread[tid].numFreeLoadEntries();
472}
473
474template<class Impl>
475unsigned
476LSQ<Impl>::numFreeStoreEntries(ThreadID tid)
477{
478 return thread[tid].numFreeStoreEntries();
479}
480
481template<class Impl>
482bool
483LSQ<Impl>::isFull()
484{
485 list<ThreadID>::iterator threads = activeThreads->begin();
486 list<ThreadID>::iterator end = activeThreads->end();
487
488 while (threads != end) {
489 ThreadID tid = *threads++;
490
491 if (!(thread[tid].lqFull() || thread[tid].sqFull()))
492 return false;
493 }
494
495 return true;
496}
497
498template<class Impl>
499bool
500LSQ<Impl>::isFull(ThreadID tid)
501{
502 //@todo: Change to Calculate All Entries for
503 //Dynamic Policy
504 if (lsqPolicy == SMTQueuePolicy::Dynamic)
505 return isFull();
506 else
507 return thread[tid].lqFull() || thread[tid].sqFull();
508}
509
template<class Impl>
bool
LSQ<Impl>::isEmpty() const
{
    // Empty only when both the load queue and the store queue are empty
    // across all active threads.
    return lqEmpty() && sqEmpty();
}
516
517template<class Impl>
518bool
519LSQ<Impl>::lqEmpty() const
520{
521 list<ThreadID>::const_iterator threads = activeThreads->begin();
522 list<ThreadID>::const_iterator end = activeThreads->end();
523
524 while (threads != end) {
525 ThreadID tid = *threads++;
526
527 if (!thread[tid].lqEmpty())
528 return false;
529 }
530
531 return true;
532}
533
534template<class Impl>
535bool
536LSQ<Impl>::sqEmpty() const
537{
538 list<ThreadID>::const_iterator threads = activeThreads->begin();
539 list<ThreadID>::const_iterator end = activeThreads->end();
540
541 while (threads != end) {
542 ThreadID tid = *threads++;
543
544 if (!thread[tid].sqEmpty())
545 return false;
546 }
547
548 return true;
549}
550
551template<class Impl>
552bool
553LSQ<Impl>::lqFull()
554{
555 list<ThreadID>::iterator threads = activeThreads->begin();
556 list<ThreadID>::iterator end = activeThreads->end();
557
558 while (threads != end) {
559 ThreadID tid = *threads++;
560
561 if (!thread[tid].lqFull())
562 return false;
563 }
564
565 return true;
566}
567
568template<class Impl>
569bool
570LSQ<Impl>::lqFull(ThreadID tid)
571{
572 //@todo: Change to Calculate All Entries for
573 //Dynamic Policy
574 if (lsqPolicy == SMTQueuePolicy::Dynamic)
575 return lqFull();
576 else
577 return thread[tid].lqFull();
578}
579
580template<class Impl>
581bool
582LSQ<Impl>::sqFull()
583{
584 list<ThreadID>::iterator threads = activeThreads->begin();
585 list<ThreadID>::iterator end = activeThreads->end();
586
587 while (threads != end) {
588 ThreadID tid = *threads++;
589
590 if (!sqFull(tid))
591 return false;
592 }
593
594 return true;
595}
596
597template<class Impl>
598bool
599LSQ<Impl>::sqFull(ThreadID tid)
600{
601 //@todo: Change to Calculate All Entries for
602 //Dynamic Policy
603 if (lsqPolicy == SMTQueuePolicy::Dynamic)
604 return sqFull();
605 else
606 return thread[tid].sqFull();
607}
608
609template<class Impl>
610bool
611LSQ<Impl>::isStalled()
612{
613 list<ThreadID>::iterator threads = activeThreads->begin();
614 list<ThreadID>::iterator end = activeThreads->end();
615
616 while (threads != end) {
617 ThreadID tid = *threads++;
618
619 if (!thread[tid].isStalled())
620 return false;
621 }
622
623 return true;
624}
625
626template<class Impl>
627bool
628LSQ<Impl>::isStalled(ThreadID tid)
629{
630 if (lsqPolicy == SMTQueuePolicy::Dynamic)
631 return isStalled();
632 else
633 return thread[tid].isStalled();
634}
635
636template<class Impl>
637bool
638LSQ<Impl>::hasStoresToWB()
639{
640 list<ThreadID>::iterator threads = activeThreads->begin();
641 list<ThreadID>::iterator end = activeThreads->end();
642
643 while (threads != end) {
644 ThreadID tid = *threads++;
645
646 if (hasStoresToWB(tid))
647 return true;
648 }
649
650 return false;
651}
652
653template<class Impl>
654bool
655LSQ<Impl>::willWB()
656{
657 list<ThreadID>::iterator threads = activeThreads->begin();
658 list<ThreadID>::iterator end = activeThreads->end();
659
660 while (threads != end) {
661 ThreadID tid = *threads++;
662
663 if (willWB(tid))
664 return true;
665 }
666
667 return false;
668}
669
670template<class Impl>
671void
672LSQ<Impl>::dumpInsts() const
673{
674 list<ThreadID>::const_iterator threads = activeThreads->begin();
675 list<ThreadID>::const_iterator end = activeThreads->end();
676
677 while (threads != end) {
678 ThreadID tid = *threads++;
679
680 thread[tid].dumpInsts();
681 }
682}
683
template<class Impl>
Fault
LSQ<Impl>::pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data,
                       unsigned int size, Addr addr, Request::Flags flags,
                       uint64_t *res, AtomicOpFunctor *amo_op,
                       const std::vector<bool>& byteEnable)
{
    // This incoming request can be either load, store or atomic.
    // An atomic request carries a pointer to its atomic memory operation.
    bool isAtomic M5_VAR_USED = !isLoad && amo_op;

    ThreadID tid = cpu->contextToThread(inst->contextId());
    auto cacheLineSize = cpu->cacheLineSize();
    bool needs_burst = transferNeedsBurst(addr, size, cacheLineSize);
    LSQRequest* req = nullptr;

    // Atomic requests that access data across cache line boundary are
    // currently not allowed since the cache does not guarantee corresponding
    // atomic memory operations to be executed atomically across a cache line.
    // For ISAs such as x86 that supports cross-cache-line atomic instructions,
    // the cache needs to be modified to perform atomic update to both cache
    // lines. For now, such cross-line update is not supported.
    assert(!isAtomic || (isAtomic && !needs_burst));

    if (inst->translationStarted()) {
        // A previous execution attempt (e.g. a replayed strictly-ordered
        // load) already built and saved the request; reuse it.
        req = inst->savedReq;
        assert(req);
    } else {
        // Accesses that cross a cache-line boundary are split into
        // per-line fragments; everything else is a single request.
        if (needs_burst) {
            req = new SplitDataRequest(&thread[tid], inst, isLoad, addr,
                    size, flags, data, res);
        } else {
            req = new SingleDataRequest(&thread[tid], inst, isLoad, addr,
                    size, flags, data, res, amo_op);
        }
        assert(req);
        if (!byteEnable.empty()) {
            req->_byteEnable = byteEnable;
        }
        inst->setRequest();
        req->taskId(cpu->taskId());

        // There might be fault from a previous execution attempt if this is
        // a strictly ordered load
        inst->getFault() = NoFault;

        req->initiateTranslation();
    }

    /* This is the place where instructions get the effAddr. */
    if (req->isTranslationComplete()) {
        if (inst->getFault() == NoFault) {
            inst->effAddr = req->getVaddr();
            inst->effSize = size;
            inst->effAddrValid(true);

            // Mirror the request for the checker CPU, if one is attached.
            if (cpu->checker) {
                inst->reqToVerify = std::make_shared<Request>(*req->request());
            }
            // Kick off the actual memory access through the CPU.
            if (isLoad)
                inst->getFault() = cpu->read(req, inst->lqIdx);
            else
                inst->getFault() = cpu->write(req, data, inst->sqIdx);
        } else if (isLoad) {
            inst->setMemAccPredicate(false);
            // Commit will have to clean up whatever happened.  Set this
            // instruction as executed.
            inst->setExecuted();
        }
    }

    if (inst->traceData)
        inst->traceData->setMem(addr, size, flags);

    return inst->getFault();
}
757
template<class Impl>
void
LSQ<Impl>::SingleDataRequest::finish(const Fault &fault, const RequestPtr &req,
        ThreadContext* tc, BaseTLB::Mode mode)
{
    // TLB callback: the single (and only) fragment finished translating.
    _fault.push_back(fault);
    numInTranslationFragments = 0;
    numTranslatedFragments = 1;
    /* If the instruction has been squashed, let the request know
     * as it may have to self-destruct. */
    if (_inst->isSquashed()) {
        this->squashTranslation();
    } else {
        _inst->strictlyOrdered(req->isStrictlyOrdered());

        flags.set(Flag::TranslationFinished);
        if (fault == NoFault) {
            // Translation succeeded: publish the physical address and
            // request flags to the instruction and move to Request state.
            _inst->physEffAddr = req->getPaddr();
            _inst->memReqFlags = req->getFlags();
            if (req->isCondSwap()) {
                // Conditional swaps carry their comparison value in _res.
                assert(_res);
                req->setExtraData(*_res);
            }
            setState(State::Request);
        } else {
            setState(State::Fault);
        }

        LSQRequest::_inst->fault = fault;
        LSQRequest::_inst->translationCompleted(true);
    }
}
790
template<class Impl>
void
LSQ<Impl>::SplitDataRequest::finish(const Fault &fault, const RequestPtr &req,
        ThreadContext* tc, BaseTLB::Mode mode)
{
    // TLB callback: one fragment of a split request finished translating.
    _fault.push_back(fault);
    // Fragments are expected to complete in issue order unless the
    // translation was delayed.
    assert(req == _requests[numTranslatedFragments] || this->isDelayed());

    numInTranslationFragments--;
    numTranslatedFragments++;

    // Accumulate this fragment's flags into the main (virtual) request.
    mainReq->setFlags(req->getFlags());

    // Only act once every fragment has come back.
    if (numTranslatedFragments == _requests.size()) {
        if (_inst->isSquashed()) {
            this->squashTranslation();
        } else {
            _inst->strictlyOrdered(mainReq->isStrictlyOrdered());
            flags.set(Flag::TranslationFinished);
            auto fault_it = _fault.begin();
            /* Ffwd to the first non-NoFault entry, if any. */
            while (fault_it != _fault.end() && *fault_it == NoFault)
                fault_it++;
            /* If none of the fragments faulted: */
            if (fault_it == _fault.end()) {
                // Use the first fragment's paddr as the instruction's
                // physical effective address.
                _inst->physEffAddr = request(0)->getPaddr();

                _inst->memReqFlags = mainReq->getFlags();
                if (mainReq->isCondSwap()) {
                    // Conditional swaps carry their comparison value in _res.
                    assert(_res);
                    mainReq->setExtraData(*_res);
                }
                setState(State::Request);
                _inst->fault = NoFault;
            } else {
                // Report the first faulting fragment's fault.
                setState(State::Fault);
                _inst->fault = *fault_it;
            }
            _inst->translationCompleted(true);
        }
    }
}
833
template<class Impl>
void
LSQ<Impl>::SingleDataRequest::initiateTranslation()
{
    // Build the one underlying Request and hand it to the TLB.
    assert(_requests.size() == 0);

    // addRequest may elide the request entirely (e.g. nothing enabled
    // in the byte-enable mask), leaving _requests empty.
    this->addRequest(_addr, _size, _byteEnable);

    if (_requests.size() > 0) {
        _requests.back()->setReqInstSeqNum(_inst->seqNum);
        _requests.back()->taskId(_taskId);
        _inst->translationStarted(true);
        setState(State::Translation);
        flags.set(Flag::TranslationStarted);

        // Save the request so a replayed attempt can reuse it.
        _inst->savedReq = this;
        sendFragmentToTranslation(0);
    } else {
        // No memory access needed: mark the access as predicated-off.
        _inst->setMemAccPredicate(false);
    }
}
855
template<class Impl>
PacketPtr
LSQ<Impl>::SplitDataRequest::mainPacket()
{
    // Accessor for the virtual packet spanning the whole split access.
    return _mainPacket;
}
862
template<class Impl>
RequestPtr
LSQ<Impl>::SplitDataRequest::mainRequest()
{
    // Accessor for the virtual request spanning the whole split access.
    return mainReq;
}
869
template<class Impl>
void
LSQ<Impl>::SplitDataRequest::initiateTranslation()
{
    // Break the access into cache-line-sized fragments: a possibly
    // unaligned head, zero or more whole lines, and a possibly short
    // tail.  Each fragment gets its own Request and TLB translation.
    auto cacheLineSize = _port.cacheLineSize();
    Addr base_addr = _addr;
    Addr next_addr = addrBlockAlign(_addr + cacheLineSize, cacheLineSize);
    Addr final_addr = addrBlockAlign(_addr + _size, cacheLineSize);
    uint32_t size_so_far = 0;

    // mainReq is a virtual request covering the entire access; it is
    // never sent to memory itself.
    mainReq = std::make_shared<Request>(_inst->getASID(), base_addr,
                _size, _flags, _inst->masterId(),
                _inst->instAddr(), _inst->contextId());
    if (!_byteEnable.empty()) {
        mainReq->setByteEnable(_byteEnable);
    }

    // Paddr is not used in mainReq. However, we will accumulate the flags
    // from the sub requests into mainReq by calling setFlags() in finish().
    // setFlags() assumes that paddr is set so flip the paddr valid bit here to
    // avoid a potential assert in setFlags() when we call it from finish().
    mainReq->setPaddr(0);

    /* Get the pre-fix, possibly unaligned. */
    if (_byteEnable.empty()) {
        this->addRequest(base_addr, next_addr - base_addr, _byteEnable);
    } else {
        // Slice the byte-enable mask to match this fragment's bytes.
        auto it_start = _byteEnable.begin();
        auto it_end = _byteEnable.begin() + (next_addr - base_addr);
        this->addRequest(base_addr, next_addr - base_addr,
                         std::vector<bool>(it_start, it_end));
    }
    size_so_far = next_addr - base_addr;

    /* We are block aligned now, reading whole blocks. */
    base_addr = next_addr;
    while (base_addr != final_addr) {
        if (_byteEnable.empty()) {
            this->addRequest(base_addr, cacheLineSize, _byteEnable);
        } else {
            auto it_start = _byteEnable.begin() + size_so_far;
            auto it_end = _byteEnable.begin() + size_so_far + cacheLineSize;
            this->addRequest(base_addr, cacheLineSize,
                             std::vector<bool>(it_start, it_end));
        }
        size_so_far += cacheLineSize;
        base_addr += cacheLineSize;
    }

    /* Deal with the tail. */
    if (size_so_far < _size) {
        if (_byteEnable.empty()) {
            this->addRequest(base_addr, _size - size_so_far, _byteEnable);
        } else {
            auto it_start = _byteEnable.begin() + size_so_far;
            auto it_end = _byteEnable.end();
            this->addRequest(base_addr, _size - size_so_far,
                             std::vector<bool>(it_start, it_end));
        }
    }

    if (_requests.size() > 0) {
        /* Setup the requests and send them to translation. */
        for (auto& r: _requests) {
            r->setReqInstSeqNum(_inst->seqNum);
            r->taskId(_taskId);
        }

        _inst->translationStarted(true);
        setState(State::Translation);
        flags.set(Flag::TranslationStarted);
        // Save the request so a replayed attempt can reuse it.
        this->_inst->savedReq = this;
        numInTranslationFragments = 0;
        numTranslatedFragments = 0;
        _fault.resize(_requests.size());

        for (uint32_t i = 0; i < _requests.size(); i++) {
            sendFragmentToTranslation(i);
        }
    } else {
        // Every fragment was elided: mark the access as predicated-off.
        _inst->setMemAccPredicate(false);
    }
}
953
template<class Impl>
void
LSQ<Impl>::LSQRequest::sendFragmentToTranslation(int i)
{
    // Hand fragment i to the data TLB; finish() is the completion
    // callback (this object is the Translation).
    numInTranslationFragments++;
    _port.dTLB()->translateTiming(
            this->request(i),
            this->_inst->thread->getTC(), this,
            this->isLoad() ? BaseTLB::Read : BaseTLB::Write);
}
964
template<class Impl>
bool
LSQ<Impl>::SingleDataRequest::recvTimingResp(PacketPtr pkt)
{
    // Memory response for the single outstanding packet: mark the
    // request complete and hand the packet back to the LSQ unit.
    assert(_numOutstandingPackets == 1);
    auto state = dynamic_cast<LSQSenderState*>(pkt->senderState);
    setState(State::Complete);
    flags.set(Flag::Complete);
    state->outstanding--;
    assert(pkt == _packets.front());
    _port.completeDataAccess(pkt);
    return true;
}
978
template<class Impl>
bool
LSQ<Impl>::SplitDataRequest::recvTimingResp(PacketPtr pkt)
{
    // Memory response for one fragment of a split access.
    auto state = dynamic_cast<LSQSenderState*>(pkt->senderState);
    // Sanity check: the packet must be one of ours.
    uint32_t pktIdx = 0;
    while (pktIdx < _packets.size() && pkt != _packets[pktIdx])
        pktIdx++;
    assert(pktIdx < _packets.size());
    numReceivedPackets++;
    state->outstanding--;
    // Only complete once every fragment has responded.
    if (numReceivedPackets == _packets.size()) {
        setState(State::Complete);
        flags.set(Flag::Complete);
        /* Assemble a single response packet spanning the whole access. */
        PacketPtr resp = isLoad()
            ? Packet::createRead(mainReq)
            : Packet::createWrite(mainReq);
        if (isLoad())
            resp->dataStatic(_inst->memData);
        else
            resp->dataStatic(_data);
        resp->senderState = _senderState;
        _port.completeDataAccess(resp);
        // The assembled packet is only needed for the callback above.
        delete resp;
    }
    return true;
}
1007
template<class Impl>
void
LSQ<Impl>::SingleDataRequest::buildPackets()
{
    // Build the one packet for this request, pointing at the
    // instruction's data buffer.
    assert(_senderState);
    /* Retries do not create new packets. */
    if (_packets.size() == 0) {
        _packets.push_back(
                isLoad()
                    ?  Packet::createRead(request())
                    :  Packet::createWrite(request()));
        _packets.back()->dataStatic(_inst->memData);
        _packets.back()->senderState = _senderState;
    }
    assert(_packets.size() == 1);
}
1024
template<class Impl>
void
LSQ<Impl>::SplitDataRequest::buildPackets()
{
    /* Extra data?? */
    Addr base_address = _addr;

    /* Retries do not create new packets. */
    if (_packets.size() == 0) {
        // The main packet spans the whole access; loads point it at the
        // instruction's data buffer so fragments can fill slices of it.
        if (isLoad()) {
            _mainPacket = Packet::createRead(mainReq);
            _mainPacket->dataStatic(_inst->memData);
        }
        // Build one packet per fragment, stopping at the first fragment
        // whose translation faulted.
        for (int i = 0; i < _requests.size() && _fault[i] == NoFault; i++) {
            RequestPtr r = _requests[i];
            PacketPtr pkt = isLoad() ? Packet::createRead(r)
                                     : Packet::createWrite(r);
            // Each fragment covers a slice of the instruction's buffer,
            // offset by the fragment's position in the access.
            ptrdiff_t offset = r->getVaddr() - base_address;
            if (isLoad()) {
                pkt->dataStatic(_inst->memData + offset);
            } else {
                // Stores copy their slice so each packet owns its data.
                uint8_t* req_data = new uint8_t[r->getSize()];
                std::memcpy(req_data,
                        _inst->memData + offset,
                        r->getSize());
                pkt->dataDynamic(req_data);
            }
            pkt->senderState = _senderState;
            _packets.push_back(pkt);
        }
    }
    assert(_packets.size() > 0);
}
1058
1059template<class Impl>
1060void
1061LSQ<Impl>::SingleDataRequest::sendPacketToCache()
1062{
1063 assert(_numOutstandingPackets == 0);
1064 if (lsqUnit()->trySendPacket(isLoad(), _packets.at(0)))
1065 _numOutstandingPackets = 1;
1066}
1067
1068template<class Impl>
1069void
1070LSQ<Impl>::SplitDataRequest::sendPacketToCache()
1071{
1072 /* Try to send the packets. */
1073 while (numReceivedPackets + _numOutstandingPackets < _packets.size() &&
1074 lsqUnit()->trySendPacket(isLoad(),
1075 _packets.at(numReceivedPackets + _numOutstandingPackets))) {
1076 _numOutstandingPackets++;
1077 }
1078}
1079
template<class Impl>
void
LSQ<Impl>::SingleDataRequest::handleIprWrite(ThreadContext *thread,
                                             PacketPtr pkt)
{
    // A single request maps directly onto one IPR write.
    TheISA::handleIprWrite(thread, pkt);
}
1087
1088template<class Impl>
1089void
1090LSQ<Impl>::SplitDataRequest::handleIprWrite(ThreadContext *thread,
1091 PacketPtr mainPkt)
1092{
1093 unsigned offset = 0;
1094 for (auto r: _requests) {
1095 PacketPtr pkt = new Packet(r, MemCmd::WriteReq);
1096 pkt->dataStatic(mainPkt->getPtr<uint8_t>() + offset);
1097 TheISA::handleIprWrite(thread, pkt);
1098 offset += r->getSize();
1099 delete pkt;
1100 }
1101}
1102
template<class Impl>
Cycles
LSQ<Impl>::SingleDataRequest::handleIprRead(ThreadContext *thread,
                                            PacketPtr pkt)
{
    // A single request maps directly onto one IPR read; its latency is
    // the access latency.
    return TheISA::handleIprRead(thread, pkt);
}
1110
1111template<class Impl>
1112Cycles
1113LSQ<Impl>::SplitDataRequest::handleIprRead(ThreadContext *thread,
1114 PacketPtr mainPkt)
1115{
1116 Cycles delay(0);
1117 unsigned offset = 0;
1118
1119 for (auto r: _requests) {
1120 PacketPtr pkt = new Packet(r, MemCmd::ReadReq);
1121 pkt->dataStatic(mainPkt->getPtr<uint8_t>() + offset);
1122 Cycles d = TheISA::handleIprRead(thread, pkt);
1123 if (d > delay)
1124 delay = d;
1125 offset += r->getSize();
1126 delete pkt;
1127 }
1128 return delay;
1129}
1130
1131template<class Impl>
1132bool
1133LSQ<Impl>::SingleDataRequest::isCacheBlockHit(Addr blockAddr, Addr blockMask)
1134{
1135 return ( (LSQRequest::_requests[0]->getPaddr() & blockMask) == blockAddr);
1136}
1137
1138template<class Impl>
1139bool
1140LSQ<Impl>::SplitDataRequest::isCacheBlockHit(Addr blockAddr, Addr blockMask)
1141{
1142 bool is_hit = false;
1143 for (auto &r: _requests) {
1144 if ((r->getPaddr() & blockMask) == blockAddr) {
1145 is_hit = true;
1146 break;
1147 }
1148 }
1149 return is_hit;
1150}
1151
1152#endif//__CPU_O3_LSQ_IMPL_HH__
731 req->initiateTranslation();
732 }
733
734 /* This is the place were instructions get the effAddr. */
735 if (req->isTranslationComplete()) {
736 if (inst->getFault() == NoFault) {
737 inst->effAddr = req->getVaddr();
738 inst->effSize = size;
739 inst->effAddrValid(true);
740
741 if (cpu->checker) {
742 inst->reqToVerify = std::make_shared<Request>(*req->request());
743 }
744 if (isLoad)
745 inst->getFault() = cpu->read(req, inst->lqIdx);
746 else
747 inst->getFault() = cpu->write(req, data, inst->sqIdx);
748 } else if (isLoad) {
749 inst->setMemAccPredicate(false);
750 // Commit will have to clean up whatever happened. Set this
751 // instruction as executed.
752 inst->setExecuted();
753 }
754 }
755
756 if (inst->traceData)
757 inst->traceData->setMem(addr, size, flags);
758
759 return inst->getFault();
760}
761
762template<class Impl>
763void
764LSQ<Impl>::SingleDataRequest::finish(const Fault &fault, const RequestPtr &req,
765 ThreadContext* tc, BaseTLB::Mode mode)
766{
767 _fault.push_back(fault);
768 numInTranslationFragments = 0;
769 numTranslatedFragments = 1;
770 /* If the instruction has been squahsed, let the request know
771 * as it may have to self-destruct. */
772 if (_inst->isSquashed()) {
773 this->squashTranslation();
774 } else {
775 _inst->strictlyOrdered(req->isStrictlyOrdered());
776
777 flags.set(Flag::TranslationFinished);
778 if (fault == NoFault) {
779 _inst->physEffAddr = req->getPaddr();
780 _inst->memReqFlags = req->getFlags();
781 if (req->isCondSwap()) {
782 assert(_res);
783 req->setExtraData(*_res);
784 }
785 setState(State::Request);
786 } else {
787 setState(State::Fault);
788 }
789
790 LSQRequest::_inst->fault = fault;
791 LSQRequest::_inst->translationCompleted(true);
792 }
793}
794
795template<class Impl>
796void
797LSQ<Impl>::SplitDataRequest::finish(const Fault &fault, const RequestPtr &req,
798 ThreadContext* tc, BaseTLB::Mode mode)
799{
800 _fault.push_back(fault);
801 assert(req == _requests[numTranslatedFragments] || this->isDelayed());
802
803 numInTranslationFragments--;
804 numTranslatedFragments++;
805
806 mainReq->setFlags(req->getFlags());
807
808 if (numTranslatedFragments == _requests.size()) {
809 if (_inst->isSquashed()) {
810 this->squashTranslation();
811 } else {
812 _inst->strictlyOrdered(mainReq->isStrictlyOrdered());
813 flags.set(Flag::TranslationFinished);
814 auto fault_it = _fault.begin();
815 /* Ffwd to the first NoFault. */
816 while (fault_it != _fault.end() && *fault_it == NoFault)
817 fault_it++;
818 /* If none of the fragments faulted: */
819 if (fault_it == _fault.end()) {
820 _inst->physEffAddr = request(0)->getPaddr();
821
822 _inst->memReqFlags = mainReq->getFlags();
823 if (mainReq->isCondSwap()) {
824 assert(_res);
825 mainReq->setExtraData(*_res);
826 }
827 setState(State::Request);
828 _inst->fault = NoFault;
829 } else {
830 setState(State::Fault);
831 _inst->fault = *fault_it;
832 }
833 _inst->translationCompleted(true);
834 }
835 }
836}
837
838template<class Impl>
839void
840LSQ<Impl>::SingleDataRequest::initiateTranslation()
841{
842 assert(_requests.size() == 0);
843
844 this->addRequest(_addr, _size, _byteEnable);
845
846 if (_requests.size() > 0) {
847 _requests.back()->setReqInstSeqNum(_inst->seqNum);
848 _requests.back()->taskId(_taskId);
849 _inst->translationStarted(true);
850 setState(State::Translation);
851 flags.set(Flag::TranslationStarted);
852
853 _inst->savedReq = this;
854 sendFragmentToTranslation(0);
855 } else {
856 _inst->setMemAccPredicate(false);
857 }
858}
859
860template<class Impl>
861PacketPtr
862LSQ<Impl>::SplitDataRequest::mainPacket()
863{
864 return _mainPacket;
865}
866
867template<class Impl>
868RequestPtr
869LSQ<Impl>::SplitDataRequest::mainRequest()
870{
871 return mainReq;
872}
873
874template<class Impl>
875void
876LSQ<Impl>::SplitDataRequest::initiateTranslation()
877{
878 auto cacheLineSize = _port.cacheLineSize();
879 Addr base_addr = _addr;
880 Addr next_addr = addrBlockAlign(_addr + cacheLineSize, cacheLineSize);
881 Addr final_addr = addrBlockAlign(_addr + _size, cacheLineSize);
882 uint32_t size_so_far = 0;
883
884 mainReq = std::make_shared<Request>(_inst->getASID(), base_addr,
885 _size, _flags, _inst->masterId(),
886 _inst->instAddr(), _inst->contextId());
887 if (!_byteEnable.empty()) {
888 mainReq->setByteEnable(_byteEnable);
889 }
890
891 // Paddr is not used in mainReq. However, we will accumulate the flags
892 // from the sub requests into mainReq by calling setFlags() in finish().
893 // setFlags() assumes that paddr is set so flip the paddr valid bit here to
894 // avoid a potential assert in setFlags() when we call it from finish().
895 mainReq->setPaddr(0);
896
897 /* Get the pre-fix, possibly unaligned. */
898 if (_byteEnable.empty()) {
899 this->addRequest(base_addr, next_addr - base_addr, _byteEnable);
900 } else {
901 auto it_start = _byteEnable.begin();
902 auto it_end = _byteEnable.begin() + (next_addr - base_addr);
903 this->addRequest(base_addr, next_addr - base_addr,
904 std::vector<bool>(it_start, it_end));
905 }
906 size_so_far = next_addr - base_addr;
907
908 /* We are block aligned now, reading whole blocks. */
909 base_addr = next_addr;
910 while (base_addr != final_addr) {
911 if (_byteEnable.empty()) {
912 this->addRequest(base_addr, cacheLineSize, _byteEnable);
913 } else {
914 auto it_start = _byteEnable.begin() + size_so_far;
915 auto it_end = _byteEnable.begin() + size_so_far + cacheLineSize;
916 this->addRequest(base_addr, cacheLineSize,
917 std::vector<bool>(it_start, it_end));
918 }
919 size_so_far += cacheLineSize;
920 base_addr += cacheLineSize;
921 }
922
923 /* Deal with the tail. */
924 if (size_so_far < _size) {
925 if (_byteEnable.empty()) {
926 this->addRequest(base_addr, _size - size_so_far, _byteEnable);
927 } else {
928 auto it_start = _byteEnable.begin() + size_so_far;
929 auto it_end = _byteEnable.end();
930 this->addRequest(base_addr, _size - size_so_far,
931 std::vector<bool>(it_start, it_end));
932 }
933 }
934
935 if (_requests.size() > 0) {
936 /* Setup the requests and send them to translation. */
937 for (auto& r: _requests) {
938 r->setReqInstSeqNum(_inst->seqNum);
939 r->taskId(_taskId);
940 }
941
942 _inst->translationStarted(true);
943 setState(State::Translation);
944 flags.set(Flag::TranslationStarted);
945 this->_inst->savedReq = this;
946 numInTranslationFragments = 0;
947 numTranslatedFragments = 0;
948 _fault.resize(_requests.size());
949
950 for (uint32_t i = 0; i < _requests.size(); i++) {
951 sendFragmentToTranslation(i);
952 }
953 } else {
954 _inst->setMemAccPredicate(false);
955 }
956}
957
958template<class Impl>
959void
960LSQ<Impl>::LSQRequest::sendFragmentToTranslation(int i)
961{
962 numInTranslationFragments++;
963 _port.dTLB()->translateTiming(
964 this->request(i),
965 this->_inst->thread->getTC(), this,
966 this->isLoad() ? BaseTLB::Read : BaseTLB::Write);
967}
968
969template<class Impl>
970bool
971LSQ<Impl>::SingleDataRequest::recvTimingResp(PacketPtr pkt)
972{
973 assert(_numOutstandingPackets == 1);
974 auto state = dynamic_cast<LSQSenderState*>(pkt->senderState);
975 setState(State::Complete);
976 flags.set(Flag::Complete);
977 state->outstanding--;
978 assert(pkt == _packets.front());
979 _port.completeDataAccess(pkt);
980 return true;
981}
982
983template<class Impl>
984bool
985LSQ<Impl>::SplitDataRequest::recvTimingResp(PacketPtr pkt)
986{
987 auto state = dynamic_cast<LSQSenderState*>(pkt->senderState);
988 uint32_t pktIdx = 0;
989 while (pktIdx < _packets.size() && pkt != _packets[pktIdx])
990 pktIdx++;
991 assert(pktIdx < _packets.size());
992 numReceivedPackets++;
993 state->outstanding--;
994 if (numReceivedPackets == _packets.size()) {
995 setState(State::Complete);
996 flags.set(Flag::Complete);
997 /* Assemble packets. */
998 PacketPtr resp = isLoad()
999 ? Packet::createRead(mainReq)
1000 : Packet::createWrite(mainReq);
1001 if (isLoad())
1002 resp->dataStatic(_inst->memData);
1003 else
1004 resp->dataStatic(_data);
1005 resp->senderState = _senderState;
1006 _port.completeDataAccess(resp);
1007 delete resp;
1008 }
1009 return true;
1010}
1011
1012template<class Impl>
1013void
1014LSQ<Impl>::SingleDataRequest::buildPackets()
1015{
1016 assert(_senderState);
1017 /* Retries do not create new packets. */
1018 if (_packets.size() == 0) {
1019 _packets.push_back(
1020 isLoad()
1021 ? Packet::createRead(request())
1022 : Packet::createWrite(request()));
1023 _packets.back()->dataStatic(_inst->memData);
1024 _packets.back()->senderState = _senderState;
1025 }
1026 assert(_packets.size() == 1);
1027}
1028
1029template<class Impl>
1030void
1031LSQ<Impl>::SplitDataRequest::buildPackets()
1032{
1033 /* Extra data?? */
1034 Addr base_address = _addr;
1035
1036 if (_packets.size() == 0) {
1037 /* New stuff */
1038 if (isLoad()) {
1039 _mainPacket = Packet::createRead(mainReq);
1040 _mainPacket->dataStatic(_inst->memData);
1041 }
1042 for (int i = 0; i < _requests.size() && _fault[i] == NoFault; i++) {
1043 RequestPtr r = _requests[i];
1044 PacketPtr pkt = isLoad() ? Packet::createRead(r)
1045 : Packet::createWrite(r);
1046 ptrdiff_t offset = r->getVaddr() - base_address;
1047 if (isLoad()) {
1048 pkt->dataStatic(_inst->memData + offset);
1049 } else {
1050 uint8_t* req_data = new uint8_t[r->getSize()];
1051 std::memcpy(req_data,
1052 _inst->memData + offset,
1053 r->getSize());
1054 pkt->dataDynamic(req_data);
1055 }
1056 pkt->senderState = _senderState;
1057 _packets.push_back(pkt);
1058 }
1059 }
1060 assert(_packets.size() > 0);
1061}
1062
1063template<class Impl>
1064void
1065LSQ<Impl>::SingleDataRequest::sendPacketToCache()
1066{
1067 assert(_numOutstandingPackets == 0);
1068 if (lsqUnit()->trySendPacket(isLoad(), _packets.at(0)))
1069 _numOutstandingPackets = 1;
1070}
1071
1072template<class Impl>
1073void
1074LSQ<Impl>::SplitDataRequest::sendPacketToCache()
1075{
1076 /* Try to send the packets. */
1077 while (numReceivedPackets + _numOutstandingPackets < _packets.size() &&
1078 lsqUnit()->trySendPacket(isLoad(),
1079 _packets.at(numReceivedPackets + _numOutstandingPackets))) {
1080 _numOutstandingPackets++;
1081 }
1082}
1083
1084template<class Impl>
1085void
1086LSQ<Impl>::SingleDataRequest::handleIprWrite(ThreadContext *thread,
1087 PacketPtr pkt)
1088{
1089 TheISA::handleIprWrite(thread, pkt);
1090}
1091
1092template<class Impl>
1093void
1094LSQ<Impl>::SplitDataRequest::handleIprWrite(ThreadContext *thread,
1095 PacketPtr mainPkt)
1096{
1097 unsigned offset = 0;
1098 for (auto r: _requests) {
1099 PacketPtr pkt = new Packet(r, MemCmd::WriteReq);
1100 pkt->dataStatic(mainPkt->getPtr<uint8_t>() + offset);
1101 TheISA::handleIprWrite(thread, pkt);
1102 offset += r->getSize();
1103 delete pkt;
1104 }
1105}
1106
1107template<class Impl>
1108Cycles
1109LSQ<Impl>::SingleDataRequest::handleIprRead(ThreadContext *thread,
1110 PacketPtr pkt)
1111{
1112 return TheISA::handleIprRead(thread, pkt);
1113}
1114
1115template<class Impl>
1116Cycles
1117LSQ<Impl>::SplitDataRequest::handleIprRead(ThreadContext *thread,
1118 PacketPtr mainPkt)
1119{
1120 Cycles delay(0);
1121 unsigned offset = 0;
1122
1123 for (auto r: _requests) {
1124 PacketPtr pkt = new Packet(r, MemCmd::ReadReq);
1125 pkt->dataStatic(mainPkt->getPtr<uint8_t>() + offset);
1126 Cycles d = TheISA::handleIprRead(thread, pkt);
1127 if (d > delay)
1128 delay = d;
1129 offset += r->getSize();
1130 delete pkt;
1131 }
1132 return delay;
1133}
1134
1135template<class Impl>
1136bool
1137LSQ<Impl>::SingleDataRequest::isCacheBlockHit(Addr blockAddr, Addr blockMask)
1138{
1139 return ( (LSQRequest::_requests[0]->getPaddr() & blockMask) == blockAddr);
1140}
1141
1142template<class Impl>
1143bool
1144LSQ<Impl>::SplitDataRequest::isCacheBlockHit(Addr blockAddr, Addr blockMask)
1145{
1146 bool is_hit = false;
1147 for (auto &r: _requests) {
1148 if ((r->getPaddr() & blockMask) == blockAddr) {
1149 is_hit = true;
1150 break;
1151 }
1152 }
1153 return is_hit;
1154}
1155
1156#endif//__CPU_O3_LSQ_IMPL_HH__