lsq_impl.hh (13652:45d94ac03a27) lsq_impl.hh (13688:5bb3bf2f2559)
1/*
2 * Copyright (c) 2011-2012, 2014, 2017-2018 ARM Limited
3 * Copyright (c) 2013 Advanced Micro Devices, Inc.
4 * All rights reserved
5 *
6 * The license below extends only to copyright in the software and shall
7 * not be construed as granting a license to any other intellectual
8 * property including but not limited to intellectual property relating
9 * to a hardware implementation of the functionality of the software
10 * licensed hereunder. You may use the software subject to the license
11 * terms below provided that you ensure that this notice is replicated
12 * unmodified and in its entirety in all distributions of the software,
13 * modified or unmodified, in source code or in binary form.
14 *
15 * Copyright (c) 2005-2006 The Regents of The University of Michigan
16 * All rights reserved.
17 *
18 * Redistribution and use in source and binary forms, with or without
19 * modification, are permitted provided that the following conditions are
20 * met: redistributions of source code must retain the above copyright
21 * notice, this list of conditions and the following disclaimer;
22 * redistributions in binary form must reproduce the above copyright
23 * notice, this list of conditions and the following disclaimer in the
24 * documentation and/or other materials provided with the distribution;
25 * neither the name of the copyright holders nor the names of its
26 * contributors may be used to endorse or promote products derived from
27 * this software without specific prior written permission.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
30 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
31 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
32 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
33 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
34 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
35 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
36 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
37 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
38 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
39 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40 *
41 * Authors: Korey Sewell
42 */
43
44#ifndef __CPU_O3_LSQ_IMPL_HH__
45#define __CPU_O3_LSQ_IMPL_HH__
46
47#include <algorithm>
48#include <list>
49#include <string>
50
51#include "base/logging.hh"
52#include "cpu/o3/lsq.hh"
53#include "debug/Drain.hh"
54#include "debug/Fetch.hh"
55#include "debug/LSQ.hh"
56#include "debug/Writeback.hh"
57#include "params/DerivO3CPU.hh"
58
59using namespace std;
60
61template <class Impl>
62LSQ<Impl>::LSQ(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params)
63 : cpu(cpu_ptr), iewStage(iew_ptr),
64 _cacheBlocked(false),
65 cacheStorePorts(params->cacheStorePorts), usedStorePorts(0),
66 lsqPolicy(params->smtLSQPolicy),
67 LQEntries(params->LQEntries),
68 SQEntries(params->SQEntries),
69 maxLQEntries(maxLSQAllocation(lsqPolicy, LQEntries, params->numThreads,
70 params->smtLSQThreshold)),
71 maxSQEntries(maxLSQAllocation(lsqPolicy, SQEntries, params->numThreads,
72 params->smtLSQThreshold)),
73 numThreads(params->numThreads)
74{
75 assert(numThreads > 0 && numThreads <= Impl::MaxThreads);
76
77 //**********************************************/
78 //************ Handle SMT Parameters ***********/
79 //**********************************************/
80
81 /* Run SMT olicy checks. */
82 if (lsqPolicy == SMTQueuePolicy::Dynamic) {
83 DPRINTF(LSQ, "LSQ sharing policy set to Dynamic\n");
84 } else if (lsqPolicy == SMTQueuePolicy::Partitioned) {
85 DPRINTF(Fetch, "LSQ sharing policy set to Partitioned: "
86 "%i entries per LQ | %i entries per SQ\n",
87 maxLQEntries,maxSQEntries);
88 } else if (lsqPolicy == SMTQueuePolicy::Threshold) {
89
90 assert(params->smtLSQThreshold > params->LQEntries);
91 assert(params->smtLSQThreshold > params->SQEntries);
92
93 DPRINTF(LSQ, "LSQ sharing policy set to Threshold: "
94 "%i entries per LQ | %i entries per SQ\n",
95 maxLQEntries,maxSQEntries);
96 } else {
97 panic("Invalid LSQ sharing policy. Options are: Dynamic, "
98 "Partitioned, Threshold");
99 }
100
101 thread.reserve(numThreads);
102 for (ThreadID tid = 0; tid < numThreads; tid++) {
103 thread.emplace_back(maxLQEntries, maxSQEntries);
104 thread[tid].init(cpu, iew_ptr, params, this, tid);
105 thread[tid].setDcachePort(&cpu_ptr->getDataPort());
106 }
107}
108
109
110template<class Impl>
111std::string
112LSQ<Impl>::name() const
113{
114 return iewStage->name() + ".lsq";
115}
116
117template<class Impl>
118void
119LSQ<Impl>::regStats()
120{
121 //Initialize LSQs
122 for (ThreadID tid = 0; tid < numThreads; tid++) {
123 thread[tid].regStats();
124 }
125}
126
127template<class Impl>
128void
129LSQ<Impl>::setActiveThreads(list<ThreadID> *at_ptr)
130{
131 activeThreads = at_ptr;
132 assert(activeThreads != 0);
133}
134
135template <class Impl>
136void
137LSQ<Impl>::drainSanityCheck() const
138{
139 assert(isDrained());
140
141 for (ThreadID tid = 0; tid < numThreads; tid++)
142 thread[tid].drainSanityCheck();
143}
144
145template <class Impl>
146bool
147LSQ<Impl>::isDrained() const
148{
149 bool drained(true);
150
151 if (!lqEmpty()) {
152 DPRINTF(Drain, "Not drained, LQ not empty.\n");
153 drained = false;
154 }
155
156 if (!sqEmpty()) {
157 DPRINTF(Drain, "Not drained, SQ not empty.\n");
158 drained = false;
159 }
160
161 return drained;
162}
163
164template <class Impl>
165void
166LSQ<Impl>::takeOverFrom()
167{
168 usedStorePorts = 0;
169 _cacheBlocked = false;
170
171 for (ThreadID tid = 0; tid < numThreads; tid++) {
172 thread[tid].takeOverFrom();
173 }
174}
175
176template<class Impl>
177bool
178LSQ<Impl>::cacheBlocked() const
179{
180 return _cacheBlocked;
181}
182
183template<class Impl>
184void
185LSQ<Impl>::cacheBlocked(bool v)
186{
187 _cacheBlocked = v;
188}
189
190template<class Impl>
191bool
192LSQ<Impl>::storePortAvailable() const
193{
194 return usedStorePorts < cacheStorePorts;
195}
196
197template<class Impl>
198void
199LSQ<Impl>::storePortBusy()
200{
201 usedStorePorts++;
202 assert(usedStorePorts <= cacheStorePorts);
203}
204
205template<class Impl>
206void
207LSQ<Impl>::insertLoad(const DynInstPtr &load_inst)
208{
209 ThreadID tid = load_inst->threadNumber;
210
211 thread[tid].insertLoad(load_inst);
212}
213
214template<class Impl>
215void
216LSQ<Impl>::insertStore(const DynInstPtr &store_inst)
217{
218 ThreadID tid = store_inst->threadNumber;
219
220 thread[tid].insertStore(store_inst);
221}
222
223template<class Impl>
224Fault
225LSQ<Impl>::executeLoad(const DynInstPtr &inst)
226{
227 ThreadID tid = inst->threadNumber;
228
229 return thread[tid].executeLoad(inst);
230}
231
232template<class Impl>
233Fault
234LSQ<Impl>::executeStore(const DynInstPtr &inst)
235{
236 ThreadID tid = inst->threadNumber;
237
238 return thread[tid].executeStore(inst);
239}
240
241template<class Impl>
242void
243LSQ<Impl>::writebackStores()
244{
245 list<ThreadID>::iterator threads = activeThreads->begin();
246 list<ThreadID>::iterator end = activeThreads->end();
247
248 while (threads != end) {
249 ThreadID tid = *threads++;
250
251 if (numStoresToWB(tid) > 0) {
252 DPRINTF(Writeback,"[tid:%i] Writing back stores. %i stores "
253 "available for Writeback.\n", tid, numStoresToWB(tid));
254 }
255
256 thread[tid].writebackStores();
257 }
258}
259
260template<class Impl>
261bool
262LSQ<Impl>::violation()
263{
264 /* Answers: Does Anybody Have a Violation?*/
265 list<ThreadID>::iterator threads = activeThreads->begin();
266 list<ThreadID>::iterator end = activeThreads->end();
267
268 while (threads != end) {
269 ThreadID tid = *threads++;
270
271 if (thread[tid].violation())
272 return true;
273 }
274
275 return false;
276}
277
278template <class Impl>
279void
280LSQ<Impl>::recvReqRetry()
281{
282 iewStage->cacheUnblocked();
283 cacheBlocked(false);
284
285 for (ThreadID tid : *activeThreads) {
286 thread[tid].recvRetry();
287 }
288}
289
290template <class Impl>
291void
292LSQ<Impl>::completeDataAccess(PacketPtr pkt)
293{
294 auto senderState = dynamic_cast<LSQSenderState*>(pkt->senderState);
295 thread[cpu->contextToThread(senderState->contextId())]
296 .completeDataAccess(pkt);
297}
298
299template <class Impl>
300bool
301LSQ<Impl>::recvTimingResp(PacketPtr pkt)
302{
303 if (pkt->isError())
304 DPRINTF(LSQ, "Got error packet back for address: %#X\n",
305 pkt->getAddr());
306
307 auto senderState = dynamic_cast<LSQSenderState*>(pkt->senderState);
308 panic_if(!senderState, "Got packet back with unknown sender state\n");
309
310 thread[cpu->contextToThread(senderState->contextId())].recvTimingResp(pkt);
311
312 if (pkt->isInvalidate()) {
313 // This response also contains an invalidate; e.g. this can be the case
314 // if cmd is ReadRespWithInvalidate.
315 //
316 // The calling order between completeDataAccess and checkSnoop matters.
317 // By calling checkSnoop after completeDataAccess, we ensure that the
318 // fault set by checkSnoop is not lost. Calling writeback (more
319 // specifically inst->completeAcc) in completeDataAccess overwrites
320 // fault, and in case this instruction requires squashing (as
321 // determined by checkSnoop), the ReExec fault set by checkSnoop would
322 // be lost otherwise.
323
324 DPRINTF(LSQ, "received invalidation with response for addr:%#x\n",
325 pkt->getAddr());
326
327 for (ThreadID tid = 0; tid < numThreads; tid++) {
328 thread[tid].checkSnoop(pkt);
329 }
330 }
331 // Update the LSQRequest state (this may delete the request)
332 senderState->request()->packetReplied();
333
334 return true;
335}
336
337template <class Impl>
338void
339LSQ<Impl>::recvTimingSnoopReq(PacketPtr pkt)
340{
341 DPRINTF(LSQ, "received pkt for addr:%#x %s\n", pkt->getAddr(),
342 pkt->cmdString());
343
344 // must be a snoop
345 if (pkt->isInvalidate()) {
346 DPRINTF(LSQ, "received invalidation for addr:%#x\n",
347 pkt->getAddr());
348 for (ThreadID tid = 0; tid < numThreads; tid++) {
349 thread[tid].checkSnoop(pkt);
350 }
351 }
352}
353
354template<class Impl>
355int
356LSQ<Impl>::getCount()
357{
358 unsigned total = 0;
359
360 list<ThreadID>::iterator threads = activeThreads->begin();
361 list<ThreadID>::iterator end = activeThreads->end();
362
363 while (threads != end) {
364 ThreadID tid = *threads++;
365
366 total += getCount(tid);
367 }
368
369 return total;
370}
371
372template<class Impl>
373int
374LSQ<Impl>::numLoads()
375{
376 unsigned total = 0;
377
378 list<ThreadID>::iterator threads = activeThreads->begin();
379 list<ThreadID>::iterator end = activeThreads->end();
380
381 while (threads != end) {
382 ThreadID tid = *threads++;
383
384 total += numLoads(tid);
385 }
386
387 return total;
388}
389
390template<class Impl>
391int
392LSQ<Impl>::numStores()
393{
394 unsigned total = 0;
395
396 list<ThreadID>::iterator threads = activeThreads->begin();
397 list<ThreadID>::iterator end = activeThreads->end();
398
399 while (threads != end) {
400 ThreadID tid = *threads++;
401
402 total += thread[tid].numStores();
403 }
404
405 return total;
406}
407
408template<class Impl>
409unsigned
410LSQ<Impl>::numFreeLoadEntries()
411{
412 unsigned total = 0;
413
414 list<ThreadID>::iterator threads = activeThreads->begin();
415 list<ThreadID>::iterator end = activeThreads->end();
416
417 while (threads != end) {
418 ThreadID tid = *threads++;
419
420 total += thread[tid].numFreeLoadEntries();
421 }
422
423 return total;
424}
425
426template<class Impl>
427unsigned
428LSQ<Impl>::numFreeStoreEntries()
429{
430 unsigned total = 0;
431
432 list<ThreadID>::iterator threads = activeThreads->begin();
433 list<ThreadID>::iterator end = activeThreads->end();
434
435 while (threads != end) {
436 ThreadID tid = *threads++;
437
438 total += thread[tid].numFreeStoreEntries();
439 }
440
441 return total;
442}
443
444template<class Impl>
445unsigned
446LSQ<Impl>::numFreeLoadEntries(ThreadID tid)
447{
448 return thread[tid].numFreeLoadEntries();
449}
450
451template<class Impl>
452unsigned
453LSQ<Impl>::numFreeStoreEntries(ThreadID tid)
454{
455 return thread[tid].numFreeStoreEntries();
456}
457
458template<class Impl>
459bool
460LSQ<Impl>::isFull()
461{
462 list<ThreadID>::iterator threads = activeThreads->begin();
463 list<ThreadID>::iterator end = activeThreads->end();
464
465 while (threads != end) {
466 ThreadID tid = *threads++;
467
468 if (!(thread[tid].lqFull() || thread[tid].sqFull()))
469 return false;
470 }
471
472 return true;
473}
474
475template<class Impl>
476bool
477LSQ<Impl>::isFull(ThreadID tid)
478{
479 //@todo: Change to Calculate All Entries for
480 //Dynamic Policy
481 if (lsqPolicy == SMTQueuePolicy::Dynamic)
482 return isFull();
483 else
484 return thread[tid].lqFull() || thread[tid].sqFull();
485}
486
487template<class Impl>
488bool
489LSQ<Impl>::isEmpty() const
490{
491 return lqEmpty() && sqEmpty();
492}
493
494template<class Impl>
495bool
496LSQ<Impl>::lqEmpty() const
497{
498 list<ThreadID>::const_iterator threads = activeThreads->begin();
499 list<ThreadID>::const_iterator end = activeThreads->end();
500
501 while (threads != end) {
502 ThreadID tid = *threads++;
503
504 if (!thread[tid].lqEmpty())
505 return false;
506 }
507
508 return true;
509}
510
511template<class Impl>
512bool
513LSQ<Impl>::sqEmpty() const
514{
515 list<ThreadID>::const_iterator threads = activeThreads->begin();
516 list<ThreadID>::const_iterator end = activeThreads->end();
517
518 while (threads != end) {
519 ThreadID tid = *threads++;
520
521 if (!thread[tid].sqEmpty())
522 return false;
523 }
524
525 return true;
526}
527
528template<class Impl>
529bool
530LSQ<Impl>::lqFull()
531{
532 list<ThreadID>::iterator threads = activeThreads->begin();
533 list<ThreadID>::iterator end = activeThreads->end();
534
535 while (threads != end) {
536 ThreadID tid = *threads++;
537
538 if (!thread[tid].lqFull())
539 return false;
540 }
541
542 return true;
543}
544
545template<class Impl>
546bool
547LSQ<Impl>::lqFull(ThreadID tid)
548{
549 //@todo: Change to Calculate All Entries for
550 //Dynamic Policy
551 if (lsqPolicy == SMTQueuePolicy::Dynamic)
552 return lqFull();
553 else
554 return thread[tid].lqFull();
555}
556
557template<class Impl>
558bool
559LSQ<Impl>::sqFull()
560{
561 list<ThreadID>::iterator threads = activeThreads->begin();
562 list<ThreadID>::iterator end = activeThreads->end();
563
564 while (threads != end) {
565 ThreadID tid = *threads++;
566
567 if (!sqFull(tid))
568 return false;
569 }
570
571 return true;
572}
573
574template<class Impl>
575bool
576LSQ<Impl>::sqFull(ThreadID tid)
577{
578 //@todo: Change to Calculate All Entries for
579 //Dynamic Policy
580 if (lsqPolicy == SMTQueuePolicy::Dynamic)
581 return sqFull();
582 else
583 return thread[tid].sqFull();
584}
585
586template<class Impl>
587bool
588LSQ<Impl>::isStalled()
589{
590 list<ThreadID>::iterator threads = activeThreads->begin();
591 list<ThreadID>::iterator end = activeThreads->end();
592
593 while (threads != end) {
594 ThreadID tid = *threads++;
595
596 if (!thread[tid].isStalled())
597 return false;
598 }
599
600 return true;
601}
602
603template<class Impl>
604bool
605LSQ<Impl>::isStalled(ThreadID tid)
606{
607 if (lsqPolicy == SMTQueuePolicy::Dynamic)
608 return isStalled();
609 else
610 return thread[tid].isStalled();
611}
612
613template<class Impl>
614bool
615LSQ<Impl>::hasStoresToWB()
616{
617 list<ThreadID>::iterator threads = activeThreads->begin();
618 list<ThreadID>::iterator end = activeThreads->end();
619
620 while (threads != end) {
621 ThreadID tid = *threads++;
622
623 if (hasStoresToWB(tid))
624 return true;
625 }
626
627 return false;
628}
629
630template<class Impl>
631bool
632LSQ<Impl>::willWB()
633{
634 list<ThreadID>::iterator threads = activeThreads->begin();
635 list<ThreadID>::iterator end = activeThreads->end();
636
637 while (threads != end) {
638 ThreadID tid = *threads++;
639
640 if (willWB(tid))
641 return true;
642 }
643
644 return false;
645}
646
647template<class Impl>
648void
649LSQ<Impl>::dumpInsts() const
650{
651 list<ThreadID>::const_iterator threads = activeThreads->begin();
652 list<ThreadID>::const_iterator end = activeThreads->end();
653
654 while (threads != end) {
655 ThreadID tid = *threads++;
656
657 thread[tid].dumpInsts();
658 }
659}
660
661static Addr
662addrBlockOffset(Addr addr, unsigned int block_size)
663{
664 return addr & (block_size - 1);
665}
666
667static Addr
668addrBlockAlign(Addr addr, uint64_t block_size)
669{
670 return addr & ~(block_size - 1);
671}
672
673static bool
674transferNeedsBurst(Addr addr, uint64_t size, uint64_t block_size)
675{
676 return (addrBlockOffset(addr, block_size) + size) > block_size;
677}
678
679template<class Impl>
680Fault
681LSQ<Impl>::pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data,
682 unsigned int size, Addr addr, Request::Flags flags,
683 uint64_t *res, AtomicOpFunctor *amo_op)
684{
685 // This incoming request can be either load, store or atomic.
686 // Atomic request has a corresponding pointer to its atomic memory
687 // operation
1/*
2 * Copyright (c) 2011-2012, 2014, 2017-2018 ARM Limited
3 * Copyright (c) 2013 Advanced Micro Devices, Inc.
4 * All rights reserved
5 *
6 * The license below extends only to copyright in the software and shall
7 * not be construed as granting a license to any other intellectual
8 * property including but not limited to intellectual property relating
9 * to a hardware implementation of the functionality of the software
10 * licensed hereunder. You may use the software subject to the license
11 * terms below provided that you ensure that this notice is replicated
12 * unmodified and in its entirety in all distributions of the software,
13 * modified or unmodified, in source code or in binary form.
14 *
15 * Copyright (c) 2005-2006 The Regents of The University of Michigan
16 * All rights reserved.
17 *
18 * Redistribution and use in source and binary forms, with or without
19 * modification, are permitted provided that the following conditions are
20 * met: redistributions of source code must retain the above copyright
21 * notice, this list of conditions and the following disclaimer;
22 * redistributions in binary form must reproduce the above copyright
23 * notice, this list of conditions and the following disclaimer in the
24 * documentation and/or other materials provided with the distribution;
25 * neither the name of the copyright holders nor the names of its
26 * contributors may be used to endorse or promote products derived from
27 * this software without specific prior written permission.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
30 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
31 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
32 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
33 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
34 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
35 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
36 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
37 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
38 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
39 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40 *
41 * Authors: Korey Sewell
42 */
43
44#ifndef __CPU_O3_LSQ_IMPL_HH__
45#define __CPU_O3_LSQ_IMPL_HH__
46
47#include <algorithm>
48#include <list>
49#include <string>
50
51#include "base/logging.hh"
52#include "cpu/o3/lsq.hh"
53#include "debug/Drain.hh"
54#include "debug/Fetch.hh"
55#include "debug/LSQ.hh"
56#include "debug/Writeback.hh"
57#include "params/DerivO3CPU.hh"
58
59using namespace std;
60
61template <class Impl>
62LSQ<Impl>::LSQ(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params)
63 : cpu(cpu_ptr), iewStage(iew_ptr),
64 _cacheBlocked(false),
65 cacheStorePorts(params->cacheStorePorts), usedStorePorts(0),
66 lsqPolicy(params->smtLSQPolicy),
67 LQEntries(params->LQEntries),
68 SQEntries(params->SQEntries),
69 maxLQEntries(maxLSQAllocation(lsqPolicy, LQEntries, params->numThreads,
70 params->smtLSQThreshold)),
71 maxSQEntries(maxLSQAllocation(lsqPolicy, SQEntries, params->numThreads,
72 params->smtLSQThreshold)),
73 numThreads(params->numThreads)
74{
75 assert(numThreads > 0 && numThreads <= Impl::MaxThreads);
76
77 //**********************************************/
78 //************ Handle SMT Parameters ***********/
79 //**********************************************/
80
81 /* Run SMT olicy checks. */
82 if (lsqPolicy == SMTQueuePolicy::Dynamic) {
83 DPRINTF(LSQ, "LSQ sharing policy set to Dynamic\n");
84 } else if (lsqPolicy == SMTQueuePolicy::Partitioned) {
85 DPRINTF(Fetch, "LSQ sharing policy set to Partitioned: "
86 "%i entries per LQ | %i entries per SQ\n",
87 maxLQEntries,maxSQEntries);
88 } else if (lsqPolicy == SMTQueuePolicy::Threshold) {
89
90 assert(params->smtLSQThreshold > params->LQEntries);
91 assert(params->smtLSQThreshold > params->SQEntries);
92
93 DPRINTF(LSQ, "LSQ sharing policy set to Threshold: "
94 "%i entries per LQ | %i entries per SQ\n",
95 maxLQEntries,maxSQEntries);
96 } else {
97 panic("Invalid LSQ sharing policy. Options are: Dynamic, "
98 "Partitioned, Threshold");
99 }
100
101 thread.reserve(numThreads);
102 for (ThreadID tid = 0; tid < numThreads; tid++) {
103 thread.emplace_back(maxLQEntries, maxSQEntries);
104 thread[tid].init(cpu, iew_ptr, params, this, tid);
105 thread[tid].setDcachePort(&cpu_ptr->getDataPort());
106 }
107}
108
109
110template<class Impl>
111std::string
112LSQ<Impl>::name() const
113{
114 return iewStage->name() + ".lsq";
115}
116
117template<class Impl>
118void
119LSQ<Impl>::regStats()
120{
121 //Initialize LSQs
122 for (ThreadID tid = 0; tid < numThreads; tid++) {
123 thread[tid].regStats();
124 }
125}
126
127template<class Impl>
128void
129LSQ<Impl>::setActiveThreads(list<ThreadID> *at_ptr)
130{
131 activeThreads = at_ptr;
132 assert(activeThreads != 0);
133}
134
135template <class Impl>
136void
137LSQ<Impl>::drainSanityCheck() const
138{
139 assert(isDrained());
140
141 for (ThreadID tid = 0; tid < numThreads; tid++)
142 thread[tid].drainSanityCheck();
143}
144
145template <class Impl>
146bool
147LSQ<Impl>::isDrained() const
148{
149 bool drained(true);
150
151 if (!lqEmpty()) {
152 DPRINTF(Drain, "Not drained, LQ not empty.\n");
153 drained = false;
154 }
155
156 if (!sqEmpty()) {
157 DPRINTF(Drain, "Not drained, SQ not empty.\n");
158 drained = false;
159 }
160
161 return drained;
162}
163
164template <class Impl>
165void
166LSQ<Impl>::takeOverFrom()
167{
168 usedStorePorts = 0;
169 _cacheBlocked = false;
170
171 for (ThreadID tid = 0; tid < numThreads; tid++) {
172 thread[tid].takeOverFrom();
173 }
174}
175
176template<class Impl>
177bool
178LSQ<Impl>::cacheBlocked() const
179{
180 return _cacheBlocked;
181}
182
183template<class Impl>
184void
185LSQ<Impl>::cacheBlocked(bool v)
186{
187 _cacheBlocked = v;
188}
189
190template<class Impl>
191bool
192LSQ<Impl>::storePortAvailable() const
193{
194 return usedStorePorts < cacheStorePorts;
195}
196
197template<class Impl>
198void
199LSQ<Impl>::storePortBusy()
200{
201 usedStorePorts++;
202 assert(usedStorePorts <= cacheStorePorts);
203}
204
205template<class Impl>
206void
207LSQ<Impl>::insertLoad(const DynInstPtr &load_inst)
208{
209 ThreadID tid = load_inst->threadNumber;
210
211 thread[tid].insertLoad(load_inst);
212}
213
214template<class Impl>
215void
216LSQ<Impl>::insertStore(const DynInstPtr &store_inst)
217{
218 ThreadID tid = store_inst->threadNumber;
219
220 thread[tid].insertStore(store_inst);
221}
222
223template<class Impl>
224Fault
225LSQ<Impl>::executeLoad(const DynInstPtr &inst)
226{
227 ThreadID tid = inst->threadNumber;
228
229 return thread[tid].executeLoad(inst);
230}
231
232template<class Impl>
233Fault
234LSQ<Impl>::executeStore(const DynInstPtr &inst)
235{
236 ThreadID tid = inst->threadNumber;
237
238 return thread[tid].executeStore(inst);
239}
240
241template<class Impl>
242void
243LSQ<Impl>::writebackStores()
244{
245 list<ThreadID>::iterator threads = activeThreads->begin();
246 list<ThreadID>::iterator end = activeThreads->end();
247
248 while (threads != end) {
249 ThreadID tid = *threads++;
250
251 if (numStoresToWB(tid) > 0) {
252 DPRINTF(Writeback,"[tid:%i] Writing back stores. %i stores "
253 "available for Writeback.\n", tid, numStoresToWB(tid));
254 }
255
256 thread[tid].writebackStores();
257 }
258}
259
260template<class Impl>
261bool
262LSQ<Impl>::violation()
263{
264 /* Answers: Does Anybody Have a Violation?*/
265 list<ThreadID>::iterator threads = activeThreads->begin();
266 list<ThreadID>::iterator end = activeThreads->end();
267
268 while (threads != end) {
269 ThreadID tid = *threads++;
270
271 if (thread[tid].violation())
272 return true;
273 }
274
275 return false;
276}
277
278template <class Impl>
279void
280LSQ<Impl>::recvReqRetry()
281{
282 iewStage->cacheUnblocked();
283 cacheBlocked(false);
284
285 for (ThreadID tid : *activeThreads) {
286 thread[tid].recvRetry();
287 }
288}
289
290template <class Impl>
291void
292LSQ<Impl>::completeDataAccess(PacketPtr pkt)
293{
294 auto senderState = dynamic_cast<LSQSenderState*>(pkt->senderState);
295 thread[cpu->contextToThread(senderState->contextId())]
296 .completeDataAccess(pkt);
297}
298
299template <class Impl>
300bool
301LSQ<Impl>::recvTimingResp(PacketPtr pkt)
302{
303 if (pkt->isError())
304 DPRINTF(LSQ, "Got error packet back for address: %#X\n",
305 pkt->getAddr());
306
307 auto senderState = dynamic_cast<LSQSenderState*>(pkt->senderState);
308 panic_if(!senderState, "Got packet back with unknown sender state\n");
309
310 thread[cpu->contextToThread(senderState->contextId())].recvTimingResp(pkt);
311
312 if (pkt->isInvalidate()) {
313 // This response also contains an invalidate; e.g. this can be the case
314 // if cmd is ReadRespWithInvalidate.
315 //
316 // The calling order between completeDataAccess and checkSnoop matters.
317 // By calling checkSnoop after completeDataAccess, we ensure that the
318 // fault set by checkSnoop is not lost. Calling writeback (more
319 // specifically inst->completeAcc) in completeDataAccess overwrites
320 // fault, and in case this instruction requires squashing (as
321 // determined by checkSnoop), the ReExec fault set by checkSnoop would
322 // be lost otherwise.
323
324 DPRINTF(LSQ, "received invalidation with response for addr:%#x\n",
325 pkt->getAddr());
326
327 for (ThreadID tid = 0; tid < numThreads; tid++) {
328 thread[tid].checkSnoop(pkt);
329 }
330 }
331 // Update the LSQRequest state (this may delete the request)
332 senderState->request()->packetReplied();
333
334 return true;
335}
336
337template <class Impl>
338void
339LSQ<Impl>::recvTimingSnoopReq(PacketPtr pkt)
340{
341 DPRINTF(LSQ, "received pkt for addr:%#x %s\n", pkt->getAddr(),
342 pkt->cmdString());
343
344 // must be a snoop
345 if (pkt->isInvalidate()) {
346 DPRINTF(LSQ, "received invalidation for addr:%#x\n",
347 pkt->getAddr());
348 for (ThreadID tid = 0; tid < numThreads; tid++) {
349 thread[tid].checkSnoop(pkt);
350 }
351 }
352}
353
354template<class Impl>
355int
356LSQ<Impl>::getCount()
357{
358 unsigned total = 0;
359
360 list<ThreadID>::iterator threads = activeThreads->begin();
361 list<ThreadID>::iterator end = activeThreads->end();
362
363 while (threads != end) {
364 ThreadID tid = *threads++;
365
366 total += getCount(tid);
367 }
368
369 return total;
370}
371
372template<class Impl>
373int
374LSQ<Impl>::numLoads()
375{
376 unsigned total = 0;
377
378 list<ThreadID>::iterator threads = activeThreads->begin();
379 list<ThreadID>::iterator end = activeThreads->end();
380
381 while (threads != end) {
382 ThreadID tid = *threads++;
383
384 total += numLoads(tid);
385 }
386
387 return total;
388}
389
390template<class Impl>
391int
392LSQ<Impl>::numStores()
393{
394 unsigned total = 0;
395
396 list<ThreadID>::iterator threads = activeThreads->begin();
397 list<ThreadID>::iterator end = activeThreads->end();
398
399 while (threads != end) {
400 ThreadID tid = *threads++;
401
402 total += thread[tid].numStores();
403 }
404
405 return total;
406}
407
408template<class Impl>
409unsigned
410LSQ<Impl>::numFreeLoadEntries()
411{
412 unsigned total = 0;
413
414 list<ThreadID>::iterator threads = activeThreads->begin();
415 list<ThreadID>::iterator end = activeThreads->end();
416
417 while (threads != end) {
418 ThreadID tid = *threads++;
419
420 total += thread[tid].numFreeLoadEntries();
421 }
422
423 return total;
424}
425
426template<class Impl>
427unsigned
428LSQ<Impl>::numFreeStoreEntries()
429{
430 unsigned total = 0;
431
432 list<ThreadID>::iterator threads = activeThreads->begin();
433 list<ThreadID>::iterator end = activeThreads->end();
434
435 while (threads != end) {
436 ThreadID tid = *threads++;
437
438 total += thread[tid].numFreeStoreEntries();
439 }
440
441 return total;
442}
443
444template<class Impl>
445unsigned
446LSQ<Impl>::numFreeLoadEntries(ThreadID tid)
447{
448 return thread[tid].numFreeLoadEntries();
449}
450
451template<class Impl>
452unsigned
453LSQ<Impl>::numFreeStoreEntries(ThreadID tid)
454{
455 return thread[tid].numFreeStoreEntries();
456}
457
458template<class Impl>
459bool
460LSQ<Impl>::isFull()
461{
462 list<ThreadID>::iterator threads = activeThreads->begin();
463 list<ThreadID>::iterator end = activeThreads->end();
464
465 while (threads != end) {
466 ThreadID tid = *threads++;
467
468 if (!(thread[tid].lqFull() || thread[tid].sqFull()))
469 return false;
470 }
471
472 return true;
473}
474
475template<class Impl>
476bool
477LSQ<Impl>::isFull(ThreadID tid)
478{
479 //@todo: Change to Calculate All Entries for
480 //Dynamic Policy
481 if (lsqPolicy == SMTQueuePolicy::Dynamic)
482 return isFull();
483 else
484 return thread[tid].lqFull() || thread[tid].sqFull();
485}
486
487template<class Impl>
488bool
489LSQ<Impl>::isEmpty() const
490{
491 return lqEmpty() && sqEmpty();
492}
493
494template<class Impl>
495bool
496LSQ<Impl>::lqEmpty() const
497{
498 list<ThreadID>::const_iterator threads = activeThreads->begin();
499 list<ThreadID>::const_iterator end = activeThreads->end();
500
501 while (threads != end) {
502 ThreadID tid = *threads++;
503
504 if (!thread[tid].lqEmpty())
505 return false;
506 }
507
508 return true;
509}
510
511template<class Impl>
512bool
513LSQ<Impl>::sqEmpty() const
514{
515 list<ThreadID>::const_iterator threads = activeThreads->begin();
516 list<ThreadID>::const_iterator end = activeThreads->end();
517
518 while (threads != end) {
519 ThreadID tid = *threads++;
520
521 if (!thread[tid].sqEmpty())
522 return false;
523 }
524
525 return true;
526}
527
528template<class Impl>
529bool
530LSQ<Impl>::lqFull()
531{
532 list<ThreadID>::iterator threads = activeThreads->begin();
533 list<ThreadID>::iterator end = activeThreads->end();
534
535 while (threads != end) {
536 ThreadID tid = *threads++;
537
538 if (!thread[tid].lqFull())
539 return false;
540 }
541
542 return true;
543}
544
545template<class Impl>
546bool
547LSQ<Impl>::lqFull(ThreadID tid)
548{
549 //@todo: Change to Calculate All Entries for
550 //Dynamic Policy
551 if (lsqPolicy == SMTQueuePolicy::Dynamic)
552 return lqFull();
553 else
554 return thread[tid].lqFull();
555}
556
557template<class Impl>
558bool
559LSQ<Impl>::sqFull()
560{
561 list<ThreadID>::iterator threads = activeThreads->begin();
562 list<ThreadID>::iterator end = activeThreads->end();
563
564 while (threads != end) {
565 ThreadID tid = *threads++;
566
567 if (!sqFull(tid))
568 return false;
569 }
570
571 return true;
572}
573
574template<class Impl>
575bool
576LSQ<Impl>::sqFull(ThreadID tid)
577{
578 //@todo: Change to Calculate All Entries for
579 //Dynamic Policy
580 if (lsqPolicy == SMTQueuePolicy::Dynamic)
581 return sqFull();
582 else
583 return thread[tid].sqFull();
584}
585
586template<class Impl>
587bool
588LSQ<Impl>::isStalled()
589{
590 list<ThreadID>::iterator threads = activeThreads->begin();
591 list<ThreadID>::iterator end = activeThreads->end();
592
593 while (threads != end) {
594 ThreadID tid = *threads++;
595
596 if (!thread[tid].isStalled())
597 return false;
598 }
599
600 return true;
601}
602
603template<class Impl>
604bool
605LSQ<Impl>::isStalled(ThreadID tid)
606{
607 if (lsqPolicy == SMTQueuePolicy::Dynamic)
608 return isStalled();
609 else
610 return thread[tid].isStalled();
611}
612
613template<class Impl>
614bool
615LSQ<Impl>::hasStoresToWB()
616{
617 list<ThreadID>::iterator threads = activeThreads->begin();
618 list<ThreadID>::iterator end = activeThreads->end();
619
620 while (threads != end) {
621 ThreadID tid = *threads++;
622
623 if (hasStoresToWB(tid))
624 return true;
625 }
626
627 return false;
628}
629
630template<class Impl>
631bool
632LSQ<Impl>::willWB()
633{
634 list<ThreadID>::iterator threads = activeThreads->begin();
635 list<ThreadID>::iterator end = activeThreads->end();
636
637 while (threads != end) {
638 ThreadID tid = *threads++;
639
640 if (willWB(tid))
641 return true;
642 }
643
644 return false;
645}
646
647template<class Impl>
648void
649LSQ<Impl>::dumpInsts() const
650{
651 list<ThreadID>::const_iterator threads = activeThreads->begin();
652 list<ThreadID>::const_iterator end = activeThreads->end();
653
654 while (threads != end) {
655 ThreadID tid = *threads++;
656
657 thread[tid].dumpInsts();
658 }
659}
660
661static Addr
662addrBlockOffset(Addr addr, unsigned int block_size)
663{
664 return addr & (block_size - 1);
665}
666
667static Addr
668addrBlockAlign(Addr addr, uint64_t block_size)
669{
670 return addr & ~(block_size - 1);
671}
672
673static bool
674transferNeedsBurst(Addr addr, uint64_t size, uint64_t block_size)
675{
676 return (addrBlockOffset(addr, block_size) + size) > block_size;
677}
678
679template<class Impl>
680Fault
681LSQ<Impl>::pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data,
682 unsigned int size, Addr addr, Request::Flags flags,
683 uint64_t *res, AtomicOpFunctor *amo_op)
684{
685 // This comming request can be either load, store or atomic.
686 // Atomic request has a corresponding pointer to its atomic memory
687 // operation
688 bool isAtomic = !isLoad && amo_op;
688 bool isAtomic M5_VAR_USED = !isLoad && amo_op;
689
690 ThreadID tid = cpu->contextToThread(inst->contextId());
691 auto cacheLineSize = cpu->cacheLineSize();
692 bool needs_burst = transferNeedsBurst(addr, size, cacheLineSize);
693 LSQRequest* req = nullptr;
694
695 // Atomic requests that access data across cache line boundary are
696 // currently not allowed since the cache does not guarantee corresponding
697 // atomic memory operations to be executed atomically across a cache line.
698 // For ISAs such as x86 that supports cross-cache-line atomic instructions,
699 // the cache needs to be modified to perform atomic update to both cache
700 // lines. For now, such cross-line update is not supported.
701 assert(!isAtomic || (isAtomic && !needs_burst));
702
703 if (inst->translationStarted()) {
704 req = inst->savedReq;
705 assert(req);
706 } else {
707 if (needs_burst) {
708 req = new SplitDataRequest(&thread[tid], inst, isLoad, addr,
709 size, flags, data, res);
710 } else {
711 req = new SingleDataRequest(&thread[tid], inst, isLoad, addr,
712 size, flags, data, res, amo_op);
713 }
714 assert(req);
715 inst->setRequest();
716 req->taskId(cpu->taskId());
717
718 req->initiateTranslation();
719 }
720
721 /* This is the place were instructions get the effAddr. */
722 if (req->isTranslationComplete()) {
723 if (inst->getFault() == NoFault) {
724 inst->effAddr = req->getVaddr();
725 inst->effSize = size;
726 inst->effAddrValid(true);
727
728 if (cpu->checker) {
729 inst->reqToVerify = std::make_shared<Request>(*req->request());
730 }
731 if (isLoad)
732 inst->getFault() = cpu->read(req, inst->lqIdx);
733 else
734 inst->getFault() = cpu->write(req, data, inst->sqIdx);
735 } else if (isLoad) {
736 // Commit will have to clean up whatever happened. Set this
737 // instruction as executed.
738 inst->setExecuted();
739 }
740 }
741
742 if (inst->traceData)
743 inst->traceData->setMem(addr, size, flags);
744
745 return inst->getFault();
746}
747
748template<class Impl>
749void
750LSQ<Impl>::SingleDataRequest::finish(const Fault &fault, const RequestPtr &req,
751 ThreadContext* tc, BaseTLB::Mode mode)
752{
753 _fault.push_back(fault);
754 numInTranslationFragments = 0;
755 numTranslatedFragments = 1;
756 /* If the instruction has been squahsed, let the request know
757 * as it may have to self-destruct. */
758 if (_inst->isSquashed()) {
759 this->squashTranslation();
760 } else {
761 _inst->strictlyOrdered(req->isStrictlyOrdered());
762
763 flags.set(Flag::TranslationFinished);
764 if (fault == NoFault) {
765 _inst->physEffAddr = req->getPaddr();
766 _inst->memReqFlags = req->getFlags();
767 if (req->isCondSwap()) {
768 assert(_res);
769 req->setExtraData(*_res);
770 }
771 setState(State::Request);
772 } else {
773 setState(State::Fault);
774 }
775
776 LSQRequest::_inst->fault = fault;
777 LSQRequest::_inst->translationCompleted(true);
778 }
779}
780
781template<class Impl>
782void
783LSQ<Impl>::SplitDataRequest::finish(const Fault &fault, const RequestPtr &req,
784 ThreadContext* tc, BaseTLB::Mode mode)
785{
786 _fault.push_back(fault);
787 assert(req == _requests[numTranslatedFragments] || this->isDelayed());
788
789 numInTranslationFragments--;
790 numTranslatedFragments++;
791
792 mainReq->setFlags(req->getFlags());
793
794 if (numTranslatedFragments == _requests.size()) {
795 if (_inst->isSquashed()) {
796 this->squashTranslation();
797 } else {
798 _inst->strictlyOrdered(mainReq->isStrictlyOrdered());
799 flags.set(Flag::TranslationFinished);
800 auto fault_it = _fault.begin();
801 /* Ffwd to the first NoFault. */
802 while (fault_it != _fault.end() && *fault_it == NoFault)
803 fault_it++;
804 /* If none of the fragments faulted: */
805 if (fault_it == _fault.end()) {
806 _inst->physEffAddr = request(0)->getPaddr();
807
808 _inst->memReqFlags = mainReq->getFlags();
809 if (mainReq->isCondSwap()) {
810 assert(_res);
811 mainReq->setExtraData(*_res);
812 }
813 setState(State::Request);
814 _inst->fault = NoFault;
815 } else {
816 setState(State::Fault);
817 _inst->fault = *fault_it;
818 }
819 _inst->translationCompleted(true);
820 }
821 }
822}
823
824template<class Impl>
825void
826LSQ<Impl>::SingleDataRequest::initiateTranslation()
827{
828 _inst->translationStarted(true);
829 setState(State::Translation);
830 flags.set(Flag::TranslationStarted);
831
832 _inst->savedReq = this;
833 sendFragmentToTranslation(0);
834
835 if (isTranslationComplete()) {
836 }
837}
838
839template<class Impl>
840PacketPtr
841LSQ<Impl>::SplitDataRequest::mainPacket()
842{
843 return _mainPacket;
844}
845
846template<class Impl>
847RequestPtr
848LSQ<Impl>::SplitDataRequest::mainRequest()
849{
850 return mainReq;
851}
852
853template<class Impl>
854void
855LSQ<Impl>::SplitDataRequest::initiateTranslation()
856{
857 _inst->translationStarted(true);
858 setState(State::Translation);
859 flags.set(Flag::TranslationStarted);
860
861 unsigned int cacheLineSize = _port.cacheLineSize();
862 Addr base_addr = _addr;
863 Addr next_addr = addrBlockAlign(_addr + cacheLineSize, cacheLineSize);
864 Addr final_addr = addrBlockAlign(_addr + _size, cacheLineSize);
865 uint32_t size_so_far = 0;
866
867 mainReq = std::make_shared<Request>(_inst->getASID(), base_addr,
868 _size, _flags, _inst->masterId(),
869 _inst->instAddr(), _inst->contextId());
870
871 // Paddr is not used in mainReq. However, we will accumulate the flags
872 // from the sub requests into mainReq by calling setFlags() in finish().
873 // setFlags() assumes that paddr is set so flip the paddr valid bit here to
874 // avoid a potential assert in setFlags() when we call it from finish().
875 mainReq->setPaddr(0);
876
877 /* Get the pre-fix, possibly unaligned. */
878 _requests.push_back(std::make_shared<Request>(_inst->getASID(), base_addr,
879 next_addr - base_addr, _flags, _inst->masterId(),
880 _inst->instAddr(), _inst->contextId()));
881 size_so_far = next_addr - base_addr;
882
883 /* We are block aligned now, reading whole blocks. */
884 base_addr = next_addr;
885 while (base_addr != final_addr) {
886 _requests.push_back(std::make_shared<Request>(_inst->getASID(),
887 base_addr, cacheLineSize, _flags, _inst->masterId(),
888 _inst->instAddr(), _inst->contextId()));
889 size_so_far += cacheLineSize;
890 base_addr += cacheLineSize;
891 }
892
893 /* Deal with the tail. */
894 if (size_so_far < _size) {
895 _requests.push_back(std::make_shared<Request>(_inst->getASID(),
896 base_addr, _size - size_so_far, _flags, _inst->masterId(),
897 _inst->instAddr(), _inst->contextId()));
898 }
899
900 /* Setup the requests and send them to translation. */
901 for (auto& r: _requests) {
902 r->setReqInstSeqNum(_inst->seqNum);
903 r->taskId(_taskId);
904 }
905 this->_inst->savedReq = this;
906 numInTranslationFragments = 0;
907 numTranslatedFragments = 0;
908
909 for (uint32_t i = 0; i < _requests.size(); i++) {
910 sendFragmentToTranslation(i);
911 }
912}
913
914template<class Impl>
915void
916LSQ<Impl>::LSQRequest::sendFragmentToTranslation(int i)
917{
918 numInTranslationFragments++;
919 _port.dTLB()->translateTiming(
920 this->request(i),
921 this->_inst->thread->getTC(), this,
922 this->isLoad() ? BaseTLB::Read : BaseTLB::Write);
923}
924
925template<class Impl>
926bool
927LSQ<Impl>::SingleDataRequest::recvTimingResp(PacketPtr pkt)
928{
929 assert(_numOutstandingPackets == 1);
930 auto state = dynamic_cast<LSQSenderState*>(pkt->senderState);
931 setState(State::Complete);
932 flags.set(Flag::Complete);
933 state->outstanding--;
934 assert(pkt == _packets.front());
935 _port.completeDataAccess(pkt);
936 return true;
937}
938
939template<class Impl>
940bool
941LSQ<Impl>::SplitDataRequest::recvTimingResp(PacketPtr pkt)
942{
943 auto state = dynamic_cast<LSQSenderState*>(pkt->senderState);
944 uint32_t pktIdx = 0;
945 while (pktIdx < _packets.size() && pkt != _packets[pktIdx])
946 pktIdx++;
947 assert(pktIdx < _packets.size());
948 assert(pkt->req == _requests[pktIdx]);
949 assert(pkt == _packets[pktIdx]);
950 numReceivedPackets++;
951 state->outstanding--;
952 if (numReceivedPackets == _packets.size()) {
953 setState(State::Complete);
954 flags.set(Flag::Complete);
955 /* Assemble packets. */
956 PacketPtr resp = isLoad()
957 ? Packet::createRead(mainReq)
958 : Packet::createWrite(mainReq);
959 if (isLoad())
960 resp->dataStatic(_inst->memData);
961 else
962 resp->dataStatic(_data);
963 resp->senderState = _senderState;
964 _port.completeDataAccess(resp);
965 delete resp;
966 }
967 return true;
968}
969
970template<class Impl>
971void
972LSQ<Impl>::SingleDataRequest::buildPackets()
973{
974 assert(_senderState);
975 /* Retries do not create new packets. */
976 if (_packets.size() == 0) {
977 _packets.push_back(
978 isLoad()
979 ? Packet::createRead(request())
980 : Packet::createWrite(request()));
981 _packets.back()->dataStatic(_inst->memData);
982 _packets.back()->senderState = _senderState;
983 }
984 assert(_packets.size() == 1);
985}
986
987template<class Impl>
988void
989LSQ<Impl>::SplitDataRequest::buildPackets()
990{
991 /* Extra data?? */
992 ptrdiff_t offset = 0;
993 if (_packets.size() == 0) {
994 /* New stuff */
995 if (isLoad()) {
996 _mainPacket = Packet::createRead(mainReq);
997 _mainPacket->dataStatic(_inst->memData);
998 }
999 for (auto& r: _requests) {
1000 PacketPtr pkt = isLoad() ? Packet::createRead(r)
1001 : Packet::createWrite(r);
1002 if (isLoad()) {
1003 pkt->dataStatic(_inst->memData + offset);
1004 } else {
1005 uint8_t* req_data = new uint8_t[r->getSize()];
1006 std::memcpy(req_data,
1007 _inst->memData + offset,
1008 r->getSize());
1009 pkt->dataDynamic(req_data);
1010 }
1011 offset += r->getSize();
1012 pkt->senderState = _senderState;
1013 _packets.push_back(pkt);
1014 }
1015 }
1016 assert(_packets.size() == _requests.size());
1017}
1018
1019template<class Impl>
1020void
1021LSQ<Impl>::SingleDataRequest::sendPacketToCache()
1022{
1023 assert(_numOutstandingPackets == 0);
1024 if (lsqUnit()->trySendPacket(isLoad(), _packets.at(0)))
1025 _numOutstandingPackets = 1;
1026}
1027
1028template<class Impl>
1029void
1030LSQ<Impl>::SplitDataRequest::sendPacketToCache()
1031{
1032 /* Try to send the packets. */
1033 while (numReceivedPackets + _numOutstandingPackets < _packets.size() &&
1034 lsqUnit()->trySendPacket(isLoad(),
1035 _packets.at(numReceivedPackets + _numOutstandingPackets))) {
1036 _numOutstandingPackets++;
1037 }
1038}
1039
1040template<class Impl>
1041void
1042LSQ<Impl>::SingleDataRequest::handleIprWrite(ThreadContext *thread,
1043 PacketPtr pkt)
1044{
1045 TheISA::handleIprWrite(thread, pkt);
1046}
1047
1048template<class Impl>
1049void
1050LSQ<Impl>::SplitDataRequest::handleIprWrite(ThreadContext *thread,
1051 PacketPtr mainPkt)
1052{
1053 unsigned offset = 0;
1054 for (auto r: _requests) {
1055 PacketPtr pkt = new Packet(r, MemCmd::WriteReq);
1056 pkt->dataStatic(mainPkt->getPtr<uint8_t>() + offset);
1057 TheISA::handleIprWrite(thread, pkt);
1058 offset += r->getSize();
1059 delete pkt;
1060 }
1061}
1062
1063template<class Impl>
1064Cycles
1065LSQ<Impl>::SingleDataRequest::handleIprRead(ThreadContext *thread,
1066 PacketPtr pkt)
1067{
1068 return TheISA::handleIprRead(thread, pkt);
1069}
1070
1071template<class Impl>
1072Cycles
1073LSQ<Impl>::SplitDataRequest::handleIprRead(ThreadContext *thread,
1074 PacketPtr mainPkt)
1075{
1076 Cycles delay(0);
1077 unsigned offset = 0;
1078
1079 for (auto r: _requests) {
1080 PacketPtr pkt = new Packet(r, MemCmd::ReadReq);
1081 pkt->dataStatic(mainPkt->getPtr<uint8_t>() + offset);
1082 Cycles d = TheISA::handleIprRead(thread, pkt);
1083 if (d > delay)
1084 delay = d;
1085 offset += r->getSize();
1086 delete pkt;
1087 }
1088 return delay;
1089}
1090
1091template<class Impl>
1092bool
1093LSQ<Impl>::SingleDataRequest::isCacheBlockHit(Addr blockAddr, Addr blockMask)
1094{
1095 return ( (LSQRequest::_requests[0]->getPaddr() & blockMask) == blockAddr);
1096}
1097
1098template<class Impl>
1099bool
1100LSQ<Impl>::SplitDataRequest::isCacheBlockHit(Addr blockAddr, Addr blockMask)
1101{
1102 bool is_hit = false;
1103 for (auto &r: _requests) {
1104 if ((r->getPaddr() & blockMask) == blockAddr) {
1105 is_hit = true;
1106 break;
1107 }
1108 }
1109 return is_hit;
1110}
1111
1112#endif//__CPU_O3_LSQ_IMPL_HH__
689
690 ThreadID tid = cpu->contextToThread(inst->contextId());
691 auto cacheLineSize = cpu->cacheLineSize();
692 bool needs_burst = transferNeedsBurst(addr, size, cacheLineSize);
693 LSQRequest* req = nullptr;
694
695 // Atomic requests that access data across cache line boundary are
696 // currently not allowed since the cache does not guarantee corresponding
697 // atomic memory operations to be executed atomically across a cache line.
698 // For ISAs such as x86 that supports cross-cache-line atomic instructions,
699 // the cache needs to be modified to perform atomic update to both cache
700 // lines. For now, such cross-line update is not supported.
701 assert(!isAtomic || (isAtomic && !needs_burst));
702
703 if (inst->translationStarted()) {
704 req = inst->savedReq;
705 assert(req);
706 } else {
707 if (needs_burst) {
708 req = new SplitDataRequest(&thread[tid], inst, isLoad, addr,
709 size, flags, data, res);
710 } else {
711 req = new SingleDataRequest(&thread[tid], inst, isLoad, addr,
712 size, flags, data, res, amo_op);
713 }
714 assert(req);
715 inst->setRequest();
716 req->taskId(cpu->taskId());
717
718 req->initiateTranslation();
719 }
720
721 /* This is the place were instructions get the effAddr. */
722 if (req->isTranslationComplete()) {
723 if (inst->getFault() == NoFault) {
724 inst->effAddr = req->getVaddr();
725 inst->effSize = size;
726 inst->effAddrValid(true);
727
728 if (cpu->checker) {
729 inst->reqToVerify = std::make_shared<Request>(*req->request());
730 }
731 if (isLoad)
732 inst->getFault() = cpu->read(req, inst->lqIdx);
733 else
734 inst->getFault() = cpu->write(req, data, inst->sqIdx);
735 } else if (isLoad) {
736 // Commit will have to clean up whatever happened. Set this
737 // instruction as executed.
738 inst->setExecuted();
739 }
740 }
741
742 if (inst->traceData)
743 inst->traceData->setMem(addr, size, flags);
744
745 return inst->getFault();
746}
747
748template<class Impl>
749void
750LSQ<Impl>::SingleDataRequest::finish(const Fault &fault, const RequestPtr &req,
751 ThreadContext* tc, BaseTLB::Mode mode)
752{
753 _fault.push_back(fault);
754 numInTranslationFragments = 0;
755 numTranslatedFragments = 1;
756 /* If the instruction has been squahsed, let the request know
757 * as it may have to self-destruct. */
758 if (_inst->isSquashed()) {
759 this->squashTranslation();
760 } else {
761 _inst->strictlyOrdered(req->isStrictlyOrdered());
762
763 flags.set(Flag::TranslationFinished);
764 if (fault == NoFault) {
765 _inst->physEffAddr = req->getPaddr();
766 _inst->memReqFlags = req->getFlags();
767 if (req->isCondSwap()) {
768 assert(_res);
769 req->setExtraData(*_res);
770 }
771 setState(State::Request);
772 } else {
773 setState(State::Fault);
774 }
775
776 LSQRequest::_inst->fault = fault;
777 LSQRequest::_inst->translationCompleted(true);
778 }
779}
780
781template<class Impl>
782void
783LSQ<Impl>::SplitDataRequest::finish(const Fault &fault, const RequestPtr &req,
784 ThreadContext* tc, BaseTLB::Mode mode)
785{
786 _fault.push_back(fault);
787 assert(req == _requests[numTranslatedFragments] || this->isDelayed());
788
789 numInTranslationFragments--;
790 numTranslatedFragments++;
791
792 mainReq->setFlags(req->getFlags());
793
794 if (numTranslatedFragments == _requests.size()) {
795 if (_inst->isSquashed()) {
796 this->squashTranslation();
797 } else {
798 _inst->strictlyOrdered(mainReq->isStrictlyOrdered());
799 flags.set(Flag::TranslationFinished);
800 auto fault_it = _fault.begin();
801 /* Ffwd to the first NoFault. */
802 while (fault_it != _fault.end() && *fault_it == NoFault)
803 fault_it++;
804 /* If none of the fragments faulted: */
805 if (fault_it == _fault.end()) {
806 _inst->physEffAddr = request(0)->getPaddr();
807
808 _inst->memReqFlags = mainReq->getFlags();
809 if (mainReq->isCondSwap()) {
810 assert(_res);
811 mainReq->setExtraData(*_res);
812 }
813 setState(State::Request);
814 _inst->fault = NoFault;
815 } else {
816 setState(State::Fault);
817 _inst->fault = *fault_it;
818 }
819 _inst->translationCompleted(true);
820 }
821 }
822}
823
824template<class Impl>
825void
826LSQ<Impl>::SingleDataRequest::initiateTranslation()
827{
828 _inst->translationStarted(true);
829 setState(State::Translation);
830 flags.set(Flag::TranslationStarted);
831
832 _inst->savedReq = this;
833 sendFragmentToTranslation(0);
834
835 if (isTranslationComplete()) {
836 }
837}
838
839template<class Impl>
840PacketPtr
841LSQ<Impl>::SplitDataRequest::mainPacket()
842{
843 return _mainPacket;
844}
845
846template<class Impl>
847RequestPtr
848LSQ<Impl>::SplitDataRequest::mainRequest()
849{
850 return mainReq;
851}
852
853template<class Impl>
854void
855LSQ<Impl>::SplitDataRequest::initiateTranslation()
856{
857 _inst->translationStarted(true);
858 setState(State::Translation);
859 flags.set(Flag::TranslationStarted);
860
861 unsigned int cacheLineSize = _port.cacheLineSize();
862 Addr base_addr = _addr;
863 Addr next_addr = addrBlockAlign(_addr + cacheLineSize, cacheLineSize);
864 Addr final_addr = addrBlockAlign(_addr + _size, cacheLineSize);
865 uint32_t size_so_far = 0;
866
867 mainReq = std::make_shared<Request>(_inst->getASID(), base_addr,
868 _size, _flags, _inst->masterId(),
869 _inst->instAddr(), _inst->contextId());
870
871 // Paddr is not used in mainReq. However, we will accumulate the flags
872 // from the sub requests into mainReq by calling setFlags() in finish().
873 // setFlags() assumes that paddr is set so flip the paddr valid bit here to
874 // avoid a potential assert in setFlags() when we call it from finish().
875 mainReq->setPaddr(0);
876
877 /* Get the pre-fix, possibly unaligned. */
878 _requests.push_back(std::make_shared<Request>(_inst->getASID(), base_addr,
879 next_addr - base_addr, _flags, _inst->masterId(),
880 _inst->instAddr(), _inst->contextId()));
881 size_so_far = next_addr - base_addr;
882
883 /* We are block aligned now, reading whole blocks. */
884 base_addr = next_addr;
885 while (base_addr != final_addr) {
886 _requests.push_back(std::make_shared<Request>(_inst->getASID(),
887 base_addr, cacheLineSize, _flags, _inst->masterId(),
888 _inst->instAddr(), _inst->contextId()));
889 size_so_far += cacheLineSize;
890 base_addr += cacheLineSize;
891 }
892
893 /* Deal with the tail. */
894 if (size_so_far < _size) {
895 _requests.push_back(std::make_shared<Request>(_inst->getASID(),
896 base_addr, _size - size_so_far, _flags, _inst->masterId(),
897 _inst->instAddr(), _inst->contextId()));
898 }
899
900 /* Setup the requests and send them to translation. */
901 for (auto& r: _requests) {
902 r->setReqInstSeqNum(_inst->seqNum);
903 r->taskId(_taskId);
904 }
905 this->_inst->savedReq = this;
906 numInTranslationFragments = 0;
907 numTranslatedFragments = 0;
908
909 for (uint32_t i = 0; i < _requests.size(); i++) {
910 sendFragmentToTranslation(i);
911 }
912}
913
914template<class Impl>
915void
916LSQ<Impl>::LSQRequest::sendFragmentToTranslation(int i)
917{
918 numInTranslationFragments++;
919 _port.dTLB()->translateTiming(
920 this->request(i),
921 this->_inst->thread->getTC(), this,
922 this->isLoad() ? BaseTLB::Read : BaseTLB::Write);
923}
924
925template<class Impl>
926bool
927LSQ<Impl>::SingleDataRequest::recvTimingResp(PacketPtr pkt)
928{
929 assert(_numOutstandingPackets == 1);
930 auto state = dynamic_cast<LSQSenderState*>(pkt->senderState);
931 setState(State::Complete);
932 flags.set(Flag::Complete);
933 state->outstanding--;
934 assert(pkt == _packets.front());
935 _port.completeDataAccess(pkt);
936 return true;
937}
938
939template<class Impl>
940bool
941LSQ<Impl>::SplitDataRequest::recvTimingResp(PacketPtr pkt)
942{
943 auto state = dynamic_cast<LSQSenderState*>(pkt->senderState);
944 uint32_t pktIdx = 0;
945 while (pktIdx < _packets.size() && pkt != _packets[pktIdx])
946 pktIdx++;
947 assert(pktIdx < _packets.size());
948 assert(pkt->req == _requests[pktIdx]);
949 assert(pkt == _packets[pktIdx]);
950 numReceivedPackets++;
951 state->outstanding--;
952 if (numReceivedPackets == _packets.size()) {
953 setState(State::Complete);
954 flags.set(Flag::Complete);
955 /* Assemble packets. */
956 PacketPtr resp = isLoad()
957 ? Packet::createRead(mainReq)
958 : Packet::createWrite(mainReq);
959 if (isLoad())
960 resp->dataStatic(_inst->memData);
961 else
962 resp->dataStatic(_data);
963 resp->senderState = _senderState;
964 _port.completeDataAccess(resp);
965 delete resp;
966 }
967 return true;
968}
969
970template<class Impl>
971void
972LSQ<Impl>::SingleDataRequest::buildPackets()
973{
974 assert(_senderState);
975 /* Retries do not create new packets. */
976 if (_packets.size() == 0) {
977 _packets.push_back(
978 isLoad()
979 ? Packet::createRead(request())
980 : Packet::createWrite(request()));
981 _packets.back()->dataStatic(_inst->memData);
982 _packets.back()->senderState = _senderState;
983 }
984 assert(_packets.size() == 1);
985}
986
987template<class Impl>
988void
989LSQ<Impl>::SplitDataRequest::buildPackets()
990{
991 /* Extra data?? */
992 ptrdiff_t offset = 0;
993 if (_packets.size() == 0) {
994 /* New stuff */
995 if (isLoad()) {
996 _mainPacket = Packet::createRead(mainReq);
997 _mainPacket->dataStatic(_inst->memData);
998 }
999 for (auto& r: _requests) {
1000 PacketPtr pkt = isLoad() ? Packet::createRead(r)
1001 : Packet::createWrite(r);
1002 if (isLoad()) {
1003 pkt->dataStatic(_inst->memData + offset);
1004 } else {
1005 uint8_t* req_data = new uint8_t[r->getSize()];
1006 std::memcpy(req_data,
1007 _inst->memData + offset,
1008 r->getSize());
1009 pkt->dataDynamic(req_data);
1010 }
1011 offset += r->getSize();
1012 pkt->senderState = _senderState;
1013 _packets.push_back(pkt);
1014 }
1015 }
1016 assert(_packets.size() == _requests.size());
1017}
1018
1019template<class Impl>
1020void
1021LSQ<Impl>::SingleDataRequest::sendPacketToCache()
1022{
1023 assert(_numOutstandingPackets == 0);
1024 if (lsqUnit()->trySendPacket(isLoad(), _packets.at(0)))
1025 _numOutstandingPackets = 1;
1026}
1027
1028template<class Impl>
1029void
1030LSQ<Impl>::SplitDataRequest::sendPacketToCache()
1031{
1032 /* Try to send the packets. */
1033 while (numReceivedPackets + _numOutstandingPackets < _packets.size() &&
1034 lsqUnit()->trySendPacket(isLoad(),
1035 _packets.at(numReceivedPackets + _numOutstandingPackets))) {
1036 _numOutstandingPackets++;
1037 }
1038}
1039
1040template<class Impl>
1041void
1042LSQ<Impl>::SingleDataRequest::handleIprWrite(ThreadContext *thread,
1043 PacketPtr pkt)
1044{
1045 TheISA::handleIprWrite(thread, pkt);
1046}
1047
1048template<class Impl>
1049void
1050LSQ<Impl>::SplitDataRequest::handleIprWrite(ThreadContext *thread,
1051 PacketPtr mainPkt)
1052{
1053 unsigned offset = 0;
1054 for (auto r: _requests) {
1055 PacketPtr pkt = new Packet(r, MemCmd::WriteReq);
1056 pkt->dataStatic(mainPkt->getPtr<uint8_t>() + offset);
1057 TheISA::handleIprWrite(thread, pkt);
1058 offset += r->getSize();
1059 delete pkt;
1060 }
1061}
1062
1063template<class Impl>
1064Cycles
1065LSQ<Impl>::SingleDataRequest::handleIprRead(ThreadContext *thread,
1066 PacketPtr pkt)
1067{
1068 return TheISA::handleIprRead(thread, pkt);
1069}
1070
1071template<class Impl>
1072Cycles
1073LSQ<Impl>::SplitDataRequest::handleIprRead(ThreadContext *thread,
1074 PacketPtr mainPkt)
1075{
1076 Cycles delay(0);
1077 unsigned offset = 0;
1078
1079 for (auto r: _requests) {
1080 PacketPtr pkt = new Packet(r, MemCmd::ReadReq);
1081 pkt->dataStatic(mainPkt->getPtr<uint8_t>() + offset);
1082 Cycles d = TheISA::handleIprRead(thread, pkt);
1083 if (d > delay)
1084 delay = d;
1085 offset += r->getSize();
1086 delete pkt;
1087 }
1088 return delay;
1089}
1090
1091template<class Impl>
1092bool
1093LSQ<Impl>::SingleDataRequest::isCacheBlockHit(Addr blockAddr, Addr blockMask)
1094{
1095 return ( (LSQRequest::_requests[0]->getPaddr() & blockMask) == blockAddr);
1096}
1097
1098template<class Impl>
1099bool
1100LSQ<Impl>::SplitDataRequest::isCacheBlockHit(Addr blockAddr, Addr blockMask)
1101{
1102 bool is_hit = false;
1103 for (auto &r: _requests) {
1104 if ((r->getPaddr() & blockMask) == blockAddr) {
1105 is_hit = true;
1106 break;
1107 }
1108 }
1109 return is_hit;
1110}
1111
1112#endif//__CPU_O3_LSQ_IMPL_HH__