/*
 * Copyright (c) 2013-2014,2017 ARM Limited
3 * All rights reserved
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder. You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions are
16 * met: redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer;
18 * redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution;
21 * neither the name of the copyright holders nor the names of its
22 * contributors may be used to endorse or promote products derived from
23 * this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 *
37 * Authors: Andrew Bardsley
38 */
39
40#include "cpu/minor/lsq.hh"
41
42#include <iomanip>
43#include <sstream>
44
45#include "arch/locked_mem.hh"
46#include "arch/mmapped_ipr.hh"
47#include "cpu/minor/cpu.hh"
48#include "cpu/minor/exec_context.hh"
49#include "cpu/minor/execute.hh"
50#include "cpu/minor/pipeline.hh"
51#include "debug/Activity.hh"
52#include "debug/MinorMem.hh"
53
54namespace Minor
55{
56
57/** Returns the offset of addr into an aligned a block of size block_size */
58static Addr
59addrBlockOffset(Addr addr, unsigned int block_size)
60{
61 return addr & (block_size - 1);
62}
63
64/** Returns true if the given [addr .. addr+size-1] transfer needs to be
65 * fragmented across a block size of block_size */
66static bool
67transferNeedsBurst(Addr addr, unsigned int size, unsigned int block_size)
68{
69 return (addrBlockOffset(addr, block_size) + size) > block_size;
70}
71
/** Construct a request in its initial NotIssued state.  No packet is
 *  made yet; that happens once the request has been translated */
LSQ::LSQRequest::LSQRequest(LSQ &port_, MinorDynInstPtr inst_, bool isLoad_,
    PacketDataPtr data_, uint64_t *res_) :
    SenderState(),
    port(port_),
    inst(inst_),
    isLoad(isLoad_),
    data(data_),
    packet(NULL), /* Made later, after translation */
    request(),
    fault(NoFault),
    res(res_),
    skipped(false),
    issuedToMemory(false),
    state(NotIssued)
{ }
87
88LSQ::AddrRangeCoverage
89LSQ::LSQRequest::containsAddrRangeOf(
90 Addr req1_addr, unsigned int req1_size,
91 Addr req2_addr, unsigned int req2_size)
92{
93 /* 'end' here means the address of the byte just past the request
94 * blocks */
95 Addr req2_end_addr = req2_addr + req2_size;
96 Addr req1_end_addr = req1_addr + req1_size;
97
98 AddrRangeCoverage ret;
99
100 if (req1_addr >= req2_end_addr || req1_end_addr <= req2_addr)
101 ret = NoAddrRangeCoverage;
102 else if (req1_addr <= req2_addr && req1_end_addr >= req2_end_addr)
103 ret = FullAddrRangeCoverage;
104 else
105 ret = PartialAddrRangeCoverage;
106
107 return ret;
108}
109
110LSQ::AddrRangeCoverage
111LSQ::LSQRequest::containsAddrRangeOf(LSQRequestPtr other_request)
112{
113 return containsAddrRangeOf(request.getPaddr(), request.getSize(),
114 other_request->request.getPaddr(), other_request->request.getSize());
115}
116
117bool
118LSQ::LSQRequest::isBarrier()
119{
120 return inst->isInst() && inst->staticInst->isMemBarrier();
121}
122
/** True while this (store) request is waiting to be moved into the
 *  store buffer */
bool
LSQ::LSQRequest::needsToBeSentToStoreBuffer()
{
    return state == StoreToStoreBuffer;
}
128
/** Move the request's state machine to new_state, tracing the
 *  transition for debug */
void
LSQ::LSQRequest::setState(LSQRequestState new_state)
{
    /* Print before assigning so both old and new states are visible */
    DPRINTFS(MinorMem, (&port), "Setting state from %d to %d for request:"
        " %s\n", state, new_state, *inst);
    state = new_state;
}
136
/** Has this request finished all its memory activity? */
bool
LSQ::LSQRequest::isComplete() const
{
    /* @todo, There is currently only one 'completed' state. This
     * may not be a good choice */
    return state == Complete;
}
144
145void
146LSQ::LSQRequest::reportData(std::ostream &os) const
147{
148 os << (isLoad ? 'R' : 'W') << ';';
149 inst->reportData(os);
150 os << ';' << state;
151}
152
153std::ostream &
154operator <<(std::ostream &os, LSQ::AddrRangeCoverage coverage)
155{
156 switch (coverage) {
157 case LSQ::PartialAddrRangeCoverage:
158 os << "PartialAddrRangeCoverage";
159 break;
160 case LSQ::FullAddrRangeCoverage:
161 os << "FullAddrRangeCoverage";
162 break;
163 case LSQ::NoAddrRangeCoverage:
164 os << "NoAddrRangeCoverage";
165 break;
166 default:
167 os << "AddrRangeCoverage-" << static_cast<int>(coverage);
168 break;
169 }
170 return os;
171}
172
173std::ostream &
174operator <<(std::ostream &os, LSQ::LSQRequest::LSQRequestState state)
175{
176 switch (state) {
177 case LSQ::LSQRequest::NotIssued:
178 os << "NotIssued";
179 break;
180 case LSQ::LSQRequest::InTranslation:
181 os << "InTranslation";
182 break;
183 case LSQ::LSQRequest::Translated:
184 os << "Translated";
185 break;
186 case LSQ::LSQRequest::Failed:
187 os << "Failed";
188 break;
189 case LSQ::LSQRequest::RequestIssuing:
190 os << "RequestIssuing";
191 break;
192 case LSQ::LSQRequest::StoreToStoreBuffer:
193 os << "StoreToStoreBuffer";
194 break;
195 case LSQ::LSQRequest::StoreInStoreBuffer:
196 os << "StoreInStoreBuffer";
197 break;
198 case LSQ::LSQRequest::StoreBufferIssuing:
199 os << "StoreBufferIssuing";
200 break;
201 case LSQ::LSQRequest::RequestNeedsRetry:
202 os << "RequestNeedsRetry";
203 break;
204 case LSQ::LSQRequest::StoreBufferNeedsRetry:
205 os << "StoreBufferNeedsRetry";
206 break;
207 case LSQ::LSQRequest::Complete:
208 os << "Complete";
209 break;
210 default:
211 os << "LSQRequestState-" << static_cast<int>(state);
212 break;
213 }
214 return os;
215}
216
/** Account for barrier inst leaving the store buffer.  If it was the
 *  most recently issued barrier for its thread, forget it */
void
LSQ::clearMemBarrier(MinorDynInstPtr inst)
{
    bool is_last_barrier =
        inst->id.execSeqNum >= lastMemBarrier[inst->id.threadId];

    DPRINTF(MinorMem, "Moving %s barrier out of store buffer inst: %s\n",
        (is_last_barrier ? "last" : "a"), *inst);

    /* 0 appears to act as the 'no outstanding barrier' sentinel for the
     * thread -- TODO confirm against lastMemBarrier's other users */
    if (is_last_barrier)
        lastMemBarrier[inst->id.threadId] = 0;
}
229
/** Translation interface callback: the DTLB has finished translating
 *  this request.  Any translation fault is recorded on the request for
 *  later handling */
void
LSQ::SingleDataRequest::finish(const Fault &fault_, RequestPtr request_,
    ThreadContext *tc, BaseTLB::Mode mode)
{
    fault = fault_;

    port.numAccessesInDTLB--;

    DPRINTFS(MinorMem, (&port), "Received translation response for"
        " request: %s\n", *inst);

    /* Build the memory packet now translation is done */
    makePacket();

    setState(Translated);
    port.tryToSendToTransfers(this);

    /* Let's try and wake up the processor for the next cycle */
    port.cpu.wakeupOnEvent(Pipeline::ExecuteStageId);
}
249
/** Hand this request to the DTLB for (timing) address translation */
void
LSQ::SingleDataRequest::startAddrTranslation()
{
    ThreadContext *thread = port.cpu.getContext(
        inst->id.threadId);

    port.numAccessesInDTLB++;

    setState(LSQ::LSQRequest::InTranslation);

    DPRINTFS(MinorMem, (&port), "Submitting DTLB request\n");
    /* Submit the translation request.  The response will come through
     * finish/markDelayed on the LSQRequest as it bears the Translation
     * interface */
    thread->getDTBPtr()->translateTiming(
        &request, thread, this, (isLoad ? BaseTLB::Read : BaseTLB::Write));
}
267
/** Absorb the memory system's response packet and mark the whole
 *  request as finished */
void
LSQ::SingleDataRequest::retireResponse(PacketPtr packet_)
{
    DPRINTFS(MinorMem, (&port), "Retiring packet\n");
    packet = packet_;
    packetInFlight = false;
    setState(Complete);
}
276
/** Translation interface callback for one fragment.  Fragments are
 *  translated strictly in order, one at a time */
void
LSQ::SplitDataRequest::finish(const Fault &fault_, RequestPtr request_,
    ThreadContext *tc, BaseTLB::Mode mode)
{
    fault = fault_;

    port.numAccessesInDTLB--;

    /* The fragment that just finished is the one after the last
     * translated one (in-order translation) */
    unsigned int M5_VAR_USED expected_fragment_index =
        numTranslatedFragments;

    numInTranslationFragments--;
    numTranslatedFragments++;

    DPRINTFS(MinorMem, (&port), "Received translation response for fragment"
        " %d of request: %s\n", expected_fragment_index, *inst);

    assert(request_ == fragmentRequests[expected_fragment_index]);

    /* Wake up next cycle to get things going again in case the
     * tryToSendToTransfers does take */
    port.cpu.wakeupOnEvent(Pipeline::ExecuteStageId);

    if (fault != NoFault) {
        /* A faulting fragment ends translation of the whole request;
         * tryToSendToTransfers will handle the fault */
        DPRINTFS(MinorMem, (&port), "Faulting translation for fragment:"
            " %d of request: %s\n",
            expected_fragment_index, *inst);

        setState(Translated);
        port.tryToSendToTransfers(this);
    } else if (numTranslatedFragments == numFragments) {
        /* All fragments translated: build their packets and move on */
        makeFragmentPackets();

        setState(Translated);
        port.tryToSendToTransfers(this);
    } else {
        /* Avoid calling translateTiming from within ::finish */
        assert(!translationEvent.scheduled());
        port.cpu.schedule(translationEvent, curTick());
    }
}
320
/** Construct a request that will be split into several line-sized
 *  fragment requests/packets */
LSQ::SplitDataRequest::SplitDataRequest(LSQ &port_, MinorDynInstPtr inst_,
    bool isLoad_, PacketDataPtr data_, uint64_t *res_) :
    LSQRequest(port_, inst_, isLoad_, data_, res_),
    /* Event used to defer translateTiming calls out of ::finish */
    translationEvent([this]{ sendNextFragmentToTranslation(); },
        "translationEvent"),
    numFragments(0),
    numInTranslationFragments(0),
    numTranslatedFragments(0),
    numIssuedFragments(0),
    numRetiredFragments(0),
    fragmentRequests(),
    fragmentPackets()
{
    /* Don't know how many elements are needed until the request is
     * populated by the caller. */
}
337
338LSQ::SplitDataRequest::~SplitDataRequest()
339{
340 for (auto i = fragmentRequests.begin();
341 i != fragmentRequests.end(); i++)
342 {
343 delete *i;
344 }
345
346 for (auto i = fragmentPackets.begin();
347 i != fragmentPackets.end(); i++)
348 {
349 delete *i;
350 }
351}
352
/** Carve the whole request into per-line fragment Requests.  Each
 *  fragment inherits the parent request's flags, master id and PC but
 *  has its own address and size */
void
LSQ::SplitDataRequest::makeFragmentRequests()
{
    Addr base_addr = request.getVaddr();
    unsigned int whole_size = request.getSize();
    unsigned int line_width = port.lineWidth;

    unsigned int fragment_size;
    Addr fragment_addr;

    /* Assume that this transfer is across potentially many block snap
     * boundaries:
     *
     * |      _|________|________|________|___     |
     * |     |0|   1    |   2    |   3    | 4 |    |
     * |     |_|________|________|________|___|    |
     * |      |        |        |        |         |
     *
     *  The first transfer (0) can be up to lineWidth in size.
     *  All the middle transfers (1-3) are lineWidth in size
     *  The last transfer (4) can be from zero to lineWidth - 1 in size
     */
    unsigned int first_fragment_offset =
        addrBlockOffset(base_addr, line_width);
    unsigned int last_fragment_size =
        addrBlockOffset(base_addr + whole_size, line_width);
    unsigned int first_fragment_size =
        line_width - first_fragment_offset;

    unsigned int middle_fragments_total_size =
        whole_size - (first_fragment_size + last_fragment_size);

    /* The middle portion must be an exact number of lines */
    assert(addrBlockOffset(middle_fragments_total_size, line_width) == 0);

    unsigned int middle_fragment_count =
        middle_fragments_total_size / line_width;

    /* last_fragment_size == 0 means the transfer ends exactly on a line
     * boundary and so needs no trailing fragment */
    numFragments = 1 /* first */ + middle_fragment_count +
        (last_fragment_size == 0 ? 0 : 1);

    DPRINTFS(MinorMem, (&port), "Dividing transfer into %d fragmentRequests."
        " First fragment size: %d Last fragment size: %d\n",
        numFragments, first_fragment_size,
        (last_fragment_size == 0 ? line_width : last_fragment_size));

    /* Sanity check: the fragments must cover the request exactly */
    assert(((middle_fragment_count * line_width) +
        first_fragment_size + last_fragment_size) == whole_size);

    fragment_addr = base_addr;
    fragment_size = first_fragment_size;

    /* Just past the last address in the request */
    Addr end_addr = base_addr + whole_size;

    for (unsigned int fragment_index = 0; fragment_index < numFragments;
        fragment_index++)
    {
        bool M5_VAR_USED is_last_fragment = false;

        if (fragment_addr == base_addr) {
            /* First fragment */
            fragment_size = first_fragment_size;
        } else {
            if ((fragment_addr + line_width) > end_addr) {
                /* Adjust size of last fragment */
                fragment_size = end_addr - fragment_addr;
                is_last_fragment = true;
            } else {
                /* Middle fragments */
                fragment_size = line_width;
            }
        }

        Request *fragment = new Request();

        fragment->setContext(request.contextId());
        fragment->setVirt(0 /* asid */,
            fragment_addr, fragment_size, request.getFlags(),
            request.masterId(),
            request.getPC());

        DPRINTFS(MinorMem, (&port), "Generating fragment addr: 0x%x size: %d"
            " (whole request addr: 0x%x size: %d) %s\n",
            fragment_addr, fragment_size, base_addr, whole_size,
            (is_last_fragment ? "last fragment" : ""));

        fragment_addr += fragment_size;

        fragmentRequests.push_back(fragment);
    }
}
444
445void
446LSQ::SplitDataRequest::makeFragmentPackets()
447{
448 Addr base_addr = request.getVaddr();
449
450 DPRINTFS(MinorMem, (&port), "Making packets for request: %s\n", *inst);
451
452 for (unsigned int fragment_index = 0; fragment_index < numFragments;
453 fragment_index++)
454 {
455 Request *fragment = fragmentRequests[fragment_index];
456
457 DPRINTFS(MinorMem, (&port), "Making packet %d for request: %s"
458 " (%d, 0x%x)\n",
459 fragment_index, *inst,
460 (fragment->hasPaddr() ? "has paddr" : "no paddr"),
461 (fragment->hasPaddr() ? fragment->getPaddr() : 0));
462
463 Addr fragment_addr = fragment->getVaddr();
464 unsigned int fragment_size = fragment->getSize();
465
466 uint8_t *request_data = NULL;
467
468 if (!isLoad) {
469 /* Split data for Packets. Will become the property of the
470 * outgoing Packets */
471 request_data = new uint8_t[fragment_size];
472 std::memcpy(request_data, data + (fragment_addr - base_addr),
473 fragment_size);
474 }
475
476 assert(fragment->hasPaddr());
477
478 PacketPtr fragment_packet =
479 makePacketForRequest(*fragment, isLoad, this, request_data);
480
481 fragmentPackets.push_back(fragment_packet);
482 /* Accumulate flags in parent request */
483 request.setFlags(fragment->getFlags());
484 }
485
486 /* Might as well make the overall/response packet here */
487 /* Get the physical address for the whole request/packet from the first
488 * fragment */
489 request.setPaddr(fragmentRequests[0]->getPaddr());
490 makePacket();
491}
492
/** Build the fragment Requests and start translating them, one at a
 *  time, beginning with the first */
void
LSQ::SplitDataRequest::startAddrTranslation()
{
    setState(LSQ::LSQRequest::InTranslation);

    makeFragmentRequests();

    numInTranslationFragments = 0;
    numTranslatedFragments = 0;

    /* @todo, just do these in sequence for now with
     * a loop of:
     * do {
     *     sendNextFragmentToTranslation ; translateTiming ; finish
     * } while (numTranslatedFragments != numFragments);
     */

    /* Do first translation */
    sendNextFragmentToTranslation();
}
513
/** The next packet to send to memory: the oldest not-yet-issued
 *  fragment */
PacketPtr
LSQ::SplitDataRequest::getHeadPacket()
{
    assert(numIssuedFragments < numFragments);

    return fragmentPackets[numIssuedFragments];
}
521
/** Advance the issue pointer past the packet just sent so getHeadPacket
 *  returns the next fragment */
void
LSQ::SplitDataRequest::stepToNextPacket()
{
    assert(numIssuedFragments < numFragments);

    numIssuedFragments++;
}
529
/** Absorb one fragment's response; the whole request becomes Complete
 *  once the last fragment is retired (or early, if skipping with no
 *  packets left in the memory system) */
void
LSQ::SplitDataRequest::retireResponse(PacketPtr response)
{
    assert(numRetiredFragments < numFragments);

    DPRINTFS(MinorMem, (&port), "Retiring fragment addr: 0x%x size: %d"
        " offset: 0x%x (retired fragment num: %d) %s\n",
        response->req->getVaddr(), response->req->getSize(),
        request.getVaddr() - response->req->getVaddr(),
        numRetiredFragments,
        (fault == NoFault ? "" : fault->name()));

    numRetiredFragments++;

    if (skipped) {
        /* Skip because we already knew the request had faulted or been
         * skipped */
        DPRINTFS(MinorMem, (&port), "Skipping this fragment\n");
    } else if (response->isError()) {
        /* Mark up the error and leave to execute to handle it */
        DPRINTFS(MinorMem, (&port), "Fragment has an error, skipping\n");
        setSkipped();
        packet->copyError(response);
    } else {
        if (isLoad) {
            if (!data) {
                /* For a split transfer, a Packet must be constructed
                 * to contain all returning data.  This is that packet's
                 * data */
                data = new uint8_t[request.getSize()];
            }

            /* Populate the portion of the overall response data represented
             * by the response fragment */
            std::memcpy(
                data + (response->req->getVaddr() - request.getVaddr()),
                response->getConstPtr<uint8_t>(),
                response->req->getSize());
        }
    }

    /* Complete early if we're skipping and there are no more in-flight
     * accesses */
    if (skipped && !hasPacketsInMemSystem()) {
        DPRINTFS(MinorMem, (&port), "Completed skipped burst\n");
        setState(Complete);
        if (packet->needsResponse())
            packet->makeResponse();
    }

    if (numRetiredFragments == numFragments)
        setState(Complete);

    if (!skipped && isComplete()) {
        DPRINTFS(MinorMem, (&port), "Completed burst %d\n", packet != NULL);

        DPRINTFS(MinorMem, (&port), "Retired packet isRead: %d isWrite: %d"
            " needsResponse: %d packetSize: %s requestSize: %s responseSize:"
            " %s\n", packet->isRead(), packet->isWrite(),
            packet->needsResponse(), packet->getSize(), request.getSize(),
            response->getSize());

        /* A request can become complete by several paths, this is a sanity
         * check to make sure the packet's data is created */
        if (!data) {
            data = new uint8_t[request.getSize()];
        }

        if (isLoad) {
            DPRINTFS(MinorMem, (&port), "Copying read data\n");
            std::memcpy(packet->getPtr<uint8_t>(), data, request.getSize());
        }
        packet->makeResponse();
    }

    /* Packets are all deallocated together in ~SplitLSQRequest */
}
606
/** Submit the next untranslated fragment to the DTLB.  Fragments go one
 *  at a time, in order; the next one is indexed by the count of
 *  already-translated fragments */
void
LSQ::SplitDataRequest::sendNextFragmentToTranslation()
{
    unsigned int fragment_index = numTranslatedFragments;

    ThreadContext *thread = port.cpu.getContext(
        inst->id.threadId);

    DPRINTFS(MinorMem, (&port), "Submitting DTLB request for fragment: %d\n",
        fragment_index);

    port.numAccessesInDTLB++;
    numInTranslationFragments++;

    /* The response arrives via SplitDataRequest::finish */
    thread->getDTBPtr()->translateTiming(
        fragmentRequests[fragment_index], thread, this, (isLoad ?
        BaseTLB::Read : BaseTLB::Write));
}
625
/** Is there a free slot in the store buffer? */
bool
LSQ::StoreBuffer::canInsert() const
{
    /* @todo, support store amalgamation */
    return slots.size() < numSlots;
}
632
633void
634LSQ::StoreBuffer::deleteRequest(LSQRequestPtr request)
635{
636 auto found = std::find(slots.begin(), slots.end(), request);
637
638 if (found != slots.end()) {
639 DPRINTF(MinorMem, "Deleting request: %s %s %s from StoreBuffer\n",
640 request, *found, *(request->inst));
641 slots.erase(found);
642
643 delete request;
644 }
645}
646
/** Push a store request onto the tail of the store buffer.  Inserting
 *  when full is unexpected and warned about, but still performed */
void
LSQ::StoreBuffer::insert(LSQRequestPtr request)
{
    if (!canInsert()) {
        warn("%s: store buffer insertion without space to insert from"
            " inst: %s\n", name(), *(request->inst));
    }

    DPRINTF(MinorMem, "Pushing store: %s into store buffer\n", request);

    numUnissuedAccesses++;

    /* Requests arriving already Complete keep that state -- TODO confirm
     * which callers pass Complete requests here */
    if (request->state != LSQRequest::Complete)
        request->setState(LSQRequest::StoreInStoreBuffer);

    slots.push_back(request);

    /* Let's try and wake up the processor for the next cycle to step
     * the store buffer */
    lsq.cpu.wakeupOnEvent(Pipeline::ExecuteStageId);
}
668
669LSQ::AddrRangeCoverage
670LSQ::StoreBuffer::canForwardDataToLoad(LSQRequestPtr request,
671 unsigned int &found_slot)
672{
673 unsigned int slot_index = slots.size() - 1;
674 auto i = slots.rbegin();
675 AddrRangeCoverage ret = NoAddrRangeCoverage;
676
677 /* Traverse the store buffer in reverse order (most to least recent)
678 * and try to find a slot whose address range overlaps this request */
679 while (ret == NoAddrRangeCoverage && i != slots.rend()) {
680 LSQRequestPtr slot = *i;
681
3 * All rights reserved
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder. You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions are
16 * met: redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer;
18 * redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution;
21 * neither the name of the copyright holders nor the names of its
22 * contributors may be used to endorse or promote products derived from
23 * this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 *
37 * Authors: Andrew Bardsley
38 */
39
40#include "cpu/minor/lsq.hh"
41
42#include <iomanip>
43#include <sstream>
44
45#include "arch/locked_mem.hh"
46#include "arch/mmapped_ipr.hh"
47#include "cpu/minor/cpu.hh"
48#include "cpu/minor/exec_context.hh"
49#include "cpu/minor/execute.hh"
50#include "cpu/minor/pipeline.hh"
51#include "debug/Activity.hh"
52#include "debug/MinorMem.hh"
53
54namespace Minor
55{
56
57/** Returns the offset of addr into an aligned a block of size block_size */
58static Addr
59addrBlockOffset(Addr addr, unsigned int block_size)
60{
61 return addr & (block_size - 1);
62}
63
64/** Returns true if the given [addr .. addr+size-1] transfer needs to be
65 * fragmented across a block size of block_size */
66static bool
67transferNeedsBurst(Addr addr, unsigned int size, unsigned int block_size)
68{
69 return (addrBlockOffset(addr, block_size) + size) > block_size;
70}
71
/** Construct a request in its initial NotIssued state.  No packet is
 *  made yet; that happens once the request has been translated */
LSQ::LSQRequest::LSQRequest(LSQ &port_, MinorDynInstPtr inst_, bool isLoad_,
    PacketDataPtr data_, uint64_t *res_) :
    SenderState(),
    port(port_),
    inst(inst_),
    isLoad(isLoad_),
    data(data_),
    packet(NULL), /* Made later, after translation */
    request(),
    fault(NoFault),
    res(res_),
    skipped(false),
    issuedToMemory(false),
    state(NotIssued)
{ }
87
88LSQ::AddrRangeCoverage
89LSQ::LSQRequest::containsAddrRangeOf(
90 Addr req1_addr, unsigned int req1_size,
91 Addr req2_addr, unsigned int req2_size)
92{
93 /* 'end' here means the address of the byte just past the request
94 * blocks */
95 Addr req2_end_addr = req2_addr + req2_size;
96 Addr req1_end_addr = req1_addr + req1_size;
97
98 AddrRangeCoverage ret;
99
100 if (req1_addr >= req2_end_addr || req1_end_addr <= req2_addr)
101 ret = NoAddrRangeCoverage;
102 else if (req1_addr <= req2_addr && req1_end_addr >= req2_end_addr)
103 ret = FullAddrRangeCoverage;
104 else
105 ret = PartialAddrRangeCoverage;
106
107 return ret;
108}
109
110LSQ::AddrRangeCoverage
111LSQ::LSQRequest::containsAddrRangeOf(LSQRequestPtr other_request)
112{
113 return containsAddrRangeOf(request.getPaddr(), request.getSize(),
114 other_request->request.getPaddr(), other_request->request.getSize());
115}
116
117bool
118LSQ::LSQRequest::isBarrier()
119{
120 return inst->isInst() && inst->staticInst->isMemBarrier();
121}
122
/** True while this (store) request is waiting to be moved into the
 *  store buffer */
bool
LSQ::LSQRequest::needsToBeSentToStoreBuffer()
{
    return state == StoreToStoreBuffer;
}
128
/** Move the request's state machine to new_state, tracing the
 *  transition for debug */
void
LSQ::LSQRequest::setState(LSQRequestState new_state)
{
    /* Print before assigning so both old and new states are visible */
    DPRINTFS(MinorMem, (&port), "Setting state from %d to %d for request:"
        " %s\n", state, new_state, *inst);
    state = new_state;
}
136
/** Has this request finished all its memory activity? */
bool
LSQ::LSQRequest::isComplete() const
{
    /* @todo, There is currently only one 'completed' state. This
     * may not be a good choice */
    return state == Complete;
}
144
145void
146LSQ::LSQRequest::reportData(std::ostream &os) const
147{
148 os << (isLoad ? 'R' : 'W') << ';';
149 inst->reportData(os);
150 os << ';' << state;
151}
152
153std::ostream &
154operator <<(std::ostream &os, LSQ::AddrRangeCoverage coverage)
155{
156 switch (coverage) {
157 case LSQ::PartialAddrRangeCoverage:
158 os << "PartialAddrRangeCoverage";
159 break;
160 case LSQ::FullAddrRangeCoverage:
161 os << "FullAddrRangeCoverage";
162 break;
163 case LSQ::NoAddrRangeCoverage:
164 os << "NoAddrRangeCoverage";
165 break;
166 default:
167 os << "AddrRangeCoverage-" << static_cast<int>(coverage);
168 break;
169 }
170 return os;
171}
172
173std::ostream &
174operator <<(std::ostream &os, LSQ::LSQRequest::LSQRequestState state)
175{
176 switch (state) {
177 case LSQ::LSQRequest::NotIssued:
178 os << "NotIssued";
179 break;
180 case LSQ::LSQRequest::InTranslation:
181 os << "InTranslation";
182 break;
183 case LSQ::LSQRequest::Translated:
184 os << "Translated";
185 break;
186 case LSQ::LSQRequest::Failed:
187 os << "Failed";
188 break;
189 case LSQ::LSQRequest::RequestIssuing:
190 os << "RequestIssuing";
191 break;
192 case LSQ::LSQRequest::StoreToStoreBuffer:
193 os << "StoreToStoreBuffer";
194 break;
195 case LSQ::LSQRequest::StoreInStoreBuffer:
196 os << "StoreInStoreBuffer";
197 break;
198 case LSQ::LSQRequest::StoreBufferIssuing:
199 os << "StoreBufferIssuing";
200 break;
201 case LSQ::LSQRequest::RequestNeedsRetry:
202 os << "RequestNeedsRetry";
203 break;
204 case LSQ::LSQRequest::StoreBufferNeedsRetry:
205 os << "StoreBufferNeedsRetry";
206 break;
207 case LSQ::LSQRequest::Complete:
208 os << "Complete";
209 break;
210 default:
211 os << "LSQRequestState-" << static_cast<int>(state);
212 break;
213 }
214 return os;
215}
216
/** Account for barrier inst leaving the store buffer.  If it was the
 *  most recently issued barrier for its thread, forget it */
void
LSQ::clearMemBarrier(MinorDynInstPtr inst)
{
    bool is_last_barrier =
        inst->id.execSeqNum >= lastMemBarrier[inst->id.threadId];

    DPRINTF(MinorMem, "Moving %s barrier out of store buffer inst: %s\n",
        (is_last_barrier ? "last" : "a"), *inst);

    /* 0 appears to act as the 'no outstanding barrier' sentinel for the
     * thread -- TODO confirm against lastMemBarrier's other users */
    if (is_last_barrier)
        lastMemBarrier[inst->id.threadId] = 0;
}
229
/** Translation interface callback: the DTLB has finished translating
 *  this request.  Any translation fault is recorded on the request for
 *  later handling */
void
LSQ::SingleDataRequest::finish(const Fault &fault_, RequestPtr request_,
    ThreadContext *tc, BaseTLB::Mode mode)
{
    fault = fault_;

    port.numAccessesInDTLB--;

    DPRINTFS(MinorMem, (&port), "Received translation response for"
        " request: %s\n", *inst);

    /* Make the memory packet now translation is done */
    makePacket();

    setState(Translated);
    port.tryToSendToTransfers(this);

    /* Let's try and wake up the processor for the next cycle */
    port.cpu.wakeupOnEvent(Pipeline::ExecuteStageId);
}
249
/** Hand this request to the DTLB for (timing) address translation */
void
LSQ::SingleDataRequest::startAddrTranslation()
{
    ThreadContext *thread = port.cpu.getContext(
        inst->id.threadId);

    port.numAccessesInDTLB++;

    setState(LSQ::LSQRequest::InTranslation);

    DPRINTFS(MinorMem, (&port), "Submitting DTLB request\n");
    /* Submit the translation request.  The response will come through
     * finish/markDelayed on the LSQRequest as it bears the Translation
     * interface */
    thread->getDTBPtr()->translateTiming(
        &request, thread, this, (isLoad ? BaseTLB::Read : BaseTLB::Write));
}
267
/** Absorb the memory system's response packet and mark the whole
 *  request as finished */
void
LSQ::SingleDataRequest::retireResponse(PacketPtr packet_)
{
    DPRINTFS(MinorMem, (&port), "Retiring packet\n");
    packet = packet_;
    packetInFlight = false;
    setState(Complete);
}
276
/** Translation interface callback for one fragment.  Fragments are
 *  translated strictly in order, one at a time */
void
LSQ::SplitDataRequest::finish(const Fault &fault_, RequestPtr request_,
    ThreadContext *tc, BaseTLB::Mode mode)
{
    fault = fault_;

    port.numAccessesInDTLB--;

    /* The fragment that just finished is the one after the last
     * translated one (in-order translation) */
    unsigned int M5_VAR_USED expected_fragment_index =
        numTranslatedFragments;

    numInTranslationFragments--;
    numTranslatedFragments++;

    DPRINTFS(MinorMem, (&port), "Received translation response for fragment"
        " %d of request: %s\n", expected_fragment_index, *inst);

    assert(request_ == fragmentRequests[expected_fragment_index]);

    /* Wake up next cycle to get things going again in case the
     * tryToSendToTransfers does take */
    port.cpu.wakeupOnEvent(Pipeline::ExecuteStageId);

    if (fault != NoFault) {
        /* A faulting fragment ends translation of the whole request;
         * tryToSendToTransfers will handle the fault */
        DPRINTFS(MinorMem, (&port), "Faulting translation for fragment:"
            " %d of request: %s\n",
            expected_fragment_index, *inst);

        setState(Translated);
        port.tryToSendToTransfers(this);
    } else if (numTranslatedFragments == numFragments) {
        /* All fragments translated: build their packets and move on */
        makeFragmentPackets();

        setState(Translated);
        port.tryToSendToTransfers(this);
    } else {
        /* Avoid calling translateTiming from within ::finish */
        assert(!translationEvent.scheduled());
        port.cpu.schedule(translationEvent, curTick());
    }
}
320
/** Construct a request that will be split into several line-sized
 *  fragment requests/packets */
LSQ::SplitDataRequest::SplitDataRequest(LSQ &port_, MinorDynInstPtr inst_,
    bool isLoad_, PacketDataPtr data_, uint64_t *res_) :
    LSQRequest(port_, inst_, isLoad_, data_, res_),
    /* Event used to defer translateTiming calls out of ::finish */
    translationEvent([this]{ sendNextFragmentToTranslation(); },
        "translationEvent"),
    numFragments(0),
    numInTranslationFragments(0),
    numTranslatedFragments(0),
    numIssuedFragments(0),
    numRetiredFragments(0),
    fragmentRequests(),
    fragmentPackets()
{
    /* Don't know how many elements are needed until the request is
     * populated by the caller. */
}
337
338LSQ::SplitDataRequest::~SplitDataRequest()
339{
340 for (auto i = fragmentRequests.begin();
341 i != fragmentRequests.end(); i++)
342 {
343 delete *i;
344 }
345
346 for (auto i = fragmentPackets.begin();
347 i != fragmentPackets.end(); i++)
348 {
349 delete *i;
350 }
351}
352
/* Break the whole (potentially line-crossing) request up into
 * fragmentRequests: a first fragment up to the next line boundary,
 * zero or more full-line middle fragments, and a possibly-empty last
 * fragment.  Fragment vaddrs/sizes are derived purely from the parent
 * request's vaddr and size; flags, context, master ID and PC are
 * inherited from the parent. */
void
LSQ::SplitDataRequest::makeFragmentRequests()
{
    Addr base_addr = request.getVaddr();
    unsigned int whole_size = request.getSize();
    unsigned int line_width = port.lineWidth;

    unsigned int fragment_size;
    Addr fragment_addr;

    /* Assume that this transfer is across potentially many block snap
     * boundaries:
     *
     * |      _|________|________|________|___     |
     * |     |0| 1      | 2      | 3      | 4 |    |
     * |     |_|________|________|________|___|    |
     * |       |        |        |        |        |
     *
     *  The first transfer (0) can be up to lineWidth in size.
     *  All the middle transfers (1-3) are lineWidth in size
     *  The last transfer (4) can be from zero to lineWidth - 1 in size
     */
    unsigned int first_fragment_offset =
        addrBlockOffset(base_addr, line_width);
    unsigned int last_fragment_size =
        addrBlockOffset(base_addr + whole_size, line_width);
    unsigned int first_fragment_size =
        line_width - first_fragment_offset;

    unsigned int middle_fragments_total_size =
        whole_size - (first_fragment_size + last_fragment_size);

    /* The middle section must be an exact number of lines */
    assert(addrBlockOffset(middle_fragments_total_size, line_width) == 0);

    unsigned int middle_fragment_count =
        middle_fragments_total_size / line_width;

    /* A last_fragment_size of 0 means the transfer ends exactly on a
     * line boundary, so no trailing fragment is needed */
    numFragments = 1 /* first */ + middle_fragment_count +
        (last_fragment_size == 0 ? 0 : 1);

    DPRINTFS(MinorMem, (&port), "Dividing transfer into %d fragmentRequests."
        " First fragment size: %d Last fragment size: %d\n",
        numFragments, first_fragment_size,
        (last_fragment_size == 0 ? line_width : last_fragment_size));

    /* Sanity check: the fragments exactly cover the whole transfer */
    assert(((middle_fragment_count * line_width) +
        first_fragment_size + last_fragment_size) == whole_size);

    fragment_addr = base_addr;
    fragment_size = first_fragment_size;

    /* Just past the last address in the request */
    Addr end_addr = base_addr + whole_size;

    for (unsigned int fragment_index = 0; fragment_index < numFragments;
         fragment_index++)
    {
        bool M5_VAR_USED is_last_fragment = false;

        if (fragment_addr == base_addr) {
            /* First fragment */
            fragment_size = first_fragment_size;
        } else {
            if ((fragment_addr + line_width) > end_addr) {
                /* Adjust size of last fragment */
                fragment_size = end_addr - fragment_addr;
                is_last_fragment = true;
            } else {
                /* Middle fragments */
                fragment_size = line_width;
            }
        }

        Request *fragment = new Request();

        /* Fragments share the parent's context, flags, master ID and PC
         * but carry their own vaddr/size */
        fragment->setContext(request.contextId());
        fragment->setVirt(0 /* asid */,
            fragment_addr, fragment_size, request.getFlags(),
            request.masterId(),
            request.getPC());

        DPRINTFS(MinorMem, (&port), "Generating fragment addr: 0x%x size: %d"
            " (whole request addr: 0x%x size: %d) %s\n",
            fragment_addr, fragment_size, base_addr, whole_size,
            (is_last_fragment ? "last fragment" : ""));

        fragment_addr += fragment_size;

        fragmentRequests.push_back(fragment);
    }
}
444
/* Make a Packet for each translated fragment Request.  For stores, the
 * parent request's data is sliced up between the fragment packets which
 * then own their portion.  Also builds the overall response packet and
 * gives the parent request its paddr (from the first fragment). */
void
LSQ::SplitDataRequest::makeFragmentPackets()
{
    Addr base_addr = request.getVaddr();

    DPRINTFS(MinorMem, (&port), "Making packets for request: %s\n", *inst);

    for (unsigned int fragment_index = 0; fragment_index < numFragments;
         fragment_index++)
    {
        Request *fragment = fragmentRequests[fragment_index];

        DPRINTFS(MinorMem, (&port), "Making packet %d for request: %s"
            " (%d, 0x%x)\n",
            fragment_index, *inst,
            (fragment->hasPaddr() ? "has paddr" : "no paddr"),
            (fragment->hasPaddr() ? fragment->getPaddr() : 0));

        Addr fragment_addr = fragment->getVaddr();
        unsigned int fragment_size = fragment->getSize();

        uint8_t *request_data = NULL;

        if (!isLoad) {
            /* Split data for Packets.  Will become the property of the
             * outgoing Packets.  The slice is located by the fragment's
             * offset from the base vaddr. */
            request_data = new uint8_t[fragment_size];
            std::memcpy(request_data, data + (fragment_addr - base_addr),
                fragment_size);
        }

        /* Translation must have completed before packets can be made */
        assert(fragment->hasPaddr());

        PacketPtr fragment_packet =
            makePacketForRequest(*fragment, isLoad, this, request_data);

        fragmentPackets.push_back(fragment_packet);
        /* Accumulate flags in parent request */
        request.setFlags(fragment->getFlags());
    }

    /* Might as well make the overall/response packet here */
    /* Get the physical address for the whole request/packet from the first
     * fragment */
    request.setPaddr(fragmentRequests[0]->getPaddr());
    makePacket();
}
492
/* Kick off address translation for a split request: build the fragment
 * Requests and then feed them, one at a time, to the DTLB. */
void
LSQ::SplitDataRequest::startAddrTranslation()
{
    setState(LSQ::LSQRequest::InTranslation);

    makeFragmentRequests();

    numInTranslationFragments = 0;
    numTranslatedFragments = 0;

    /* @todo, just do these in sequence for now with
     * a loop of:
     * do {
     *  sendNextFragmentToTranslation ; translateTiming ; finish
     * } while (numTranslatedFragments != numFragments);
     */

    /* Do first translation */
    sendNextFragmentToTranslation();
}
513
/* Return the next fragment packet still to be issued to memory.
 * Must not be called once all fragments have been issued. */
PacketPtr
LSQ::SplitDataRequest::getHeadPacket()
{
    assert(numIssuedFragments < numFragments);

    return fragmentPackets[numIssuedFragments];
}
521
/* Advance the issue pointer past the packet returned by getHeadPacket */
void
LSQ::SplitDataRequest::stepToNextPacket()
{
    assert(numIssuedFragments < numFragments);

    numIssuedFragments++;
}
529
/* Retire one fragment's response.  For loads, the fragment's data is
 * copied into the overall response buffer; when the final fragment
 * retires (or a skipped/errored burst drains) the request becomes
 * Complete and the overall packet is turned into a response. */
void
LSQ::SplitDataRequest::retireResponse(PacketPtr response)
{
    assert(numRetiredFragments < numFragments);

    DPRINTFS(MinorMem, (&port), "Retiring fragment addr: 0x%x size: %d"
        " offset: 0x%x (retired fragment num: %d) %s\n",
        response->req->getVaddr(), response->req->getSize(),
        request.getVaddr() - response->req->getVaddr(),
        numRetiredFragments,
        (fault == NoFault ? "" : fault->name()));

    numRetiredFragments++;

    if (skipped) {
        /* Skip because we already knew the request had faulted or been
         * skipped */
        DPRINTFS(MinorMem, (&port), "Skipping this fragment\n");
    } else if (response->isError()) {
        /* Mark up the error and leave to execute to handle it */
        DPRINTFS(MinorMem, (&port), "Fragment has an error, skipping\n");
        setSkipped();
        packet->copyError(response);
    } else {
        if (isLoad) {
            if (!data) {
                /* For a split transfer, a Packet must be constructed
                 * to contain all returning data.  This is that packet's
                 * data */
                data = new uint8_t[request.getSize()];
            }

            /* Populate the portion of the overall response data represented
             * by the response fragment */
            std::memcpy(
                data + (response->req->getVaddr() - request.getVaddr()),
                response->getConstPtr<uint8_t>(),
                response->req->getSize());
        }
    }

    /* Complete early if we're skipping and there are no more in-flight
     * accesses */
    if (skipped && !hasPacketsInMemSystem()) {
        DPRINTFS(MinorMem, (&port), "Completed skipped burst\n");
        setState(Complete);
        if (packet->needsResponse())
            packet->makeResponse();
    }

    if (numRetiredFragments == numFragments)
        setState(Complete);

    if (!skipped && isComplete()) {
        DPRINTFS(MinorMem, (&port), "Completed burst %d\n", packet != NULL);

        DPRINTFS(MinorMem, (&port), "Retired packet isRead: %d isWrite: %d"
            " needsResponse: %d packetSize: %s requestSize: %s responseSize:"
            " %s\n", packet->isRead(), packet->isWrite(),
            packet->needsResponse(), packet->getSize(), request.getSize(),
            response->getSize());

        /* A request can become complete by several paths, this is a sanity
         * check to make sure the packet's data is created */
        if (!data) {
            data = new uint8_t[request.getSize()];
        }

        if (isLoad) {
            DPRINTFS(MinorMem, (&port), "Copying read data\n");
            std::memcpy(packet->getPtr<uint8_t>(), data, request.getSize());
        }
        packet->makeResponse();
    }

    /* Packets are all deallocated together in ~SplitLSQRequest */
}
606
/* Submit the next untranslated fragment Request to the DTLB.
 * Fragments are translated strictly in order, one at a time. */
void
LSQ::SplitDataRequest::sendNextFragmentToTranslation()
{
    unsigned int fragment_index = numTranslatedFragments;

    ThreadContext *thread = port.cpu.getContext(
        inst->id.threadId);

    DPRINTFS(MinorMem, (&port), "Submitting DTLB request for fragment: %d\n",
        fragment_index);

    port.numAccessesInDTLB++;
    numInTranslationFragments++;

    thread->getDTBPtr()->translateTiming(
        fragmentRequests[fragment_index], thread, this, (isLoad ?
        BaseTLB::Read : BaseTLB::Write));
}
625
626bool
627LSQ::StoreBuffer::canInsert() const
628{
629 /* @todo, support store amalgamation */
630 return slots.size() < numSlots;
631}
632
633void
634LSQ::StoreBuffer::deleteRequest(LSQRequestPtr request)
635{
636 auto found = std::find(slots.begin(), slots.end(), request);
637
638 if (found != slots.end()) {
639 DPRINTF(MinorMem, "Deleting request: %s %s %s from StoreBuffer\n",
640 request, *found, *(request->inst));
641 slots.erase(found);
642
643 delete request;
644 }
645}
646
/* Append a store (or barrier) request to the store buffer.  The caller
 * is expected to have checked canInsert(); over-filling only warns. */
void
LSQ::StoreBuffer::insert(LSQRequestPtr request)
{
    if (!canInsert()) {
        warn("%s: store buffer insertion without space to insert from"
            " inst: %s\n", name(), *(request->inst));
    }

    DPRINTF(MinorMem, "Pushing store: %s into store buffer\n", request);

    numUnissuedAccesses++;

    /* Already-Complete requests (e.g. barriers) keep their state */
    if (request->state != LSQRequest::Complete)
        request->setState(LSQRequest::StoreInStoreBuffer);

    slots.push_back(request);

    /* Let's try and wake up the processor for the next cycle to step
     * the store buffer */
    lsq.cpu.wakeupOnEvent(Pipeline::ExecuteStageId);
}
668
669LSQ::AddrRangeCoverage
670LSQ::StoreBuffer::canForwardDataToLoad(LSQRequestPtr request,
671 unsigned int &found_slot)
672{
673 unsigned int slot_index = slots.size() - 1;
674 auto i = slots.rbegin();
675 AddrRangeCoverage ret = NoAddrRangeCoverage;
676
677 /* Traverse the store buffer in reverse order (most to least recent)
678 * and try to find a slot whose address range overlaps this request */
679 while (ret == NoAddrRangeCoverage && i != slots.rend()) {
680 LSQRequestPtr slot = *i;
681
682 /* Cache maintenance instructions go down via the store path *
683 * but they carry no data and they shouldn't be considered for
684 * forwarding */
682 if (slot->packet &&
685 if (slot->packet &&
683 slot->inst->id.threadId == request->inst->id.threadId) {
686 slot->inst->id.threadId == request->inst->id.threadId &&
687 !slot->packet->req->isCacheMaintenance()) {
684 AddrRangeCoverage coverage = slot->containsAddrRangeOf(request);
685
686 if (coverage != NoAddrRangeCoverage) {
687 DPRINTF(MinorMem, "Forwarding: slot: %d result: %s thisAddr:"
688 " 0x%x thisSize: %d slotAddr: 0x%x slotSize: %d\n",
689 slot_index, coverage,
690 request->request.getPaddr(), request->request.getSize(),
691 slot->request.getPaddr(), slot->request.getSize());
692
693 found_slot = slot_index;
694 ret = coverage;
695 }
696 }
697
698 i++;
699 slot_index--;
700 }
701
702 return ret;
703}
704
705/** Fill the given packet with appropriate date from slot slot_number */
706void
707LSQ::StoreBuffer::forwardStoreData(LSQRequestPtr load,
708 unsigned int slot_number)
709{
710 assert(slot_number < slots.size());
711 assert(load->packet);
712 assert(load->isLoad);
713
714 LSQRequestPtr store = slots[slot_number];
715
716 assert(store->packet);
717 assert(store->containsAddrRangeOf(load) == FullAddrRangeCoverage);
718
719 Addr load_addr = load->request.getPaddr();
720 Addr store_addr = store->request.getPaddr();
721 Addr addr_offset = load_addr - store_addr;
722
723 unsigned int load_size = load->request.getSize();
724
725 DPRINTF(MinorMem, "Forwarding %d bytes for addr: 0x%x from store buffer"
726 " slot: %d addr: 0x%x addressOffset: 0x%x\n",
727 load_size, load_addr, slot_number,
728 store_addr, addr_offset);
729
730 void *load_packet_data = load->packet->getPtr<void>();
731 void *store_packet_data = store->packet->getPtr<uint8_t>() + addr_offset;
732
733 std::memcpy(load_packet_data, store_packet_data, load_size);
734}
735
/* Account for a store leaving the unissued pool once it is sent.
 * Barriers are accounted for as they are cleared from
 * the queue, not after their transfers are complete */
void
LSQ::StoreBuffer::countIssuedStore(LSQRequestPtr request)
{
    if (!request->isBarrier())
        numUnissuedAccesses--;
}
744
/* One cycle of store buffer activity: clear completed barriers from the
 * head, then issue stores to memory in queue order (up to
 * storeLimitPerCycle per cycle).  Responses may retire out of order,
 * but issue stops at the first entry that cannot proceed so ordering
 * is preserved. */
void
LSQ::StoreBuffer::step()
{
    DPRINTF(MinorMem, "StoreBuffer step numUnissuedAccesses: %d\n",
        numUnissuedAccesses);

    if (numUnissuedAccesses != 0 && lsq.state == LSQ::MemoryRunning) {
        /* Clear all the leading barriers */
        while (!slots.empty() &&
            slots.front()->isComplete() && slots.front()->isBarrier())
        {
            LSQRequestPtr barrier = slots.front();

            DPRINTF(MinorMem, "Clearing barrier for inst: %s\n",
                *(barrier->inst));

            numUnissuedAccesses--;
            lsq.clearMemBarrier(barrier->inst);
            slots.pop_front();

            delete barrier;
        }

        auto i = slots.begin();
        bool issued = true;
        unsigned int issue_count = 0;

        /* Skip trying if the memory system is busy */
        if (lsq.state == LSQ::MemoryNeedsRetry)
            issued = false;

        /* Try to issue all stores in order starting from the head
         * of the queue.  Responses are allowed to be retired
         * out of order */
        while (issued &&
            issue_count < storeLimitPerCycle &&
            lsq.canSendToMemorySystem() &&
            i != slots.end())
        {
            LSQRequestPtr request = *i;

            DPRINTF(MinorMem, "Considering request: %s, sentAllPackets: %d"
                " state: %s\n",
                *(request->inst), request->sentAllPackets(),
                request->state);

            if (request->isBarrier() && request->isComplete()) {
                /* Give up at barriers */
                issued = false;
            } else if (!(request->state == LSQRequest::StoreBufferIssuing &&
                request->sentAllPackets()))
            {
                DPRINTF(MinorMem, "Trying to send request: %s to memory"
                    " system\n", *(request->inst));

                if (lsq.tryToSend(request)) {
                    countIssuedStore(request);
                    issue_count++;
                } else {
                    /* Don't step on to the next store buffer entry if this
                     * one hasn't issued all its packets as the store
                     * buffer must still enforce ordering */
                    issued = false;
                }
            }
            i++;
        }
    }
}
814
/* Handle a memory barrier instruction leaving Execute.  A committed
 * barrier that isn't already in the store buffer gets a placeholder
 * BarrierDataRequest entry so barriers are cleared in order; an
 * uncommitted one just has its barrier effect cancelled. */
void
LSQ::completeMemBarrierInst(MinorDynInstPtr inst,
    bool committed)
{
    if (committed) {
        /* Not already sent to the store buffer as a store request? */
        if (!inst->inStoreBuffer) {
            /* Insert an entry into the store buffer to tick off barriers
             * until there are none in flight */
            storeBuffer.insert(new BarrierDataRequest(*this, inst));
        }
    } else {
        /* Clear the barrier anyway if it wasn't actually committed */
        clearMemBarrier(inst);
    }
}
831
832void
833LSQ::StoreBuffer::minorTrace() const
834{
835 unsigned int size = slots.size();
836 unsigned int i = 0;
837 std::ostringstream os;
838
839 while (i < size) {
840 LSQRequestPtr request = slots[i];
841
842 request->reportData(os);
843
844 i++;
845 if (i < numSlots)
846 os << ',';
847 }
848
849 while (i < numSlots) {
850 os << '-';
851
852 i++;
853 if (i < numSlots)
854 os << ',';
855 }
856
857 MINORTRACE("addr=%s num_unissued_stores=%d\n", os.str(),
858 numUnissuedAccesses);
859}
860
/* Try to move the request at the head of the requests queue into the
 * transfers queue, issuing it to memory if needed.  This is the main
 * decision point of the LSQ: it handles retry/translation stalls,
 * wrong-stream and faulting requests (completed as skipped), routing
 * of bufferable stores to the store buffer, store-buffer forwarding
 * for loads, and LLSC/swap/strictly-ordered special casing.  Returns
 * early (leaving the request where it is) whenever progress can't be
 * made this cycle. */
void
LSQ::tryToSendToTransfers(LSQRequestPtr request)
{
    if (state == MemoryNeedsRetry) {
        DPRINTF(MinorMem, "Request needs retry, not issuing to"
            " memory until retry arrives\n");
        return;
    }

    if (request->state == LSQRequest::InTranslation) {
        DPRINTF(MinorMem, "Request still in translation, not issuing to"
            " memory\n");
        return;
    }

    assert(request->state == LSQRequest::Translated ||
        request->state == LSQRequest::RequestIssuing ||
        request->state == LSQRequest::Failed ||
        request->state == LSQRequest::Complete);

    if (requests.empty() || requests.front() != request) {
        DPRINTF(MinorMem, "Request not at front of requests queue, can't"
            " issue to memory\n");
        return;
    }

    if (transfers.unreservedRemainingSpace() == 0) {
        DPRINTF(MinorMem, "No space to insert request into transfers"
            " queue\n");
        return;
    }

    /* Already-resolved requests (completed or failed) just move across */
    if (request->isComplete() || request->state == LSQRequest::Failed) {
        DPRINTF(MinorMem, "Passing a %s transfer on to transfers"
            " queue\n", (request->isComplete() ? "completed" : "failed"));
        request->setState(LSQRequest::Complete);
        request->setSkipped();
        moveFromRequestsToTransfers(request);
        return;
    }

    if (!execute.instIsRightStream(request->inst)) {
        /* Wrong stream, try to abort the transfer but only do so if
         * there are no packets in flight */
        if (request->hasPacketsInMemSystem()) {
            DPRINTF(MinorMem, "Request's inst. is from the wrong stream,"
                " waiting for responses before aborting request\n");
        } else {
            DPRINTF(MinorMem, "Request's inst. is from the wrong stream,"
                " aborting request\n");
            request->setState(LSQRequest::Complete);
            request->setSkipped();
            moveFromRequestsToTransfers(request);
        }
        return;
    }

    if (request->fault != NoFault) {
        if (request->inst->staticInst->isPrefetch()) {
            DPRINTF(MinorMem, "Not signalling fault for faulting prefetch\n");
        }
        DPRINTF(MinorMem, "Moving faulting request into the transfers"
            " queue\n");
        request->setState(LSQRequest::Complete);
        request->setSkipped();
        moveFromRequestsToTransfers(request);
        return;
    }

    bool is_load = request->isLoad;
    bool is_llsc = request->request.isLLSC();
    bool is_swap = request->request.isSwap();
    /* Only plain stores may be deferred to the store buffer */
    bool bufferable = !(request->request.isStrictlyOrdered() ||
        is_llsc || is_swap);

    if (is_load) {
        /* Loads must see the effect of all earlier stores first */
        if (numStoresInTransfers != 0) {
            DPRINTF(MinorMem, "Load request with stores still in transfers"
                " queue, stalling\n");
            return;
        }
    } else {
        /* Store.  Can it be sent to the store buffer? */
        if (bufferable && !request->request.isMmappedIpr()) {
            request->setState(LSQRequest::StoreToStoreBuffer);
            moveFromRequestsToTransfers(request);
            DPRINTF(MinorMem, "Moving store into transfers queue\n");
            return;
        }
    }

    /* Check if this is the head instruction (and so must be executable as
     * its stream sequence number was checked above) for loads which must
     * not be speculatively issued and stores which must be issued here */
    if (!bufferable) {
        if (!execute.instIsHeadInst(request->inst)) {
            DPRINTF(MinorMem, "Memory access not the head inst., can't be"
                " sure it can be performed, not issuing\n");
            return;
        }

        unsigned int forwarding_slot = 0;

        /* Non-bufferable accesses must not race with forwardable data
         * still sitting in the store buffer */
        if (storeBuffer.canForwardDataToLoad(request, forwarding_slot) !=
            NoAddrRangeCoverage)
        {
            DPRINTF(MinorMem, "Memory access can receive forwarded data"
                " from the store buffer, need to wait for store buffer to"
                " drain\n");
            return;
        }
    }

    /* True: submit this packet to the transfers queue to be sent to the
     * memory system.
     * False: skip the memory and push a packet for this request onto
     * requests */
    bool do_access = true;

    if (!is_llsc) {
        /* Check for match in the store buffer */
        if (is_load) {
            unsigned int forwarding_slot = 0;
            AddrRangeCoverage forwarding_result =
                storeBuffer.canForwardDataToLoad(request,
                forwarding_slot);

            switch (forwarding_result) {
              case FullAddrRangeCoverage:
                /* Forward data from the store buffer into this request and
                 * repurpose this request's packet into a response packet */
                storeBuffer.forwardStoreData(request, forwarding_slot);
                request->packet->makeResponse();

                /* Just move between queues, no access */
                do_access = false;
                break;
              case PartialAddrRangeCoverage:
                DPRINTF(MinorMem, "Load partly satisfied by store buffer"
                    " data. Must wait for the store to complete\n");
                return;
                break;
              case NoAddrRangeCoverage:
                DPRINTF(MinorMem, "No forwardable data from store buffer\n");
                /* Fall through to try access */
                break;
            }
        }
    } else {
        if (!canSendToMemorySystem()) {
            DPRINTF(MinorMem, "Can't send request to memory system yet\n");
            return;
        }

        SimpleThread &thread = *cpu.threads[request->inst->id.threadId];

        TheISA::PCState old_pc = thread.pcState();
        ExecContext context(cpu, thread, execute, request->inst);

        /* Handle LLSC requests and tests */
        if (is_load) {
            TheISA::handleLockedRead(&context, &request->request);
        } else {
            do_access = TheISA::handleLockedWrite(&context,
                &request->request, cacheBlockMask);

            if (!do_access) {
                DPRINTF(MinorMem, "Not perfoming a memory "
                    "access for store conditional\n");
            }
        }
        thread.pcState(old_pc);
    }

    /* See the do_access comment above */
    if (do_access) {
        if (!canSendToMemorySystem()) {
            DPRINTF(MinorMem, "Can't send request to memory system yet\n");
            return;
        }

        /* Remember if this is an access which can't be idly
         * discarded by an interrupt */
        if (!bufferable && !request->issuedToMemory) {
            numAccessesIssuedToMemory++;
            request->issuedToMemory = true;
        }

        if (tryToSend(request)) {
            moveFromRequestsToTransfers(request);
        }
    } else {
        request->setState(LSQRequest::Complete);
        moveFromRequestsToTransfers(request);
    }
}
1057
/* Try to send the request's head packet to the memory system (or
 * perform it locally for memory-mapped IPR accesses).  Returns true
 * when all of the request's packets have been sent.  On a failed send
 * the LSQ enters MemoryNeedsRetry and remembers the request for
 * recvReqRetry. */
bool
LSQ::tryToSend(LSQRequestPtr request)
{
    bool ret = false;

    if (!canSendToMemorySystem()) {
        DPRINTF(MinorMem, "Can't send request: %s yet, no space in memory\n",
            *(request->inst));
    } else {
        PacketPtr packet = request->getHeadPacket();

        DPRINTF(MinorMem, "Trying to send request: %s addr: 0x%x\n",
            *(request->inst), packet->req->getVaddr());

        /* The sender state of the packet *must* be an LSQRequest
         * so the response can be correctly handled */
        assert(packet->findNextSenderState<LSQRequest>());

        if (request->request.isMmappedIpr()) {
            /* IPR accesses are handled in place, not sent to memory */
            ThreadContext *thread =
                cpu.getContext(cpu.contextToThread(
                            request->request.contextId()));

            if (request->isLoad) {
                DPRINTF(MinorMem, "IPR read inst: %s\n", *(request->inst));
                TheISA::handleIprRead(thread, packet);
            } else {
                DPRINTF(MinorMem, "IPR write inst: %s\n", *(request->inst));
                TheISA::handleIprWrite(thread, packet);
            }

            request->stepToNextPacket();
            ret = request->sentAllPackets();

            if (!ret) {
                DPRINTF(MinorMem, "IPR access has another packet: %s\n",
                    *(request->inst));
            }

            if (ret)
                request->setState(LSQRequest::Complete);
            else
                request->setState(LSQRequest::RequestIssuing);
        } else if (dcachePort.sendTimingReq(packet)) {
            DPRINTF(MinorMem, "Sent data memory request\n");

            numAccessesInMemorySystem++;

            request->stepToNextPacket();

            ret = request->sentAllPackets();

            /* Keep the issuing state matched to whichever queue the
             * request came from */
            switch (request->state) {
              case LSQRequest::Translated:
              case LSQRequest::RequestIssuing:
                /* Fully or partially issued a request in the transfers
                 * queue */
                request->setState(LSQRequest::RequestIssuing);
                break;
              case LSQRequest::StoreInStoreBuffer:
              case LSQRequest::StoreBufferIssuing:
                /* Fully or partially issued a request in the store
                 * buffer */
                request->setState(LSQRequest::StoreBufferIssuing);
                break;
              default:
                assert(false);
                break;
            }

            state = MemoryRunning;
        } else {
            DPRINTF(MinorMem,
                "Sending data memory request - needs retry\n");

            /* Needs to be resent, wait for that */
            state = MemoryNeedsRetry;
            retryRequest = request;

            switch (request->state) {
              case LSQRequest::Translated:
              case LSQRequest::RequestIssuing:
                request->setState(LSQRequest::RequestNeedsRetry);
                break;
              case LSQRequest::StoreInStoreBuffer:
              case LSQRequest::StoreBufferIssuing:
                request->setState(LSQRequest::StoreBufferNeedsRetry);
                break;
              default:
                assert(false);
                break;
            }
        }
    }

    if (ret)
        threadSnoop(request);

    return ret;
}
1158
/* Pop the request from the head of the requests queue and push it onto
 * the transfers queue, maintaining the in-transfers store count. */
void
LSQ::moveFromRequestsToTransfers(LSQRequestPtr request)
{
    assert(!requests.empty() && requests.front() == request);
    assert(transfers.unreservedRemainingSpace() != 0);

    /* Need to count the number of stores in the transfers
     * queue so that loads know when their store buffer forwarding
     * results will be correct (only when all those stores
     * have reached the store buffer) */
    if (!request->isLoad)
        numStoresInTransfers++;

    requests.pop();
    transfers.push(request);
}
1175
1176bool
1177LSQ::canSendToMemorySystem()
1178{
1179 return state == MemoryRunning &&
1180 numAccessesInMemorySystem < inMemorySystemLimit;
1181}
1182
/* Handle a response packet from the memory system.  The owning
 * LSQRequest is recovered from the packet's sender state and told to
 * retire the response; completed store buffer requests are deleted
 * here (barriers are left to be cleared in order by the store buffer).
 * Always returns true: the LSQ never refuses a response. */
bool
LSQ::recvTimingResp(PacketPtr response)
{
    LSQRequestPtr request =
        safe_cast<LSQRequestPtr>(response->popSenderState());

    DPRINTF(MinorMem, "Received response packet inst: %s"
        " addr: 0x%x cmd: %s\n",
        *(request->inst), response->getAddr(),
        response->cmd.toString());

    numAccessesInMemorySystem--;

    if (response->isError()) {
        DPRINTF(MinorMem, "Received error response packet: %s\n",
            *request->inst);
    }

    switch (request->state) {
      case LSQRequest::RequestIssuing:
      case LSQRequest::RequestNeedsRetry:
        /* Response to a request from the transfers queue */
        request->retireResponse(response);

        DPRINTF(MinorMem, "Has outstanding packets?: %d %d\n",
            request->hasPacketsInMemSystem(), request->isComplete());

        break;
      case LSQRequest::StoreBufferIssuing:
      case LSQRequest::StoreBufferNeedsRetry:
        /* Response to a request from the store buffer */
        request->retireResponse(response);

        /* Remove completed requests unless they are barriers (which will
         * need to be removed in order) */
        if (request->isComplete()) {
            if (!request->isBarrier()) {
                storeBuffer.deleteRequest(request);
            } else {
                DPRINTF(MinorMem, "Completed transfer for barrier: %s"
                    " leaving the request as it is also a barrier\n",
                    *(request->inst));
            }
        }
        break;
      default:
        /* Shouldn't be allowed to receive a response from another
         * state */
        assert(false);
        break;
    }

    /* We go to idle even if there are more things in the requests queue
     * as it's the job of step to actually step us on to the next
     * transaction */

    /* Let's try and wake up the processor for the next cycle */
    cpu.wakeupOnEvent(Pipeline::ExecuteStageId);

    /* Never busy */
    return true;
}
1245
/* Handle a retry notification from the memory system: put the stalled
 * request back into its pre-send state, re-enable sending and resend.
 * On success the request is moved on (into transfers, or counted as an
 * issued store) and the retry slot is cleared. */
void
LSQ::recvReqRetry()
{
    DPRINTF(MinorMem, "Received retry request\n");

    assert(state == MemoryNeedsRetry);

    switch (retryRequest->state) {
      case LSQRequest::RequestNeedsRetry:
        /* Retry in the requests queue */
        retryRequest->setState(LSQRequest::Translated);
        break;
      case LSQRequest::StoreBufferNeedsRetry:
        /* Retry in the store buffer */
        retryRequest->setState(LSQRequest::StoreInStoreBuffer);
        break;
      default:
        assert(false);
    }

    /* Set state back to MemoryRunning so that the following
     * tryToSend can actually send.  Note that this won't
     * allow another transfer in as tryToSend should
     * issue a memory request and either succeed for this
     * request or return the LSQ back to MemoryNeedsRetry */
    state = MemoryRunning;

    /* Try to resend the request */
    if (tryToSend(retryRequest)) {
        /* Successfully sent, need to move the request */
        switch (retryRequest->state) {
          case LSQRequest::RequestIssuing:
            /* In the requests queue */
            moveFromRequestsToTransfers(retryRequest);
            break;
          case LSQRequest::StoreBufferIssuing:
            /* In the store buffer */
            storeBuffer.countIssuedStore(retryRequest);
            break;
          default:
            assert(false);
            break;
        }

        retryRequest = NULL;
    }
}
1293
/* Construct the LSQ.  A line_width of 0 means "use the CPU's cache
 * line size".  All queue/limit parameters are validated with fatal()
 * since the model cannot operate with zero-sized queues, and lineWidth
 * must be a power of two for the block-offset arithmetic to work. */
LSQ::LSQ(std::string name_, std::string dcache_port_name_,
    MinorCPU &cpu_, Execute &execute_,
    unsigned int in_memory_system_limit, unsigned int line_width,
    unsigned int requests_queue_size, unsigned int transfers_queue_size,
    unsigned int store_buffer_size,
    unsigned int store_buffer_cycle_store_limit) :
    Named(name_),
    cpu(cpu_),
    execute(execute_),
    dcachePort(dcache_port_name_, *this, cpu_),
    lastMemBarrier(cpu.numThreads, 0),
    state(MemoryRunning),
    inMemorySystemLimit(in_memory_system_limit),
    lineWidth((line_width == 0 ? cpu.cacheLineSize() : line_width)),
    requests(name_ + ".requests", "addr", requests_queue_size),
    transfers(name_ + ".transfers", "addr", transfers_queue_size),
    storeBuffer(name_ + ".storeBuffer",
        *this, store_buffer_size, store_buffer_cycle_store_limit),
    numAccessesInMemorySystem(0),
    numAccessesInDTLB(0),
    numStoresInTransfers(0),
    numAccessesIssuedToMemory(0),
    retryRequest(NULL),
    cacheBlockMask(~(cpu_.cacheLineSize() - 1))
{
    if (in_memory_system_limit < 1) {
        fatal("%s: executeMaxAccessesInMemory must be >= 1 (%d)\n", name_,
            in_memory_system_limit);
    }

    if (store_buffer_cycle_store_limit < 1) {
        fatal("%s: executeLSQMaxStoreBufferStoresPerCycle must be"
            " >= 1 (%d)\n", name_, store_buffer_cycle_store_limit);
    }

    if (requests_queue_size < 1) {
        fatal("%s: executeLSQRequestsQueueSize must be"
            " >= 1 (%d)\n", name_, requests_queue_size);
    }

    if (transfers_queue_size < 1) {
        fatal("%s: executeLSQTransfersQueueSize must be"
            " >= 1 (%d)\n", name_, transfers_queue_size);
    }

    if (store_buffer_size < 1) {
        fatal("%s: executeLSQStoreBufferSize must be"
            " >= 1 (%d)\n", name_, store_buffer_size);
    }

    if ((lineWidth & (lineWidth - 1)) != 0) {
        fatal("%s: lineWidth: %d must be a power of 2\n", name(), lineWidth);
    }
}
1348
/* Nothing to tear down explicitly; members clean up after themselves */
LSQ::~LSQ()
{ }
1351
1352LSQ::LSQRequest::~LSQRequest()
1353{
1354 if (packet)
1355 delete packet;
1356 if (data)
1357 delete [] data;
1358}
1359
/**
 * Step the memory access mechanism on to its next state.  In reality, most
 * of the stepping is done by the callbacks on the LSQ but this
 * function is responsible for issuing memory requests lodged in the
 * requests queue.
 */
void
LSQ::step()
{
    /* Try to move address-translated requests between queues and issue
     * them */
    if (!requests.empty())
        tryToSendToTransfers(requests.front());

    /* Give the store buffer a chance to issue its stores */
    storeBuffer.step();
}
1376
/* Return the request at the head of the transfers queue if it is the
 * response for the given instruction and is ready to be acted on:
 * either complete (barriers additionally need store buffer space) or
 * a store ready to move into the store buffer.  Returns NULL when no
 * actionable response is available. */
LSQ::LSQRequestPtr
LSQ::findResponse(MinorDynInstPtr inst)
{
    LSQ::LSQRequestPtr ret = NULL;

    if (!transfers.empty()) {
        LSQRequestPtr request = transfers.front();

        /* Same instruction and complete access or a store that's
         * capable of being moved to the store buffer */
        if (request->inst->id == inst->id) {
            bool complete = request->isComplete();
            bool can_store = storeBuffer.canInsert();
            bool to_store_buffer = request->state ==
                LSQRequest::StoreToStoreBuffer;

            if ((complete && !(request->isBarrier() && !can_store)) ||
                (to_store_buffer && can_store))
            {
                ret = request;
            }
        }
    }

    if (ret) {
        DPRINTF(MinorMem, "Found matching memory response for inst: %s\n",
            *inst);
    } else {
        DPRINTF(MinorMem, "No matching memory response for inst: %s\n",
            *inst);
    }

    return ret;
}
1411
/* Remove a response (previously returned by findResponse) from the
 * head of the transfers queue, updating the store/issued counters.
 * The request is destroyed here unless it is on its way to the store
 * buffer, which takes over ownership. */
void
LSQ::popResponse(LSQ::LSQRequestPtr response)
{
    assert(!transfers.empty() && transfers.front() == response);

    transfers.pop();

    if (!response->isLoad)
        numStoresInTransfers--;

    if (response->issuedToMemory)
        numAccessesIssuedToMemory--;

    if (response->state != LSQRequest::StoreInStoreBuffer) {
        DPRINTF(MinorMem, "Deleting %s request: %s\n",
            (response->isLoad ? "load" : "store"),
            *(response->inst));

        delete response;
    }
}
1433
/* Hand a bufferable store over to the store buffer, marking the
 * instruction so barrier completion knows it's already accounted for */
void
LSQ::sendStoreToStoreBuffer(LSQRequestPtr request)
{
    assert(request->state == LSQRequest::StoreToStoreBuffer);

    DPRINTF(MinorMem, "Sending store: %s to store buffer\n",
        *(request->inst));

    request->inst->inStoreBuffer = true;

    storeBuffer.insert(request);
}
1446
1447bool
1448LSQ::isDrained()
1449{
1450 return requests.empty() && transfers.empty() &&
1451 storeBuffer.isDrained();
1452}
1453
/* Does the LSQ have work that requires the pipeline to be ticked next
 * cycle?  True when the memory system can accept accesses and either a
 * translated head request could move into transfers or the store
 * buffer still has unissued stores. */
bool
LSQ::needsToTick()
{
    bool ret = false;

    if (canSendToMemorySystem()) {
        bool have_translated_requests = !requests.empty() &&
            requests.front()->state != LSQRequest::InTranslation &&
            transfers.unreservedRemainingSpace() != 0;

        ret = have_translated_requests ||
            storeBuffer.numUnissuedStores() != 0;
    }

    if (ret)
        DPRINTF(Activity, "Need to tick\n");

    return ret;
}
1473
1474void
1475LSQ::pushRequest(MinorDynInstPtr inst, bool isLoad, uint8_t *data,
1476 unsigned int size, Addr addr, Request::Flags flags,
1477 uint64_t *res)
1478{
1479 bool needs_burst = transferNeedsBurst(addr, size, lineWidth);
1480 LSQRequestPtr request;
1481
1482 /* Copy given data into the request. The request will pass this to the
1483 * packet and then it will own the data */
1484 uint8_t *request_data = NULL;
1485
1486 DPRINTF(MinorMem, "Pushing request (%s) addr: 0x%x size: %d flags:"
1487 " 0x%x%s lineWidth : 0x%x\n",
1488 (isLoad ? "load" : "store"), addr, size, flags,
1489 (needs_burst ? " (needs burst)" : ""), lineWidth);
1490
1491 if (!isLoad) {
1492 /* request_data becomes the property of a ...DataRequest (see below)
1493 * and destroyed by its destructor */
1494 request_data = new uint8_t[size];
688 AddrRangeCoverage coverage = slot->containsAddrRangeOf(request);
689
690 if (coverage != NoAddrRangeCoverage) {
691 DPRINTF(MinorMem, "Forwarding: slot: %d result: %s thisAddr:"
692 " 0x%x thisSize: %d slotAddr: 0x%x slotSize: %d\n",
693 slot_index, coverage,
694 request->request.getPaddr(), request->request.getSize(),
695 slot->request.getPaddr(), slot->request.getSize());
696
697 found_slot = slot_index;
698 ret = coverage;
699 }
700 }
701
702 i++;
703 slot_index--;
704 }
705
706 return ret;
707}
708
709/** Fill the given packet with appropriate date from slot slot_number */
710void
711LSQ::StoreBuffer::forwardStoreData(LSQRequestPtr load,
712 unsigned int slot_number)
713{
714 assert(slot_number < slots.size());
715 assert(load->packet);
716 assert(load->isLoad);
717
718 LSQRequestPtr store = slots[slot_number];
719
720 assert(store->packet);
721 assert(store->containsAddrRangeOf(load) == FullAddrRangeCoverage);
722
723 Addr load_addr = load->request.getPaddr();
724 Addr store_addr = store->request.getPaddr();
725 Addr addr_offset = load_addr - store_addr;
726
727 unsigned int load_size = load->request.getSize();
728
729 DPRINTF(MinorMem, "Forwarding %d bytes for addr: 0x%x from store buffer"
730 " slot: %d addr: 0x%x addressOffset: 0x%x\n",
731 load_size, load_addr, slot_number,
732 store_addr, addr_offset);
733
734 void *load_packet_data = load->packet->getPtr<void>();
735 void *store_packet_data = store->packet->getPtr<uint8_t>() + addr_offset;
736
737 std::memcpy(load_packet_data, store_packet_data, load_size);
738}
739
740void
741LSQ::StoreBuffer::countIssuedStore(LSQRequestPtr request)
742{
743 /* Barriers are accounted for as they are cleared from
744 * the queue, not after their transfers are complete */
745 if (!request->isBarrier())
746 numUnissuedAccesses--;
747}
748
/* Advance the store buffer: clear completed leading barriers, then try
 * to issue queued stores in order, up to storeLimitPerCycle per cycle */
void
LSQ::StoreBuffer::step()
{
    DPRINTF(MinorMem, "StoreBuffer step numUnissuedAccesses: %d\n",
        numUnissuedAccesses);

    if (numUnissuedAccesses != 0 && lsq.state == LSQ::MemoryRunning) {
        /* Clear all the leading barriers */
        while (!slots.empty() &&
            slots.front()->isComplete() && slots.front()->isBarrier())
        {
            LSQRequestPtr barrier = slots.front();

            DPRINTF(MinorMem, "Clearing barrier for inst: %s\n",
                *(barrier->inst));

            /* Barrier slots count as unissued accesses until they are
             * removed from the queue (see countIssuedStore) */
            numUnissuedAccesses--;
            lsq.clearMemBarrier(barrier->inst);
            slots.pop_front();

            delete barrier;
        }

        auto i = slots.begin();
        bool issued = true;
        unsigned int issue_count = 0;

        /* Skip trying if the memory system is busy */
        if (lsq.state == LSQ::MemoryNeedsRetry)
            issued = false;

        /* Try to issue all stores in order starting from the head
         * of the queue.  Responses are allowed to be retired
         * out of order */
        while (issued &&
            issue_count < storeLimitPerCycle &&
            lsq.canSendToMemorySystem() &&
            i != slots.end())
        {
            LSQRequestPtr request = *i;

            DPRINTF(MinorMem, "Considering request: %s, sentAllPackets: %d"
                " state: %s\n",
                *(request->inst), request->sentAllPackets(),
                request->state);

            if (request->isBarrier() && request->isComplete()) {
                /* Give up at barriers */
                issued = false;
            } else if (!(request->state == LSQRequest::StoreBufferIssuing &&
                request->sentAllPackets()))
            {
                /* Still has packets to send; try to push the next one */
                DPRINTF(MinorMem, "Trying to send request: %s to memory"
                    " system\n", *(request->inst));

                if (lsq.tryToSend(request)) {
                    countIssuedStore(request);
                    issue_count++;
                } else {
                    /* Don't step on to the next store buffer entry if this
                     * one hasn't issued all its packets as the store
                     * buffer must still enforce ordering */
                    issued = false;
                }
            }
            i++;
        }
    }
}
818
819void
820LSQ::completeMemBarrierInst(MinorDynInstPtr inst,
821 bool committed)
822{
823 if (committed) {
824 /* Not already sent to the store buffer as a store request? */
825 if (!inst->inStoreBuffer) {
826 /* Insert an entry into the store buffer to tick off barriers
827 * until there are none in flight */
828 storeBuffer.insert(new BarrierDataRequest(*this, inst));
829 }
830 } else {
831 /* Clear the barrier anyway if it wasn't actually committed */
832 clearMemBarrier(inst);
833 }
834}
835
836void
837LSQ::StoreBuffer::minorTrace() const
838{
839 unsigned int size = slots.size();
840 unsigned int i = 0;
841 std::ostringstream os;
842
843 while (i < size) {
844 LSQRequestPtr request = slots[i];
845
846 request->reportData(os);
847
848 i++;
849 if (i < numSlots)
850 os << ',';
851 }
852
853 while (i < numSlots) {
854 os << '-';
855
856 i++;
857 if (i < numSlots)
858 os << ',';
859 }
860
861 MINORTRACE("addr=%s num_unissued_stores=%d\n", os.str(),
862 numUnissuedAccesses);
863}
864
/* Try to progress the given request (which must be the head of the
 * requests queue): move it into the transfers queue, route a bufferable
 * store towards the store buffer, satisfy a load from store-buffer
 * forwarding, or issue it to the memory system.  Returns without effect
 * when no progress is currently possible. */
void
LSQ::tryToSendToTransfers(LSQRequestPtr request)
{
    if (state == MemoryNeedsRetry) {
        DPRINTF(MinorMem, "Request needs retry, not issuing to"
            " memory until retry arrives\n");
        return;
    }

    if (request->state == LSQRequest::InTranslation) {
        DPRINTF(MinorMem, "Request still in translation, not issuing to"
            " memory\n");
        return;
    }

    assert(request->state == LSQRequest::Translated ||
        request->state == LSQRequest::RequestIssuing ||
        request->state == LSQRequest::Failed ||
        request->state == LSQRequest::Complete);

    if (requests.empty() || requests.front() != request) {
        DPRINTF(MinorMem, "Request not at front of requests queue, can't"
            " issue to memory\n");
        return;
    }

    if (transfers.unreservedRemainingSpace() == 0) {
        DPRINTF(MinorMem, "No space to insert request into transfers"
            " queue\n");
        return;
    }

    /* Completed or failed requests need no memory access; just pass
     * them on */
    if (request->isComplete() || request->state == LSQRequest::Failed) {
        DPRINTF(MinorMem, "Passing a %s transfer on to transfers"
            " queue\n", (request->isComplete() ? "completed" : "failed"));
        request->setState(LSQRequest::Complete);
        request->setSkipped();
        moveFromRequestsToTransfers(request);
        return;
    }

    if (!execute.instIsRightStream(request->inst)) {
        /* Wrong stream, try to abort the transfer but only do so if
         * there are no packets in flight */
        if (request->hasPacketsInMemSystem()) {
            DPRINTF(MinorMem, "Request's inst. is from the wrong stream,"
                " waiting for responses before aborting request\n");
        } else {
            DPRINTF(MinorMem, "Request's inst. is from the wrong stream,"
                " aborting request\n");
            request->setState(LSQRequest::Complete);
            request->setSkipped();
            moveFromRequestsToTransfers(request);
        }
        return;
    }

    /* Faulting requests skip the access but still flow through the
     * transfers queue so the fault can be signalled */
    if (request->fault != NoFault) {
        if (request->inst->staticInst->isPrefetch()) {
            DPRINTF(MinorMem, "Not signalling fault for faulting prefetch\n");
        }
        DPRINTF(MinorMem, "Moving faulting request into the transfers"
            " queue\n");
        request->setState(LSQRequest::Complete);
        request->setSkipped();
        moveFromRequestsToTransfers(request);
        return;
    }

    bool is_load = request->isLoad;
    bool is_llsc = request->request.isLLSC();
    bool is_swap = request->request.isSwap();
    /* Only plain (non-ordered, non-LLSC, non-swap) stores may be held
     * in the store buffer */
    bool bufferable = !(request->request.isStrictlyOrdered() ||
        is_llsc || is_swap);

    if (is_load) {
        if (numStoresInTransfers != 0) {
            /* Forwarding results would be unreliable with stores still
             * in flight between the queues */
            DPRINTF(MinorMem, "Load request with stores still in transfers"
                " queue, stalling\n");
            return;
        }
    } else {
        /* Store.  Can it be sent to the store buffer? */
        if (bufferable && !request->request.isMmappedIpr()) {
            request->setState(LSQRequest::StoreToStoreBuffer);
            moveFromRequestsToTransfers(request);
            DPRINTF(MinorMem, "Moving store into transfers queue\n");
            return;
        }
    }

    /* Check if this is the head instruction (and so must be executable as
     * its stream sequence number was checked above) for loads which must
     * not be speculatively issued and stores which must be issued here */
    if (!bufferable) {
        if (!execute.instIsHeadInst(request->inst)) {
            DPRINTF(MinorMem, "Memory access not the head inst., can't be"
                " sure it can be performed, not issuing\n");
            return;
        }

        unsigned int forwarding_slot = 0;

        if (storeBuffer.canForwardDataToLoad(request, forwarding_slot) !=
            NoAddrRangeCoverage)
        {
            /* Non-bufferable accesses must see memory, not store-buffer
             * data, so wait for the overlapping store to drain */
            DPRINTF(MinorMem, "Memory access can receive forwarded data"
                " from the store buffer, need to wait for store buffer to"
                " drain\n");
            return;
        }
    }

    /* True: submit this packet to the transfers queue to be sent to the
     * memory system.
     * False: skip the memory and push a packet for this request onto
     * requests */
    bool do_access = true;

    if (!is_llsc) {
        /* Check for match in the store buffer */
        if (is_load) {
            unsigned int forwarding_slot = 0;
            AddrRangeCoverage forwarding_result =
                storeBuffer.canForwardDataToLoad(request,
                forwarding_slot);

            switch (forwarding_result) {
            case FullAddrRangeCoverage:
                /* Forward data from the store buffer into this request and
                 * repurpose this request's packet into a response packet */
                storeBuffer.forwardStoreData(request, forwarding_slot);
                request->packet->makeResponse();

                /* Just move between queues, no access */
                do_access = false;
                break;
            case PartialAddrRangeCoverage:
                DPRINTF(MinorMem, "Load partly satisfied by store buffer"
                    " data. Must wait for the store to complete\n");
                return;
                break;
            case NoAddrRangeCoverage:
                DPRINTF(MinorMem, "No forwardable data from store buffer\n");
                /* Fall through to try access */
                break;
            }
        }
    } else {
        if (!canSendToMemorySystem()) {
            DPRINTF(MinorMem, "Can't send request to memory system yet\n");
            return;
        }

        SimpleThread &thread = *cpu.threads[request->inst->id.threadId];

        TheISA::PCState old_pc = thread.pcState();
        ExecContext context(cpu, thread, execute, request->inst);

        /* Handle LLSC requests and tests */
        if (is_load) {
            TheISA::handleLockedRead(&context, &request->request);
        } else {
            /* A failed store conditional performs no access at all */
            do_access = TheISA::handleLockedWrite(&context,
                &request->request, cacheBlockMask);

            if (!do_access) {
                DPRINTF(MinorMem, "Not perfoming a memory "
                    "access for store conditional\n");
            }
        }
        thread.pcState(old_pc);
    }

    /* See the do_access comment above */
    if (do_access) {
        if (!canSendToMemorySystem()) {
            DPRINTF(MinorMem, "Can't send request to memory system yet\n");
            return;
        }

        /* Remember if this is an access which can't be idly
         * discarded by an interrupt */
        if (!bufferable && !request->issuedToMemory) {
            numAccessesIssuedToMemory++;
            request->issuedToMemory = true;
        }

        if (tryToSend(request)) {
            moveFromRequestsToTransfers(request);
        }
    } else {
        request->setState(LSQRequest::Complete);
        moveFromRequestsToTransfers(request);
    }
}
1061
/* Try to send the head packet of the given request into the memory
 * system (mmapped IPR accesses are performed immediately instead).
 * Returns true once the request has sent all of its packets.  On a
 * rejected send, the LSQ enters MemoryNeedsRetry and remembers the
 * request for recvReqRetry. */
bool
LSQ::tryToSend(LSQRequestPtr request)
{
    bool ret = false;

    if (!canSendToMemorySystem()) {
        DPRINTF(MinorMem, "Can't send request: %s yet, no space in memory\n",
            *(request->inst));
    } else {
        PacketPtr packet = request->getHeadPacket();

        DPRINTF(MinorMem, "Trying to send request: %s addr: 0x%x\n",
            *(request->inst), packet->req->getVaddr());

        /* The sender state of the packet *must* be an LSQRequest
         * so the response can be correctly handled */
        assert(packet->findNextSenderState<LSQRequest>());

        if (request->request.isMmappedIpr()) {
            /* IPR accesses bypass the memory system entirely */
            ThreadContext *thread =
                cpu.getContext(cpu.contextToThread(
                    request->request.contextId()));

            if (request->isLoad) {
                DPRINTF(MinorMem, "IPR read inst: %s\n", *(request->inst));
                TheISA::handleIprRead(thread, packet);
            } else {
                DPRINTF(MinorMem, "IPR write inst: %s\n", *(request->inst));
                TheISA::handleIprWrite(thread, packet);
            }

            request->stepToNextPacket();
            ret = request->sentAllPackets();

            if (!ret) {
                DPRINTF(MinorMem, "IPR access has another packet: %s\n",
                    *(request->inst));
            }

            if (ret)
                request->setState(LSQRequest::Complete);
            else
                request->setState(LSQRequest::RequestIssuing);
        } else if (dcachePort.sendTimingReq(packet)) {
            DPRINTF(MinorMem, "Sent data memory request\n");

            numAccessesInMemorySystem++;

            request->stepToNextPacket();

            ret = request->sentAllPackets();

            switch (request->state) {
            case LSQRequest::Translated:
            case LSQRequest::RequestIssuing:
                /* Fully or partially issued a request in the transfers
                 * queue */
                request->setState(LSQRequest::RequestIssuing);
                break;
            case LSQRequest::StoreInStoreBuffer:
            case LSQRequest::StoreBufferIssuing:
                /* Fully or partially issued a request in the store
                 * buffer */
                request->setState(LSQRequest::StoreBufferIssuing);
                break;
            default:
                assert(false);
                break;
            }

            state = MemoryRunning;
        } else {
            DPRINTF(MinorMem,
                "Sending data memory request - needs retry\n");

            /* Needs to be resent, wait for that */
            state = MemoryNeedsRetry;
            retryRequest = request;

            switch (request->state) {
            case LSQRequest::Translated:
            case LSQRequest::RequestIssuing:
                request->setState(LSQRequest::RequestNeedsRetry);
                break;
            case LSQRequest::StoreInStoreBuffer:
            case LSQRequest::StoreBufferIssuing:
                request->setState(LSQRequest::StoreBufferNeedsRetry);
                break;
            default:
                assert(false);
                break;
            }
        }
    }

    /* Let other threads observe a fully-sent access */
    if (ret)
        threadSnoop(request);

    return ret;
}
1162
1163void
1164LSQ::moveFromRequestsToTransfers(LSQRequestPtr request)
1165{
1166 assert(!requests.empty() && requests.front() == request);
1167 assert(transfers.unreservedRemainingSpace() != 0);
1168
1169 /* Need to count the number of stores in the transfers
1170 * queue so that loads know when their store buffer forwarding
1171 * results will be correct (only when all those stores
1172 * have reached the store buffer) */
1173 if (!request->isLoad)
1174 numStoresInTransfers++;
1175
1176 requests.pop();
1177 transfers.push(request);
1178}
1179
1180bool
1181LSQ::canSendToMemorySystem()
1182{
1183 return state == MemoryRunning &&
1184 numAccessesInMemorySystem < inMemorySystemLimit;
1185}
1186
/* Retire a response packet from the memory system into the request
 * (recovered from the packet's sender state) that generated it.
 * Always returns true as the port is never busy. */
bool
LSQ::recvTimingResp(PacketPtr response)
{
    LSQRequestPtr request =
        safe_cast<LSQRequestPtr>(response->popSenderState());

    DPRINTF(MinorMem, "Received response packet inst: %s"
        " addr: 0x%x cmd: %s\n",
        *(request->inst), response->getAddr(),
        response->cmd.toString());

    numAccessesInMemorySystem--;

    if (response->isError()) {
        DPRINTF(MinorMem, "Received error response packet: %s\n",
            *request->inst);
    }

    switch (request->state) {
    case LSQRequest::RequestIssuing:
    case LSQRequest::RequestNeedsRetry:
        /* Response to a request from the transfers queue */
        request->retireResponse(response);

        DPRINTF(MinorMem, "Has outstanding packets?: %d %d\n",
            request->hasPacketsInMemSystem(), request->isComplete());

        break;
    case LSQRequest::StoreBufferIssuing:
    case LSQRequest::StoreBufferNeedsRetry:
        /* Response to a request from the store buffer */
        request->retireResponse(response);

        /* Remove completed requests unless they are barriers (which
         * will need to be removed in order) */
        if (request->isComplete()) {
            if (!request->isBarrier()) {
                storeBuffer.deleteRequest(request);
            } else {
                DPRINTF(MinorMem, "Completed transfer for barrier: %s"
                    " leaving the request as it is also a barrier\n",
                    *(request->inst));
            }
        }
        break;
    default:
        /* Shouldn't be allowed to receive a response from another
         * state */
        assert(false);
        break;
    }

    /* We go to idle even if there are more things in the requests queue
     * as it's the job of step to actually step us on to the next
     * transaction */

    /* Let's try and wake up the processor for the next cycle */
    cpu.wakeupOnEvent(Pipeline::ExecuteStageId);

    /* Never busy */
    return true;
}
1249
/* The memory system can accept a packet again: restore the retrying
 * request's pre-retry state and attempt to resend it */
void
LSQ::recvReqRetry()
{
    DPRINTF(MinorMem, "Received retry request\n");

    assert(state == MemoryNeedsRetry);

    switch (retryRequest->state) {
    case LSQRequest::RequestNeedsRetry:
        /* Retry in the requests queue */
        retryRequest->setState(LSQRequest::Translated);
        break;
    case LSQRequest::StoreBufferNeedsRetry:
        /* Retry in the store buffer */
        retryRequest->setState(LSQRequest::StoreInStoreBuffer);
        break;
    default:
        assert(false);
    }

    /* Set state back to MemoryRunning so that the following
     * tryToSend can actually send.  Note that this won't
     * allow another transfer in as tryToSend should
     * issue a memory request and either succeed for this
     * request or return the LSQ back to MemoryNeedsRetry */
    state = MemoryRunning;

    /* Try to resend the request */
    if (tryToSend(retryRequest)) {
        /* Successfully sent, need to move the request */
        switch (retryRequest->state) {
        case LSQRequest::RequestIssuing:
            /* In the requests queue */
            moveFromRequestsToTransfers(retryRequest);
            break;
        case LSQRequest::StoreBufferIssuing:
            /* In the store buffer */
            storeBuffer.countIssuedStore(retryRequest);
            break;
        default:
            assert(false);
            break;
        }

        retryRequest = NULL;
    }
}
1297
/* Construct the LSQ and validate its sizing parameters.  A line_width
 * of 0 means "use the CPU's cache line size". */
LSQ::LSQ(std::string name_, std::string dcache_port_name_,
    MinorCPU &cpu_, Execute &execute_,
    unsigned int in_memory_system_limit, unsigned int line_width,
    unsigned int requests_queue_size, unsigned int transfers_queue_size,
    unsigned int store_buffer_size,
    unsigned int store_buffer_cycle_store_limit) :
    Named(name_),
    cpu(cpu_),
    execute(execute_),
    dcachePort(dcache_port_name_, *this, cpu_),
    lastMemBarrier(cpu.numThreads, 0),
    state(MemoryRunning),
    inMemorySystemLimit(in_memory_system_limit),
    lineWidth((line_width == 0 ? cpu.cacheLineSize() : line_width)),
    requests(name_ + ".requests", "addr", requests_queue_size),
    transfers(name_ + ".transfers", "addr", transfers_queue_size),
    storeBuffer(name_ + ".storeBuffer",
        *this, store_buffer_size, store_buffer_cycle_store_limit),
    numAccessesInMemorySystem(0),
    numAccessesInDTLB(0),
    numStoresInTransfers(0),
    numAccessesIssuedToMemory(0),
    retryRequest(NULL),
    cacheBlockMask(~(cpu_.cacheLineSize() - 1))
{
    /* All the limits and queue sizes below must be at least 1 for the
     * LSQ's queue handling to work at all */
    if (in_memory_system_limit < 1) {
        fatal("%s: executeMaxAccessesInMemory must be >= 1 (%d)\n", name_,
            in_memory_system_limit);
    }

    if (store_buffer_cycle_store_limit < 1) {
        fatal("%s: executeLSQMaxStoreBufferStoresPerCycle must be"
            " >= 1 (%d)\n", name_, store_buffer_cycle_store_limit);
    }

    if (requests_queue_size < 1) {
        fatal("%s: executeLSQRequestsQueueSize must be"
            " >= 1 (%d)\n", name_, requests_queue_size);
    }

    if (transfers_queue_size < 1) {
        fatal("%s: executeLSQTransfersQueueSize must be"
            " >= 1 (%d)\n", name_, transfers_queue_size);
    }

    if (store_buffer_size < 1) {
        fatal("%s: executeLSQStoreBufferSize must be"
            " >= 1 (%d)\n", name_, store_buffer_size);
    }

    /* Bursts are split on lineWidth boundaries, so it must be a power
     * of 2 */
    if ((lineWidth & (lineWidth - 1)) != 0) {
        fatal("%s: lineWidth: %d must be a power of 2\n", name(), lineWidth);
    }
}
1352
/* Nothing to release explicitly; members clean themselves up */
LSQ::~LSQ()
{ }
1355
1356LSQ::LSQRequest::~LSQRequest()
1357{
1358 if (packet)
1359 delete packet;
1360 if (data)
1361 delete [] data;
1362}
1363
1364/**
1365 * Step the memory access mechanism on to its next state. In reality, most
1366 * of the stepping is done by the callbacks on the LSQ but this
1367 * function is responsible for issuing memory requests lodged in the
1368 * requests queue.
1369 */
1370void
1371LSQ::step()
1372{
1373 /* Try to move address-translated requests between queues and issue
1374 * them */
1375 if (!requests.empty())
1376 tryToSendToTransfers(requests.front());
1377
1378 storeBuffer.step();
1379}
1380
1381LSQ::LSQRequestPtr
1382LSQ::findResponse(MinorDynInstPtr inst)
1383{
1384 LSQ::LSQRequestPtr ret = NULL;
1385
1386 if (!transfers.empty()) {
1387 LSQRequestPtr request = transfers.front();
1388
1389 /* Same instruction and complete access or a store that's
1390 * capable of being moved to the store buffer */
1391 if (request->inst->id == inst->id) {
1392 bool complete = request->isComplete();
1393 bool can_store = storeBuffer.canInsert();
1394 bool to_store_buffer = request->state ==
1395 LSQRequest::StoreToStoreBuffer;
1396
1397 if ((complete && !(request->isBarrier() && !can_store)) ||
1398 (to_store_buffer && can_store))
1399 {
1400 ret = request;
1401 }
1402 }
1403 }
1404
1405 if (ret) {
1406 DPRINTF(MinorMem, "Found matching memory response for inst: %s\n",
1407 *inst);
1408 } else {
1409 DPRINTF(MinorMem, "No matching memory response for inst: %s\n",
1410 *inst);
1411 }
1412
1413 return ret;
1414}
1415
/* Pop the head response from the transfers queue, keeping the store /
 * issued-access counts in sync, and free it unless it now lives in the
 * store buffer */
void
LSQ::popResponse(LSQ::LSQRequestPtr response)
{
    assert(!transfers.empty() && transfers.front() == response);

    transfers.pop();

    if (!response->isLoad)
        numStoresInTransfers--;

    if (response->issuedToMemory)
        numAccessesIssuedToMemory--;

    if (response->state != LSQRequest::StoreInStoreBuffer) {
        DPRINTF(MinorMem, "Deleting %s request: %s\n",
            (response->isLoad ? "load" : "store"),
            *(response->inst));

        delete response;
    }
}
1437
/* Move a store marked StoreToStoreBuffer into the store buffer,
 * flagging its instruction as buffered */
void
LSQ::sendStoreToStoreBuffer(LSQRequestPtr request)
{
    assert(request->state == LSQRequest::StoreToStoreBuffer);

    DPRINTF(MinorMem, "Sending store: %s to store buffer\n",
        *(request->inst));

    request->inst->inStoreBuffer = true;

    storeBuffer.insert(request);
}
1450
/* Drained when both queues and the store buffer are empty */
bool
LSQ::isDrained()
{
    return requests.empty() && transfers.empty() &&
        storeBuffer.isDrained();
}
1457
/* The LSQ needs ticking when a translated request could move into the
 * transfers queue or the store buffer still has stores to issue */
bool
LSQ::needsToTick()
{
    bool ret = false;

    if (canSendToMemorySystem()) {
        bool have_translated_requests = !requests.empty() &&
            requests.front()->state != LSQRequest::InTranslation &&
            transfers.unreservedRemainingSpace() != 0;

        ret = have_translated_requests ||
            storeBuffer.numUnissuedStores() != 0;
    }

    if (ret)
        DPRINTF(Activity, "Need to tick\n");

    return ret;
}
1477
1478void
1479LSQ::pushRequest(MinorDynInstPtr inst, bool isLoad, uint8_t *data,
1480 unsigned int size, Addr addr, Request::Flags flags,
1481 uint64_t *res)
1482{
1483 bool needs_burst = transferNeedsBurst(addr, size, lineWidth);
1484 LSQRequestPtr request;
1485
1486 /* Copy given data into the request. The request will pass this to the
1487 * packet and then it will own the data */
1488 uint8_t *request_data = NULL;
1489
1490 DPRINTF(MinorMem, "Pushing request (%s) addr: 0x%x size: %d flags:"
1491 " 0x%x%s lineWidth : 0x%x\n",
1492 (isLoad ? "load" : "store"), addr, size, flags,
1493 (needs_burst ? " (needs burst)" : ""), lineWidth);
1494
1495 if (!isLoad) {
1496 /* request_data becomes the property of a ...DataRequest (see below)
1497 * and destroyed by its destructor */
1498 request_data = new uint8_t[size];
1495 if (flags & Request::CACHE_BLOCK_ZERO) {
1499 if (flags & Request::STORE_NO_DATA) {
1496 /* For cache zeroing, just use zeroed data */
1497 std::memset(request_data, 0, size);
1498 } else {
1499 std::memcpy(request_data, data, size);
1500 }
1501 }
1502
1503 if (needs_burst) {
1504 request = new SplitDataRequest(
1505 *this, inst, isLoad, request_data, res);
1506 } else {
1507 request = new SingleDataRequest(
1508 *this, inst, isLoad, request_data, res);
1509 }
1510
1511 if (inst->traceData)
1512 inst->traceData->setMem(addr, size, flags);
1513
1514 int cid = cpu.threads[inst->id.threadId]->getTC()->contextId();
1515 request->request.setContext(cid);
1516 request->request.setVirt(0 /* asid */,
1517 addr, size, flags, cpu.dataMasterId(),
1518 /* I've no idea why we need the PC, but give it */
1519 inst->pc.instAddr());
1520
1521 requests.push(request);
1522 request->startAddrTranslation();
1523}
1524
1525void
1526LSQ::pushFailedRequest(MinorDynInstPtr inst)
1527{
1528 LSQRequestPtr request = new FailedDataRequest(*this, inst);
1529 requests.push(request);
1530}
1531
/* Trace the LSQ's state and each of its queues.  Note that only thread
 * 0's last memory barrier is reported here */
void
LSQ::minorTrace() const
{
    MINORTRACE("state=%s in_tlb_mem=%d/%d stores_in_transfers=%d"
        " lastMemBarrier=%d\n",
        state, numAccessesInDTLB, numAccessesInMemorySystem,
        numStoresInTransfers, lastMemBarrier[0]);
    requests.minorTrace();
    transfers.minorTrace();
    storeBuffer.minorTrace();
}
1543
/* Store buffer with store_buffer_size slots, issuing at most
 * store_limit_per_cycle stores to memory per cycle */
LSQ::StoreBuffer::StoreBuffer(std::string name_, LSQ &lsq_,
    unsigned int store_buffer_size,
    unsigned int store_limit_per_cycle) :
    Named(name_), lsq(lsq_),
    numSlots(store_buffer_size),
    storeLimitPerCycle(store_limit_per_cycle),
    slots(),
    numUnissuedAccesses(0)
{
}
1554
1555PacketPtr
1556makePacketForRequest(Request &request, bool isLoad,
1557 Packet::SenderState *sender_state, PacketDataPtr data)
1558{
1559 PacketPtr ret = isLoad ? Packet::createRead(&request)
1560 : Packet::createWrite(&request);
1561
1562 if (sender_state)
1563 ret->pushSenderState(sender_state);
1564
1500 /* For cache zeroing, just use zeroed data */
1501 std::memset(request_data, 0, size);
1502 } else {
1503 std::memcpy(request_data, data, size);
1504 }
1505 }
1506
1507 if (needs_burst) {
1508 request = new SplitDataRequest(
1509 *this, inst, isLoad, request_data, res);
1510 } else {
1511 request = new SingleDataRequest(
1512 *this, inst, isLoad, request_data, res);
1513 }
1514
1515 if (inst->traceData)
1516 inst->traceData->setMem(addr, size, flags);
1517
1518 int cid = cpu.threads[inst->id.threadId]->getTC()->contextId();
1519 request->request.setContext(cid);
1520 request->request.setVirt(0 /* asid */,
1521 addr, size, flags, cpu.dataMasterId(),
1522 /* I've no idea why we need the PC, but give it */
1523 inst->pc.instAddr());
1524
1525 requests.push(request);
1526 request->startAddrTranslation();
1527}
1528
/* Queue a placeholder request for an instruction whose memory access
 * could not be attempted */
void
LSQ::pushFailedRequest(MinorDynInstPtr inst)
{
    LSQRequestPtr request = new FailedDataRequest(*this, inst);
    requests.push(request);
}
1535
/* Trace the LSQ's state and each of its queues (only thread 0's last
 * barrier is reported) */
void
LSQ::minorTrace() const
{
    MINORTRACE("state=%s in_tlb_mem=%d/%d stores_in_transfers=%d"
        " lastMemBarrier=%d\n",
        state, numAccessesInDTLB, numAccessesInMemorySystem,
        numStoresInTransfers, lastMemBarrier[0]);
    requests.minorTrace();
    transfers.minorTrace();
    storeBuffer.minorTrace();
}
1547
/* Store buffer with store_buffer_size slots, issuing at most
 * store_limit_per_cycle stores to memory per cycle */
LSQ::StoreBuffer::StoreBuffer(std::string name_, LSQ &lsq_,
    unsigned int store_buffer_size,
    unsigned int store_limit_per_cycle) :
    Named(name_), lsq(lsq_),
    numSlots(store_buffer_size),
    storeLimitPerCycle(store_limit_per_cycle),
    slots(),
    numUnissuedAccesses(0)
{
}
1558
1559PacketPtr
1560makePacketForRequest(Request &request, bool isLoad,
1561 Packet::SenderState *sender_state, PacketDataPtr data)
1562{
1563 PacketPtr ret = isLoad ? Packet::createRead(&request)
1564 : Packet::createWrite(&request);
1565
1566 if (sender_state)
1567 ret->pushSenderState(sender_state);
1568
1565 if (isLoad)
1569 if (isLoad) {
1566 ret->allocate();
1570 ret->allocate();
1567 else
1571 } else if (!request.isCacheMaintenance()) {
1572 // CMOs are treated as stores but they don't have data. All
1573 // stores otherwise need to allocate for data.
1568 ret->dataDynamic(data);
1574 ret->dataDynamic(data);
1575 }
1569
1570 return ret;
1571}
1572
1573void
1574LSQ::issuedMemBarrierInst(MinorDynInstPtr inst)
1575{
1576 assert(inst->isInst() && inst->staticInst->isMemBarrier());
1577 assert(inst->id.execSeqNum > lastMemBarrier[inst->id.threadId]);
1578
1579 /* Remember the barrier. We only have a notion of one
1580 * barrier so this may result in some mem refs being
1581 * delayed if they are between barriers */
1582 lastMemBarrier[inst->id.threadId] = inst->id.execSeqNum;
1583}
1584
1585void
1586LSQ::LSQRequest::makePacket()
1587{
1588 /* Make the function idempotent */
1589 if (packet)
1590 return;
1591
1592 // if the translation faulted, do not create a packet
1593 if (fault != NoFault) {
1594 assert(packet == NULL);
1595 return;
1596 }
1597
1598 packet = makePacketForRequest(request, isLoad, this, data);
1599 /* Null the ret data so we know not to deallocate it when the
1600 * ret is destroyed. The data now belongs to the ret and
1601 * the ret is responsible for its destruction */
1602 data = NULL;
1603}
1604
1605std::ostream &
1606operator <<(std::ostream &os, LSQ::MemoryState state)
1607{
1608 switch (state) {
1609 case LSQ::MemoryRunning:
1610 os << "MemoryRunning";
1611 break;
1612 case LSQ::MemoryNeedsRetry:
1613 os << "MemoryNeedsRetry";
1614 break;
1615 default:
1616 os << "MemoryState-" << static_cast<int>(state);
1617 break;
1618 }
1619 return os;
1620}
1621
void
LSQ::recvTimingSnoopReq(PacketPtr pkt)
{
    /* LLSC operations in Minor can't be speculative and are executed from
     * the head of the requests queue. We shouldn't need to do more than
     * this action on snoops. */

    /* Wake any thread whose address monitor matches the snooped packet */
    for (ThreadID tid = 0; tid < cpu.numThreads; tid++) {
        if (cpu.getCpuAddrMonitor(tid)->doMonitor(pkt)) {
            cpu.wakeup(tid);
        }
    }

    /* Clear matching load-locked reservations on every thread */
    if (pkt->isInvalidate() || pkt->isWrite()) {
        for (ThreadID tid = 0; tid < cpu.numThreads; tid++) {
            TheISA::handleLockedSnoop(cpu.getContext(tid), pkt,
                                      cacheBlockMask);
        }
    }
}
1641
void
LSQ::threadSnoop(LSQRequestPtr request)
{
    /* LLSC operations in Minor can't be speculative and are executed from
     * the head of the requests queue. We shouldn't need to do more than
     * this action on snoops. */
    ThreadID req_tid = request->inst->id.threadId;
    PacketPtr pkt = request->packet;

    /* Let every thread other than the requester observe this access:
     * wake matching address monitors and clear LL/SC reservations */
    for (ThreadID tid = 0; tid < cpu.numThreads; tid++) {
        if (tid != req_tid) {
            if (cpu.getCpuAddrMonitor(tid)->doMonitor(pkt)) {
                cpu.wakeup(tid);
            }

            if (pkt->isInvalidate() || pkt->isWrite()) {
                TheISA::handleLockedSnoop(cpu.getContext(tid), pkt,
                                          cacheBlockMask);
            }
        }
    }
}
1664
1665}
1576
1577 return ret;
1578}
1579
/* Record the issue of a memory barrier for its thread */
void
LSQ::issuedMemBarrierInst(MinorDynInstPtr inst)
{
    assert(inst->isInst() && inst->staticInst->isMemBarrier());
    assert(inst->id.execSeqNum > lastMemBarrier[inst->id.threadId]);

    /* Remember the barrier.  We only have a notion of one
     * barrier so this may result in some mem refs being
     * delayed if they are between barriers */
    lastMemBarrier[inst->id.threadId] = inst->id.execSeqNum;
}
1591
/* Create this request's packet (when translation succeeded), passing
 * ownership of the request's data to it */
void
LSQ::LSQRequest::makePacket()
{
    /* Make the function idempotent */
    if (packet)
        return;

    // if the translation faulted, do not create a packet
    if (fault != NoFault) {
        assert(packet == NULL);
        return;
    }

    packet = makePacketForRequest(request, isLoad, this, data);
    /* Null the ret data so we know not to deallocate it when the
     * ret is destroyed. The data now belongs to the ret and
     * the ret is responsible for its destruction */
    data = NULL;
}
1611
1612std::ostream &
1613operator <<(std::ostream &os, LSQ::MemoryState state)
1614{
1615 switch (state) {
1616 case LSQ::MemoryRunning:
1617 os << "MemoryRunning";
1618 break;
1619 case LSQ::MemoryNeedsRetry:
1620 os << "MemoryNeedsRetry";
1621 break;
1622 default:
1623 os << "MemoryState-" << static_cast<int>(state);
1624 break;
1625 }
1626 return os;
1627}
1628
1629void
1630LSQ::recvTimingSnoopReq(PacketPtr pkt)
1631{
1632 /* LLSC operations in Minor can't be speculative and are executed from
1633 * the head of the requests queue. We shouldn't need to do more than
1634 * this action on snoops. */
1635 for (ThreadID tid = 0; tid < cpu.numThreads; tid++) {
1636 if (cpu.getCpuAddrMonitor(tid)->doMonitor(pkt)) {
1637 cpu.wakeup(tid);
1638 }
1639 }
1640
1641 if (pkt->isInvalidate() || pkt->isWrite()) {
1642 for (ThreadID tid = 0; tid < cpu.numThreads; tid++) {
1643 TheISA::handleLockedSnoop(cpu.getContext(tid), pkt,
1644 cacheBlockMask);
1645 }
1646 }
1647}
1648
1649void
1650LSQ::threadSnoop(LSQRequestPtr request)
1651{
1652 /* LLSC operations in Minor can't be speculative and are executed from
1653 * the head of the requests queue. We shouldn't need to do more than
1654 * this action on snoops. */
1655 ThreadID req_tid = request->inst->id.threadId;
1656 PacketPtr pkt = request->packet;
1657
1658 for (ThreadID tid = 0; tid < cpu.numThreads; tid++) {
1659 if (tid != req_tid) {
1660 if (cpu.getCpuAddrMonitor(tid)->doMonitor(pkt)) {
1661 cpu.wakeup(tid);
1662 }
1663
1664 if (pkt->isInvalidate() || pkt->isWrite()) {
1665 TheISA::handleLockedSnoop(cpu.getContext(tid), pkt,
1666 cacheBlockMask);
1667 }
1668 }
1669 }
1670}
1671
1672}