lsq.cc revision 14297
1/* 2 * Copyright (c) 2013-2014,2017-2018 ARM Limited 3 * All rights reserved 4 * 5 * The license below extends only to copyright in the software and shall 6 * not be construed as granting a license to any other intellectual 7 * property including but not limited to intellectual property relating 8 * to a hardware implementation of the functionality of the software 9 * licensed hereunder. You may use the software subject to the license 10 * terms below provided that you ensure that this notice is replicated 11 * unmodified and in its entirety in all distributions of the software, 12 * modified or unmodified, in source code or in binary form. 13 * 14 * Redistribution and use in source and binary forms, with or without 15 * modification, are permitted provided that the following conditions are 16 * met: redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer; 18 * redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution; 21 * neither the name of the copyright holders nor the names of its 22 * contributors may be used to endorse or promote products derived from 23 * this software without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 36 * 37 * Authors: Andrew Bardsley 38 */ 39 40#include "cpu/minor/lsq.hh" 41 42#include <iomanip> 43#include <sstream> 44 45#include "arch/locked_mem.hh" 46#include "arch/mmapped_ipr.hh" 47#include "base/logging.hh" 48#include "cpu/minor/cpu.hh" 49#include "cpu/minor/exec_context.hh" 50#include "cpu/minor/execute.hh" 51#include "cpu/minor/pipeline.hh" 52#include "cpu/utils.hh" 53#include "debug/Activity.hh" 54#include "debug/MinorMem.hh" 55 56namespace Minor 57{ 58 59LSQ::LSQRequest::LSQRequest(LSQ &port_, MinorDynInstPtr inst_, bool isLoad_, 60 PacketDataPtr data_, uint64_t *res_) : 61 SenderState(), 62 port(port_), 63 inst(inst_), 64 isLoad(isLoad_), 65 data(data_), 66 packet(NULL), 67 request(), 68 res(res_), 69 skipped(false), 70 issuedToMemory(false), 71 isTranslationDelayed(false), 72 state(NotIssued) 73{ 74 request = std::make_shared<Request>(); 75} 76 77void 78LSQ::LSQRequest::tryToSuppressFault() 79{ 80 SimpleThread &thread = *port.cpu.threads[inst->id.threadId]; 81 TheISA::PCState old_pc = thread.pcState(); 82 ExecContext context(port.cpu, thread, port.execute, inst); 83 Fault M5_VAR_USED fault = inst->translationFault; 84 85 // Give the instruction a chance to suppress a translation fault 86 inst->translationFault = inst->staticInst->initiateAcc(&context, nullptr); 87 if (inst->translationFault == NoFault) { 88 DPRINTFS(MinorMem, (&port), 89 "Translation fault suppressed for inst:%s\n", *inst); 90 } else { 91 assert(inst->translationFault == fault); 92 } 93 thread.pcState(old_pc); 94} 95 96void 97LSQ::LSQRequest::completeDisabledMemAccess() 98{ 99 DPRINTFS(MinorMem, (&port), "Complete disabled mem access for inst:%s\n", 100 *inst); 101 102 SimpleThread &thread = *port.cpu.threads[inst->id.threadId]; 103 TheISA::PCState old_pc = thread.pcState(); 104 105 ExecContext context(port.cpu, thread, port.execute, inst); 106 107 context.setMemAccPredicate(false); 108 inst->staticInst->completeAcc(nullptr, &context, inst->traceData); 109 110 thread.pcState(old_pc); 111} 112 113void 114LSQ::LSQRequest::disableMemAccess() 115{ 116 port.cpu.threads[inst->id.threadId]->setMemAccPredicate(false); 117 DPRINTFS(MinorMem, (&port), "Disable mem access for inst:%s\n", *inst); 118} 119 120LSQ::AddrRangeCoverage 121LSQ::LSQRequest::containsAddrRangeOf( 122 Addr req1_addr, unsigned int req1_size, 123 Addr req2_addr, unsigned int req2_size) 124{ 125 /* 'end' here means the address of the byte just past the request 126 * blocks */ 127 Addr req2_end_addr = req2_addr + req2_size; 128 Addr req1_end_addr = req1_addr + req1_size; 129 130 AddrRangeCoverage ret; 131 132 if (req1_addr >= req2_end_addr || req1_end_addr <= req2_addr) 133 ret = NoAddrRangeCoverage; 134 else if (req1_addr <= req2_addr && req1_end_addr >= req2_end_addr) 135 ret = FullAddrRangeCoverage; 136 else 137 ret = PartialAddrRangeCoverage; 138 139 return ret; 140} 141 142LSQ::AddrRangeCoverage 143LSQ::LSQRequest::containsAddrRangeOf(LSQRequestPtr other_request) 144{ 145 return containsAddrRangeOf(request->getPaddr(), request->getSize(), 146 other_request->request->getPaddr(), other_request->request->getSize()); 147} 148 149bool 150LSQ::LSQRequest::isBarrier() 151{ 152 return inst->isInst() && inst->staticInst->isMemBarrier(); 153} 154 155bool 156LSQ::LSQRequest::needsToBeSentToStoreBuffer() 157{ 158 return state == StoreToStoreBuffer; 159} 160 161void 162LSQ::LSQRequest::setState(LSQRequestState new_state) 163{ 164 DPRINTFS(MinorMem, (&port), "Setting state from %d to %d for request:" 165 " %s\n", state, new_state, *inst); 166 state = new_state; 167} 168 169bool 170LSQ::LSQRequest::isComplete() const 171{ 172 /* @todo, There is currently only one 'completed' state. This 173 * may not be a good choice */ 174 return state == Complete; 175} 176 177void 178LSQ::LSQRequest::reportData(std::ostream &os) const 179{ 180 os << (isLoad ? 'R' : 'W') << ';'; 181 inst->reportData(os); 182 os << ';' << state; 183} 184 185std::ostream & 186operator <<(std::ostream &os, LSQ::AddrRangeCoverage coverage) 187{ 188 switch (coverage) { 189 case LSQ::PartialAddrRangeCoverage: 190 os << "PartialAddrRangeCoverage"; 191 break; 192 case LSQ::FullAddrRangeCoverage: 193 os << "FullAddrRangeCoverage"; 194 break; 195 case LSQ::NoAddrRangeCoverage: 196 os << "NoAddrRangeCoverage"; 197 break; 198 default: 199 os << "AddrRangeCoverage-" << static_cast<int>(coverage); 200 break; 201 } 202 return os; 203} 204 205std::ostream & 206operator <<(std::ostream &os, LSQ::LSQRequest::LSQRequestState state) 207{ 208 switch (state) { 209 case LSQ::LSQRequest::NotIssued: 210 os << "NotIssued"; 211 break; 212 case LSQ::LSQRequest::InTranslation: 213 os << "InTranslation"; 214 break; 215 case LSQ::LSQRequest::Translated: 216 os << "Translated"; 217 break; 218 case LSQ::LSQRequest::Failed: 219 os << "Failed"; 220 break; 221 case LSQ::LSQRequest::RequestIssuing: 222 os << "RequestIssuing"; 223 break; 224 case LSQ::LSQRequest::StoreToStoreBuffer: 225 os << "StoreToStoreBuffer"; 226 break; 227 case LSQ::LSQRequest::StoreInStoreBuffer: 228 os << "StoreInStoreBuffer"; 229 break; 230 case LSQ::LSQRequest::StoreBufferIssuing: 231 os << "StoreBufferIssuing"; 232 break; 233 case LSQ::LSQRequest::RequestNeedsRetry: 234 os << "RequestNeedsRetry"; 235 break; 236 case LSQ::LSQRequest::StoreBufferNeedsRetry: 237 os << "StoreBufferNeedsRetry"; 238 break; 239 case LSQ::LSQRequest::Complete: 240 os << "Complete"; 241 break; 242 default: 243 os << "LSQRequestState-" << static_cast<int>(state); 244 break; 245 } 246 return os; 247} 248 249void 250LSQ::clearMemBarrier(MinorDynInstPtr inst) 251{ 252 bool is_last_barrier = 253 inst->id.execSeqNum >= lastMemBarrier[inst->id.threadId]; 254 255 DPRINTF(MinorMem, "Moving %s barrier out of store buffer inst: %s\n", 256 (is_last_barrier ? "last" : "a"), *inst); 257 258 if (is_last_barrier) 259 lastMemBarrier[inst->id.threadId] = 0; 260} 261 262void 263LSQ::SingleDataRequest::finish(const Fault &fault_, const RequestPtr &request_, 264 ThreadContext *tc, BaseTLB::Mode mode) 265{ 266 port.numAccessesInDTLB--; 267 268 DPRINTFS(MinorMem, (&port), "Received translation response for" 269 " request: %s delayed:%d %s\n", *inst, isTranslationDelayed, 270 fault_ != NoFault ? fault_->name() : ""); 271 272 if (fault_ != NoFault) { 273 inst->translationFault = fault_; 274 if (isTranslationDelayed) { 275 tryToSuppressFault(); 276 if (inst->translationFault == NoFault) { 277 completeDisabledMemAccess(); 278 setState(Complete); 279 } 280 } 281 setState(Translated); 282 } else { 283 setState(Translated); 284 makePacket(); 285 } 286 port.tryToSendToTransfers(this); 287 288 /* Let's try and wake up the processor for the next cycle */ 289 port.cpu.wakeupOnEvent(Pipeline::ExecuteStageId); 290} 291 292void 293LSQ::SingleDataRequest::startAddrTranslation() 294{ 295 ThreadContext *thread = port.cpu.getContext( 296 inst->id.threadId); 297 298 const auto &byteEnable = request->getByteEnable(); 299 if (byteEnable.size() == 0 || 300 isAnyActiveElement(byteEnable.cbegin(), byteEnable.cend())) { 301 port.numAccessesInDTLB++; 302 303 setState(LSQ::LSQRequest::InTranslation); 304 305 DPRINTFS(MinorMem, (&port), "Submitting DTLB request\n"); 306 /* Submit the translation request. The response will come through 307 * finish/markDelayed on the LSQRequest as it bears the Translation 308 * interface */ 309 thread->getDTBPtr()->translateTiming( 310 request, thread, this, (isLoad ? BaseTLB::Read : BaseTLB::Write)); 311 } else { 312 disableMemAccess(); 313 setState(LSQ::LSQRequest::Complete); 314 } 315} 316 317void 318LSQ::SingleDataRequest::retireResponse(PacketPtr packet_) 319{ 320 DPRINTFS(MinorMem, (&port), "Retiring packet\n"); 321 packet = packet_; 322 packetInFlight = false; 323 setState(Complete); 324} 325 326void 327LSQ::SplitDataRequest::finish(const Fault &fault_, const RequestPtr &request_, 328 ThreadContext *tc, BaseTLB::Mode mode) 329{ 330 port.numAccessesInDTLB--; 331 332 unsigned int M5_VAR_USED expected_fragment_index = 333 numTranslatedFragments; 334 335 numInTranslationFragments--; 336 numTranslatedFragments++; 337 338 DPRINTFS(MinorMem, (&port), "Received translation response for fragment" 339 " %d of request: %s delayed:%d %s\n", expected_fragment_index, 340 *inst, isTranslationDelayed, 341 fault_ != NoFault ? fault_->name() : ""); 342 343 assert(request_ == fragmentRequests[expected_fragment_index]); 344 345 /* Wake up next cycle to get things going again in case the 346 * tryToSendToTransfers does take */ 347 port.cpu.wakeupOnEvent(Pipeline::ExecuteStageId); 348 349 if (fault_ != NoFault) { 350 /* tryToSendToTransfers will handle the fault */ 351 inst->translationFault = fault_; 352 353 DPRINTFS(MinorMem, (&port), "Faulting translation for fragment:" 354 " %d of request: %s\n", 355 expected_fragment_index, *inst); 356 357 if (expected_fragment_index > 0 || isTranslationDelayed) 358 tryToSuppressFault(); 359 if (expected_fragment_index == 0) { 360 if (isTranslationDelayed && inst->translationFault == NoFault) { 361 completeDisabledMemAccess(); 362 setState(Complete); 363 } else { 364 setState(Translated); 365 } 366 } else if (inst->translationFault == NoFault) { 367 setState(Translated); 368 numTranslatedFragments--; 369 makeFragmentPackets(); 370 } else { 371 setState(Translated); 372 } 373 port.tryToSendToTransfers(this); 374 } else if (numTranslatedFragments == numFragments) { 375 makeFragmentPackets(); 376 setState(Translated); 377 port.tryToSendToTransfers(this); 378 } else { 379 /* Avoid calling translateTiming from within ::finish */ 380 assert(!translationEvent.scheduled()); 381 port.cpu.schedule(translationEvent, curTick()); 382 } 383} 384 385LSQ::SplitDataRequest::SplitDataRequest(LSQ &port_, MinorDynInstPtr inst_, 386 bool isLoad_, PacketDataPtr data_, uint64_t *res_) : 387 LSQRequest(port_, inst_, isLoad_, data_, res_), 388 translationEvent([this]{ sendNextFragmentToTranslation(); }, 389 "translationEvent"), 390 numFragments(0), 391 numInTranslationFragments(0), 392 numTranslatedFragments(0), 393 numIssuedFragments(0), 394 numRetiredFragments(0), 395 fragmentRequests(), 396 fragmentPackets() 397{ 398 /* Don't know how many elements are needed until the request is 399 * populated by the caller. */ 400} 401 402LSQ::SplitDataRequest::~SplitDataRequest() 403{ 404 for (auto i = fragmentPackets.begin(); 405 i != fragmentPackets.end(); i++) 406 { 407 delete *i; 408 } 409} 410 411void 412LSQ::SplitDataRequest::makeFragmentRequests() 413{ 414 Addr base_addr = request->getVaddr(); 415 unsigned int whole_size = request->getSize(); 416 unsigned int line_width = port.lineWidth; 417 418 unsigned int fragment_size; 419 Addr fragment_addr; 420 421 std::vector<bool> fragment_write_byte_en; 422 423 /* Assume that this transfer is across potentially many block snap 424 * boundaries: 425 * 426 * | _|________|________|________|___ | 427 * | |0| 1 | 2 | 3 | 4 | | 428 * | |_|________|________|________|___| | 429 * | | | | | | 430 * 431 * The first transfer (0) can be up to lineWidth in size. 432 * All the middle transfers (1-3) are lineWidth in size 433 * The last transfer (4) can be from zero to lineWidth - 1 in size 434 */ 435 unsigned int first_fragment_offset = 436 addrBlockOffset(base_addr, line_width); 437 unsigned int last_fragment_size = 438 addrBlockOffset(base_addr + whole_size, line_width); 439 unsigned int first_fragment_size = 440 line_width - first_fragment_offset; 441 442 unsigned int middle_fragments_total_size = 443 whole_size - (first_fragment_size + last_fragment_size); 444 445 assert(addrBlockOffset(middle_fragments_total_size, line_width) == 0); 446 447 unsigned int middle_fragment_count = 448 middle_fragments_total_size / line_width; 449 450 numFragments = 1 /* first */ + middle_fragment_count + 451 (last_fragment_size == 0 ? 0 : 1); 452 453 DPRINTFS(MinorMem, (&port), "Dividing transfer into %d fragmentRequests." 454 " First fragment size: %d Last fragment size: %d\n", 455 numFragments, first_fragment_size, 456 (last_fragment_size == 0 ? line_width : last_fragment_size)); 457 458 assert(((middle_fragment_count * line_width) + 459 first_fragment_size + last_fragment_size) == whole_size); 460 461 fragment_addr = base_addr; 462 fragment_size = first_fragment_size; 463 464 /* Just past the last address in the request */ 465 Addr end_addr = base_addr + whole_size; 466 467 auto& byte_enable = request->getByteEnable(); 468 unsigned int num_disabled_fragments = 0; 469 470 for (unsigned int fragment_index = 0; fragment_index < numFragments; 471 fragment_index++) 472 { 473 bool M5_VAR_USED is_last_fragment = false; 474 475 if (fragment_addr == base_addr) { 476 /* First fragment */ 477 fragment_size = first_fragment_size; 478 } else { 479 if ((fragment_addr + line_width) > end_addr) { 480 /* Adjust size of last fragment */ 481 fragment_size = end_addr - fragment_addr; 482 is_last_fragment = true; 483 } else { 484 /* Middle fragments */ 485 fragment_size = line_width; 486 } 487 } 488 489 RequestPtr fragment = std::make_shared<Request>(); 490 bool disabled_fragment = false; 491 492 fragment->setContext(request->contextId()); 493 if (byte_enable.empty()) { 494 fragment->setVirt(0 /* asid */, 495 fragment_addr, fragment_size, request->getFlags(), 496 request->masterId(), 497 request->getPC()); 498 } else { 499 // Set up byte-enable mask for the current fragment 500 auto it_start = byte_enable.begin() + 501 (fragment_addr - base_addr); 502 auto it_end = byte_enable.begin() + 503 (fragment_addr - base_addr) + fragment_size; 504 if (isAnyActiveElement(it_start, it_end)) { 505 fragment->setVirt(0 /* asid */, 506 fragment_addr, fragment_size, request->getFlags(), 507 request->masterId(), 508 request->getPC()); 509 fragment->setByteEnable(std::vector<bool>(it_start, it_end)); 510 } else { 511 disabled_fragment = true; 512 } 513 } 514 515 if (!disabled_fragment) { 516 DPRINTFS(MinorMem, (&port), "Generating fragment addr: 0x%x" 517 " size: %d (whole request addr: 0x%x size: %d) %s\n", 518 fragment_addr, fragment_size, base_addr, whole_size, 519 (is_last_fragment ? "last fragment" : "")); 520 521 fragmentRequests.push_back(fragment); 522 } else { 523 num_disabled_fragments++; 524 } 525 526 fragment_addr += fragment_size; 527 } 528 assert(numFragments >= num_disabled_fragments); 529 numFragments -= num_disabled_fragments; 530} 531 532void 533LSQ::SplitDataRequest::makeFragmentPackets() 534{ 535 assert(numTranslatedFragments > 0); 536 Addr base_addr = request->getVaddr(); 537 538 DPRINTFS(MinorMem, (&port), "Making packets for request: %s\n", *inst); 539 540 for (unsigned int fragment_index = 0; 541 fragment_index < numTranslatedFragments; 542 fragment_index++) 543 { 544 RequestPtr fragment = fragmentRequests[fragment_index]; 545 546 DPRINTFS(MinorMem, (&port), "Making packet %d for request: %s" 547 " (%d, 0x%x)\n", 548 fragment_index, *inst, 549 (fragment->hasPaddr() ? "has paddr" : "no paddr"), 550 (fragment->hasPaddr() ? fragment->getPaddr() : 0)); 551 552 Addr fragment_addr = fragment->getVaddr(); 553 unsigned int fragment_size = fragment->getSize(); 554 555 uint8_t *request_data = NULL; 556 557 if (!isLoad) { 558 /* Split data for Packets. Will become the property of the 559 * outgoing Packets */ 560 request_data = new uint8_t[fragment_size]; 561 std::memcpy(request_data, data + (fragment_addr - base_addr), 562 fragment_size); 563 } 564 565 assert(fragment->hasPaddr()); 566 567 PacketPtr fragment_packet = 568 makePacketForRequest(fragment, isLoad, this, request_data); 569 570 fragmentPackets.push_back(fragment_packet); 571 /* Accumulate flags in parent request */ 572 request->setFlags(fragment->getFlags()); 573 } 574 575 /* Might as well make the overall/response packet here */ 576 /* Get the physical address for the whole request/packet from the first 577 * fragment */ 578 request->setPaddr(fragmentRequests[0]->getPaddr()); 579 makePacket(); 580} 581 582void 583LSQ::SplitDataRequest::startAddrTranslation() 584{ 585 makeFragmentRequests(); 586 587 if (numFragments > 0) { 588 setState(LSQ::LSQRequest::InTranslation); 589 numInTranslationFragments = 0; 590 numTranslatedFragments = 0; 591 592 /* @todo, just do these in sequence for now with 593 * a loop of: 594 * do { 595 * sendNextFragmentToTranslation ; translateTiming ; finish 596 * } while (numTranslatedFragments != numFragments); 597 */ 598 599 /* Do first translation */ 600 sendNextFragmentToTranslation(); 601 } else { 602 disableMemAccess(); 603 setState(LSQ::LSQRequest::Complete); 604 } 605} 606 607PacketPtr 608LSQ::SplitDataRequest::getHeadPacket() 609{ 610 assert(numIssuedFragments < numTranslatedFragments); 611 612 return fragmentPackets[numIssuedFragments]; 613} 614 615void 616LSQ::SplitDataRequest::stepToNextPacket() 617{ 618 assert(numIssuedFragments < numTranslatedFragments); 619 620 numIssuedFragments++; 621} 622 623void 624LSQ::SplitDataRequest::retireResponse(PacketPtr response) 625{ 626 assert(inst->translationFault == NoFault); 627 assert(numRetiredFragments < numTranslatedFragments); 628 629 DPRINTFS(MinorMem, (&port), "Retiring fragment addr: 0x%x size: %d" 630 " offset: 0x%x (retired fragment num: %d)\n", 631 response->req->getVaddr(), response->req->getSize(), 632 request->getVaddr() - response->req->getVaddr(), 633 numRetiredFragments); 634 635 numRetiredFragments++; 636 637 if (skipped) { 638 /* Skip because we already knew the request had faulted or been 639 * skipped */ 640 DPRINTFS(MinorMem, (&port), "Skipping this fragment\n"); 641 } else if (response->isError()) { 642 /* Mark up the error and leave to execute to handle it */ 643 DPRINTFS(MinorMem, (&port), "Fragment has an error, skipping\n"); 644 setSkipped(); 645 packet->copyError(response); 646 } else { 647 if (isLoad) { 648 if (!data) { 649 /* For a split transfer, a Packet must be constructed 650 * to contain all returning data. This is that packet's 651 * data */ 652 data = new uint8_t[request->getSize()]; 653 } 654 655 /* Populate the portion of the overall response data represented 656 * by the response fragment */ 657 std::memcpy( 658 data + (response->req->getVaddr() - request->getVaddr()), 659 response->getConstPtr<uint8_t>(), 660 response->req->getSize()); 661 } 662 } 663 664 /* Complete early if we're skipping are no more in-flight accesses */ 665 if (skipped && !hasPacketsInMemSystem()) { 666 DPRINTFS(MinorMem, (&port), "Completed skipped burst\n"); 667 setState(Complete); 668 if (packet->needsResponse()) 669 packet->makeResponse(); 670 } 671 672 if (numRetiredFragments == numTranslatedFragments) 673 setState(Complete); 674 675 if (!skipped && isComplete()) { 676 DPRINTFS(MinorMem, (&port), "Completed burst %d\n", packet != NULL); 677 678 DPRINTFS(MinorMem, (&port), "Retired packet isRead: %d isWrite: %d" 679 " needsResponse: %d packetSize: %s requestSize: %s responseSize:" 680 " %s\n", packet->isRead(), packet->isWrite(), 681 packet->needsResponse(), packet->getSize(), request->getSize(), 682 response->getSize()); 683 684 /* A request can become complete by several paths, this is a sanity 685 * check to make sure the packet's data is created */ 686 if (!data) { 687 data = new uint8_t[request->getSize()]; 688 } 689 690 if (isLoad) { 691 DPRINTFS(MinorMem, (&port), "Copying read data\n"); 692 std::memcpy(packet->getPtr<uint8_t>(), data, request->getSize()); 693 } 694 packet->makeResponse(); 695 } 696 697 /* Packets are all deallocated together in ~SplitLSQRequest */ 698} 699 700void 701LSQ::SplitDataRequest::sendNextFragmentToTranslation() 702{ 703 unsigned int fragment_index = numTranslatedFragments; 704 705 ThreadContext *thread = port.cpu.getContext( 706 inst->id.threadId); 707 708 DPRINTFS(MinorMem, (&port), "Submitting DTLB request for fragment: %d\n", 709 fragment_index); 710 711 port.numAccessesInDTLB++; 712 numInTranslationFragments++; 713 714 thread->getDTBPtr()->translateTiming( 715 fragmentRequests[fragment_index], thread, this, (isLoad ? 716 BaseTLB::Read : BaseTLB::Write)); 717} 718 719bool 720LSQ::StoreBuffer::canInsert() const 721{ 722 /* @todo, support store amalgamation */ 723 return slots.size() < numSlots; 724} 725 726void 727LSQ::StoreBuffer::deleteRequest(LSQRequestPtr request) 728{ 729 auto found = std::find(slots.begin(), slots.end(), request); 730 731 if (found != slots.end()) { 732 DPRINTF(MinorMem, "Deleting request: %s %s %s from StoreBuffer\n", 733 request, *found, *(request->inst)); 734 slots.erase(found); 735 736 delete request; 737 } 738} 739 740void 741LSQ::StoreBuffer::insert(LSQRequestPtr request) 742{ 743 if (!canInsert()) { 744 warn("%s: store buffer insertion without space to insert from" 745 " inst: %s\n", name(), *(request->inst)); 746 } 747 748 DPRINTF(MinorMem, "Pushing store: %s into store buffer\n", request); 749 750 numUnissuedAccesses++; 751 752 if (request->state != LSQRequest::Complete) 753 request->setState(LSQRequest::StoreInStoreBuffer); 754 755 slots.push_back(request); 756 757 /* Let's try and wake up the processor for the next cycle to step 758 * the store buffer */ 759 lsq.cpu.wakeupOnEvent(Pipeline::ExecuteStageId); 760} 761 762LSQ::AddrRangeCoverage 763LSQ::StoreBuffer::canForwardDataToLoad(LSQRequestPtr request, 764 unsigned int &found_slot) 765{ 766 unsigned int slot_index = slots.size() - 1; 767 auto i = slots.rbegin(); 768 AddrRangeCoverage ret = NoAddrRangeCoverage; 769 770 /* Traverse the store buffer in reverse order (most to least recent) 771 * and try to find a slot whose address range overlaps this request */ 772 while (ret == NoAddrRangeCoverage && i != slots.rend()) { 773 LSQRequestPtr slot = *i; 774 775 /* Cache maintenance instructions go down via the store path but 776 * they carry no data and they shouldn't be considered 777 * for forwarding */ 778 if (slot->packet && 779 slot->inst->id.threadId == request->inst->id.threadId && 780 !slot->packet->req->isCacheMaintenance()) { 781 AddrRangeCoverage coverage = slot->containsAddrRangeOf(request); 782 783 if (coverage != NoAddrRangeCoverage) { 784 DPRINTF(MinorMem, "Forwarding: slot: %d result: %s thisAddr:" 785 " 0x%x thisSize: %d slotAddr: 0x%x slotSize: %d\n", 786 slot_index, coverage, 787 request->request->getPaddr(), request->request->getSize(), 788 slot->request->getPaddr(), slot->request->getSize()); 789 790 found_slot = slot_index; 791 ret = coverage; 792 } 793 } 794 795 i++; 796 slot_index--; 797 } 798 799 return ret; 800} 801 802/** Fill the given packet with appropriate date from slot slot_number */ 803void 804LSQ::StoreBuffer::forwardStoreData(LSQRequestPtr load, 805 unsigned int slot_number) 806{ 807 assert(slot_number < slots.size()); 808 assert(load->packet); 809 assert(load->isLoad); 810 811 LSQRequestPtr store = slots[slot_number]; 812 813 assert(store->packet); 814 assert(store->containsAddrRangeOf(load) == FullAddrRangeCoverage); 815 816 Addr load_addr = load->request->getPaddr(); 817 Addr store_addr = store->request->getPaddr(); 818 Addr addr_offset = load_addr - store_addr; 819 820 unsigned int load_size = load->request->getSize(); 821 822 DPRINTF(MinorMem, "Forwarding %d bytes for addr: 0x%x from store buffer" 823 " slot: %d addr: 0x%x addressOffset: 0x%x\n", 824 load_size, load_addr, slot_number, 825 store_addr, addr_offset); 826 827 void *load_packet_data = load->packet->getPtr<void>(); 828 void *store_packet_data = store->packet->getPtr<uint8_t>() + addr_offset; 829 830 std::memcpy(load_packet_data, store_packet_data, load_size); 831} 832 833void 834LSQ::StoreBuffer::countIssuedStore(LSQRequestPtr request) 835{ 836 /* Barriers are accounted for as they are cleared from 837 * the queue, not after their transfers are complete */ 838 if (!request->isBarrier()) 839 numUnissuedAccesses--; 840} 841 842void 843LSQ::StoreBuffer::step() 844{ 845 DPRINTF(MinorMem, "StoreBuffer step numUnissuedAccesses: %d\n", 846 numUnissuedAccesses); 847 848 if (numUnissuedAccesses != 0 && lsq.state == LSQ::MemoryRunning) { 849 /* Clear all the leading barriers */ 850 while (!slots.empty() && 851 slots.front()->isComplete() && slots.front()->isBarrier()) 852 { 853 LSQRequestPtr barrier = slots.front(); 854 855 DPRINTF(MinorMem, "Clearing barrier for inst: %s\n", 856 *(barrier->inst)); 857 858 numUnissuedAccesses--; 859 lsq.clearMemBarrier(barrier->inst); 860 slots.pop_front(); 861 862 delete barrier; 863 } 864 865 auto i = slots.begin(); 866 bool issued = true; 867 unsigned int issue_count = 0; 868 869 /* Skip trying if the memory system is busy */ 870 if (lsq.state == LSQ::MemoryNeedsRetry) 871 issued = false; 872 873 /* Try to issue all stores in order starting from the head 874 * of the queue. Responses are allowed to be retired 875 * out of order */ 876 while (issued && 877 issue_count < storeLimitPerCycle && 878 lsq.canSendToMemorySystem() && 879 i != slots.end()) 880 { 881 LSQRequestPtr request = *i; 882 883 DPRINTF(MinorMem, "Considering request: %s, sentAllPackets: %d" 884 " state: %s\n", 885 *(request->inst), request->sentAllPackets(), 886 request->state); 887 888 if (request->isBarrier() && request->isComplete()) { 889 /* Give up at barriers */ 890 issued = false; 891 } else if (!(request->state == LSQRequest::StoreBufferIssuing && 892 request->sentAllPackets())) 893 { 894 DPRINTF(MinorMem, "Trying to send request: %s to memory" 895 " system\n", *(request->inst)); 896 897 if (lsq.tryToSend(request)) { 898 countIssuedStore(request); 899 issue_count++; 900 } else { 901 /* Don't step on to the next store buffer entry if this 902 * one hasn't issued all its packets as the store 903 * buffer must still enforce ordering */ 904 issued = false; 905 } 906 } 907 i++; 908 } 909 } 910} 911 912void 913LSQ::completeMemBarrierInst(MinorDynInstPtr inst, 914 bool committed) 915{ 916 if (committed) { 917 /* Not already sent to the store buffer as a store request? */ 918 if (!inst->inStoreBuffer) { 919 /* Insert an entry into the store buffer to tick off barriers 920 * until there are none in flight */ 921 storeBuffer.insert(new BarrierDataRequest(*this, inst)); 922 } 923 } else { 924 /* Clear the barrier anyway if it wasn't actually committed */ 925 clearMemBarrier(inst); 926 } 927} 928 929void 930LSQ::StoreBuffer::minorTrace() const 931{ 932 unsigned int size = slots.size(); 933 unsigned int i = 0; 934 std::ostringstream os; 935 936 while (i < size) { 937 LSQRequestPtr request = slots[i]; 938 939 request->reportData(os); 940 941 i++; 942 if (i < numSlots) 943 os << ','; 944 } 945 946 while (i < numSlots) { 947 os << '-'; 948 949 i++; 950 if (i < numSlots) 951 os << ','; 952 } 953 954 MINORTRACE("addr=%s num_unissued_stores=%d\n", os.str(), 955 numUnissuedAccesses); 956} 957 958void 959LSQ::tryToSendToTransfers(LSQRequestPtr request) 960{ 961 if (state == MemoryNeedsRetry) { 962 DPRINTF(MinorMem, "Request needs retry, not issuing to" 963 " memory until retry arrives\n"); 964 return; 965 } 966 967 if (request->state == LSQRequest::InTranslation) { 968 DPRINTF(MinorMem, "Request still in translation, not issuing to" 969 " memory\n"); 970 return; 971 } 972 973 assert(request->state == LSQRequest::Translated || 974 request->state == LSQRequest::RequestIssuing || 975 request->state == LSQRequest::Failed || 976 request->state == LSQRequest::Complete); 977 978 if (requests.empty() || requests.front() != request) { 979 DPRINTF(MinorMem, "Request not at front of requests queue, can't" 980 " issue to memory\n"); 981 return; 982 } 983 984 if (transfers.unreservedRemainingSpace() == 0) { 985 DPRINTF(MinorMem, "No space to insert request into transfers" 986 " queue\n"); 987 return; 988 } 989 990 if (request->isComplete() || request->state == LSQRequest::Failed) { 991 DPRINTF(MinorMem, "Passing a %s transfer on to transfers" 992 " queue\n", (request->isComplete() ? "completed" : "failed")); 993 request->setState(LSQRequest::Complete); 994 request->setSkipped(); 995 moveFromRequestsToTransfers(request); 996 return; 997 } 998 999 if (!execute.instIsRightStream(request->inst)) { 1000 /* Wrong stream, try to abort the transfer but only do so if 1001 * there are no packets in flight */ 1002 if (request->hasPacketsInMemSystem()) { 1003 DPRINTF(MinorMem, "Request's inst. is from the wrong stream," 1004 " waiting for responses before aborting request\n"); 1005 } else { 1006 DPRINTF(MinorMem, "Request's inst. is from the wrong stream," 1007 " aborting request\n"); 1008 request->setState(LSQRequest::Complete); 1009 request->setSkipped(); 1010 moveFromRequestsToTransfers(request); 1011 } 1012 return; 1013 } 1014 1015 if (request->inst->translationFault != NoFault) { 1016 if (request->inst->staticInst->isPrefetch()) { 1017 DPRINTF(MinorMem, "Not signalling fault for faulting prefetch\n"); 1018 } 1019 DPRINTF(MinorMem, "Moving faulting request into the transfers" 1020 " queue\n"); 1021 request->setState(LSQRequest::Complete); 1022 request->setSkipped(); 1023 moveFromRequestsToTransfers(request); 1024 return; 1025 } 1026 1027 bool is_load = request->isLoad; 1028 bool is_llsc = request->request->isLLSC(); 1029 bool is_swap = request->request->isSwap(); 1030 bool is_atomic = request->request->isAtomic(); 1031 bool bufferable = !(request->request->isStrictlyOrdered() || 1032 is_llsc || is_swap || is_atomic); 1033 1034 if (is_load) { 1035 if (numStoresInTransfers != 0) { 1036 DPRINTF(MinorMem, "Load request with stores still in transfers" 1037 " queue, stalling\n"); 1038 return; 1039 } 1040 } else { 1041 /* Store. Can it be sent to the store buffer? */ 1042 if (bufferable && !request->request->isMmappedIpr()) { 1043 request->setState(LSQRequest::StoreToStoreBuffer); 1044 moveFromRequestsToTransfers(request); 1045 DPRINTF(MinorMem, "Moving store into transfers queue\n"); 1046 return; 1047 } 1048 } 1049 1050 /* Check if this is the head instruction (and so must be executable as 1051 * its stream sequence number was checked above) for loads which must 1052 * not be speculatively issued and stores which must be issued here */ 1053 if (!bufferable) { 1054 if (!execute.instIsHeadInst(request->inst)) { 1055 DPRINTF(MinorMem, "Memory access not the head inst., can't be" 1056 " sure it can be performed, not issuing\n"); 1057 return; 1058 } 1059 1060 unsigned int forwarding_slot = 0; 1061 1062 if (storeBuffer.canForwardDataToLoad(request, forwarding_slot) != 1063 NoAddrRangeCoverage) 1064 { 1065 // There's at least another request that targets the same 1066 // address and is staying in the storeBuffer. Since our 1067 // request is non-bufferable (e.g., strictly ordered or atomic), 1068 // we must wait for the other request in the storeBuffer to 1069 // complete before we can issue this non-bufferable request. 1070 // This is to make sure that the order they access the cache is 1071 // correct. 1072 DPRINTF(MinorMem, "Memory access can receive forwarded data" 1073 " from the store buffer, but need to wait for store buffer" 1074 " to drain\n"); 1075 return; 1076 } 1077 } 1078 1079 /* True: submit this packet to the transfers queue to be sent to the 1080 * memory system. 1081 * False: skip the memory and push a packet for this request onto 1082 * requests */ 1083 bool do_access = true; 1084 1085 if (!is_llsc) { 1086 /* Check for match in the store buffer */ 1087 if (is_load) { 1088 unsigned int forwarding_slot = 0; 1089 AddrRangeCoverage forwarding_result = 1090 storeBuffer.canForwardDataToLoad(request, 1091 forwarding_slot); 1092 1093 switch (forwarding_result) { 1094 case FullAddrRangeCoverage: 1095 /* Forward data from the store buffer into this request and 1096 * repurpose this request's packet into a response packet */ 1097 storeBuffer.forwardStoreData(request, forwarding_slot); 1098 request->packet->makeResponse(); 1099 1100 /* Just move between queues, no access */ 1101 do_access = false; 1102 break; 1103 case PartialAddrRangeCoverage: 1104 DPRINTF(MinorMem, "Load partly satisfied by store buffer" 1105 " data. Must wait for the store to complete\n"); 1106 return; 1107 break; 1108 case NoAddrRangeCoverage: 1109 DPRINTF(MinorMem, "No forwardable data from store buffer\n"); 1110 /* Fall through to try access */ 1111 break; 1112 } 1113 } 1114 } else { 1115 if (!canSendToMemorySystem()) { 1116 DPRINTF(MinorMem, "Can't send request to memory system yet\n"); 1117 return; 1118 } 1119 1120 SimpleThread &thread = *cpu.threads[request->inst->id.threadId]; 1121 1122 TheISA::PCState old_pc = thread.pcState(); 1123 ExecContext context(cpu, thread, execute, request->inst); 1124 1125 /* Handle LLSC requests and tests */ 1126 if (is_load) { 1127 TheISA::handleLockedRead(&context, request->request); 1128 } else { 1129 do_access = TheISA::handleLockedWrite(&context, 1130 request->request, cacheBlockMask); 1131 1132 if (!do_access) { 1133 DPRINTF(MinorMem, "Not perfoming a memory " 1134 "access for store conditional\n"); 1135 } 1136 } 1137 thread.pcState(old_pc); 1138 } 1139 1140 /* See the do_access comment above */ 1141 if (do_access) { 1142 if (!canSendToMemorySystem()) { 1143 DPRINTF(MinorMem, "Can't send request to memory system yet\n"); 1144 return; 1145 } 1146 1147 /* Remember if this is an access which can't be idly 1148 * discarded by an interrupt */ 1149 if (!bufferable && !request->issuedToMemory) { 1150 numAccessesIssuedToMemory++; 1151 request->issuedToMemory = true; 1152 } 1153 1154 if (tryToSend(request)) { 1155 moveFromRequestsToTransfers(request); 1156 } 1157 } else { 1158 request->setState(LSQRequest::Complete); 1159 moveFromRequestsToTransfers(request); 1160 } 1161} 1162 1163bool 1164LSQ::tryToSend(LSQRequestPtr request) 1165{ 1166 bool ret = false; 1167 1168 if (!canSendToMemorySystem()) { 1169 DPRINTF(MinorMem, "Can't send request: %s yet, no space in memory\n", 1170 *(request->inst)); 1171 } else { 1172 PacketPtr packet = request->getHeadPacket(); 1173 1174 DPRINTF(MinorMem, "Trying to send request: %s addr: 0x%x\n", 1175 *(request->inst), packet->req->getVaddr()); 1176 1177 /* The sender state of the packet *must* be an LSQRequest 1178 * so the response can be correctly handled */ 1179 assert(packet->findNextSenderState<LSQRequest>()); 1180 1181 if (request->request->isMmappedIpr()) { 1182 ThreadContext *thread = 1183 cpu.getContext(cpu.contextToThread( 1184 request->request->contextId())); 1185 1186 if (request->isLoad) { 1187 DPRINTF(MinorMem, "IPR read inst: %s\n", *(request->inst)); 1188 TheISA::handleIprRead(thread, packet); 1189 } else { 1190 DPRINTF(MinorMem, "IPR write inst: %s\n", *(request->inst)); 1191 TheISA::handleIprWrite(thread, packet); 1192 } 1193 1194 request->stepToNextPacket(); 1195 ret = request->sentAllPackets(); 1196 1197 if (!ret) { 1198 DPRINTF(MinorMem, "IPR access has another packet: %s\n", 1199 *(request->inst)); 1200 } 1201 1202 if (ret) 1203 request->setState(LSQRequest::Complete); 1204 else 1205 request->setState(LSQRequest::RequestIssuing); 1206 } else if (dcachePort.sendTimingReq(packet)) { 1207 DPRINTF(MinorMem, "Sent data memory request\n"); 1208 1209 numAccessesInMemorySystem++; 1210 1211 request->stepToNextPacket(); 1212 1213 ret = request->sentAllPackets(); 1214 1215 switch (request->state) { 1216 case LSQRequest::Translated: 1217 case LSQRequest::RequestIssuing: 1218 /* Fully or partially issued a request in the transfers 1219 * queue */ 1220 request->setState(LSQRequest::RequestIssuing); 1221 break; 1222 case LSQRequest::StoreInStoreBuffer: 1223 case LSQRequest::StoreBufferIssuing: 1224 /* Fully or partially issued a request in the store 1225 * buffer */ 1226 request->setState(LSQRequest::StoreBufferIssuing); 1227 break; 1228 default: 1229 panic("Unrecognized LSQ request state %d.", request->state); 1230 } 1231 1232 state = MemoryRunning; 1233 } else { 1234 DPRINTF(MinorMem, 1235 "Sending data memory request - needs retry\n"); 1236 1237 /* Needs to be resent, wait for that */ 1238 state = MemoryNeedsRetry; 1239 retryRequest = request; 1240 1241 switch (request->state) { 1242 case LSQRequest::Translated: 1243 case LSQRequest::RequestIssuing: 1244 request->setState(LSQRequest::RequestNeedsRetry); 1245 break; 1246 case LSQRequest::StoreInStoreBuffer: 1247 case LSQRequest::StoreBufferIssuing: 1248 request->setState(LSQRequest::StoreBufferNeedsRetry); 1249 break; 1250 default: 1251 panic("Unrecognized LSQ request state %d.", request->state); 1252 } 1253 } 1254 } 1255 1256 if (ret) 1257 threadSnoop(request); 1258 1259 return ret; 1260} 1261 1262void 1263LSQ::moveFromRequestsToTransfers(LSQRequestPtr request) 1264{ 1265 assert(!requests.empty() && requests.front() == request); 1266 assert(transfers.unreservedRemainingSpace() != 0); 1267 1268 /* Need to count the number of stores in the transfers 1269 * queue so that loads know when their store buffer forwarding 1270 * results will be correct (only when all those stores 1271 * have reached the store buffer) */ 1272 if (!request->isLoad) 1273 numStoresInTransfers++; 1274 1275 requests.pop(); 1276 transfers.push(request); 1277} 1278 1279bool 1280LSQ::canSendToMemorySystem() 1281{ 1282 return state == MemoryRunning && 1283 numAccessesInMemorySystem < inMemorySystemLimit; 1284} 1285 1286bool 1287LSQ::recvTimingResp(PacketPtr response) 1288{ 1289 LSQRequestPtr request = 1290 safe_cast<LSQRequestPtr>(response->popSenderState()); 1291 1292 DPRINTF(MinorMem, "Received response packet inst: %s" 1293 " addr: 0x%x cmd: %s\n", 1294 *(request->inst), response->getAddr(), 1295 response->cmd.toString()); 1296 1297 numAccessesInMemorySystem--; 1298 1299 if (response->isError()) { 1300 DPRINTF(MinorMem, "Received error response packet: %s\n", 1301 *request->inst); 1302 } 1303 1304 switch (request->state) { 1305 case LSQRequest::RequestIssuing: 1306 case LSQRequest::RequestNeedsRetry: 1307 /* Response to a request from the transfers queue */ 1308 request->retireResponse(response); 1309 1310 DPRINTF(MinorMem, "Has outstanding packets?: %d %d\n", 1311 request->hasPacketsInMemSystem(), request->isComplete()); 1312 1313 break; 1314 case LSQRequest::StoreBufferIssuing: 1315 case LSQRequest::StoreBufferNeedsRetry: 1316 /* Response to a request from the store buffer */ 1317 request->retireResponse(response); 1318 1319 /* Remove completed requests unless they are barriers (which will 1320 * need to be removed in order */ 1321 if (request->isComplete()) { 1322 if (!request->isBarrier()) { 1323 storeBuffer.deleteRequest(request); 1324 } else { 1325 DPRINTF(MinorMem, "Completed transfer for barrier: %s" 1326 " leaving the request as it is also a barrier\n", 1327 *(request->inst)); 1328 } 1329 } 1330 break; 1331 default: 1332 panic("Shouldn't be allowed to receive a response from another state"); 1333 } 1334 1335 /* We go to idle even if there are more things in the requests queue 1336 * as it's the job of step to actually step us on to the next 1337 * transaction */ 1338 1339 /* Let's try and wake up the processor for the next cycle */ 1340 cpu.wakeupOnEvent(Pipeline::ExecuteStageId); 1341 1342 /* Never busy */ 1343 return true; 1344} 1345 1346void 1347LSQ::recvReqRetry() 1348{ 1349 DPRINTF(MinorMem, "Received retry request\n"); 1350 1351 assert(state == MemoryNeedsRetry); 1352 1353 switch (retryRequest->state) { 1354 case LSQRequest::RequestNeedsRetry: 1355 /* Retry in the requests queue */ 1356 retryRequest->setState(LSQRequest::Translated); 1357 break; 1358 case LSQRequest::StoreBufferNeedsRetry: 1359 /* Retry in the store buffer */ 1360 retryRequest->setState(LSQRequest::StoreInStoreBuffer); 1361 break; 1362 default: 1363 panic("Unrecognized retry request state %d.", retryRequest->state); 1364 } 1365 1366 /* Set state back to MemoryRunning so that the following 1367 * tryToSend can actually send. Note that this won't 1368 * allow another transfer in as tryToSend should 1369 * issue a memory request and either succeed for this 1370 * request or return the LSQ back to MemoryNeedsRetry */ 1371 state = MemoryRunning; 1372 1373 /* Try to resend the request */ 1374 if (tryToSend(retryRequest)) { 1375 /* Successfully sent, need to move the request */ 1376 switch (retryRequest->state) { 1377 case LSQRequest::RequestIssuing: 1378 /* In the requests queue */ 1379 moveFromRequestsToTransfers(retryRequest); 1380 break; 1381 case LSQRequest::StoreBufferIssuing: 1382 /* In the store buffer */ 1383 storeBuffer.countIssuedStore(retryRequest); 1384 break; 1385 default: 1386 panic("Unrecognized retry request state %d.", retryRequest->state); 1387 } 1388 1389 retryRequest = NULL; 1390 } 1391} 1392 1393LSQ::LSQ(std::string name_, std::string dcache_port_name_, 1394 MinorCPU &cpu_, Execute &execute_, 1395 unsigned int in_memory_system_limit, unsigned int line_width, 1396 unsigned int requests_queue_size, unsigned int transfers_queue_size, 1397 unsigned int store_buffer_size, 1398 unsigned int store_buffer_cycle_store_limit) : 1399 Named(name_), 1400 cpu(cpu_), 1401 execute(execute_), 1402 dcachePort(dcache_port_name_, *this, cpu_), 1403 lastMemBarrier(cpu.numThreads, 0), 1404 state(MemoryRunning), 1405 inMemorySystemLimit(in_memory_system_limit), 1406 lineWidth((line_width == 0 ? cpu.cacheLineSize() : line_width)), 1407 requests(name_ + ".requests", "addr", requests_queue_size), 1408 transfers(name_ + ".transfers", "addr", transfers_queue_size), 1409 storeBuffer(name_ + ".storeBuffer", 1410 *this, store_buffer_size, store_buffer_cycle_store_limit), 1411 numAccessesInMemorySystem(0), 1412 numAccessesInDTLB(0), 1413 numStoresInTransfers(0), 1414 numAccessesIssuedToMemory(0), 1415 retryRequest(NULL), 1416 cacheBlockMask(~(cpu_.cacheLineSize() - 1)) 1417{ 1418 if (in_memory_system_limit < 1) { 1419 fatal("%s: executeMaxAccessesInMemory must be >= 1 (%d)\n", name_, 1420 in_memory_system_limit); 1421 } 1422 1423 if (store_buffer_cycle_store_limit < 1) { 1424 fatal("%s: executeLSQMaxStoreBufferStoresPerCycle must be" 1425 " >= 1 (%d)\n", name_, store_buffer_cycle_store_limit); 1426 } 1427 1428 if (requests_queue_size < 1) { 1429 fatal("%s: executeLSQRequestsQueueSize must be" 1430 " >= 1 (%d)\n", name_, requests_queue_size); 1431 } 1432 1433 if (transfers_queue_size < 1) { 1434 fatal("%s: executeLSQTransfersQueueSize must be" 1435 " >= 1 (%d)\n", name_, transfers_queue_size); 1436 } 1437 1438 if (store_buffer_size < 1) { 1439 fatal("%s: executeLSQStoreBufferSize must be" 1440 " >= 1 (%d)\n", name_, store_buffer_size); 1441 } 1442 1443 if ((lineWidth & (lineWidth - 1)) != 0) { 1444 fatal("%s: lineWidth: %d must be a power of 2\n", name(), lineWidth); 1445 } 1446} 1447 1448LSQ::~LSQ() 1449{ } 1450 1451LSQ::LSQRequest::~LSQRequest() 1452{ 1453 if (packet) 1454 delete packet; 1455 if (data) 1456 delete [] data; 1457} 1458 1459/** 1460 * Step the memory access mechanism on to its next state. In reality, most 1461 * of the stepping is done by the callbacks on the LSQ but this 1462 * function is responsible for issuing memory requests lodged in the 1463 * requests queue. 1464 */ 1465void 1466LSQ::step() 1467{ 1468 /* Try to move address-translated requests between queues and issue 1469 * them */ 1470 if (!requests.empty()) 1471 tryToSendToTransfers(requests.front()); 1472 1473 storeBuffer.step(); 1474} 1475 1476LSQ::LSQRequestPtr 1477LSQ::findResponse(MinorDynInstPtr inst) 1478{ 1479 LSQ::LSQRequestPtr ret = NULL; 1480 1481 if (!transfers.empty()) { 1482 LSQRequestPtr request = transfers.front(); 1483 1484 /* Same instruction and complete access or a store that's 1485 * capable of being moved to the store buffer */ 1486 if (request->inst->id == inst->id) { 1487 bool complete = request->isComplete(); 1488 bool can_store = storeBuffer.canInsert(); 1489 bool to_store_buffer = request->state == 1490 LSQRequest::StoreToStoreBuffer; 1491 1492 if ((complete && !(request->isBarrier() && !can_store)) || 1493 (to_store_buffer && can_store)) 1494 { 1495 ret = request; 1496 } 1497 } 1498 } 1499 1500 if (ret) { 1501 DPRINTF(MinorMem, "Found matching memory response for inst: %s\n", 1502 *inst); 1503 } else { 1504 DPRINTF(MinorMem, "No matching memory response for inst: %s\n", 1505 *inst); 1506 } 1507 1508 return ret; 1509} 1510 1511void 1512LSQ::popResponse(LSQ::LSQRequestPtr response) 1513{ 1514 assert(!transfers.empty() && transfers.front() == response); 1515 1516 transfers.pop(); 1517 1518 if (!response->isLoad) 1519 numStoresInTransfers--; 1520 1521 if (response->issuedToMemory) 1522 numAccessesIssuedToMemory--; 1523 1524 if (response->state != LSQRequest::StoreInStoreBuffer) { 1525 DPRINTF(MinorMem, "Deleting %s request: %s\n", 1526 (response->isLoad ? "load" : "store"), 1527 *(response->inst)); 1528 1529 delete response; 1530 } 1531} 1532 1533void 1534LSQ::sendStoreToStoreBuffer(LSQRequestPtr request) 1535{ 1536 assert(request->state == LSQRequest::StoreToStoreBuffer); 1537 1538 DPRINTF(MinorMem, "Sending store: %s to store buffer\n", 1539 *(request->inst)); 1540 1541 request->inst->inStoreBuffer = true; 1542 1543 storeBuffer.insert(request); 1544} 1545 1546bool 1547LSQ::isDrained() 1548{ 1549 return requests.empty() && transfers.empty() && 1550 storeBuffer.isDrained(); 1551} 1552 1553bool 1554LSQ::needsToTick() 1555{ 1556 bool ret = false; 1557 1558 if (canSendToMemorySystem()) { 1559 bool have_translated_requests = !requests.empty() && 1560 requests.front()->state != LSQRequest::InTranslation && 1561 transfers.unreservedRemainingSpace() != 0; 1562 1563 ret = have_translated_requests || 1564 storeBuffer.numUnissuedStores() != 0; 1565 } 1566 1567 if (ret) 1568 DPRINTF(Activity, "Need to tick\n"); 1569 1570 return ret; 1571} 1572 1573Fault 1574LSQ::pushRequest(MinorDynInstPtr inst, bool isLoad, uint8_t *data, 1575 unsigned int size, Addr addr, Request::Flags flags, 1576 uint64_t *res, AtomicOpFunctorPtr amo_op, 1577 const std::vector<bool>& byteEnable) 1578{ 1579 assert(inst->translationFault == NoFault || inst->inLSQ); 1580 1581 if (inst->inLSQ) { 1582 return inst->translationFault; 1583 } 1584 1585 bool needs_burst = transferNeedsBurst(addr, size, lineWidth); 1586 1587 if (needs_burst && inst->staticInst->isAtomic()) { 1588 // AMO requests that access across a cache line boundary are not 1589 // allowed since the cache does not guarantee AMO ops to be executed 1590 // atomically in two cache lines 1591 // For ISAs such as x86 that requires AMO operations to work on 1592 // accesses that cross cache-line boundaries, the cache needs to be 1593 // modified to support locking both cache lines to guarantee the 1594 // atomicity. 1595 panic("Do not expect cross-cache-line atomic memory request\n"); 1596 } 1597 1598 LSQRequestPtr request; 1599 1600 /* Copy given data into the request. The request will pass this to the 1601 * packet and then it will own the data */ 1602 uint8_t *request_data = NULL; 1603 1604 DPRINTF(MinorMem, "Pushing request (%s) addr: 0x%x size: %d flags:" 1605 " 0x%x%s lineWidth : 0x%x\n", 1606 (isLoad ? "load" : "store/atomic"), addr, size, flags, 1607 (needs_burst ? " (needs burst)" : ""), lineWidth); 1608 1609 if (!isLoad) { 1610 /* Request_data becomes the property of a ...DataRequest (see below) 1611 * and destroyed by its destructor */ 1612 request_data = new uint8_t[size]; 1613 if (inst->staticInst->isAtomic() || 1614 (flags & Request::STORE_NO_DATA)) { 1615 /* For atomic or store-no-data, just use zeroed data */ 1616 std::memset(request_data, 0, size); 1617 } else { 1618 std::memcpy(request_data, data, size); 1619 } 1620 } 1621 1622 if (needs_burst) { 1623 request = new SplitDataRequest( 1624 *this, inst, isLoad, request_data, res); 1625 } else { 1626 request = new SingleDataRequest( 1627 *this, inst, isLoad, request_data, res); 1628 } 1629 1630 if (inst->traceData) 1631 inst->traceData->setMem(addr, size, flags); 1632 1633 int cid = cpu.threads[inst->id.threadId]->getTC()->contextId(); 1634 request->request->setContext(cid); 1635 request->request->setVirt(0 /* asid */, 1636 addr, size, flags, cpu.dataMasterId(), 1637 /* I've no idea why we need the PC, but give it */ 1638 inst->pc.instAddr(), std::move(amo_op)); 1639 request->request->setByteEnable(byteEnable); 1640 1641 requests.push(request); 1642 inst->inLSQ = true; 1643 request->startAddrTranslation(); 1644 1645 return inst->translationFault; 1646} 1647 1648void 1649LSQ::pushFailedRequest(MinorDynInstPtr inst) 1650{ 1651 LSQRequestPtr request = new FailedDataRequest(*this, inst); 1652 requests.push(request); 1653} 1654 1655void 1656LSQ::minorTrace() const 1657{ 1658 MINORTRACE("state=%s in_tlb_mem=%d/%d stores_in_transfers=%d" 1659 " lastMemBarrier=%d\n", 1660 state, numAccessesInDTLB, numAccessesInMemorySystem, 1661 numStoresInTransfers, lastMemBarrier[0]); 1662 requests.minorTrace(); 1663 transfers.minorTrace(); 1664 storeBuffer.minorTrace(); 1665} 1666 1667LSQ::StoreBuffer::StoreBuffer(std::string name_, LSQ &lsq_, 1668 unsigned int store_buffer_size, 1669 unsigned int store_limit_per_cycle) : 1670 Named(name_), lsq(lsq_), 1671 numSlots(store_buffer_size), 1672 storeLimitPerCycle(store_limit_per_cycle), 1673 slots(), 1674 numUnissuedAccesses(0) 1675{ 1676} 1677 1678PacketPtr 1679makePacketForRequest(const RequestPtr &request, bool isLoad, 1680 Packet::SenderState *sender_state, PacketDataPtr data) 1681{ 1682 PacketPtr ret = isLoad ? Packet::createRead(request) 1683 : Packet::createWrite(request); 1684 1685 if (sender_state) 1686 ret->pushSenderState(sender_state); 1687 1688 if (isLoad) { 1689 ret->allocate(); 1690 } else if (!request->isCacheMaintenance()) { 1691 // CMOs are treated as stores but they don't have data. All 1692 // stores otherwise need to allocate for data. 1693 ret->dataDynamic(data); 1694 } 1695 1696 return ret; 1697} 1698 1699void 1700LSQ::issuedMemBarrierInst(MinorDynInstPtr inst) 1701{ 1702 assert(inst->isInst() && inst->staticInst->isMemBarrier()); 1703 assert(inst->id.execSeqNum > lastMemBarrier[inst->id.threadId]); 1704 1705 /* Remember the barrier. We only have a notion of one 1706 * barrier so this may result in some mem refs being 1707 * delayed if they are between barriers */ 1708 lastMemBarrier[inst->id.threadId] = inst->id.execSeqNum; 1709} 1710 1711void 1712LSQ::LSQRequest::makePacket() 1713{ 1714 assert(inst->translationFault == NoFault); 1715 1716 /* Make the function idempotent */ 1717 if (packet) 1718 return; 1719 1720 packet = makePacketForRequest(request, isLoad, this, data); 1721 /* Null the ret data so we know not to deallocate it when the 1722 * ret is destroyed. The data now belongs to the ret and 1723 * the ret is responsible for its destruction */ 1724 data = NULL; 1725} 1726 1727std::ostream & 1728operator <<(std::ostream &os, LSQ::MemoryState state) 1729{ 1730 switch (state) { 1731 case LSQ::MemoryRunning: 1732 os << "MemoryRunning"; 1733 break; 1734 case LSQ::MemoryNeedsRetry: 1735 os << "MemoryNeedsRetry"; 1736 break; 1737 default: 1738 os << "MemoryState-" << static_cast<int>(state); 1739 break; 1740 } 1741 return os; 1742} 1743 1744void 1745LSQ::recvTimingSnoopReq(PacketPtr pkt) 1746{ 1747 /* LLSC operations in Minor can't be speculative and are executed from 1748 * the head of the requests queue. We shouldn't need to do more than 1749 * this action on snoops. */ 1750 for (ThreadID tid = 0; tid < cpu.numThreads; tid++) { 1751 if (cpu.getCpuAddrMonitor(tid)->doMonitor(pkt)) { 1752 cpu.wakeup(tid); 1753 } 1754 } 1755 1756 if (pkt->isInvalidate() || pkt->isWrite()) { 1757 for (ThreadID tid = 0; tid < cpu.numThreads; tid++) { 1758 TheISA::handleLockedSnoop(cpu.getContext(tid), pkt, 1759 cacheBlockMask); 1760 } 1761 } 1762} 1763 1764void 1765LSQ::threadSnoop(LSQRequestPtr request) 1766{ 1767 /* LLSC operations in Minor can't be speculative and are executed from 1768 * the head of the requests queue. We shouldn't need to do more than 1769 * this action on snoops. */ 1770 ThreadID req_tid = request->inst->id.threadId; 1771 PacketPtr pkt = request->packet; 1772 1773 for (ThreadID tid = 0; tid < cpu.numThreads; tid++) { 1774 if (tid != req_tid) { 1775 if (cpu.getCpuAddrMonitor(tid)->doMonitor(pkt)) { 1776 cpu.wakeup(tid); 1777 } 1778 1779 if (pkt->isInvalidate() || pkt->isWrite()) { 1780 TheISA::handleLockedSnoop(cpu.getContext(tid), pkt, 1781 cacheBlockMask); 1782 } 1783 } 1784 } 1785} 1786 1787} 1788