// lsq.cc revision 13954:2f400a5f2627
/*
 * Copyright (c) 2013-2014,2017-2018 ARM Limited
 * All rights reserved
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder.  You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT 29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 36 * 37 * Authors: Andrew Bardsley 38 */ 39 40#include "cpu/minor/lsq.hh" 41 42#include <iomanip> 43#include <sstream> 44 45#include "arch/locked_mem.hh" 46#include "arch/mmapped_ipr.hh" 47#include "base/logging.hh" 48#include "cpu/minor/cpu.hh" 49#include "cpu/minor/exec_context.hh" 50#include "cpu/minor/execute.hh" 51#include "cpu/minor/pipeline.hh" 52#include "cpu/utils.hh" 53#include "debug/Activity.hh" 54#include "debug/MinorMem.hh" 55 56namespace Minor 57{ 58 59LSQ::LSQRequest::LSQRequest(LSQ &port_, MinorDynInstPtr inst_, bool isLoad_, 60 PacketDataPtr data_, uint64_t *res_) : 61 SenderState(), 62 port(port_), 63 inst(inst_), 64 isLoad(isLoad_), 65 data(data_), 66 packet(NULL), 67 request(), 68 fault(NoFault), 69 res(res_), 70 skipped(false), 71 issuedToMemory(false), 72 state(NotIssued) 73{ 74 request = std::make_shared<Request>(); 75} 76 77void 78LSQ::LSQRequest::disableMemAccess() 79{ 80 port.cpu.threads[inst->id.threadId]->setMemAccPredicate(false); 81 DPRINTFS(MinorMem, (&port), "Disable mem access for inst:%s\n", *inst); 82} 83 84LSQ::AddrRangeCoverage 85LSQ::LSQRequest::containsAddrRangeOf( 86 Addr req1_addr, unsigned int req1_size, 87 Addr req2_addr, unsigned int req2_size) 88{ 89 /* 'end' here means the address of the byte just past the request 90 * blocks */ 91 Addr req2_end_addr = req2_addr + req2_size; 92 Addr req1_end_addr = req1_addr + req1_size; 93 94 AddrRangeCoverage ret; 95 96 if (req1_addr >= 
req2_end_addr || req1_end_addr <= req2_addr) 97 ret = NoAddrRangeCoverage; 98 else if (req1_addr <= req2_addr && req1_end_addr >= req2_end_addr) 99 ret = FullAddrRangeCoverage; 100 else 101 ret = PartialAddrRangeCoverage; 102 103 return ret; 104} 105 106LSQ::AddrRangeCoverage 107LSQ::LSQRequest::containsAddrRangeOf(LSQRequestPtr other_request) 108{ 109 return containsAddrRangeOf(request->getPaddr(), request->getSize(), 110 other_request->request->getPaddr(), other_request->request->getSize()); 111} 112 113bool 114LSQ::LSQRequest::isBarrier() 115{ 116 return inst->isInst() && inst->staticInst->isMemBarrier(); 117} 118 119bool 120LSQ::LSQRequest::needsToBeSentToStoreBuffer() 121{ 122 return state == StoreToStoreBuffer; 123} 124 125void 126LSQ::LSQRequest::setState(LSQRequestState new_state) 127{ 128 DPRINTFS(MinorMem, (&port), "Setting state from %d to %d for request:" 129 " %s\n", state, new_state, *inst); 130 state = new_state; 131} 132 133bool 134LSQ::LSQRequest::isComplete() const 135{ 136 /* @todo, There is currently only one 'completed' state. This 137 * may not be a good choice */ 138 return state == Complete; 139} 140 141void 142LSQ::LSQRequest::reportData(std::ostream &os) const 143{ 144 os << (isLoad ? 
'R' : 'W') << ';'; 145 inst->reportData(os); 146 os << ';' << state; 147} 148 149std::ostream & 150operator <<(std::ostream &os, LSQ::AddrRangeCoverage coverage) 151{ 152 switch (coverage) { 153 case LSQ::PartialAddrRangeCoverage: 154 os << "PartialAddrRangeCoverage"; 155 break; 156 case LSQ::FullAddrRangeCoverage: 157 os << "FullAddrRangeCoverage"; 158 break; 159 case LSQ::NoAddrRangeCoverage: 160 os << "NoAddrRangeCoverage"; 161 break; 162 default: 163 os << "AddrRangeCoverage-" << static_cast<int>(coverage); 164 break; 165 } 166 return os; 167} 168 169std::ostream & 170operator <<(std::ostream &os, LSQ::LSQRequest::LSQRequestState state) 171{ 172 switch (state) { 173 case LSQ::LSQRequest::NotIssued: 174 os << "NotIssued"; 175 break; 176 case LSQ::LSQRequest::InTranslation: 177 os << "InTranslation"; 178 break; 179 case LSQ::LSQRequest::Translated: 180 os << "Translated"; 181 break; 182 case LSQ::LSQRequest::Failed: 183 os << "Failed"; 184 break; 185 case LSQ::LSQRequest::RequestIssuing: 186 os << "RequestIssuing"; 187 break; 188 case LSQ::LSQRequest::StoreToStoreBuffer: 189 os << "StoreToStoreBuffer"; 190 break; 191 case LSQ::LSQRequest::StoreInStoreBuffer: 192 os << "StoreInStoreBuffer"; 193 break; 194 case LSQ::LSQRequest::StoreBufferIssuing: 195 os << "StoreBufferIssuing"; 196 break; 197 case LSQ::LSQRequest::RequestNeedsRetry: 198 os << "RequestNeedsRetry"; 199 break; 200 case LSQ::LSQRequest::StoreBufferNeedsRetry: 201 os << "StoreBufferNeedsRetry"; 202 break; 203 case LSQ::LSQRequest::Complete: 204 os << "Complete"; 205 break; 206 default: 207 os << "LSQRequestState-" << static_cast<int>(state); 208 break; 209 } 210 return os; 211} 212 213void 214LSQ::clearMemBarrier(MinorDynInstPtr inst) 215{ 216 bool is_last_barrier = 217 inst->id.execSeqNum >= lastMemBarrier[inst->id.threadId]; 218 219 DPRINTF(MinorMem, "Moving %s barrier out of store buffer inst: %s\n", 220 (is_last_barrier ? 
"last" : "a"), *inst); 221 222 if (is_last_barrier) 223 lastMemBarrier[inst->id.threadId] = 0; 224} 225 226void 227LSQ::SingleDataRequest::finish(const Fault &fault_, const RequestPtr &request_, 228 ThreadContext *tc, BaseTLB::Mode mode) 229{ 230 fault = fault_; 231 232 port.numAccessesInDTLB--; 233 234 DPRINTFS(MinorMem, (&port), "Received translation response for" 235 " request: %s\n", *inst); 236 237 makePacket(); 238 239 setState(Translated); 240 port.tryToSendToTransfers(this); 241 242 /* Let's try and wake up the processor for the next cycle */ 243 port.cpu.wakeupOnEvent(Pipeline::ExecuteStageId); 244} 245 246void 247LSQ::SingleDataRequest::startAddrTranslation() 248{ 249 ThreadContext *thread = port.cpu.getContext( 250 inst->id.threadId); 251 252 const auto &byteEnable = request->getByteEnable(); 253 if (byteEnable.size() == 0 || 254 isAnyActiveElement(byteEnable.cbegin(), byteEnable.cend())) { 255 port.numAccessesInDTLB++; 256 257 setState(LSQ::LSQRequest::InTranslation); 258 259 DPRINTFS(MinorMem, (&port), "Submitting DTLB request\n"); 260 /* Submit the translation request. The response will come through 261 * finish/markDelayed on the LSQRequest as it bears the Translation 262 * interface */ 263 thread->getDTBPtr()->translateTiming( 264 request, thread, this, (isLoad ? 
BaseTLB::Read : BaseTLB::Write)); 265 } else { 266 disableMemAccess(); 267 setState(LSQ::LSQRequest::Complete); 268 } 269} 270 271void 272LSQ::SingleDataRequest::retireResponse(PacketPtr packet_) 273{ 274 DPRINTFS(MinorMem, (&port), "Retiring packet\n"); 275 packet = packet_; 276 packetInFlight = false; 277 setState(Complete); 278} 279 280void 281LSQ::SplitDataRequest::finish(const Fault &fault_, const RequestPtr &request_, 282 ThreadContext *tc, BaseTLB::Mode mode) 283{ 284 fault = fault_; 285 286 port.numAccessesInDTLB--; 287 288 unsigned int M5_VAR_USED expected_fragment_index = 289 numTranslatedFragments; 290 291 numInTranslationFragments--; 292 numTranslatedFragments++; 293 294 DPRINTFS(MinorMem, (&port), "Received translation response for fragment" 295 " %d of request: %s\n", expected_fragment_index, *inst); 296 297 assert(request_ == fragmentRequests[expected_fragment_index]); 298 299 /* Wake up next cycle to get things going again in case the 300 * tryToSendToTransfers does take */ 301 port.cpu.wakeupOnEvent(Pipeline::ExecuteStageId); 302 303 if (fault != NoFault) { 304 /* tryToSendToTransfers will handle the fault */ 305 306 DPRINTFS(MinorMem, (&port), "Faulting translation for fragment:" 307 " %d of request: %s\n", 308 expected_fragment_index, *inst); 309 310 setState(Translated); 311 port.tryToSendToTransfers(this); 312 } else if (numTranslatedFragments == numFragments) { 313 makeFragmentPackets(); 314 315 setState(Translated); 316 port.tryToSendToTransfers(this); 317 } else { 318 /* Avoid calling translateTiming from within ::finish */ 319 assert(!translationEvent.scheduled()); 320 port.cpu.schedule(translationEvent, curTick()); 321 } 322} 323 324LSQ::SplitDataRequest::SplitDataRequest(LSQ &port_, MinorDynInstPtr inst_, 325 bool isLoad_, PacketDataPtr data_, uint64_t *res_) : 326 LSQRequest(port_, inst_, isLoad_, data_, res_), 327 translationEvent([this]{ sendNextFragmentToTranslation(); }, 328 "translationEvent"), 329 numFragments(0), 330 
numInTranslationFragments(0), 331 numTranslatedFragments(0), 332 numIssuedFragments(0), 333 numRetiredFragments(0), 334 fragmentRequests(), 335 fragmentPackets() 336{ 337 /* Don't know how many elements are needed until the request is 338 * populated by the caller. */ 339} 340 341LSQ::SplitDataRequest::~SplitDataRequest() 342{ 343 for (auto i = fragmentPackets.begin(); 344 i != fragmentPackets.end(); i++) 345 { 346 delete *i; 347 } 348} 349 350void 351LSQ::SplitDataRequest::makeFragmentRequests() 352{ 353 Addr base_addr = request->getVaddr(); 354 unsigned int whole_size = request->getSize(); 355 unsigned int line_width = port.lineWidth; 356 357 unsigned int fragment_size; 358 Addr fragment_addr; 359 360 std::vector<bool> fragment_write_byte_en; 361 362 /* Assume that this transfer is across potentially many block snap 363 * boundaries: 364 * 365 * | _|________|________|________|___ | 366 * | |0| 1 | 2 | 3 | 4 | | 367 * | |_|________|________|________|___| | 368 * | | | | | | 369 * 370 * The first transfer (0) can be up to lineWidth in size. 371 * All the middle transfers (1-3) are lineWidth in size 372 * The last transfer (4) can be from zero to lineWidth - 1 in size 373 */ 374 unsigned int first_fragment_offset = 375 addrBlockOffset(base_addr, line_width); 376 unsigned int last_fragment_size = 377 addrBlockOffset(base_addr + whole_size, line_width); 378 unsigned int first_fragment_size = 379 line_width - first_fragment_offset; 380 381 unsigned int middle_fragments_total_size = 382 whole_size - (first_fragment_size + last_fragment_size); 383 384 assert(addrBlockOffset(middle_fragments_total_size, line_width) == 0); 385 386 unsigned int middle_fragment_count = 387 middle_fragments_total_size / line_width; 388 389 numFragments = 1 /* first */ + middle_fragment_count + 390 (last_fragment_size == 0 ? 0 : 1); 391 392 DPRINTFS(MinorMem, (&port), "Dividing transfer into %d fragmentRequests." 
393 " First fragment size: %d Last fragment size: %d\n", 394 numFragments, first_fragment_size, 395 (last_fragment_size == 0 ? line_width : last_fragment_size)); 396 397 assert(((middle_fragment_count * line_width) + 398 first_fragment_size + last_fragment_size) == whole_size); 399 400 fragment_addr = base_addr; 401 fragment_size = first_fragment_size; 402 403 /* Just past the last address in the request */ 404 Addr end_addr = base_addr + whole_size; 405 406 auto& byte_enable = request->getByteEnable(); 407 unsigned int num_disabled_fragments = 0; 408 409 for (unsigned int fragment_index = 0; fragment_index < numFragments; 410 fragment_index++) 411 { 412 bool M5_VAR_USED is_last_fragment = false; 413 414 if (fragment_addr == base_addr) { 415 /* First fragment */ 416 fragment_size = first_fragment_size; 417 } else { 418 if ((fragment_addr + line_width) > end_addr) { 419 /* Adjust size of last fragment */ 420 fragment_size = end_addr - fragment_addr; 421 is_last_fragment = true; 422 } else { 423 /* Middle fragments */ 424 fragment_size = line_width; 425 } 426 } 427 428 RequestPtr fragment = std::make_shared<Request>(); 429 bool disabled_fragment = false; 430 431 fragment->setContext(request->contextId()); 432 if (byte_enable.empty()) { 433 fragment->setVirt(0 /* asid */, 434 fragment_addr, fragment_size, request->getFlags(), 435 request->masterId(), 436 request->getPC()); 437 } else { 438 // Set up byte-enable mask for the current fragment 439 auto it_start = byte_enable.begin() + 440 (fragment_addr - base_addr); 441 auto it_end = byte_enable.begin() + 442 (fragment_addr - base_addr) + fragment_size; 443 if (isAnyActiveElement(it_start, it_end)) { 444 fragment->setVirt(0 /* asid */, 445 fragment_addr, fragment_size, request->getFlags(), 446 request->masterId(), 447 request->getPC()); 448 fragment->setByteEnable(std::vector<bool>(it_start, it_end)); 449 } else { 450 disabled_fragment = true; 451 } 452 } 453 454 if (!disabled_fragment) { 455 DPRINTFS(MinorMem, (&port), 
"Generating fragment addr: 0x%x" 456 " size: %d (whole request addr: 0x%x size: %d) %s\n", 457 fragment_addr, fragment_size, base_addr, whole_size, 458 (is_last_fragment ? "last fragment" : "")); 459 460 fragmentRequests.push_back(fragment); 461 } else { 462 num_disabled_fragments++; 463 } 464 465 fragment_addr += fragment_size; 466 } 467 assert(numFragments >= num_disabled_fragments); 468 numFragments -= num_disabled_fragments; 469} 470 471void 472LSQ::SplitDataRequest::makeFragmentPackets() 473{ 474 assert(numTranslatedFragments > 0); 475 Addr base_addr = request->getVaddr(); 476 477 DPRINTFS(MinorMem, (&port), "Making packets for request: %s\n", *inst); 478 479 for (unsigned int fragment_index = 0; 480 fragment_index < numTranslatedFragments; 481 fragment_index++) 482 { 483 RequestPtr fragment = fragmentRequests[fragment_index]; 484 485 DPRINTFS(MinorMem, (&port), "Making packet %d for request: %s" 486 " (%d, 0x%x)\n", 487 fragment_index, *inst, 488 (fragment->hasPaddr() ? "has paddr" : "no paddr"), 489 (fragment->hasPaddr() ? fragment->getPaddr() : 0)); 490 491 Addr fragment_addr = fragment->getVaddr(); 492 unsigned int fragment_size = fragment->getSize(); 493 494 uint8_t *request_data = NULL; 495 496 if (!isLoad) { 497 /* Split data for Packets. 
Will become the property of the 498 * outgoing Packets */ 499 request_data = new uint8_t[fragment_size]; 500 std::memcpy(request_data, data + (fragment_addr - base_addr), 501 fragment_size); 502 } 503 504 assert(fragment->hasPaddr()); 505 506 PacketPtr fragment_packet = 507 makePacketForRequest(fragment, isLoad, this, request_data); 508 509 fragmentPackets.push_back(fragment_packet); 510 /* Accumulate flags in parent request */ 511 request->setFlags(fragment->getFlags()); 512 } 513 514 /* Might as well make the overall/response packet here */ 515 /* Get the physical address for the whole request/packet from the first 516 * fragment */ 517 request->setPaddr(fragmentRequests[0]->getPaddr()); 518 makePacket(); 519} 520 521void 522LSQ::SplitDataRequest::startAddrTranslation() 523{ 524 makeFragmentRequests(); 525 526 if (numFragments > 0) { 527 setState(LSQ::LSQRequest::InTranslation); 528 numInTranslationFragments = 0; 529 numTranslatedFragments = 0; 530 531 /* @todo, just do these in sequence for now with 532 * a loop of: 533 * do { 534 * sendNextFragmentToTranslation ; translateTiming ; finish 535 * } while (numTranslatedFragments != numFragments); 536 */ 537 538 /* Do first translation */ 539 sendNextFragmentToTranslation(); 540 } else { 541 disableMemAccess(); 542 setState(LSQ::LSQRequest::Complete); 543 } 544} 545 546PacketPtr 547LSQ::SplitDataRequest::getHeadPacket() 548{ 549 assert(numIssuedFragments < numTranslatedFragments); 550 551 return fragmentPackets[numIssuedFragments]; 552} 553 554void 555LSQ::SplitDataRequest::stepToNextPacket() 556{ 557 assert(numIssuedFragments < numTranslatedFragments); 558 559 numIssuedFragments++; 560} 561 562void 563LSQ::SplitDataRequest::retireResponse(PacketPtr response) 564{ 565 assert(numRetiredFragments < numTranslatedFragments); 566 567 DPRINTFS(MinorMem, (&port), "Retiring fragment addr: 0x%x size: %d" 568 " offset: 0x%x (retired fragment num: %d)\n", 569 response->req->getVaddr(), response->req->getSize(), 570 
request->getVaddr() - response->req->getVaddr(), 571 numRetiredFragments); 572 573 numRetiredFragments++; 574 575 if (skipped) { 576 /* Skip because we already knew the request had faulted or been 577 * skipped */ 578 DPRINTFS(MinorMem, (&port), "Skipping this fragment\n"); 579 } else if (response->isError()) { 580 /* Mark up the error and leave to execute to handle it */ 581 DPRINTFS(MinorMem, (&port), "Fragment has an error, skipping\n"); 582 setSkipped(); 583 packet->copyError(response); 584 } else { 585 if (isLoad) { 586 if (!data) { 587 /* For a split transfer, a Packet must be constructed 588 * to contain all returning data. This is that packet's 589 * data */ 590 data = new uint8_t[request->getSize()]; 591 } 592 593 /* Populate the portion of the overall response data represented 594 * by the response fragment */ 595 std::memcpy( 596 data + (response->req->getVaddr() - request->getVaddr()), 597 response->getConstPtr<uint8_t>(), 598 response->req->getSize()); 599 } 600 } 601 602 /* Complete early if we're skipping are no more in-flight accesses */ 603 if (skipped && !hasPacketsInMemSystem()) { 604 DPRINTFS(MinorMem, (&port), "Completed skipped burst\n"); 605 setState(Complete); 606 if (packet->needsResponse()) 607 packet->makeResponse(); 608 } 609 610 if (numRetiredFragments == numTranslatedFragments) 611 setState(Complete); 612 613 if (!skipped && isComplete()) { 614 DPRINTFS(MinorMem, (&port), "Completed burst %d\n", packet != NULL); 615 616 DPRINTFS(MinorMem, (&port), "Retired packet isRead: %d isWrite: %d" 617 " needsResponse: %d packetSize: %s requestSize: %s responseSize:" 618 " %s\n", packet->isRead(), packet->isWrite(), 619 packet->needsResponse(), packet->getSize(), request->getSize(), 620 response->getSize()); 621 622 /* A request can become complete by several paths, this is a sanity 623 * check to make sure the packet's data is created */ 624 if (!data) { 625 data = new uint8_t[request->getSize()]; 626 } 627 628 if (isLoad) { 629 
DPRINTFS(MinorMem, (&port), "Copying read data\n"); 630 std::memcpy(packet->getPtr<uint8_t>(), data, request->getSize()); 631 } 632 packet->makeResponse(); 633 } 634 635 /* Packets are all deallocated together in ~SplitLSQRequest */ 636} 637 638void 639LSQ::SplitDataRequest::sendNextFragmentToTranslation() 640{ 641 unsigned int fragment_index = numTranslatedFragments; 642 643 ThreadContext *thread = port.cpu.getContext( 644 inst->id.threadId); 645 646 DPRINTFS(MinorMem, (&port), "Submitting DTLB request for fragment: %d\n", 647 fragment_index); 648 649 port.numAccessesInDTLB++; 650 numInTranslationFragments++; 651 652 thread->getDTBPtr()->translateTiming( 653 fragmentRequests[fragment_index], thread, this, (isLoad ? 654 BaseTLB::Read : BaseTLB::Write)); 655} 656 657bool 658LSQ::StoreBuffer::canInsert() const 659{ 660 /* @todo, support store amalgamation */ 661 return slots.size() < numSlots; 662} 663 664void 665LSQ::StoreBuffer::deleteRequest(LSQRequestPtr request) 666{ 667 auto found = std::find(slots.begin(), slots.end(), request); 668 669 if (found != slots.end()) { 670 DPRINTF(MinorMem, "Deleting request: %s %s %s from StoreBuffer\n", 671 request, *found, *(request->inst)); 672 slots.erase(found); 673 674 delete request; 675 } 676} 677 678void 679LSQ::StoreBuffer::insert(LSQRequestPtr request) 680{ 681 if (!canInsert()) { 682 warn("%s: store buffer insertion without space to insert from" 683 " inst: %s\n", name(), *(request->inst)); 684 } 685 686 DPRINTF(MinorMem, "Pushing store: %s into store buffer\n", request); 687 688 numUnissuedAccesses++; 689 690 if (request->state != LSQRequest::Complete) 691 request->setState(LSQRequest::StoreInStoreBuffer); 692 693 slots.push_back(request); 694 695 /* Let's try and wake up the processor for the next cycle to step 696 * the store buffer */ 697 lsq.cpu.wakeupOnEvent(Pipeline::ExecuteStageId); 698} 699 700LSQ::AddrRangeCoverage 701LSQ::StoreBuffer::canForwardDataToLoad(LSQRequestPtr request, 702 unsigned int &found_slot) 
703{ 704 unsigned int slot_index = slots.size() - 1; 705 auto i = slots.rbegin(); 706 AddrRangeCoverage ret = NoAddrRangeCoverage; 707 708 /* Traverse the store buffer in reverse order (most to least recent) 709 * and try to find a slot whose address range overlaps this request */ 710 while (ret == NoAddrRangeCoverage && i != slots.rend()) { 711 LSQRequestPtr slot = *i; 712 713 /* Cache maintenance instructions go down via the store path but 714 * they carry no data and they shouldn't be considered 715 * for forwarding */ 716 if (slot->packet && 717 slot->inst->id.threadId == request->inst->id.threadId && 718 !slot->packet->req->isCacheMaintenance()) { 719 AddrRangeCoverage coverage = slot->containsAddrRangeOf(request); 720 721 if (coverage != NoAddrRangeCoverage) { 722 DPRINTF(MinorMem, "Forwarding: slot: %d result: %s thisAddr:" 723 " 0x%x thisSize: %d slotAddr: 0x%x slotSize: %d\n", 724 slot_index, coverage, 725 request->request->getPaddr(), request->request->getSize(), 726 slot->request->getPaddr(), slot->request->getSize()); 727 728 found_slot = slot_index; 729 ret = coverage; 730 } 731 } 732 733 i++; 734 slot_index--; 735 } 736 737 return ret; 738} 739 740/** Fill the given packet with appropriate date from slot slot_number */ 741void 742LSQ::StoreBuffer::forwardStoreData(LSQRequestPtr load, 743 unsigned int slot_number) 744{ 745 assert(slot_number < slots.size()); 746 assert(load->packet); 747 assert(load->isLoad); 748 749 LSQRequestPtr store = slots[slot_number]; 750 751 assert(store->packet); 752 assert(store->containsAddrRangeOf(load) == FullAddrRangeCoverage); 753 754 Addr load_addr = load->request->getPaddr(); 755 Addr store_addr = store->request->getPaddr(); 756 Addr addr_offset = load_addr - store_addr; 757 758 unsigned int load_size = load->request->getSize(); 759 760 DPRINTF(MinorMem, "Forwarding %d bytes for addr: 0x%x from store buffer" 761 " slot: %d addr: 0x%x addressOffset: 0x%x\n", 762 load_size, load_addr, slot_number, 763 store_addr, 
addr_offset); 764 765 void *load_packet_data = load->packet->getPtr<void>(); 766 void *store_packet_data = store->packet->getPtr<uint8_t>() + addr_offset; 767 768 std::memcpy(load_packet_data, store_packet_data, load_size); 769} 770 771void 772LSQ::StoreBuffer::countIssuedStore(LSQRequestPtr request) 773{ 774 /* Barriers are accounted for as they are cleared from 775 * the queue, not after their transfers are complete */ 776 if (!request->isBarrier()) 777 numUnissuedAccesses--; 778} 779 780void 781LSQ::StoreBuffer::step() 782{ 783 DPRINTF(MinorMem, "StoreBuffer step numUnissuedAccesses: %d\n", 784 numUnissuedAccesses); 785 786 if (numUnissuedAccesses != 0 && lsq.state == LSQ::MemoryRunning) { 787 /* Clear all the leading barriers */ 788 while (!slots.empty() && 789 slots.front()->isComplete() && slots.front()->isBarrier()) 790 { 791 LSQRequestPtr barrier = slots.front(); 792 793 DPRINTF(MinorMem, "Clearing barrier for inst: %s\n", 794 *(barrier->inst)); 795 796 numUnissuedAccesses--; 797 lsq.clearMemBarrier(barrier->inst); 798 slots.pop_front(); 799 800 delete barrier; 801 } 802 803 auto i = slots.begin(); 804 bool issued = true; 805 unsigned int issue_count = 0; 806 807 /* Skip trying if the memory system is busy */ 808 if (lsq.state == LSQ::MemoryNeedsRetry) 809 issued = false; 810 811 /* Try to issue all stores in order starting from the head 812 * of the queue. 
Responses are allowed to be retired 813 * out of order */ 814 while (issued && 815 issue_count < storeLimitPerCycle && 816 lsq.canSendToMemorySystem() && 817 i != slots.end()) 818 { 819 LSQRequestPtr request = *i; 820 821 DPRINTF(MinorMem, "Considering request: %s, sentAllPackets: %d" 822 " state: %s\n", 823 *(request->inst), request->sentAllPackets(), 824 request->state); 825 826 if (request->isBarrier() && request->isComplete()) { 827 /* Give up at barriers */ 828 issued = false; 829 } else if (!(request->state == LSQRequest::StoreBufferIssuing && 830 request->sentAllPackets())) 831 { 832 DPRINTF(MinorMem, "Trying to send request: %s to memory" 833 " system\n", *(request->inst)); 834 835 if (lsq.tryToSend(request)) { 836 countIssuedStore(request); 837 issue_count++; 838 } else { 839 /* Don't step on to the next store buffer entry if this 840 * one hasn't issued all its packets as the store 841 * buffer must still enforce ordering */ 842 issued = false; 843 } 844 } 845 i++; 846 } 847 } 848} 849 850void 851LSQ::completeMemBarrierInst(MinorDynInstPtr inst, 852 bool committed) 853{ 854 if (committed) { 855 /* Not already sent to the store buffer as a store request? 
*/ 856 if (!inst->inStoreBuffer) { 857 /* Insert an entry into the store buffer to tick off barriers 858 * until there are none in flight */ 859 storeBuffer.insert(new BarrierDataRequest(*this, inst)); 860 } 861 } else { 862 /* Clear the barrier anyway if it wasn't actually committed */ 863 clearMemBarrier(inst); 864 } 865} 866 867void 868LSQ::StoreBuffer::minorTrace() const 869{ 870 unsigned int size = slots.size(); 871 unsigned int i = 0; 872 std::ostringstream os; 873 874 while (i < size) { 875 LSQRequestPtr request = slots[i]; 876 877 request->reportData(os); 878 879 i++; 880 if (i < numSlots) 881 os << ','; 882 } 883 884 while (i < numSlots) { 885 os << '-'; 886 887 i++; 888 if (i < numSlots) 889 os << ','; 890 } 891 892 MINORTRACE("addr=%s num_unissued_stores=%d\n", os.str(), 893 numUnissuedAccesses); 894} 895 896void 897LSQ::tryToSendToTransfers(LSQRequestPtr request) 898{ 899 if (state == MemoryNeedsRetry) { 900 DPRINTF(MinorMem, "Request needs retry, not issuing to" 901 " memory until retry arrives\n"); 902 return; 903 } 904 905 if (request->state == LSQRequest::InTranslation) { 906 DPRINTF(MinorMem, "Request still in translation, not issuing to" 907 " memory\n"); 908 return; 909 } 910 911 assert(request->state == LSQRequest::Translated || 912 request->state == LSQRequest::RequestIssuing || 913 request->state == LSQRequest::Failed || 914 request->state == LSQRequest::Complete); 915 916 if (requests.empty() || requests.front() != request) { 917 DPRINTF(MinorMem, "Request not at front of requests queue, can't" 918 " issue to memory\n"); 919 return; 920 } 921 922 if (transfers.unreservedRemainingSpace() == 0) { 923 DPRINTF(MinorMem, "No space to insert request into transfers" 924 " queue\n"); 925 return; 926 } 927 928 if (request->isComplete() || request->state == LSQRequest::Failed) { 929 DPRINTF(MinorMem, "Passing a %s transfer on to transfers" 930 " queue\n", (request->isComplete() ? 
"completed" : "failed")); 931 request->setState(LSQRequest::Complete); 932 request->setSkipped(); 933 moveFromRequestsToTransfers(request); 934 return; 935 } 936 937 if (!execute.instIsRightStream(request->inst)) { 938 /* Wrong stream, try to abort the transfer but only do so if 939 * there are no packets in flight */ 940 if (request->hasPacketsInMemSystem()) { 941 DPRINTF(MinorMem, "Request's inst. is from the wrong stream," 942 " waiting for responses before aborting request\n"); 943 } else { 944 DPRINTF(MinorMem, "Request's inst. is from the wrong stream," 945 " aborting request\n"); 946 request->setState(LSQRequest::Complete); 947 request->setSkipped(); 948 moveFromRequestsToTransfers(request); 949 } 950 return; 951 } 952 953 if (request->fault != NoFault) { 954 if (request->inst->staticInst->isPrefetch()) { 955 DPRINTF(MinorMem, "Not signalling fault for faulting prefetch\n"); 956 } 957 DPRINTF(MinorMem, "Moving faulting request into the transfers" 958 " queue\n"); 959 request->setState(LSQRequest::Complete); 960 request->setSkipped(); 961 moveFromRequestsToTransfers(request); 962 return; 963 } 964 965 bool is_load = request->isLoad; 966 bool is_llsc = request->request->isLLSC(); 967 bool is_swap = request->request->isSwap(); 968 bool is_atomic = request->request->isAtomic(); 969 bool bufferable = !(request->request->isStrictlyOrdered() || 970 is_llsc || is_swap || is_atomic); 971 972 if (is_load) { 973 if (numStoresInTransfers != 0) { 974 DPRINTF(MinorMem, "Load request with stores still in transfers" 975 " queue, stalling\n"); 976 return; 977 } 978 } else { 979 /* Store. Can it be sent to the store buffer? 
*/ 980 if (bufferable && !request->request->isMmappedIpr()) { 981 request->setState(LSQRequest::StoreToStoreBuffer); 982 moveFromRequestsToTransfers(request); 983 DPRINTF(MinorMem, "Moving store into transfers queue\n"); 984 return; 985 } 986 } 987 988 /* Check if this is the head instruction (and so must be executable as 989 * its stream sequence number was checked above) for loads which must 990 * not be speculatively issued and stores which must be issued here */ 991 if (!bufferable) { 992 if (!execute.instIsHeadInst(request->inst)) { 993 DPRINTF(MinorMem, "Memory access not the head inst., can't be" 994 " sure it can be performed, not issuing\n"); 995 return; 996 } 997 998 unsigned int forwarding_slot = 0; 999 1000 if (storeBuffer.canForwardDataToLoad(request, forwarding_slot) != 1001 NoAddrRangeCoverage) 1002 { 1003 // There's at least another request that targets the same 1004 // address and is staying in the storeBuffer. Since our 1005 // request is non-bufferable (e.g., strictly ordered or atomic), 1006 // we must wait for the other request in the storeBuffer to 1007 // complete before we can issue this non-bufferable request. 1008 // This is to make sure that the order they access the cache is 1009 // correct. 1010 DPRINTF(MinorMem, "Memory access can receive forwarded data" 1011 " from the store buffer, but need to wait for store buffer" 1012 " to drain\n"); 1013 return; 1014 } 1015 } 1016 1017 /* True: submit this packet to the transfers queue to be sent to the 1018 * memory system. 
1019 * False: skip the memory and push a packet for this request onto 1020 * requests */ 1021 bool do_access = true; 1022 1023 if (!is_llsc) { 1024 /* Check for match in the store buffer */ 1025 if (is_load) { 1026 unsigned int forwarding_slot = 0; 1027 AddrRangeCoverage forwarding_result = 1028 storeBuffer.canForwardDataToLoad(request, 1029 forwarding_slot); 1030 1031 switch (forwarding_result) { 1032 case FullAddrRangeCoverage: 1033 /* Forward data from the store buffer into this request and 1034 * repurpose this request's packet into a response packet */ 1035 storeBuffer.forwardStoreData(request, forwarding_slot); 1036 request->packet->makeResponse(); 1037 1038 /* Just move between queues, no access */ 1039 do_access = false; 1040 break; 1041 case PartialAddrRangeCoverage: 1042 DPRINTF(MinorMem, "Load partly satisfied by store buffer" 1043 " data. Must wait for the store to complete\n"); 1044 return; 1045 break; 1046 case NoAddrRangeCoverage: 1047 DPRINTF(MinorMem, "No forwardable data from store buffer\n"); 1048 /* Fall through to try access */ 1049 break; 1050 } 1051 } 1052 } else { 1053 if (!canSendToMemorySystem()) { 1054 DPRINTF(MinorMem, "Can't send request to memory system yet\n"); 1055 return; 1056 } 1057 1058 SimpleThread &thread = *cpu.threads[request->inst->id.threadId]; 1059 1060 TheISA::PCState old_pc = thread.pcState(); 1061 ExecContext context(cpu, thread, execute, request->inst); 1062 1063 /* Handle LLSC requests and tests */ 1064 if (is_load) { 1065 TheISA::handleLockedRead(&context, request->request); 1066 } else { 1067 do_access = TheISA::handleLockedWrite(&context, 1068 request->request, cacheBlockMask); 1069 1070 if (!do_access) { 1071 DPRINTF(MinorMem, "Not perfoming a memory " 1072 "access for store conditional\n"); 1073 } 1074 } 1075 thread.pcState(old_pc); 1076 } 1077 1078 /* See the do_access comment above */ 1079 if (do_access) { 1080 if (!canSendToMemorySystem()) { 1081 DPRINTF(MinorMem, "Can't send request to memory system yet\n"); 
1082 return; 1083 } 1084 1085 /* Remember if this is an access which can't be idly 1086 * discarded by an interrupt */ 1087 if (!bufferable && !request->issuedToMemory) { 1088 numAccessesIssuedToMemory++; 1089 request->issuedToMemory = true; 1090 } 1091 1092 if (tryToSend(request)) { 1093 moveFromRequestsToTransfers(request); 1094 } 1095 } else { 1096 request->setState(LSQRequest::Complete); 1097 moveFromRequestsToTransfers(request); 1098 } 1099} 1100 1101bool 1102LSQ::tryToSend(LSQRequestPtr request) 1103{ 1104 bool ret = false; 1105 1106 if (!canSendToMemorySystem()) { 1107 DPRINTF(MinorMem, "Can't send request: %s yet, no space in memory\n", 1108 *(request->inst)); 1109 } else { 1110 PacketPtr packet = request->getHeadPacket(); 1111 1112 DPRINTF(MinorMem, "Trying to send request: %s addr: 0x%x\n", 1113 *(request->inst), packet->req->getVaddr()); 1114 1115 /* The sender state of the packet *must* be an LSQRequest 1116 * so the response can be correctly handled */ 1117 assert(packet->findNextSenderState<LSQRequest>()); 1118 1119 if (request->request->isMmappedIpr()) { 1120 ThreadContext *thread = 1121 cpu.getContext(cpu.contextToThread( 1122 request->request->contextId())); 1123 1124 if (request->isLoad) { 1125 DPRINTF(MinorMem, "IPR read inst: %s\n", *(request->inst)); 1126 TheISA::handleIprRead(thread, packet); 1127 } else { 1128 DPRINTF(MinorMem, "IPR write inst: %s\n", *(request->inst)); 1129 TheISA::handleIprWrite(thread, packet); 1130 } 1131 1132 request->stepToNextPacket(); 1133 ret = request->sentAllPackets(); 1134 1135 if (!ret) { 1136 DPRINTF(MinorMem, "IPR access has another packet: %s\n", 1137 *(request->inst)); 1138 } 1139 1140 if (ret) 1141 request->setState(LSQRequest::Complete); 1142 else 1143 request->setState(LSQRequest::RequestIssuing); 1144 } else if (dcachePort.sendTimingReq(packet)) { 1145 DPRINTF(MinorMem, "Sent data memory request\n"); 1146 1147 numAccessesInMemorySystem++; 1148 1149 request->stepToNextPacket(); 1150 1151 ret = 
request->sentAllPackets(); 1152 1153 switch (request->state) { 1154 case LSQRequest::Translated: 1155 case LSQRequest::RequestIssuing: 1156 /* Fully or partially issued a request in the transfers 1157 * queue */ 1158 request->setState(LSQRequest::RequestIssuing); 1159 break; 1160 case LSQRequest::StoreInStoreBuffer: 1161 case LSQRequest::StoreBufferIssuing: 1162 /* Fully or partially issued a request in the store 1163 * buffer */ 1164 request->setState(LSQRequest::StoreBufferIssuing); 1165 break; 1166 default: 1167 panic("Unrecognized LSQ request state %d.", request->state); 1168 } 1169 1170 state = MemoryRunning; 1171 } else { 1172 DPRINTF(MinorMem, 1173 "Sending data memory request - needs retry\n"); 1174 1175 /* Needs to be resent, wait for that */ 1176 state = MemoryNeedsRetry; 1177 retryRequest = request; 1178 1179 switch (request->state) { 1180 case LSQRequest::Translated: 1181 case LSQRequest::RequestIssuing: 1182 request->setState(LSQRequest::RequestNeedsRetry); 1183 break; 1184 case LSQRequest::StoreInStoreBuffer: 1185 case LSQRequest::StoreBufferIssuing: 1186 request->setState(LSQRequest::StoreBufferNeedsRetry); 1187 break; 1188 default: 1189 panic("Unrecognized LSQ request state %d.", request->state); 1190 } 1191 } 1192 } 1193 1194 if (ret) 1195 threadSnoop(request); 1196 1197 return ret; 1198} 1199 1200void 1201LSQ::moveFromRequestsToTransfers(LSQRequestPtr request) 1202{ 1203 assert(!requests.empty() && requests.front() == request); 1204 assert(transfers.unreservedRemainingSpace() != 0); 1205 1206 /* Need to count the number of stores in the transfers 1207 * queue so that loads know when their store buffer forwarding 1208 * results will be correct (only when all those stores 1209 * have reached the store buffer) */ 1210 if (!request->isLoad) 1211 numStoresInTransfers++; 1212 1213 requests.pop(); 1214 transfers.push(request); 1215} 1216 1217bool 1218LSQ::canSendToMemorySystem() 1219{ 1220 return state == MemoryRunning && 1221 numAccessesInMemorySystem < 
        inMemorySystemLimit;
}

/** Handle a response packet from the memory system.  The packet's sender
 *  state carries the LSQRequest that issued it (pushed by
 *  makePacketForRequest), which is stepped on according to its state */
bool
LSQ::recvTimingResp(PacketPtr response)
{
    LSQRequestPtr request =
        safe_cast<LSQRequestPtr>(response->popSenderState());

    DPRINTF(MinorMem, "Received response packet inst: %s"
        " addr: 0x%x cmd: %s\n",
        *(request->inst), response->getAddr(),
        response->cmd.toString());

    numAccessesInMemorySystem--;

    if (response->isError()) {
        DPRINTF(MinorMem, "Received error response packet: %s\n",
            *request->inst);
    }

    switch (request->state) {
    case LSQRequest::RequestIssuing:
    case LSQRequest::RequestNeedsRetry:
        /* Response to a request from the transfers queue */
        request->retireResponse(response);

        DPRINTF(MinorMem, "Has outstanding packets?: %d %d\n",
            request->hasPacketsInMemSystem(), request->isComplete());

        break;
    case LSQRequest::StoreBufferIssuing:
    case LSQRequest::StoreBufferNeedsRetry:
        /* Response to a request from the store buffer */
        request->retireResponse(response);

        /* Remove completed requests unless they are barriers (which will
         * need to be removed in order) */
        if (request->isComplete()) {
            if (!request->isBarrier()) {
                storeBuffer.deleteRequest(request);
            } else {
                DPRINTF(MinorMem, "Completed transfer for barrier: %s"
                    " leaving the request as it is also a barrier\n",
                    *(request->inst));
            }
        }
        break;
    default:
        panic("Shouldn't be allowed to receive a response from another state");
    }

    /* We go to idle even if there are more things in the requests queue
     * as it's the job of step to actually step us on to the next
     * transaction */

    /* Let's try and wake up the processor for the next cycle */
    cpu.wakeupOnEvent(Pipeline::ExecuteStageId);

    /* Never busy */
    return true;
}

/** Called by the port when the memory system is ready to accept the
 *  request (retryRequest) that previously failed to send */
void
LSQ::recvReqRetry()
{
    DPRINTF(MinorMem, "Received retry request\n");

    assert(state == MemoryNeedsRetry);

    switch (retryRequest->state) {
    case LSQRequest::RequestNeedsRetry:
        /* Retry in the requests queue */
        retryRequest->setState(LSQRequest::Translated);
        break;
    case LSQRequest::StoreBufferNeedsRetry:
        /* Retry in the store buffer */
        retryRequest->setState(LSQRequest::StoreInStoreBuffer);
        break;
    default:
        panic("Unrecognized retry request state %d.", retryRequest->state);
    }

    /* Set state back to MemoryRunning so that the following
     * tryToSend can actually send.  Note that this won't
     * allow another transfer in as tryToSend should
     * issue a memory request and either succeed for this
     * request or return the LSQ back to MemoryNeedsRetry */
    state = MemoryRunning;

    /* Try to resend the request */
    if (tryToSend(retryRequest)) {
        /* Successfully sent, need to move the request */
        switch (retryRequest->state) {
        case LSQRequest::RequestIssuing:
            /* In the requests queue */
            moveFromRequestsToTransfers(retryRequest);
            break;
        case LSQRequest::StoreBufferIssuing:
            /* In the store buffer */
            storeBuffer.countIssuedStore(retryRequest);
            break;
        default:
            panic("Unrecognized retry request state %d.", retryRequest->state);
        }

        retryRequest = NULL;
    }
}

LSQ::LSQ(std::string name_, std::string dcache_port_name_,
    MinorCPU &cpu_, Execute &execute_,
    unsigned int in_memory_system_limit, unsigned int line_width,
    unsigned int requests_queue_size, unsigned int transfers_queue_size,
    unsigned int store_buffer_size,
    unsigned int store_buffer_cycle_store_limit) :
    Named(name_),
    cpu(cpu_),
    execute(execute_),
    dcachePort(dcache_port_name_, *this, cpu_),
    lastMemBarrier(cpu.numThreads, 0),
    state(MemoryRunning),
    inMemorySystemLimit(in_memory_system_limit),
    /* A line_width of 0 means "use the CPU's cache line size" */
    lineWidth((line_width == 0 ?
        cpu.cacheLineSize() : line_width)),
    requests(name_ + ".requests", "addr", requests_queue_size),
    transfers(name_ + ".transfers", "addr", transfers_queue_size),
    storeBuffer(name_ + ".storeBuffer",
        *this, store_buffer_size, store_buffer_cycle_store_limit),
    numAccessesInMemorySystem(0),
    numAccessesInDTLB(0),
    numStoresInTransfers(0),
    numAccessesIssuedToMemory(0),
    retryRequest(NULL),
    cacheBlockMask(~(cpu_.cacheLineSize() - 1))
{
    /* Sanity check the configuration parameters: every limit and queue
     * size must allow at least one in-flight access */
    if (in_memory_system_limit < 1) {
        fatal("%s: executeMaxAccessesInMemory must be >= 1 (%d)\n", name_,
            in_memory_system_limit);
    }

    if (store_buffer_cycle_store_limit < 1) {
        fatal("%s: executeLSQMaxStoreBufferStoresPerCycle must be"
            " >= 1 (%d)\n", name_, store_buffer_cycle_store_limit);
    }

    if (requests_queue_size < 1) {
        fatal("%s: executeLSQRequestsQueueSize must be"
            " >= 1 (%d)\n", name_, requests_queue_size);
    }

    if (transfers_queue_size < 1) {
        fatal("%s: executeLSQTransfersQueueSize must be"
            " >= 1 (%d)\n", name_, transfers_queue_size);
    }

    if (store_buffer_size < 1) {
        fatal("%s: executeLSQStoreBufferSize must be"
            " >= 1 (%d)\n", name_, store_buffer_size);
    }

    /* cacheBlockMask (above) relies on the line width being a power of 2 */
    if ((lineWidth & (lineWidth - 1)) != 0) {
        fatal("%s: lineWidth: %d must be a power of 2\n", name(), lineWidth);
    }
}

LSQ::~LSQ()
{ }

LSQ::LSQRequest::~LSQRequest()
{
    /* The request owns its packet and (until handed to a packet by
     * makePacket) its data buffer */
    if (packet)
        delete packet;
    if (data)
        delete [] data;
}

/**
 * Step the memory access mechanism on to its next state.  In reality, most
 * of the stepping is done by the callbacks on the LSQ but this
 * function is responsible for issuing memory requests lodged in the
 * requests queue.
 */
void
LSQ::step()
{
    /* Try to move address-translated requests between queues and issue
     * them */
    if (!requests.empty())
        tryToSendToTransfers(requests.front());

    storeBuffer.step();
}

/** Return the request at the head of the transfers queue if it belongs to
 *  inst and is ready to be picked up (complete, or a store that can move
 *  into the store buffer); NULL otherwise */
LSQ::LSQRequestPtr
LSQ::findResponse(MinorDynInstPtr inst)
{
    LSQ::LSQRequestPtr ret = NULL;

    if (!transfers.empty()) {
        LSQRequestPtr request = transfers.front();

        /* Same instruction and complete access or a store that's
         * capable of being moved to the store buffer */
        if (request->inst->id == inst->id) {
            bool complete = request->isComplete();
            bool can_store = storeBuffer.canInsert();
            bool to_store_buffer = request->state ==
                LSQRequest::StoreToStoreBuffer;

            if ((complete && !(request->isBarrier() && !can_store)) ||
                (to_store_buffer && can_store))
            {
                ret = request;
            }
        }
    }

    if (ret) {
        DPRINTF(MinorMem, "Found matching memory response for inst: %s\n",
            *inst);
    } else {
        DPRINTF(MinorMem, "No matching memory response for inst: %s\n",
            *inst);
    }

    return ret;
}

/** Pop a response (found by findResponse) off the transfers queue,
 *  updating the store/issued counts and deleting the request unless it
 *  lives on in the store buffer */
void
LSQ::popResponse(LSQ::LSQRequestPtr response)
{
    assert(!transfers.empty() && transfers.front() == response);

    transfers.pop();

    if (!response->isLoad)
        numStoresInTransfers--;

    if (response->issuedToMemory)
        numAccessesIssuedToMemory--;

    if (response->state != LSQRequest::StoreInStoreBuffer) {
        DPRINTF(MinorMem, "Deleting %s request: %s\n",
            (response->isLoad ?
            "load" : "store"),
            *(response->inst));

        delete response;
    }
}

/** Hand a store at the head of the transfers queue over to the store
 *  buffer (caller must have checked storeBuffer.canInsert) */
void
LSQ::sendStoreToStoreBuffer(LSQRequestPtr request)
{
    assert(request->state == LSQRequest::StoreToStoreBuffer);

    DPRINTF(MinorMem, "Sending store: %s to store buffer\n",
        *(request->inst));

    request->inst->inStoreBuffer = true;

    storeBuffer.insert(request);
}

/** The LSQ is drained when all three of its queues are empty */
bool
LSQ::isDrained()
{
    return requests.empty() && transfers.empty() &&
        storeBuffer.isDrained();
}

/** True when there is work that a call to step() could progress:
 *  a translated request that could move to transfers, or unissued
 *  stores in the store buffer */
bool
LSQ::needsToTick()
{
    bool ret = false;

    if (canSendToMemorySystem()) {
        bool have_translated_requests = !requests.empty() &&
            requests.front()->state != LSQRequest::InTranslation &&
            transfers.unreservedRemainingSpace() != 0;

        ret = have_translated_requests ||
            storeBuffer.numUnissuedStores() != 0;
    }

    if (ret)
        DPRINTF(Activity, "Need to tick\n");

    return ret;
}

/** Build an LSQRequest for inst's memory access, start its address
 *  translation and push it onto the requests queue.  A SplitDataRequest
 *  is used when the access crosses a lineWidth boundary */
void
LSQ::pushRequest(MinorDynInstPtr inst, bool isLoad, uint8_t *data,
    unsigned int size, Addr addr, Request::Flags flags,
    uint64_t *res, AtomicOpFunctor *amo_op,
    const std::vector<bool>& byteEnable)
{
    bool needs_burst = transferNeedsBurst(addr, size, lineWidth);

    if (needs_burst && inst->staticInst->isAtomic()) {
        // AMO requests that access across a cache line boundary are not
        // allowed since the cache does not guarantee AMO ops to be executed
        // atomically in two cache lines
        // For ISAs such as x86 that requires AMO operations to work on
        // accesses that cross cache-line boundaries, the cache needs to be
        // modified to support locking both cache lines to guarantee the
        // atomicity.
        panic("Do not expect cross-cache-line atomic memory request\n");
    }

    LSQRequestPtr request;

    /* Copy given data into the request.  The request will pass this to the
     * packet and then it will own the data */
    uint8_t *request_data = NULL;

    DPRINTF(MinorMem, "Pushing request (%s) addr: 0x%x size: %d flags:"
        " 0x%x%s lineWidth : 0x%x\n",
        (isLoad ? "load" : "store/atomic"), addr, size, flags,
        (needs_burst ? " (needs burst)" : ""), lineWidth);

    if (!isLoad) {
        /* Request_data becomes the property of a ...DataRequest (see below)
         * and destroyed by its destructor */
        request_data = new uint8_t[size];
        if (inst->staticInst->isAtomic() ||
            (flags & Request::STORE_NO_DATA)) {
            /* For atomic or store-no-data, just use zeroed data */
            std::memset(request_data, 0, size);
        } else {
            std::memcpy(request_data, data, size);
        }
    }

    if (needs_burst) {
        request = new SplitDataRequest(
            *this, inst, isLoad, request_data, res);
    } else {
        request = new SingleDataRequest(
            *this, inst, isLoad, request_data, res);
    }

    if (inst->traceData)
        inst->traceData->setMem(addr, size, flags);

    int cid = cpu.threads[inst->id.threadId]->getTC()->contextId();
    request->request->setContext(cid);
    request->request->setVirt(0 /* asid */,
        addr, size, flags, cpu.dataMasterId(),
        /* I've no idea why we need the PC, but give it */
        inst->pc.instAddr(), amo_op);
    if (!byteEnable.empty()) {
        request->request->setByteEnable(byteEnable);
    }

    requests.push(request);
    request->startAddrTranslation();
}

/** Push a request for an instruction whose memory access already faulted;
 *  it carries the fault through the queues without accessing memory */
void
LSQ::pushFailedRequest(MinorDynInstPtr inst)
{
    LSQRequestPtr request = new FailedDataRequest(*this, inst);
    requests.push(request);
}

void
LSQ::minorTrace() const
{
    /* NOTE: only thread 0's lastMemBarrier is traced here */
    MINORTRACE("state=%s in_tlb_mem=%d/%d stores_in_transfers=%d"
        " lastMemBarrier=%d\n",
        state, numAccessesInDTLB, numAccessesInMemorySystem,
        numStoresInTransfers, lastMemBarrier[0]);
    requests.minorTrace();
    transfers.minorTrace();
    storeBuffer.minorTrace();
}

LSQ::StoreBuffer::StoreBuffer(std::string name_, LSQ &lsq_,
    unsigned int store_buffer_size,
    unsigned int store_limit_per_cycle) :
    Named(name_), lsq(lsq_),
    numSlots(store_buffer_size),
    storeLimitPerCycle(store_limit_per_cycle),
    slots(),
    numUnissuedAccesses(0)
{
}

/** Make a read or write packet for request, attaching sender_state (an
 *  LSQRequest, used to route the response in recvTimingResp) and, for
 *  writes with data, handing ownership of data to the packet */
PacketPtr
makePacketForRequest(const RequestPtr &request, bool isLoad,
    Packet::SenderState *sender_state, PacketDataPtr data)
{
    PacketPtr ret = isLoad ? Packet::createRead(request)
                           : Packet::createWrite(request);

    if (sender_state)
        ret->pushSenderState(sender_state);

    if (isLoad) {
        ret->allocate();
    } else if (!request->isCacheMaintenance()) {
        // CMOs are treated as stores but they don't have data. All
        // stores otherwise need to allocate for data.
        ret->dataDynamic(data);
    }

    return ret;
}

/** Record the execute sequence number of a just-issued memory barrier
 *  for its thread */
void
LSQ::issuedMemBarrierInst(MinorDynInstPtr inst)
{
    assert(inst->isInst() && inst->staticInst->isMemBarrier());
    assert(inst->id.execSeqNum > lastMemBarrier[inst->id.threadId]);

    /* Remember the barrier.  We only have a notion of one
     * barrier so this may result in some mem refs being
     * delayed if they are between barriers */
    lastMemBarrier[inst->id.threadId] = inst->id.execSeqNum;
}

void
LSQ::LSQRequest::makePacket()
{
    /* Make the function idempotent */
    if (packet)
        return;

    // if the translation faulted, do not create a packet
    if (fault != NoFault) {
        assert(packet == NULL);
        return;
    }

    packet = makePacketForRequest(request, isLoad, this, data);
    /* Null the data pointer so the LSQRequest destructor doesn't free it:
     * ownership has passed to the packet (via dataDynamic in
     * makePacketForRequest), which is now responsible for its
     * destruction */
    data = NULL;
}

std::ostream &
operator <<(std::ostream &os, LSQ::MemoryState state)
{
    switch (state) {
    case LSQ::MemoryRunning:
        os << "MemoryRunning";
        break;
    case LSQ::MemoryNeedsRetry:
        os << "MemoryNeedsRetry";
        break;
    default:
        os << "MemoryState-" << static_cast<int>(state);
        break;
    }
    return os;
}

/** Pass an incoming snoop to each thread's address monitor and (for
 *  invalidations/writes) to the ISA's locked-access (LLSC) snoop
 *  handler */
void
LSQ::recvTimingSnoopReq(PacketPtr pkt)
{
    /* LLSC operations in Minor can't be speculative and are executed from
     * the head of the requests queue.  We shouldn't need to do more than
     * this action on snoops. */
    for (ThreadID tid = 0; tid < cpu.numThreads; tid++) {
        if (cpu.getCpuAddrMonitor(tid)->doMonitor(pkt)) {
            cpu.wakeup(tid);
        }
    }

    if (pkt->isInvalidate() || pkt->isWrite()) {
        for (ThreadID tid = 0; tid < cpu.numThreads; tid++) {
            TheISA::handleLockedSnoop(cpu.getContext(tid), pkt,
                cacheBlockMask);
        }
    }
}

/** Propagate a request this CPU issued itself to the *other* threads'
 *  address monitors and LLSC state, as the cache won't snoop the
 *  requester back */
void
LSQ::threadSnoop(LSQRequestPtr request)
{
    /* LLSC operations in Minor can't be speculative and are executed from
     * the head of the requests queue.  We shouldn't need to do more than
     * this action on snoops. */
    ThreadID req_tid = request->inst->id.threadId;
    PacketPtr pkt = request->packet;

    for (ThreadID tid = 0; tid < cpu.numThreads; tid++) {
        if (tid != req_tid) {
            if (cpu.getCpuAddrMonitor(tid)->doMonitor(pkt)) {
                cpu.wakeup(tid);
            }

            if (pkt->isInvalidate() || pkt->isWrite()) {
                TheISA::handleLockedSnoop(cpu.getContext(tid), pkt,
                    cacheBlockMask);
            }
        }
    }
}

}