#include "cpu/minor/cpu.hh"
#include "cpu/minor/exec_context.hh"
#include "cpu/minor/execute.hh"
#include "cpu/minor/pipeline.hh"
#include "debug/Activity.hh"
#include "debug/MinorMem.hh"

namespace Minor
{

/** Returns the offset of addr into an aligned block of size block_size.
 *  NOTE(review): mask arithmetic assumes block_size is a power of two —
 *  holds for cache-line widths, confirm for all callers. */
static Addr
addrBlockOffset(Addr addr, unsigned int block_size)
{
    return addr & (block_size - 1);
}

/** Returns true if the given [addr .. addr+size-1] transfer needs to be
 *  fragmented across a block size of block_size */
static bool
transferNeedsBurst(Addr addr, unsigned int size, unsigned int block_size)
{
    return (addrBlockOffset(addr, block_size) + size) > block_size;
}

/** Base LSQ request.  Starts life NotIssued with an empty Request that the
 *  caller populates before translation is started. */
LSQ::LSQRequest::LSQRequest(LSQ &port_, MinorDynInstPtr inst_, bool isLoad_,
    PacketDataPtr data_, uint64_t *res_) :
    SenderState(),
    port(port_),
    inst(inst_),
    isLoad(isLoad_),
    data(data_),
    packet(NULL),
    request(),
    fault(NoFault),
    res(res_),
    skipped(false),
    issuedToMemory(false),
    state(NotIssued)
{
    request = std::make_shared<Request>();
}

/** Classify how the byte range [req1_addr .. req1_addr+req1_size-1]
 *  covers [req2_addr .. req2_addr+req2_size-1]: none, full, or partial. */
LSQ::AddrRangeCoverage
LSQ::LSQRequest::containsAddrRangeOf(
    Addr req1_addr, unsigned int req1_size,
    Addr req2_addr, unsigned int req2_size)
{
    /* 'end' here means the address of the byte just past the request
     * blocks */
    Addr req2_end_addr = req2_addr + req2_size;
    Addr req1_end_addr = req1_addr + req1_size;

    AddrRangeCoverage ret;

    if (req1_addr >= req2_end_addr || req1_end_addr <= req2_addr)
        ret = NoAddrRangeCoverage;
    else if (req1_addr <= req2_addr && req1_end_addr >= req2_end_addr)
        ret = FullAddrRangeCoverage;
    else
        ret = PartialAddrRangeCoverage;

    return ret;
}

/** Coverage of other_request's physical range by this request's range.
 *  Both requests must already be translated (getPaddr is valid). */
LSQ::AddrRangeCoverage
LSQ::LSQRequest::containsAddrRangeOf(LSQRequestPtr other_request)
{
    return containsAddrRangeOf(request->getPaddr(), request->getSize(),
        other_request->request->getPaddr(), other_request->request->getSize());
}

/** True for real instructions (not fault/bubble slots) that are memory
 *  barriers */
bool
LSQ::LSQRequest::isBarrier()
{
    return inst->isInst() && inst->staticInst->isMemBarrier();
}

bool
LSQ::LSQRequest::needsToBeSentToStoreBuffer()
{
    return state == StoreToStoreBuffer;
}

/** Single place where the request state machine advances, so every
 *  transition is traced */
void
LSQ::LSQRequest::setState(LSQRequestState new_state)
{
    DPRINTFS(MinorMem, (&port), "Setting state from %d to %d for request:"
        " %s\n", state, new_state, *inst);
    state = new_state;
}

bool
LSQ::LSQRequest::isComplete() const
{
    /* @todo, There is currently only one 'completed' state. This
     * may not be a good choice */
    return state == Complete;
}

/** Emit a compact R/W;inst;state triple for MinorTrace output */
void
LSQ::LSQRequest::reportData(std::ostream &os) const
{
    os << (isLoad ? 'R' : 'W') << ';';
    inst->reportData(os);
    os << ';' << state;
}

std::ostream &
operator <<(std::ostream &os, LSQ::AddrRangeCoverage coverage)
{
    switch (coverage) {
      case LSQ::PartialAddrRangeCoverage:
        os << "PartialAddrRangeCoverage";
        break;
      case LSQ::FullAddrRangeCoverage:
        os << "FullAddrRangeCoverage";
        break;
      case LSQ::NoAddrRangeCoverage:
        os << "NoAddrRangeCoverage";
        break;
      default:
        os << "AddrRangeCoverage-" << static_cast<int>(coverage);
        break;
    }
    return os;
}

std::ostream &
operator <<(std::ostream &os, LSQ::LSQRequest::LSQRequestState state)
{
    switch (state) {
      case LSQ::LSQRequest::NotIssued:
        os << "NotIssued";
        break;
      case LSQ::LSQRequest::InTranslation:
        os << "InTranslation";
        break;
      case LSQ::LSQRequest::Translated:
        os << "Translated";
        break;
      case LSQ::LSQRequest::Failed:
        os << "Failed";
        break;
      case LSQ::LSQRequest::RequestIssuing:
        os << "RequestIssuing";
        break;
      case LSQ::LSQRequest::StoreToStoreBuffer:
        os << "StoreToStoreBuffer";
        break;
      case LSQ::LSQRequest::StoreInStoreBuffer:
        os << "StoreInStoreBuffer";
        break;
      case LSQ::LSQRequest::StoreBufferIssuing:
        os << "StoreBufferIssuing";
        break;
      case LSQ::LSQRequest::RequestNeedsRetry:
        os << "RequestNeedsRetry";
        break;
      case LSQ::LSQRequest::StoreBufferNeedsRetry:
        os << "StoreBufferNeedsRetry";
        break;
      case LSQ::LSQRequest::Complete:
        os << "Complete";
        break;
      default:
        os << "LSQRequestState-" << static_cast<int>(state);
        break;
    }
    return os;
}

/** Retire inst's barrier: if it is the youngest outstanding barrier for its
 *  thread, clear the per-thread lastMemBarrier marker (0 == no barrier) */
void
LSQ::clearMemBarrier(MinorDynInstPtr inst)
{
    bool is_last_barrier =
        inst->id.execSeqNum >= lastMemBarrier[inst->id.threadId];

    DPRINTF(MinorMem, "Moving %s barrier out of store buffer inst: %s\n",
        (is_last_barrier ? "last" : "a"), *inst);

    if (is_last_barrier)
        lastMemBarrier[inst->id.threadId] = 0;
}

/** TLB callback for a single (unfragmented) access.  Records the fault (if
 *  any), builds the packet and hands the request on to the transfers queue. */
void
LSQ::SingleDataRequest::finish(const Fault &fault_, const RequestPtr &request_,
    ThreadContext *tc, BaseTLB::Mode mode)
{
    fault = fault_;

    port.numAccessesInDTLB--;

    DPRINTFS(MinorMem, (&port), "Received translation response for"
        " request: %s\n", *inst);

    makePacket();

    setState(Translated);
    port.tryToSendToTransfers(this);

    /* Let's try and wake up the processor for the next cycle */
    port.cpu.wakeupOnEvent(Pipeline::ExecuteStageId);
}

/** Kick off timing address translation for the whole (single) access */
void
LSQ::SingleDataRequest::startAddrTranslation()
{
    ThreadContext *thread = port.cpu.getContext(
        inst->id.threadId);

    port.numAccessesInDTLB++;

    setState(LSQ::LSQRequest::InTranslation);

    DPRINTFS(MinorMem, (&port), "Submitting DTLB request\n");
    /* Submit the translation request.  The response will come through
     * finish/markDelayed on the LSQRequest as it bears the Translation
     * interface */
    thread->getDTBPtr()->translateTiming(
        request, thread, this, (isLoad ? BaseTLB::Read : BaseTLB::Write));
}

/** A single access completes with its one response packet */
void
LSQ::SingleDataRequest::retireResponse(PacketPtr packet_)
{
    DPRINTFS(MinorMem, (&port), "Retiring packet\n");
    packet = packet_;
    packetInFlight = false;
    setState(Complete);
}

/** TLB callback for one fragment of a split access.  Fragments are
 *  translated strictly in order; a fault short-circuits the remainder. */
void
LSQ::SplitDataRequest::finish(const Fault &fault_, const RequestPtr &request_,
    ThreadContext *tc, BaseTLB::Mode mode)
{
    fault = fault_;

    port.numAccessesInDTLB--;

    unsigned int M5_VAR_USED expected_fragment_index =
        numTranslatedFragments;

    numInTranslationFragments--;
    numTranslatedFragments++;

    DPRINTFS(MinorMem, (&port), "Received translation response for fragment"
        " %d of request: %s\n", expected_fragment_index, *inst);

    /* Responses must arrive in the order the fragments were submitted */
    assert(request_ == fragmentRequests[expected_fragment_index]);

    /* Wake up next cycle to get things going again in case the
     * tryToSendToTransfers does take */
    port.cpu.wakeupOnEvent(Pipeline::ExecuteStageId);

    if (fault != NoFault) {
        /* tryToSendToTransfers will handle the fault */

        DPRINTFS(MinorMem, (&port), "Faulting translation for fragment:"
            " %d of request: %s\n",
            expected_fragment_index, *inst);

        setState(Translated);
        port.tryToSendToTransfers(this);
    } else if (numTranslatedFragments == numFragments) {
        /* All fragments translated: build their packets and move on */
        makeFragmentPackets();

        setState(Translated);
        port.tryToSendToTransfers(this);
    } else {
        /* Avoid calling translateTiming from within ::finish */
        assert(!translationEvent.scheduled());
        port.cpu.schedule(translationEvent, curTick());
    }
}

LSQ::SplitDataRequest::SplitDataRequest(LSQ &port_, MinorDynInstPtr inst_,
    bool isLoad_, PacketDataPtr data_, uint64_t *res_) :
    LSQRequest(port_, inst_, isLoad_, data_, res_),
    translationEvent([this]{ sendNextFragmentToTranslation(); },
        "translationEvent"),
    numFragments(0),
    numInTranslationFragments(0),
    numTranslatedFragments(0),
    numIssuedFragments(0),
    numRetiredFragments(0),
    fragmentRequests(),
    fragmentPackets()
{
    /* Don't know how many elements are needed until the request is
     * populated by the caller. */
}

/** Fragment packets are owned by this request and freed together here */
LSQ::SplitDataRequest::~SplitDataRequest()
{
    for (auto i = fragmentPackets.begin();
         i != fragmentPackets.end(); i++)
    {
        delete *i;
    }
}

/** Slice the whole virtual request into line-aligned fragment Requests */
void
LSQ::SplitDataRequest::makeFragmentRequests()
{
    Addr base_addr = request->getVaddr();
    unsigned int whole_size = request->getSize();
    unsigned int line_width = port.lineWidth;

    unsigned int fragment_size;
    Addr fragment_addr;

    /* Assume that this transfer is across potentially many block snap
     * boundaries:
     *
     * |      _|________|________|________|___      |
     * |     |0|   1    |   2    |   3    | 4 |     |
     * |     |_|________|________|________|___|     |
     * |       |        |        |        |         |
     *
     *  The first transfer (0) can be up to lineWidth in size.
     *  All the middle transfers (1-3) are lineWidth in size
     *  The last transfer (4) can be from zero to lineWidth - 1 in size
     */
    unsigned int first_fragment_offset =
        addrBlockOffset(base_addr, line_width);
    unsigned int last_fragment_size =
        addrBlockOffset(base_addr + whole_size, line_width);
    unsigned int first_fragment_size =
        line_width - first_fragment_offset;

    unsigned int middle_fragments_total_size =
        whole_size - (first_fragment_size + last_fragment_size);

    assert(addrBlockOffset(middle_fragments_total_size, line_width) == 0);

    unsigned int middle_fragment_count =
        middle_fragments_total_size / line_width;

    numFragments = 1 /* first */ + middle_fragment_count +
        (last_fragment_size == 0 ? 0 : 1);

    DPRINTFS(MinorMem, (&port), "Dividing transfer into %d fragmentRequests."
        " First fragment size: %d Last fragment size: %d\n",
        numFragments, first_fragment_size,
        (last_fragment_size == 0 ? line_width : last_fragment_size));

    assert(((middle_fragment_count * line_width) +
        first_fragment_size + last_fragment_size) == whole_size);

    fragment_addr = base_addr;
    fragment_size = first_fragment_size;

    /* Just past the last address in the request */
    Addr end_addr = base_addr + whole_size;

    for (unsigned int fragment_index = 0; fragment_index < numFragments;
         fragment_index++)
    {
        bool M5_VAR_USED is_last_fragment = false;

        if (fragment_addr == base_addr) {
            /* First fragment */
            fragment_size = first_fragment_size;
        } else {
            if ((fragment_addr + line_width) > end_addr) {
                /* Adjust size of last fragment */
                fragment_size = end_addr - fragment_addr;
                is_last_fragment = true;
            } else {
                /* Middle fragments */
                fragment_size = line_width;
            }
        }

        RequestPtr fragment = std::make_shared<Request>();

        fragment->setContext(request->contextId());
        fragment->setVirt(0 /* asid */,
            fragment_addr, fragment_size, request->getFlags(),
            request->masterId(),
            request->getPC());

        DPRINTFS(MinorMem, (&port), "Generating fragment addr: 0x%x size: %d"
            " (whole request addr: 0x%x size: %d) %s\n",
            fragment_addr, fragment_size, base_addr, whole_size,
            (is_last_fragment ? "last fragment" : ""));

        fragment_addr += fragment_size;

        fragmentRequests.push_back(fragment);
    }
}

/** Build one Packet per translated fragment (copying out the relevant slice
 *  of the store data for writes) and then the overall request's packet */
void
LSQ::SplitDataRequest::makeFragmentPackets()
{
    Addr base_addr = request->getVaddr();

    DPRINTFS(MinorMem, (&port), "Making packets for request: %s\n", *inst);

    for (unsigned int fragment_index = 0; fragment_index < numFragments;
         fragment_index++)
    {
        RequestPtr fragment = fragmentRequests[fragment_index];

        DPRINTFS(MinorMem, (&port), "Making packet %d for request: %s"
            " (%d, 0x%x)\n",
            fragment_index, *inst,
            (fragment->hasPaddr() ? "has paddr" : "no paddr"),
            (fragment->hasPaddr() ? fragment->getPaddr() : 0));

        Addr fragment_addr = fragment->getVaddr();
        unsigned int fragment_size = fragment->getSize();

        uint8_t *request_data = NULL;

        if (!isLoad) {
            /* Split data for Packets.  Will become the property of the
             * outgoing Packets */
            request_data = new uint8_t[fragment_size];
            std::memcpy(request_data, data + (fragment_addr - base_addr),
                fragment_size);
        }

        assert(fragment->hasPaddr());

        PacketPtr fragment_packet =
            makePacketForRequest(fragment, isLoad, this, request_data);

        fragmentPackets.push_back(fragment_packet);
        /* Accumulate flags in parent request */
        request->setFlags(fragment->getFlags());
    }

    /* Might as well make the overall/response packet here */
    /* Get the physical address for the whole request/packet from the first
     * fragment */
    request->setPaddr(fragmentRequests[0]->getPaddr());
    makePacket();
}

/** Build the fragments and submit the first one; ::finish chains the rest */
void
LSQ::SplitDataRequest::startAddrTranslation()
{
    setState(LSQ::LSQRequest::InTranslation);

    makeFragmentRequests();

    numInTranslationFragments = 0;
    numTranslatedFragments = 0;

    /* @todo, just do these in sequence for now with
     * a loop of:
     * do {
     *  sendNextFragmentToTranslation ; translateTiming ; finish
     * } while (numTranslatedFragments != numFragments);
     */

    /* Do first translation */
    sendNextFragmentToTranslation();
}

/** The next fragment packet still to be sent to memory */
PacketPtr
LSQ::SplitDataRequest::getHeadPacket()
{
    assert(numIssuedFragments < numFragments);

    return fragmentPackets[numIssuedFragments];
}

void
LSQ::SplitDataRequest::stepToNextPacket()
{
    assert(numIssuedFragments < numFragments);

    numIssuedFragments++;
}

/** Handle a memory response for one fragment.  Fragment data is merged into
 *  the whole-request buffer; the overall packet becomes a response once the
 *  final fragment (or a skipped/faulting burst) retires. */
void
LSQ::SplitDataRequest::retireResponse(PacketPtr response)
{
    assert(numRetiredFragments < numFragments);

    DPRINTFS(MinorMem, (&port), "Retiring fragment addr: 0x%x size: %d"
        " offset: 0x%x (retired fragment num: %d) %s\n",
        response->req->getVaddr(), response->req->getSize(),
        request->getVaddr() - response->req->getVaddr(),
        numRetiredFragments,
        (fault == NoFault ? "" : fault->name()));

    numRetiredFragments++;

    if (skipped) {
        /* Skip because we already knew the request had faulted or been
         * skipped */
        DPRINTFS(MinorMem, (&port), "Skipping this fragment\n");
    } else if (response->isError()) {
        /* Mark up the error and leave to execute to handle it */
        DPRINTFS(MinorMem, (&port), "Fragment has an error, skipping\n");
        setSkipped();
        packet->copyError(response);
    } else {
        if (isLoad) {
            if (!data) {
                /* For a split transfer, a Packet must be constructed
                 * to contain all returning data.  This is that packet's
                 * data */
                data = new uint8_t[request->getSize()];
            }

            /* Populate the portion of the overall response data represented
             * by the response fragment */
            std::memcpy(
                data + (response->req->getVaddr() - request->getVaddr()),
                response->getConstPtr<uint8_t>(),
                response->req->getSize());
        }
    }

    /* Complete early if we're skipping and there are no more in-flight
     * accesses */
    if (skipped && !hasPacketsInMemSystem()) {
        DPRINTFS(MinorMem, (&port), "Completed skipped burst\n");
        setState(Complete);
        if (packet->needsResponse())
            packet->makeResponse();
    }

    if (numRetiredFragments == numFragments)
        setState(Complete);

    if (!skipped && isComplete()) {
        DPRINTFS(MinorMem, (&port), "Completed burst %d\n", packet != NULL);

        DPRINTFS(MinorMem, (&port), "Retired packet isRead: %d isWrite: %d"
            " needsResponse: %d packetSize: %s requestSize: %s responseSize:"
            " %s\n", packet->isRead(), packet->isWrite(),
            packet->needsResponse(), packet->getSize(), request->getSize(),
            response->getSize());

        /* A request can become complete by several paths, this is a sanity
         * check to make sure the packet's data is created */
        if (!data) {
            data = new uint8_t[request->getSize()];
        }

        if (isLoad) {
            DPRINTFS(MinorMem, (&port), "Copying read data\n");
            std::memcpy(packet->getPtr<uint8_t>(), data, request->getSize());
        }
        packet->makeResponse();
    }

    /* Packets are all deallocated together in ~SplitLSQRequest */
}

/** Submit the next untranslated fragment to the DTLB */
void
LSQ::SplitDataRequest::sendNextFragmentToTranslation()
{
    unsigned int fragment_index = numTranslatedFragments;

    ThreadContext *thread = port.cpu.getContext(
        inst->id.threadId);

    DPRINTFS(MinorMem, (&port), "Submitting DTLB request for fragment: %d\n",
        fragment_index);

    port.numAccessesInDTLB++;
    numInTranslationFragments++;

    thread->getDTBPtr()->translateTiming(
        fragmentRequests[fragment_index], thread, this, (isLoad ?
        BaseTLB::Read : BaseTLB::Write));
}

bool
LSQ::StoreBuffer::canInsert() const
{
    /* @todo, support store amalgamation */
    return slots.size() < numSlots;
}

/** Remove request from the buffer (if present) and free it */
void
LSQ::StoreBuffer::deleteRequest(LSQRequestPtr request)
{
    auto found = std::find(slots.begin(), slots.end(), request);

    if (found != slots.end()) {
        DPRINTF(MinorMem, "Deleting request: %s %s %s from StoreBuffer\n",
            request, *found, *(request->inst));
        slots.erase(found);

        delete request;
    }
}

/** Append a store (or barrier) to the buffer and schedule a wakeup so the
 *  buffer is stepped next cycle */
void
LSQ::StoreBuffer::insert(LSQRequestPtr request)
{
    if (!canInsert()) {
        warn("%s: store buffer insertion without space to insert from"
            " inst: %s\n", name(), *(request->inst));
    }

    DPRINTF(MinorMem, "Pushing store: %s into store buffer\n", request);

    numUnissuedAccesses++;

    /* Barriers enter already Complete; everything else waits in the buffer */
    if (request->state != LSQRequest::Complete)
        request->setState(LSQRequest::StoreInStoreBuffer);

    slots.push_back(request);

    /* Let's try and wake up the processor for the next cycle to step
     * the store buffer */
    lsq.cpu.wakeupOnEvent(Pipeline::ExecuteStageId);
}

665LSQ::AddrRangeCoverage 666LSQ::StoreBuffer::canForwardDataToLoad(LSQRequestPtr request, 667 unsigned int &found_slot) 668{ 669 unsigned int slot_index = slots.size() - 1; 670 auto i = slots.rbegin(); 671 AddrRangeCoverage ret = NoAddrRangeCoverage; 672 673 /* Traverse the store buffer in reverse order (most to least recent) 674 * and try to find a slot whose address range overlaps this request */ 675 while (ret == NoAddrRangeCoverage && i != slots.rend()) { 676 LSQRequestPtr slot = *i; 677 678 /* Cache maintenance instructions go down via the store path * 679 * but they carry no data and they shouldn't be considered for 680 * forwarding */ 681 if (slot->packet && 682 slot->inst->id.threadId == request->inst->id.threadId && 683 !slot->packet->req->isCacheMaintenance()) { 684 AddrRangeCoverage coverage = slot->containsAddrRangeOf(request); 685 686 if (coverage != NoAddrRangeCoverage) { 687 DPRINTF(MinorMem, "Forwarding: slot: %d result: %s thisAddr:" 688 " 0x%x thisSize: %d slotAddr: 0x%x slotSize: %d\n", 689 slot_index, coverage, 690 request->request->getPaddr(), request->request->getSize(), 691 slot->request->getPaddr(), slot->request->getSize()); 692 693 found_slot = slot_index; 694 ret = coverage; 695 } 696 } 697 698 i++; 699 slot_index--; 700 } 701 702 return ret; 703} 704 705/** Fill the given packet with appropriate date from slot slot_number */ 706void 707LSQ::StoreBuffer::forwardStoreData(LSQRequestPtr load, 708 unsigned int slot_number) 709{ 710 assert(slot_number < slots.size()); 711 assert(load->packet); 712 assert(load->isLoad); 713 714 LSQRequestPtr store = slots[slot_number]; 715 716 assert(store->packet); 717 assert(store->containsAddrRangeOf(load) == FullAddrRangeCoverage); 718 719 Addr load_addr = load->request->getPaddr(); 720 Addr store_addr = store->request->getPaddr(); 721 Addr addr_offset = load_addr - store_addr; 722 723 unsigned int load_size = load->request->getSize(); 724 725 DPRINTF(MinorMem, "Forwarding %d bytes for addr: 0x%x from 
store buffer" 726 " slot: %d addr: 0x%x addressOffset: 0x%x\n", 727 load_size, load_addr, slot_number, 728 store_addr, addr_offset); 729 730 void *load_packet_data = load->packet->getPtr<void>(); 731 void *store_packet_data = store->packet->getPtr<uint8_t>() + addr_offset; 732 733 std::memcpy(load_packet_data, store_packet_data, load_size); 734} 735 736void 737LSQ::StoreBuffer::countIssuedStore(LSQRequestPtr request) 738{ 739 /* Barriers are accounted for as they are cleared from 740 * the queue, not after their transfers are complete */ 741 if (!request->isBarrier()) 742 numUnissuedAccesses--; 743} 744 745void 746LSQ::StoreBuffer::step() 747{ 748 DPRINTF(MinorMem, "StoreBuffer step numUnissuedAccesses: %d\n", 749 numUnissuedAccesses); 750 751 if (numUnissuedAccesses != 0 && lsq.state == LSQ::MemoryRunning) { 752 /* Clear all the leading barriers */ 753 while (!slots.empty() && 754 slots.front()->isComplete() && slots.front()->isBarrier()) 755 { 756 LSQRequestPtr barrier = slots.front(); 757 758 DPRINTF(MinorMem, "Clearing barrier for inst: %s\n", 759 *(barrier->inst)); 760 761 numUnissuedAccesses--; 762 lsq.clearMemBarrier(barrier->inst); 763 slots.pop_front(); 764 765 delete barrier; 766 } 767 768 auto i = slots.begin(); 769 bool issued = true; 770 unsigned int issue_count = 0; 771 772 /* Skip trying if the memory system is busy */ 773 if (lsq.state == LSQ::MemoryNeedsRetry) 774 issued = false; 775 776 /* Try to issue all stores in order starting from the head 777 * of the queue. 
Responses are allowed to be retired 778 * out of order */ 779 while (issued && 780 issue_count < storeLimitPerCycle && 781 lsq.canSendToMemorySystem() && 782 i != slots.end()) 783 { 784 LSQRequestPtr request = *i; 785 786 DPRINTF(MinorMem, "Considering request: %s, sentAllPackets: %d" 787 " state: %s\n", 788 *(request->inst), request->sentAllPackets(), 789 request->state); 790 791 if (request->isBarrier() && request->isComplete()) { 792 /* Give up at barriers */ 793 issued = false; 794 } else if (!(request->state == LSQRequest::StoreBufferIssuing && 795 request->sentAllPackets())) 796 { 797 DPRINTF(MinorMem, "Trying to send request: %s to memory" 798 " system\n", *(request->inst)); 799 800 if (lsq.tryToSend(request)) { 801 countIssuedStore(request); 802 issue_count++; 803 } else { 804 /* Don't step on to the next store buffer entry if this 805 * one hasn't issued all its packets as the store 806 * buffer must still enforce ordering */ 807 issued = false; 808 } 809 } 810 i++; 811 } 812 } 813} 814 815void 816LSQ::completeMemBarrierInst(MinorDynInstPtr inst, 817 bool committed) 818{ 819 if (committed) { 820 /* Not already sent to the store buffer as a store request? 
*/ 821 if (!inst->inStoreBuffer) { 822 /* Insert an entry into the store buffer to tick off barriers 823 * until there are none in flight */ 824 storeBuffer.insert(new BarrierDataRequest(*this, inst)); 825 } 826 } else { 827 /* Clear the barrier anyway if it wasn't actually committed */ 828 clearMemBarrier(inst); 829 } 830} 831 832void 833LSQ::StoreBuffer::minorTrace() const 834{ 835 unsigned int size = slots.size(); 836 unsigned int i = 0; 837 std::ostringstream os; 838 839 while (i < size) { 840 LSQRequestPtr request = slots[i]; 841 842 request->reportData(os); 843 844 i++; 845 if (i < numSlots) 846 os << ','; 847 } 848 849 while (i < numSlots) { 850 os << '-'; 851 852 i++; 853 if (i < numSlots) 854 os << ','; 855 } 856 857 MINORTRACE("addr=%s num_unissued_stores=%d\n", os.str(), 858 numUnissuedAccesses); 859} 860 861void 862LSQ::tryToSendToTransfers(LSQRequestPtr request) 863{ 864 if (state == MemoryNeedsRetry) { 865 DPRINTF(MinorMem, "Request needs retry, not issuing to" 866 " memory until retry arrives\n"); 867 return; 868 } 869 870 if (request->state == LSQRequest::InTranslation) { 871 DPRINTF(MinorMem, "Request still in translation, not issuing to" 872 " memory\n"); 873 return; 874 } 875 876 assert(request->state == LSQRequest::Translated || 877 request->state == LSQRequest::RequestIssuing || 878 request->state == LSQRequest::Failed || 879 request->state == LSQRequest::Complete); 880 881 if (requests.empty() || requests.front() != request) { 882 DPRINTF(MinorMem, "Request not at front of requests queue, can't" 883 " issue to memory\n"); 884 return; 885 } 886 887 if (transfers.unreservedRemainingSpace() == 0) { 888 DPRINTF(MinorMem, "No space to insert request into transfers" 889 " queue\n"); 890 return; 891 } 892 893 if (request->isComplete() || request->state == LSQRequest::Failed) { 894 DPRINTF(MinorMem, "Passing a %s transfer on to transfers" 895 " queue\n", (request->isComplete() ? 
"completed" : "failed")); 896 request->setState(LSQRequest::Complete); 897 request->setSkipped(); 898 moveFromRequestsToTransfers(request); 899 return; 900 } 901 902 if (!execute.instIsRightStream(request->inst)) { 903 /* Wrong stream, try to abort the transfer but only do so if 904 * there are no packets in flight */ 905 if (request->hasPacketsInMemSystem()) { 906 DPRINTF(MinorMem, "Request's inst. is from the wrong stream," 907 " waiting for responses before aborting request\n"); 908 } else { 909 DPRINTF(MinorMem, "Request's inst. is from the wrong stream," 910 " aborting request\n"); 911 request->setState(LSQRequest::Complete); 912 request->setSkipped(); 913 moveFromRequestsToTransfers(request); 914 } 915 return; 916 } 917 918 if (request->fault != NoFault) { 919 if (request->inst->staticInst->isPrefetch()) { 920 DPRINTF(MinorMem, "Not signalling fault for faulting prefetch\n"); 921 } 922 DPRINTF(MinorMem, "Moving faulting request into the transfers" 923 " queue\n"); 924 request->setState(LSQRequest::Complete); 925 request->setSkipped(); 926 moveFromRequestsToTransfers(request); 927 return; 928 } 929 930 bool is_load = request->isLoad; 931 bool is_llsc = request->request->isLLSC(); 932 bool is_swap = request->request->isSwap(); 933 bool bufferable = !(request->request->isStrictlyOrdered() || 934 is_llsc || is_swap); 935 936 if (is_load) { 937 if (numStoresInTransfers != 0) { 938 DPRINTF(MinorMem, "Load request with stores still in transfers" 939 " queue, stalling\n"); 940 return; 941 } 942 } else { 943 /* Store. Can it be sent to the store buffer? 
*/ 944 if (bufferable && !request->request->isMmappedIpr()) { 945 request->setState(LSQRequest::StoreToStoreBuffer); 946 moveFromRequestsToTransfers(request); 947 DPRINTF(MinorMem, "Moving store into transfers queue\n"); 948 return; 949 } 950 } 951 952 /* Check if this is the head instruction (and so must be executable as 953 * its stream sequence number was checked above) for loads which must 954 * not be speculatively issued and stores which must be issued here */ 955 if (!bufferable) { 956 if (!execute.instIsHeadInst(request->inst)) { 957 DPRINTF(MinorMem, "Memory access not the head inst., can't be" 958 " sure it can be performed, not issuing\n"); 959 return; 960 } 961 962 unsigned int forwarding_slot = 0; 963 964 if (storeBuffer.canForwardDataToLoad(request, forwarding_slot) != 965 NoAddrRangeCoverage) 966 { 967 DPRINTF(MinorMem, "Memory access can receive forwarded data" 968 " from the store buffer, need to wait for store buffer to" 969 " drain\n"); 970 return; 971 } 972 } 973 974 /* True: submit this packet to the transfers queue to be sent to the 975 * memory system. 976 * False: skip the memory and push a packet for this request onto 977 * requests */ 978 bool do_access = true; 979 980 if (!is_llsc) { 981 /* Check for match in the store buffer */ 982 if (is_load) { 983 unsigned int forwarding_slot = 0; 984 AddrRangeCoverage forwarding_result = 985 storeBuffer.canForwardDataToLoad(request, 986 forwarding_slot); 987 988 switch (forwarding_result) { 989 case FullAddrRangeCoverage: 990 /* Forward data from the store buffer into this request and 991 * repurpose this request's packet into a response packet */ 992 storeBuffer.forwardStoreData(request, forwarding_slot); 993 request->packet->makeResponse(); 994 995 /* Just move between queues, no access */ 996 do_access = false; 997 break; 998 case PartialAddrRangeCoverage: 999 DPRINTF(MinorMem, "Load partly satisfied by store buffer" 1000 " data. 
Must wait for the store to complete\n"); 1001 return; 1002 break; 1003 case NoAddrRangeCoverage: 1004 DPRINTF(MinorMem, "No forwardable data from store buffer\n"); 1005 /* Fall through to try access */ 1006 break; 1007 } 1008 } 1009 } else { 1010 if (!canSendToMemorySystem()) { 1011 DPRINTF(MinorMem, "Can't send request to memory system yet\n"); 1012 return; 1013 } 1014 1015 SimpleThread &thread = *cpu.threads[request->inst->id.threadId]; 1016 1017 TheISA::PCState old_pc = thread.pcState(); 1018 ExecContext context(cpu, thread, execute, request->inst); 1019 1020 /* Handle LLSC requests and tests */ 1021 if (is_load) { 1022 TheISA::handleLockedRead(&context, request->request); 1023 } else { 1024 do_access = TheISA::handleLockedWrite(&context, 1025 request->request, cacheBlockMask); 1026 1027 if (!do_access) { 1028 DPRINTF(MinorMem, "Not perfoming a memory " 1029 "access for store conditional\n"); 1030 } 1031 } 1032 thread.pcState(old_pc); 1033 } 1034 1035 /* See the do_access comment above */ 1036 if (do_access) { 1037 if (!canSendToMemorySystem()) { 1038 DPRINTF(MinorMem, "Can't send request to memory system yet\n"); 1039 return; 1040 } 1041 1042 /* Remember if this is an access which can't be idly 1043 * discarded by an interrupt */ 1044 if (!bufferable && !request->issuedToMemory) { 1045 numAccessesIssuedToMemory++; 1046 request->issuedToMemory = true; 1047 } 1048 1049 if (tryToSend(request)) { 1050 moveFromRequestsToTransfers(request); 1051 } 1052 } else { 1053 request->setState(LSQRequest::Complete); 1054 moveFromRequestsToTransfers(request); 1055 } 1056} 1057 1058bool 1059LSQ::tryToSend(LSQRequestPtr request) 1060{ 1061 bool ret = false; 1062 1063 if (!canSendToMemorySystem()) { 1064 DPRINTF(MinorMem, "Can't send request: %s yet, no space in memory\n", 1065 *(request->inst)); 1066 } else { 1067 PacketPtr packet = request->getHeadPacket(); 1068 1069 DPRINTF(MinorMem, "Trying to send request: %s addr: 0x%x\n", 1070 *(request->inst), packet->req->getVaddr()); 
1071 1072 /* The sender state of the packet *must* be an LSQRequest 1073 * so the response can be correctly handled */ 1074 assert(packet->findNextSenderState<LSQRequest>()); 1075 1076 if (request->request->isMmappedIpr()) { 1077 ThreadContext *thread = 1078 cpu.getContext(cpu.contextToThread( 1079 request->request->contextId())); 1080 1081 if (request->isLoad) { 1082 DPRINTF(MinorMem, "IPR read inst: %s\n", *(request->inst)); 1083 TheISA::handleIprRead(thread, packet); 1084 } else { 1085 DPRINTF(MinorMem, "IPR write inst: %s\n", *(request->inst)); 1086 TheISA::handleIprWrite(thread, packet); 1087 } 1088 1089 request->stepToNextPacket(); 1090 ret = request->sentAllPackets(); 1091 1092 if (!ret) { 1093 DPRINTF(MinorMem, "IPR access has another packet: %s\n", 1094 *(request->inst)); 1095 } 1096 1097 if (ret) 1098 request->setState(LSQRequest::Complete); 1099 else 1100 request->setState(LSQRequest::RequestIssuing); 1101 } else if (dcachePort.sendTimingReq(packet)) { 1102 DPRINTF(MinorMem, "Sent data memory request\n"); 1103 1104 numAccessesInMemorySystem++; 1105 1106 request->stepToNextPacket(); 1107 1108 ret = request->sentAllPackets(); 1109 1110 switch (request->state) { 1111 case LSQRequest::Translated: 1112 case LSQRequest::RequestIssuing: 1113 /* Fully or partially issued a request in the transfers 1114 * queue */ 1115 request->setState(LSQRequest::RequestIssuing); 1116 break; 1117 case LSQRequest::StoreInStoreBuffer: 1118 case LSQRequest::StoreBufferIssuing: 1119 /* Fully or partially issued a request in the store 1120 * buffer */ 1121 request->setState(LSQRequest::StoreBufferIssuing); 1122 break; 1123 default:
#include "cpu/minor/cpu.hh"
#include "cpu/minor/exec_context.hh"
#include "cpu/minor/execute.hh"
#include "cpu/minor/pipeline.hh"
#include "debug/Activity.hh"
#include "debug/MinorMem.hh"

namespace Minor
{

/** Returns the offset of addr into an aligned block of size block_size.
 *  block_size must be a power of two for the mask below to be valid */
static Addr
addrBlockOffset(Addr addr, unsigned int block_size)
{
    return addr & (block_size - 1);
}

/** Returns true if the given [addr .. addr+size-1] transfer needs to be
 *  fragmented across a block size of block_size */
static bool
transferNeedsBurst(Addr addr, unsigned int size, unsigned int block_size)
{
    return (addrBlockOffset(addr, block_size) + size) > block_size;
}

/** Base LSQ request.  Allocates the underlying memory-system Request here;
 *  the Packet is made later (after address translation) via makePacket.
 *  data_ (write data) and packet become the property of this object and
 *  are freed in ~LSQRequest */
LSQ::LSQRequest::LSQRequest(LSQ &port_, MinorDynInstPtr inst_, bool isLoad_,
    PacketDataPtr data_, uint64_t *res_) :
    SenderState(),
    port(port_),
    inst(inst_),
    isLoad(isLoad_),
    data(data_),
    packet(NULL),
    request(),
    fault(NoFault),
    res(res_),
    skipped(false),
    issuedToMemory(false),
    state(NotIssued)
{
    request = std::make_shared<Request>();
}

/** Classify how much of the byte range [req2_addr .. req2_addr+req2_size-1]
 *  is covered by [req1_addr .. req1_addr+req1_size-1]: none, all, or a
 *  partial overlap */
LSQ::AddrRangeCoverage
LSQ::LSQRequest::containsAddrRangeOf(
    Addr req1_addr, unsigned int req1_size,
    Addr req2_addr, unsigned int req2_size)
{
    /* 'end' here means the address of the byte just past the request
     * blocks */
    Addr req2_end_addr = req2_addr + req2_size;
    Addr req1_end_addr = req1_addr + req1_size;

    AddrRangeCoverage ret;

    if (req1_addr >= req2_end_addr || req1_end_addr <= req2_addr)
        ret = NoAddrRangeCoverage;
    else if (req1_addr <= req2_addr && req1_end_addr >= req2_end_addr)
        ret = FullAddrRangeCoverage;
    else
        ret = PartialAddrRangeCoverage;

    return ret;
}

/** Does this request's *physical* address range cover other_request's?
 *  Both requests must already have valid paddrs for this to be meaningful */
LSQ::AddrRangeCoverage
LSQ::LSQRequest::containsAddrRangeOf(LSQRequestPtr other_request)
{
    return containsAddrRangeOf(request->getPaddr(), request->getSize(),
        other_request->request->getPaddr(),
        other_request->request->getSize());
}

/** Is this request carrying a memory barrier instruction? */
bool
LSQ::LSQRequest::isBarrier()
{
    return inst->isInst() && inst->staticInst->isMemBarrier();
}

/** A store still waiting to be moved from transfers to the store buffer */
bool
LSQ::LSQRequest::needsToBeSentToStoreBuffer()
{
    return state == StoreToStoreBuffer;
}

/** Single point for all state transitions so they can be traced */
void
LSQ::LSQRequest::setState(LSQRequestState new_state)
{
    DPRINTFS(MinorMem, (&port), "Setting state from %d to %d for request:"
        " %s\n", state, new_state, *inst);
    state = new_state;
}

bool
LSQ::LSQRequest::isComplete() const
{
    /* @todo, There is currently only one 'completed' state.  This
     *  may not be a good choice */
    return state == Complete;
}

/** Report read/write direction, instruction and state for MinorTrace */
void
LSQ::LSQRequest::reportData(std::ostream &os) const
{
    os << (isLoad ? 'R' : 'W') << ';';
    inst->reportData(os);
    os << ';' << state;
}

/** Pretty-print AddrRangeCoverage values for debug output */
std::ostream &
operator <<(std::ostream &os, LSQ::AddrRangeCoverage coverage)
{
    switch (coverage) {
      case LSQ::PartialAddrRangeCoverage:
        os << "PartialAddrRangeCoverage";
        break;
      case LSQ::FullAddrRangeCoverage:
        os << "FullAddrRangeCoverage";
        break;
      case LSQ::NoAddrRangeCoverage:
        os << "NoAddrRangeCoverage";
        break;
      default:
        os << "AddrRangeCoverage-" << static_cast<int>(coverage);
        break;
    }
    return os;
}

/** Pretty-print LSQRequestState values for debug output */
std::ostream &
operator <<(std::ostream &os, LSQ::LSQRequest::LSQRequestState state)
{
    switch (state) {
      case LSQ::LSQRequest::NotIssued:
        os << "NotIssued";
        break;
      case LSQ::LSQRequest::InTranslation:
        os << "InTranslation";
        break;
      case LSQ::LSQRequest::Translated:
        os << "Translated";
        break;
      case LSQ::LSQRequest::Failed:
        os << "Failed";
        break;
      case LSQ::LSQRequest::RequestIssuing:
        os << "RequestIssuing";
        break;
      case LSQ::LSQRequest::StoreToStoreBuffer:
        os << "StoreToStoreBuffer";
        break;
      case LSQ::LSQRequest::StoreInStoreBuffer:
        os << "StoreInStoreBuffer";
        break;
      case LSQ::LSQRequest::StoreBufferIssuing:
        os << "StoreBufferIssuing";
        break;
      case LSQ::LSQRequest::RequestNeedsRetry:
        os << "RequestNeedsRetry";
        break;
      case LSQ::LSQRequest::StoreBufferNeedsRetry:
        os << "StoreBufferNeedsRetry";
        break;
      case LSQ::LSQRequest::Complete:
        os << "Complete";
        break;
      default:
        os << "LSQRequestState-" << static_cast<int>(state);
        break;
    }
    return os;
}

/** Retire a barrier: if inst is at (or past) the last recorded in-flight
 *  barrier for its thread, clear that record so later memory ops are no
 *  longer held back by it */
void
LSQ::clearMemBarrier(MinorDynInstPtr inst)
{
    bool is_last_barrier =
        inst->id.execSeqNum >= lastMemBarrier[inst->id.threadId];

    DPRINTF(MinorMem, "Moving %s barrier out of store buffer inst: %s\n",
        (is_last_barrier ? "last" : "a"), *inst);

    if (is_last_barrier)
        lastMemBarrier[inst->id.threadId] = 0;
}

/** TLB translation callback for an unfragmented access.  Records the
 *  fault (if any), builds the packet and offers the request to the
 *  transfers queue */
void
LSQ::SingleDataRequest::finish(const Fault &fault_, const RequestPtr &request_,
                               ThreadContext *tc, BaseTLB::Mode mode)
{
    fault = fault_;

    port.numAccessesInDTLB--;

    DPRINTFS(MinorMem, (&port), "Received translation response for"
        " request: %s\n", *inst);

    makePacket();

    setState(Translated);
    port.tryToSendToTransfers(this);

    /* Let's try and wake up the processor for the next cycle */
    port.cpu.wakeupOnEvent(Pipeline::ExecuteStageId);
}

/** Kick off DTLB translation of the whole (unfragmented) request */
void
LSQ::SingleDataRequest::startAddrTranslation()
{
    ThreadContext *thread = port.cpu.getContext(
        inst->id.threadId);

    port.numAccessesInDTLB++;

    setState(LSQ::LSQRequest::InTranslation);

    DPRINTFS(MinorMem, (&port), "Submitting DTLB request\n");
    /* Submit the translation request.  The response will come through
     *  finish/markDelayed on the LSQRequest as it bears the Translation
     *  interface */
    thread->getDTBPtr()->translateTiming(
        request, thread, this, (isLoad ? BaseTLB::Read : BaseTLB::Write));
}

/** Keep the response packet (now owned by this request) and mark the
 *  whole access complete */
void
LSQ::SingleDataRequest::retireResponse(PacketPtr packet_)
{
    DPRINTFS(MinorMem, (&port), "Retiring packet\n");
    packet = packet_;
    packetInFlight = false;
    setState(Complete);
}

/** TLB translation callback for one fragment of a split access.
 *  Fragments are translated strictly in order; a fault abandons the
 *  remaining fragments, otherwise the next fragment's translation is
 *  scheduled (via translationEvent) until all are done */
void
LSQ::SplitDataRequest::finish(const Fault &fault_, const RequestPtr &request_,
                              ThreadContext *tc, BaseTLB::Mode mode)
{
    fault = fault_;

    port.numAccessesInDTLB--;

    unsigned int M5_VAR_USED expected_fragment_index =
        numTranslatedFragments;

    numInTranslationFragments--;
    numTranslatedFragments++;

    DPRINTFS(MinorMem, (&port), "Received translation response for fragment"
        " %d of request: %s\n", expected_fragment_index, *inst);

    assert(request_ == fragmentRequests[expected_fragment_index]);

    /* Wake up next cycle to get things going again in case the
     * tryToSendToTransfers does take */
    port.cpu.wakeupOnEvent(Pipeline::ExecuteStageId);

    if (fault != NoFault) {
        /* tryToSendToTransfers will handle the fault */

        DPRINTFS(MinorMem, (&port), "Faulting translation for fragment:"
            " %d of request: %s\n",
            expected_fragment_index, *inst);

        setState(Translated);
        port.tryToSendToTransfers(this);
    } else if (numTranslatedFragments == numFragments) {
        /* Last fragment translated: build all fragment packets and offer
         *  the whole burst to the transfers queue */
        makeFragmentPackets();

        setState(Translated);
        port.tryToSendToTransfers(this);
    } else {
        /* Avoid calling translateTiming from within ::finish */
        assert(!translationEvent.scheduled());
        port.cpu.schedule(translationEvent, curTick());
    }
}

/** A request that must be fragmented into cache-line-sized pieces.
 *  Fragment bookkeeping starts empty; makeFragmentRequests sizes it once
 *  the caller has populated the parent request */
LSQ::SplitDataRequest::SplitDataRequest(LSQ &port_, MinorDynInstPtr inst_,
    bool isLoad_, PacketDataPtr data_, uint64_t *res_) :
    LSQRequest(port_, inst_, isLoad_, data_, res_),
    translationEvent([this]{ sendNextFragmentToTranslation(); },
                     "translationEvent"),
    numFragments(0),
    numInTranslationFragments(0),
    numTranslatedFragments(0),
    numIssuedFragments(0),
    numRetiredFragments(0),
    fragmentRequests(),
    fragmentPackets()
{
    /* Don't know how many elements are needed until the request is
     *  populated by the caller. */
}

/** Fragment packets are owned by this request (fragment Requests are
 *  shared_ptrs and free themselves) */
LSQ::SplitDataRequest::~SplitDataRequest()
{
    for (auto i = fragmentPackets.begin();
         i != fragmentPackets.end(); i++)
    {
        delete *i;
    }
}

/** Carve the parent request's virtual address range into per-cache-line
 *  fragment Requests ready for translation */
void
LSQ::SplitDataRequest::makeFragmentRequests()
{
    Addr base_addr = request->getVaddr();
    unsigned int whole_size = request->getSize();
    unsigned int line_width = port.lineWidth;

    unsigned int fragment_size;
    Addr fragment_addr;

    /* Assume that this transfer is across potentially many block snap
     * boundaries:
     *
     * |      _|________|________|________|___      |
     * |     |0| 1      | 2      | 3      | 4 |     |
     * |     |_|________|________|________|___|     |
     * |       |        |        |        |         |
     *
     *  The first transfer (0) can be up to lineWidth in size.
     *  All the middle transfers (1-3) are lineWidth in size
     *  The last transfer (4) can be from zero to lineWidth - 1 in size
     */
    unsigned int first_fragment_offset =
        addrBlockOffset(base_addr, line_width);
    unsigned int last_fragment_size =
        addrBlockOffset(base_addr + whole_size, line_width);
    unsigned int first_fragment_size =
        line_width - first_fragment_offset;

    unsigned int middle_fragments_total_size =
        whole_size - (first_fragment_size + last_fragment_size);

    assert(addrBlockOffset(middle_fragments_total_size, line_width) == 0);

    unsigned int middle_fragment_count =
        middle_fragments_total_size / line_width;

    numFragments = 1 /* first */ + middle_fragment_count +
        (last_fragment_size == 0 ? 0 : 1);

    DPRINTFS(MinorMem, (&port), "Dividing transfer into %d fragmentRequests."
        " First fragment size: %d Last fragment size: %d\n",
        numFragments, first_fragment_size,
        (last_fragment_size == 0 ? line_width : last_fragment_size));

    assert(((middle_fragment_count * line_width) +
        first_fragment_size + last_fragment_size) == whole_size);

    fragment_addr = base_addr;
    fragment_size = first_fragment_size;

    /* Just past the last address in the request */
    Addr end_addr = base_addr + whole_size;

    for (unsigned int fragment_index = 0; fragment_index < numFragments;
         fragment_index++)
    {
        bool M5_VAR_USED is_last_fragment = false;

        if (fragment_addr == base_addr) {
            /* First fragment */
            fragment_size = first_fragment_size;
        } else {
            if ((fragment_addr + line_width) > end_addr) {
                /* Adjust size of last fragment */
                fragment_size = end_addr - fragment_addr;
                is_last_fragment = true;
            } else {
                /* Middle fragments */
                fragment_size = line_width;
            }
        }

        RequestPtr fragment = std::make_shared<Request>();

        /* Fragments inherit the parent's context, flags, master and PC;
         *  only the address/size differ */
        fragment->setContext(request->contextId());
        fragment->setVirt(0 /* asid */,
            fragment_addr, fragment_size, request->getFlags(),
            request->masterId(),
            request->getPC());

        DPRINTFS(MinorMem, (&port), "Generating fragment addr: 0x%x size: %d"
            " (whole request addr: 0x%x size: %d) %s\n",
            fragment_addr, fragment_size, base_addr, whole_size,
            (is_last_fragment ? "last fragment" : ""));

        fragment_addr += fragment_size;

        fragmentRequests.push_back(fragment);
    }
}

/** Build one Packet per translated fragment (for stores, each carrying
 *  its slice of the write data) and then the overall parent packet */
void
LSQ::SplitDataRequest::makeFragmentPackets()
{
    Addr base_addr = request->getVaddr();

    DPRINTFS(MinorMem, (&port), "Making packets for request: %s\n", *inst);

    for (unsigned int fragment_index = 0; fragment_index < numFragments;
         fragment_index++)
    {
        RequestPtr fragment = fragmentRequests[fragment_index];

        /* NOTE(review): the '%d' below receives a string argument; gem5's
         *  ccprintf formats by argument type so this prints, but '%s'
         *  would be clearer */
        DPRINTFS(MinorMem, (&port), "Making packet %d for request: %s"
            " (%d, 0x%x)\n",
            fragment_index, *inst,
            (fragment->hasPaddr() ? "has paddr" : "no paddr"),
            (fragment->hasPaddr() ? fragment->getPaddr() : 0));

        Addr fragment_addr = fragment->getVaddr();
        unsigned int fragment_size = fragment->getSize();

        uint8_t *request_data = NULL;

        if (!isLoad) {
            /* Split data for Packets.  Will become the property of the
             *  outgoing Packets */
            request_data = new uint8_t[fragment_size];
            std::memcpy(request_data, data + (fragment_addr - base_addr),
                fragment_size);
        }

        assert(fragment->hasPaddr());

        PacketPtr fragment_packet =
            makePacketForRequest(fragment, isLoad, this, request_data);

        fragmentPackets.push_back(fragment_packet);
        /* Accumulate flags in parent request */
        request->setFlags(fragment->getFlags());
    }

    /* Might as well make the overall/response packet here */
    /* Get the physical address for the whole request/packet from the first
     *  fragment */
    request->setPaddr(fragmentRequests[0]->getPaddr());
    makePacket();
}

/** Fragment the request, then start translating fragments one at a time;
 *  SplitDataRequest::finish chains the rest */
void
LSQ::SplitDataRequest::startAddrTranslation()
{
    setState(LSQ::LSQRequest::InTranslation);

    makeFragmentRequests();

    numInTranslationFragments = 0;
    numTranslatedFragments = 0;

    /* @todo, just do these in sequence for now with
     * a loop of:
     * do {
     *  sendNextFragmentToTranslation ; translateTiming ; finish
     * } while (numTranslatedFragments != numFragments);
     */

    /* Do first translation */
    sendNextFragmentToTranslation();
}

/** Next fragment packet still to be sent to the memory system */
PacketPtr
LSQ::SplitDataRequest::getHeadPacket()
{
    assert(numIssuedFragments < numFragments);

    return fragmentPackets[numIssuedFragments];
}

/** Advance past the packet just sent */
void
LSQ::SplitDataRequest::stepToNextPacket()
{
    assert(numIssuedFragments < numFragments);

    numIssuedFragments++;
}

/** Retire one fragment response.  For loads, its data is copied into the
 *  overall response buffer; when the last fragment (or a skipped burst's
 *  last in-flight packet) retires, the parent packet is turned into the
 *  overall response */
void
LSQ::SplitDataRequest::retireResponse(PacketPtr response)
{
    assert(numRetiredFragments < numFragments);

    DPRINTFS(MinorMem, (&port), "Retiring fragment addr: 0x%x size: %d"
        " offset: 0x%x (retired fragment num: %d) %s\n",
        response->req->getVaddr(), response->req->getSize(),
        request->getVaddr() - response->req->getVaddr(),
        numRetiredFragments,
        (fault == NoFault ? "" : fault->name()));

    numRetiredFragments++;

    if (skipped) {
        /* Skip because we already knew the request had faulted or been
         *  skipped */
        DPRINTFS(MinorMem, (&port), "Skipping this fragment\n");
    } else if (response->isError()) {
        /* Mark up the error and leave to execute to handle it */
        DPRINTFS(MinorMem, (&port), "Fragment has an error, skipping\n");
        setSkipped();
        packet->copyError(response);
    } else {
        if (isLoad) {
            if (!data) {
                /* For a split transfer, a Packet must be constructed
                 *  to contain all returning data.  This is that packet's
                 *  data */
                data = new uint8_t[request->getSize()];
            }

            /* Populate the portion of the overall response data represented
             *  by the response fragment */
            std::memcpy(
                data + (response->req->getVaddr() - request->getVaddr()),
                response->getConstPtr<uint8_t>(),
                response->req->getSize());
        }
    }

    /* Complete early if we're skipping and there are no more in-flight
     *  accesses */
    if (skipped && !hasPacketsInMemSystem()) {
        DPRINTFS(MinorMem, (&port), "Completed skipped burst\n");
        setState(Complete);
        if (packet->needsResponse())
            packet->makeResponse();
    }

    if (numRetiredFragments == numFragments)
        setState(Complete);

    if (!skipped && isComplete()) {
        DPRINTFS(MinorMem, (&port), "Completed burst %d\n", packet != NULL);

        DPRINTFS(MinorMem, (&port), "Retired packet isRead: %d isWrite: %d"
            " needsResponse: %d packetSize: %s requestSize: %s responseSize:"
            " %s\n", packet->isRead(), packet->isWrite(),
            packet->needsResponse(), packet->getSize(), request->getSize(),
            response->getSize());

        /* A request can become complete by several paths, this is a sanity
         *  check to make sure the packet's data is created */
        if (!data) {
            data = new uint8_t[request->getSize()];
        }

        if (isLoad) {
            DPRINTFS(MinorMem, (&port), "Copying read data\n");
            std::memcpy(packet->getPtr<uint8_t>(), data, request->getSize());
        }
        packet->makeResponse();
    }

    /* Packets are all deallocated together in ~SplitDataRequest */
}

/** Submit the next untranslated fragment to the DTLB; the response
 *  arrives through SplitDataRequest::finish */
void
LSQ::SplitDataRequest::sendNextFragmentToTranslation()
{
    unsigned int fragment_index = numTranslatedFragments;

    ThreadContext *thread = port.cpu.getContext(
        inst->id.threadId);

    DPRINTFS(MinorMem, (&port), "Submitting DTLB request for fragment: %d\n",
        fragment_index);

    port.numAccessesInDTLB++;
    numInTranslationFragments++;

    thread->getDTBPtr()->translateTiming(
        fragmentRequests[fragment_index], thread, this, (isLoad ?
        BaseTLB::Read : BaseTLB::Write));
}

bool
LSQ::StoreBuffer::canInsert() const
{
    /* @todo, support store amalgamation */
    return slots.size() < numSlots;
}

/** Remove (and delete) a request from the store buffer if it is present.
 *  A request not found in slots is left untouched */
void
LSQ::StoreBuffer::deleteRequest(LSQRequestPtr request)
{
    auto found = std::find(slots.begin(), slots.end(), request);

    if (found != slots.end()) {
        DPRINTF(MinorMem, "Deleting request: %s %s %s from StoreBuffer\n",
            request, *found, *(request->inst));
        slots.erase(found);

        delete request;
    }
}

/** Append a store (or barrier) to the store buffer.  The caller is
 *  expected to have checked canInsert(); overflow only warns */
void
LSQ::StoreBuffer::insert(LSQRequestPtr request)
{
    if (!canInsert()) {
        warn("%s: store buffer insertion without space to insert from"
            " inst: %s\n", name(), *(request->inst));
    }

    DPRINTF(MinorMem, "Pushing store: %s into store buffer\n", request);

    numUnissuedAccesses++;

    /* Already-Complete requests (e.g. barriers) keep their state */
    if (request->state != LSQRequest::Complete)
        request->setState(LSQRequest::StoreInStoreBuffer);

    slots.push_back(request);

    /* Let's try and wake up the processor for the next cycle to step
     *  the store buffer */
    lsq.cpu.wakeupOnEvent(Pipeline::ExecuteStageId);
}
/** Search the store buffer, newest slot first, for a store whose paddr
 *  range overlaps this (load) request.  Returns the coverage found and,
 *  via found_slot, the index of the matching slot.  Only same-thread,
 *  packet-bearing, non-cache-maintenance slots are candidates */
LSQ::AddrRangeCoverage
LSQ::StoreBuffer::canForwardDataToLoad(LSQRequestPtr request,
    unsigned int &found_slot)
{
    unsigned int slot_index = slots.size() - 1;
    auto i = slots.rbegin();
    AddrRangeCoverage ret = NoAddrRangeCoverage;

    /* Traverse the store buffer in reverse order (most to least recent)
     * and try to find a slot whose address range overlaps this request */
    while (ret == NoAddrRangeCoverage && i != slots.rend()) {
        LSQRequestPtr slot = *i;

        /* Cache maintenance instructions go down via the store path but
         * they carry no data and they shouldn't be considered
         * for forwarding */
        if (slot->packet &&
            slot->inst->id.threadId == request->inst->id.threadId &&
            !slot->packet->req->isCacheMaintenance()) {
            AddrRangeCoverage coverage = slot->containsAddrRangeOf(request);

            if (coverage != NoAddrRangeCoverage) {
                DPRINTF(MinorMem, "Forwarding: slot: %d result: %s thisAddr:"
                    " 0x%x thisSize: %d slotAddr: 0x%x slotSize: %d\n",
                    slot_index, coverage,
                    request->request->getPaddr(), request->request->getSize(),
                    slot->request->getPaddr(), slot->request->getSize());

                found_slot = slot_index;
                ret = coverage;
            }
        }

        i++;
        slot_index--;
    }

    return ret;
}

/** Fill the given packet with appropriate data from slot slot_number.
 *  The store in that slot must fully cover the load's address range
 *  (FullAddrRangeCoverage, asserted below) */
void
LSQ::StoreBuffer::forwardStoreData(LSQRequestPtr load,
    unsigned int slot_number)
{
    assert(slot_number < slots.size());
    assert(load->packet);
    assert(load->isLoad);

    LSQRequestPtr store = slots[slot_number];

    assert(store->packet);
    assert(store->containsAddrRangeOf(load) == FullAddrRangeCoverage);

    Addr load_addr = load->request->getPaddr();
    Addr store_addr = store->request->getPaddr();
    /* Offset of the load's data within the store's data buffer */
    Addr addr_offset = load_addr - store_addr;

    unsigned int load_size = load->request->getSize();

    DPRINTF(MinorMem, "Forwarding %d bytes for addr: 0x%x from store buffer"
        " slot: %d addr: 0x%x addressOffset: 0x%x\n",
        load_size, load_addr, slot_number,
        store_addr, addr_offset);

    void *load_packet_data = load->packet->getPtr<void>();
    void *store_packet_data = store->packet->getPtr<uint8_t>() + addr_offset;

    std::memcpy(load_packet_data, store_packet_data, load_size);
}

/** Account for a store which has been (partially or fully) issued from
 *  the store buffer to the memory system */
void
LSQ::StoreBuffer::countIssuedStore(LSQRequestPtr request)
{
    /* Barriers are accounted for as they are cleared from
     *  the queue, not after their transfers are complete */
    if (!request->isBarrier())
        numUnissuedAccesses--;
}

/** Per-cycle store buffer activity: pop completed leading barriers, then
 *  issue queued stores, in order, up to storeLimitPerCycle (responses may
 *  still retire out of order) */
void
LSQ::StoreBuffer::step()
{
    DPRINTF(MinorMem, "StoreBuffer step numUnissuedAccesses: %d\n",
        numUnissuedAccesses);

    if (numUnissuedAccesses != 0 && lsq.state == LSQ::MemoryRunning) {
        /* Clear all the leading barriers */
        while (!slots.empty() &&
            slots.front()->isComplete() && slots.front()->isBarrier())
        {
            LSQRequestPtr barrier = slots.front();

            DPRINTF(MinorMem, "Clearing barrier for inst: %s\n",
                *(barrier->inst));

            numUnissuedAccesses--;
            lsq.clearMemBarrier(barrier->inst);
            slots.pop_front();

            delete barrier;
        }

        auto i = slots.begin();
        bool issued = true;
        unsigned int issue_count = 0;

        /* Skip trying if the memory system is busy */
        if (lsq.state == LSQ::MemoryNeedsRetry)
            issued = false;

        /* Try to issue all stores in order starting from the head
         *  of the queue.  Responses are allowed to be retired
         *  out of order */
        while (issued &&
            issue_count < storeLimitPerCycle &&
            lsq.canSendToMemorySystem() &&
            i != slots.end())
        {
            LSQRequestPtr request = *i;

            DPRINTF(MinorMem, "Considering request: %s, sentAllPackets: %d"
                " state: %s\n",
                *(request->inst), request->sentAllPackets(),
                request->state);

            if (request->isBarrier() && request->isComplete()) {
                /* Give up at barriers */
                issued = false;
            } else if (!(request->state == LSQRequest::StoreBufferIssuing &&
                request->sentAllPackets()))
            {
                DPRINTF(MinorMem, "Trying to send request: %s to memory"
                    " system\n", *(request->inst));

                if (lsq.tryToSend(request)) {
                    countIssuedStore(request);
                    issue_count++;
                } else {
                    /* Don't step on to the next store buffer entry if this
                     *  one hasn't issued all its packets as the store
                     *  buffer must still enforce ordering */
                    issued = false;
                }
            }
            i++;
        }
    }
}

/** A memory barrier has left Execute.  If it committed, push a
 *  BarrierDataRequest through the store buffer so the barrier is only
 *  cleared once all earlier stores have drained; otherwise just clear
 *  the LSQ's barrier record */
void
LSQ::completeMemBarrierInst(MinorDynInstPtr inst,
    bool committed)
{
    if (committed) {
        /* Not already sent to the store buffer as a store request? */
        if (!inst->inStoreBuffer) {
            /* Insert an entry into the store buffer to tick off barriers
             *  until there are none in flight */
            storeBuffer.insert(new BarrierDataRequest(*this, inst));
        }
    } else {
        /* Clear the barrier anyway if it wasn't actually committed */
        clearMemBarrier(inst);
    }
}

/** MinorTrace output: one field per slot (occupied slots report their
 *  request, empty slots print '-') */
void
LSQ::StoreBuffer::minorTrace() const
{
    unsigned int size = slots.size();
    unsigned int i = 0;
    std::ostringstream os;

    while (i < size) {
        LSQRequestPtr request = slots[i];

        request->reportData(os);

        i++;
        if (i < numSlots)
            os << ',';
    }

    /* Pad the remaining (empty) slots */
    while (i < numSlots) {
        os << '-';

        i++;
        if (i < numSlots)
            os << ',';
    }

    MINORTRACE("addr=%s num_unissued_stores=%d\n", os.str(),
        numUnissuedAccesses);
}

/** Try to move the request at the head of the requests queue into the
 *  transfers queue, issuing it to the memory system on the way if it
 *  needs an access.  Bufferable stores are diverted towards the store
 *  buffer; loads may be satisfied by store-buffer forwarding; LL/SC is
 *  handled via the ISA's locked-access hooks.  Returns without effect
 *  whenever the request isn't ready or resources are unavailable */
void
LSQ::tryToSendToTransfers(LSQRequestPtr request)
{
    if (state == MemoryNeedsRetry) {
        DPRINTF(MinorMem, "Request needs retry, not issuing to"
            " memory until retry arrives\n");
        return;
    }

    if (request->state == LSQRequest::InTranslation) {
        DPRINTF(MinorMem, "Request still in translation, not issuing to"
            " memory\n");
        return;
    }

    assert(request->state == LSQRequest::Translated ||
        request->state == LSQRequest::RequestIssuing ||
        request->state == LSQRequest::Failed ||
        request->state == LSQRequest::Complete);

    if (requests.empty() || requests.front() != request) {
        DPRINTF(MinorMem, "Request not at front of requests queue, can't"
            " issue to memory\n");
        return;
    }

    if (transfers.unreservedRemainingSpace() == 0) {
        DPRINTF(MinorMem, "No space to insert request into transfers"
            " queue\n");
        return;
    }

    if (request->isComplete() || request->state == LSQRequest::Failed) {
        DPRINTF(MinorMem, "Passing a %s transfer on to transfers"
            " queue\n", (request->isComplete() ? "completed" : "failed"));
        request->setState(LSQRequest::Complete);
        request->setSkipped();
        moveFromRequestsToTransfers(request);
        return;
    }

    if (!execute.instIsRightStream(request->inst)) {
        /* Wrong stream, try to abort the transfer but only do so if
         *  there are no packets in flight */
        if (request->hasPacketsInMemSystem()) {
            DPRINTF(MinorMem, "Request's inst. is from the wrong stream,"
                " waiting for responses before aborting request\n");
        } else {
            DPRINTF(MinorMem, "Request's inst. is from the wrong stream,"
                " aborting request\n");
            request->setState(LSQRequest::Complete);
            request->setSkipped();
            moveFromRequestsToTransfers(request);
        }
        return;
    }

    if (request->fault != NoFault) {
        if (request->inst->staticInst->isPrefetch()) {
            DPRINTF(MinorMem, "Not signalling fault for faulting prefetch\n");
        }
        DPRINTF(MinorMem, "Moving faulting request into the transfers"
            " queue\n");
        request->setState(LSQRequest::Complete);
        request->setSkipped();
        moveFromRequestsToTransfers(request);
        return;
    }

    bool is_load = request->isLoad;
    bool is_llsc = request->request->isLLSC();
    bool is_swap = request->request->isSwap();
    /* Only ordinary accesses can sit in the store buffer */
    bool bufferable = !(request->request->isStrictlyOrdered() ||
        is_llsc || is_swap);

    if (is_load) {
        if (numStoresInTransfers != 0) {
            DPRINTF(MinorMem, "Load request with stores still in transfers"
                " queue, stalling\n");
            return;
        }
    } else {
        /* Store.  Can it be sent to the store buffer? */
        if (bufferable && !request->request->isMmappedIpr()) {
            request->setState(LSQRequest::StoreToStoreBuffer);
            moveFromRequestsToTransfers(request);
            DPRINTF(MinorMem, "Moving store into transfers queue\n");
            return;
        }
    }

    /* Check if this is the head instruction (and so must be executable as
     *  its stream sequence number was checked above) for loads which must
     *  not be speculatively issued and stores which must be issued here */
    if (!bufferable) {
        if (!execute.instIsHeadInst(request->inst)) {
            DPRINTF(MinorMem, "Memory access not the head inst., can't be"
                " sure it can be performed, not issuing\n");
            return;
        }

        unsigned int forwarding_slot = 0;

        if (storeBuffer.canForwardDataToLoad(request, forwarding_slot) !=
            NoAddrRangeCoverage)
        {
            DPRINTF(MinorMem, "Memory access can receive forwarded data"
                " from the store buffer, need to wait for store buffer to"
                " drain\n");
            return;
        }
    }

    /* True: submit this packet to the transfers queue to be sent to the
     * memory system.
     * False: skip the memory and push a packet for this request onto
     * requests */
    bool do_access = true;

    if (!is_llsc) {
        /* Check for match in the store buffer */
        if (is_load) {
            unsigned int forwarding_slot = 0;
            AddrRangeCoverage forwarding_result =
                storeBuffer.canForwardDataToLoad(request,
                forwarding_slot);

            switch (forwarding_result) {
              case FullAddrRangeCoverage:
                /* Forward data from the store buffer into this request and
                 *  repurpose this request's packet into a response packet */
                storeBuffer.forwardStoreData(request, forwarding_slot);
                request->packet->makeResponse();

                /* Just move between queues, no access */
                do_access = false;
                break;
              case PartialAddrRangeCoverage:
                DPRINTF(MinorMem, "Load partly satisfied by store buffer"
                    " data.  Must wait for the store to complete\n");
                return;
                break;
              case NoAddrRangeCoverage:
                DPRINTF(MinorMem, "No forwardable data from store buffer\n");
                /* Fall through to try access */
                break;
            }
        }
    } else {
        if (!canSendToMemorySystem()) {
            DPRINTF(MinorMem, "Can't send request to memory system yet\n");
            return;
        }

        SimpleThread &thread = *cpu.threads[request->inst->id.threadId];

        TheISA::PCState old_pc = thread.pcState();
        ExecContext context(cpu, thread, execute, request->inst);

        /* Handle LLSC requests and tests */
        if (is_load) {
            TheISA::handleLockedRead(&context, request->request);
        } else {
            /* A failed store conditional performs no memory access */
            do_access = TheISA::handleLockedWrite(&context,
                request->request, cacheBlockMask);

            if (!do_access) {
                DPRINTF(MinorMem, "Not perfoming a memory "
                    "access for store conditional\n");
            }
        }
        /* Restore the PC disturbed by constructing the ExecContext */
        thread.pcState(old_pc);
    }

    /* See the do_access comment above */
    if (do_access) {
        if (!canSendToMemorySystem()) {
            DPRINTF(MinorMem, "Can't send request to memory system yet\n");
            return;
        }

        /* Remember if this is an access which can't be idly
         *  discarded by an interrupt */
        if (!bufferable && !request->issuedToMemory) {
            numAccessesIssuedToMemory++;
            request->issuedToMemory = true;
        }

        if (tryToSend(request)) {
            moveFromRequestsToTransfers(request);
        }
    } else {
        request->setState(LSQRequest::Complete);
        moveFromRequestsToTransfers(request);
    }
}
1072 1073 /* The sender state of the packet *must* be an LSQRequest 1074 * so the response can be correctly handled */ 1075 assert(packet->findNextSenderState<LSQRequest>()); 1076 1077 if (request->request->isMmappedIpr()) { 1078 ThreadContext *thread = 1079 cpu.getContext(cpu.contextToThread( 1080 request->request->contextId())); 1081 1082 if (request->isLoad) { 1083 DPRINTF(MinorMem, "IPR read inst: %s\n", *(request->inst)); 1084 TheISA::handleIprRead(thread, packet); 1085 } else { 1086 DPRINTF(MinorMem, "IPR write inst: %s\n", *(request->inst)); 1087 TheISA::handleIprWrite(thread, packet); 1088 } 1089 1090 request->stepToNextPacket(); 1091 ret = request->sentAllPackets(); 1092 1093 if (!ret) { 1094 DPRINTF(MinorMem, "IPR access has another packet: %s\n", 1095 *(request->inst)); 1096 } 1097 1098 if (ret) 1099 request->setState(LSQRequest::Complete); 1100 else 1101 request->setState(LSQRequest::RequestIssuing); 1102 } else if (dcachePort.sendTimingReq(packet)) { 1103 DPRINTF(MinorMem, "Sent data memory request\n"); 1104 1105 numAccessesInMemorySystem++; 1106 1107 request->stepToNextPacket(); 1108 1109 ret = request->sentAllPackets(); 1110 1111 switch (request->state) { 1112 case LSQRequest::Translated: 1113 case LSQRequest::RequestIssuing: 1114 /* Fully or partially issued a request in the transfers 1115 * queue */ 1116 request->setState(LSQRequest::RequestIssuing); 1117 break; 1118 case LSQRequest::StoreInStoreBuffer: 1119 case LSQRequest::StoreBufferIssuing: 1120 /* Fully or partially issued a request in the store 1121 * buffer */ 1122 request->setState(LSQRequest::StoreBufferIssuing); 1123 break; 1124 default:
|
/** Construct the LSQ with its requests/transfers queues and store
 *  buffer.  line_width == 0 means "use the CPU's cache line size".
 *  All queue/limit parameters are validated with fatal() since a
 *  mis-sized LSQ cannot operate */
LSQ::LSQ(std::string name_, std::string dcache_port_name_,
    MinorCPU &cpu_, Execute &execute_,
    unsigned int in_memory_system_limit, unsigned int line_width,
    unsigned int requests_queue_size, unsigned int transfers_queue_size,
    unsigned int store_buffer_size,
    unsigned int store_buffer_cycle_store_limit) :
    Named(name_),
    cpu(cpu_),
    execute(execute_),
    dcachePort(dcache_port_name_, *this, cpu_),
    lastMemBarrier(cpu.numThreads, 0),
    state(MemoryRunning),
    inMemorySystemLimit(in_memory_system_limit),
    lineWidth((line_width == 0 ? cpu.cacheLineSize() : line_width)),
    requests(name_ + ".requests", "addr", requests_queue_size),
    transfers(name_ + ".transfers", "addr", transfers_queue_size),
    storeBuffer(name_ + ".storeBuffer",
        *this, store_buffer_size, store_buffer_cycle_store_limit),
    numAccessesInMemorySystem(0),
    numAccessesInDTLB(0),
    numStoresInTransfers(0),
    numAccessesIssuedToMemory(0),
    retryRequest(NULL),
    cacheBlockMask(~(cpu_.cacheLineSize() - 1))
{
    if (in_memory_system_limit < 1) {
        fatal("%s: executeMaxAccessesInMemory must be >= 1 (%d)\n", name_,
            in_memory_system_limit);
    }

    if (store_buffer_cycle_store_limit < 1) {
        fatal("%s: executeLSQMaxStoreBufferStoresPerCycle must be"
            " >= 1 (%d)\n", name_, store_buffer_cycle_store_limit);
    }

    if (requests_queue_size < 1) {
        fatal("%s: executeLSQRequestsQueueSize must be"
            " >= 1 (%d)\n", name_, requests_queue_size);
    }

    if (transfers_queue_size < 1) {
        fatal("%s: executeLSQTransfersQueueSize must be"
            " >= 1 (%d)\n", name_, transfers_queue_size);
    }

    if (store_buffer_size < 1) {
        fatal("%s: executeLSQStoreBufferSize must be"
            " >= 1 (%d)\n", name_, store_buffer_size);
    }

    /* addrBlockOffset/cacheBlockMask arithmetic requires a power of 2 */
    if ((lineWidth & (lineWidth - 1)) != 0) {
        fatal("%s: lineWidth: %d must be a power of 2\n", name(), lineWidth);
    }
}

LSQ::~LSQ()
{ }

/** A request owns its packet and (write or gathered read) data buffer */
LSQ::LSQRequest::~LSQRequest()
{
    if (packet)
        delete packet;
    if (data)
        delete [] data;
}

/**
 * Step the memory access mechanism on to its next state.  In reality, most
 * of the stepping is done by the callbacks on the LSQ but this
 * function is responsible for issuing memory requests lodged in the
 * requests queue.
 */
void
LSQ::step()
{
    /* Try to move address-translated requests between queues and issue
     *  them */
    if (!requests.empty())
        tryToSendToTransfers(requests.front());

    storeBuffer.step();
}

/** Find a response in the transfers queue for the given instruction.
 *  Returns NULL unless the head of transfers belongs to inst and is
 *  either complete (and, for barriers, insertable into the store buffer)
 *  or is a store ready to move to the store buffer */
LSQ::LSQRequestPtr
LSQ::findResponse(MinorDynInstPtr inst)
{
    LSQ::LSQRequestPtr ret = NULL;

    if (!transfers.empty()) {
        LSQRequestPtr request = transfers.front();

        /* Same instruction and complete access or a store that's
         *  capable of being moved to the store buffer */
        if (request->inst->id == inst->id) {
            bool complete = request->isComplete();
            bool can_store = storeBuffer.canInsert();
            bool to_store_buffer = request->state ==
                LSQRequest::StoreToStoreBuffer;

            if ((complete && !(request->isBarrier() && !can_store)) ||
                (to_store_buffer && can_store))
            {
                ret = request;
            }
        }
    }

    if (ret) {
        DPRINTF(MinorMem, "Found matching memory response for inst: %s\n",
            *inst);
    } else {
        DPRINTF(MinorMem, "No matching memory response for inst: %s\n",
            *inst);
    }

    return ret;
}

/** Pop a response (found by findResponse) off the head of transfers,
 *  maintaining the in-flight counters.  The request is deleted here
 *  unless it is on its way to the store buffer, which takes ownership */
void
LSQ::popResponse(LSQ::LSQRequestPtr response)
{
    assert(!transfers.empty() && transfers.front() == response);

    transfers.pop();

    if (!response->isLoad)
        numStoresInTransfers--;

    if (response->issuedToMemory)
        numAccessesIssuedToMemory--;

    if (response->state != LSQRequest::StoreInStoreBuffer) {
        DPRINTF(MinorMem, "Deleting %s request: %s\n",
            (response->isLoad ? "load" : "store"),
            *(response->inst));

        delete response;
    }
}

/** Hand a bufferable store over to the store buffer, marking its
 *  instruction so completeMemBarrierInst won't double-insert it */
void
LSQ::sendStoreToStoreBuffer(LSQRequestPtr request)
{
    assert(request->state == LSQRequest::StoreToStoreBuffer);

    DPRINTF(MinorMem, "Sending store: %s to store buffer\n",
        *(request->inst));

    request->inst->inStoreBuffer = true;

    storeBuffer.insert(request);
}

/** Drained when all three queues (requests, transfers, store buffer)
 *  are empty */
bool
LSQ::isDrained()
{
    return requests.empty() && transfers.empty() &&
        storeBuffer.isDrained();
}

/** Does the LSQ have work that needs a clock tick?  True if there is a
 *  translated request ready to move to transfers, or unissued stores in
 *  the store buffer, and the memory system can accept accesses */
bool
LSQ::needsToTick()
{
    bool ret = false;

    if (canSendToMemorySystem()) {
        bool have_translated_requests = !requests.empty() &&
            requests.front()->state != LSQRequest::InTranslation &&
            transfers.unreservedRemainingSpace() != 0;

        ret = have_translated_requests ||
            storeBuffer.numUnissuedStores() != 0;
    }

    if (ret)
        DPRINTF(Activity, "Need to tick\n");

    return ret;
}

/** Entry point from Execute: build a Single- or SplitDataRequest (split
 *  when the access crosses a cache line), copy any store data into it,
 *  queue it and start its address translation */
void
LSQ::pushRequest(MinorDynInstPtr inst, bool isLoad, uint8_t *data,
                 unsigned int size, Addr addr, Request::Flags flags,
                 uint64_t *res)
{
    bool needs_burst = transferNeedsBurst(addr, size, lineWidth);
    LSQRequestPtr request;

    /* Copy given data into the request.  The request will pass this to the
     *  packet and then it will own the data */
    uint8_t *request_data = NULL;

    DPRINTF(MinorMem, "Pushing request (%s) addr: 0x%x size: %d flags:"
        " 0x%x%s lineWidth : 0x%x\n",
        (isLoad ? "load" : "store"), addr, size, flags,
        (needs_burst ? " (needs burst)" : ""), lineWidth);

    if (!isLoad) {
        /* request_data becomes the property of a ...DataRequest (see below)
         *  and destroyed by its destructor */
        request_data = new uint8_t[size];
        if (flags & Request::STORE_NO_DATA) {
            /* For cache zeroing, just use zeroed data */
            std::memset(request_data, 0, size);
        } else {
            std::memcpy(request_data, data, size);
        }
    }

    if (needs_burst) {
        request = new SplitDataRequest(
            *this, inst, isLoad, request_data, res);
    } else {
        request = new SingleDataRequest(
            *this, inst, isLoad, request_data, res);
    }

    if (inst->traceData)
        inst->traceData->setMem(addr, size, flags);

    int cid = cpu.threads[inst->id.threadId]->getTC()->contextId();
    request->request->setContext(cid);
    request->request->setVirt(0 /* asid */,
        addr, size, flags, cpu.dataMasterId(),
        /* I've no idea why we need the PC, but give it */
        inst->pc.instAddr());

    requests.push(request);
    request->startAddrTranslation();
}

/** Push a request for an instruction whose address generation faulted;
 *  FailedDataRequest performs no access and just flows through the
 *  queues to report the fault */
void
LSQ::pushFailedRequest(MinorDynInstPtr inst)
{
    LSQRequestPtr request = new FailedDataRequest(*this, inst);
    requests.push(request);
}

/** MinorTrace output for the LSQ and all its queues.
 *  NOTE(review): only lastMemBarrier[0] (thread 0) is traced here —
 *  presumably a single-thread simplification; confirm for SMT configs */
void
LSQ::minorTrace() const
{
    MINORTRACE("state=%s in_tlb_mem=%d/%d stores_in_transfers=%d"
        " lastMemBarrier=%d\n",
        state, numAccessesInDTLB, numAccessesInMemorySystem,
        numStoresInTransfers, lastMemBarrier[0]);
    requests.minorTrace();
    transfers.minorTrace();
    storeBuffer.minorTrace();
}

/** Store buffer with a fixed number of slots and a per-cycle issue limit */
LSQ::StoreBuffer::StoreBuffer(std::string name_, LSQ &lsq_,
    unsigned int store_buffer_size,
    unsigned int store_limit_per_cycle) :
    Named(name_), lsq(lsq_),
    numSlots(store_buffer_size),
    storeLimitPerCycle(store_limit_per_cycle),
    slots(),
    numUnissuedAccesses(0)
{
}
Packet::SenderState *sender_state, PacketDataPtr data) 1558{ 1559 PacketPtr ret = isLoad ? Packet::createRead(request) 1560 : Packet::createWrite(request); 1561 1562 if (sender_state) 1563 ret->pushSenderState(sender_state); 1564 1565 if (isLoad) { 1566 ret->allocate(); 1567 } else if (!request->isCacheMaintenance()) { 1568 // CMOs are treated as stores but they don't have data. All 1569 // stores otherwise need to allocate for data. 1570 ret->dataDynamic(data); 1571 } 1572 1573 return ret; 1574} 1575 1576void 1577LSQ::issuedMemBarrierInst(MinorDynInstPtr inst) 1578{ 1579 assert(inst->isInst() && inst->staticInst->isMemBarrier()); 1580 assert(inst->id.execSeqNum > lastMemBarrier[inst->id.threadId]); 1581 1582 /* Remember the barrier. We only have a notion of one 1583 * barrier so this may result in some mem refs being 1584 * delayed if they are between barriers */ 1585 lastMemBarrier[inst->id.threadId] = inst->id.execSeqNum; 1586} 1587 1588void 1589LSQ::LSQRequest::makePacket() 1590{ 1591 /* Make the function idempotent */ 1592 if (packet) 1593 return; 1594 1595 // if the translation faulted, do not create a packet 1596 if (fault != NoFault) { 1597 assert(packet == NULL); 1598 return; 1599 } 1600 1601 packet = makePacketForRequest(request, isLoad, this, data); 1602 /* Null the ret data so we know not to deallocate it when the 1603 * ret is destroyed. 
The data now belongs to the ret and 1604 * the ret is responsible for its destruction */ 1605 data = NULL; 1606} 1607 1608std::ostream & 1609operator <<(std::ostream &os, LSQ::MemoryState state) 1610{ 1611 switch (state) { 1612 case LSQ::MemoryRunning: 1613 os << "MemoryRunning"; 1614 break; 1615 case LSQ::MemoryNeedsRetry: 1616 os << "MemoryNeedsRetry"; 1617 break; 1618 default: 1619 os << "MemoryState-" << static_cast<int>(state); 1620 break; 1621 } 1622 return os; 1623} 1624 1625void 1626LSQ::recvTimingSnoopReq(PacketPtr pkt) 1627{ 1628 /* LLSC operations in Minor can't be speculative and are executed from 1629 * the head of the requests queue. We shouldn't need to do more than 1630 * this action on snoops. */ 1631 for (ThreadID tid = 0; tid < cpu.numThreads; tid++) { 1632 if (cpu.getCpuAddrMonitor(tid)->doMonitor(pkt)) { 1633 cpu.wakeup(tid); 1634 } 1635 } 1636 1637 if (pkt->isInvalidate() || pkt->isWrite()) { 1638 for (ThreadID tid = 0; tid < cpu.numThreads; tid++) { 1639 TheISA::handleLockedSnoop(cpu.getContext(tid), pkt, 1640 cacheBlockMask); 1641 } 1642 } 1643} 1644 1645void 1646LSQ::threadSnoop(LSQRequestPtr request) 1647{ 1648 /* LLSC operations in Minor can't be speculative and are executed from 1649 * the head of the requests queue. We shouldn't need to do more than 1650 * this action on snoops. */ 1651 ThreadID req_tid = request->inst->id.threadId; 1652 PacketPtr pkt = request->packet; 1653 1654 for (ThreadID tid = 0; tid < cpu.numThreads; tid++) { 1655 if (tid != req_tid) { 1656 if (cpu.getCpuAddrMonitor(tid)->doMonitor(pkt)) { 1657 cpu.wakeup(tid); 1658 } 1659 1660 if (pkt->isInvalidate() || pkt->isWrite()) { 1661 TheISA::handleLockedSnoop(cpu.getContext(tid), pkt, 1662 cacheBlockMask); 1663 } 1664 } 1665 } 1666} 1667 1668}
/* NOTE(review): this entire span duplicates the chunk that precedes it in
 * this file (it repeats the same LSQ definitions) — this looks like an
 * extraction/merge artifact and would not compile as-is; confirm against
 * upstream gem5 src/cpu/minor/lsq.cc and remove one copy.  Reformatted
 * only, tokens preserved.  The lines immediately below close a member
 * function that begins before this span. */
        }

        retryRequest = NULL;
    }
}

/** Construct the LSQ and fail fast on obviously bad configuration values.
 *  A line_width of 0 means "use the CPU's cache line size". */
LSQ::LSQ(std::string name_, std::string dcache_port_name_,
    MinorCPU &cpu_, Execute &execute_,
    unsigned int in_memory_system_limit, unsigned int line_width,
    unsigned int requests_queue_size, unsigned int transfers_queue_size,
    unsigned int store_buffer_size,
    unsigned int store_buffer_cycle_store_limit) :
    Named(name_),
    cpu(cpu_),
    execute(execute_),
    dcachePort(dcache_port_name_, *this, cpu_),
    lastMemBarrier(cpu.numThreads, 0),
    state(MemoryRunning),
    inMemorySystemLimit(in_memory_system_limit),
    lineWidth((line_width == 0 ? cpu.cacheLineSize() : line_width)),
    requests(name_ + ".requests", "addr", requests_queue_size),
    transfers(name_ + ".transfers", "addr", transfers_queue_size),
    storeBuffer(name_ + ".storeBuffer",
        *this, store_buffer_size, store_buffer_cycle_store_limit),
    numAccessesInMemorySystem(0),
    numAccessesInDTLB(0),
    numStoresInTransfers(0),
    numAccessesIssuedToMemory(0),
    retryRequest(NULL),
    cacheBlockMask(~(cpu_.cacheLineSize() - 1))
{
    if (in_memory_system_limit < 1) {
        fatal("%s: executeMaxAccessesInMemory must be >= 1 (%d)\n", name_,
            in_memory_system_limit);
    }

    if (store_buffer_cycle_store_limit < 1) {
        fatal("%s: executeLSQMaxStoreBufferStoresPerCycle must be"
            " >= 1 (%d)\n", name_, store_buffer_cycle_store_limit);
    }

    if (requests_queue_size < 1) {
        fatal("%s: executeLSQRequestsQueueSize must be"
            " >= 1 (%d)\n", name_, requests_queue_size);
    }

    if (transfers_queue_size < 1) {
        fatal("%s: executeLSQTransfersQueueSize must be"
            " >= 1 (%d)\n", name_, transfers_queue_size);
    }

    if (store_buffer_size < 1) {
        fatal("%s: executeLSQStoreBufferSize must be"
            " >= 1 (%d)\n", name_, store_buffer_size);
    }

    /* lineWidth is used for burst splitting; the offset arithmetic
     * requires a power of 2 */
    if ((lineWidth & (lineWidth - 1)) != 0) {
        fatal("%s: lineWidth: %d must be a power of 2\n", name(), lineWidth);
    }
}

LSQ::~LSQ()
{ }

/** A request owns its packet and, until the packet takes it over in
 *  makePacket, its data buffer; release both here */
LSQ::LSQRequest::~LSQRequest()
{
    if (packet)
        delete packet;
    if (data)
        delete [] data;
}

/**
 * Step the memory access mechanism on to its next state. In reality, most
 * of the stepping is done by the callbacks on the LSQ but this
 * function is responsible for issuing memory requests lodged in the
 * requests queue.
 */
void
LSQ::step()
{
    /* Try to move address-translated requests between queues and issue
     * them */
    if (!requests.empty())
        tryToSendToTransfers(requests.front());

    storeBuffer.step();
}

/** Returns the head of the transfers queue if it is a response belonging
 *  to inst: either a completed access (barriers additionally need store
 *  buffer space) or a store ready to move to the store buffer.
 *  Returns NULL otherwise. */
LSQ::LSQRequestPtr
LSQ::findResponse(MinorDynInstPtr inst)
{
    LSQ::LSQRequestPtr ret = NULL;

    if (!transfers.empty()) {
        LSQRequestPtr request = transfers.front();

        /* Same instruction and complete access or a store that's
         * capable of being moved to the store buffer */
        if (request->inst->id == inst->id) {
            bool complete = request->isComplete();
            bool can_store = storeBuffer.canInsert();
            bool to_store_buffer = request->state ==
                LSQRequest::StoreToStoreBuffer;

            if ((complete && !(request->isBarrier() && !can_store)) ||
                (to_store_buffer && can_store))
            {
                ret = request;
            }
        }
    }

    if (ret) {
        DPRINTF(MinorMem, "Found matching memory response for inst: %s\n",
            *inst);
    } else {
        DPRINTF(MinorMem, "No matching memory response for inst: %s\n",
            *inst);
    }

    return ret;
}

/** Pop response (which must be the transfers-queue head) and update the
 *  occupancy counters.  Requests in state StoreInStoreBuffer are not
 *  deleted here as the store buffer still refers to them; everything
 *  else is destroyed. */
void
LSQ::popResponse(LSQ::LSQRequestPtr response)
{
    assert(!transfers.empty() && transfers.front() == response);

    transfers.pop();

    if (!response->isLoad)
        numStoresInTransfers--;

    if (response->issuedToMemory)
        numAccessesIssuedToMemory--;

    if (response->state != LSQRequest::StoreInStoreBuffer) {
        DPRINTF(MinorMem, "Deleting %s request: %s\n",
            (response->isLoad ? "load" : "store"),
            *(response->inst));

        delete response;
    }
}

/** Hand a store over to the store buffer, marking the owning instruction
 *  so it knows where its store went */
void
LSQ::sendStoreToStoreBuffer(LSQRequestPtr request)
{
    assert(request->state == LSQRequest::StoreToStoreBuffer);

    DPRINTF(MinorMem, "Sending store: %s to store buffer\n",
        *(request->inst));

    request->inst->inStoreBuffer = true;

    storeBuffer.insert(request);
}

/** Drained when all queues (requests, transfers, store buffer) are empty */
bool
LSQ::isDrained()
{
    return requests.empty() && transfers.empty() &&
        storeBuffer.isDrained();
}

/** The LSQ needs a tick when it could make progress: a translated request
 *  can move into transfers, or the store buffer has unissued stores */
bool
LSQ::needsToTick()
{
    bool ret = false;

    if (canSendToMemorySystem()) {
        bool have_translated_requests = !requests.empty() &&
            requests.front()->state != LSQRequest::InTranslation &&
            transfers.unreservedRemainingSpace() != 0;

        ret = have_translated_requests ||
            storeBuffer.numUnissuedStores() != 0;
    }

    if (ret)
        DPRINTF(Activity, "Need to tick\n");

    return ret;
}

/** Lodge a new memory access in the requests queue and start its address
 *  translation.  Accesses crossing a lineWidth boundary become
 *  SplitDataRequests; stores copy (or zero, for STORE_NO_DATA) their data
 *  into a buffer the request then owns. */
void
LSQ::pushRequest(MinorDynInstPtr inst, bool isLoad, uint8_t *data,
                 unsigned int size, Addr addr, Request::Flags flags,
                 uint64_t *res)
{
    bool needs_burst = transferNeedsBurst(addr, size, lineWidth);
    LSQRequestPtr request;

    /* Copy given data into the request.  The request will pass this to the
     * packet and then it will own the data */
    uint8_t *request_data = NULL;

    DPRINTF(MinorMem, "Pushing request (%s) addr: 0x%x size: %d flags:"
        " 0x%x%s lineWidth : 0x%x\n",
        (isLoad ? "load" : "store"), addr, size, flags,
        (needs_burst ? " (needs burst)" : ""), lineWidth);

    if (!isLoad) {
        /* request_data becomes the property of a ...DataRequest (see below)
         * and destroyed by its destructor */
        request_data = new uint8_t[size];
        if (flags & Request::STORE_NO_DATA) {
            /* For cache zeroing, just use zeroed data */
            std::memset(request_data, 0, size);
        } else {
            std::memcpy(request_data, data, size);
        }
    }

    if (needs_burst) {
        request = new SplitDataRequest(
            *this, inst, isLoad, request_data, res);
    } else {
        request = new SingleDataRequest(
            *this, inst, isLoad, request_data, res);
    }

    if (inst->traceData)
        inst->traceData->setMem(addr, size, flags);

    int cid = cpu.threads[inst->id.threadId]->getTC()->contextId();
    request->request->setContext(cid);
    request->request->setVirt(0 /* asid */,
        addr, size, flags, cpu.dataMasterId(),
        /* I've no idea why we need the PC, but give it */
        inst->pc.instAddr());

    requests.push(request);
    request->startAddrTranslation();
}

/** Lodge a request whose translation has already failed; it carries no
 *  data and will never be sent to memory */
void
LSQ::pushFailedRequest(MinorDynInstPtr inst)
{
    LSQRequestPtr request = new FailedDataRequest(*this, inst);
    requests.push(request);
}

/** Emit a MinorTrace line for this LSQ and its queues.
 *  NOTE(review): only lastMemBarrier[0] (thread 0) is traced — confirm
 *  whether per-thread barriers should be reported for SMT configs. */
void
LSQ::minorTrace() const
{
    MINORTRACE("state=%s in_tlb_mem=%d/%d stores_in_transfers=%d"
        " lastMemBarrier=%d\n",
        state, numAccessesInDTLB, numAccessesInMemorySystem,
        numStoresInTransfers, lastMemBarrier[0]);
    requests.minorTrace();
    transfers.minorTrace();
    storeBuffer.minorTrace();
}

LSQ::StoreBuffer::StoreBuffer(std::string name_, LSQ &lsq_,
    unsigned int store_buffer_size,
    unsigned int store_limit_per_cycle) :
    Named(name_), lsq(lsq_),
    numSlots(store_buffer_size),
    storeLimitPerCycle(store_limit_per_cycle),
    slots(),
    numUnissuedAccesses(0)
{
}

/** Build a read or write packet for request.  Loads allocate their own
 *  data; writes take ownership of the caller-supplied data buffer via
 *  dataDynamic (except cache maintenance ops, which carry no data). */
PacketPtr
makePacketForRequest(const RequestPtr &request, bool isLoad,
    Packet::SenderState *sender_state, PacketDataPtr data)
{
    PacketPtr ret = isLoad ? Packet::createRead(request)
                           : Packet::createWrite(request);

    if (sender_state)
        ret->pushSenderState(sender_state);

    if (isLoad) {
        ret->allocate();
    } else if (!request->isCacheMaintenance()) {
        // CMOs are treated as stores but they don't have data. All
        // stores otherwise need to allocate for data.
        ret->dataDynamic(data);
    }

    return ret;
}

void
LSQ::issuedMemBarrierInst(MinorDynInstPtr inst)
{
    assert(inst->isInst() && inst->staticInst->isMemBarrier());
    assert(inst->id.execSeqNum > lastMemBarrier[inst->id.threadId]);

    /* Remember the barrier.  We only have a notion of one
     * barrier so this may result in some mem refs being
     * delayed if they are between barriers */
    lastMemBarrier[inst->id.threadId] = inst->id.execSeqNum;
}

void
LSQ::LSQRequest::makePacket()
{
    /* Make the function idempotent */
    if (packet)
        return;

    // if the translation faulted, do not create a packet
    if (fault != NoFault) {
        assert(packet == NULL);
        return;
    }

    packet = makePacketForRequest(request, isLoad, this, data);
    /* Null the ret data so we know not to deallocate it when the
     * ret is destroyed.  The data now belongs to the ret and
     * the ret is responsible for its destruction */
    data = NULL;
}

std::ostream &
operator <<(std::ostream &os, LSQ::MemoryState state)
{
    switch (state) {
      case LSQ::MemoryRunning:
        os << "MemoryRunning";
        break;
      case LSQ::MemoryNeedsRetry:
        os << "MemoryNeedsRetry";
        break;
      default:
        os << "MemoryState-" << static_cast<int>(state);
        break;
    }
    return os;
}

void
LSQ::recvTimingSnoopReq(PacketPtr pkt)
{
    /* LLSC operations in Minor can't be speculative and are executed from
     * the head of the requests queue.  We shouldn't need to do more than
     * this action on snoops. */
    /* Wake any thread whose address monitor matches the snooped packet */
    for (ThreadID tid = 0; tid < cpu.numThreads; tid++) {
        if (cpu.getCpuAddrMonitor(tid)->doMonitor(pkt)) {
            cpu.wakeup(tid);
        }
    }

    if (pkt->isInvalidate() || pkt->isWrite()) {
        for (ThreadID tid = 0; tid < cpu.numThreads; tid++) {
            TheISA::handleLockedSnoop(cpu.getContext(tid), pkt,
                                      cacheBlockMask);
        }
    }
}

/** Apply snoop effects of one thread's request to all OTHER threads on
 *  this CPU (monitor wakeups and LLSC lock invalidation) */
void
LSQ::threadSnoop(LSQRequestPtr request)
{
    /* LLSC operations in Minor can't be speculative and are executed from
     * the head of the requests queue.  We shouldn't need to do more than
     * this action on snoops. */
    ThreadID req_tid = request->inst->id.threadId;
    PacketPtr pkt = request->packet;

    for (ThreadID tid = 0; tid < cpu.numThreads; tid++) {
        if (tid != req_tid) {
            if (cpu.getCpuAddrMonitor(tid)->doMonitor(pkt)) {
                cpu.wakeup(tid);
            }

            if (pkt->isInvalidate() || pkt->isWrite()) {
                TheISA::handleLockedSnoop(cpu.getContext(tid), pkt,
                                          cacheBlockMask);
            }
        }
    }
}

}