45#include "arch/generic/debugfaults.hh" 46#include "arch/locked_mem.hh" 47#include "base/str.hh" 48#include "config/the_isa.hh" 49#include "cpu/checker/cpu.hh" 50#include "cpu/o3/lsq.hh" 51#include "cpu/o3/lsq_unit.hh" 52#include "debug/Activity.hh" 53#include "debug/IEW.hh" 54#include "debug/LSQUnit.hh" 55#include "debug/O3PipeView.hh" 56#include "mem/packet.hh" 57#include "mem/request.hh" 58 59template<class Impl> 60LSQUnit<Impl>::WritebackEvent::WritebackEvent(DynInstPtr &_inst, PacketPtr _pkt, 61 LSQUnit *lsq_ptr) 62 : Event(Default_Pri, AutoDelete), 63 inst(_inst), pkt(_pkt), lsqPtr(lsq_ptr) 64{ 65} 66 67template<class Impl> 68void 69LSQUnit<Impl>::WritebackEvent::process() 70{ 71 assert(!lsqPtr->cpu->switchedOut()); 72 73 lsqPtr->writeback(inst, pkt); 74 75 if (pkt->senderState) 76 delete pkt->senderState; 77 78 delete pkt->req; 79 delete pkt; 80} 81 82template<class Impl> 83const char * 84LSQUnit<Impl>::WritebackEvent::description() const 85{ 86 return "Store writeback"; 87} 88 89template<class Impl> 90void 91LSQUnit<Impl>::completeDataAccess(PacketPtr pkt) 92{ 93 LSQSenderState *state = dynamic_cast<LSQSenderState *>(pkt->senderState); 94 DynInstPtr inst = state->inst; 95 DPRINTF(IEW, "Writeback event [sn:%lli].\n", inst->seqNum); 96 DPRINTF(Activity, "Activity: Writeback event [sn:%lli].\n", inst->seqNum); 97 98 //iewStage->ldstQueue.removeMSHR(inst->threadNumber,inst->seqNum); 99 100 // If this is a split access, wait until all packets are received. 101 if (TheISA::HasUnalignedMemAcc && !state->complete()) { 102 delete pkt->req; 103 delete pkt; 104 return; 105 } 106 107 assert(!cpu->switchedOut()); 108 if (inst->isSquashed()) { 109 iewStage->decrWb(inst->seqNum); 110 } else { 111 if (!state->noWB) { 112 if (!TheISA::HasUnalignedMemAcc || !state->isSplit || 113 !state->isLoad) { 114 writeback(inst, pkt); 115 } else { 116 writeback(inst, state->mainPkt); 117 } 118 } 119 120 if (inst->isStore()) { 121 completeStore(state->idx); 122 } 123 } 124 125 if (TheISA::HasUnalignedMemAcc && state->isSplit && state->isLoad) { 126 delete state->mainPkt->req; 127 delete state->mainPkt; 128 } 129 delete state; 130 delete pkt->req; 131 delete pkt; 132} 133 134template <class Impl> 135LSQUnit<Impl>::LSQUnit() 136 : loads(0), stores(0), storesToWB(0), cacheBlockMask(0), stalled(false), 137 isStoreBlocked(false), isLoadBlocked(false), 138 loadBlockedHandled(false), storeInFlight(false), hasPendingPkt(false) 139{ 140} 141 142template<class Impl> 143void 144LSQUnit<Impl>::init(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params, 145 LSQ *lsq_ptr, unsigned maxLQEntries, unsigned maxSQEntries, 146 unsigned id) 147{ 148 cpu = cpu_ptr; 149 iewStage = iew_ptr; 150 151 DPRINTF(LSQUnit, "Creating LSQUnit%i object.\n",id); 152 153 lsq = lsq_ptr; 154 155 lsqID = id; 156 157 // Add 1 for the sentinel entry (they are circular queues). 158 LQEntries = maxLQEntries + 1; 159 SQEntries = maxSQEntries + 1; 160 161 //Due to uint8_t index in LSQSenderState 162 assert(LQEntries <= 256); 163 assert(SQEntries <= 256); 164 165 loadQueue.resize(LQEntries); 166 storeQueue.resize(SQEntries); 167 168 depCheckShift = params->LSQDepCheckShift; 169 checkLoads = params->LSQCheckLoads; 170 cachePorts = params->cachePorts; 171 needsTSO = params->needsTSO; 172 173 resetState(); 174} 175 176 177template<class Impl> 178void 179LSQUnit<Impl>::resetState() 180{ 181 loads = stores = storesToWB = 0; 182 183 loadHead = loadTail = 0; 184 185 storeHead = storeWBIdx = storeTail = 0; 186 187 usedPorts = 0; 188 189 retryPkt = NULL; 190 memDepViolator = NULL; 191 192 blockedLoadSeqNum = 0; 193 194 stalled = false; 195 isLoadBlocked = false; 196 loadBlockedHandled = false; 197 198 cacheBlockMask = ~(cpu->cacheLineSize() - 1); 199} 200 201template<class Impl> 202std::string 203LSQUnit<Impl>::name() const 204{ 205 if (Impl::MaxThreads == 1) { 206 return iewStage->name() + ".lsq"; 207 } else { 208 return iewStage->name() + ".lsq.thread" + to_string(lsqID); 209 } 210} 211 212template<class Impl> 213void 214LSQUnit<Impl>::regStats() 215{ 216 lsqForwLoads 217 .name(name() + ".forwLoads") 218 .desc("Number of loads that had data forwarded from stores"); 219 220 invAddrLoads 221 .name(name() + ".invAddrLoads") 222 .desc("Number of loads ignored due to an invalid address"); 223 224 lsqSquashedLoads 225 .name(name() + ".squashedLoads") 226 .desc("Number of loads squashed"); 227 228 lsqIgnoredResponses 229 .name(name() + ".ignoredResponses") 230 .desc("Number of memory responses ignored because the instruction is squashed"); 231 232 lsqMemOrderViolation 233 .name(name() + ".memOrderViolation") 234 .desc("Number of memory ordering violations"); 235 236 lsqSquashedStores 237 .name(name() + ".squashedStores") 238 .desc("Number of stores squashed"); 239 240 invAddrSwpfs 241 .name(name() + ".invAddrSwpfs") 242 .desc("Number of software prefetches ignored due to an invalid address"); 243 244 lsqBlockedLoads 245 .name(name() + ".blockedLoads") 246 .desc("Number of blocked loads due to partial load-store forwarding"); 247 248 lsqRescheduledLoads 249 .name(name() + ".rescheduledLoads") 250 .desc("Number of loads that were rescheduled"); 251 252 lsqCacheBlocked 253 .name(name() + ".cacheBlocked") 254 .desc("Number of times an access to memory failed due to the cache being blocked"); 255} 256 257template<class Impl> 258void 259LSQUnit<Impl>::setDcachePort(MasterPort *dcache_port) 260{ 261 dcachePort = dcache_port; 262} 263 264template<class Impl> 265void 266LSQUnit<Impl>::clearLQ() 267{ 268 loadQueue.clear(); 269} 270 271template<class Impl> 272void 273LSQUnit<Impl>::clearSQ() 274{ 275 storeQueue.clear(); 276} 277 278template<class Impl> 279void 280LSQUnit<Impl>::drainSanityCheck() const 281{ 282 for (int i = 0; i < loadQueue.size(); ++i) 283 assert(!loadQueue[i]); 284 285 assert(storesToWB == 0); 286 assert(!retryPkt); 287} 288 289template<class Impl> 290void 291LSQUnit<Impl>::takeOverFrom() 292{ 293 resetState(); 294} 295 296template<class Impl> 297void 298LSQUnit<Impl>::resizeLQ(unsigned size) 299{ 300 unsigned size_plus_sentinel = size + 1; 301 assert(size_plus_sentinel >= LQEntries); 302 303 if (size_plus_sentinel > LQEntries) { 304 while (size_plus_sentinel > loadQueue.size()) { 305 DynInstPtr dummy; 306 loadQueue.push_back(dummy); 307 LQEntries++; 308 } 309 } else { 310 LQEntries = size_plus_sentinel; 311 } 312 313 assert(LQEntries <= 256); 314} 315 316template<class Impl> 317void 318LSQUnit<Impl>::resizeSQ(unsigned size) 319{ 320 unsigned size_plus_sentinel = size + 1; 321 if (size_plus_sentinel > SQEntries) { 322 while (size_plus_sentinel > storeQueue.size()) { 323 SQEntry dummy; 324 storeQueue.push_back(dummy); 325 SQEntries++; 326 } 327 } else { 328 SQEntries = size_plus_sentinel; 329 } 330 331 assert(SQEntries <= 256); 332} 333 334template <class Impl> 335void 336LSQUnit<Impl>::insert(DynInstPtr &inst) 337{ 338 assert(inst->isMemRef()); 339 340 assert(inst->isLoad() || inst->isStore()); 341 342 if (inst->isLoad()) { 343 insertLoad(inst); 344 } else { 345 insertStore(inst); 346 } 347 348 inst->setInLSQ(); 349} 350 351template <class Impl> 352void 353LSQUnit<Impl>::insertLoad(DynInstPtr &load_inst) 354{ 355 assert((loadTail + 1) % LQEntries != loadHead); 356 assert(loads < LQEntries); 357 358 DPRINTF(LSQUnit, "Inserting load PC %s, idx:%i [sn:%lli]\n", 359 load_inst->pcState(), loadTail, load_inst->seqNum); 360 361 load_inst->lqIdx = loadTail; 362 363 if (stores == 0) { 364 load_inst->sqIdx = -1; 365 } else { 366 load_inst->sqIdx = storeTail; 367 } 368 369 loadQueue[loadTail] = load_inst; 370 371 incrLdIdx(loadTail); 372 373 ++loads; 374} 375 376template <class Impl> 377void 378LSQUnit<Impl>::insertStore(DynInstPtr &store_inst) 379{ 380 // Make sure it is not full before inserting an instruction. 381 assert((storeTail + 1) % SQEntries != storeHead); 382 assert(stores < SQEntries); 383 384 DPRINTF(LSQUnit, "Inserting store PC %s, idx:%i [sn:%lli]\n", 385 store_inst->pcState(), storeTail, store_inst->seqNum); 386 387 store_inst->sqIdx = storeTail; 388 store_inst->lqIdx = loadTail; 389 390 storeQueue[storeTail] = SQEntry(store_inst); 391 392 incrStIdx(storeTail); 393 394 ++stores; 395} 396 397template <class Impl> 398typename Impl::DynInstPtr 399LSQUnit<Impl>::getMemDepViolator() 400{ 401 DynInstPtr temp = memDepViolator; 402 403 memDepViolator = NULL; 404 405 return temp; 406} 407 408template <class Impl> 409unsigned 410LSQUnit<Impl>::numFreeEntries() 411{ 412 unsigned free_lq_entries = LQEntries - loads; 413 unsigned free_sq_entries = SQEntries - stores; 414 415 // Both the LQ and SQ entries have an extra dummy entry to differentiate 416 // empty/full conditions. Subtract 1 from the free entries. 417 if (free_lq_entries < free_sq_entries) { 418 return free_lq_entries - 1; 419 } else { 420 return free_sq_entries - 1; 421 } 422} 423 424template <class Impl> 425void 426LSQUnit<Impl>::checkSnoop(PacketPtr pkt) 427{ 428 int load_idx = loadHead; 429 430 // Unlock the cpu-local monitor when the CPU sees a snoop to a locked 431 // address. The CPU can speculatively execute a LL operation after a pending 432 // SC operation in the pipeline and that can make the cache monitor the CPU 433 // is connected to valid while it really shouldn't be. 434 for (int x = 0; x < cpu->numActiveThreads(); x++) { 435 ThreadContext *tc = cpu->getContext(x); 436 bool no_squash = cpu->thread[x]->noSquashFromTC; 437 cpu->thread[x]->noSquashFromTC = true; 438 TheISA::handleLockedSnoop(tc, pkt, cacheBlockMask); 439 cpu->thread[x]->noSquashFromTC = no_squash; 440 } 441 442 // If this is the only load in the LSQ we don't care 443 if (load_idx == loadTail) 444 return; 445 incrLdIdx(load_idx); 446 447 DPRINTF(LSQUnit, "Got snoop for address %#x\n", pkt->getAddr()); 448 Addr invalidate_addr = pkt->getAddr() & cacheBlockMask; 449 while (load_idx != loadTail) { 450 DynInstPtr ld_inst = loadQueue[load_idx]; 451 452 if (!ld_inst->effAddrValid() || ld_inst->uncacheable()) { 453 incrLdIdx(load_idx); 454 continue; 455 } 456 457 Addr load_addr = ld_inst->physEffAddr & cacheBlockMask; 458 DPRINTF(LSQUnit, "-- inst [sn:%lli] load_addr: %#x to pktAddr:%#x\n", 459 ld_inst->seqNum, load_addr, invalidate_addr); 460 461 if (load_addr == invalidate_addr) { 462 if (ld_inst->possibleLoadViolation()) { 463 DPRINTF(LSQUnit, "Conflicting load at addr %#x [sn:%lli]\n", 464 ld_inst->physEffAddr, pkt->getAddr(), ld_inst->seqNum); 465 466 // Mark the load for re-execution 467 ld_inst->fault = new ReExec; 468 } else { 469 // If a older load checks this and it's true 470 // then we might have missed the snoop 471 // in which case we need to invalidate to be sure 472 ld_inst->hitExternalSnoop(true); 473 } 474 } 475 incrLdIdx(load_idx); 476 } 477 return; 478} 479 480template <class Impl> 481Fault 482LSQUnit<Impl>::checkViolations(int load_idx, DynInstPtr &inst) 483{ 484 Addr inst_eff_addr1 = inst->effAddr >> depCheckShift; 485 Addr inst_eff_addr2 = (inst->effAddr + inst->effSize - 1) >> depCheckShift; 486 487 /** @todo in theory you only need to check an instruction that has executed 488 * however, there isn't a good way in the pipeline at the moment to check 489 * all instructions that will execute before the store writes back. Thus, 490 * like the implementation that came before it, we're overly conservative. 491 */ 492 while (load_idx != loadTail) { 493 DynInstPtr ld_inst = loadQueue[load_idx]; 494 if (!ld_inst->effAddrValid() || ld_inst->uncacheable()) { 495 incrLdIdx(load_idx); 496 continue; 497 } 498 499 Addr ld_eff_addr1 = ld_inst->effAddr >> depCheckShift; 500 Addr ld_eff_addr2 = 501 (ld_inst->effAddr + ld_inst->effSize - 1) >> depCheckShift; 502 503 if (inst_eff_addr2 >= ld_eff_addr1 && inst_eff_addr1 <= ld_eff_addr2) { 504 if (inst->isLoad()) { 505 // If this load is to the same block as an external snoop 506 // invalidate that we've observed then the load needs to be 507 // squashed as it could have newer data 508 if (ld_inst->hitExternalSnoop()) { 509 if (!memDepViolator || 510 ld_inst->seqNum < memDepViolator->seqNum) { 511 DPRINTF(LSQUnit, "Detected fault with inst [sn:%lli] " 512 "and [sn:%lli] at address %#x\n", 513 inst->seqNum, ld_inst->seqNum, ld_eff_addr1); 514 memDepViolator = ld_inst; 515 516 ++lsqMemOrderViolation; 517 518 return new GenericISA::M5PanicFault( 519 "Detected fault with inst [sn:%lli] and " 520 "[sn:%lli] at address %#x\n", 521 inst->seqNum, ld_inst->seqNum, ld_eff_addr1); 522 } 523 } 524 525 // Otherwise, mark the load has a possible load violation 526 // and if we see a snoop before it's commited, we need to squash 527 ld_inst->possibleLoadViolation(true); 528 DPRINTF(LSQUnit, "Found possible load violaiton at addr: %#x" 529 " between instructions [sn:%lli] and [sn:%lli]\n", 530 inst_eff_addr1, inst->seqNum, ld_inst->seqNum); 531 } else { 532 // A load/store incorrectly passed this store. 533 // Check if we already have a violator, or if it's newer 534 // squash and refetch. 535 if (memDepViolator && ld_inst->seqNum > memDepViolator->seqNum) 536 break; 537 538 DPRINTF(LSQUnit, "Detected fault with inst [sn:%lli] and " 539 "[sn:%lli] at address %#x\n", 540 inst->seqNum, ld_inst->seqNum, ld_eff_addr1); 541 memDepViolator = ld_inst; 542 543 ++lsqMemOrderViolation; 544 545 return new GenericISA::M5PanicFault("Detected fault with " 546 "inst [sn:%lli] and [sn:%lli] at address %#x\n", 547 inst->seqNum, ld_inst->seqNum, ld_eff_addr1); 548 } 549 } 550 551 incrLdIdx(load_idx); 552 } 553 return NoFault; 554} 555 556 557 558 559template <class Impl> 560Fault 561LSQUnit<Impl>::executeLoad(DynInstPtr &inst) 562{ 563 using namespace TheISA; 564 // Execute a specific load. 565 Fault load_fault = NoFault; 566 567 DPRINTF(LSQUnit, "Executing load PC %s, [sn:%lli]\n", 568 inst->pcState(), inst->seqNum); 569 570 assert(!inst->isSquashed()); 571 572 load_fault = inst->initiateAcc(); 573 574 if (inst->isTranslationDelayed() && 575 load_fault == NoFault) 576 return load_fault; 577 578 // If the instruction faulted or predicated false, then we need to send it 579 // along to commit without the instruction completing. 580 if (load_fault != NoFault || inst->readPredicate() == false) { 581 // Send this instruction to commit, also make sure iew stage 582 // realizes there is activity. 583 // Mark it as executed unless it is an uncached load that 584 // needs to hit the head of commit. 585 if (inst->readPredicate() == false) 586 inst->forwardOldRegs(); 587 DPRINTF(LSQUnit, "Load [sn:%lli] not executed from %s\n", 588 inst->seqNum, 589 (load_fault != NoFault ? "fault" : "predication")); 590 if (!(inst->hasRequest() && inst->uncacheable()) || 591 inst->isAtCommit()) { 592 inst->setExecuted(); 593 } 594 iewStage->instToCommit(inst); 595 iewStage->activityThisCycle(); 596 } else if (!loadBlocked()) { 597 assert(inst->effAddrValid()); 598 int load_idx = inst->lqIdx; 599 incrLdIdx(load_idx); 600 601 if (checkLoads) 602 return checkViolations(load_idx, inst); 603 } 604 605 return load_fault; 606} 607 608template <class Impl> 609Fault 610LSQUnit<Impl>::executeStore(DynInstPtr &store_inst) 611{ 612 using namespace TheISA; 613 // Make sure that a store exists. 614 assert(stores != 0); 615 616 int store_idx = store_inst->sqIdx; 617 618 DPRINTF(LSQUnit, "Executing store PC %s [sn:%lli]\n", 619 store_inst->pcState(), store_inst->seqNum); 620 621 assert(!store_inst->isSquashed()); 622 623 // Check the recently completed loads to see if any match this store's 624 // address. If so, then we have a memory ordering violation. 625 int load_idx = store_inst->lqIdx; 626 627 Fault store_fault = store_inst->initiateAcc(); 628 629 if (store_inst->isTranslationDelayed() && 630 store_fault == NoFault) 631 return store_fault; 632 633 if (store_inst->readPredicate() == false) 634 store_inst->forwardOldRegs(); 635 636 if (storeQueue[store_idx].size == 0) { 637 DPRINTF(LSQUnit,"Fault on Store PC %s, [sn:%lli], Size = 0\n", 638 store_inst->pcState(), store_inst->seqNum); 639 640 return store_fault; 641 } else if (store_inst->readPredicate() == false) { 642 DPRINTF(LSQUnit, "Store [sn:%lli] not executed from predication\n", 643 store_inst->seqNum); 644 return store_fault; 645 } 646 647 assert(store_fault == NoFault); 648 649 if (store_inst->isStoreConditional()) { 650 // Store conditionals need to set themselves as able to 651 // writeback if we haven't had a fault by here. 652 storeQueue[store_idx].canWB = true; 653 654 ++storesToWB; 655 } 656 657 return checkViolations(load_idx, store_inst); 658 659} 660 661template <class Impl> 662void 663LSQUnit<Impl>::commitLoad() 664{ 665 assert(loadQueue[loadHead]); 666 667 DPRINTF(LSQUnit, "Committing head load instruction, PC %s\n", 668 loadQueue[loadHead]->pcState()); 669 670 loadQueue[loadHead] = NULL; 671 672 incrLdIdx(loadHead); 673 674 --loads; 675} 676 677template <class Impl> 678void 679LSQUnit<Impl>::commitLoads(InstSeqNum &youngest_inst) 680{ 681 assert(loads == 0 || loadQueue[loadHead]); 682 683 while (loads != 0 && loadQueue[loadHead]->seqNum <= youngest_inst) { 684 commitLoad(); 685 } 686} 687 688template <class Impl> 689void 690LSQUnit<Impl>::commitStores(InstSeqNum &youngest_inst) 691{ 692 assert(stores == 0 || storeQueue[storeHead].inst); 693 694 int store_idx = storeHead; 695 696 while (store_idx != storeTail) { 697 assert(storeQueue[store_idx].inst); 698 // Mark any stores that are now committed and have not yet 699 // been marked as able to write back. 700 if (!storeQueue[store_idx].canWB) { 701 if (storeQueue[store_idx].inst->seqNum > youngest_inst) { 702 break; 703 } 704 DPRINTF(LSQUnit, "Marking store as able to write back, PC " 705 "%s [sn:%lli]\n", 706 storeQueue[store_idx].inst->pcState(), 707 storeQueue[store_idx].inst->seqNum); 708 709 storeQueue[store_idx].canWB = true; 710 711 ++storesToWB; 712 } 713 714 incrStIdx(store_idx); 715 } 716} 717 718template <class Impl> 719void 720LSQUnit<Impl>::writebackPendingStore() 721{ 722 if (hasPendingPkt) { 723 assert(pendingPkt != NULL); 724 725 // If the cache is blocked, this will store the packet for retry. 726 if (sendStore(pendingPkt)) { 727 storePostSend(pendingPkt); 728 } 729 pendingPkt = NULL; 730 hasPendingPkt = false; 731 } 732} 733 734template <class Impl> 735void 736LSQUnit<Impl>::writebackStores() 737{ 738 // First writeback the second packet from any split store that didn't 739 // complete last cycle because there weren't enough cache ports available. 740 if (TheISA::HasUnalignedMemAcc) { 741 writebackPendingStore(); 742 } 743 744 while (storesToWB > 0 && 745 storeWBIdx != storeTail && 746 storeQueue[storeWBIdx].inst && 747 storeQueue[storeWBIdx].canWB && 748 ((!needsTSO) || (!storeInFlight)) && 749 usedPorts < cachePorts) { 750 751 if (isStoreBlocked || lsq->cacheBlocked()) { 752 DPRINTF(LSQUnit, "Unable to write back any more stores, cache" 753 " is blocked!\n"); 754 break; 755 } 756 757 // Store didn't write any data so no need to write it back to 758 // memory. 759 if (storeQueue[storeWBIdx].size == 0) { 760 completeStore(storeWBIdx); 761 762 incrStIdx(storeWBIdx); 763 764 continue; 765 } 766 767 ++usedPorts; 768 769 if (storeQueue[storeWBIdx].inst->isDataPrefetch()) { 770 incrStIdx(storeWBIdx); 771 772 continue; 773 } 774 775 assert(storeQueue[storeWBIdx].req); 776 assert(!storeQueue[storeWBIdx].committed); 777 778 if (TheISA::HasUnalignedMemAcc && storeQueue[storeWBIdx].isSplit) { 779 assert(storeQueue[storeWBIdx].sreqLow); 780 assert(storeQueue[storeWBIdx].sreqHigh); 781 } 782 783 DynInstPtr inst = storeQueue[storeWBIdx].inst; 784 785 Request *req = storeQueue[storeWBIdx].req; 786 RequestPtr sreqLow = storeQueue[storeWBIdx].sreqLow; 787 RequestPtr sreqHigh = storeQueue[storeWBIdx].sreqHigh; 788 789 storeQueue[storeWBIdx].committed = true; 790 791 assert(!inst->memData); 792 inst->memData = new uint8_t[64]; 793 794 memcpy(inst->memData, storeQueue[storeWBIdx].data, req->getSize()); 795 796 MemCmd command = 797 req->isSwap() ? MemCmd::SwapReq : 798 (req->isLLSC() ? MemCmd::StoreCondReq : MemCmd::WriteReq); 799 PacketPtr data_pkt; 800 PacketPtr snd_data_pkt = NULL; 801 802 LSQSenderState *state = new LSQSenderState; 803 state->isLoad = false; 804 state->idx = storeWBIdx; 805 state->inst = inst; 806 807 if (!TheISA::HasUnalignedMemAcc || !storeQueue[storeWBIdx].isSplit) { 808 809 // Build a single data packet if the store isn't split. 810 data_pkt = new Packet(req, command); 811 data_pkt->dataStatic(inst->memData); 812 data_pkt->senderState = state; 813 } else { 814 // Create two packets if the store is split in two. 815 data_pkt = new Packet(sreqLow, command); 816 snd_data_pkt = new Packet(sreqHigh, command); 817 818 data_pkt->dataStatic(inst->memData); 819 snd_data_pkt->dataStatic(inst->memData + sreqLow->getSize()); 820 821 data_pkt->senderState = state; 822 snd_data_pkt->senderState = state; 823 824 state->isSplit = true; 825 state->outstanding = 2; 826 827 // Can delete the main request now. 828 delete req; 829 req = sreqLow; 830 } 831 832 DPRINTF(LSQUnit, "D-Cache: Writing back store idx:%i PC:%s " 833 "to Addr:%#x, data:%#x [sn:%lli]\n", 834 storeWBIdx, inst->pcState(), 835 req->getPaddr(), (int)*(inst->memData), 836 inst->seqNum); 837 838 // @todo: Remove this SC hack once the memory system handles it. 839 if (inst->isStoreConditional()) { 840 assert(!storeQueue[storeWBIdx].isSplit); 841 // Disable recording the result temporarily. Writing to 842 // misc regs normally updates the result, but this is not 843 // the desired behavior when handling store conditionals. 844 inst->recordResult(false); 845 bool success = TheISA::handleLockedWrite(inst.get(), req); 846 inst->recordResult(true); 847 848 if (!success) { 849 // Instantly complete this store. 850 DPRINTF(LSQUnit, "Store conditional [sn:%lli] failed. " 851 "Instantly completing it.\n", 852 inst->seqNum); 853 WritebackEvent *wb = new WritebackEvent(inst, data_pkt, this); 854 cpu->schedule(wb, curTick() + 1); 855 if (cpu->checker) { 856 // Make sure to set the LLSC data for verification 857 // if checker is loaded 858 inst->reqToVerify->setExtraData(0); 859 inst->completeAcc(data_pkt); 860 } 861 completeStore(storeWBIdx); 862 incrStIdx(storeWBIdx); 863 continue; 864 } 865 } else { 866 // Non-store conditionals do not need a writeback. 867 state->noWB = true; 868 } 869 870 bool split = 871 TheISA::HasUnalignedMemAcc && storeQueue[storeWBIdx].isSplit; 872 873 ThreadContext *thread = cpu->tcBase(lsqID); 874 875 if (req->isMmappedIpr()) { 876 assert(!inst->isStoreConditional()); 877 TheISA::handleIprWrite(thread, data_pkt); 878 delete data_pkt; 879 if (split) { 880 assert(snd_data_pkt->req->isMmappedIpr()); 881 TheISA::handleIprWrite(thread, snd_data_pkt); 882 delete snd_data_pkt; 883 delete sreqLow; 884 delete sreqHigh; 885 } 886 delete state; 887 delete req; 888 completeStore(storeWBIdx); 889 incrStIdx(storeWBIdx); 890 } else if (!sendStore(data_pkt)) { 891 DPRINTF(IEW, "D-Cache became blocked when writing [sn:%lli], will" 892 "retry later\n", 893 inst->seqNum); 894 895 // Need to store the second packet, if split. 896 if (split) { 897 state->pktToSend = true; 898 state->pendingPacket = snd_data_pkt; 899 } 900 } else { 901 902 // If split, try to send the second packet too 903 if (split) { 904 assert(snd_data_pkt); 905 906 // Ensure there are enough ports to use. 907 if (usedPorts < cachePorts) { 908 ++usedPorts; 909 if (sendStore(snd_data_pkt)) { 910 storePostSend(snd_data_pkt); 911 } else { 912 DPRINTF(IEW, "D-Cache became blocked when writing" 913 " [sn:%lli] second packet, will retry later\n", 914 inst->seqNum); 915 } 916 } else { 917 918 // Store the packet for when there's free ports. 919 assert(pendingPkt == NULL); 920 pendingPkt = snd_data_pkt; 921 hasPendingPkt = true; 922 } 923 } else { 924 925 // Not a split store. 926 storePostSend(data_pkt); 927 } 928 } 929 } 930 931 // Not sure this should set it to 0. 932 usedPorts = 0; 933 934 assert(stores >= 0 && storesToWB >= 0); 935} 936 937/*template <class Impl> 938void 939LSQUnit<Impl>::removeMSHR(InstSeqNum seqNum) 940{ 941 list<InstSeqNum>::iterator mshr_it = find(mshrSeqNums.begin(), 942 mshrSeqNums.end(), 943 seqNum); 944 945 if (mshr_it != mshrSeqNums.end()) { 946 mshrSeqNums.erase(mshr_it); 947 DPRINTF(LSQUnit, "Removing MSHR. count = %i\n",mshrSeqNums.size()); 948 } 949}*/ 950 951template <class Impl> 952void 953LSQUnit<Impl>::squash(const InstSeqNum &squashed_num) 954{ 955 DPRINTF(LSQUnit, "Squashing until [sn:%lli]!" 956 "(Loads:%i Stores:%i)\n", squashed_num, loads, stores); 957 958 int load_idx = loadTail; 959 decrLdIdx(load_idx); 960 961 while (loads != 0 && loadQueue[load_idx]->seqNum > squashed_num) { 962 DPRINTF(LSQUnit,"Load Instruction PC %s squashed, " 963 "[sn:%lli]\n", 964 loadQueue[load_idx]->pcState(), 965 loadQueue[load_idx]->seqNum); 966 967 if (isStalled() && load_idx == stallingLoadIdx) { 968 stalled = false; 969 stallingStoreIsn = 0; 970 stallingLoadIdx = 0; 971 } 972 973 // Clear the smart pointer to make sure it is decremented. 974 loadQueue[load_idx]->setSquashed(); 975 loadQueue[load_idx] = NULL; 976 --loads; 977 978 // Inefficient! 979 loadTail = load_idx; 980 981 decrLdIdx(load_idx); 982 ++lsqSquashedLoads; 983 } 984 985 if (isLoadBlocked) { 986 if (squashed_num < blockedLoadSeqNum) { 987 isLoadBlocked = false; 988 loadBlockedHandled = false; 989 blockedLoadSeqNum = 0; 990 } 991 } 992 993 if (memDepViolator && squashed_num < memDepViolator->seqNum) { 994 memDepViolator = NULL; 995 } 996 997 int store_idx = storeTail; 998 decrStIdx(store_idx); 999 1000 while (stores != 0 && 1001 storeQueue[store_idx].inst->seqNum > squashed_num) { 1002 // Instructions marked as can WB are already committed. 1003 if (storeQueue[store_idx].canWB) { 1004 break; 1005 } 1006 1007 DPRINTF(LSQUnit,"Store Instruction PC %s squashed, " 1008 "idx:%i [sn:%lli]\n", 1009 storeQueue[store_idx].inst->pcState(), 1010 store_idx, storeQueue[store_idx].inst->seqNum); 1011 1012 // I don't think this can happen. It should have been cleared 1013 // by the stalling load. 1014 if (isStalled() && 1015 storeQueue[store_idx].inst->seqNum == stallingStoreIsn) { 1016 panic("Is stalled should have been cleared by stalling load!\n"); 1017 stalled = false; 1018 stallingStoreIsn = 0; 1019 } 1020 1021 // Clear the smart pointer to make sure it is decremented. 1022 storeQueue[store_idx].inst->setSquashed(); 1023 storeQueue[store_idx].inst = NULL; 1024 storeQueue[store_idx].canWB = 0; 1025 1026 // Must delete request now that it wasn't handed off to 1027 // memory. This is quite ugly. @todo: Figure out the proper 1028 // place to really handle request deletes. 1029 delete storeQueue[store_idx].req; 1030 if (TheISA::HasUnalignedMemAcc && storeQueue[store_idx].isSplit) { 1031 delete storeQueue[store_idx].sreqLow; 1032 delete storeQueue[store_idx].sreqHigh; 1033 1034 storeQueue[store_idx].sreqLow = NULL; 1035 storeQueue[store_idx].sreqHigh = NULL; 1036 } 1037 1038 storeQueue[store_idx].req = NULL; 1039 --stores; 1040 1041 // Inefficient! 1042 storeTail = store_idx; 1043 1044 decrStIdx(store_idx); 1045 ++lsqSquashedStores; 1046 } 1047} 1048 1049template <class Impl> 1050void 1051LSQUnit<Impl>::storePostSend(PacketPtr pkt) 1052{ 1053 if (isStalled() && 1054 storeQueue[storeWBIdx].inst->seqNum == stallingStoreIsn) { 1055 DPRINTF(LSQUnit, "Unstalling, stalling store [sn:%lli] " 1056 "load idx:%i\n", 1057 stallingStoreIsn, stallingLoadIdx); 1058 stalled = false; 1059 stallingStoreIsn = 0; 1060 iewStage->replayMemInst(loadQueue[stallingLoadIdx]); 1061 } 1062 1063 if (!storeQueue[storeWBIdx].inst->isStoreConditional()) { 1064 // The store is basically completed at this time. This 1065 // only works so long as the checker doesn't try to 1066 // verify the value in memory for stores. 1067 storeQueue[storeWBIdx].inst->setCompleted(); 1068 1069 if (cpu->checker) { 1070 cpu->checker->verify(storeQueue[storeWBIdx].inst); 1071 } 1072 } 1073 1074 if (needsTSO) { 1075 storeInFlight = true; 1076 } 1077 1078 incrStIdx(storeWBIdx); 1079} 1080 1081template <class Impl> 1082void 1083LSQUnit<Impl>::writeback(DynInstPtr &inst, PacketPtr pkt) 1084{ 1085 iewStage->wakeCPU(); 1086 1087 // Squashed instructions do not need to complete their access. 1088 if (inst->isSquashed()) { 1089 iewStage->decrWb(inst->seqNum); 1090 assert(!inst->isStore()); 1091 ++lsqIgnoredResponses; 1092 return; 1093 } 1094 1095 if (!inst->isExecuted()) { 1096 inst->setExecuted(); 1097 1098 // Complete access to copy data to proper place. 1099 inst->completeAcc(pkt); 1100 } 1101 1102 // Need to insert instruction into queue to commit 1103 iewStage->instToCommit(inst); 1104 1105 iewStage->activityThisCycle(); 1106 1107 // see if this load changed the PC 1108 iewStage->checkMisprediction(inst); 1109} 1110 1111template <class Impl> 1112void 1113LSQUnit<Impl>::completeStore(int store_idx) 1114{ 1115 assert(storeQueue[store_idx].inst); 1116 storeQueue[store_idx].completed = true; 1117 --storesToWB; 1118 // A bit conservative because a store completion may not free up entries, 1119 // but hopefully avoids two store completions in one cycle from making 1120 // the CPU tick twice. 1121 cpu->wakeCPU(); 1122 cpu->activityThisCycle(); 1123 1124 if (store_idx == storeHead) { 1125 do { 1126 incrStIdx(storeHead); 1127 1128 --stores; 1129 } while (storeQueue[storeHead].completed && 1130 storeHead != storeTail); 1131 1132 iewStage->updateLSQNextCycle = true; 1133 } 1134 1135 DPRINTF(LSQUnit, "Completing store [sn:%lli], idx:%i, store head " 1136 "idx:%i\n", 1137 storeQueue[store_idx].inst->seqNum, store_idx, storeHead); 1138 1139#if TRACING_ON 1140 if (DTRACE(O3PipeView)) { 1141 storeQueue[store_idx].inst->storeTick = 1142 curTick() - storeQueue[store_idx].inst->fetchTick; 1143 } 1144#endif 1145 1146 if (isStalled() && 1147 storeQueue[store_idx].inst->seqNum == stallingStoreIsn) { 1148 DPRINTF(LSQUnit, "Unstalling, stalling store [sn:%lli] " 1149 "load idx:%i\n", 1150 stallingStoreIsn, stallingLoadIdx); 1151 stalled = false; 1152 stallingStoreIsn = 0; 1153 iewStage->replayMemInst(loadQueue[stallingLoadIdx]); 1154 } 1155 1156 storeQueue[store_idx].inst->setCompleted(); 1157 1158 if (needsTSO) { 1159 storeInFlight = false; 1160 } 1161 1162 // Tell the checker we've completed this instruction. Some stores 1163 // may get reported twice to the checker, but the checker can 1164 // handle that case. 1165 if (cpu->checker) { 1166 cpu->checker->verify(storeQueue[store_idx].inst); 1167 } 1168} 1169 1170template <class Impl> 1171bool 1172LSQUnit<Impl>::sendStore(PacketPtr data_pkt) 1173{ 1174 if (!dcachePort->sendTimingReq(data_pkt)) { 1175 // Need to handle becoming blocked on a store. 1176 isStoreBlocked = true; 1177 ++lsqCacheBlocked; 1178 assert(retryPkt == NULL); 1179 retryPkt = data_pkt; 1180 lsq->setRetryTid(lsqID); 1181 return false; 1182 } 1183 return true; 1184} 1185 1186template <class Impl> 1187void 1188LSQUnit<Impl>::recvRetry() 1189{ 1190 if (isStoreBlocked) { 1191 DPRINTF(LSQUnit, "Receiving retry: store blocked\n"); 1192 assert(retryPkt != NULL); 1193 1194 LSQSenderState *state = 1195 dynamic_cast<LSQSenderState *>(retryPkt->senderState); 1196 1197 if (dcachePort->sendTimingReq(retryPkt)) { 1198 // Don't finish the store unless this is the last packet. 1199 if (!TheISA::HasUnalignedMemAcc || !state->pktToSend || 1200 state->pendingPacket == retryPkt) { 1201 state->pktToSend = false; 1202 storePostSend(retryPkt); 1203 } 1204 retryPkt = NULL; 1205 isStoreBlocked = false; 1206 lsq->setRetryTid(InvalidThreadID); 1207 1208 // Send any outstanding packet. 1209 if (TheISA::HasUnalignedMemAcc && state->pktToSend) { 1210 assert(state->pendingPacket); 1211 if (sendStore(state->pendingPacket)) { 1212 storePostSend(state->pendingPacket); 1213 } 1214 } 1215 } else { 1216 // Still blocked! 1217 ++lsqCacheBlocked; 1218 lsq->setRetryTid(lsqID); 1219 } 1220 } else if (isLoadBlocked) { 1221 DPRINTF(LSQUnit, "Loads squash themselves and all younger insts, " 1222 "no need to resend packet.\n"); 1223 } else { 1224 DPRINTF(LSQUnit, "Retry received but LSQ is no longer blocked.\n"); 1225 } 1226} 1227 1228template <class Impl> 1229inline void 1230LSQUnit<Impl>::incrStIdx(int &store_idx) const 1231{ 1232 if (++store_idx >= SQEntries) 1233 store_idx = 0; 1234} 1235 1236template <class Impl> 1237inline void 1238LSQUnit<Impl>::decrStIdx(int &store_idx) const 1239{ 1240 if (--store_idx < 0) 1241 store_idx += SQEntries; 1242} 1243 1244template <class Impl> 1245inline void 1246LSQUnit<Impl>::incrLdIdx(int &load_idx) const 1247{ 1248 if (++load_idx >= LQEntries) 1249 load_idx = 0; 1250} 1251 1252template <class Impl> 1253inline void 1254LSQUnit<Impl>::decrLdIdx(int &load_idx) const 1255{ 1256 if (--load_idx < 0) 1257 load_idx += LQEntries; 1258} 1259 1260template <class Impl> 1261void 1262LSQUnit<Impl>::dumpInsts() const 1263{ 1264 cprintf("Load store queue: Dumping instructions.\n"); 1265 cprintf("Load queue size: %i\n", loads); 1266 cprintf("Load queue: "); 1267 1268 int load_idx = loadHead; 1269 1270 while (load_idx != loadTail && loadQueue[load_idx]) { 1271 const DynInstPtr &inst(loadQueue[load_idx]); 1272 cprintf("%s.[sn:%i] ", inst->pcState(), inst->seqNum); 1273 1274 incrLdIdx(load_idx); 1275 } 1276 cprintf("\n"); 1277 1278 cprintf("Store queue size: %i\n", stores); 1279 cprintf("Store queue: "); 1280 1281 int store_idx = storeHead; 1282 1283 while (store_idx != storeTail && storeQueue[store_idx].inst) { 1284 const DynInstPtr &inst(storeQueue[store_idx].inst); 1285 cprintf("%s.[sn:%i] ", inst->pcState(), inst->seqNum); 1286 1287 incrStIdx(store_idx); 1288 } 1289 1290 cprintf("\n"); 1291}
| 48#include "arch/generic/debugfaults.hh" 49#include "arch/locked_mem.hh" 50#include "base/str.hh" 51#include "config/the_isa.hh" 52#include "cpu/checker/cpu.hh" 53#include "cpu/o3/lsq.hh" 54#include "cpu/o3/lsq_unit.hh" 55#include "debug/Activity.hh" 56#include "debug/IEW.hh" 57#include "debug/LSQUnit.hh" 58#include "debug/O3PipeView.hh" 59#include "mem/packet.hh" 60#include "mem/request.hh" 61 62template<class Impl> 63LSQUnit<Impl>::WritebackEvent::WritebackEvent(DynInstPtr &_inst, PacketPtr _pkt, 64 LSQUnit *lsq_ptr) 65 : Event(Default_Pri, AutoDelete), 66 inst(_inst), pkt(_pkt), lsqPtr(lsq_ptr) 67{ 68} 69 70template<class Impl> 71void 72LSQUnit<Impl>::WritebackEvent::process() 73{ 74 assert(!lsqPtr->cpu->switchedOut()); 75 76 lsqPtr->writeback(inst, pkt); 77 78 if (pkt->senderState) 79 delete pkt->senderState; 80 81 delete pkt->req; 82 delete pkt; 83} 84 85template<class Impl> 86const char * 87LSQUnit<Impl>::WritebackEvent::description() const 88{ 89 return "Store writeback"; 90} 91 92template<class Impl> 93void 94LSQUnit<Impl>::completeDataAccess(PacketPtr pkt) 95{ 96 LSQSenderState *state = dynamic_cast<LSQSenderState *>(pkt->senderState); 97 DynInstPtr inst = state->inst; 98 DPRINTF(IEW, "Writeback event [sn:%lli].\n", inst->seqNum); 99 DPRINTF(Activity, "Activity: Writeback event [sn:%lli].\n", inst->seqNum); 100 101 //iewStage->ldstQueue.removeMSHR(inst->threadNumber,inst->seqNum); 102 103 // If this is a split access, wait until all packets are received. 104 if (TheISA::HasUnalignedMemAcc && !state->complete()) { 105 delete pkt->req; 106 delete pkt; 107 return; 108 } 109 110 assert(!cpu->switchedOut()); 111 if (inst->isSquashed()) { 112 iewStage->decrWb(inst->seqNum); 113 } else { 114 if (!state->noWB) { 115 if (!TheISA::HasUnalignedMemAcc || !state->isSplit || 116 !state->isLoad) { 117 writeback(inst, pkt); 118 } else { 119 writeback(inst, state->mainPkt); 120 } 121 } 122 123 if (inst->isStore()) { 124 completeStore(state->idx); 125 } 126 } 127 128 if (TheISA::HasUnalignedMemAcc && state->isSplit && state->isLoad) { 129 delete state->mainPkt->req; 130 delete state->mainPkt; 131 } 132 delete state; 133 delete pkt->req; 134 delete pkt; 135} 136 137template <class Impl> 138LSQUnit<Impl>::LSQUnit() 139 : loads(0), stores(0), storesToWB(0), cacheBlockMask(0), stalled(false), 140 isStoreBlocked(false), isLoadBlocked(false), 141 loadBlockedHandled(false), storeInFlight(false), hasPendingPkt(false) 142{ 143} 144 145template<class Impl> 146void 147LSQUnit<Impl>::init(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params, 148 LSQ *lsq_ptr, unsigned maxLQEntries, unsigned maxSQEntries, 149 unsigned id) 150{ 151 cpu = cpu_ptr; 152 iewStage = iew_ptr; 153 154 DPRINTF(LSQUnit, "Creating LSQUnit%i object.\n",id); 155 156 lsq = lsq_ptr; 157 158 lsqID = id; 159 160 // Add 1 for the sentinel entry (they are circular queues). 161 LQEntries = maxLQEntries + 1; 162 SQEntries = maxSQEntries + 1; 163 164 //Due to uint8_t index in LSQSenderState 165 assert(LQEntries <= 256); 166 assert(SQEntries <= 256); 167 168 loadQueue.resize(LQEntries); 169 storeQueue.resize(SQEntries); 170 171 depCheckShift = params->LSQDepCheckShift; 172 checkLoads = params->LSQCheckLoads; 173 cachePorts = params->cachePorts; 174 needsTSO = params->needsTSO; 175 176 resetState(); 177} 178 179 180template<class Impl> 181void 182LSQUnit<Impl>::resetState() 183{ 184 loads = stores = storesToWB = 0; 185 186 loadHead = loadTail = 0; 187 188 storeHead = storeWBIdx = storeTail = 0; 189 190 usedPorts = 0; 191 192 retryPkt = NULL; 193 memDepViolator = NULL; 194 195 blockedLoadSeqNum = 0; 196 197 stalled = false; 198 isLoadBlocked = false; 199 loadBlockedHandled = false; 200 201 cacheBlockMask = ~(cpu->cacheLineSize() - 1); 202} 203 204template<class Impl> 205std::string 206LSQUnit<Impl>::name() const 207{ 208 if (Impl::MaxThreads == 1) { 209 return iewStage->name() + ".lsq"; 210 } else { 211 return iewStage->name() + ".lsq.thread" + to_string(lsqID); 212 } 213} 214 215template<class Impl> 216void 217LSQUnit<Impl>::regStats() 218{ 219 lsqForwLoads 220 .name(name() + ".forwLoads") 221 .desc("Number of loads that had data forwarded from stores"); 222 223 invAddrLoads 224 .name(name() + ".invAddrLoads") 225 .desc("Number of loads ignored due to an invalid address"); 226 227 lsqSquashedLoads 228 .name(name() + ".squashedLoads") 229 .desc("Number of loads squashed"); 230 231 lsqIgnoredResponses 232 .name(name() + ".ignoredResponses") 233 .desc("Number of memory responses ignored because the instruction is squashed"); 234 235 lsqMemOrderViolation 236 .name(name() + ".memOrderViolation") 237 .desc("Number of memory ordering violations"); 238 239 lsqSquashedStores 240 .name(name() + ".squashedStores") 241 .desc("Number of stores squashed"); 242 243 invAddrSwpfs 244 .name(name() + ".invAddrSwpfs") 245 .desc("Number of software prefetches ignored due to an invalid address"); 246 247 lsqBlockedLoads 248 .name(name() + ".blockedLoads") 249 .desc("Number of blocked loads due to partial load-store forwarding"); 250 251 lsqRescheduledLoads 252 .name(name() + ".rescheduledLoads") 253 .desc("Number of loads that were rescheduled"); 254 255 lsqCacheBlocked 256 .name(name() + ".cacheBlocked") 257 .desc("Number of times an access to memory failed due to the cache being blocked"); 258} 259 260template<class Impl> 261void 262LSQUnit<Impl>::setDcachePort(MasterPort *dcache_port) 263{ 264 dcachePort = dcache_port; 265} 266 267template<class Impl> 268void 269LSQUnit<Impl>::clearLQ() 270{ 271 loadQueue.clear(); 272} 273 274template<class Impl> 275void 276LSQUnit<Impl>::clearSQ() 277{ 278 storeQueue.clear(); 279} 280 281template<class Impl> 282void 283LSQUnit<Impl>::drainSanityCheck() const 284{ 285 for (int i = 0; i < loadQueue.size(); ++i) 286 assert(!loadQueue[i]); 287 288 assert(storesToWB == 0); 289 assert(!retryPkt); 290} 291 292template<class Impl> 293void 294LSQUnit<Impl>::takeOverFrom() 295{ 296 resetState(); 297} 298 299template<class Impl> 300void 301LSQUnit<Impl>::resizeLQ(unsigned size) 302{ 303 unsigned size_plus_sentinel = size + 1; 304 assert(size_plus_sentinel >= LQEntries); 305 306 if (size_plus_sentinel > LQEntries) { 307 while (size_plus_sentinel > loadQueue.size()) { 308 DynInstPtr dummy; 309 loadQueue.push_back(dummy); 310 LQEntries++; 311 } 312 } else { 313 LQEntries = size_plus_sentinel; 314 } 315 316 assert(LQEntries <= 256); 317} 318 319template<class Impl> 320void 321LSQUnit<Impl>::resizeSQ(unsigned size) 322{ 323 unsigned size_plus_sentinel = size + 1; 324 if (size_plus_sentinel > SQEntries) { 325 while (size_plus_sentinel > storeQueue.size()) { 326 SQEntry dummy; 327 storeQueue.push_back(dummy); 328 SQEntries++; 329 } 330 } else { 331 SQEntries = size_plus_sentinel; 332 } 333 334 assert(SQEntries <= 256); 335} 336 337template <class Impl> 338void 339LSQUnit<Impl>::insert(DynInstPtr &inst) 340{ 341 assert(inst->isMemRef()); 342 343 assert(inst->isLoad() || inst->isStore()); 344 345 if (inst->isLoad()) { 346 insertLoad(inst); 347 } else { 348 insertStore(inst); 349 } 350 351 inst->setInLSQ(); 352} 353 354template <class Impl> 355void 356LSQUnit<Impl>::insertLoad(DynInstPtr &load_inst) 357{ 358 assert((loadTail + 1) % LQEntries != loadHead); 359 assert(loads < LQEntries); 360 361 DPRINTF(LSQUnit, "Inserting load PC %s, idx:%i [sn:%lli]\n", 362 load_inst->pcState(), loadTail, load_inst->seqNum); 363 364 load_inst->lqIdx = loadTail; 365 366 if (stores == 0) { 367 load_inst->sqIdx = -1; 368 } else { 369 load_inst->sqIdx = storeTail; 370 } 371 372 loadQueue[loadTail] = load_inst; 373 374 incrLdIdx(loadTail); 375 376 ++loads; 377} 378 379template <class Impl> 380void 381LSQUnit<Impl>::insertStore(DynInstPtr &store_inst) 382{ 383 // Make sure it is not full before inserting an instruction. 384 assert((storeTail + 1) % SQEntries != storeHead); 385 assert(stores < SQEntries); 386 387 DPRINTF(LSQUnit, "Inserting store PC %s, idx:%i [sn:%lli]\n", 388 store_inst->pcState(), storeTail, store_inst->seqNum); 389 390 store_inst->sqIdx = storeTail; 391 store_inst->lqIdx = loadTail; 392 393 storeQueue[storeTail] = SQEntry(store_inst); 394 395 incrStIdx(storeTail); 396 397 ++stores; 398} 399 400template <class Impl> 401typename Impl::DynInstPtr 402LSQUnit<Impl>::getMemDepViolator() 403{ 404 DynInstPtr temp = memDepViolator; 405 406 memDepViolator = NULL; 407 408 return temp; 409} 410 411template <class Impl> 412unsigned 413LSQUnit<Impl>::numFreeEntries() 414{ 415 unsigned free_lq_entries = LQEntries - loads; 416 unsigned free_sq_entries = SQEntries - stores; 417 418 // Both the LQ and SQ entries have an extra dummy entry to differentiate 419 // empty/full conditions. Subtract 1 from the free entries. 420 if (free_lq_entries < free_sq_entries) { 421 return free_lq_entries - 1; 422 } else { 423 return free_sq_entries - 1; 424 } 425} 426 427template <class Impl> 428void 429LSQUnit<Impl>::checkSnoop(PacketPtr pkt) 430{ 431 int load_idx = loadHead; 432 433 // Unlock the cpu-local monitor when the CPU sees a snoop to a locked 434 // address. The CPU can speculatively execute a LL operation after a pending 435 // SC operation in the pipeline and that can make the cache monitor the CPU 436 // is connected to valid while it really shouldn't be. 437 for (int x = 0; x < cpu->numActiveThreads(); x++) { 438 ThreadContext *tc = cpu->getContext(x); 439 bool no_squash = cpu->thread[x]->noSquashFromTC; 440 cpu->thread[x]->noSquashFromTC = true; 441 TheISA::handleLockedSnoop(tc, pkt, cacheBlockMask); 442 cpu->thread[x]->noSquashFromTC = no_squash; 443 } 444 445 // If this is the only load in the LSQ we don't care 446 if (load_idx == loadTail) 447 return; 448 incrLdIdx(load_idx); 449 450 DPRINTF(LSQUnit, "Got snoop for address %#x\n", pkt->getAddr()); 451 Addr invalidate_addr = pkt->getAddr() & cacheBlockMask; 452 while (load_idx != loadTail) { 453 DynInstPtr ld_inst = loadQueue[load_idx]; 454 455 if (!ld_inst->effAddrValid() || ld_inst->uncacheable()) { 456 incrLdIdx(load_idx); 457 continue; 458 } 459 460 Addr load_addr = ld_inst->physEffAddr & cacheBlockMask; 461 DPRINTF(LSQUnit, "-- inst [sn:%lli] load_addr: %#x to pktAddr:%#x\n", 462 ld_inst->seqNum, load_addr, invalidate_addr); 463 464 if (load_addr == invalidate_addr) { 465 if (ld_inst->possibleLoadViolation()) { 466 DPRINTF(LSQUnit, "Conflicting load at addr %#x [sn:%lli]\n", 467 ld_inst->physEffAddr, pkt->getAddr(), ld_inst->seqNum); 468 469 // Mark the load for re-execution 470 ld_inst->fault = new ReExec; 471 } else { 472 // If a older load checks this and it's true 473 // then we might have missed the snoop 474 // in which case we need to invalidate to be sure 475 ld_inst->hitExternalSnoop(true); 476 } 477 } 478 incrLdIdx(load_idx); 479 } 480 return; 481} 482 483template <class Impl> 484Fault 485LSQUnit<Impl>::checkViolations(int load_idx, DynInstPtr &inst) 486{ 487 Addr inst_eff_addr1 = inst->effAddr >> depCheckShift; 488 Addr inst_eff_addr2 = (inst->effAddr + inst->effSize - 1) >> depCheckShift; 489 490 /** @todo in theory you only need to check an instruction that has executed 491 * however, there isn't a good way in the pipeline at the moment to check 492 * all instructions that will execute before the store writes back. Thus, 493 * like the implementation that came before it, we're overly conservative. 494 */ 495 while (load_idx != loadTail) { 496 DynInstPtr ld_inst = loadQueue[load_idx]; 497 if (!ld_inst->effAddrValid() || ld_inst->uncacheable()) { 498 incrLdIdx(load_idx); 499 continue; 500 } 501 502 Addr ld_eff_addr1 = ld_inst->effAddr >> depCheckShift; 503 Addr ld_eff_addr2 = 504 (ld_inst->effAddr + ld_inst->effSize - 1) >> depCheckShift; 505 506 if (inst_eff_addr2 >= ld_eff_addr1 && inst_eff_addr1 <= ld_eff_addr2) { 507 if (inst->isLoad()) { 508 // If this load is to the same block as an external snoop 509 // invalidate that we've observed then the load needs to be 510 // squashed as it could have newer data 511 if (ld_inst->hitExternalSnoop()) { 512 if (!memDepViolator || 513 ld_inst->seqNum < memDepViolator->seqNum) { 514 DPRINTF(LSQUnit, "Detected fault with inst [sn:%lli] " 515 "and [sn:%lli] at address %#x\n", 516 inst->seqNum, ld_inst->seqNum, ld_eff_addr1); 517 memDepViolator = ld_inst; 518 519 ++lsqMemOrderViolation; 520 521 return new GenericISA::M5PanicFault( 522 "Detected fault with inst [sn:%lli] and " 523 "[sn:%lli] at address %#x\n", 524 inst->seqNum, ld_inst->seqNum, ld_eff_addr1); 525 } 526 } 527 528 // Otherwise, mark the load has a possible load violation 529 // and if we see a snoop before it's commited, we need to squash 530 ld_inst->possibleLoadViolation(true); 531 DPRINTF(LSQUnit, "Found possible load violaiton at addr: %#x" 532 " between instructions [sn:%lli] and [sn:%lli]\n", 533 inst_eff_addr1, inst->seqNum, ld_inst->seqNum); 534 } else { 535 // A load/store incorrectly passed this store. 536 // Check if we already have a violator, or if it's newer 537 // squash and refetch. 538 if (memDepViolator && ld_inst->seqNum > memDepViolator->seqNum) 539 break; 540 541 DPRINTF(LSQUnit, "Detected fault with inst [sn:%lli] and " 542 "[sn:%lli] at address %#x\n", 543 inst->seqNum, ld_inst->seqNum, ld_eff_addr1); 544 memDepViolator = ld_inst; 545 546 ++lsqMemOrderViolation; 547 548 return new GenericISA::M5PanicFault("Detected fault with " 549 "inst [sn:%lli] and [sn:%lli] at address %#x\n", 550 inst->seqNum, ld_inst->seqNum, ld_eff_addr1); 551 } 552 } 553 554 incrLdIdx(load_idx); 555 } 556 return NoFault; 557} 558 559 560 561 562template <class Impl> 563Fault 564LSQUnit<Impl>::executeLoad(DynInstPtr &inst) 565{ 566 using namespace TheISA; 567 // Execute a specific load. 568 Fault load_fault = NoFault; 569 570 DPRINTF(LSQUnit, "Executing load PC %s, [sn:%lli]\n", 571 inst->pcState(), inst->seqNum); 572 573 assert(!inst->isSquashed()); 574 575 load_fault = inst->initiateAcc(); 576 577 if (inst->isTranslationDelayed() && 578 load_fault == NoFault) 579 return load_fault; 580 581 // If the instruction faulted or predicated false, then we need to send it 582 // along to commit without the instruction completing. 583 if (load_fault != NoFault || inst->readPredicate() == false) { 584 // Send this instruction to commit, also make sure iew stage 585 // realizes there is activity. 586 // Mark it as executed unless it is an uncached load that 587 // needs to hit the head of commit. 588 if (inst->readPredicate() == false) 589 inst->forwardOldRegs(); 590 DPRINTF(LSQUnit, "Load [sn:%lli] not executed from %s\n", 591 inst->seqNum, 592 (load_fault != NoFault ? "fault" : "predication")); 593 if (!(inst->hasRequest() && inst->uncacheable()) || 594 inst->isAtCommit()) { 595 inst->setExecuted(); 596 } 597 iewStage->instToCommit(inst); 598 iewStage->activityThisCycle(); 599 } else if (!loadBlocked()) { 600 assert(inst->effAddrValid()); 601 int load_idx = inst->lqIdx; 602 incrLdIdx(load_idx); 603 604 if (checkLoads) 605 return checkViolations(load_idx, inst); 606 } 607 608 return load_fault; 609} 610 611template <class Impl> 612Fault 613LSQUnit<Impl>::executeStore(DynInstPtr &store_inst) 614{ 615 using namespace TheISA; 616 // Make sure that a store exists. 617 assert(stores != 0); 618 619 int store_idx = store_inst->sqIdx; 620 621 DPRINTF(LSQUnit, "Executing store PC %s [sn:%lli]\n", 622 store_inst->pcState(), store_inst->seqNum); 623 624 assert(!store_inst->isSquashed()); 625 626 // Check the recently completed loads to see if any match this store's 627 // address. If so, then we have a memory ordering violation. 628 int load_idx = store_inst->lqIdx; 629 630 Fault store_fault = store_inst->initiateAcc(); 631 632 if (store_inst->isTranslationDelayed() && 633 store_fault == NoFault) 634 return store_fault; 635 636 if (store_inst->readPredicate() == false) 637 store_inst->forwardOldRegs(); 638 639 if (storeQueue[store_idx].size == 0) { 640 DPRINTF(LSQUnit,"Fault on Store PC %s, [sn:%lli], Size = 0\n", 641 store_inst->pcState(), store_inst->seqNum); 642 643 return store_fault; 644 } else if (store_inst->readPredicate() == false) { 645 DPRINTF(LSQUnit, "Store [sn:%lli] not executed from predication\n", 646 store_inst->seqNum); 647 return store_fault; 648 } 649 650 assert(store_fault == NoFault); 651 652 if (store_inst->isStoreConditional()) { 653 // Store conditionals need to set themselves as able to 654 // writeback if we haven't had a fault by here. 655 storeQueue[store_idx].canWB = true; 656 657 ++storesToWB; 658 } 659 660 return checkViolations(load_idx, store_inst); 661 662} 663 664template <class Impl> 665void 666LSQUnit<Impl>::commitLoad() 667{ 668 assert(loadQueue[loadHead]); 669 670 DPRINTF(LSQUnit, "Committing head load instruction, PC %s\n", 671 loadQueue[loadHead]->pcState()); 672 673 loadQueue[loadHead] = NULL; 674 675 incrLdIdx(loadHead); 676 677 --loads; 678} 679 680template <class Impl> 681void 682LSQUnit<Impl>::commitLoads(InstSeqNum &youngest_inst) 683{ 684 assert(loads == 0 || loadQueue[loadHead]); 685 686 while (loads != 0 && loadQueue[loadHead]->seqNum <= youngest_inst) { 687 commitLoad(); 688 } 689} 690 691template <class Impl> 692void 693LSQUnit<Impl>::commitStores(InstSeqNum &youngest_inst) 694{ 695 assert(stores == 0 || storeQueue[storeHead].inst); 696 697 int store_idx = storeHead; 698 699 while (store_idx != storeTail) { 700 assert(storeQueue[store_idx].inst); 701 // Mark any stores that are now committed and have not yet 702 // been marked as able to write back. 703 if (!storeQueue[store_idx].canWB) { 704 if (storeQueue[store_idx].inst->seqNum > youngest_inst) { 705 break; 706 } 707 DPRINTF(LSQUnit, "Marking store as able to write back, PC " 708 "%s [sn:%lli]\n", 709 storeQueue[store_idx].inst->pcState(), 710 storeQueue[store_idx].inst->seqNum); 711 712 storeQueue[store_idx].canWB = true; 713 714 ++storesToWB; 715 } 716 717 incrStIdx(store_idx); 718 } 719} 720 721template <class Impl> 722void 723LSQUnit<Impl>::writebackPendingStore() 724{ 725 if (hasPendingPkt) { 726 assert(pendingPkt != NULL); 727 728 // If the cache is blocked, this will store the packet for retry. 729 if (sendStore(pendingPkt)) { 730 storePostSend(pendingPkt); 731 } 732 pendingPkt = NULL; 733 hasPendingPkt = false; 734 } 735} 736 737template <class Impl> 738void 739LSQUnit<Impl>::writebackStores() 740{ 741 // First writeback the second packet from any split store that didn't 742 // complete last cycle because there weren't enough cache ports available. 743 if (TheISA::HasUnalignedMemAcc) { 744 writebackPendingStore(); 745 } 746 747 while (storesToWB > 0 && 748 storeWBIdx != storeTail && 749 storeQueue[storeWBIdx].inst && 750 storeQueue[storeWBIdx].canWB && 751 ((!needsTSO) || (!storeInFlight)) && 752 usedPorts < cachePorts) { 753 754 if (isStoreBlocked || lsq->cacheBlocked()) { 755 DPRINTF(LSQUnit, "Unable to write back any more stores, cache" 756 " is blocked!\n"); 757 break; 758 } 759 760 // Store didn't write any data so no need to write it back to 761 // memory. 762 if (storeQueue[storeWBIdx].size == 0) { 763 completeStore(storeWBIdx); 764 765 incrStIdx(storeWBIdx); 766 767 continue; 768 } 769 770 ++usedPorts; 771 772 if (storeQueue[storeWBIdx].inst->isDataPrefetch()) { 773 incrStIdx(storeWBIdx); 774 775 continue; 776 } 777 778 assert(storeQueue[storeWBIdx].req); 779 assert(!storeQueue[storeWBIdx].committed); 780 781 if (TheISA::HasUnalignedMemAcc && storeQueue[storeWBIdx].isSplit) { 782 assert(storeQueue[storeWBIdx].sreqLow); 783 assert(storeQueue[storeWBIdx].sreqHigh); 784 } 785 786 DynInstPtr inst = storeQueue[storeWBIdx].inst; 787 788 Request *req = storeQueue[storeWBIdx].req; 789 RequestPtr sreqLow = storeQueue[storeWBIdx].sreqLow; 790 RequestPtr sreqHigh = storeQueue[storeWBIdx].sreqHigh; 791 792 storeQueue[storeWBIdx].committed = true; 793 794 assert(!inst->memData); 795 inst->memData = new uint8_t[64]; 796 797 memcpy(inst->memData, storeQueue[storeWBIdx].data, req->getSize()); 798 799 MemCmd command = 800 req->isSwap() ? MemCmd::SwapReq : 801 (req->isLLSC() ? MemCmd::StoreCondReq : MemCmd::WriteReq); 802 PacketPtr data_pkt; 803 PacketPtr snd_data_pkt = NULL; 804 805 LSQSenderState *state = new LSQSenderState; 806 state->isLoad = false; 807 state->idx = storeWBIdx; 808 state->inst = inst; 809 810 if (!TheISA::HasUnalignedMemAcc || !storeQueue[storeWBIdx].isSplit) { 811 812 // Build a single data packet if the store isn't split. 813 data_pkt = new Packet(req, command); 814 data_pkt->dataStatic(inst->memData); 815 data_pkt->senderState = state; 816 } else { 817 // Create two packets if the store is split in two. 818 data_pkt = new Packet(sreqLow, command); 819 snd_data_pkt = new Packet(sreqHigh, command); 820 821 data_pkt->dataStatic(inst->memData); 822 snd_data_pkt->dataStatic(inst->memData + sreqLow->getSize()); 823 824 data_pkt->senderState = state; 825 snd_data_pkt->senderState = state; 826 827 state->isSplit = true; 828 state->outstanding = 2; 829 830 // Can delete the main request now. 831 delete req; 832 req = sreqLow; 833 } 834 835 DPRINTF(LSQUnit, "D-Cache: Writing back store idx:%i PC:%s " 836 "to Addr:%#x, data:%#x [sn:%lli]\n", 837 storeWBIdx, inst->pcState(), 838 req->getPaddr(), (int)*(inst->memData), 839 inst->seqNum); 840 841 // @todo: Remove this SC hack once the memory system handles it. 842 if (inst->isStoreConditional()) { 843 assert(!storeQueue[storeWBIdx].isSplit); 844 // Disable recording the result temporarily. Writing to 845 // misc regs normally updates the result, but this is not 846 // the desired behavior when handling store conditionals. 847 inst->recordResult(false); 848 bool success = TheISA::handleLockedWrite(inst.get(), req); 849 inst->recordResult(true); 850 851 if (!success) { 852 // Instantly complete this store. 853 DPRINTF(LSQUnit, "Store conditional [sn:%lli] failed. " 854 "Instantly completing it.\n", 855 inst->seqNum); 856 WritebackEvent *wb = new WritebackEvent(inst, data_pkt, this); 857 cpu->schedule(wb, curTick() + 1); 858 if (cpu->checker) { 859 // Make sure to set the LLSC data for verification 860 // if checker is loaded 861 inst->reqToVerify->setExtraData(0); 862 inst->completeAcc(data_pkt); 863 } 864 completeStore(storeWBIdx); 865 incrStIdx(storeWBIdx); 866 continue; 867 } 868 } else { 869 // Non-store conditionals do not need a writeback. 870 state->noWB = true; 871 } 872 873 bool split = 874 TheISA::HasUnalignedMemAcc && storeQueue[storeWBIdx].isSplit; 875 876 ThreadContext *thread = cpu->tcBase(lsqID); 877 878 if (req->isMmappedIpr()) { 879 assert(!inst->isStoreConditional()); 880 TheISA::handleIprWrite(thread, data_pkt); 881 delete data_pkt; 882 if (split) { 883 assert(snd_data_pkt->req->isMmappedIpr()); 884 TheISA::handleIprWrite(thread, snd_data_pkt); 885 delete snd_data_pkt; 886 delete sreqLow; 887 delete sreqHigh; 888 } 889 delete state; 890 delete req; 891 completeStore(storeWBIdx); 892 incrStIdx(storeWBIdx); 893 } else if (!sendStore(data_pkt)) { 894 DPRINTF(IEW, "D-Cache became blocked when writing [sn:%lli], will" 895 "retry later\n", 896 inst->seqNum); 897 898 // Need to store the second packet, if split. 899 if (split) { 900 state->pktToSend = true; 901 state->pendingPacket = snd_data_pkt; 902 } 903 } else { 904 905 // If split, try to send the second packet too 906 if (split) { 907 assert(snd_data_pkt); 908 909 // Ensure there are enough ports to use. 910 if (usedPorts < cachePorts) { 911 ++usedPorts; 912 if (sendStore(snd_data_pkt)) { 913 storePostSend(snd_data_pkt); 914 } else { 915 DPRINTF(IEW, "D-Cache became blocked when writing" 916 " [sn:%lli] second packet, will retry later\n", 917 inst->seqNum); 918 } 919 } else { 920 921 // Store the packet for when there's free ports. 922 assert(pendingPkt == NULL); 923 pendingPkt = snd_data_pkt; 924 hasPendingPkt = true; 925 } 926 } else { 927 928 // Not a split store. 929 storePostSend(data_pkt); 930 } 931 } 932 } 933 934 // Not sure this should set it to 0. 935 usedPorts = 0; 936 937 assert(stores >= 0 && storesToWB >= 0); 938} 939 940/*template <class Impl> 941void 942LSQUnit<Impl>::removeMSHR(InstSeqNum seqNum) 943{ 944 list<InstSeqNum>::iterator mshr_it = find(mshrSeqNums.begin(), 945 mshrSeqNums.end(), 946 seqNum); 947 948 if (mshr_it != mshrSeqNums.end()) { 949 mshrSeqNums.erase(mshr_it); 950 DPRINTF(LSQUnit, "Removing MSHR. count = %i\n",mshrSeqNums.size()); 951 } 952}*/ 953 954template <class Impl> 955void 956LSQUnit<Impl>::squash(const InstSeqNum &squashed_num) 957{ 958 DPRINTF(LSQUnit, "Squashing until [sn:%lli]!" 959 "(Loads:%i Stores:%i)\n", squashed_num, loads, stores); 960 961 int load_idx = loadTail; 962 decrLdIdx(load_idx); 963 964 while (loads != 0 && loadQueue[load_idx]->seqNum > squashed_num) { 965 DPRINTF(LSQUnit,"Load Instruction PC %s squashed, " 966 "[sn:%lli]\n", 967 loadQueue[load_idx]->pcState(), 968 loadQueue[load_idx]->seqNum); 969 970 if (isStalled() && load_idx == stallingLoadIdx) { 971 stalled = false; 972 stallingStoreIsn = 0; 973 stallingLoadIdx = 0; 974 } 975 976 // Clear the smart pointer to make sure it is decremented. 977 loadQueue[load_idx]->setSquashed(); 978 loadQueue[load_idx] = NULL; 979 --loads; 980 981 // Inefficient! 982 loadTail = load_idx; 983 984 decrLdIdx(load_idx); 985 ++lsqSquashedLoads; 986 } 987 988 if (isLoadBlocked) { 989 if (squashed_num < blockedLoadSeqNum) { 990 isLoadBlocked = false; 991 loadBlockedHandled = false; 992 blockedLoadSeqNum = 0; 993 } 994 } 995 996 if (memDepViolator && squashed_num < memDepViolator->seqNum) { 997 memDepViolator = NULL; 998 } 999 1000 int store_idx = storeTail; 1001 decrStIdx(store_idx); 1002 1003 while (stores != 0 && 1004 storeQueue[store_idx].inst->seqNum > squashed_num) { 1005 // Instructions marked as can WB are already committed. 1006 if (storeQueue[store_idx].canWB) { 1007 break; 1008 } 1009 1010 DPRINTF(LSQUnit,"Store Instruction PC %s squashed, " 1011 "idx:%i [sn:%lli]\n", 1012 storeQueue[store_idx].inst->pcState(), 1013 store_idx, storeQueue[store_idx].inst->seqNum); 1014 1015 // I don't think this can happen. It should have been cleared 1016 // by the stalling load. 1017 if (isStalled() && 1018 storeQueue[store_idx].inst->seqNum == stallingStoreIsn) { 1019 panic("Is stalled should have been cleared by stalling load!\n"); 1020 stalled = false; 1021 stallingStoreIsn = 0; 1022 } 1023 1024 // Clear the smart pointer to make sure it is decremented. 1025 storeQueue[store_idx].inst->setSquashed(); 1026 storeQueue[store_idx].inst = NULL; 1027 storeQueue[store_idx].canWB = 0; 1028 1029 // Must delete request now that it wasn't handed off to 1030 // memory. This is quite ugly. @todo: Figure out the proper 1031 // place to really handle request deletes. 1032 delete storeQueue[store_idx].req; 1033 if (TheISA::HasUnalignedMemAcc && storeQueue[store_idx].isSplit) { 1034 delete storeQueue[store_idx].sreqLow; 1035 delete storeQueue[store_idx].sreqHigh; 1036 1037 storeQueue[store_idx].sreqLow = NULL; 1038 storeQueue[store_idx].sreqHigh = NULL; 1039 } 1040 1041 storeQueue[store_idx].req = NULL; 1042 --stores; 1043 1044 // Inefficient! 1045 storeTail = store_idx; 1046 1047 decrStIdx(store_idx); 1048 ++lsqSquashedStores; 1049 } 1050} 1051 1052template <class Impl> 1053void 1054LSQUnit<Impl>::storePostSend(PacketPtr pkt) 1055{ 1056 if (isStalled() && 1057 storeQueue[storeWBIdx].inst->seqNum == stallingStoreIsn) { 1058 DPRINTF(LSQUnit, "Unstalling, stalling store [sn:%lli] " 1059 "load idx:%i\n", 1060 stallingStoreIsn, stallingLoadIdx); 1061 stalled = false; 1062 stallingStoreIsn = 0; 1063 iewStage->replayMemInst(loadQueue[stallingLoadIdx]); 1064 } 1065 1066 if (!storeQueue[storeWBIdx].inst->isStoreConditional()) { 1067 // The store is basically completed at this time. This 1068 // only works so long as the checker doesn't try to 1069 // verify the value in memory for stores. 1070 storeQueue[storeWBIdx].inst->setCompleted(); 1071 1072 if (cpu->checker) { 1073 cpu->checker->verify(storeQueue[storeWBIdx].inst); 1074 } 1075 } 1076 1077 if (needsTSO) { 1078 storeInFlight = true; 1079 } 1080 1081 incrStIdx(storeWBIdx); 1082} 1083 1084template <class Impl> 1085void 1086LSQUnit<Impl>::writeback(DynInstPtr &inst, PacketPtr pkt) 1087{ 1088 iewStage->wakeCPU(); 1089 1090 // Squashed instructions do not need to complete their access. 1091 if (inst->isSquashed()) { 1092 iewStage->decrWb(inst->seqNum); 1093 assert(!inst->isStore()); 1094 ++lsqIgnoredResponses; 1095 return; 1096 } 1097 1098 if (!inst->isExecuted()) { 1099 inst->setExecuted(); 1100 1101 // Complete access to copy data to proper place. 1102 inst->completeAcc(pkt); 1103 } 1104 1105 // Need to insert instruction into queue to commit 1106 iewStage->instToCommit(inst); 1107 1108 iewStage->activityThisCycle(); 1109 1110 // see if this load changed the PC 1111 iewStage->checkMisprediction(inst); 1112} 1113 1114template <class Impl> 1115void 1116LSQUnit<Impl>::completeStore(int store_idx) 1117{ 1118 assert(storeQueue[store_idx].inst); 1119 storeQueue[store_idx].completed = true; 1120 --storesToWB; 1121 // A bit conservative because a store completion may not free up entries, 1122 // but hopefully avoids two store completions in one cycle from making 1123 // the CPU tick twice. 1124 cpu->wakeCPU(); 1125 cpu->activityThisCycle(); 1126 1127 if (store_idx == storeHead) { 1128 do { 1129 incrStIdx(storeHead); 1130 1131 --stores; 1132 } while (storeQueue[storeHead].completed && 1133 storeHead != storeTail); 1134 1135 iewStage->updateLSQNextCycle = true; 1136 } 1137 1138 DPRINTF(LSQUnit, "Completing store [sn:%lli], idx:%i, store head " 1139 "idx:%i\n", 1140 storeQueue[store_idx].inst->seqNum, store_idx, storeHead); 1141 1142#if TRACING_ON 1143 if (DTRACE(O3PipeView)) { 1144 storeQueue[store_idx].inst->storeTick = 1145 curTick() - storeQueue[store_idx].inst->fetchTick; 1146 } 1147#endif 1148 1149 if (isStalled() && 1150 storeQueue[store_idx].inst->seqNum == stallingStoreIsn) { 1151 DPRINTF(LSQUnit, "Unstalling, stalling store [sn:%lli] " 1152 "load idx:%i\n", 1153 stallingStoreIsn, stallingLoadIdx); 1154 stalled = false; 1155 stallingStoreIsn = 0; 1156 iewStage->replayMemInst(loadQueue[stallingLoadIdx]); 1157 } 1158 1159 storeQueue[store_idx].inst->setCompleted(); 1160 1161 if (needsTSO) { 1162 storeInFlight = false; 1163 } 1164 1165 // Tell the checker we've completed this instruction. Some stores 1166 // may get reported twice to the checker, but the checker can 1167 // handle that case. 1168 if (cpu->checker) { 1169 cpu->checker->verify(storeQueue[store_idx].inst); 1170 } 1171} 1172 1173template <class Impl> 1174bool 1175LSQUnit<Impl>::sendStore(PacketPtr data_pkt) 1176{ 1177 if (!dcachePort->sendTimingReq(data_pkt)) { 1178 // Need to handle becoming blocked on a store. 1179 isStoreBlocked = true; 1180 ++lsqCacheBlocked; 1181 assert(retryPkt == NULL); 1182 retryPkt = data_pkt; 1183 lsq->setRetryTid(lsqID); 1184 return false; 1185 } 1186 return true; 1187} 1188 1189template <class Impl> 1190void 1191LSQUnit<Impl>::recvRetry() 1192{ 1193 if (isStoreBlocked) { 1194 DPRINTF(LSQUnit, "Receiving retry: store blocked\n"); 1195 assert(retryPkt != NULL); 1196 1197 LSQSenderState *state = 1198 dynamic_cast<LSQSenderState *>(retryPkt->senderState); 1199 1200 if (dcachePort->sendTimingReq(retryPkt)) { 1201 // Don't finish the store unless this is the last packet. 1202 if (!TheISA::HasUnalignedMemAcc || !state->pktToSend || 1203 state->pendingPacket == retryPkt) { 1204 state->pktToSend = false; 1205 storePostSend(retryPkt); 1206 } 1207 retryPkt = NULL; 1208 isStoreBlocked = false; 1209 lsq->setRetryTid(InvalidThreadID); 1210 1211 // Send any outstanding packet. 1212 if (TheISA::HasUnalignedMemAcc && state->pktToSend) { 1213 assert(state->pendingPacket); 1214 if (sendStore(state->pendingPacket)) { 1215 storePostSend(state->pendingPacket); 1216 } 1217 } 1218 } else { 1219 // Still blocked! 1220 ++lsqCacheBlocked; 1221 lsq->setRetryTid(lsqID); 1222 } 1223 } else if (isLoadBlocked) { 1224 DPRINTF(LSQUnit, "Loads squash themselves and all younger insts, " 1225 "no need to resend packet.\n"); 1226 } else { 1227 DPRINTF(LSQUnit, "Retry received but LSQ is no longer blocked.\n"); 1228 } 1229} 1230 1231template <class Impl> 1232inline void 1233LSQUnit<Impl>::incrStIdx(int &store_idx) const 1234{ 1235 if (++store_idx >= SQEntries) 1236 store_idx = 0; 1237} 1238 1239template <class Impl> 1240inline void 1241LSQUnit<Impl>::decrStIdx(int &store_idx) const 1242{ 1243 if (--store_idx < 0) 1244 store_idx += SQEntries; 1245} 1246 1247template <class Impl> 1248inline void 1249LSQUnit<Impl>::incrLdIdx(int &load_idx) const 1250{ 1251 if (++load_idx >= LQEntries) 1252 load_idx = 0; 1253} 1254 1255template <class Impl> 1256inline void 1257LSQUnit<Impl>::decrLdIdx(int &load_idx) const 1258{ 1259 if (--load_idx < 0) 1260 load_idx += LQEntries; 1261} 1262 1263template <class Impl> 1264void 1265LSQUnit<Impl>::dumpInsts() const 1266{ 1267 cprintf("Load store queue: Dumping instructions.\n"); 1268 cprintf("Load queue size: %i\n", loads); 1269 cprintf("Load queue: "); 1270 1271 int load_idx = loadHead; 1272 1273 while (load_idx != loadTail && loadQueue[load_idx]) { 1274 const DynInstPtr &inst(loadQueue[load_idx]); 1275 cprintf("%s.[sn:%i] ", inst->pcState(), inst->seqNum); 1276 1277 incrLdIdx(load_idx); 1278 } 1279 cprintf("\n"); 1280 1281 cprintf("Store queue size: %i\n", stores); 1282 cprintf("Store queue: "); 1283 1284 int store_idx = storeHead; 1285 1286 while (store_idx != storeTail && storeQueue[store_idx].inst) { 1287 const DynInstPtr &inst(storeQueue[store_idx].inst); 1288 cprintf("%s.[sn:%i] ", inst->pcState(), inst->seqNum); 1289 1290 incrStIdx(store_idx); 1291 } 1292 1293 cprintf("\n"); 1294}
|