lsq_unit_impl.hh revision 9814
1 2/* 3 * Copyright (c) 2010-2012 ARM Limited 4 * All rights reserved 5 * 6 * The license below extends only to copyright in the software and shall 7 * not be construed as granting a license to any other intellectual 8 * property including but not limited to intellectual property relating 9 * to a hardware implementation of the functionality of the software 10 * licensed hereunder. You may use the software subject to the license 11 * terms below provided that you ensure that this notice is replicated 12 * unmodified and in its entirety in all distributions of the software, 13 * modified or unmodified, in source code or in binary form. 14 * 15 * Copyright (c) 2004-2005 The Regents of The University of Michigan 16 * All rights reserved. 17 * 18 * Redistribution and use in source and binary forms, with or without 19 * modification, are permitted provided that the following conditions are 20 * met: redistributions of source code must retain the above copyright 21 * notice, this list of conditions and the following disclaimer; 22 * redistributions in binary form must reproduce the above copyright 23 * notice, this list of conditions and the following disclaimer in the 24 * documentation and/or other materials provided with the distribution; 25 * neither the name of the copyright holders nor the names of its 26 * contributors may be used to endorse or promote products derived from 27 * this software without specific prior written permission. 28 * 29 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 30 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 31 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 32 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 33 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 34 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 35 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 36 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 37 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 38 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 39 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 40 * 41 * Authors: Kevin Lim 42 * Korey Sewell 43 */ 44 45#include "arch/generic/debugfaults.hh" 46#include "arch/locked_mem.hh" 47#include "base/str.hh" 48#include "config/the_isa.hh" 49#include "cpu/checker/cpu.hh" 50#include "cpu/o3/lsq.hh" 51#include "cpu/o3/lsq_unit.hh" 52#include "debug/Activity.hh" 53#include "debug/IEW.hh" 54#include "debug/LSQUnit.hh" 55#include "debug/O3PipeView.hh" 56#include "mem/packet.hh" 57#include "mem/request.hh" 58 59template<class Impl> 60LSQUnit<Impl>::WritebackEvent::WritebackEvent(DynInstPtr &_inst, PacketPtr _pkt, 61 LSQUnit *lsq_ptr) 62 : Event(Default_Pri, AutoDelete), 63 inst(_inst), pkt(_pkt), lsqPtr(lsq_ptr) 64{ 65} 66 67template<class Impl> 68void 69LSQUnit<Impl>::WritebackEvent::process() 70{ 71 assert(!lsqPtr->cpu->switchedOut()); 72 73 lsqPtr->writeback(inst, pkt); 74 75 if (pkt->senderState) 76 delete pkt->senderState; 77 78 delete pkt->req; 79 delete pkt; 80} 81 82template<class Impl> 83const char * 84LSQUnit<Impl>::WritebackEvent::description() const 85{ 86 return "Store writeback"; 87} 88 89template<class Impl> 90void 91LSQUnit<Impl>::completeDataAccess(PacketPtr pkt) 92{ 93 LSQSenderState *state = dynamic_cast<LSQSenderState *>(pkt->senderState); 94 DynInstPtr inst = state->inst; 95 DPRINTF(IEW, "Writeback event [sn:%lli].\n", inst->seqNum); 96 DPRINTF(Activity, "Activity: Writeback event [sn:%lli].\n", inst->seqNum); 97 98 //iewStage->ldstQueue.removeMSHR(inst->threadNumber,inst->seqNum); 99 100 // If this is a split access, wait until all packets are received. 101 if (TheISA::HasUnalignedMemAcc && !state->complete()) { 102 delete pkt->req; 103 delete pkt; 104 return; 105 } 106 107 assert(!cpu->switchedOut()); 108 if (inst->isSquashed()) { 109 iewStage->decrWb(inst->seqNum); 110 } else { 111 if (!state->noWB) { 112 if (!TheISA::HasUnalignedMemAcc || !state->isSplit || 113 !state->isLoad) { 114 writeback(inst, pkt); 115 } else { 116 writeback(inst, state->mainPkt); 117 } 118 } 119 120 if (inst->isStore()) { 121 completeStore(state->idx); 122 } 123 } 124 125 if (TheISA::HasUnalignedMemAcc && state->isSplit && state->isLoad) { 126 delete state->mainPkt->req; 127 delete state->mainPkt; 128 } 129 delete state; 130 delete pkt->req; 131 delete pkt; 132} 133 134template <class Impl> 135LSQUnit<Impl>::LSQUnit() 136 : loads(0), stores(0), storesToWB(0), cacheBlockMask(0), stalled(false), 137 isStoreBlocked(false), isLoadBlocked(false), 138 loadBlockedHandled(false), storeInFlight(false), hasPendingPkt(false) 139{ 140} 141 142template<class Impl> 143void 144LSQUnit<Impl>::init(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params, 145 LSQ *lsq_ptr, unsigned maxLQEntries, unsigned maxSQEntries, 146 unsigned id) 147{ 148 cpu = cpu_ptr; 149 iewStage = iew_ptr; 150 151 DPRINTF(LSQUnit, "Creating LSQUnit%i object.\n",id); 152 153 lsq = lsq_ptr; 154 155 lsqID = id; 156 157 // Add 1 for the sentinel entry (they are circular queues). 158 LQEntries = maxLQEntries + 1; 159 SQEntries = maxSQEntries + 1; 160 161 loadQueue.resize(LQEntries); 162 storeQueue.resize(SQEntries); 163 164 depCheckShift = params->LSQDepCheckShift; 165 checkLoads = params->LSQCheckLoads; 166 cachePorts = params->cachePorts; 167 needsTSO = params->needsTSO; 168 169 resetState(); 170} 171 172 173template<class Impl> 174void 175LSQUnit<Impl>::resetState() 176{ 177 loads = stores = storesToWB = 0; 178 179 loadHead = loadTail = 0; 180 181 storeHead = storeWBIdx = storeTail = 0; 182 183 usedPorts = 0; 184 185 retryPkt = NULL; 186 memDepViolator = NULL; 187 188 blockedLoadSeqNum = 0; 189 190 stalled = false; 191 isLoadBlocked = false; 192 loadBlockedHandled = false; 193 194 cacheBlockMask = ~(cpu->cacheLineSize() - 1); 195} 196 197template<class Impl> 198std::string 199LSQUnit<Impl>::name() const 200{ 201 if (Impl::MaxThreads == 1) { 202 return iewStage->name() + ".lsq"; 203 } else { 204 return iewStage->name() + ".lsq.thread" + to_string(lsqID); 205 } 206} 207 208template<class Impl> 209void 210LSQUnit<Impl>::regStats() 211{ 212 lsqForwLoads 213 .name(name() + ".forwLoads") 214 .desc("Number of loads that had data forwarded from stores"); 215 216 invAddrLoads 217 .name(name() + ".invAddrLoads") 218 .desc("Number of loads ignored due to an invalid address"); 219 220 lsqSquashedLoads 221 .name(name() + ".squashedLoads") 222 .desc("Number of loads squashed"); 223 224 lsqIgnoredResponses 225 .name(name() + ".ignoredResponses") 226 .desc("Number of memory responses ignored because the instruction is squashed"); 227 228 lsqMemOrderViolation 229 .name(name() + ".memOrderViolation") 230 .desc("Number of memory ordering violations"); 231 232 lsqSquashedStores 233 .name(name() + ".squashedStores") 234 .desc("Number of stores squashed"); 235 236 invAddrSwpfs 237 .name(name() + ".invAddrSwpfs") 238 .desc("Number of software prefetches ignored due to an invalid address"); 239 240 lsqBlockedLoads 241 .name(name() + ".blockedLoads") 242 .desc("Number of blocked loads due to partial load-store forwarding"); 243 244 lsqRescheduledLoads 245 .name(name() + ".rescheduledLoads") 246 .desc("Number of loads that were rescheduled"); 247 248 lsqCacheBlocked 249 .name(name() + ".cacheBlocked") 250 .desc("Number of times an access to memory failed due to the cache being blocked"); 251} 252 253template<class Impl> 254void 255LSQUnit<Impl>::setDcachePort(MasterPort *dcache_port) 256{ 257 dcachePort = dcache_port; 258} 259 260template<class Impl> 261void 262LSQUnit<Impl>::clearLQ() 263{ 264 loadQueue.clear(); 265} 266 267template<class Impl> 268void 269LSQUnit<Impl>::clearSQ() 270{ 271 storeQueue.clear(); 272} 273 274template<class Impl> 275void 276LSQUnit<Impl>::drainSanityCheck() const 277{ 278 for (int i = 0; i < loadQueue.size(); ++i) 279 assert(!loadQueue[i]); 280 281 assert(storesToWB == 0); 282 assert(!retryPkt); 283} 284 285template<class Impl> 286void 287LSQUnit<Impl>::takeOverFrom() 288{ 289 resetState(); 290} 291 292template<class Impl> 293void 294LSQUnit<Impl>::resizeLQ(unsigned size) 295{ 296 unsigned size_plus_sentinel = size + 1; 297 assert(size_plus_sentinel >= LQEntries); 298 299 if (size_plus_sentinel > LQEntries) { 300 while (size_plus_sentinel > loadQueue.size()) { 301 DynInstPtr dummy; 302 loadQueue.push_back(dummy); 303 LQEntries++; 304 } 305 } else { 306 LQEntries = size_plus_sentinel; 307 } 308 309} 310 311template<class Impl> 312void 313LSQUnit<Impl>::resizeSQ(unsigned size) 314{ 315 unsigned size_plus_sentinel = size + 1; 316 if (size_plus_sentinel > SQEntries) { 317 while (size_plus_sentinel > storeQueue.size()) { 318 SQEntry dummy; 319 storeQueue.push_back(dummy); 320 SQEntries++; 321 } 322 } else { 323 SQEntries = size_plus_sentinel; 324 } 325} 326 327template <class Impl> 328void 329LSQUnit<Impl>::insert(DynInstPtr &inst) 330{ 331 assert(inst->isMemRef()); 332 333 assert(inst->isLoad() || inst->isStore()); 334 335 if (inst->isLoad()) { 336 insertLoad(inst); 337 } else { 338 insertStore(inst); 339 } 340 341 inst->setInLSQ(); 342} 343 344template <class Impl> 345void 346LSQUnit<Impl>::insertLoad(DynInstPtr &load_inst) 347{ 348 assert((loadTail + 1) % LQEntries != loadHead); 349 assert(loads < LQEntries); 350 351 DPRINTF(LSQUnit, "Inserting load PC %s, idx:%i [sn:%lli]\n", 352 load_inst->pcState(), loadTail, load_inst->seqNum); 353 354 load_inst->lqIdx = loadTail; 355 356 if (stores == 0) { 357 load_inst->sqIdx = -1; 358 } else { 359 load_inst->sqIdx = storeTail; 360 } 361 362 loadQueue[loadTail] = load_inst; 363 364 incrLdIdx(loadTail); 365 366 ++loads; 367} 368 369template <class Impl> 370void 371LSQUnit<Impl>::insertStore(DynInstPtr &store_inst) 372{ 373 // Make sure it is not full before inserting an instruction. 374 assert((storeTail + 1) % SQEntries != storeHead); 375 assert(stores < SQEntries); 376 377 DPRINTF(LSQUnit, "Inserting store PC %s, idx:%i [sn:%lli]\n", 378 store_inst->pcState(), storeTail, store_inst->seqNum); 379 380 store_inst->sqIdx = storeTail; 381 store_inst->lqIdx = loadTail; 382 383 storeQueue[storeTail] = SQEntry(store_inst); 384 385 incrStIdx(storeTail); 386 387 ++stores; 388} 389 390template <class Impl> 391typename Impl::DynInstPtr 392LSQUnit<Impl>::getMemDepViolator() 393{ 394 DynInstPtr temp = memDepViolator; 395 396 memDepViolator = NULL; 397 398 return temp; 399} 400 401template <class Impl> 402unsigned 403LSQUnit<Impl>::numFreeEntries() 404{ 405 unsigned free_lq_entries = LQEntries - loads; 406 unsigned free_sq_entries = SQEntries - stores; 407 408 // Both the LQ and SQ entries have an extra dummy entry to differentiate 409 // empty/full conditions. Subtract 1 from the free entries. 410 if (free_lq_entries < free_sq_entries) { 411 return free_lq_entries - 1; 412 } else { 413 return free_sq_entries - 1; 414 } 415} 416 417template <class Impl> 418void 419LSQUnit<Impl>::checkSnoop(PacketPtr pkt) 420{ 421 int load_idx = loadHead; 422 423 // Unlock the cpu-local monitor when the CPU sees a snoop to a locked 424 // address. The CPU can speculatively execute a LL operation after a pending 425 // SC operation in the pipeline and that can make the cache monitor the CPU 426 // is connected to valid while it really shouldn't be. 427 for (int x = 0; x < cpu->numActiveThreads(); x++) { 428 ThreadContext *tc = cpu->getContext(x); 429 bool no_squash = cpu->thread[x]->noSquashFromTC; 430 cpu->thread[x]->noSquashFromTC = true; 431 TheISA::handleLockedSnoop(tc, pkt, cacheBlockMask); 432 cpu->thread[x]->noSquashFromTC = no_squash; 433 } 434 435 // If this is the only load in the LSQ we don't care 436 if (load_idx == loadTail) 437 return; 438 incrLdIdx(load_idx); 439 440 DPRINTF(LSQUnit, "Got snoop for address %#x\n", pkt->getAddr()); 441 Addr invalidate_addr = pkt->getAddr() & cacheBlockMask; 442 while (load_idx != loadTail) { 443 DynInstPtr ld_inst = loadQueue[load_idx]; 444 445 if (!ld_inst->effAddrValid() || ld_inst->uncacheable()) { 446 incrLdIdx(load_idx); 447 continue; 448 } 449 450 Addr load_addr = ld_inst->physEffAddr & cacheBlockMask; 451 DPRINTF(LSQUnit, "-- inst [sn:%lli] load_addr: %#x to pktAddr:%#x\n", 452 ld_inst->seqNum, load_addr, invalidate_addr); 453 454 if (load_addr == invalidate_addr) { 455 if (ld_inst->possibleLoadViolation()) { 456 DPRINTF(LSQUnit, "Conflicting load at addr %#x [sn:%lli]\n", 457 ld_inst->physEffAddr, pkt->getAddr(), ld_inst->seqNum); 458 459 // Mark the load for re-execution 460 ld_inst->fault = new ReExec; 461 } else { 462 // If a older load checks this and it's true 463 // then we might have missed the snoop 464 // in which case we need to invalidate to be sure 465 ld_inst->hitExternalSnoop(true); 466 } 467 } 468 incrLdIdx(load_idx); 469 } 470 return; 471} 472 473template <class Impl> 474Fault 475LSQUnit<Impl>::checkViolations(int load_idx, DynInstPtr &inst) 476{ 477 Addr inst_eff_addr1 = inst->effAddr >> depCheckShift; 478 Addr inst_eff_addr2 = (inst->effAddr + inst->effSize - 1) >> depCheckShift; 479 480 /** @todo in theory you only need to check an instruction that has executed 481 * however, there isn't a good way in the pipeline at the moment to check 482 * all instructions that will execute before the store writes back. Thus, 483 * like the implementation that came before it, we're overly conservative. 484 */ 485 while (load_idx != loadTail) { 486 DynInstPtr ld_inst = loadQueue[load_idx]; 487 if (!ld_inst->effAddrValid() || ld_inst->uncacheable()) { 488 incrLdIdx(load_idx); 489 continue; 490 } 491 492 Addr ld_eff_addr1 = ld_inst->effAddr >> depCheckShift; 493 Addr ld_eff_addr2 = 494 (ld_inst->effAddr + ld_inst->effSize - 1) >> depCheckShift; 495 496 if (inst_eff_addr2 >= ld_eff_addr1 && inst_eff_addr1 <= ld_eff_addr2) { 497 if (inst->isLoad()) { 498 // If this load is to the same block as an external snoop 499 // invalidate that we've observed then the load needs to be 500 // squashed as it could have newer data 501 if (ld_inst->hitExternalSnoop()) { 502 if (!memDepViolator || 503 ld_inst->seqNum < memDepViolator->seqNum) { 504 DPRINTF(LSQUnit, "Detected fault with inst [sn:%lli] " 505 "and [sn:%lli] at address %#x\n", 506 inst->seqNum, ld_inst->seqNum, ld_eff_addr1); 507 memDepViolator = ld_inst; 508 509 ++lsqMemOrderViolation; 510 511 return new GenericISA::M5PanicFault( 512 "Detected fault with inst [sn:%lli] and " 513 "[sn:%lli] at address %#x\n", 514 inst->seqNum, ld_inst->seqNum, ld_eff_addr1); 515 } 516 } 517 518 // Otherwise, mark the load has a possible load violation 519 // and if we see a snoop before it's commited, we need to squash 520 ld_inst->possibleLoadViolation(true); 521 DPRINTF(LSQUnit, "Found possible load violaiton at addr: %#x" 522 " between instructions [sn:%lli] and [sn:%lli]\n", 523 inst_eff_addr1, inst->seqNum, ld_inst->seqNum); 524 } else { 525 // A load/store incorrectly passed this store. 526 // Check if we already have a violator, or if it's newer 527 // squash and refetch. 528 if (memDepViolator && ld_inst->seqNum > memDepViolator->seqNum) 529 break; 530 531 DPRINTF(LSQUnit, "Detected fault with inst [sn:%lli] and " 532 "[sn:%lli] at address %#x\n", 533 inst->seqNum, ld_inst->seqNum, ld_eff_addr1); 534 memDepViolator = ld_inst; 535 536 ++lsqMemOrderViolation; 537 538 return new GenericISA::M5PanicFault("Detected fault with " 539 "inst [sn:%lli] and [sn:%lli] at address %#x\n", 540 inst->seqNum, ld_inst->seqNum, ld_eff_addr1); 541 } 542 } 543 544 incrLdIdx(load_idx); 545 } 546 return NoFault; 547} 548 549 550 551 552template <class Impl> 553Fault 554LSQUnit<Impl>::executeLoad(DynInstPtr &inst) 555{ 556 using namespace TheISA; 557 // Execute a specific load. 558 Fault load_fault = NoFault; 559 560 DPRINTF(LSQUnit, "Executing load PC %s, [sn:%lli]\n", 561 inst->pcState(), inst->seqNum); 562 563 assert(!inst->isSquashed()); 564 565 load_fault = inst->initiateAcc(); 566 567 if (inst->isTranslationDelayed() && 568 load_fault == NoFault) 569 return load_fault; 570 571 // If the instruction faulted or predicated false, then we need to send it 572 // along to commit without the instruction completing. 573 if (load_fault != NoFault || inst->readPredicate() == false) { 574 // Send this instruction to commit, also make sure iew stage 575 // realizes there is activity. 576 // Mark it as executed unless it is an uncached load that 577 // needs to hit the head of commit. 578 if (inst->readPredicate() == false) 579 inst->forwardOldRegs(); 580 DPRINTF(LSQUnit, "Load [sn:%lli] not executed from %s\n", 581 inst->seqNum, 582 (load_fault != NoFault ? "fault" : "predication")); 583 if (!(inst->hasRequest() && inst->uncacheable()) || 584 inst->isAtCommit()) { 585 inst->setExecuted(); 586 } 587 iewStage->instToCommit(inst); 588 iewStage->activityThisCycle(); 589 } else if (!loadBlocked()) { 590 assert(inst->effAddrValid()); 591 int load_idx = inst->lqIdx; 592 incrLdIdx(load_idx); 593 594 if (checkLoads) 595 return checkViolations(load_idx, inst); 596 } 597 598 return load_fault; 599} 600 601template <class Impl> 602Fault 603LSQUnit<Impl>::executeStore(DynInstPtr &store_inst) 604{ 605 using namespace TheISA; 606 // Make sure that a store exists. 607 assert(stores != 0); 608 609 int store_idx = store_inst->sqIdx; 610 611 DPRINTF(LSQUnit, "Executing store PC %s [sn:%lli]\n", 612 store_inst->pcState(), store_inst->seqNum); 613 614 assert(!store_inst->isSquashed()); 615 616 // Check the recently completed loads to see if any match this store's 617 // address. If so, then we have a memory ordering violation. 618 int load_idx = store_inst->lqIdx; 619 620 Fault store_fault = store_inst->initiateAcc(); 621 622 if (store_inst->isTranslationDelayed() && 623 store_fault == NoFault) 624 return store_fault; 625 626 if (store_inst->readPredicate() == false) 627 store_inst->forwardOldRegs(); 628 629 if (storeQueue[store_idx].size == 0) { 630 DPRINTF(LSQUnit,"Fault on Store PC %s, [sn:%lli], Size = 0\n", 631 store_inst->pcState(), store_inst->seqNum); 632 633 return store_fault; 634 } else if (store_inst->readPredicate() == false) { 635 DPRINTF(LSQUnit, "Store [sn:%lli] not executed from predication\n", 636 store_inst->seqNum); 637 return store_fault; 638 } 639 640 assert(store_fault == NoFault); 641 642 if (store_inst->isStoreConditional()) { 643 // Store conditionals need to set themselves as able to 644 // writeback if we haven't had a fault by here. 645 storeQueue[store_idx].canWB = true; 646 647 ++storesToWB; 648 } 649 650 return checkViolations(load_idx, store_inst); 651 652} 653 654template <class Impl> 655void 656LSQUnit<Impl>::commitLoad() 657{ 658 assert(loadQueue[loadHead]); 659 660 DPRINTF(LSQUnit, "Committing head load instruction, PC %s\n", 661 loadQueue[loadHead]->pcState()); 662 663 loadQueue[loadHead] = NULL; 664 665 incrLdIdx(loadHead); 666 667 --loads; 668} 669 670template <class Impl> 671void 672LSQUnit<Impl>::commitLoads(InstSeqNum &youngest_inst) 673{ 674 assert(loads == 0 || loadQueue[loadHead]); 675 676 while (loads != 0 && loadQueue[loadHead]->seqNum <= youngest_inst) { 677 commitLoad(); 678 } 679} 680 681template <class Impl> 682void 683LSQUnit<Impl>::commitStores(InstSeqNum &youngest_inst) 684{ 685 assert(stores == 0 || storeQueue[storeHead].inst); 686 687 int store_idx = storeHead; 688 689 while (store_idx != storeTail) { 690 assert(storeQueue[store_idx].inst); 691 // Mark any stores that are now committed and have not yet 692 // been marked as able to write back. 693 if (!storeQueue[store_idx].canWB) { 694 if (storeQueue[store_idx].inst->seqNum > youngest_inst) { 695 break; 696 } 697 DPRINTF(LSQUnit, "Marking store as able to write back, PC " 698 "%s [sn:%lli]\n", 699 storeQueue[store_idx].inst->pcState(), 700 storeQueue[store_idx].inst->seqNum); 701 702 storeQueue[store_idx].canWB = true; 703 704 ++storesToWB; 705 } 706 707 incrStIdx(store_idx); 708 } 709} 710 711template <class Impl> 712void 713LSQUnit<Impl>::writebackPendingStore() 714{ 715 if (hasPendingPkt) { 716 assert(pendingPkt != NULL); 717 718 // If the cache is blocked, this will store the packet for retry. 719 if (sendStore(pendingPkt)) { 720 storePostSend(pendingPkt); 721 } 722 pendingPkt = NULL; 723 hasPendingPkt = false; 724 } 725} 726 727template <class Impl> 728void 729LSQUnit<Impl>::writebackStores() 730{ 731 // First writeback the second packet from any split store that didn't 732 // complete last cycle because there weren't enough cache ports available. 733 if (TheISA::HasUnalignedMemAcc) { 734 writebackPendingStore(); 735 } 736 737 while (storesToWB > 0 && 738 storeWBIdx != storeTail && 739 storeQueue[storeWBIdx].inst && 740 storeQueue[storeWBIdx].canWB && 741 ((!needsTSO) || (!storeInFlight)) && 742 usedPorts < cachePorts) { 743 744 if (isStoreBlocked || lsq->cacheBlocked()) { 745 DPRINTF(LSQUnit, "Unable to write back any more stores, cache" 746 " is blocked!\n"); 747 break; 748 } 749 750 // Store didn't write any data so no need to write it back to 751 // memory. 752 if (storeQueue[storeWBIdx].size == 0) { 753 completeStore(storeWBIdx); 754 755 incrStIdx(storeWBIdx); 756 757 continue; 758 } 759 760 ++usedPorts; 761 762 if (storeQueue[storeWBIdx].inst->isDataPrefetch()) { 763 incrStIdx(storeWBIdx); 764 765 continue; 766 } 767 768 assert(storeQueue[storeWBIdx].req); 769 assert(!storeQueue[storeWBIdx].committed); 770 771 if (TheISA::HasUnalignedMemAcc && storeQueue[storeWBIdx].isSplit) { 772 assert(storeQueue[storeWBIdx].sreqLow); 773 assert(storeQueue[storeWBIdx].sreqHigh); 774 } 775 776 DynInstPtr inst = storeQueue[storeWBIdx].inst; 777 778 Request *req = storeQueue[storeWBIdx].req; 779 RequestPtr sreqLow = storeQueue[storeWBIdx].sreqLow; 780 RequestPtr sreqHigh = storeQueue[storeWBIdx].sreqHigh; 781 782 storeQueue[storeWBIdx].committed = true; 783 784 assert(!inst->memData); 785 inst->memData = new uint8_t[64]; 786 787 memcpy(inst->memData, storeQueue[storeWBIdx].data, req->getSize()); 788 789 MemCmd command = 790 req->isSwap() ? MemCmd::SwapReq : 791 (req->isLLSC() ? MemCmd::StoreCondReq : MemCmd::WriteReq); 792 PacketPtr data_pkt; 793 PacketPtr snd_data_pkt = NULL; 794 795 LSQSenderState *state = new LSQSenderState; 796 state->isLoad = false; 797 state->idx = storeWBIdx; 798 state->inst = inst; 799 800 if (!TheISA::HasUnalignedMemAcc || !storeQueue[storeWBIdx].isSplit) { 801 802 // Build a single data packet if the store isn't split. 803 data_pkt = new Packet(req, command); 804 data_pkt->dataStatic(inst->memData); 805 data_pkt->senderState = state; 806 } else { 807 // Create two packets if the store is split in two. 808 data_pkt = new Packet(sreqLow, command); 809 snd_data_pkt = new Packet(sreqHigh, command); 810 811 data_pkt->dataStatic(inst->memData); 812 snd_data_pkt->dataStatic(inst->memData + sreqLow->getSize()); 813 814 data_pkt->senderState = state; 815 snd_data_pkt->senderState = state; 816 817 state->isSplit = true; 818 state->outstanding = 2; 819 820 // Can delete the main request now. 821 delete req; 822 req = sreqLow; 823 } 824 825 DPRINTF(LSQUnit, "D-Cache: Writing back store idx:%i PC:%s " 826 "to Addr:%#x, data:%#x [sn:%lli]\n", 827 storeWBIdx, inst->pcState(), 828 req->getPaddr(), (int)*(inst->memData), 829 inst->seqNum); 830 831 // @todo: Remove this SC hack once the memory system handles it. 832 if (inst->isStoreConditional()) { 833 assert(!storeQueue[storeWBIdx].isSplit); 834 // Disable recording the result temporarily. Writing to 835 // misc regs normally updates the result, but this is not 836 // the desired behavior when handling store conditionals. 837 inst->recordResult(false); 838 bool success = TheISA::handleLockedWrite(inst.get(), req); 839 inst->recordResult(true); 840 841 if (!success) { 842 // Instantly complete this store. 843 DPRINTF(LSQUnit, "Store conditional [sn:%lli] failed. " 844 "Instantly completing it.\n", 845 inst->seqNum); 846 WritebackEvent *wb = new WritebackEvent(inst, data_pkt, this); 847 cpu->schedule(wb, curTick() + 1); 848 if (cpu->checker) { 849 // Make sure to set the LLSC data for verification 850 // if checker is loaded 851 inst->reqToVerify->setExtraData(0); 852 inst->completeAcc(data_pkt); 853 } 854 completeStore(storeWBIdx); 855 incrStIdx(storeWBIdx); 856 continue; 857 } 858 } else { 859 // Non-store conditionals do not need a writeback. 860 state->noWB = true; 861 } 862 863 bool split = 864 TheISA::HasUnalignedMemAcc && storeQueue[storeWBIdx].isSplit; 865 866 ThreadContext *thread = cpu->tcBase(lsqID); 867 868 if (req->isMmappedIpr()) { 869 assert(!inst->isStoreConditional()); 870 TheISA::handleIprWrite(thread, data_pkt); 871 delete data_pkt; 872 if (split) { 873 assert(snd_data_pkt->req->isMmappedIpr()); 874 TheISA::handleIprWrite(thread, snd_data_pkt); 875 delete snd_data_pkt; 876 delete sreqLow; 877 delete sreqHigh; 878 } 879 delete state; 880 delete req; 881 completeStore(storeWBIdx); 882 incrStIdx(storeWBIdx); 883 } else if (!sendStore(data_pkt)) { 884 DPRINTF(IEW, "D-Cache became blocked when writing [sn:%lli], will" 885 "retry later\n", 886 inst->seqNum); 887 888 // Need to store the second packet, if split. 889 if (split) { 890 state->pktToSend = true; 891 state->pendingPacket = snd_data_pkt; 892 } 893 } else { 894 895 // If split, try to send the second packet too 896 if (split) { 897 assert(snd_data_pkt); 898 899 // Ensure there are enough ports to use. 900 if (usedPorts < cachePorts) { 901 ++usedPorts; 902 if (sendStore(snd_data_pkt)) { 903 storePostSend(snd_data_pkt); 904 } else { 905 DPRINTF(IEW, "D-Cache became blocked when writing" 906 " [sn:%lli] second packet, will retry later\n", 907 inst->seqNum); 908 } 909 } else { 910 911 // Store the packet for when there's free ports. 912 assert(pendingPkt == NULL); 913 pendingPkt = snd_data_pkt; 914 hasPendingPkt = true; 915 } 916 } else { 917 918 // Not a split store. 919 storePostSend(data_pkt); 920 } 921 } 922 } 923 924 // Not sure this should set it to 0. 925 usedPorts = 0; 926 927 assert(stores >= 0 && storesToWB >= 0); 928} 929 930/*template <class Impl> 931void 932LSQUnit<Impl>::removeMSHR(InstSeqNum seqNum) 933{ 934 list<InstSeqNum>::iterator mshr_it = find(mshrSeqNums.begin(), 935 mshrSeqNums.end(), 936 seqNum); 937 938 if (mshr_it != mshrSeqNums.end()) { 939 mshrSeqNums.erase(mshr_it); 940 DPRINTF(LSQUnit, "Removing MSHR. count = %i\n",mshrSeqNums.size()); 941 } 942}*/ 943 944template <class Impl> 945void 946LSQUnit<Impl>::squash(const InstSeqNum &squashed_num) 947{ 948 DPRINTF(LSQUnit, "Squashing until [sn:%lli]!" 949 "(Loads:%i Stores:%i)\n", squashed_num, loads, stores); 950 951 int load_idx = loadTail; 952 decrLdIdx(load_idx); 953 954 while (loads != 0 && loadQueue[load_idx]->seqNum > squashed_num) { 955 DPRINTF(LSQUnit,"Load Instruction PC %s squashed, " 956 "[sn:%lli]\n", 957 loadQueue[load_idx]->pcState(), 958 loadQueue[load_idx]->seqNum); 959 960 if (isStalled() && load_idx == stallingLoadIdx) { 961 stalled = false; 962 stallingStoreIsn = 0; 963 stallingLoadIdx = 0; 964 } 965 966 // Clear the smart pointer to make sure it is decremented. 967 loadQueue[load_idx]->setSquashed(); 968 loadQueue[load_idx] = NULL; 969 --loads; 970 971 // Inefficient! 972 loadTail = load_idx; 973 974 decrLdIdx(load_idx); 975 ++lsqSquashedLoads; 976 } 977 978 if (isLoadBlocked) { 979 if (squashed_num < blockedLoadSeqNum) { 980 isLoadBlocked = false; 981 loadBlockedHandled = false; 982 blockedLoadSeqNum = 0; 983 } 984 } 985 986 if (memDepViolator && squashed_num < memDepViolator->seqNum) { 987 memDepViolator = NULL; 988 } 989 990 int store_idx = storeTail; 991 decrStIdx(store_idx); 992 993 while (stores != 0 && 994 storeQueue[store_idx].inst->seqNum > squashed_num) { 995 // Instructions marked as can WB are already committed. 996 if (storeQueue[store_idx].canWB) { 997 break; 998 } 999 1000 DPRINTF(LSQUnit,"Store Instruction PC %s squashed, " 1001 "idx:%i [sn:%lli]\n", 1002 storeQueue[store_idx].inst->pcState(), 1003 store_idx, storeQueue[store_idx].inst->seqNum); 1004 1005 // I don't think this can happen. It should have been cleared 1006 // by the stalling load. 1007 if (isStalled() && 1008 storeQueue[store_idx].inst->seqNum == stallingStoreIsn) { 1009 panic("Is stalled should have been cleared by stalling load!\n"); 1010 stalled = false; 1011 stallingStoreIsn = 0; 1012 } 1013 1014 // Clear the smart pointer to make sure it is decremented. 1015 storeQueue[store_idx].inst->setSquashed(); 1016 storeQueue[store_idx].inst = NULL; 1017 storeQueue[store_idx].canWB = 0; 1018 1019 // Must delete request now that it wasn't handed off to 1020 // memory. This is quite ugly. @todo: Figure out the proper 1021 // place to really handle request deletes. 1022 delete storeQueue[store_idx].req; 1023 if (TheISA::HasUnalignedMemAcc && storeQueue[store_idx].isSplit) { 1024 delete storeQueue[store_idx].sreqLow; 1025 delete storeQueue[store_idx].sreqHigh; 1026 1027 storeQueue[store_idx].sreqLow = NULL; 1028 storeQueue[store_idx].sreqHigh = NULL; 1029 } 1030 1031 storeQueue[store_idx].req = NULL; 1032 --stores; 1033 1034 // Inefficient! 1035 storeTail = store_idx; 1036 1037 decrStIdx(store_idx); 1038 ++lsqSquashedStores; 1039 } 1040} 1041 1042template <class Impl> 1043void 1044LSQUnit<Impl>::storePostSend(PacketPtr pkt) 1045{ 1046 if (isStalled() && 1047 storeQueue[storeWBIdx].inst->seqNum == stallingStoreIsn) { 1048 DPRINTF(LSQUnit, "Unstalling, stalling store [sn:%lli] " 1049 "load idx:%i\n", 1050 stallingStoreIsn, stallingLoadIdx); 1051 stalled = false; 1052 stallingStoreIsn = 0; 1053 iewStage->replayMemInst(loadQueue[stallingLoadIdx]); 1054 } 1055 1056 if (!storeQueue[storeWBIdx].inst->isStoreConditional()) { 1057 // The store is basically completed at this time. This 1058 // only works so long as the checker doesn't try to 1059 // verify the value in memory for stores. 1060 storeQueue[storeWBIdx].inst->setCompleted(); 1061 1062 if (cpu->checker) { 1063 cpu->checker->verify(storeQueue[storeWBIdx].inst); 1064 } 1065 } 1066 1067 if (needsTSO) { 1068 storeInFlight = true; 1069 } 1070 1071 incrStIdx(storeWBIdx); 1072} 1073 1074template <class Impl> 1075void 1076LSQUnit<Impl>::writeback(DynInstPtr &inst, PacketPtr pkt) 1077{ 1078 iewStage->wakeCPU(); 1079 1080 // Squashed instructions do not need to complete their access. 1081 if (inst->isSquashed()) { 1082 iewStage->decrWb(inst->seqNum); 1083 assert(!inst->isStore()); 1084 ++lsqIgnoredResponses; 1085 return; 1086 } 1087 1088 if (!inst->isExecuted()) { 1089 inst->setExecuted(); 1090 1091 // Complete access to copy data to proper place. 1092 inst->completeAcc(pkt); 1093 } 1094 1095 // Need to insert instruction into queue to commit 1096 iewStage->instToCommit(inst); 1097 1098 iewStage->activityThisCycle(); 1099 1100 // see if this load changed the PC 1101 iewStage->checkMisprediction(inst); 1102} 1103 1104template <class Impl> 1105void 1106LSQUnit<Impl>::completeStore(int store_idx) 1107{ 1108 assert(storeQueue[store_idx].inst); 1109 storeQueue[store_idx].completed = true; 1110 --storesToWB; 1111 // A bit conservative because a store completion may not free up entries, 1112 // but hopefully avoids two store completions in one cycle from making 1113 // the CPU tick twice. 1114 cpu->wakeCPU(); 1115 cpu->activityThisCycle(); 1116 1117 if (store_idx == storeHead) { 1118 do { 1119 incrStIdx(storeHead); 1120 1121 --stores; 1122 } while (storeQueue[storeHead].completed && 1123 storeHead != storeTail); 1124 1125 iewStage->updateLSQNextCycle = true; 1126 } 1127 1128 DPRINTF(LSQUnit, "Completing store [sn:%lli], idx:%i, store head " 1129 "idx:%i\n", 1130 storeQueue[store_idx].inst->seqNum, store_idx, storeHead); 1131 1132#if TRACING_ON 1133 if (DTRACE(O3PipeView)) { 1134 storeQueue[store_idx].inst->storeTick = 1135 curTick() - storeQueue[store_idx].inst->fetchTick; 1136 } 1137#endif 1138 1139 if (isStalled() && 1140 storeQueue[store_idx].inst->seqNum == stallingStoreIsn) { 1141 DPRINTF(LSQUnit, "Unstalling, stalling store [sn:%lli] " 1142 "load idx:%i\n", 1143 stallingStoreIsn, stallingLoadIdx); 1144 stalled = false; 1145 stallingStoreIsn = 0; 1146 iewStage->replayMemInst(loadQueue[stallingLoadIdx]); 1147 } 1148 1149 storeQueue[store_idx].inst->setCompleted(); 1150 1151 if (needsTSO) { 1152 storeInFlight = false; 1153 } 1154 1155 // Tell the checker we've completed this instruction. Some stores 1156 // may get reported twice to the checker, but the checker can 1157 // handle that case. 1158 if (cpu->checker) { 1159 cpu->checker->verify(storeQueue[store_idx].inst); 1160 } 1161} 1162 1163template <class Impl> 1164bool 1165LSQUnit<Impl>::sendStore(PacketPtr data_pkt) 1166{ 1167 if (!dcachePort->sendTimingReq(data_pkt)) { 1168 // Need to handle becoming blocked on a store. 1169 isStoreBlocked = true; 1170 ++lsqCacheBlocked; 1171 assert(retryPkt == NULL); 1172 retryPkt = data_pkt; 1173 lsq->setRetryTid(lsqID); 1174 return false; 1175 } 1176 return true; 1177} 1178 1179template <class Impl> 1180void 1181LSQUnit<Impl>::recvRetry() 1182{ 1183 if (isStoreBlocked) { 1184 DPRINTF(LSQUnit, "Receiving retry: store blocked\n"); 1185 assert(retryPkt != NULL); 1186 1187 LSQSenderState *state = 1188 dynamic_cast<LSQSenderState *>(retryPkt->senderState); 1189 1190 if (dcachePort->sendTimingReq(retryPkt)) { 1191 // Don't finish the store unless this is the last packet. 1192 if (!TheISA::HasUnalignedMemAcc || !state->pktToSend || 1193 state->pendingPacket == retryPkt) { 1194 state->pktToSend = false; 1195 storePostSend(retryPkt); 1196 } 1197 retryPkt = NULL; 1198 isStoreBlocked = false; 1199 lsq->setRetryTid(InvalidThreadID); 1200 1201 // Send any outstanding packet. 1202 if (TheISA::HasUnalignedMemAcc && state->pktToSend) { 1203 assert(state->pendingPacket); 1204 if (sendStore(state->pendingPacket)) { 1205 storePostSend(state->pendingPacket); 1206 } 1207 } 1208 } else { 1209 // Still blocked! 1210 ++lsqCacheBlocked; 1211 lsq->setRetryTid(lsqID); 1212 } 1213 } else if (isLoadBlocked) { 1214 DPRINTF(LSQUnit, "Loads squash themselves and all younger insts, " 1215 "no need to resend packet.\n"); 1216 } else { 1217 DPRINTF(LSQUnit, "Retry received but LSQ is no longer blocked.\n"); 1218 } 1219} 1220 1221template <class Impl> 1222inline void 1223LSQUnit<Impl>::incrStIdx(int &store_idx) const 1224{ 1225 if (++store_idx >= SQEntries) 1226 store_idx = 0; 1227} 1228 1229template <class Impl> 1230inline void 1231LSQUnit<Impl>::decrStIdx(int &store_idx) const 1232{ 1233 if (--store_idx < 0) 1234 store_idx += SQEntries; 1235} 1236 1237template <class Impl> 1238inline void 1239LSQUnit<Impl>::incrLdIdx(int &load_idx) const 1240{ 1241 if (++load_idx >= LQEntries) 1242 load_idx = 0; 1243} 1244 1245template <class Impl> 1246inline void 1247LSQUnit<Impl>::decrLdIdx(int &load_idx) const 1248{ 1249 if (--load_idx < 0) 1250 load_idx += LQEntries; 1251} 1252 1253template <class Impl> 1254void 1255LSQUnit<Impl>::dumpInsts() const 1256{ 1257 cprintf("Load store queue: Dumping instructions.\n"); 1258 cprintf("Load queue size: %i\n", loads); 1259 cprintf("Load queue: "); 1260 1261 int load_idx = loadHead; 1262 1263 while (load_idx != loadTail && loadQueue[load_idx]) { 1264 const DynInstPtr &inst(loadQueue[load_idx]); 1265 cprintf("%s.[sn:%i] ", inst->pcState(), inst->seqNum); 1266 1267 incrLdIdx(load_idx); 1268 } 1269 cprintf("\n"); 1270 1271 cprintf("Store queue size: %i\n", stores); 1272 cprintf("Store queue: "); 1273 1274 int store_idx = storeHead; 1275 1276 while (store_idx != storeTail && storeQueue[store_idx].inst) { 1277 const DynInstPtr &inst(storeQueue[store_idx].inst); 1278 cprintf("%s.[sn:%i] ", inst->pcState(), inst->seqNum); 1279 1280 incrStIdx(store_idx); 1281 } 1282 1283 cprintf("\n"); 1284} 1285