lsq_unit_impl.hh revision 10020:2f33cb012383
1 2/* 3 * Copyright (c) 2010-2012 ARM Limited 4 * All rights reserved 5 * 6 * The license below extends only to copyright in the software and shall 7 * not be construed as granting a license to any other intellectual 8 * property including but not limited to intellectual property relating 9 * to a hardware implementation of the functionality of the software 10 * licensed hereunder. You may use the software subject to the license 11 * terms below provided that you ensure that this notice is replicated 12 * unmodified and in its entirety in all distributions of the software, 13 * modified or unmodified, in source code or in binary form. 14 * 15 * Copyright (c) 2004-2005 The Regents of The University of Michigan 16 * All rights reserved. 17 * 18 * Redistribution and use in source and binary forms, with or without 19 * modification, are permitted provided that the following conditions are 20 * met: redistributions of source code must retain the above copyright 21 * notice, this list of conditions and the following disclaimer; 22 * redistributions in binary form must reproduce the above copyright 23 * notice, this list of conditions and the following disclaimer in the 24 * documentation and/or other materials provided with the distribution; 25 * neither the name of the copyright holders nor the names of its 26 * contributors may be used to endorse or promote products derived from 27 * this software without specific prior written permission. 28 * 29 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 30 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 31 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 32 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 33 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 34 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 35 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 36 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 37 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 38 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 39 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 40 * 41 * Authors: Kevin Lim 42 * Korey Sewell 43 */ 44 45#ifndef __CPU_O3_LSQ_UNIT_IMPL_HH__ 46#define __CPU_O3_LSQ_UNIT_IMPL_HH__ 47 48#include "arch/generic/debugfaults.hh" 49#include "arch/locked_mem.hh" 50#include "base/str.hh" 51#include "config/the_isa.hh" 52#include "cpu/checker/cpu.hh" 53#include "cpu/o3/lsq.hh" 54#include "cpu/o3/lsq_unit.hh" 55#include "debug/Activity.hh" 56#include "debug/IEW.hh" 57#include "debug/LSQUnit.hh" 58#include "debug/O3PipeView.hh" 59#include "mem/packet.hh" 60#include "mem/request.hh" 61 62template<class Impl> 63LSQUnit<Impl>::WritebackEvent::WritebackEvent(DynInstPtr &_inst, PacketPtr _pkt, 64 LSQUnit *lsq_ptr) 65 : Event(Default_Pri, AutoDelete), 66 inst(_inst), pkt(_pkt), lsqPtr(lsq_ptr) 67{ 68} 69 70template<class Impl> 71void 72LSQUnit<Impl>::WritebackEvent::process() 73{ 74 assert(!lsqPtr->cpu->switchedOut()); 75 76 lsqPtr->writeback(inst, pkt); 77 78 if (pkt->senderState) 79 delete pkt->senderState; 80 81 delete pkt->req; 82 delete pkt; 83} 84 85template<class Impl> 86const char * 87LSQUnit<Impl>::WritebackEvent::description() const 88{ 89 return "Store writeback"; 90} 91 92template<class Impl> 93void 94LSQUnit<Impl>::completeDataAccess(PacketPtr pkt) 95{ 96 LSQSenderState *state = dynamic_cast<LSQSenderState *>(pkt->senderState); 97 DynInstPtr inst = state->inst; 98 DPRINTF(IEW, "Writeback event [sn:%lli].\n", inst->seqNum); 99 DPRINTF(Activity, "Activity: Writeback event [sn:%lli].\n", inst->seqNum); 100 101 //iewStage->ldstQueue.removeMSHR(inst->threadNumber,inst->seqNum); 102 103 // If this is a split access, wait until all packets are received. 104 if (TheISA::HasUnalignedMemAcc && !state->complete()) { 105 delete pkt->req; 106 delete pkt; 107 return; 108 } 109 110 assert(!cpu->switchedOut()); 111 if (inst->isSquashed()) { 112 iewStage->decrWb(inst->seqNum); 113 } else { 114 if (!state->noWB) { 115 if (!TheISA::HasUnalignedMemAcc || !state->isSplit || 116 !state->isLoad) { 117 writeback(inst, pkt); 118 } else { 119 writeback(inst, state->mainPkt); 120 } 121 } 122 123 if (inst->isStore()) { 124 completeStore(state->idx); 125 } 126 } 127 128 if (TheISA::HasUnalignedMemAcc && state->isSplit && state->isLoad) { 129 delete state->mainPkt->req; 130 delete state->mainPkt; 131 } 132 133 pkt->req->setAccessLatency(); 134 delete state; 135 delete pkt->req; 136 delete pkt; 137} 138 139template <class Impl> 140LSQUnit<Impl>::LSQUnit() 141 : loads(0), stores(0), storesToWB(0), cacheBlockMask(0), stalled(false), 142 isStoreBlocked(false), isLoadBlocked(false), 143 loadBlockedHandled(false), storeInFlight(false), hasPendingPkt(false) 144{ 145} 146 147template<class Impl> 148void 149LSQUnit<Impl>::init(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params, 150 LSQ *lsq_ptr, unsigned maxLQEntries, unsigned maxSQEntries, 151 unsigned id) 152{ 153 cpu = cpu_ptr; 154 iewStage = iew_ptr; 155 156 DPRINTF(LSQUnit, "Creating LSQUnit%i object.\n",id); 157 158 lsq = lsq_ptr; 159 160 lsqID = id; 161 162 // Add 1 for the sentinel entry (they are circular queues). 163 LQEntries = maxLQEntries + 1; 164 SQEntries = maxSQEntries + 1; 165 166 //Due to uint8_t index in LSQSenderState 167 assert(LQEntries <= 256); 168 assert(SQEntries <= 256); 169 170 loadQueue.resize(LQEntries); 171 storeQueue.resize(SQEntries); 172 173 depCheckShift = params->LSQDepCheckShift; 174 checkLoads = params->LSQCheckLoads; 175 cachePorts = params->cachePorts; 176 needsTSO = params->needsTSO; 177 178 resetState(); 179} 180 181 182template<class Impl> 183void 184LSQUnit<Impl>::resetState() 185{ 186 loads = stores = storesToWB = 0; 187 188 loadHead = loadTail = 0; 189 190 storeHead = storeWBIdx = storeTail = 0; 191 192 usedPorts = 0; 193 194 retryPkt = NULL; 195 memDepViolator = NULL; 196 197 blockedLoadSeqNum = 0; 198 199 stalled = false; 200 isLoadBlocked = false; 201 loadBlockedHandled = false; 202 203 cacheBlockMask = ~(cpu->cacheLineSize() - 1); 204} 205 206template<class Impl> 207std::string 208LSQUnit<Impl>::name() const 209{ 210 if (Impl::MaxThreads == 1) { 211 return iewStage->name() + ".lsq"; 212 } else { 213 return iewStage->name() + ".lsq.thread" + to_string(lsqID); 214 } 215} 216 217template<class Impl> 218void 219LSQUnit<Impl>::regStats() 220{ 221 lsqForwLoads 222 .name(name() + ".forwLoads") 223 .desc("Number of loads that had data forwarded from stores"); 224 225 invAddrLoads 226 .name(name() + ".invAddrLoads") 227 .desc("Number of loads ignored due to an invalid address"); 228 229 lsqSquashedLoads 230 .name(name() + ".squashedLoads") 231 .desc("Number of loads squashed"); 232 233 lsqIgnoredResponses 234 .name(name() + ".ignoredResponses") 235 .desc("Number of memory responses ignored because the instruction is squashed"); 236 237 lsqMemOrderViolation 238 .name(name() + ".memOrderViolation") 239 .desc("Number of memory ordering violations"); 240 241 lsqSquashedStores 242 .name(name() + ".squashedStores") 243 .desc("Number of stores squashed"); 244 245 invAddrSwpfs 246 .name(name() + ".invAddrSwpfs") 247 .desc("Number of software prefetches ignored due to an invalid address"); 248 249 lsqBlockedLoads 250 .name(name() + ".blockedLoads") 251 .desc("Number of blocked loads due to partial load-store forwarding"); 252 253 lsqRescheduledLoads 254 .name(name() + ".rescheduledLoads") 255 .desc("Number of loads that were rescheduled"); 256 257 lsqCacheBlocked 258 .name(name() + ".cacheBlocked") 259 .desc("Number of times an access to memory failed due to the cache being blocked"); 260} 261 262template<class Impl> 263void 264LSQUnit<Impl>::setDcachePort(MasterPort *dcache_port) 265{ 266 dcachePort = dcache_port; 267} 268 269template<class Impl> 270void 271LSQUnit<Impl>::clearLQ() 272{ 273 loadQueue.clear(); 274} 275 276template<class Impl> 277void 278LSQUnit<Impl>::clearSQ() 279{ 280 storeQueue.clear(); 281} 282 283template<class Impl> 284void 285LSQUnit<Impl>::drainSanityCheck() const 286{ 287 for (int i = 0; i < loadQueue.size(); ++i) 288 assert(!loadQueue[i]); 289 290 assert(storesToWB == 0); 291 assert(!retryPkt); 292} 293 294template<class Impl> 295void 296LSQUnit<Impl>::takeOverFrom() 297{ 298 resetState(); 299} 300 301template<class Impl> 302void 303LSQUnit<Impl>::resizeLQ(unsigned size) 304{ 305 unsigned size_plus_sentinel = size + 1; 306 assert(size_plus_sentinel >= LQEntries); 307 308 if (size_plus_sentinel > LQEntries) { 309 while (size_plus_sentinel > loadQueue.size()) { 310 DynInstPtr dummy; 311 loadQueue.push_back(dummy); 312 LQEntries++; 313 } 314 } else { 315 LQEntries = size_plus_sentinel; 316 } 317 318 assert(LQEntries <= 256); 319} 320 321template<class Impl> 322void 323LSQUnit<Impl>::resizeSQ(unsigned size) 324{ 325 unsigned size_plus_sentinel = size + 1; 326 if (size_plus_sentinel > SQEntries) { 327 while (size_plus_sentinel > storeQueue.size()) { 328 SQEntry dummy; 329 storeQueue.push_back(dummy); 330 SQEntries++; 331 } 332 } else { 333 SQEntries = size_plus_sentinel; 334 } 335 336 assert(SQEntries <= 256); 337} 338 339template <class Impl> 340void 341LSQUnit<Impl>::insert(DynInstPtr &inst) 342{ 343 assert(inst->isMemRef()); 344 345 assert(inst->isLoad() || inst->isStore()); 346 347 if (inst->isLoad()) { 348 insertLoad(inst); 349 } else { 350 insertStore(inst); 351 } 352 353 inst->setInLSQ(); 354} 355 356template <class Impl> 357void 358LSQUnit<Impl>::insertLoad(DynInstPtr &load_inst) 359{ 360 assert((loadTail + 1) % LQEntries != loadHead); 361 assert(loads < LQEntries); 362 363 DPRINTF(LSQUnit, "Inserting load PC %s, idx:%i [sn:%lli]\n", 364 load_inst->pcState(), loadTail, load_inst->seqNum); 365 366 load_inst->lqIdx = loadTail; 367 368 if (stores == 0) { 369 load_inst->sqIdx = -1; 370 } else { 371 load_inst->sqIdx = storeTail; 372 } 373 374 loadQueue[loadTail] = load_inst; 375 376 incrLdIdx(loadTail); 377 378 ++loads; 379} 380 381template <class Impl> 382void 383LSQUnit<Impl>::insertStore(DynInstPtr &store_inst) 384{ 385 // Make sure it is not full before inserting an instruction. 386 assert((storeTail + 1) % SQEntries != storeHead); 387 assert(stores < SQEntries); 388 389 DPRINTF(LSQUnit, "Inserting store PC %s, idx:%i [sn:%lli]\n", 390 store_inst->pcState(), storeTail, store_inst->seqNum); 391 392 store_inst->sqIdx = storeTail; 393 store_inst->lqIdx = loadTail; 394 395 storeQueue[storeTail] = SQEntry(store_inst); 396 397 incrStIdx(storeTail); 398 399 ++stores; 400} 401 402template <class Impl> 403typename Impl::DynInstPtr 404LSQUnit<Impl>::getMemDepViolator() 405{ 406 DynInstPtr temp = memDepViolator; 407 408 memDepViolator = NULL; 409 410 return temp; 411} 412 413template <class Impl> 414unsigned 415LSQUnit<Impl>::numFreeEntries() 416{ 417 unsigned free_lq_entries = LQEntries - loads; 418 unsigned free_sq_entries = SQEntries - stores; 419 420 // Both the LQ and SQ entries have an extra dummy entry to differentiate 421 // empty/full conditions. Subtract 1 from the free entries. 422 if (free_lq_entries < free_sq_entries) { 423 return free_lq_entries - 1; 424 } else { 425 return free_sq_entries - 1; 426 } 427} 428 429template <class Impl> 430void 431LSQUnit<Impl>::checkSnoop(PacketPtr pkt) 432{ 433 int load_idx = loadHead; 434 435 // Unlock the cpu-local monitor when the CPU sees a snoop to a locked 436 // address. The CPU can speculatively execute a LL operation after a pending 437 // SC operation in the pipeline and that can make the cache monitor the CPU 438 // is connected to valid while it really shouldn't be. 439 for (int x = 0; x < cpu->numActiveThreads(); x++) { 440 ThreadContext *tc = cpu->getContext(x); 441 bool no_squash = cpu->thread[x]->noSquashFromTC; 442 cpu->thread[x]->noSquashFromTC = true; 443 TheISA::handleLockedSnoop(tc, pkt, cacheBlockMask); 444 cpu->thread[x]->noSquashFromTC = no_squash; 445 } 446 447 // If this is the only load in the LSQ we don't care 448 if (load_idx == loadTail) 449 return; 450 incrLdIdx(load_idx); 451 452 DPRINTF(LSQUnit, "Got snoop for address %#x\n", pkt->getAddr()); 453 Addr invalidate_addr = pkt->getAddr() & cacheBlockMask; 454 while (load_idx != loadTail) { 455 DynInstPtr ld_inst = loadQueue[load_idx]; 456 457 if (!ld_inst->effAddrValid() || ld_inst->uncacheable()) { 458 incrLdIdx(load_idx); 459 continue; 460 } 461 462 Addr load_addr = ld_inst->physEffAddr & cacheBlockMask; 463 DPRINTF(LSQUnit, "-- inst [sn:%lli] load_addr: %#x to pktAddr:%#x\n", 464 ld_inst->seqNum, load_addr, invalidate_addr); 465 466 if (load_addr == invalidate_addr) { 467 if (ld_inst->possibleLoadViolation()) { 468 DPRINTF(LSQUnit, "Conflicting load at addr %#x [sn:%lli]\n", 469 ld_inst->physEffAddr, pkt->getAddr(), ld_inst->seqNum); 470 471 // Mark the load for re-execution 472 ld_inst->fault = new ReExec; 473 } else { 474 // If a older load checks this and it's true 475 // then we might have missed the snoop 476 // in which case we need to invalidate to be sure 477 ld_inst->hitExternalSnoop(true); 478 } 479 } 480 incrLdIdx(load_idx); 481 } 482 return; 483} 484 485template <class Impl> 486Fault 487LSQUnit<Impl>::checkViolations(int load_idx, DynInstPtr &inst) 488{ 489 Addr inst_eff_addr1 = inst->effAddr >> depCheckShift; 490 Addr inst_eff_addr2 = (inst->effAddr + inst->effSize - 1) >> depCheckShift; 491 492 /** @todo in theory you only need to check an instruction that has executed 493 * however, there isn't a good way in the pipeline at the moment to check 494 * all instructions that will execute before the store writes back. Thus, 495 * like the implementation that came before it, we're overly conservative. 496 */ 497 while (load_idx != loadTail) { 498 DynInstPtr ld_inst = loadQueue[load_idx]; 499 if (!ld_inst->effAddrValid() || ld_inst->uncacheable()) { 500 incrLdIdx(load_idx); 501 continue; 502 } 503 504 Addr ld_eff_addr1 = ld_inst->effAddr >> depCheckShift; 505 Addr ld_eff_addr2 = 506 (ld_inst->effAddr + ld_inst->effSize - 1) >> depCheckShift; 507 508 if (inst_eff_addr2 >= ld_eff_addr1 && inst_eff_addr1 <= ld_eff_addr2) { 509 if (inst->isLoad()) { 510 // If this load is to the same block as an external snoop 511 // invalidate that we've observed then the load needs to be 512 // squashed as it could have newer data 513 if (ld_inst->hitExternalSnoop()) { 514 if (!memDepViolator || 515 ld_inst->seqNum < memDepViolator->seqNum) { 516 DPRINTF(LSQUnit, "Detected fault with inst [sn:%lli] " 517 "and [sn:%lli] at address %#x\n", 518 inst->seqNum, ld_inst->seqNum, ld_eff_addr1); 519 memDepViolator = ld_inst; 520 521 ++lsqMemOrderViolation; 522 523 return new GenericISA::M5PanicFault( 524 "Detected fault with inst [sn:%lli] and " 525 "[sn:%lli] at address %#x\n", 526 inst->seqNum, ld_inst->seqNum, ld_eff_addr1); 527 } 528 } 529 530 // Otherwise, mark the load has a possible load violation 531 // and if we see a snoop before it's commited, we need to squash 532 ld_inst->possibleLoadViolation(true); 533 DPRINTF(LSQUnit, "Found possible load violaiton at addr: %#x" 534 " between instructions [sn:%lli] and [sn:%lli]\n", 535 inst_eff_addr1, inst->seqNum, ld_inst->seqNum); 536 } else { 537 // A load/store incorrectly passed this store. 538 // Check if we already have a violator, or if it's newer 539 // squash and refetch. 540 if (memDepViolator && ld_inst->seqNum > memDepViolator->seqNum) 541 break; 542 543 DPRINTF(LSQUnit, "Detected fault with inst [sn:%lli] and " 544 "[sn:%lli] at address %#x\n", 545 inst->seqNum, ld_inst->seqNum, ld_eff_addr1); 546 memDepViolator = ld_inst; 547 548 ++lsqMemOrderViolation; 549 550 return new GenericISA::M5PanicFault("Detected fault with " 551 "inst [sn:%lli] and [sn:%lli] at address %#x\n", 552 inst->seqNum, ld_inst->seqNum, ld_eff_addr1); 553 } 554 } 555 556 incrLdIdx(load_idx); 557 } 558 return NoFault; 559} 560 561 562 563 564template <class Impl> 565Fault 566LSQUnit<Impl>::executeLoad(DynInstPtr &inst) 567{ 568 using namespace TheISA; 569 // Execute a specific load. 570 Fault load_fault = NoFault; 571 572 DPRINTF(LSQUnit, "Executing load PC %s, [sn:%lli]\n", 573 inst->pcState(), inst->seqNum); 574 575 assert(!inst->isSquashed()); 576 577 load_fault = inst->initiateAcc(); 578 579 if (inst->isTranslationDelayed() && 580 load_fault == NoFault) 581 return load_fault; 582 583 // If the instruction faulted or predicated false, then we need to send it 584 // along to commit without the instruction completing. 585 if (load_fault != NoFault || inst->readPredicate() == false) { 586 // Send this instruction to commit, also make sure iew stage 587 // realizes there is activity. 588 // Mark it as executed unless it is an uncached load that 589 // needs to hit the head of commit. 590 if (inst->readPredicate() == false) 591 inst->forwardOldRegs(); 592 DPRINTF(LSQUnit, "Load [sn:%lli] not executed from %s\n", 593 inst->seqNum, 594 (load_fault != NoFault ? "fault" : "predication")); 595 if (!(inst->hasRequest() && inst->uncacheable()) || 596 inst->isAtCommit()) { 597 inst->setExecuted(); 598 } 599 iewStage->instToCommit(inst); 600 iewStage->activityThisCycle(); 601 } else if (!loadBlocked()) { 602 assert(inst->effAddrValid()); 603 int load_idx = inst->lqIdx; 604 incrLdIdx(load_idx); 605 606 if (checkLoads) 607 return checkViolations(load_idx, inst); 608 } 609 610 return load_fault; 611} 612 613template <class Impl> 614Fault 615LSQUnit<Impl>::executeStore(DynInstPtr &store_inst) 616{ 617 using namespace TheISA; 618 // Make sure that a store exists. 619 assert(stores != 0); 620 621 int store_idx = store_inst->sqIdx; 622 623 DPRINTF(LSQUnit, "Executing store PC %s [sn:%lli]\n", 624 store_inst->pcState(), store_inst->seqNum); 625 626 assert(!store_inst->isSquashed()); 627 628 // Check the recently completed loads to see if any match this store's 629 // address. If so, then we have a memory ordering violation. 630 int load_idx = store_inst->lqIdx; 631 632 Fault store_fault = store_inst->initiateAcc(); 633 634 if (store_inst->isTranslationDelayed() && 635 store_fault == NoFault) 636 return store_fault; 637 638 if (store_inst->readPredicate() == false) 639 store_inst->forwardOldRegs(); 640 641 if (storeQueue[store_idx].size == 0) { 642 DPRINTF(LSQUnit,"Fault on Store PC %s, [sn:%lli], Size = 0\n", 643 store_inst->pcState(), store_inst->seqNum); 644 645 return store_fault; 646 } else if (store_inst->readPredicate() == false) { 647 DPRINTF(LSQUnit, "Store [sn:%lli] not executed from predication\n", 648 store_inst->seqNum); 649 return store_fault; 650 } 651 652 assert(store_fault == NoFault); 653 654 if (store_inst->isStoreConditional()) { 655 // Store conditionals need to set themselves as able to 656 // writeback if we haven't had a fault by here. 657 storeQueue[store_idx].canWB = true; 658 659 ++storesToWB; 660 } 661 662 return checkViolations(load_idx, store_inst); 663 664} 665 666template <class Impl> 667void 668LSQUnit<Impl>::commitLoad() 669{ 670 assert(loadQueue[loadHead]); 671 672 DPRINTF(LSQUnit, "Committing head load instruction, PC %s\n", 673 loadQueue[loadHead]->pcState()); 674 675 loadQueue[loadHead] = NULL; 676 677 incrLdIdx(loadHead); 678 679 --loads; 680} 681 682template <class Impl> 683void 684LSQUnit<Impl>::commitLoads(InstSeqNum &youngest_inst) 685{ 686 assert(loads == 0 || loadQueue[loadHead]); 687 688 while (loads != 0 && loadQueue[loadHead]->seqNum <= youngest_inst) { 689 commitLoad(); 690 } 691} 692 693template <class Impl> 694void 695LSQUnit<Impl>::commitStores(InstSeqNum &youngest_inst) 696{ 697 assert(stores == 0 || storeQueue[storeHead].inst); 698 699 int store_idx = storeHead; 700 701 while (store_idx != storeTail) { 702 assert(storeQueue[store_idx].inst); 703 // Mark any stores that are now committed and have not yet 704 // been marked as able to write back. 705 if (!storeQueue[store_idx].canWB) { 706 if (storeQueue[store_idx].inst->seqNum > youngest_inst) { 707 break; 708 } 709 DPRINTF(LSQUnit, "Marking store as able to write back, PC " 710 "%s [sn:%lli]\n", 711 storeQueue[store_idx].inst->pcState(), 712 storeQueue[store_idx].inst->seqNum); 713 714 storeQueue[store_idx].canWB = true; 715 716 ++storesToWB; 717 } 718 719 incrStIdx(store_idx); 720 } 721} 722 723template <class Impl> 724void 725LSQUnit<Impl>::writebackPendingStore() 726{ 727 if (hasPendingPkt) { 728 assert(pendingPkt != NULL); 729 730 // If the cache is blocked, this will store the packet for retry. 731 if (sendStore(pendingPkt)) { 732 storePostSend(pendingPkt); 733 } 734 pendingPkt = NULL; 735 hasPendingPkt = false; 736 } 737} 738 739template <class Impl> 740void 741LSQUnit<Impl>::writebackStores() 742{ 743 // First writeback the second packet from any split store that didn't 744 // complete last cycle because there weren't enough cache ports available. 745 if (TheISA::HasUnalignedMemAcc) { 746 writebackPendingStore(); 747 } 748 749 while (storesToWB > 0 && 750 storeWBIdx != storeTail && 751 storeQueue[storeWBIdx].inst && 752 storeQueue[storeWBIdx].canWB && 753 ((!needsTSO) || (!storeInFlight)) && 754 usedPorts < cachePorts) { 755 756 if (isStoreBlocked || lsq->cacheBlocked()) { 757 DPRINTF(LSQUnit, "Unable to write back any more stores, cache" 758 " is blocked!\n"); 759 break; 760 } 761 762 // Store didn't write any data so no need to write it back to 763 // memory. 764 if (storeQueue[storeWBIdx].size == 0) { 765 completeStore(storeWBIdx); 766 767 incrStIdx(storeWBIdx); 768 769 continue; 770 } 771 772 ++usedPorts; 773 774 if (storeQueue[storeWBIdx].inst->isDataPrefetch()) { 775 incrStIdx(storeWBIdx); 776 777 continue; 778 } 779 780 assert(storeQueue[storeWBIdx].req); 781 assert(!storeQueue[storeWBIdx].committed); 782 783 if (TheISA::HasUnalignedMemAcc && storeQueue[storeWBIdx].isSplit) { 784 assert(storeQueue[storeWBIdx].sreqLow); 785 assert(storeQueue[storeWBIdx].sreqHigh); 786 } 787 788 DynInstPtr inst = storeQueue[storeWBIdx].inst; 789 790 Request *req = storeQueue[storeWBIdx].req; 791 RequestPtr sreqLow = storeQueue[storeWBIdx].sreqLow; 792 RequestPtr sreqHigh = storeQueue[storeWBIdx].sreqHigh; 793 794 storeQueue[storeWBIdx].committed = true; 795 796 assert(!inst->memData); 797 inst->memData = new uint8_t[64]; 798 799 memcpy(inst->memData, storeQueue[storeWBIdx].data, req->getSize()); 800 801 MemCmd command = 802 req->isSwap() ? MemCmd::SwapReq : 803 (req->isLLSC() ? MemCmd::StoreCondReq : MemCmd::WriteReq); 804 PacketPtr data_pkt; 805 PacketPtr snd_data_pkt = NULL; 806 807 LSQSenderState *state = new LSQSenderState; 808 state->isLoad = false; 809 state->idx = storeWBIdx; 810 state->inst = inst; 811 812 if (!TheISA::HasUnalignedMemAcc || !storeQueue[storeWBIdx].isSplit) { 813 814 // Build a single data packet if the store isn't split. 815 data_pkt = new Packet(req, command); 816 data_pkt->dataStatic(inst->memData); 817 data_pkt->senderState = state; 818 } else { 819 // Create two packets if the store is split in two. 820 data_pkt = new Packet(sreqLow, command); 821 snd_data_pkt = new Packet(sreqHigh, command); 822 823 data_pkt->dataStatic(inst->memData); 824 snd_data_pkt->dataStatic(inst->memData + sreqLow->getSize()); 825 826 data_pkt->senderState = state; 827 snd_data_pkt->senderState = state; 828 829 state->isSplit = true; 830 state->outstanding = 2; 831 832 // Can delete the main request now. 833 delete req; 834 req = sreqLow; 835 } 836 837 DPRINTF(LSQUnit, "D-Cache: Writing back store idx:%i PC:%s " 838 "to Addr:%#x, data:%#x [sn:%lli]\n", 839 storeWBIdx, inst->pcState(), 840 req->getPaddr(), (int)*(inst->memData), 841 inst->seqNum); 842 843 // @todo: Remove this SC hack once the memory system handles it. 844 if (inst->isStoreConditional()) { 845 assert(!storeQueue[storeWBIdx].isSplit); 846 // Disable recording the result temporarily. Writing to 847 // misc regs normally updates the result, but this is not 848 // the desired behavior when handling store conditionals. 849 inst->recordResult(false); 850 bool success = TheISA::handleLockedWrite(inst.get(), req); 851 inst->recordResult(true); 852 853 if (!success) { 854 // Instantly complete this store. 855 DPRINTF(LSQUnit, "Store conditional [sn:%lli] failed. " 856 "Instantly completing it.\n", 857 inst->seqNum); 858 WritebackEvent *wb = new WritebackEvent(inst, data_pkt, this); 859 cpu->schedule(wb, curTick() + 1); 860 if (cpu->checker) { 861 // Make sure to set the LLSC data for verification 862 // if checker is loaded 863 inst->reqToVerify->setExtraData(0); 864 inst->completeAcc(data_pkt); 865 } 866 completeStore(storeWBIdx); 867 incrStIdx(storeWBIdx); 868 continue; 869 } 870 } else { 871 // Non-store conditionals do not need a writeback. 872 state->noWB = true; 873 } 874 875 bool split = 876 TheISA::HasUnalignedMemAcc && storeQueue[storeWBIdx].isSplit; 877 878 ThreadContext *thread = cpu->tcBase(lsqID); 879 880 if (req->isMmappedIpr()) { 881 assert(!inst->isStoreConditional()); 882 TheISA::handleIprWrite(thread, data_pkt); 883 delete data_pkt; 884 if (split) { 885 assert(snd_data_pkt->req->isMmappedIpr()); 886 TheISA::handleIprWrite(thread, snd_data_pkt); 887 delete snd_data_pkt; 888 delete sreqLow; 889 delete sreqHigh; 890 } 891 delete state; 892 delete req; 893 completeStore(storeWBIdx); 894 incrStIdx(storeWBIdx); 895 } else if (!sendStore(data_pkt)) { 896 DPRINTF(IEW, "D-Cache became blocked when writing [sn:%lli], will" 897 "retry later\n", 898 inst->seqNum); 899 900 // Need to store the second packet, if split. 901 if (split) { 902 state->pktToSend = true; 903 state->pendingPacket = snd_data_pkt; 904 } 905 } else { 906 907 // If split, try to send the second packet too 908 if (split) { 909 assert(snd_data_pkt); 910 911 // Ensure there are enough ports to use. 912 if (usedPorts < cachePorts) { 913 ++usedPorts; 914 if (sendStore(snd_data_pkt)) { 915 storePostSend(snd_data_pkt); 916 } else { 917 DPRINTF(IEW, "D-Cache became blocked when writing" 918 " [sn:%lli] second packet, will retry later\n", 919 inst->seqNum); 920 } 921 } else { 922 923 // Store the packet for when there's free ports. 924 assert(pendingPkt == NULL); 925 pendingPkt = snd_data_pkt; 926 hasPendingPkt = true; 927 } 928 } else { 929 930 // Not a split store. 931 storePostSend(data_pkt); 932 } 933 } 934 } 935 936 // Not sure this should set it to 0. 937 usedPorts = 0; 938 939 assert(stores >= 0 && storesToWB >= 0); 940} 941 942/*template <class Impl> 943void 944LSQUnit<Impl>::removeMSHR(InstSeqNum seqNum) 945{ 946 list<InstSeqNum>::iterator mshr_it = find(mshrSeqNums.begin(), 947 mshrSeqNums.end(), 948 seqNum); 949 950 if (mshr_it != mshrSeqNums.end()) { 951 mshrSeqNums.erase(mshr_it); 952 DPRINTF(LSQUnit, "Removing MSHR. count = %i\n",mshrSeqNums.size()); 953 } 954}*/ 955 956template <class Impl> 957void 958LSQUnit<Impl>::squash(const InstSeqNum &squashed_num) 959{ 960 DPRINTF(LSQUnit, "Squashing until [sn:%lli]!" 961 "(Loads:%i Stores:%i)\n", squashed_num, loads, stores); 962 963 int load_idx = loadTail; 964 decrLdIdx(load_idx); 965 966 while (loads != 0 && loadQueue[load_idx]->seqNum > squashed_num) { 967 DPRINTF(LSQUnit,"Load Instruction PC %s squashed, " 968 "[sn:%lli]\n", 969 loadQueue[load_idx]->pcState(), 970 loadQueue[load_idx]->seqNum); 971 972 if (isStalled() && load_idx == stallingLoadIdx) { 973 stalled = false; 974 stallingStoreIsn = 0; 975 stallingLoadIdx = 0; 976 } 977 978 // Clear the smart pointer to make sure it is decremented. 979 loadQueue[load_idx]->setSquashed(); 980 loadQueue[load_idx] = NULL; 981 --loads; 982 983 // Inefficient! 984 loadTail = load_idx; 985 986 decrLdIdx(load_idx); 987 ++lsqSquashedLoads; 988 } 989 990 if (isLoadBlocked) { 991 if (squashed_num < blockedLoadSeqNum) { 992 isLoadBlocked = false; 993 loadBlockedHandled = false; 994 blockedLoadSeqNum = 0; 995 } 996 } 997 998 if (memDepViolator && squashed_num < memDepViolator->seqNum) { 999 memDepViolator = NULL; 1000 } 1001 1002 int store_idx = storeTail; 1003 decrStIdx(store_idx); 1004 1005 while (stores != 0 && 1006 storeQueue[store_idx].inst->seqNum > squashed_num) { 1007 // Instructions marked as can WB are already committed. 1008 if (storeQueue[store_idx].canWB) { 1009 break; 1010 } 1011 1012 DPRINTF(LSQUnit,"Store Instruction PC %s squashed, " 1013 "idx:%i [sn:%lli]\n", 1014 storeQueue[store_idx].inst->pcState(), 1015 store_idx, storeQueue[store_idx].inst->seqNum); 1016 1017 // I don't think this can happen. It should have been cleared 1018 // by the stalling load. 1019 if (isStalled() && 1020 storeQueue[store_idx].inst->seqNum == stallingStoreIsn) { 1021 panic("Is stalled should have been cleared by stalling load!\n"); 1022 stalled = false; 1023 stallingStoreIsn = 0; 1024 } 1025 1026 // Clear the smart pointer to make sure it is decremented. 1027 storeQueue[store_idx].inst->setSquashed(); 1028 storeQueue[store_idx].inst = NULL; 1029 storeQueue[store_idx].canWB = 0; 1030 1031 // Must delete request now that it wasn't handed off to 1032 // memory. This is quite ugly. @todo: Figure out the proper 1033 // place to really handle request deletes. 1034 delete storeQueue[store_idx].req; 1035 if (TheISA::HasUnalignedMemAcc && storeQueue[store_idx].isSplit) { 1036 delete storeQueue[store_idx].sreqLow; 1037 delete storeQueue[store_idx].sreqHigh; 1038 1039 storeQueue[store_idx].sreqLow = NULL; 1040 storeQueue[store_idx].sreqHigh = NULL; 1041 } 1042 1043 storeQueue[store_idx].req = NULL; 1044 --stores; 1045 1046 // Inefficient! 1047 storeTail = store_idx; 1048 1049 decrStIdx(store_idx); 1050 ++lsqSquashedStores; 1051 } 1052} 1053 1054template <class Impl> 1055void 1056LSQUnit<Impl>::storePostSend(PacketPtr pkt) 1057{ 1058 if (isStalled() && 1059 storeQueue[storeWBIdx].inst->seqNum == stallingStoreIsn) { 1060 DPRINTF(LSQUnit, "Unstalling, stalling store [sn:%lli] " 1061 "load idx:%i\n", 1062 stallingStoreIsn, stallingLoadIdx); 1063 stalled = false; 1064 stallingStoreIsn = 0; 1065 iewStage->replayMemInst(loadQueue[stallingLoadIdx]); 1066 } 1067 1068 if (!storeQueue[storeWBIdx].inst->isStoreConditional()) { 1069 // The store is basically completed at this time. This 1070 // only works so long as the checker doesn't try to 1071 // verify the value in memory for stores. 1072 storeQueue[storeWBIdx].inst->setCompleted(); 1073 1074 if (cpu->checker) { 1075 cpu->checker->verify(storeQueue[storeWBIdx].inst); 1076 } 1077 } 1078 1079 if (needsTSO) { 1080 storeInFlight = true; 1081 } 1082 1083 incrStIdx(storeWBIdx); 1084} 1085 1086template <class Impl> 1087void 1088LSQUnit<Impl>::writeback(DynInstPtr &inst, PacketPtr pkt) 1089{ 1090 iewStage->wakeCPU(); 1091 1092 // Squashed instructions do not need to complete their access. 1093 if (inst->isSquashed()) { 1094 iewStage->decrWb(inst->seqNum); 1095 assert(!inst->isStore()); 1096 ++lsqIgnoredResponses; 1097 return; 1098 } 1099 1100 if (!inst->isExecuted()) { 1101 inst->setExecuted(); 1102 1103 // Complete access to copy data to proper place. 1104 inst->completeAcc(pkt); 1105 } 1106 1107 // Need to insert instruction into queue to commit 1108 iewStage->instToCommit(inst); 1109 1110 iewStage->activityThisCycle(); 1111 1112 // see if this load changed the PC 1113 iewStage->checkMisprediction(inst); 1114} 1115 1116template <class Impl> 1117void 1118LSQUnit<Impl>::completeStore(int store_idx) 1119{ 1120 assert(storeQueue[store_idx].inst); 1121 storeQueue[store_idx].completed = true; 1122 --storesToWB; 1123 // A bit conservative because a store completion may not free up entries, 1124 // but hopefully avoids two store completions in one cycle from making 1125 // the CPU tick twice. 1126 cpu->wakeCPU(); 1127 cpu->activityThisCycle(); 1128 1129 if (store_idx == storeHead) { 1130 do { 1131 incrStIdx(storeHead); 1132 1133 --stores; 1134 } while (storeQueue[storeHead].completed && 1135 storeHead != storeTail); 1136 1137 iewStage->updateLSQNextCycle = true; 1138 } 1139 1140 DPRINTF(LSQUnit, "Completing store [sn:%lli], idx:%i, store head " 1141 "idx:%i\n", 1142 storeQueue[store_idx].inst->seqNum, store_idx, storeHead); 1143 1144#if TRACING_ON 1145 if (DTRACE(O3PipeView)) { 1146 storeQueue[store_idx].inst->storeTick = 1147 curTick() - storeQueue[store_idx].inst->fetchTick; 1148 } 1149#endif 1150 1151 if (isStalled() && 1152 storeQueue[store_idx].inst->seqNum == stallingStoreIsn) { 1153 DPRINTF(LSQUnit, "Unstalling, stalling store [sn:%lli] " 1154 "load idx:%i\n", 1155 stallingStoreIsn, stallingLoadIdx); 1156 stalled = false; 1157 stallingStoreIsn = 0; 1158 iewStage->replayMemInst(loadQueue[stallingLoadIdx]); 1159 } 1160 1161 storeQueue[store_idx].inst->setCompleted(); 1162 1163 if (needsTSO) { 1164 storeInFlight = false; 1165 } 1166 1167 // Tell the checker we've completed this instruction. Some stores 1168 // may get reported twice to the checker, but the checker can 1169 // handle that case. 1170 if (cpu->checker) { 1171 cpu->checker->verify(storeQueue[store_idx].inst); 1172 } 1173} 1174 1175template <class Impl> 1176bool 1177LSQUnit<Impl>::sendStore(PacketPtr data_pkt) 1178{ 1179 if (!dcachePort->sendTimingReq(data_pkt)) { 1180 // Need to handle becoming blocked on a store. 1181 isStoreBlocked = true; 1182 ++lsqCacheBlocked; 1183 assert(retryPkt == NULL); 1184 retryPkt = data_pkt; 1185 lsq->setRetryTid(lsqID); 1186 return false; 1187 } 1188 return true; 1189} 1190 1191template <class Impl> 1192void 1193LSQUnit<Impl>::recvRetry() 1194{ 1195 if (isStoreBlocked) { 1196 DPRINTF(LSQUnit, "Receiving retry: store blocked\n"); 1197 assert(retryPkt != NULL); 1198 1199 LSQSenderState *state = 1200 dynamic_cast<LSQSenderState *>(retryPkt->senderState); 1201 1202 if (dcachePort->sendTimingReq(retryPkt)) { 1203 // Don't finish the store unless this is the last packet. 1204 if (!TheISA::HasUnalignedMemAcc || !state->pktToSend || 1205 state->pendingPacket == retryPkt) { 1206 state->pktToSend = false; 1207 storePostSend(retryPkt); 1208 } 1209 retryPkt = NULL; 1210 isStoreBlocked = false; 1211 lsq->setRetryTid(InvalidThreadID); 1212 1213 // Send any outstanding packet. 1214 if (TheISA::HasUnalignedMemAcc && state->pktToSend) { 1215 assert(state->pendingPacket); 1216 if (sendStore(state->pendingPacket)) { 1217 storePostSend(state->pendingPacket); 1218 } 1219 } 1220 } else { 1221 // Still blocked! 1222 ++lsqCacheBlocked; 1223 lsq->setRetryTid(lsqID); 1224 } 1225 } else if (isLoadBlocked) { 1226 DPRINTF(LSQUnit, "Loads squash themselves and all younger insts, " 1227 "no need to resend packet.\n"); 1228 } else { 1229 DPRINTF(LSQUnit, "Retry received but LSQ is no longer blocked.\n"); 1230 } 1231} 1232 1233template <class Impl> 1234inline void 1235LSQUnit<Impl>::incrStIdx(int &store_idx) const 1236{ 1237 if (++store_idx >= SQEntries) 1238 store_idx = 0; 1239} 1240 1241template <class Impl> 1242inline void 1243LSQUnit<Impl>::decrStIdx(int &store_idx) const 1244{ 1245 if (--store_idx < 0) 1246 store_idx += SQEntries; 1247} 1248 1249template <class Impl> 1250inline void 1251LSQUnit<Impl>::incrLdIdx(int &load_idx) const 1252{ 1253 if (++load_idx >= LQEntries) 1254 load_idx = 0; 1255} 1256 1257template <class Impl> 1258inline void 1259LSQUnit<Impl>::decrLdIdx(int &load_idx) const 1260{ 1261 if (--load_idx < 0) 1262 load_idx += LQEntries; 1263} 1264 1265template <class Impl> 1266void 1267LSQUnit<Impl>::dumpInsts() const 1268{ 1269 cprintf("Load store queue: Dumping instructions.\n"); 1270 cprintf("Load queue size: %i\n", loads); 1271 cprintf("Load queue: "); 1272 1273 int load_idx = loadHead; 1274 1275 while (load_idx != loadTail && loadQueue[load_idx]) { 1276 const DynInstPtr &inst(loadQueue[load_idx]); 1277 cprintf("%s.[sn:%i] ", inst->pcState(), inst->seqNum); 1278 1279 incrLdIdx(load_idx); 1280 } 1281 cprintf("\n"); 1282 1283 cprintf("Store queue size: %i\n", stores); 1284 cprintf("Store queue: "); 1285 1286 int store_idx = storeHead; 1287 1288 while (store_idx != storeTail && storeQueue[store_idx].inst) { 1289 const DynInstPtr &inst(storeQueue[store_idx].inst); 1290 cprintf("%s.[sn:%i] ", inst->pcState(), inst->seqNum); 1291 1292 incrStIdx(store_idx); 1293 } 1294 1295 cprintf("\n"); 1296} 1297 1298#endif//__CPU_O3_LSQ_UNIT_IMPL_HH__ 1299