// lsq_unit_impl.hh revision 11243:f876d08c7b21
1 2/* 3 * Copyright (c) 2010-2014 ARM Limited 4 * Copyright (c) 2013 Advanced Micro Devices, Inc. 5 * All rights reserved 6 * 7 * The license below extends only to copyright in the software and shall 8 * not be construed as granting a license to any other intellectual 9 * property including but not limited to intellectual property relating 10 * to a hardware implementation of the functionality of the software 11 * licensed hereunder. You may use the software subject to the license 12 * terms below provided that you ensure that this notice is replicated 13 * unmodified and in its entirety in all distributions of the software, 14 * modified or unmodified, in source code or in binary form. 15 * 16 * Copyright (c) 2004-2005 The Regents of The University of Michigan 17 * All rights reserved. 18 * 19 * Redistribution and use in source and binary forms, with or without 20 * modification, are permitted provided that the following conditions are 21 * met: redistributions of source code must retain the above copyright 22 * notice, this list of conditions and the following disclaimer; 23 * redistributions in binary form must reproduce the above copyright 24 * notice, this list of conditions and the following disclaimer in the 25 * documentation and/or other materials provided with the distribution; 26 * neither the name of the copyright holders nor the names of its 27 * contributors may be used to endorse or promote products derived from 28 * this software without specific prior written permission. 29 * 30 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 31 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 32 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 33 * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 34 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 35 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 36 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 37 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 38 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 39 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 40 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 41 * 42 * Authors: Kevin Lim 43 * Korey Sewell 44 */ 45 46#ifndef __CPU_O3_LSQ_UNIT_IMPL_HH__ 47#define __CPU_O3_LSQ_UNIT_IMPL_HH__ 48 49#include "arch/generic/debugfaults.hh" 50#include "arch/locked_mem.hh" 51#include "base/str.hh" 52#include "config/the_isa.hh" 53#include "cpu/checker/cpu.hh" 54#include "cpu/o3/lsq.hh" 55#include "cpu/o3/lsq_unit.hh" 56#include "debug/Activity.hh" 57#include "debug/IEW.hh" 58#include "debug/LSQUnit.hh" 59#include "debug/O3PipeView.hh" 60#include "mem/packet.hh" 61#include "mem/request.hh" 62 63template<class Impl> 64LSQUnit<Impl>::WritebackEvent::WritebackEvent(DynInstPtr &_inst, PacketPtr _pkt, 65 LSQUnit *lsq_ptr) 66 : Event(Default_Pri, AutoDelete), 67 inst(_inst), pkt(_pkt), lsqPtr(lsq_ptr) 68{ 69} 70 71template<class Impl> 72void 73LSQUnit<Impl>::WritebackEvent::process() 74{ 75 assert(!lsqPtr->cpu->switchedOut()); 76 77 lsqPtr->writeback(inst, pkt); 78 79 if (pkt->senderState) 80 delete pkt->senderState; 81 82 delete pkt->req; 83 delete pkt; 84} 85 86template<class Impl> 87const char * 88LSQUnit<Impl>::WritebackEvent::description() const 89{ 90 return "Store writeback"; 91} 92 93template<class Impl> 94void 95LSQUnit<Impl>::completeDataAccess(PacketPtr pkt) 96{ 97 LSQSenderState *state = dynamic_cast<LSQSenderState *>(pkt->senderState); 98 DynInstPtr inst = state->inst; 99 DPRINTF(IEW, "Writeback event [sn:%lli].\n", inst->seqNum); 100 DPRINTF(Activity, "Activity: Writeback event 
[sn:%lli].\n", inst->seqNum); 101 102 if (state->cacheBlocked) { 103 // This is the first half of a previous split load, 104 // where the 2nd half blocked, ignore this response 105 DPRINTF(IEW, "[sn:%lli]: Response from first half of earlier " 106 "blocked split load recieved. Ignoring.\n", inst->seqNum); 107 delete state; 108 return; 109 } 110 111 // If this is a split access, wait until all packets are received. 112 if (TheISA::HasUnalignedMemAcc && !state->complete()) { 113 return; 114 } 115 116 assert(!cpu->switchedOut()); 117 if (!inst->isSquashed()) { 118 if (!state->noWB) { 119 if (!TheISA::HasUnalignedMemAcc || !state->isSplit || 120 !state->isLoad) { 121 writeback(inst, pkt); 122 } else { 123 writeback(inst, state->mainPkt); 124 } 125 } 126 127 if (inst->isStore()) { 128 completeStore(state->idx); 129 } 130 } 131 132 if (TheISA::HasUnalignedMemAcc && state->isSplit && state->isLoad) { 133 delete state->mainPkt->req; 134 delete state->mainPkt; 135 } 136 137 pkt->req->setAccessLatency(); 138 cpu->ppDataAccessComplete->notify(std::make_pair(inst, pkt)); 139 140 delete state; 141} 142 143template <class Impl> 144LSQUnit<Impl>::LSQUnit() 145 : loads(0), stores(0), storesToWB(0), cacheBlockMask(0), stalled(false), 146 isStoreBlocked(false), storeInFlight(false), hasPendingPkt(false), 147 pendingPkt(nullptr) 148{ 149} 150 151template<class Impl> 152void 153LSQUnit<Impl>::init(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params, 154 LSQ *lsq_ptr, unsigned maxLQEntries, unsigned maxSQEntries, 155 unsigned id) 156{ 157 cpu = cpu_ptr; 158 iewStage = iew_ptr; 159 160 lsq = lsq_ptr; 161 162 lsqID = id; 163 164 DPRINTF(LSQUnit, "Creating LSQUnit%i object.\n",id); 165 166 // Add 1 for the sentinel entry (they are circular queues). 
167 LQEntries = maxLQEntries + 1; 168 SQEntries = maxSQEntries + 1; 169 170 //Due to uint8_t index in LSQSenderState 171 assert(LQEntries <= 256); 172 assert(SQEntries <= 256); 173 174 loadQueue.resize(LQEntries); 175 storeQueue.resize(SQEntries); 176 177 depCheckShift = params->LSQDepCheckShift; 178 checkLoads = params->LSQCheckLoads; 179 cachePorts = params->cachePorts; 180 needsTSO = params->needsTSO; 181 182 resetState(); 183} 184 185 186template<class Impl> 187void 188LSQUnit<Impl>::resetState() 189{ 190 loads = stores = storesToWB = 0; 191 192 loadHead = loadTail = 0; 193 194 storeHead = storeWBIdx = storeTail = 0; 195 196 usedPorts = 0; 197 198 retryPkt = NULL; 199 memDepViolator = NULL; 200 201 stalled = false; 202 203 cacheBlockMask = ~(cpu->cacheLineSize() - 1); 204} 205 206template<class Impl> 207std::string 208LSQUnit<Impl>::name() const 209{ 210 if (Impl::MaxThreads == 1) { 211 return iewStage->name() + ".lsq"; 212 } else { 213 return iewStage->name() + ".lsq.thread" + std::to_string(lsqID); 214 } 215} 216 217template<class Impl> 218void 219LSQUnit<Impl>::regStats() 220{ 221 lsqForwLoads 222 .name(name() + ".forwLoads") 223 .desc("Number of loads that had data forwarded from stores"); 224 225 invAddrLoads 226 .name(name() + ".invAddrLoads") 227 .desc("Number of loads ignored due to an invalid address"); 228 229 lsqSquashedLoads 230 .name(name() + ".squashedLoads") 231 .desc("Number of loads squashed"); 232 233 lsqIgnoredResponses 234 .name(name() + ".ignoredResponses") 235 .desc("Number of memory responses ignored because the instruction is squashed"); 236 237 lsqMemOrderViolation 238 .name(name() + ".memOrderViolation") 239 .desc("Number of memory ordering violations"); 240 241 lsqSquashedStores 242 .name(name() + ".squashedStores") 243 .desc("Number of stores squashed"); 244 245 invAddrSwpfs 246 .name(name() + ".invAddrSwpfs") 247 .desc("Number of software prefetches ignored due to an invalid address"); 248 249 lsqBlockedLoads 250 .name(name() + 
".blockedLoads") 251 .desc("Number of blocked loads due to partial load-store forwarding"); 252 253 lsqRescheduledLoads 254 .name(name() + ".rescheduledLoads") 255 .desc("Number of loads that were rescheduled"); 256 257 lsqCacheBlocked 258 .name(name() + ".cacheBlocked") 259 .desc("Number of times an access to memory failed due to the cache being blocked"); 260} 261 262template<class Impl> 263void 264LSQUnit<Impl>::setDcachePort(MasterPort *dcache_port) 265{ 266 dcachePort = dcache_port; 267} 268 269template<class Impl> 270void 271LSQUnit<Impl>::clearLQ() 272{ 273 loadQueue.clear(); 274} 275 276template<class Impl> 277void 278LSQUnit<Impl>::clearSQ() 279{ 280 storeQueue.clear(); 281} 282 283template<class Impl> 284void 285LSQUnit<Impl>::drainSanityCheck() const 286{ 287 for (int i = 0; i < loadQueue.size(); ++i) 288 assert(!loadQueue[i]); 289 290 assert(storesToWB == 0); 291 assert(!retryPkt); 292} 293 294template<class Impl> 295void 296LSQUnit<Impl>::takeOverFrom() 297{ 298 resetState(); 299} 300 301template<class Impl> 302void 303LSQUnit<Impl>::resizeLQ(unsigned size) 304{ 305 unsigned size_plus_sentinel = size + 1; 306 assert(size_plus_sentinel >= LQEntries); 307 308 if (size_plus_sentinel > LQEntries) { 309 while (size_plus_sentinel > loadQueue.size()) { 310 DynInstPtr dummy; 311 loadQueue.push_back(dummy); 312 LQEntries++; 313 } 314 } else { 315 LQEntries = size_plus_sentinel; 316 } 317 318 assert(LQEntries <= 256); 319} 320 321template<class Impl> 322void 323LSQUnit<Impl>::resizeSQ(unsigned size) 324{ 325 unsigned size_plus_sentinel = size + 1; 326 if (size_plus_sentinel > SQEntries) { 327 while (size_plus_sentinel > storeQueue.size()) { 328 SQEntry dummy; 329 storeQueue.push_back(dummy); 330 SQEntries++; 331 } 332 } else { 333 SQEntries = size_plus_sentinel; 334 } 335 336 assert(SQEntries <= 256); 337} 338 339template <class Impl> 340void 341LSQUnit<Impl>::insert(DynInstPtr &inst) 342{ 343 assert(inst->isMemRef()); 344 345 assert(inst->isLoad() || 
inst->isStore()); 346 347 if (inst->isLoad()) { 348 insertLoad(inst); 349 } else { 350 insertStore(inst); 351 } 352 353 inst->setInLSQ(); 354} 355 356template <class Impl> 357void 358LSQUnit<Impl>::insertLoad(DynInstPtr &load_inst) 359{ 360 assert((loadTail + 1) % LQEntries != loadHead); 361 assert(loads < LQEntries); 362 363 DPRINTF(LSQUnit, "Inserting load PC %s, idx:%i [sn:%lli]\n", 364 load_inst->pcState(), loadTail, load_inst->seqNum); 365 366 load_inst->lqIdx = loadTail; 367 368 if (stores == 0) { 369 load_inst->sqIdx = -1; 370 } else { 371 load_inst->sqIdx = storeTail; 372 } 373 374 loadQueue[loadTail] = load_inst; 375 376 incrLdIdx(loadTail); 377 378 ++loads; 379} 380 381template <class Impl> 382void 383LSQUnit<Impl>::insertStore(DynInstPtr &store_inst) 384{ 385 // Make sure it is not full before inserting an instruction. 386 assert((storeTail + 1) % SQEntries != storeHead); 387 assert(stores < SQEntries); 388 389 DPRINTF(LSQUnit, "Inserting store PC %s, idx:%i [sn:%lli]\n", 390 store_inst->pcState(), storeTail, store_inst->seqNum); 391 392 store_inst->sqIdx = storeTail; 393 store_inst->lqIdx = loadTail; 394 395 storeQueue[storeTail] = SQEntry(store_inst); 396 397 incrStIdx(storeTail); 398 399 ++stores; 400} 401 402template <class Impl> 403typename Impl::DynInstPtr 404LSQUnit<Impl>::getMemDepViolator() 405{ 406 DynInstPtr temp = memDepViolator; 407 408 memDepViolator = NULL; 409 410 return temp; 411} 412 413template <class Impl> 414unsigned 415LSQUnit<Impl>::numFreeLoadEntries() 416{ 417 //LQ has an extra dummy entry to differentiate 418 //empty/full conditions. Subtract 1 from the free entries. 419 DPRINTF(LSQUnit, "LQ size: %d, #loads occupied: %d\n", LQEntries, loads); 420 return LQEntries - loads - 1; 421} 422 423template <class Impl> 424unsigned 425LSQUnit<Impl>::numFreeStoreEntries() 426{ 427 //SQ has an extra dummy entry to differentiate 428 //empty/full conditions. Subtract 1 from the free entries. 
429 DPRINTF(LSQUnit, "SQ size: %d, #stores occupied: %d\n", SQEntries, stores); 430 return SQEntries - stores - 1; 431 432 } 433 434template <class Impl> 435void 436LSQUnit<Impl>::checkSnoop(PacketPtr pkt) 437{ 438 int load_idx = loadHead; 439 DPRINTF(LSQUnit, "Got snoop for address %#x\n", pkt->getAddr()); 440 441 // Unlock the cpu-local monitor when the CPU sees a snoop to a locked 442 // address. The CPU can speculatively execute a LL operation after a pending 443 // SC operation in the pipeline and that can make the cache monitor the CPU 444 // is connected to valid while it really shouldn't be. 445 for (int x = 0; x < cpu->numContexts(); x++) { 446 ThreadContext *tc = cpu->getContext(x); 447 bool no_squash = cpu->thread[x]->noSquashFromTC; 448 cpu->thread[x]->noSquashFromTC = true; 449 TheISA::handleLockedSnoop(tc, pkt, cacheBlockMask); 450 cpu->thread[x]->noSquashFromTC = no_squash; 451 } 452 453 Addr invalidate_addr = pkt->getAddr() & cacheBlockMask; 454 455 DynInstPtr ld_inst = loadQueue[load_idx]; 456 if (ld_inst) { 457 Addr load_addr_low = ld_inst->physEffAddrLow & cacheBlockMask; 458 Addr load_addr_high = ld_inst->physEffAddrHigh & cacheBlockMask; 459 460 // Check that this snoop didn't just invalidate our lock flag 461 if (ld_inst->effAddrValid() && (load_addr_low == invalidate_addr 462 || load_addr_high == invalidate_addr) 463 && ld_inst->memReqFlags & Request::LLSC) 464 TheISA::handleLockedSnoopHit(ld_inst.get()); 465 } 466 467 // If this is the only load in the LSQ we don't care 468 if (load_idx == loadTail) 469 return; 470 471 incrLdIdx(load_idx); 472 473 bool force_squash = false; 474 475 while (load_idx != loadTail) { 476 DynInstPtr ld_inst = loadQueue[load_idx]; 477 478 if (!ld_inst->effAddrValid() || ld_inst->strictlyOrdered()) { 479 incrLdIdx(load_idx); 480 continue; 481 } 482 483 Addr load_addr_low = ld_inst->physEffAddrLow & cacheBlockMask; 484 Addr load_addr_high = ld_inst->physEffAddrHigh & cacheBlockMask; 485 486 DPRINTF(LSQUnit, "-- inst 
[sn:%lli] load_addr: %#x to pktAddr:%#x\n", 487 ld_inst->seqNum, load_addr_low, invalidate_addr); 488 489 if ((load_addr_low == invalidate_addr 490 || load_addr_high == invalidate_addr) || force_squash) { 491 if (needsTSO) { 492 // If we have a TSO system, as all loads must be ordered with 493 // all other loads, this load as well as *all* subsequent loads 494 // need to be squashed to prevent possible load reordering. 495 force_squash = true; 496 } 497 if (ld_inst->possibleLoadViolation() || force_squash) { 498 DPRINTF(LSQUnit, "Conflicting load at addr %#x [sn:%lli]\n", 499 pkt->getAddr(), ld_inst->seqNum); 500 501 // Mark the load for re-execution 502 ld_inst->fault = std::make_shared<ReExec>(); 503 } else { 504 DPRINTF(LSQUnit, "HitExternal Snoop for addr %#x [sn:%lli]\n", 505 pkt->getAddr(), ld_inst->seqNum); 506 507 // Make sure that we don't lose a snoop hitting a LOCKED 508 // address since the LOCK* flags don't get updated until 509 // commit. 510 if (ld_inst->memReqFlags & Request::LLSC) 511 TheISA::handleLockedSnoopHit(ld_inst.get()); 512 513 // If a older load checks this and it's true 514 // then we might have missed the snoop 515 // in which case we need to invalidate to be sure 516 ld_inst->hitExternalSnoop(true); 517 } 518 } 519 incrLdIdx(load_idx); 520 } 521 return; 522} 523 524template <class Impl> 525Fault 526LSQUnit<Impl>::checkViolations(int load_idx, DynInstPtr &inst) 527{ 528 Addr inst_eff_addr1 = inst->effAddr >> depCheckShift; 529 Addr inst_eff_addr2 = (inst->effAddr + inst->effSize - 1) >> depCheckShift; 530 531 /** @todo in theory you only need to check an instruction that has executed 532 * however, there isn't a good way in the pipeline at the moment to check 533 * all instructions that will execute before the store writes back. Thus, 534 * like the implementation that came before it, we're overly conservative. 
535 */ 536 while (load_idx != loadTail) { 537 DynInstPtr ld_inst = loadQueue[load_idx]; 538 if (!ld_inst->effAddrValid() || ld_inst->strictlyOrdered()) { 539 incrLdIdx(load_idx); 540 continue; 541 } 542 543 Addr ld_eff_addr1 = ld_inst->effAddr >> depCheckShift; 544 Addr ld_eff_addr2 = 545 (ld_inst->effAddr + ld_inst->effSize - 1) >> depCheckShift; 546 547 if (inst_eff_addr2 >= ld_eff_addr1 && inst_eff_addr1 <= ld_eff_addr2) { 548 if (inst->isLoad()) { 549 // If this load is to the same block as an external snoop 550 // invalidate that we've observed then the load needs to be 551 // squashed as it could have newer data 552 if (ld_inst->hitExternalSnoop()) { 553 if (!memDepViolator || 554 ld_inst->seqNum < memDepViolator->seqNum) { 555 DPRINTF(LSQUnit, "Detected fault with inst [sn:%lli] " 556 "and [sn:%lli] at address %#x\n", 557 inst->seqNum, ld_inst->seqNum, ld_eff_addr1); 558 memDepViolator = ld_inst; 559 560 ++lsqMemOrderViolation; 561 562 return std::make_shared<GenericISA::M5PanicFault>( 563 "Detected fault with inst [sn:%lli] and " 564 "[sn:%lli] at address %#x\n", 565 inst->seqNum, ld_inst->seqNum, ld_eff_addr1); 566 } 567 } 568 569 // Otherwise, mark the load has a possible load violation 570 // and if we see a snoop before it's commited, we need to squash 571 ld_inst->possibleLoadViolation(true); 572 DPRINTF(LSQUnit, "Found possible load violation at addr: %#x" 573 " between instructions [sn:%lli] and [sn:%lli]\n", 574 inst_eff_addr1, inst->seqNum, ld_inst->seqNum); 575 } else { 576 // A load/store incorrectly passed this store. 577 // Check if we already have a violator, or if it's newer 578 // squash and refetch. 
579 if (memDepViolator && ld_inst->seqNum > memDepViolator->seqNum) 580 break; 581 582 DPRINTF(LSQUnit, "Detected fault with inst [sn:%lli] and " 583 "[sn:%lli] at address %#x\n", 584 inst->seqNum, ld_inst->seqNum, ld_eff_addr1); 585 memDepViolator = ld_inst; 586 587 ++lsqMemOrderViolation; 588 589 return std::make_shared<GenericISA::M5PanicFault>( 590 "Detected fault with " 591 "inst [sn:%lli] and [sn:%lli] at address %#x\n", 592 inst->seqNum, ld_inst->seqNum, ld_eff_addr1); 593 } 594 } 595 596 incrLdIdx(load_idx); 597 } 598 return NoFault; 599} 600 601 602 603 604template <class Impl> 605Fault 606LSQUnit<Impl>::executeLoad(DynInstPtr &inst) 607{ 608 using namespace TheISA; 609 // Execute a specific load. 610 Fault load_fault = NoFault; 611 612 DPRINTF(LSQUnit, "Executing load PC %s, [sn:%lli]\n", 613 inst->pcState(), inst->seqNum); 614 615 assert(!inst->isSquashed()); 616 617 load_fault = inst->initiateAcc(); 618 619 if (inst->isTranslationDelayed() && 620 load_fault == NoFault) 621 return load_fault; 622 623 // If the instruction faulted or predicated false, then we need to send it 624 // along to commit without the instruction completing. 625 if (load_fault != NoFault || !inst->readPredicate()) { 626 // Send this instruction to commit, also make sure iew stage 627 // realizes there is activity. Mark it as executed unless it 628 // is a strictly ordered load that needs to hit the head of 629 // commit. 630 if (!inst->readPredicate()) 631 inst->forwardOldRegs(); 632 DPRINTF(LSQUnit, "Load [sn:%lli] not executed from %s\n", 633 inst->seqNum, 634 (load_fault != NoFault ? 
"fault" : "predication")); 635 if (!(inst->hasRequest() && inst->strictlyOrdered()) || 636 inst->isAtCommit()) { 637 inst->setExecuted(); 638 } 639 iewStage->instToCommit(inst); 640 iewStage->activityThisCycle(); 641 } else { 642 assert(inst->effAddrValid()); 643 int load_idx = inst->lqIdx; 644 incrLdIdx(load_idx); 645 646 if (checkLoads) 647 return checkViolations(load_idx, inst); 648 } 649 650 return load_fault; 651} 652 653template <class Impl> 654Fault 655LSQUnit<Impl>::executeStore(DynInstPtr &store_inst) 656{ 657 using namespace TheISA; 658 // Make sure that a store exists. 659 assert(stores != 0); 660 661 int store_idx = store_inst->sqIdx; 662 663 DPRINTF(LSQUnit, "Executing store PC %s [sn:%lli]\n", 664 store_inst->pcState(), store_inst->seqNum); 665 666 assert(!store_inst->isSquashed()); 667 668 // Check the recently completed loads to see if any match this store's 669 // address. If so, then we have a memory ordering violation. 670 int load_idx = store_inst->lqIdx; 671 672 Fault store_fault = store_inst->initiateAcc(); 673 674 if (store_inst->isTranslationDelayed() && 675 store_fault == NoFault) 676 return store_fault; 677 678 if (!store_inst->readPredicate()) 679 store_inst->forwardOldRegs(); 680 681 if (storeQueue[store_idx].size == 0) { 682 DPRINTF(LSQUnit,"Fault on Store PC %s, [sn:%lli], Size = 0\n", 683 store_inst->pcState(), store_inst->seqNum); 684 685 return store_fault; 686 } else if (!store_inst->readPredicate()) { 687 DPRINTF(LSQUnit, "Store [sn:%lli] not executed from predication\n", 688 store_inst->seqNum); 689 return store_fault; 690 } 691 692 assert(store_fault == NoFault); 693 694 if (store_inst->isStoreConditional()) { 695 // Store conditionals need to set themselves as able to 696 // writeback if we haven't had a fault by here. 
697 storeQueue[store_idx].canWB = true; 698 699 ++storesToWB; 700 } 701 702 return checkViolations(load_idx, store_inst); 703 704} 705 706template <class Impl> 707void 708LSQUnit<Impl>::commitLoad() 709{ 710 assert(loadQueue[loadHead]); 711 712 DPRINTF(LSQUnit, "Committing head load instruction, PC %s\n", 713 loadQueue[loadHead]->pcState()); 714 715 loadQueue[loadHead] = NULL; 716 717 incrLdIdx(loadHead); 718 719 --loads; 720} 721 722template <class Impl> 723void 724LSQUnit<Impl>::commitLoads(InstSeqNum &youngest_inst) 725{ 726 assert(loads == 0 || loadQueue[loadHead]); 727 728 while (loads != 0 && loadQueue[loadHead]->seqNum <= youngest_inst) { 729 commitLoad(); 730 } 731} 732 733template <class Impl> 734void 735LSQUnit<Impl>::commitStores(InstSeqNum &youngest_inst) 736{ 737 assert(stores == 0 || storeQueue[storeHead].inst); 738 739 int store_idx = storeHead; 740 741 while (store_idx != storeTail) { 742 assert(storeQueue[store_idx].inst); 743 // Mark any stores that are now committed and have not yet 744 // been marked as able to write back. 745 if (!storeQueue[store_idx].canWB) { 746 if (storeQueue[store_idx].inst->seqNum > youngest_inst) { 747 break; 748 } 749 DPRINTF(LSQUnit, "Marking store as able to write back, PC " 750 "%s [sn:%lli]\n", 751 storeQueue[store_idx].inst->pcState(), 752 storeQueue[store_idx].inst->seqNum); 753 754 storeQueue[store_idx].canWB = true; 755 756 ++storesToWB; 757 } 758 759 incrStIdx(store_idx); 760 } 761} 762 763template <class Impl> 764void 765LSQUnit<Impl>::writebackPendingStore() 766{ 767 if (hasPendingPkt) { 768 assert(pendingPkt != NULL); 769 770 // If the cache is blocked, this will store the packet for retry. 
771 if (sendStore(pendingPkt)) { 772 storePostSend(pendingPkt); 773 } 774 pendingPkt = NULL; 775 hasPendingPkt = false; 776 } 777} 778 779template <class Impl> 780void 781LSQUnit<Impl>::writebackStores() 782{ 783 // First writeback the second packet from any split store that didn't 784 // complete last cycle because there weren't enough cache ports available. 785 if (TheISA::HasUnalignedMemAcc) { 786 writebackPendingStore(); 787 } 788 789 while (storesToWB > 0 && 790 storeWBIdx != storeTail && 791 storeQueue[storeWBIdx].inst && 792 storeQueue[storeWBIdx].canWB && 793 ((!needsTSO) || (!storeInFlight)) && 794 usedPorts < cachePorts) { 795 796 if (isStoreBlocked) { 797 DPRINTF(LSQUnit, "Unable to write back any more stores, cache" 798 " is blocked!\n"); 799 break; 800 } 801 802 // Store didn't write any data so no need to write it back to 803 // memory. 804 if (storeQueue[storeWBIdx].size == 0) { 805 completeStore(storeWBIdx); 806 807 incrStIdx(storeWBIdx); 808 809 continue; 810 } 811 812 ++usedPorts; 813 814 if (storeQueue[storeWBIdx].inst->isDataPrefetch()) { 815 incrStIdx(storeWBIdx); 816 817 continue; 818 } 819 820 assert(storeQueue[storeWBIdx].req); 821 assert(!storeQueue[storeWBIdx].committed); 822 823 if (TheISA::HasUnalignedMemAcc && storeQueue[storeWBIdx].isSplit) { 824 assert(storeQueue[storeWBIdx].sreqLow); 825 assert(storeQueue[storeWBIdx].sreqHigh); 826 } 827 828 DynInstPtr inst = storeQueue[storeWBIdx].inst; 829 830 Request *req = storeQueue[storeWBIdx].req; 831 RequestPtr sreqLow = storeQueue[storeWBIdx].sreqLow; 832 RequestPtr sreqHigh = storeQueue[storeWBIdx].sreqHigh; 833 834 storeQueue[storeWBIdx].committed = true; 835 836 assert(!inst->memData); 837 inst->memData = new uint8_t[req->getSize()]; 838 839 if (storeQueue[storeWBIdx].isAllZeros) 840 memset(inst->memData, 0, req->getSize()); 841 else 842 memcpy(inst->memData, storeQueue[storeWBIdx].data, req->getSize()); 843 844 PacketPtr data_pkt; 845 PacketPtr snd_data_pkt = NULL; 846 847 LSQSenderState 
*state = new LSQSenderState; 848 state->isLoad = false; 849 state->idx = storeWBIdx; 850 state->inst = inst; 851 852 if (!TheISA::HasUnalignedMemAcc || !storeQueue[storeWBIdx].isSplit) { 853 854 // Build a single data packet if the store isn't split. 855 data_pkt = Packet::createWrite(req); 856 data_pkt->dataStatic(inst->memData); 857 data_pkt->senderState = state; 858 } else { 859 // Create two packets if the store is split in two. 860 data_pkt = Packet::createWrite(sreqLow); 861 snd_data_pkt = Packet::createWrite(sreqHigh); 862 863 data_pkt->dataStatic(inst->memData); 864 snd_data_pkt->dataStatic(inst->memData + sreqLow->getSize()); 865 866 data_pkt->senderState = state; 867 snd_data_pkt->senderState = state; 868 869 state->isSplit = true; 870 state->outstanding = 2; 871 872 // Can delete the main request now. 873 delete req; 874 req = sreqLow; 875 } 876 877 DPRINTF(LSQUnit, "D-Cache: Writing back store idx:%i PC:%s " 878 "to Addr:%#x, data:%#x [sn:%lli]\n", 879 storeWBIdx, inst->pcState(), 880 req->getPaddr(), (int)*(inst->memData), 881 inst->seqNum); 882 883 // @todo: Remove this SC hack once the memory system handles it. 884 if (inst->isStoreConditional()) { 885 assert(!storeQueue[storeWBIdx].isSplit); 886 // Disable recording the result temporarily. Writing to 887 // misc regs normally updates the result, but this is not 888 // the desired behavior when handling store conditionals. 889 inst->recordResult(false); 890 bool success = TheISA::handleLockedWrite(inst.get(), req, cacheBlockMask); 891 inst->recordResult(true); 892 893 if (!success) { 894 // Instantly complete this store. 895 DPRINTF(LSQUnit, "Store conditional [sn:%lli] failed. 
" 896 "Instantly completing it.\n", 897 inst->seqNum); 898 WritebackEvent *wb = new WritebackEvent(inst, data_pkt, this); 899 cpu->schedule(wb, curTick() + 1); 900 if (cpu->checker) { 901 // Make sure to set the LLSC data for verification 902 // if checker is loaded 903 inst->reqToVerify->setExtraData(0); 904 inst->completeAcc(data_pkt); 905 } 906 completeStore(storeWBIdx); 907 incrStIdx(storeWBIdx); 908 continue; 909 } 910 } else { 911 // Non-store conditionals do not need a writeback. 912 state->noWB = true; 913 } 914 915 bool split = 916 TheISA::HasUnalignedMemAcc && storeQueue[storeWBIdx].isSplit; 917 918 ThreadContext *thread = cpu->tcBase(lsqID); 919 920 if (req->isMmappedIpr()) { 921 assert(!inst->isStoreConditional()); 922 TheISA::handleIprWrite(thread, data_pkt); 923 delete data_pkt; 924 if (split) { 925 assert(snd_data_pkt->req->isMmappedIpr()); 926 TheISA::handleIprWrite(thread, snd_data_pkt); 927 delete snd_data_pkt; 928 delete sreqLow; 929 delete sreqHigh; 930 } 931 delete state; 932 delete req; 933 completeStore(storeWBIdx); 934 incrStIdx(storeWBIdx); 935 } else if (!sendStore(data_pkt)) { 936 DPRINTF(IEW, "D-Cache became blocked when writing [sn:%lli], will" 937 "retry later\n", 938 inst->seqNum); 939 940 // Need to store the second packet, if split. 941 if (split) { 942 state->pktToSend = true; 943 state->pendingPacket = snd_data_pkt; 944 } 945 } else { 946 947 // If split, try to send the second packet too 948 if (split) { 949 assert(snd_data_pkt); 950 951 // Ensure there are enough ports to use. 952 if (usedPorts < cachePorts) { 953 ++usedPorts; 954 if (sendStore(snd_data_pkt)) { 955 storePostSend(snd_data_pkt); 956 } else { 957 DPRINTF(IEW, "D-Cache became blocked when writing" 958 " [sn:%lli] second packet, will retry later\n", 959 inst->seqNum); 960 } 961 } else { 962 963 // Store the packet for when there's free ports. 
964 assert(pendingPkt == NULL); 965 pendingPkt = snd_data_pkt; 966 hasPendingPkt = true; 967 } 968 } else { 969 970 // Not a split store. 971 storePostSend(data_pkt); 972 } 973 } 974 } 975 976 // Not sure this should set it to 0. 977 usedPorts = 0; 978 979 assert(stores >= 0 && storesToWB >= 0); 980} 981 982/*template <class Impl> 983void 984LSQUnit<Impl>::removeMSHR(InstSeqNum seqNum) 985{ 986 list<InstSeqNum>::iterator mshr_it = find(mshrSeqNums.begin(), 987 mshrSeqNums.end(), 988 seqNum); 989 990 if (mshr_it != mshrSeqNums.end()) { 991 mshrSeqNums.erase(mshr_it); 992 DPRINTF(LSQUnit, "Removing MSHR. count = %i\n",mshrSeqNums.size()); 993 } 994}*/ 995 996template <class Impl> 997void 998LSQUnit<Impl>::squash(const InstSeqNum &squashed_num) 999{ 1000 DPRINTF(LSQUnit, "Squashing until [sn:%lli]!" 1001 "(Loads:%i Stores:%i)\n", squashed_num, loads, stores); 1002 1003 int load_idx = loadTail; 1004 decrLdIdx(load_idx); 1005 1006 while (loads != 0 && loadQueue[load_idx]->seqNum > squashed_num) { 1007 DPRINTF(LSQUnit,"Load Instruction PC %s squashed, " 1008 "[sn:%lli]\n", 1009 loadQueue[load_idx]->pcState(), 1010 loadQueue[load_idx]->seqNum); 1011 1012 if (isStalled() && load_idx == stallingLoadIdx) { 1013 stalled = false; 1014 stallingStoreIsn = 0; 1015 stallingLoadIdx = 0; 1016 } 1017 1018 // Clear the smart pointer to make sure it is decremented. 1019 loadQueue[load_idx]->setSquashed(); 1020 loadQueue[load_idx] = NULL; 1021 --loads; 1022 1023 // Inefficient! 1024 loadTail = load_idx; 1025 1026 decrLdIdx(load_idx); 1027 ++lsqSquashedLoads; 1028 } 1029 1030 if (memDepViolator && squashed_num < memDepViolator->seqNum) { 1031 memDepViolator = NULL; 1032 } 1033 1034 int store_idx = storeTail; 1035 decrStIdx(store_idx); 1036 1037 while (stores != 0 && 1038 storeQueue[store_idx].inst->seqNum > squashed_num) { 1039 // Instructions marked as can WB are already committed. 
1040 if (storeQueue[store_idx].canWB) { 1041 break; 1042 } 1043 1044 DPRINTF(LSQUnit,"Store Instruction PC %s squashed, " 1045 "idx:%i [sn:%lli]\n", 1046 storeQueue[store_idx].inst->pcState(), 1047 store_idx, storeQueue[store_idx].inst->seqNum); 1048 1049 // I don't think this can happen. It should have been cleared 1050 // by the stalling load. 1051 if (isStalled() && 1052 storeQueue[store_idx].inst->seqNum == stallingStoreIsn) { 1053 panic("Is stalled should have been cleared by stalling load!\n"); 1054 stalled = false; 1055 stallingStoreIsn = 0; 1056 } 1057 1058 // Clear the smart pointer to make sure it is decremented. 1059 storeQueue[store_idx].inst->setSquashed(); 1060 storeQueue[store_idx].inst = NULL; 1061 storeQueue[store_idx].canWB = 0; 1062 1063 // Must delete request now that it wasn't handed off to 1064 // memory. This is quite ugly. @todo: Figure out the proper 1065 // place to really handle request deletes. 1066 delete storeQueue[store_idx].req; 1067 if (TheISA::HasUnalignedMemAcc && storeQueue[store_idx].isSplit) { 1068 delete storeQueue[store_idx].sreqLow; 1069 delete storeQueue[store_idx].sreqHigh; 1070 1071 storeQueue[store_idx].sreqLow = NULL; 1072 storeQueue[store_idx].sreqHigh = NULL; 1073 } 1074 1075 storeQueue[store_idx].req = NULL; 1076 --stores; 1077 1078 // Inefficient! 1079 storeTail = store_idx; 1080 1081 decrStIdx(store_idx); 1082 ++lsqSquashedStores; 1083 } 1084} 1085 1086template <class Impl> 1087void 1088LSQUnit<Impl>::storePostSend(PacketPtr pkt) 1089{ 1090 if (isStalled() && 1091 storeQueue[storeWBIdx].inst->seqNum == stallingStoreIsn) { 1092 DPRINTF(LSQUnit, "Unstalling, stalling store [sn:%lli] " 1093 "load idx:%i\n", 1094 stallingStoreIsn, stallingLoadIdx); 1095 stalled = false; 1096 stallingStoreIsn = 0; 1097 iewStage->replayMemInst(loadQueue[stallingLoadIdx]); 1098 } 1099 1100 if (!storeQueue[storeWBIdx].inst->isStoreConditional()) { 1101 // The store is basically completed at this time. 
This 1102 // only works so long as the checker doesn't try to 1103 // verify the value in memory for stores. 1104 storeQueue[storeWBIdx].inst->setCompleted(); 1105 1106 if (cpu->checker) { 1107 cpu->checker->verify(storeQueue[storeWBIdx].inst); 1108 } 1109 } 1110 1111 if (needsTSO) { 1112 storeInFlight = true; 1113 } 1114 1115 incrStIdx(storeWBIdx); 1116} 1117 1118template <class Impl> 1119void 1120LSQUnit<Impl>::writeback(DynInstPtr &inst, PacketPtr pkt) 1121{ 1122 iewStage->wakeCPU(); 1123 1124 // Squashed instructions do not need to complete their access. 1125 if (inst->isSquashed()) { 1126 assert(!inst->isStore()); 1127 ++lsqIgnoredResponses; 1128 return; 1129 } 1130 1131 if (!inst->isExecuted()) { 1132 inst->setExecuted(); 1133 1134 if (inst->fault == NoFault) { 1135 // Complete access to copy data to proper place. 1136 inst->completeAcc(pkt); 1137 } else { 1138 // If the instruction has an outstanding fault, we cannot complete 1139 // the access as this discards the current fault. 1140 1141 // If we have an outstanding fault, the fault should only be of 1142 // type ReExec. 1143 assert(dynamic_cast<ReExec*>(inst->fault.get()) != nullptr); 1144 1145 DPRINTF(LSQUnit, "Not completing instruction [sn:%lli] access " 1146 "due to pending fault.\n", inst->seqNum); 1147 } 1148 } 1149 1150 // Need to insert instruction into queue to commit 1151 iewStage->instToCommit(inst); 1152 1153 iewStage->activityThisCycle(); 1154 1155 // see if this load changed the PC 1156 iewStage->checkMisprediction(inst); 1157} 1158 1159template <class Impl> 1160void 1161LSQUnit<Impl>::completeStore(int store_idx) 1162{ 1163 assert(storeQueue[store_idx].inst); 1164 storeQueue[store_idx].completed = true; 1165 --storesToWB; 1166 // A bit conservative because a store completion may not free up entries, 1167 // but hopefully avoids two store completions in one cycle from making 1168 // the CPU tick twice. 
1169 cpu->wakeCPU(); 1170 cpu->activityThisCycle(); 1171 1172 if (store_idx == storeHead) { 1173 do { 1174 incrStIdx(storeHead); 1175 1176 --stores; 1177 } while (storeQueue[storeHead].completed && 1178 storeHead != storeTail); 1179 1180 iewStage->updateLSQNextCycle = true; 1181 } 1182 1183 DPRINTF(LSQUnit, "Completing store [sn:%lli], idx:%i, store head " 1184 "idx:%i\n", 1185 storeQueue[store_idx].inst->seqNum, store_idx, storeHead); 1186 1187#if TRACING_ON 1188 if (DTRACE(O3PipeView)) { 1189 storeQueue[store_idx].inst->storeTick = 1190 curTick() - storeQueue[store_idx].inst->fetchTick; 1191 } 1192#endif 1193 1194 if (isStalled() && 1195 storeQueue[store_idx].inst->seqNum == stallingStoreIsn) { 1196 DPRINTF(LSQUnit, "Unstalling, stalling store [sn:%lli] " 1197 "load idx:%i\n", 1198 stallingStoreIsn, stallingLoadIdx); 1199 stalled = false; 1200 stallingStoreIsn = 0; 1201 iewStage->replayMemInst(loadQueue[stallingLoadIdx]); 1202 } 1203 1204 storeQueue[store_idx].inst->setCompleted(); 1205 1206 if (needsTSO) { 1207 storeInFlight = false; 1208 } 1209 1210 // Tell the checker we've completed this instruction. Some stores 1211 // may get reported twice to the checker, but the checker can 1212 // handle that case. 1213 if (cpu->checker) { 1214 cpu->checker->verify(storeQueue[store_idx].inst); 1215 } 1216} 1217 1218template <class Impl> 1219bool 1220LSQUnit<Impl>::sendStore(PacketPtr data_pkt) 1221{ 1222 if (!dcachePort->sendTimingReq(data_pkt)) { 1223 // Need to handle becoming blocked on a store. 
1224 isStoreBlocked = true; 1225 ++lsqCacheBlocked; 1226 assert(retryPkt == NULL); 1227 retryPkt = data_pkt; 1228 return false; 1229 } 1230 return true; 1231} 1232 1233template <class Impl> 1234void 1235LSQUnit<Impl>::recvRetry() 1236{ 1237 if (isStoreBlocked) { 1238 DPRINTF(LSQUnit, "Receiving retry: store blocked\n"); 1239 assert(retryPkt != NULL); 1240 1241 LSQSenderState *state = 1242 dynamic_cast<LSQSenderState *>(retryPkt->senderState); 1243 1244 if (dcachePort->sendTimingReq(retryPkt)) { 1245 // Don't finish the store unless this is the last packet. 1246 if (!TheISA::HasUnalignedMemAcc || !state->pktToSend || 1247 state->pendingPacket == retryPkt) { 1248 state->pktToSend = false; 1249 storePostSend(retryPkt); 1250 } 1251 retryPkt = NULL; 1252 isStoreBlocked = false; 1253 1254 // Send any outstanding packet. 1255 if (TheISA::HasUnalignedMemAcc && state->pktToSend) { 1256 assert(state->pendingPacket); 1257 if (sendStore(state->pendingPacket)) { 1258 storePostSend(state->pendingPacket); 1259 } 1260 } 1261 } else { 1262 // Still blocked! 
1263 ++lsqCacheBlocked; 1264 } 1265 } 1266} 1267 1268template <class Impl> 1269inline void 1270LSQUnit<Impl>::incrStIdx(int &store_idx) const 1271{ 1272 if (++store_idx >= SQEntries) 1273 store_idx = 0; 1274} 1275 1276template <class Impl> 1277inline void 1278LSQUnit<Impl>::decrStIdx(int &store_idx) const 1279{ 1280 if (--store_idx < 0) 1281 store_idx += SQEntries; 1282} 1283 1284template <class Impl> 1285inline void 1286LSQUnit<Impl>::incrLdIdx(int &load_idx) const 1287{ 1288 if (++load_idx >= LQEntries) 1289 load_idx = 0; 1290} 1291 1292template <class Impl> 1293inline void 1294LSQUnit<Impl>::decrLdIdx(int &load_idx) const 1295{ 1296 if (--load_idx < 0) 1297 load_idx += LQEntries; 1298} 1299 1300template <class Impl> 1301void 1302LSQUnit<Impl>::dumpInsts() const 1303{ 1304 cprintf("Load store queue: Dumping instructions.\n"); 1305 cprintf("Load queue size: %i\n", loads); 1306 cprintf("Load queue: "); 1307 1308 int load_idx = loadHead; 1309 1310 while (load_idx != loadTail && loadQueue[load_idx]) { 1311 const DynInstPtr &inst(loadQueue[load_idx]); 1312 cprintf("%s.[sn:%i] ", inst->pcState(), inst->seqNum); 1313 1314 incrLdIdx(load_idx); 1315 } 1316 cprintf("\n"); 1317 1318 cprintf("Store queue size: %i\n", stores); 1319 cprintf("Store queue: "); 1320 1321 int store_idx = storeHead; 1322 1323 while (store_idx != storeTail && storeQueue[store_idx].inst) { 1324 const DynInstPtr &inst(storeQueue[store_idx].inst); 1325 cprintf("%s.[sn:%i] ", inst->pcState(), inst->seqNum); 1326 1327 incrStIdx(store_idx); 1328 } 1329 1330 cprintf("\n"); 1331} 1332 1333#endif//__CPU_O3_LSQ_UNIT_IMPL_HH__ 1334