lsq_unit_impl.hh revision 6974:4d4903a3e7c5
/*
 * Copyright (c) 2004-2005 The Regents of The University of Michigan
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Kevin Lim
 *          Korey Sewell
 */

#include "arch/locked_mem.hh"
#include "config/the_isa.hh"
#include "config/use_checker.hh"
#include "cpu/o3/lsq.hh"
#include "cpu/o3/lsq_unit.hh"
#include "base/str.hh"
#include "mem/packet.hh"
#include "mem/request.hh"

#if USE_CHECKER
#include "cpu/checker/cpu.hh"
#endif

template<class Impl>
LSQUnit<Impl>::WritebackEvent::WritebackEvent(DynInstPtr &_inst, PacketPtr _pkt,
                                              LSQUnit *lsq_ptr)
    : inst(_inst), pkt(_pkt), lsqPtr(lsq_ptr)
{
    this->setFlags(Event::AutoDelete);
}

template<class Impl>
void
LSQUnit<Impl>::WritebackEvent::process()
{
    if (!lsqPtr->isSwitchedOut()) {
        lsqPtr->writeback(inst, pkt);
    }

    if (pkt->senderState)
        delete pkt->senderState;

    delete pkt->req;
    delete pkt;
}

template<class Impl>
const char *
LSQUnit<Impl>::WritebackEvent::description() const
{
    return "Store writeback";
}

template<class Impl>
void
LSQUnit<Impl>::completeDataAccess(PacketPtr pkt)
{
    LSQSenderState *state = dynamic_cast<LSQSenderState *>(pkt->senderState);
    DynInstPtr inst = state->inst;
    DPRINTF(IEW, "Writeback event [sn:%lli]\n", inst->seqNum);
    DPRINTF(Activity, "Activity: Writeback event [sn:%lli]\n", inst->seqNum);

    //iewStage->ldstQueue.removeMSHR(inst->threadNumber,inst->seqNum);

    assert(!pkt->wasNacked());

    // If this is a split access, wait until all packets are received.
    if (TheISA::HasUnalignedMemAcc && !state->complete()) {
        delete pkt->req;
        delete pkt;
        return;
    }

    if (isSwitchedOut() || inst->isSquashed()) {
        iewStage->decrWb(inst->seqNum);
    } else {
        if (!state->noWB) {
            if (!TheISA::HasUnalignedMemAcc || !state->isSplit ||
                !state->isLoad) {
                writeback(inst, pkt);
            } else {
                writeback(inst, state->mainPkt);
            }
        }

        if (inst->isStore()) {
            completeStore(state->idx);
        }
    }

    if (TheISA::HasUnalignedMemAcc && state->isSplit && state->isLoad) {
        delete state->mainPkt->req;
        delete state->mainPkt;
    }
    delete state;
    delete pkt->req;
    delete pkt;
}

template <class Impl>
LSQUnit<Impl>::LSQUnit()
    : loads(0), stores(0), storesToWB(0), stalled(false),
      isStoreBlocked(false), isLoadBlocked(false),
      loadBlockedHandled(false), hasPendingPkt(false)
{
}

template<class Impl>
void
LSQUnit<Impl>::init(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params,
        LSQ *lsq_ptr, unsigned maxLQEntries, unsigned maxSQEntries,
        unsigned id)
{
    cpu = cpu_ptr;
    iewStage = iew_ptr;

    DPRINTF(LSQUnit, "Creating LSQUnit%i object.\n", id);

    switchedOut = false;

    lsq = lsq_ptr;

    lsqID = id;

    // Add 1 for the sentinel entry (they are circular queues).
    LQEntries = maxLQEntries + 1;
    SQEntries = maxSQEntries + 1;

    loadQueue.resize(LQEntries);
    storeQueue.resize(SQEntries);

    loadHead = loadTail = 0;

    storeHead = storeWBIdx = storeTail = 0;

    usedPorts = 0;
    cachePorts = params->cachePorts;

    retryPkt = NULL;
    memDepViolator = NULL;

    blockedLoadSeqNum = 0;
}

template<class Impl>
std::string
LSQUnit<Impl>::name() const
{
    if (Impl::MaxThreads == 1) {
        return iewStage->name() + ".lsq";
    } else {
        return iewStage->name() + ".lsq.thread."
               + to_string(lsqID);
    }
}

template<class Impl>
void
LSQUnit<Impl>::regStats()
{
    lsqForwLoads
        .name(name() + ".forwLoads")
        .desc("Number of loads that had data forwarded from stores");

    invAddrLoads
        .name(name() + ".invAddrLoads")
        .desc("Number of loads ignored due to an invalid address");

    lsqSquashedLoads
        .name(name() + ".squashedLoads")
        .desc("Number of loads squashed");

    lsqIgnoredResponses
        .name(name() + ".ignoredResponses")
        .desc("Number of memory responses ignored because the instruction is squashed");

    lsqMemOrderViolation
        .name(name() + ".memOrderViolation")
        .desc("Number of memory ordering violations");

    lsqSquashedStores
        .name(name() + ".squashedStores")
        .desc("Number of stores squashed");

    invAddrSwpfs
        .name(name() + ".invAddrSwpfs")
        .desc("Number of software prefetches ignored due to an invalid address");

    lsqBlockedLoads
        .name(name() + ".blockedLoads")
        .desc("Number of blocked loads due to partial load-store forwarding");

    lsqRescheduledLoads
        .name(name() + ".rescheduledLoads")
        .desc("Number of loads that were rescheduled");

    lsqCacheBlocked
        .name(name() + ".cacheBlocked")
        .desc("Number of times an access to memory failed due to the cache being blocked");
}

template<class Impl>
void
LSQUnit<Impl>::setDcachePort(Port *dcache_port)
{
    dcachePort = dcache_port;

#if USE_CHECKER
    if (cpu->checker) {
        cpu->checker->setDcachePort(dcachePort);
    }
#endif
}

template<class Impl>
void
LSQUnit<Impl>::clearLQ()
{
    loadQueue.clear();
}

template<class Impl>
void
LSQUnit<Impl>::clearSQ()
{
    storeQueue.clear();
}

template<class Impl>
void
LSQUnit<Impl>::switchOut()
{
    switchedOut = true;
    for (int i = 0; i < loadQueue.size(); ++i) {
        assert(!loadQueue[i]);
        loadQueue[i] = NULL;
    }

    assert(storesToWB == 0);
}

template<class Impl>
void
LSQUnit<Impl>::takeOverFrom()
{
    switchedOut = false;
    loads = stores = storesToWB = 0;

    loadHead = loadTail = 0;

    storeHead = storeWBIdx = storeTail = 0;

    usedPorts = 0;

    memDepViolator = NULL;

    blockedLoadSeqNum = 0;

    stalled = false;
    isLoadBlocked = false;
    loadBlockedHandled = false;
}

template<class Impl>
void
LSQUnit<Impl>::resizeLQ(unsigned size)
{
    unsigned size_plus_sentinel = size + 1;
    assert(size_plus_sentinel >= LQEntries);

    if (size_plus_sentinel > LQEntries) {
        while (size_plus_sentinel > loadQueue.size()) {
            DynInstPtr dummy;
            loadQueue.push_back(dummy);
            LQEntries++;
        }
    } else {
        LQEntries = size_plus_sentinel;
    }
}

template<class Impl>
void
LSQUnit<Impl>::resizeSQ(unsigned size)
{
    unsigned size_plus_sentinel = size + 1;
    if (size_plus_sentinel > SQEntries) {
        while (size_plus_sentinel > storeQueue.size()) {
            SQEntry dummy;
            storeQueue.push_back(dummy);
            SQEntries++;
        }
    } else {
        SQEntries = size_plus_sentinel;
    }
}

template <class Impl>
void
LSQUnit<Impl>::insert(DynInstPtr &inst)
{
    assert(inst->isMemRef());

    assert(inst->isLoad() || inst->isStore());

    if (inst->isLoad()) {
        insertLoad(inst);
    } else {
        insertStore(inst);
    }

    inst->setInLSQ();
}

template <class Impl>
void
LSQUnit<Impl>::insertLoad(DynInstPtr &load_inst)
{
    assert((loadTail + 1) % LQEntries != loadHead);
    assert(loads < LQEntries);

    DPRINTF(LSQUnit, "Inserting load PC %#x, idx:%i [sn:%lli]\n",
            load_inst->readPC(), loadTail, load_inst->seqNum);

    load_inst->lqIdx = loadTail;

    if (stores == 0) {
        load_inst->sqIdx = -1;
    } else {
        load_inst->sqIdx = storeTail;
    }

    loadQueue[loadTail] = load_inst;

    incrLdIdx(loadTail);

    ++loads;
}

template <class Impl>
void
LSQUnit<Impl>::insertStore(DynInstPtr &store_inst)
{
    // Make sure it is not full before inserting an instruction.
    assert((storeTail + 1) % SQEntries != storeHead);
    assert(stores < SQEntries);

    DPRINTF(LSQUnit, "Inserting store PC %#x, idx:%i [sn:%lli]\n",
            store_inst->readPC(), storeTail, store_inst->seqNum);

    store_inst->sqIdx = storeTail;
    store_inst->lqIdx = loadTail;

    storeQueue[storeTail] = SQEntry(store_inst);

    incrStIdx(storeTail);

    ++stores;
}

template <class Impl>
typename Impl::DynInstPtr
LSQUnit<Impl>::getMemDepViolator()
{
    DynInstPtr temp = memDepViolator;

    memDepViolator = NULL;

    return temp;
}

template <class Impl>
unsigned
LSQUnit<Impl>::numFreeEntries()
{
    unsigned free_lq_entries = LQEntries - loads;
    unsigned free_sq_entries = SQEntries - stores;

    // Both the LQ and SQ entries have an extra dummy entry to differentiate
    // empty/full conditions. Subtract 1 from the free entries.
    if (free_lq_entries < free_sq_entries) {
        return free_lq_entries - 1;
    } else {
        return free_sq_entries - 1;
    }
}

template <class Impl>
int
LSQUnit<Impl>::numLoadsReady()
{
    int load_idx = loadHead;
    int retval = 0;

    while (load_idx != loadTail) {
        assert(loadQueue[load_idx]);

        if (loadQueue[load_idx]->readyToIssue()) {
            ++retval;
        }

        // Advance to the next LQ entry; without this the loop never
        // terminates.
        incrLdIdx(load_idx);
    }

    return retval;
}

template <class Impl>
Fault
LSQUnit<Impl>::executeLoad(DynInstPtr &inst)
{
    using namespace TheISA;
    // Execute a specific load.
    Fault load_fault = NoFault;

    DPRINTF(LSQUnit, "Executing load PC %#x, [sn:%lli]\n",
            inst->readPC(), inst->seqNum);

    assert(!inst->isSquashed());

    load_fault = inst->initiateAcc();

    // If the instruction faulted, then we need to send it along to commit
    // without the instruction completing.
    if (load_fault != NoFault) {
        // Send this instruction to commit, also make sure iew stage
        // realizes there is activity.
        // Mark it as executed unless it is an uncached load that
        // needs to hit the head of commit.
        if (!(inst->hasRequest() && inst->uncacheable()) ||
            inst->isAtCommit()) {
            inst->setExecuted();
        }
        iewStage->instToCommit(inst);
        iewStage->activityThisCycle();
    } else if (!loadBlocked()) {
        assert(inst->effAddrValid);
        int load_idx = inst->lqIdx;
        incrLdIdx(load_idx);
        while (load_idx != loadTail) {
            // Really only need to check loads that have actually executed

            // @todo: For now this is extra conservative, detecting a
            // violation if the addresses match assuming all accesses
            // are quad word accesses.

            // @todo: Fix this, magic number being used here
            if (loadQueue[load_idx]->effAddrValid &&
                (loadQueue[load_idx]->effAddr >> 8) ==
                (inst->effAddr >> 8)) {
                // A load incorrectly passed this load. Squash and refetch.
                // For now return a fault to show that it was unsuccessful.
                DynInstPtr violator = loadQueue[load_idx];
                if (!memDepViolator ||
                    (violator->seqNum < memDepViolator->seqNum)) {
                    memDepViolator = violator;
                } else {
                    break;
                }

                ++lsqMemOrderViolation;

                return genMachineCheckFault();
            }

            incrLdIdx(load_idx);
        }
    }

    return load_fault;
}

template <class Impl>
Fault
LSQUnit<Impl>::executeStore(DynInstPtr &store_inst)
{
    using namespace TheISA;
    // Make sure that a store exists.
    assert(stores != 0);

    int store_idx = store_inst->sqIdx;

    DPRINTF(LSQUnit, "Executing store PC %#x [sn:%lli]\n",
            store_inst->readPC(), store_inst->seqNum);

    assert(!store_inst->isSquashed());

    // Check the recently completed loads to see if any match this store's
    // address. If so, then we have a memory ordering violation.
    int load_idx = store_inst->lqIdx;

    Fault store_fault = store_inst->initiateAcc();

    if (storeQueue[store_idx].size == 0) {
        DPRINTF(LSQUnit, "Fault on Store PC %#x, [sn:%lli], Size = 0\n",
                store_inst->readPC(), store_inst->seqNum);

        return store_fault;
    }

    assert(store_fault == NoFault);

    if (store_inst->isStoreConditional()) {
        // Store conditionals need to set themselves as able to
        // writeback if we haven't had a fault by here.
        storeQueue[store_idx].canWB = true;

        ++storesToWB;
    }

    assert(store_inst->effAddrValid);
    while (load_idx != loadTail) {
        // Really only need to check loads that have actually executed.
        // It's safe to check all loads because effAddr is set to
        // InvalAddr when the dyn inst is created.

        // @todo: For now this is extra conservative, detecting a
        // violation if the addresses match assuming all accesses
        // are quad word accesses.

        // @todo: Fix this, magic number being used here
        if (loadQueue[load_idx]->effAddrValid &&
            (loadQueue[load_idx]->effAddr >> 8) ==
            (store_inst->effAddr >> 8)) {
            // A load incorrectly passed this store. Squash and refetch.
            // For now return a fault to show that it was unsuccessful.
            DynInstPtr violator = loadQueue[load_idx];
            if (!memDepViolator ||
                (violator->seqNum < memDepViolator->seqNum)) {
                memDepViolator = violator;
            } else {
                break;
            }

            ++lsqMemOrderViolation;

            return genMachineCheckFault();
        }

        incrLdIdx(load_idx);
    }

    return store_fault;
}

template <class Impl>
void
LSQUnit<Impl>::commitLoad()
{
    assert(loadQueue[loadHead]);

    DPRINTF(LSQUnit, "Committing head load instruction, PC %#x\n",
            loadQueue[loadHead]->readPC());

    loadQueue[loadHead] = NULL;

    incrLdIdx(loadHead);

    --loads;
}

template <class Impl>
void
LSQUnit<Impl>::commitLoads(InstSeqNum &youngest_inst)
{
    assert(loads == 0 || loadQueue[loadHead]);

    while (loads != 0 && loadQueue[loadHead]->seqNum <= youngest_inst) {
        commitLoad();
    }
}

template <class Impl>
void
LSQUnit<Impl>::commitStores(InstSeqNum &youngest_inst)
{
    assert(stores == 0 || storeQueue[storeHead].inst);

    int store_idx = storeHead;

    while (store_idx != storeTail) {
        assert(storeQueue[store_idx].inst);
        // Mark any stores that are now committed and have not yet
        // been marked as able to write back.
        if (!storeQueue[store_idx].canWB) {
            if (storeQueue[store_idx].inst->seqNum > youngest_inst) {
                break;
            }
            DPRINTF(LSQUnit, "Marking store as able to write back, PC "
                    "%#x [sn:%lli]\n",
                    storeQueue[store_idx].inst->readPC(),
                    storeQueue[store_idx].inst->seqNum);

            storeQueue[store_idx].canWB = true;

            ++storesToWB;
        }

        incrStIdx(store_idx);
    }
}

template <class Impl>
void
LSQUnit<Impl>::writebackPendingStore()
{
    if (hasPendingPkt) {
        assert(pendingPkt != NULL);

        // If the cache is blocked, this will store the packet for retry.
        if (sendStore(pendingPkt)) {
            storePostSend(pendingPkt);
        }
        pendingPkt = NULL;
        hasPendingPkt = false;
    }
}

template <class Impl>
void
LSQUnit<Impl>::writebackStores()
{
    // First writeback the second packet from any split store that didn't
    // complete last cycle because there weren't enough cache ports available.
    if (TheISA::HasUnalignedMemAcc) {
        writebackPendingStore();
    }

    while (storesToWB > 0 &&
           storeWBIdx != storeTail &&
           storeQueue[storeWBIdx].inst &&
           storeQueue[storeWBIdx].canWB &&
           usedPorts < cachePorts) {

        if (isStoreBlocked || lsq->cacheBlocked()) {
            DPRINTF(LSQUnit, "Unable to write back any more stores, cache"
                    " is blocked!\n");
            break;
        }

        // Store didn't write any data so no need to write it back to
        // memory.
        if (storeQueue[storeWBIdx].size == 0) {
            completeStore(storeWBIdx);

            incrStIdx(storeWBIdx);

            continue;
        }

        ++usedPorts;

        if (storeQueue[storeWBIdx].inst->isDataPrefetch()) {
            incrStIdx(storeWBIdx);

            continue;
        }

        assert(storeQueue[storeWBIdx].req);
        assert(!storeQueue[storeWBIdx].committed);

        if (TheISA::HasUnalignedMemAcc && storeQueue[storeWBIdx].isSplit) {
            assert(storeQueue[storeWBIdx].sreqLow);
            assert(storeQueue[storeWBIdx].sreqHigh);
        }

        DynInstPtr inst = storeQueue[storeWBIdx].inst;

        Request *req = storeQueue[storeWBIdx].req;
        storeQueue[storeWBIdx].committed = true;

        assert(!inst->memData);
        inst->memData = new uint8_t[64];

        memcpy(inst->memData, storeQueue[storeWBIdx].data, req->getSize());

        MemCmd command =
            req->isSwap() ? MemCmd::SwapReq :
            (req->isLLSC() ? MemCmd::StoreCondReq : MemCmd::WriteReq);
        PacketPtr data_pkt;
        PacketPtr snd_data_pkt = NULL;

        LSQSenderState *state = new LSQSenderState;
        state->isLoad = false;
        state->idx = storeWBIdx;
        state->inst = inst;

        if (!TheISA::HasUnalignedMemAcc || !storeQueue[storeWBIdx].isSplit) {

            // Build a single data packet if the store isn't split.
            data_pkt = new Packet(req, command, Packet::Broadcast);
            data_pkt->dataStatic(inst->memData);
            data_pkt->senderState = state;
        } else {
            RequestPtr sreqLow = storeQueue[storeWBIdx].sreqLow;
            RequestPtr sreqHigh = storeQueue[storeWBIdx].sreqHigh;

            // Create two packets if the store is split in two.
            data_pkt = new Packet(sreqLow, command, Packet::Broadcast);
            snd_data_pkt = new Packet(sreqHigh, command, Packet::Broadcast);

            data_pkt->dataStatic(inst->memData);
            snd_data_pkt->dataStatic(inst->memData + sreqLow->getSize());

            data_pkt->senderState = state;
            snd_data_pkt->senderState = state;

            state->isSplit = true;
            state->outstanding = 2;

            // Can delete the main request now.
            delete req;
            req = sreqLow;
        }

        DPRINTF(LSQUnit, "D-Cache: Writing back store idx:%i PC:%#x "
                "to Addr:%#x, data:%#x [sn:%lli]\n",
                storeWBIdx, inst->readPC(),
                req->getPaddr(), (int)*(inst->memData),
                inst->seqNum);

        // @todo: Remove this SC hack once the memory system handles it.
        if (inst->isStoreConditional()) {
            assert(!storeQueue[storeWBIdx].isSplit);
            // Disable recording the result temporarily. Writing to
            // misc regs normally updates the result, but this is not
            // the desired behavior when handling store conditionals.
            inst->recordResult = false;
            bool success = TheISA::handleLockedWrite(inst.get(), req);
            inst->recordResult = true;

            if (!success) {
                // Instantly complete this store.
                DPRINTF(LSQUnit, "Store conditional [sn:%lli] failed. "
                        "Instantly completing it.\n",
                        inst->seqNum);
                WritebackEvent *wb = new WritebackEvent(inst, data_pkt, this);
                cpu->schedule(wb, curTick + 1);
                completeStore(storeWBIdx);
                incrStIdx(storeWBIdx);
                continue;
            }
        } else {
            // Non-store conditionals do not need a writeback.
            state->noWB = true;
        }

        if (!sendStore(data_pkt)) {
            DPRINTF(IEW, "D-Cache became blocked when writing [sn:%lli], "
                    "will retry later\n",
                    inst->seqNum);

            // Need to store the second packet, if split.
            if (TheISA::HasUnalignedMemAcc && storeQueue[storeWBIdx].isSplit) {
                state->pktToSend = true;
                state->pendingPacket = snd_data_pkt;
            }
        } else {

            // If split, try to send the second packet too.
            if (TheISA::HasUnalignedMemAcc && storeQueue[storeWBIdx].isSplit) {
                assert(snd_data_pkt);

                // Ensure there are enough ports to use.
                if (usedPorts < cachePorts) {
                    ++usedPorts;
                    if (sendStore(snd_data_pkt)) {
                        storePostSend(snd_data_pkt);
                    } else {
                        DPRINTF(IEW, "D-Cache became blocked when writing"
                                " [sn:%lli] second packet, will retry later\n",
                                inst->seqNum);
                    }
                } else {

                    // Store the packet for when there's free ports.
                    assert(pendingPkt == NULL);
                    pendingPkt = snd_data_pkt;
                    hasPendingPkt = true;
                }
            } else {

                // Not a split store.
                storePostSend(data_pkt);
            }
        }
    }

    // Not sure this should set it to 0.
    usedPorts = 0;

    assert(stores >= 0 && storesToWB >= 0);
}

/*template <class Impl>
void
LSQUnit<Impl>::removeMSHR(InstSeqNum seqNum)
{
    list<InstSeqNum>::iterator mshr_it = find(mshrSeqNums.begin(),
                                              mshrSeqNums.end(),
                                              seqNum);

    if (mshr_it != mshrSeqNums.end()) {
        mshrSeqNums.erase(mshr_it);
        DPRINTF(LSQUnit, "Removing MSHR. count = %i\n", mshrSeqNums.size());
    }
}*/

template <class Impl>
void
LSQUnit<Impl>::squash(const InstSeqNum &squashed_num)
{
    DPRINTF(LSQUnit, "Squashing until [sn:%lli]! "
            "(Loads:%i Stores:%i)\n", squashed_num, loads, stores);

    int load_idx = loadTail;
    decrLdIdx(load_idx);

    while (loads != 0 && loadQueue[load_idx]->seqNum > squashed_num) {
        DPRINTF(LSQUnit, "Load Instruction PC %#x squashed, "
                "[sn:%lli]\n",
                loadQueue[load_idx]->readPC(),
                loadQueue[load_idx]->seqNum);

        if (isStalled() && load_idx == stallingLoadIdx) {
            stalled = false;
            stallingStoreIsn = 0;
            stallingLoadIdx = 0;
        }

        // Clear the smart pointer to make sure it is decremented.
        loadQueue[load_idx]->setSquashed();
        loadQueue[load_idx] = NULL;
        --loads;

        // Inefficient!
        loadTail = load_idx;

        decrLdIdx(load_idx);
        ++lsqSquashedLoads;
    }

    if (isLoadBlocked) {
        if (squashed_num < blockedLoadSeqNum) {
            isLoadBlocked = false;
            loadBlockedHandled = false;
            blockedLoadSeqNum = 0;
        }
    }

    if (memDepViolator && squashed_num < memDepViolator->seqNum) {
        memDepViolator = NULL;
    }

    int store_idx = storeTail;
    decrStIdx(store_idx);

    while (stores != 0 &&
           storeQueue[store_idx].inst->seqNum > squashed_num) {
        // Instructions marked as can WB are already committed.
        if (storeQueue[store_idx].canWB) {
            break;
        }

        DPRINTF(LSQUnit, "Store Instruction PC %#x squashed, "
                "idx:%i [sn:%lli]\n",
                storeQueue[store_idx].inst->readPC(),
                store_idx, storeQueue[store_idx].inst->seqNum);

        // I don't think this can happen. It should have been cleared
        // by the stalling load.
        if (isStalled() &&
            storeQueue[store_idx].inst->seqNum == stallingStoreIsn) {
            panic("Is stalled should have been cleared by stalling load!\n");
            stalled = false;
            stallingStoreIsn = 0;
        }

        // Clear the smart pointer to make sure it is decremented.
        storeQueue[store_idx].inst->setSquashed();
        storeQueue[store_idx].inst = NULL;
        storeQueue[store_idx].canWB = 0;

        // Must delete request now that it wasn't handed off to
        // memory. This is quite ugly. @todo: Figure out the proper
        // place to really handle request deletes.
        delete storeQueue[store_idx].req;
        if (TheISA::HasUnalignedMemAcc && storeQueue[store_idx].isSplit) {
            delete storeQueue[store_idx].sreqLow;
            delete storeQueue[store_idx].sreqHigh;

            storeQueue[store_idx].sreqLow = NULL;
            storeQueue[store_idx].sreqHigh = NULL;
        }

        storeQueue[store_idx].req = NULL;
        --stores;

        // Inefficient!
        storeTail = store_idx;

        decrStIdx(store_idx);
        ++lsqSquashedStores;
    }
}

template <class Impl>
void
LSQUnit<Impl>::storePostSend(PacketPtr pkt)
{
    if (isStalled() &&
        storeQueue[storeWBIdx].inst->seqNum == stallingStoreIsn) {
        DPRINTF(LSQUnit, "Unstalling, stalling store [sn:%lli] "
                "load idx:%i\n",
                stallingStoreIsn, stallingLoadIdx);
        stalled = false;
        stallingStoreIsn = 0;
        iewStage->replayMemInst(loadQueue[stallingLoadIdx]);
    }

    if (!storeQueue[storeWBIdx].inst->isStoreConditional()) {
        // The store is basically completed at this time. This
        // only works so long as the checker doesn't try to
        // verify the value in memory for stores.
        storeQueue[storeWBIdx].inst->setCompleted();
#if USE_CHECKER
        if (cpu->checker) {
            cpu->checker->verify(storeQueue[storeWBIdx].inst);
        }
#endif
    }

    incrStIdx(storeWBIdx);
}

template <class Impl>
void
LSQUnit<Impl>::writeback(DynInstPtr &inst, PacketPtr pkt)
{
    iewStage->wakeCPU();

    // Squashed instructions do not need to complete their access.
    if (inst->isSquashed()) {
        iewStage->decrWb(inst->seqNum);
        assert(!inst->isStore());
        ++lsqIgnoredResponses;
        return;
    }

    if (!inst->isExecuted()) {
        inst->setExecuted();

        // Complete access to copy data to proper place.
        inst->completeAcc(pkt);
    }

    // Need to insert instruction into queue to commit
    iewStage->instToCommit(inst);

    iewStage->activityThisCycle();
}

template <class Impl>
void
LSQUnit<Impl>::completeStore(int store_idx)
{
    assert(storeQueue[store_idx].inst);
    storeQueue[store_idx].completed = true;
    --storesToWB;
    // A bit conservative because a store completion may not free up entries,
    // but hopefully avoids two store completions in one cycle from making
    // the CPU tick twice.
    cpu->wakeCPU();
    cpu->activityThisCycle();

    if (store_idx == storeHead) {
        do {
            incrStIdx(storeHead);

            --stores;
        } while (storeQueue[storeHead].completed &&
                 storeHead != storeTail);

        iewStage->updateLSQNextCycle = true;
    }

    DPRINTF(LSQUnit, "Completing store [sn:%lli], idx:%i, store head "
            "idx:%i\n",
            storeQueue[store_idx].inst->seqNum, store_idx, storeHead);

    if (isStalled() &&
        storeQueue[store_idx].inst->seqNum == stallingStoreIsn) {
        DPRINTF(LSQUnit, "Unstalling, stalling store [sn:%lli] "
                "load idx:%i\n",
                stallingStoreIsn, stallingLoadIdx);
        stalled = false;
        stallingStoreIsn = 0;
        iewStage->replayMemInst(loadQueue[stallingLoadIdx]);
    }

    storeQueue[store_idx].inst->setCompleted();

    // Tell the checker we've completed this instruction. Some stores
    // may get reported twice to the checker, but the checker can
    // handle that case.
#if USE_CHECKER
    if (cpu->checker) {
        cpu->checker->verify(storeQueue[store_idx].inst);
    }
#endif
}

template <class Impl>
bool
LSQUnit<Impl>::sendStore(PacketPtr data_pkt)
{
    if (!dcachePort->sendTiming(data_pkt)) {
        // Need to handle becoming blocked on a store.
        isStoreBlocked = true;
        ++lsqCacheBlocked;
        assert(retryPkt == NULL);
        retryPkt = data_pkt;
        lsq->setRetryTid(lsqID);
        return false;
    }
    return true;
}

template <class Impl>
void
LSQUnit<Impl>::recvRetry()
{
    if (isStoreBlocked) {
        DPRINTF(LSQUnit, "Receiving retry: store blocked\n");
        assert(retryPkt != NULL);

        if (dcachePort->sendTiming(retryPkt)) {
            LSQSenderState *state =
                dynamic_cast<LSQSenderState *>(retryPkt->senderState);

            // Don't finish the store unless this is the last packet.
            if (!TheISA::HasUnalignedMemAcc || !state->pktToSend) {
                storePostSend(retryPkt);
            }
            retryPkt = NULL;
            isStoreBlocked = false;
            lsq->setRetryTid(InvalidThreadID);

            // Send any outstanding packet.
            if (TheISA::HasUnalignedMemAcc && state->pktToSend) {
                assert(state->pendingPacket);
                if (sendStore(state->pendingPacket)) {
                    storePostSend(state->pendingPacket);
                }
            }
        } else {
            // Still blocked!
            ++lsqCacheBlocked;
            lsq->setRetryTid(lsqID);
        }
    } else if (isLoadBlocked) {
        DPRINTF(LSQUnit, "Loads squash themselves and all younger insts, "
                "no need to resend packet.\n");
    } else {
        DPRINTF(LSQUnit, "Retry received but LSQ is no longer blocked.\n");
    }
}

template <class Impl>
inline void
LSQUnit<Impl>::incrStIdx(int &store_idx)
{
    if (++store_idx >= SQEntries)
        store_idx = 0;
}

template <class Impl>
inline void
LSQUnit<Impl>::decrStIdx(int &store_idx)
{
    if (--store_idx < 0)
        store_idx += SQEntries;
}

template <class Impl>
inline void
LSQUnit<Impl>::incrLdIdx(int &load_idx)
{
    if (++load_idx >= LQEntries)
        load_idx = 0;
}

template <class Impl>
inline void
LSQUnit<Impl>::decrLdIdx(int &load_idx)
{
    if (--load_idx < 0)
        load_idx += LQEntries;
}

template <class Impl>
void
LSQUnit<Impl>::dumpInsts()
{
    cprintf("Load store queue: Dumping instructions.\n");
    cprintf("Load queue size: %i\n", loads);
    cprintf("Load queue: ");

    int load_idx = loadHead;

    while (load_idx != loadTail && loadQueue[load_idx]) {
        cprintf("%#x ", loadQueue[load_idx]->readPC());

        incrLdIdx(load_idx);
    }

    cprintf("Store queue size: %i\n", stores);
    cprintf("Store queue: ");

    int store_idx = storeHead;

    while (store_idx != storeTail && storeQueue[store_idx].inst) {
        cprintf("%#x ", storeQueue[store_idx].inst->readPC());

        incrStIdx(store_idx);
    }

    cprintf("\n");
}