/*
 * Copyright (c) 2010-2014, 2017-2018 ARM Limited
 * Copyright (c) 2013 Advanced Micro Devices, Inc.
 * All rights reserved
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder. You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Copyright (c) 2004-2005 The Regents of The University of Michigan
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Kevin Lim
 *          Korey Sewell
 */

#ifndef __CPU_O3_LSQ_UNIT_IMPL_HH__
#define __CPU_O3_LSQ_UNIT_IMPL_HH__

#include "arch/generic/debugfaults.hh"
#include "arch/locked_mem.hh"
#include "base/str.hh"
#include "config/the_isa.hh"
#include "cpu/checker/cpu.hh"
#include "cpu/o3/lsq.hh"
#include "cpu/o3/lsq_unit.hh"
#include "debug/Activity.hh"
#include "debug/IEW.hh"
#include "debug/LSQUnit.hh"
#include "debug/O3PipeView.hh"
#include "mem/packet.hh"
#include "mem/request.hh"

template <class Impl>
LSQUnit<Impl>::WritebackEvent::WritebackEvent(const DynInstPtr &_inst,
        PacketPtr _pkt, LSQUnit *lsq_ptr)
    : Event(Default_Pri, AutoDelete),
      inst(_inst), pkt(_pkt), lsqPtr(lsq_ptr)
{
    assert(_inst->savedReq);
    _inst->savedReq->writebackScheduled();
}

template <class Impl>
void
LSQUnit<Impl>::WritebackEvent::process()
{
    assert(!lsqPtr->cpu->switchedOut());

    lsqPtr->writeback(inst, pkt);

    assert(inst->savedReq);
    inst->savedReq->writebackDone();
    delete pkt;
}

template <class Impl>
const char *
LSQUnit<Impl>::WritebackEvent::description() const
{
    return "Store writeback";
}

template <class Impl>
bool
LSQUnit<Impl>::recvTimingResp(PacketPtr pkt)
{
    auto senderState = dynamic_cast<LSQSenderState*>(pkt->senderState);
    LSQRequest* req = senderState->request();
    assert(req != nullptr);
    bool ret = true;

    /* Check that the request is still alive before any further action.
     */
    if (senderState->alive()) {
        ret = req->recvTimingResp(pkt);
    } else {
        senderState->outstanding--;
    }
    return ret;
}

template <class Impl>
void
LSQUnit<Impl>::completeDataAccess(PacketPtr pkt)
{
    LSQSenderState *state = dynamic_cast<LSQSenderState *>(pkt->senderState);
    DynInstPtr inst = state->inst;

    cpu->ppDataAccessComplete->notify(std::make_pair(inst, pkt));

    /* Notify the sender state that the access is complete (for ownership
     * tracking). */
    state->complete();

    assert(!cpu->switchedOut());
    if (!inst->isSquashed()) {
        if (state->needWB) {
            // Only loads, store conditionals and atomics perform the
            // writeback after receiving the response from the memory
            assert(inst->isLoad() || inst->isStoreConditional() ||
                   inst->isAtomic());
            writeback(inst, state->request()->mainPacket());
            if (inst->isStore() || inst->isAtomic()) {
                auto ss = dynamic_cast<SQSenderState*>(state);
                ss->writebackDone();
                completeStore(ss->idx);
            }
        } else if (inst->isStore()) {
            // This is a regular store (i.e., not store conditionals and
            // atomics), so it can complete without writing back
            completeStore(dynamic_cast<SQSenderState*>(state)->idx);
        }
    }
}

template <class Impl>
LSQUnit<Impl>::LSQUnit(uint32_t lqEntries, uint32_t sqEntries)
    : lsqID(-1), storeQueue(sqEntries+1), loadQueue(lqEntries+1),
      loads(0), stores(0), storesToWB(0), cacheBlockMask(0), stalled(false),
      isStoreBlocked(false), storeInFlight(false), hasPendingRequest(false),
      pendingRequest(nullptr)
{
}

template <class Impl>
void
LSQUnit<Impl>::init(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params,
        LSQ *lsq_ptr, unsigned id)
{
    lsqID = id;

    cpu = cpu_ptr;
    iewStage = iew_ptr;

    lsq = lsq_ptr;

    DPRINTF(LSQUnit, "Creating LSQUnit%i object.\n", lsqID);

    depCheckShift = params->LSQDepCheckShift;
    checkLoads = params->LSQCheckLoads;
    needsTSO = params->needsTSO;

    resetState();
}

template <class Impl>
void
LSQUnit<Impl>::resetState()
{
    loads = stores = storesToWB = 0;

    storeWBIt = storeQueue.begin();

    retryPkt = NULL;
    memDepViolator = NULL;

    stalled = false;

    cacheBlockMask = ~(cpu->cacheLineSize() - 1);
}

template <class Impl>
std::string
LSQUnit<Impl>::name() const
{
    if (Impl::MaxThreads == 1) {
        return iewStage->name() + ".lsq";
    } else {
        return iewStage->name() + ".lsq.thread" + std::to_string(lsqID);
    }
}

template <class Impl>
void
LSQUnit<Impl>::regStats()
{
    lsqForwLoads
        .name(name() + ".forwLoads")
        .desc("Number of loads that had data forwarded from stores");

    invAddrLoads
        .name(name() + ".invAddrLoads")
        .desc("Number of loads ignored due to an invalid address");

    lsqSquashedLoads
        .name(name() + ".squashedLoads")
        .desc("Number of loads squashed");

    lsqIgnoredResponses
        .name(name() + ".ignoredResponses")
        .desc("Number of memory responses ignored because the instruction "
              "is squashed");

    lsqMemOrderViolation
        .name(name() + ".memOrderViolation")
        .desc("Number of memory ordering violations");

    lsqSquashedStores
        .name(name() + ".squashedStores")
        .desc("Number of stores squashed");

    invAddrSwpfs
        .name(name() + ".invAddrSwpfs")
        .desc("Number of software prefetches ignored due to an invalid "
              "address");

    lsqBlockedLoads
        .name(name() + ".blockedLoads")
        .desc("Number of blocked loads due to partial load-store "
              "forwarding");

    lsqRescheduledLoads
        .name(name() + ".rescheduledLoads")
        .desc("Number of loads that were rescheduled");

    lsqCacheBlocked
        .name(name() + ".cacheBlocked")
        .desc("Number of times an access to memory failed due to the cache "
              "being blocked");
}

template <class Impl>
void
LSQUnit<Impl>::setDcachePort(MasterPort *dcache_port)
{
    dcachePort = dcache_port;
}

template <class Impl>
void
LSQUnit<Impl>::drainSanityCheck() const
{
    for (int i = 0; i < loadQueue.capacity(); ++i)
        assert(!loadQueue[i].valid());

    assert(storesToWB == 0);
    assert(!retryPkt);
}

template <class Impl>
void
LSQUnit<Impl>::takeOverFrom()
{
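    // Nothing is carried over from the previous CPU: drainSanityCheck()
    // has already verified the queues are empty, so taking over simply
    // re-initializes this unit's bookkeeping state.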
    resetState();
}

template <class Impl>
void
LSQUnit<Impl>::insert(const DynInstPtr &inst)
{
    assert(inst->isMemRef());

    assert(inst->isLoad() || inst->isStore() || inst->isAtomic());

    if (inst->isLoad()) {
        insertLoad(inst);
    } else {
        insertStore(inst);
    }

    inst->setInLSQ();
}

template <class Impl>
void
LSQUnit<Impl>::insertLoad(const DynInstPtr &load_inst)
{
    assert(!loadQueue.full());
    assert(loads < loadQueue.capacity());

    DPRINTF(LSQUnit, "Inserting load PC %s, idx:%i [sn:%lli]\n",
            load_inst->pcState(), loadQueue.tail(), load_inst->seqNum);

    /* Grow the queue. */
    loadQueue.advance_tail();

    load_inst->sqIt = storeQueue.end();

    assert(!loadQueue.back().valid());
    loadQueue.back().set(load_inst);
    load_inst->lqIdx = loadQueue.tail();
    load_inst->lqIt = loadQueue.getIterator(load_inst->lqIdx);

    ++loads;
}

template <class Impl>
void
LSQUnit<Impl>::insertStore(const DynInstPtr& store_inst)
{
    // Make sure it is not full before inserting an instruction.
    assert(!storeQueue.full());
    assert(stores < storeQueue.capacity());

    DPRINTF(LSQUnit, "Inserting store PC %s, idx:%i [sn:%lli]\n",
            store_inst->pcState(), storeQueue.tail(), store_inst->seqNum);
    storeQueue.advance_tail();

    store_inst->sqIdx = storeQueue.tail();
    store_inst->lqIdx = loadQueue.moduloAdd(loadQueue.tail(), 1);
    store_inst->lqIt = loadQueue.end();

    storeQueue.back().set(store_inst);

    ++stores;
}

template <class Impl>
typename Impl::DynInstPtr
LSQUnit<Impl>::getMemDepViolator()
{
    DynInstPtr temp = memDepViolator;

    memDepViolator = NULL;

    return temp;
}

template <class Impl>
unsigned
LSQUnit<Impl>::numFreeLoadEntries()
{
    // LQ has an extra dummy entry to differentiate
    // empty/full conditions. Subtract 1 from the free entries.
    DPRINTF(LSQUnit, "LQ size: %d, #loads occupied: %d\n",
            1 + loadQueue.capacity(), loads);
    return loadQueue.capacity() - loads;
}

template <class Impl>
unsigned
LSQUnit<Impl>::numFreeStoreEntries()
{
    // SQ has an extra dummy entry to differentiate
    // empty/full conditions. Subtract 1 from the free entries.
    DPRINTF(LSQUnit, "SQ size: %d, #stores occupied: %d\n",
            1 + storeQueue.capacity(), stores);
    return storeQueue.capacity() - stores;
}

template <class Impl>
void
LSQUnit<Impl>::checkSnoop(PacketPtr pkt)
{
    // Should only ever get invalidations in here
    assert(pkt->isInvalidate());

    DPRINTF(LSQUnit, "Got snoop for address %#x\n", pkt->getAddr());

    for (int x = 0; x < cpu->numContexts(); x++) {
        ThreadContext *tc = cpu->getContext(x);
        bool no_squash = cpu->thread[x]->noSquashFromTC;
        cpu->thread[x]->noSquashFromTC = true;
        TheISA::handleLockedSnoop(tc, pkt, cacheBlockMask);
        cpu->thread[x]->noSquashFromTC = no_squash;
    }

    if (loadQueue.empty())
        return;

    auto iter = loadQueue.begin();

    Addr invalidate_addr = pkt->getAddr() & cacheBlockMask;

    DynInstPtr ld_inst = iter->instruction();
    assert(ld_inst);
    LSQRequest *req = iter->request();

    // Check that this snoop didn't just invalidate our lock flag
    if (ld_inst->effAddrValid() &&
        req->isCacheBlockHit(invalidate_addr, cacheBlockMask) &&
        ld_inst->memReqFlags & Request::LLSC)
        TheISA::handleLockedSnoopHit(ld_inst.get());

    bool force_squash = false;

    while (++iter != loadQueue.end()) {
        ld_inst = iter->instruction();
        assert(ld_inst);
        req = iter->request();
        if (!ld_inst->effAddrValid() || ld_inst->strictlyOrdered())
            continue;

        DPRINTF(LSQUnit, "-- inst [sn:%lli] to pktAddr:%#x\n",
                ld_inst->seqNum, invalidate_addr);

        if (force_squash ||
            req->isCacheBlockHit(invalidate_addr, cacheBlockMask)) {
            if (needsTSO) {
                // If we have a TSO system, as all loads must be ordered with
                // all other loads, this load as well as *all* subsequent
                // loads need to be squashed to prevent possible load
                // reordering.
                force_squash = true;
            }
            if (ld_inst->possibleLoadViolation() || force_squash) {
                DPRINTF(LSQUnit, "Conflicting load at addr %#x [sn:%lli]\n",
                        pkt->getAddr(), ld_inst->seqNum);

                // Mark the load for re-execution
                ld_inst->fault = std::make_shared<ReExec>();
            } else {
                DPRINTF(LSQUnit, "HitExternal Snoop for addr %#x [sn:%lli]\n",
                        pkt->getAddr(), ld_inst->seqNum);

                // Make sure that we don't lose a snoop hitting a LOCKED
                // address since the LOCK* flags don't get updated until
                // commit.
                if (ld_inst->memReqFlags & Request::LLSC)
                    TheISA::handleLockedSnoopHit(ld_inst.get());

                // If an older load checks this and it's true, then we might
                // have missed the snoop, in which case we need to
                // invalidate to be sure.
                ld_inst->hitExternalSnoop(true);
            }
        }
    }
    return;
}

template <class Impl>
Fault
LSQUnit<Impl>::checkViolations(typename LoadQueue::iterator& loadIt,
        const DynInstPtr& inst)
{
    Addr inst_eff_addr1 = inst->effAddr >> depCheckShift;
    Addr inst_eff_addr2 = (inst->effAddr + inst->effSize - 1) >> depCheckShift;

    /** @todo in theory you only need to check an instruction that has
     * executed; however, there isn't a good way in the pipeline at the
     * moment to check all instructions that will execute before the store
     * writes back. Thus, like the implementation that came before it, we're
     * overly conservative.
     */
    while (loadIt != loadQueue.end()) {
        DynInstPtr ld_inst = loadIt->instruction();
        if (!ld_inst->effAddrValid() || ld_inst->strictlyOrdered()) {
            ++loadIt;
            continue;
        }

        Addr ld_eff_addr1 = ld_inst->effAddr >> depCheckShift;
        Addr ld_eff_addr2 =
            (ld_inst->effAddr + ld_inst->effSize - 1) >> depCheckShift;

        if (inst_eff_addr2 >= ld_eff_addr1 && inst_eff_addr1 <= ld_eff_addr2) {
            if (inst->isLoad()) {
                // If this load is to the same block as an external snoop
                // invalidate that we've observed then the load needs to be
                // squashed as it could have newer data
                if (ld_inst->hitExternalSnoop()) {
                    if (!memDepViolator ||
                        ld_inst->seqNum < memDepViolator->seqNum) {
                        DPRINTF(LSQUnit, "Detected fault with inst [sn:%lli] "
                                "and [sn:%lli] at address %#x\n",
                                inst->seqNum, ld_inst->seqNum, ld_eff_addr1);
                        memDepViolator = ld_inst;

                        ++lsqMemOrderViolation;

                        return std::make_shared<GenericISA::M5PanicFault>(
                            "Detected fault with inst [sn:%lli] and "
                            "[sn:%lli] at address %#x\n",
                            inst->seqNum, ld_inst->seqNum, ld_eff_addr1);
                    }
                }

                // Otherwise, mark the load as having a possible load
                // violation; if we see a snoop before it's committed, we
                // need to squash.
                ld_inst->possibleLoadViolation(true);
                DPRINTF(LSQUnit, "Found possible load violation at addr: %#x"
                        " between instructions [sn:%lli] and [sn:%lli]\n",
                        inst_eff_addr1, inst->seqNum, ld_inst->seqNum);
            } else {
                // A load/store incorrectly passed this store.
                // Check if we already have a violator, or if it's newer;
                // squash and refetch.
                if (memDepViolator && ld_inst->seqNum > memDepViolator->seqNum)
                    break;

                DPRINTF(LSQUnit, "Detected fault with inst [sn:%lli] and "
                        "[sn:%lli] at address %#x\n",
                        inst->seqNum, ld_inst->seqNum, ld_eff_addr1);
                memDepViolator = ld_inst;

                ++lsqMemOrderViolation;

                return std::make_shared<GenericISA::M5PanicFault>(
                    "Detected fault with "
                    "inst [sn:%lli] and [sn:%lli] at address %#x\n",
                    inst->seqNum, ld_inst->seqNum, ld_eff_addr1);
            }
        }

        ++loadIt;
    }
    return NoFault;
}

template <class Impl>
Fault
LSQUnit<Impl>::executeLoad(const DynInstPtr &inst)
{
    using namespace TheISA;
    // Execute a specific load.
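    // initiateAcc() starts translation and issues the memory request; the
    // load's result is written back later, via writeback(), once the
    // response returns from the memory system.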
    Fault load_fault = NoFault;

    DPRINTF(LSQUnit, "Executing load PC %s, [sn:%lli]\n",
            inst->pcState(), inst->seqNum);

    assert(!inst->isSquashed());

    load_fault = inst->initiateAcc();

    if (inst->isTranslationDelayed() && load_fault == NoFault)
        return load_fault;

    // If the instruction faulted or predicated false, then we need to send it
    // along to commit without the instruction completing.
    if (load_fault != NoFault || !inst->readPredicate()) {
        // Send this instruction to commit, also make sure iew stage
        // realizes there is activity. Mark it as executed unless it
        // is a strictly ordered load that needs to hit the head of
        // commit.
        if (!inst->readPredicate())
            inst->forwardOldRegs();
        DPRINTF(LSQUnit, "Load [sn:%lli] not executed from %s\n",
                inst->seqNum,
                (load_fault != NoFault ? "fault" : "predication"));
        if (!(inst->hasRequest() && inst->strictlyOrdered()) ||
            inst->isAtCommit()) {
            inst->setExecuted();
        }
        iewStage->instToCommit(inst);
        iewStage->activityThisCycle();
    } else {
        if (inst->effAddrValid()) {
            auto it = inst->lqIt;
            ++it;

            if (checkLoads)
                return checkViolations(it, inst);
        }
    }

    return load_fault;
}

template <class Impl>
Fault
LSQUnit<Impl>::executeStore(const DynInstPtr &store_inst)
{
    using namespace TheISA;
    // Make sure that a store exists.
    assert(stores != 0);

    int store_idx = store_inst->sqIdx;

    DPRINTF(LSQUnit, "Executing store PC %s [sn:%lli]\n",
            store_inst->pcState(), store_inst->seqNum);

    assert(!store_inst->isSquashed());

    // Check the recently completed loads to see if any match this store's
    // address. If so, then we have a memory ordering violation.
    typename LoadQueue::iterator loadIt = store_inst->lqIt;

    Fault store_fault = store_inst->initiateAcc();

    if (store_inst->isTranslationDelayed() &&
        store_fault == NoFault)
        return store_fault;

    if (!store_inst->readPredicate()) {
        DPRINTF(LSQUnit, "Store [sn:%lli] not executed from predication\n",
                store_inst->seqNum);
        store_inst->forwardOldRegs();
        return store_fault;
    }

    if (storeQueue[store_idx].size() == 0) {
        DPRINTF(LSQUnit, "Fault on Store PC %s, [sn:%lli], Size = 0\n",
                store_inst->pcState(), store_inst->seqNum);

        return store_fault;
    }

    assert(store_fault == NoFault);

    if (store_inst->isStoreConditional() || store_inst->isAtomic()) {
        // Store conditionals and Atomics need to set themselves as able to
        // writeback if we haven't had a fault by here.
        storeQueue[store_idx].canWB() = true;

        ++storesToWB;
    }

    return checkViolations(loadIt, store_inst);
}

template <class Impl>
void
LSQUnit<Impl>::commitLoad()
{
    assert(loadQueue.front().valid());

    DPRINTF(LSQUnit, "Committing head load instruction, PC %s\n",
            loadQueue.front().instruction()->pcState());

    loadQueue.front().clear();
    loadQueue.pop_front();

    --loads;
}

template <class Impl>
void
LSQUnit<Impl>::commitLoads(InstSeqNum &youngest_inst)
{
    assert(loads == 0 || loadQueue.front().valid());

    while (loads != 0 && loadQueue.front().instruction()->seqNum
            <= youngest_inst) {
        commitLoad();
    }
}

template <class Impl>
void
LSQUnit<Impl>::commitStores(InstSeqNum &youngest_inst)
{
    assert(stores == 0 || storeQueue.front().valid());

    /* Forward iterate the store queue (age order). */
    for (auto& x : storeQueue) {
        assert(x.valid());
        // Mark any stores that are now committed and have not yet
        // been marked as able to write back.
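        // Stores are visited oldest-first, so the walk can stop at the
        // first store younger than the commit point.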
        if (!x.canWB()) {
            if (x.instruction()->seqNum > youngest_inst) {
                break;
            }
            DPRINTF(LSQUnit, "Marking store as able to write back, PC "
                    "%s [sn:%lli]\n",
                    x.instruction()->pcState(),
                    x.instruction()->seqNum);

            x.canWB() = true;

            ++storesToWB;
        }
    }
}

template <class Impl>
void
LSQUnit<Impl>::writebackBlockedStore()
{
    assert(isStoreBlocked);
    storeWBIt->request()->sendPacketToCache();
    if (storeWBIt->request()->isSent()){
        storePostSend();
    }
}

template <class Impl>
void
LSQUnit<Impl>::writebackStores()
{
    if (isStoreBlocked) {
        DPRINTF(LSQUnit, "Writing back blocked store\n");
        writebackBlockedStore();
    }

    while (storesToWB > 0 &&
           storeWBIt.dereferenceable() &&
           storeWBIt->valid() &&
           storeWBIt->canWB() &&
           ((!needsTSO) || (!storeInFlight)) &&
           lsq->storePortAvailable()) {

        if (isStoreBlocked) {
            DPRINTF(LSQUnit, "Unable to write back any more stores, cache"
                    " is blocked!\n");
            break;
        }

        // Store didn't write any data so no need to write it back to
        // memory.
        if (storeWBIt->size() == 0) {
            /* It is important that the preincrement happens at (or before)
             * the call, as the code of completeStore checks
             * storeWBIt. */
            completeStore(storeWBIt++);
            continue;
        }

        if (storeWBIt->instruction()->isDataPrefetch()) {
            storeWBIt++;
            continue;
        }

        assert(storeWBIt->hasRequest());
        assert(!storeWBIt->committed());

        DynInstPtr inst = storeWBIt->instruction();
        LSQRequest* req = storeWBIt->request();
        storeWBIt->committed() = true;

        assert(!inst->memData);
        inst->memData = new uint8_t[req->_size];

        if (storeWBIt->isAllZeros())
            memset(inst->memData, 0, req->_size);
        else
            memcpy(inst->memData, storeWBIt->data(), req->_size);

        if (req->senderState() == nullptr) {
            SQSenderState *state = new SQSenderState(storeWBIt);
            state->isLoad = false;
            state->needWB = false;
            state->inst = inst;

            req->senderState(state);
            if (inst->isStoreConditional() || inst->isAtomic()) {
                /* Only store conditionals and atomics need a writeback. */
                state->needWB = true;
            }
        }
        req->buildPackets();

        DPRINTF(LSQUnit, "D-Cache: Writing back store idx:%i PC:%s "
                "to Addr:%#x, data:%#x [sn:%lli]\n",
                storeWBIt.idx(), inst->pcState(),
                req->request()->getPaddr(), (int)*(inst->memData),
                inst->seqNum);

        // @todo: Remove this SC hack once the memory system handles it.
        if (inst->isStoreConditional()) {
            // Disable recording the result temporarily. Writing to
            // misc regs normally updates the result, but this is not
            // the desired behavior when handling store conditionals.
            inst->recordResult(false);
            bool success = TheISA::handleLockedWrite(inst.get(),
                    req->request(), cacheBlockMask);
            inst->recordResult(true);
            req->packetSent();

            if (!success) {
                req->complete();
                // Instantly complete this store.
                DPRINTF(LSQUnit, "Store conditional [sn:%lli] failed. "
                        "Instantly completing it.\n",
                        inst->seqNum);
                PacketPtr new_pkt = new Packet(*req->packet());
                WritebackEvent *wb = new WritebackEvent(inst,
                        new_pkt, this);
                cpu->schedule(wb, curTick() + 1);
                completeStore(storeWBIt);
                if (!storeQueue.empty())
                    storeWBIt++;
                else
                    storeWBIt = storeQueue.end();
                continue;
            }
        }

        if (req->request()->isMmappedIpr()) {
            assert(!inst->isStoreConditional());
            ThreadContext *thread = cpu->tcBase(lsqID);
            PacketPtr main_pkt = new Packet(req->mainRequest(),
                                            MemCmd::WriteReq);
            main_pkt->dataStatic(inst->memData);
            req->handleIprWrite(thread, main_pkt);
            delete main_pkt;
            completeStore(storeWBIt);
            storeWBIt++;
            continue;
        }
        /* Send to cache */
        req->sendPacketToCache();

        /* If successful, do the post send */
        if (req->isSent()) {
            storePostSend();
        } else {
            DPRINTF(LSQUnit, "D-Cache became blocked when writing [sn:%lli], "
                    "will retry later\n",
                    inst->seqNum);
        }
    }
    assert(stores >= 0 && storesToWB >= 0);
}

template <class Impl>
void
LSQUnit<Impl>::squash(const InstSeqNum &squashed_num)
{
    DPRINTF(LSQUnit, "Squashing until [sn:%lli]!"
            "(Loads:%i Stores:%i)\n", squashed_num, loads, stores);

    while (loads != 0 &&
            loadQueue.back().instruction()->seqNum > squashed_num) {
        DPRINTF(LSQUnit, "Load Instruction PC %s squashed, "
                "[sn:%lli]\n",
                loadQueue.back().instruction()->pcState(),
                loadQueue.back().instruction()->seqNum);

        if (isStalled() && loadQueue.tail() == stallingLoadIdx) {
            stalled = false;
            stallingStoreIsn = 0;
            stallingLoadIdx = 0;
        }

        // Clear the smart pointer to make sure it is decremented.
        loadQueue.back().instruction()->setSquashed();
        loadQueue.back().clear();

        --loads;

        loadQueue.pop_back();
        ++lsqSquashedLoads;
    }

    if (memDepViolator && squashed_num < memDepViolator->seqNum) {
        memDepViolator = NULL;
    }

    while (stores != 0 &&
           storeQueue.back().instruction()->seqNum > squashed_num) {
        // Instructions marked as can WB are already committed.
        if (storeQueue.back().canWB()) {
            break;
        }

        DPRINTF(LSQUnit, "Store Instruction PC %s squashed, "
                "idx:%i [sn:%lli]\n",
                storeQueue.back().instruction()->pcState(),
                storeQueue.tail(), storeQueue.back().instruction()->seqNum);

        // I don't think this can happen. It should have been cleared
        // by the stalling load.
        if (isStalled() &&
            storeQueue.back().instruction()->seqNum == stallingStoreIsn) {
            panic("Is stalled should have been cleared by stalling load!\n");
            stalled = false;
            stallingStoreIsn = 0;
        }

        // Clear the smart pointer to make sure it is decremented.
        storeQueue.back().instruction()->setSquashed();

        // Must delete request now that it wasn't handed off to
        // memory. This is quite ugly. @todo: Figure out the proper
        // place to really handle request deletes.
        storeQueue.back().clear();
        --stores;

        storeQueue.pop_back();
        ++lsqSquashedStores;
    }
}

template <class Impl>
void
LSQUnit<Impl>::storePostSend()
{
    if (isStalled() &&
        storeWBIt->instruction()->seqNum == stallingStoreIsn) {
        DPRINTF(LSQUnit, "Unstalling, stalling store [sn:%lli] "
                "load idx:%i\n",
                stallingStoreIsn, stallingLoadIdx);
        stalled = false;
        stallingStoreIsn = 0;
        iewStage->replayMemInst(loadQueue[stallingLoadIdx].instruction());
    }

    if (!storeWBIt->instruction()->isStoreConditional()) {
        // The store is basically completed at this time. This
        // only works so long as the checker doesn't try to
        // verify the value in memory for stores.
        storeWBIt->instruction()->setCompleted();

        if (cpu->checker) {
            cpu->checker->verify(storeWBIt->instruction());
        }
    }

    if (needsTSO) {
        storeInFlight = true;
    }

    storeWBIt++;
}

template <class Impl>
void
LSQUnit<Impl>::writeback(const DynInstPtr &inst, PacketPtr pkt)
{
    iewStage->wakeCPU();

    // Squashed instructions do not need to complete their access.
    if (inst->isSquashed()) {
        assert(!inst->isStore());
        ++lsqIgnoredResponses;
        return;
    }

    if (!inst->isExecuted()) {
        inst->setExecuted();

        if (inst->fault == NoFault) {
            // Complete access to copy data to proper place.
            inst->completeAcc(pkt);
        } else {
            // If the instruction has an outstanding fault, we cannot
            // complete the access as this discards the current fault.

            // If we have an outstanding fault, the fault should only be of
            // type ReExec.
            assert(dynamic_cast<ReExec*>(inst->fault.get()) != nullptr);

            DPRINTF(LSQUnit, "Not completing instruction [sn:%lli] access "
                    "due to pending fault.\n", inst->seqNum);
        }
    }

    // Need to insert instruction into queue to commit
    iewStage->instToCommit(inst);

    iewStage->activityThisCycle();

    // see if this load changed the PC
    iewStage->checkMisprediction(inst);
}

template <class Impl>
void
LSQUnit<Impl>::completeStore(typename StoreQueue::iterator store_idx)
{
    assert(store_idx->valid());
    store_idx->completed() = true;
    --storesToWB;
    // A bit conservative because a store completion may not free up entries,
    // but hopefully avoids two store completions in one cycle from making
    // the CPU tick twice.
    cpu->wakeCPU();
    cpu->activityThisCycle();

    /* We 'need' a copy here because we may clear the entry from the
     * store queue. */
    DynInstPtr store_inst = store_idx->instruction();
    if (store_idx == storeQueue.begin()) {
        do {
            storeQueue.front().clear();
            storeQueue.pop_front();
            --stores;
        } while (!storeQueue.empty() && storeQueue.front().completed());

        iewStage->updateLSQNextCycle = true;
    }

    DPRINTF(LSQUnit, "Completing store [sn:%lli], idx:%i, store head "
            "idx:%i\n",
            store_inst->seqNum, store_idx.idx() - 1, storeQueue.head() - 1);

#if TRACING_ON
    if (DTRACE(O3PipeView)) {
        store_idx->instruction()->storeTick =
            curTick() - store_idx->instruction()->fetchTick;
    }
#endif

    if (isStalled() &&
        store_inst->seqNum == stallingStoreIsn) {
        DPRINTF(LSQUnit, "Unstalling, stalling store [sn:%lli] "
                "load idx:%i\n",
                stallingStoreIsn, stallingLoadIdx);
        stalled = false;
        stallingStoreIsn = 0;
        iewStage->replayMemInst(loadQueue[stallingLoadIdx].instruction());
    }

    store_inst->setCompleted();

    if (needsTSO) {
        storeInFlight = false;
    }

    // Tell the checker we've completed this instruction. Some stores
    // may get reported twice to the checker, but the checker can
    // handle that case.
    // Store conditionals cannot be sent to the checker yet, they have
    // to update the misc registers first which should take place
    // when they commit
    if (cpu->checker && !store_inst->isStoreConditional()) {
        cpu->checker->verify(store_inst);
    }
}

template <class Impl>
bool
LSQUnit<Impl>::trySendPacket(bool isLoad, PacketPtr data_pkt)
{
    bool ret = true;
    bool cache_got_blocked = false;

    auto state = dynamic_cast<LSQSenderState*>(data_pkt->senderState);

    if (!lsq->cacheBlocked() && (isLoad || lsq->storePortAvailable())) {
        if (!dcachePort->sendTimingReq(data_pkt)) {
            ret = false;
            cache_got_blocked = true;
        }
    } else {
        ret = false;
    }

    if (ret) {
        if (!isLoad) {
            lsq->storePortBusy();
            isStoreBlocked = false;
        }
        state->outstanding++;
        state->request()->packetSent();
    } else {
        if (cache_got_blocked) {
            lsq->cacheBlocked(true);
            ++lsqCacheBlocked;
        }
        if (!isLoad) {
            assert(state->request() == storeWBIt->request());
            isStoreBlocked = true;
        }
        state->request()->packetNotSent();
    }

    return ret;
}

template <class Impl>
void
LSQUnit<Impl>::recvRetry()
{
    if (isStoreBlocked) {
        DPRINTF(LSQUnit, "Receiving retry: blocked store\n");
        writebackBlockedStore();
    }
}

template <class Impl>
void
LSQUnit<Impl>::dumpInsts() const
{
    cprintf("Load store queue: Dumping instructions.\n");
    cprintf("Load queue size: %i\n", loads);
    cprintf("Load queue: ");

    for (const auto& e: loadQueue) {
        const DynInstPtr &inst(e.instruction());
        cprintf("%s.[sn:%i] ", inst->pcState(), inst->seqNum);
    }
    cprintf("\n");

    cprintf("Store queue size: %i\n", stores);
    cprintf("Store queue: ");

    for (const auto& e: storeQueue) {
        const DynInstPtr &inst(e.instruction());
        cprintf("%s.[sn:%i] ", inst->pcState(), inst->seqNum);
    }

    cprintf("\n");
}

template <class Impl>
unsigned int
LSQUnit<Impl>::cacheLineSize()
{
    return cpu->cacheLineSize();
}

#endif // __CPU_O3_LSQ_UNIT_IMPL_HH__