Cross Reference: /gem5/src/cpu/o3/lsq

Deleted Added

sdiff udiff text old ( 13560:f8732494c155 ) new ( 13590:d7e018859709 )

full compact

2c2
< * Copyright (c) 2011-2012, 2014 ARM Limited
---
> * Copyright (c) 2011-2012, 2014, 2017-2018 ARM Limited
63a64,65
> _cacheBlocked(false),
> cacheStorePorts(params->cacheStorePorts), usedStorePorts(0),
79,80c81,82
< //Figure out fetch policy
< if (lsqPolicy == SMTQueuePolicy::Dynamic) {
---
> /* Run SMT policy checks. */
> if (lsqPolicy == SMTQueuePolicy::Dynamic) {
88,89c90,91
< assert(params->smtLSQThreshold > LQEntries);
< assert(params->smtLSQThreshold > SQEntries);
---
> assert(params->smtLSQThreshold > params->LQEntries);
> assert(params->smtLSQThreshold > params->SQEntries);
165a168,170
> usedStorePorts = 0;
> _cacheBlocked = false;
>
171,173c176,178
< template <class Impl>
< int
< LSQ<Impl>::entryAmount(ThreadID num_threads)
---
> template<class Impl>
> bool
> LSQ<Impl>::cacheBlocked() const
175,179c180
< if (lsqPolicy == SMTQueuePolicy::Partitioned) {
< return LQEntries / num_threads;
< } else {
< return 0;
< }
---
> return _cacheBlocked;
182,210d182
< template <class Impl>
< void
< LSQ<Impl>::resetEntries()
< {
< if (lsqPolicy != SMTQueuePolicy::Dynamic || numThreads > 1) {
< int active_threads = activeThreads->size();
<
< int maxEntries;
<
< if (lsqPolicy == SMTQueuePolicy::Partitioned) {
< maxEntries = LQEntries / active_threads;
< } else if (lsqPolicy == SMTQueuePolicy::Threshold &&
< active_threads == 1) {
< maxEntries = LQEntries;
< } else {
< maxEntries = LQEntries;
< }
<
< list<ThreadID>::iterator threads = activeThreads->begin();
< list<ThreadID>::iterator end = activeThreads->end();
<
< while (threads != end) {
< ThreadID tid = *threads++;
<
< resizeEntries(maxEntries, tid);
< }
< }
< }
<
213c185
< LSQ<Impl>::removeEntries(ThreadID tid)
---
> LSQ<Impl>::cacheBlocked(bool v)
215,216c187
< thread[tid].clearLQ();
< thread[tid].clearSQ();
---
> _cacheBlocked = v;
220,221c191,192
< void
< LSQ<Impl>::resizeEntries(unsigned size, ThreadID tid)
---
> bool
> LSQ<Impl>::storePortAvailable() const
223,224c194
< thread[tid].resizeLQ(size);
< thread[tid].resizeSQ(size);
---
> return usedStorePorts < cacheStorePorts;
229c199
< LSQ<Impl>::tick()
---
> LSQ<Impl>::storePortBusy()
231,238c201,202
< list<ThreadID>::iterator threads = activeThreads->begin();
< list<ThreadID>::iterator end = activeThreads->end();
<
< while (threads != end) {
< ThreadID tid = *threads++;
<
< thread[tid].tick();
< }
---
> usedStorePorts++;
> assert(usedStorePorts <= cacheStorePorts);
318a283
> cacheBlocked(false);
325a291,299
> void
> LSQ<Impl>::completeDataAccess(PacketPtr pkt)
> {
> auto senderState = dynamic_cast<LSQSenderState*>(pkt->senderState);
> thread[cpu->contextToThread(senderState->contextId())]
> .completeDataAccess(pkt);
> }
>
> template <class Impl>
333,334c307,308
< thread[cpu->contextToThread(pkt->req->contextId())]
< .completeDataAccess(pkt);
---
> auto senderState = dynamic_cast<LSQSenderState*>(pkt->senderState);
> panic_if(!senderState, "Got packet back with unknown sender state\n");
335a310,311
> thread[cpu->contextToThread(senderState->contextId())].recvTimingResp(pkt);
>
354a331,332
> // Update the LSQRequest state (this may delete the request)
> senderState->request()->packetReplied();
356d333
< delete pkt;
683a661,1098
> static Addr
> addrBlockOffset(Addr addr, unsigned int block_size)
> {
> return addr & (block_size - 1);
> }
>
> static Addr
> addrBlockAlign(Addr addr, uint64_t block_size)
> {
> return addr & ~(block_size - 1);
> }
>
> static bool
> transferNeedsBurst(Addr addr, uint64_t size, uint64_t block_size)
> {
> return (addrBlockOffset(addr, block_size) + size) > block_size;
> }
>
> template<class Impl>
> Fault
> LSQ<Impl>::pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data,
> unsigned int size, Addr addr, Request::Flags flags,
> uint64_t *res)
> {
> ThreadID tid = cpu->contextToThread(inst->contextId());
> auto cacheLineSize = cpu->cacheLineSize();
> bool needs_burst = transferNeedsBurst(addr, size, cacheLineSize);
> LSQRequest* req = nullptr;
>
> if (inst->translationStarted()) {
> req = inst->savedReq;
> assert(req);
> } else {
> if (needs_burst) {
> req = new SplitDataRequest(&thread[tid], inst, isLoad, addr,
> size, flags, data, res);
> } else {
> req = new SingleDataRequest(&thread[tid], inst, isLoad, addr,
> size, flags, data, res);
> }
> assert(req);
> inst->setRequest();
> req->taskId(cpu->taskId());
>
> req->initiateTranslation();
> }
>
> /* This is the place where instructions get the effAddr. */
> if (req->isTranslationComplete()) {
> if (inst->getFault() == NoFault) {
> inst->effAddr = req->getVaddr();
> inst->effSize = size;
> inst->effAddrValid(true);
>
> if (cpu->checker) {
> inst->reqToVerify = std::make_shared<Request>(*req->request());
> }
> if (isLoad)
> inst->getFault() = cpu->read(req, inst->lqIdx);
> else
> inst->getFault() = cpu->write(req, data, inst->sqIdx);
> } else if (isLoad) {
> // Commit will have to clean up whatever happened. Set this
> // instruction as executed.
> inst->setExecuted();
> }
> }
>
> if (inst->traceData)
> inst->traceData->setMem(addr, size, flags);
>
> return inst->getFault();
> }
>
> template<class Impl>
> void
> LSQ<Impl>::SingleDataRequest::finish(const Fault &fault, const RequestPtr &req,
> ThreadContext* tc, BaseTLB::Mode mode)
> {
> _fault.push_back(fault);
> numInTranslationFragments = 0;
> numTranslatedFragments = 1;
> /* If the instruction has been squashed, let the request know
> * as it may have to self-destruct. */
> if (_inst->isSquashed()) {
> this->squashTranslation();
> } else {
> _inst->strictlyOrdered(req->isStrictlyOrdered());
>
> flags.set(Flag::TranslationFinished);
> if (fault == NoFault) {
> _inst->physEffAddr = req->getPaddr();
> _inst->memReqFlags = req->getFlags();
> if (req->isCondSwap()) {
> assert(_res);
> req->setExtraData(*_res);
> }
> setState(State::Request);
> } else {
> setState(State::Fault);
> }
>
> LSQRequest::_inst->fault = fault;
> LSQRequest::_inst->translationCompleted(true);
> }
> }
>
> template<class Impl>
> void
> LSQ<Impl>::SplitDataRequest::finish(const Fault &fault, const RequestPtr &req,
> ThreadContext* tc, BaseTLB::Mode mode)
> {
> _fault.push_back(fault);
> assert(req == _requests[numTranslatedFragments] || this->isDelayed());
>
> numInTranslationFragments--;
> numTranslatedFragments++;
>
> mainReq->setFlags(req->getFlags());
>
> if (numTranslatedFragments == _requests.size()) {
> if (_inst->isSquashed()) {
> this->squashTranslation();
> } else {
> _inst->strictlyOrdered(mainReq->isStrictlyOrdered());
> flags.set(Flag::TranslationFinished);
> auto fault_it = _fault.begin();
> /* Skip past the NoFault entries to the first fault, if any. */
> while (fault_it != _fault.end() && *fault_it == NoFault)
> fault_it++;
> /* If none of the fragments faulted: */
> if (fault_it == _fault.end()) {
> _inst->physEffAddr = request(0)->getPaddr();
>
> _inst->memReqFlags = mainReq->getFlags();
> if (mainReq->isCondSwap()) {
> assert(_res);
> mainReq->setExtraData(*_res);
> }
> setState(State::Request);
> _inst->fault = NoFault;
> } else {
> setState(State::Fault);
> _inst->fault = *fault_it;
> }
> _inst->translationCompleted(true);
> }
> }
> }
>
> template<class Impl>
> void
> LSQ<Impl>::SingleDataRequest::initiateTranslation()
> {
> _inst->translationStarted(true);
> setState(State::Translation);
> flags.set(Flag::TranslationStarted);
>
> _inst->savedReq = this;
> sendFragmentToTranslation(0);
>
> if (isTranslationComplete()) {
> }
> }
>
> template<class Impl>
> PacketPtr
> LSQ<Impl>::SplitDataRequest::mainPacket()
> {
> return _mainPacket;
> }
>
> template<class Impl>
> RequestPtr
> LSQ<Impl>::SplitDataRequest::mainRequest()
> {
> return mainReq;
> }
>
> template<class Impl>
> void
> LSQ<Impl>::SplitDataRequest::initiateTranslation()
> {
> _inst->translationStarted(true);
> setState(State::Translation);
> flags.set(Flag::TranslationStarted);
>
> unsigned int cacheLineSize = _port.cacheLineSize();
> Addr base_addr = _addr;
> Addr next_addr = addrBlockAlign(_addr + cacheLineSize, cacheLineSize);
> Addr final_addr = addrBlockAlign(_addr + _size, cacheLineSize);
> uint32_t size_so_far = 0;
>
> mainReq = std::make_shared<Request>(_inst->getASID(), base_addr,
> _size, _flags, _inst->masterId(),
> _inst->instAddr(), _inst->contextId());
>
> // Paddr is not used in mainReq. However, we will accumulate the flags
> // from the sub requests into mainReq by calling setFlags() in finish().
> // setFlags() assumes that paddr is set so flip the paddr valid bit here to
> // avoid a potential assert in setFlags() when we call it from finish().
> mainReq->setPaddr(0);
>
> /* Get the pre-fix, possibly unaligned. */
> _requests.push_back(std::make_shared<Request>(_inst->getASID(), base_addr,
> next_addr - base_addr, _flags, _inst->masterId(),
> _inst->instAddr(), _inst->contextId()));
> size_so_far = next_addr - base_addr;
>
> /* We are block aligned now, reading whole blocks. */
> base_addr = next_addr;
> while (base_addr != final_addr) {
> _requests.push_back(std::make_shared<Request>(_inst->getASID(),
> base_addr, cacheLineSize, _flags, _inst->masterId(),
> _inst->instAddr(), _inst->contextId()));
> size_so_far += cacheLineSize;
> base_addr += cacheLineSize;
> }
>
> /* Deal with the tail. */
> if (size_so_far < _size) {
> _requests.push_back(std::make_shared<Request>(_inst->getASID(),
> base_addr, _size - size_so_far, _flags, _inst->masterId(),
> _inst->instAddr(), _inst->contextId()));
> }
>
> /* Setup the requests and send them to translation. */
> for (auto& r: _requests) {
> r->setReqInstSeqNum(_inst->seqNum);
> r->taskId(_taskId);
> }
> this->_inst->savedReq = this;
> numInTranslationFragments = 0;
> numTranslatedFragments = 0;
>
> for (uint32_t i = 0; i < _requests.size(); i++) {
> sendFragmentToTranslation(i);
> }
> }
>
> template<class Impl>
> void
> LSQ<Impl>::LSQRequest::sendFragmentToTranslation(int i)
> {
> numInTranslationFragments++;
> _port.dTLB()->translateTiming(
> this->request(i),
> this->_inst->thread->getTC(), this,
> this->isLoad() ? BaseTLB::Read : BaseTLB::Write);
> }
>
> template<class Impl>
> bool
> LSQ<Impl>::SingleDataRequest::recvTimingResp(PacketPtr pkt)
> {
> assert(_numOutstandingPackets == 1);
> auto state = dynamic_cast<LSQSenderState*>(pkt->senderState);
> setState(State::Complete);
> flags.set(Flag::Complete);
> state->outstanding--;
> assert(pkt == _packets.front());
> _port.completeDataAccess(pkt);
> return true;
> }
>
> template<class Impl>
> bool
> LSQ<Impl>::SplitDataRequest::recvTimingResp(PacketPtr pkt)
> {
> auto state = dynamic_cast<LSQSenderState*>(pkt->senderState);
> uint32_t pktIdx = 0;
> while (pktIdx < _packets.size() && pkt != _packets[pktIdx])
> pktIdx++;
> assert(pktIdx < _packets.size());
> assert(pkt->req == _requests[pktIdx]);
> assert(pkt == _packets[pktIdx]);
> numReceivedPackets++;
> state->outstanding--;
> if (numReceivedPackets == _packets.size()) {
> setState(State::Complete);
> flags.set(Flag::Complete);
> /* Assemble packets. */
> PacketPtr resp = isLoad()
> ? Packet::createRead(mainReq)
> : Packet::createWrite(mainReq);
> if (isLoad())
> resp->dataStatic(_inst->memData);
> else
> resp->dataStatic(_data);
> resp->senderState = _senderState;
> _port.completeDataAccess(resp);
> delete resp;
> }
> return true;
> }
>
> template<class Impl>
> void
> LSQ<Impl>::SingleDataRequest::buildPackets()
> {
> assert(_senderState);
> /* Retries do not create new packets. */
> if (_packets.size() == 0) {
> _packets.push_back(
> isLoad()
> ? Packet::createRead(request())
> : Packet::createWrite(request()));
> _packets.back()->dataStatic(_inst->memData);
> _packets.back()->senderState = _senderState;
> }
> assert(_packets.size() == 1);
> }
>
> template<class Impl>
> void
> LSQ<Impl>::SplitDataRequest::buildPackets()
> {
> /* Extra data?? */
> ptrdiff_t offset = 0;
> if (_packets.size() == 0) {
> /* No packets have been built yet, so create them now. */
> if (isLoad()) {
> _mainPacket = Packet::createRead(mainReq);
> _mainPacket->dataStatic(_inst->memData);
> }
> for (auto& r: _requests) {
> PacketPtr pkt = isLoad() ? Packet::createRead(r)
> : Packet::createWrite(r);
> if (isLoad()) {
> pkt->dataStatic(_inst->memData + offset);
> } else {
> uint8_t* req_data = new uint8_t[r->getSize()];
> std::memcpy(req_data,
> _inst->memData + offset,
> r->getSize());
> pkt->dataDynamic(req_data);
> }
> offset += r->getSize();
> pkt->senderState = _senderState;
> _packets.push_back(pkt);
> }
> }
> assert(_packets.size() == _requests.size());
> }
>
> template<class Impl>
> void
> LSQ<Impl>::SingleDataRequest::sendPacketToCache()
> {
> assert(_numOutstandingPackets == 0);
> if (lsqUnit()->trySendPacket(isLoad(), _packets.at(0)))
> _numOutstandingPackets = 1;
> }
>
> template<class Impl>
> void
> LSQ<Impl>::SplitDataRequest::sendPacketToCache()
> {
> /* Try to send the packets. */
> while (numReceivedPackets + _numOutstandingPackets < _packets.size() &&
> lsqUnit()->trySendPacket(isLoad(),
> _packets.at(numReceivedPackets + _numOutstandingPackets))) {
> _numOutstandingPackets++;
> }
> }
>
> template<class Impl>
> void
> LSQ<Impl>::SingleDataRequest::handleIprWrite(ThreadContext *thread,
> PacketPtr pkt)
> {
> TheISA::handleIprWrite(thread, pkt);
> }
>
> template<class Impl>
> void
> LSQ<Impl>::SplitDataRequest::handleIprWrite(ThreadContext *thread,
> PacketPtr mainPkt)
> {
> unsigned offset = 0;
> for (auto r: _requests) {
> PacketPtr pkt = new Packet(r, MemCmd::WriteReq);
> pkt->dataStatic(mainPkt->getPtr<uint8_t>() + offset);
> TheISA::handleIprWrite(thread, pkt);
> offset += r->getSize();
> delete pkt;
> }
> }
>
> template<class Impl>
> Cycles
> LSQ<Impl>::SingleDataRequest::handleIprRead(ThreadContext *thread,
> PacketPtr pkt)
> {
> return TheISA::handleIprRead(thread, pkt);
> }
>
> template<class Impl>
> Cycles
> LSQ<Impl>::SplitDataRequest::handleIprRead(ThreadContext *thread,
> PacketPtr mainPkt)
> {
> Cycles delay(0);
> unsigned offset = 0;
>
> for (auto r: _requests) {
> PacketPtr pkt = new Packet(r, MemCmd::ReadReq);
> pkt->dataStatic(mainPkt->getPtr<uint8_t>() + offset);
> Cycles d = TheISA::handleIprRead(thread, pkt);
> if (d > delay)
> delay = d;
> offset += r->getSize();
> delete pkt;
> }
> return delay;
> }
>
> template<class Impl>
> bool
> LSQ<Impl>::SingleDataRequest::isCacheBlockHit(Addr blockAddr, Addr blockMask)
> {
> return ( (LSQRequest::_requests[0]->getPaddr() & blockMask) == blockAddr);
> }
>
> template<class Impl>
> bool
> LSQ<Impl>::SplitDataRequest::isCacheBlockHit(Addr blockAddr, Addr blockMask)
> {
> bool is_hit = false;
> for (auto &r: _requests) {
> if ((r->getPaddr() & blockMask) == blockAddr) {
> is_hit = true;
> break;
> }
> }
> return is_hit;
> }
>