fetch_unit.cc revision 11308
111308Santhony.gutierrez@amd.com/* 211308Santhony.gutierrez@amd.com * Copyright (c) 2014-2015 Advanced Micro Devices, Inc. 311308Santhony.gutierrez@amd.com * All rights reserved. 411308Santhony.gutierrez@amd.com * 511308Santhony.gutierrez@amd.com * For use for simulation and test purposes only 611308Santhony.gutierrez@amd.com * 711308Santhony.gutierrez@amd.com * Redistribution and use in source and binary forms, with or without 811308Santhony.gutierrez@amd.com * modification, are permitted provided that the following conditions are met: 911308Santhony.gutierrez@amd.com * 1011308Santhony.gutierrez@amd.com * 1. Redistributions of source code must retain the above copyright notice, 1111308Santhony.gutierrez@amd.com * this list of conditions and the following disclaimer. 1211308Santhony.gutierrez@amd.com * 1311308Santhony.gutierrez@amd.com * 2. Redistributions in binary form must reproduce the above copyright notice, 1411308Santhony.gutierrez@amd.com * this list of conditions and the following disclaimer in the documentation 1511308Santhony.gutierrez@amd.com * and/or other materials provided with the distribution. 1611308Santhony.gutierrez@amd.com * 1711308Santhony.gutierrez@amd.com * 3. Neither the name of the copyright holder nor the names of its contributors 1811308Santhony.gutierrez@amd.com * may be used to endorse or promote products derived from this software 1911308Santhony.gutierrez@amd.com * without specific prior written permission. 2011308Santhony.gutierrez@amd.com * 2111308Santhony.gutierrez@amd.com * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 2211308Santhony.gutierrez@amd.com * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 2311308Santhony.gutierrez@amd.com * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 2411308Santhony.gutierrez@amd.com * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 2511308Santhony.gutierrez@amd.com * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 2611308Santhony.gutierrez@amd.com * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 2711308Santhony.gutierrez@amd.com * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 2811308Santhony.gutierrez@amd.com * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 2911308Santhony.gutierrez@amd.com * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 3011308Santhony.gutierrez@amd.com * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 3111308Santhony.gutierrez@amd.com * POSSIBILITY OF SUCH DAMAGE. 3211308Santhony.gutierrez@amd.com * 3311308Santhony.gutierrez@amd.com * Author: Brad Beckmann, Sooraj Puthoor 3411308Santhony.gutierrez@amd.com */ 3511308Santhony.gutierrez@amd.com 3611308Santhony.gutierrez@amd.com#include "gpu-compute/fetch_unit.hh" 3711308Santhony.gutierrez@amd.com 3811308Santhony.gutierrez@amd.com#include "debug/GPUFetch.hh" 3911308Santhony.gutierrez@amd.com#include "debug/GPUPort.hh" 4011308Santhony.gutierrez@amd.com#include "debug/GPUTLB.hh" 4111308Santhony.gutierrez@amd.com#include "gpu-compute/compute_unit.hh" 4211308Santhony.gutierrez@amd.com#include "gpu-compute/gpu_dyn_inst.hh" 4311308Santhony.gutierrez@amd.com#include "gpu-compute/gpu_static_inst.hh" 4411308Santhony.gutierrez@amd.com#include "gpu-compute/shader.hh" 4511308Santhony.gutierrez@amd.com#include "gpu-compute/wavefront.hh" 4611308Santhony.gutierrez@amd.com#include "mem/ruby/system/RubySystem.hh" 4711308Santhony.gutierrez@amd.com 4811308Santhony.gutierrez@amd.comuint32_t FetchUnit::globalFetchUnitID; 4911308Santhony.gutierrez@amd.com 5011308Santhony.gutierrez@amd.comFetchUnit::FetchUnit(const ComputeUnitParams* params) : 5111308Santhony.gutierrez@amd.com timingSim(true), 5211308Santhony.gutierrez@amd.com computeUnit(nullptr), 5311308Santhony.gutierrez@amd.com fetchScheduler(params), 5411308Santhony.gutierrez@amd.com waveList(nullptr) 5511308Santhony.gutierrez@amd.com{ 5611308Santhony.gutierrez@amd.com} 5711308Santhony.gutierrez@amd.com 5811308Santhony.gutierrez@amd.comFetchUnit::~FetchUnit() 5911308Santhony.gutierrez@amd.com{ 6011308Santhony.gutierrez@amd.com fetchQueue.clear(); 6111308Santhony.gutierrez@amd.com fetchStatusQueue.clear(); 6211308Santhony.gutierrez@amd.com} 6311308Santhony.gutierrez@amd.com 6411308Santhony.gutierrez@amd.comvoid 6511308Santhony.gutierrez@amd.comFetchUnit::init(ComputeUnit *cu) 6611308Santhony.gutierrez@amd.com{ 6711308Santhony.gutierrez@amd.com computeUnit = cu; 6811308Santhony.gutierrez@amd.com timingSim = computeUnit->shader->timingSim; 6911308Santhony.gutierrez@amd.com fetchQueue.clear(); 7011308Santhony.gutierrez@amd.com fetchStatusQueue.resize(computeUnit->shader->n_wf); 7111308Santhony.gutierrez@amd.com 7211308Santhony.gutierrez@amd.com for (int j = 0; j < computeUnit->shader->n_wf; ++j) { 7311308Santhony.gutierrez@amd.com fetchStatusQueue[j] = std::make_pair(waveList->at(j), false); 7411308Santhony.gutierrez@amd.com } 7511308Santhony.gutierrez@amd.com 7611308Santhony.gutierrez@amd.com fetchScheduler.bindList(&fetchQueue); 7711308Santhony.gutierrez@amd.com} 7811308Santhony.gutierrez@amd.com 7911308Santhony.gutierrez@amd.comvoid 8011308Santhony.gutierrez@amd.comFetchUnit::exec() 8111308Santhony.gutierrez@amd.com{ 8211308Santhony.gutierrez@amd.com // re-evaluate waves which are marked as not ready for fetch 8311308Santhony.gutierrez@amd.com for (int j = 0; j < computeUnit->shader->n_wf; ++j) { 8411308Santhony.gutierrez@amd.com // Following code assumes 64-bit opertaion and all insts are 8511308Santhony.gutierrez@amd.com // represented by 64-bit pointers to inst objects. 8611308Santhony.gutierrez@amd.com Wavefront *curWave = fetchStatusQueue[j].first; 8711308Santhony.gutierrez@amd.com assert (curWave); 8811308Santhony.gutierrez@amd.com 8911308Santhony.gutierrez@amd.com // The wavefront has to be active, the IB occupancy has to be 9011308Santhony.gutierrez@amd.com // 4 or less instructions and it can not have any branches to 9111308Santhony.gutierrez@amd.com // prevent speculative instruction fetches 9211308Santhony.gutierrez@amd.com if (!fetchStatusQueue[j].second) { 9311308Santhony.gutierrez@amd.com if (curWave->status == Wavefront::S_RUNNING && 9411308Santhony.gutierrez@amd.com curWave->instructionBuffer.size() <= 4 && 9511308Santhony.gutierrez@amd.com !curWave->instructionBufferHasBranch() && 9611308Santhony.gutierrez@amd.com !curWave->pendingFetch) { 9711308Santhony.gutierrez@amd.com fetchQueue.push_back(curWave); 9811308Santhony.gutierrez@amd.com fetchStatusQueue[j].second = true; 9911308Santhony.gutierrez@amd.com } 10011308Santhony.gutierrez@amd.com } 10111308Santhony.gutierrez@amd.com } 10211308Santhony.gutierrez@amd.com 10311308Santhony.gutierrez@amd.com // Fetch only if there is some wave ready to be fetched 10411308Santhony.gutierrez@amd.com // An empty fetchQueue will cause the schedular to panic 10511308Santhony.gutierrez@amd.com if (fetchQueue.size()) { 10611308Santhony.gutierrez@amd.com Wavefront *waveToBeFetched = fetchScheduler.chooseWave(); 10711308Santhony.gutierrez@amd.com waveToBeFetched->pendingFetch = true; 10811308Santhony.gutierrez@amd.com fetchStatusQueue[waveToBeFetched->wfSlotId].second = false; 10911308Santhony.gutierrez@amd.com initiateFetch(waveToBeFetched); 11011308Santhony.gutierrez@amd.com } 11111308Santhony.gutierrez@amd.com} 11211308Santhony.gutierrez@amd.com 11311308Santhony.gutierrez@amd.comvoid 11411308Santhony.gutierrez@amd.comFetchUnit::initiateFetch(Wavefront *wavefront) 11511308Santhony.gutierrez@amd.com{ 11611308Santhony.gutierrez@amd.com // calculate the virtual address to fetch from the SQC 11711308Santhony.gutierrez@amd.com Addr vaddr = wavefront->pc() + wavefront->instructionBuffer.size(); 11811308Santhony.gutierrez@amd.com vaddr = wavefront->base_ptr + vaddr * sizeof(GPUStaticInst*); 11911308Santhony.gutierrez@amd.com 12011308Santhony.gutierrez@amd.com DPRINTF(GPUTLB, "CU%d: WF[%d][%d]: Initiating fetch translation: %#x\n", 12111308Santhony.gutierrez@amd.com computeUnit->cu_id, wavefront->simdId, wavefront->wfSlotId, vaddr); 12211308Santhony.gutierrez@amd.com 12311308Santhony.gutierrez@amd.com // Since this is an instruction prefetch, if you're split then just finish 12411308Santhony.gutierrez@amd.com // out the current line. 12511308Santhony.gutierrez@amd.com unsigned block_size = RubySystem::getBlockSizeBytes(); 12611308Santhony.gutierrez@amd.com // check for split accesses 12711308Santhony.gutierrez@amd.com Addr split_addr = roundDown(vaddr + block_size - 1, block_size); 12811308Santhony.gutierrez@amd.com unsigned size = block_size; 12911308Santhony.gutierrez@amd.com 13011308Santhony.gutierrez@amd.com if (split_addr > vaddr) { 13111308Santhony.gutierrez@amd.com // misaligned access, just grab the rest of the line 13211308Santhony.gutierrez@amd.com size = split_addr - vaddr; 13311308Santhony.gutierrez@amd.com } 13411308Santhony.gutierrez@amd.com 13511308Santhony.gutierrez@amd.com // set up virtual request 13611308Santhony.gutierrez@amd.com Request *req = new Request(0, vaddr, size, Request::INST_FETCH, 13711308Santhony.gutierrez@amd.com computeUnit->masterId(), 0, 0, 0); 13811308Santhony.gutierrez@amd.com 13911308Santhony.gutierrez@amd.com PacketPtr pkt = new Packet(req, MemCmd::ReadReq); 14011308Santhony.gutierrez@amd.com // This fetchBlock is kind of faux right now - because the translations so 14111308Santhony.gutierrez@amd.com // far don't actually return Data 14211308Santhony.gutierrez@amd.com uint64_t fetchBlock; 14311308Santhony.gutierrez@amd.com pkt->dataStatic(&fetchBlock); 14411308Santhony.gutierrez@amd.com 14511308Santhony.gutierrez@amd.com if (timingSim) { 14611308Santhony.gutierrez@amd.com // SenderState needed on Return 14711308Santhony.gutierrez@amd.com pkt->senderState = new ComputeUnit::ITLBPort::SenderState(wavefront); 14811308Santhony.gutierrez@amd.com 14911308Santhony.gutierrez@amd.com // Sender State needed by TLB hierarchy 15011308Santhony.gutierrez@amd.com pkt->senderState = 15111308Santhony.gutierrez@amd.com new TheISA::GpuTLB::TranslationState(BaseTLB::Execute, 15211308Santhony.gutierrez@amd.com computeUnit->shader->gpuTc, 15311308Santhony.gutierrez@amd.com false, pkt->senderState); 15411308Santhony.gutierrez@amd.com 15511308Santhony.gutierrez@amd.com if (computeUnit->sqcTLBPort->isStalled()) { 15611308Santhony.gutierrez@amd.com assert(computeUnit->sqcTLBPort->retries.size() > 0); 15711308Santhony.gutierrez@amd.com 15811308Santhony.gutierrez@amd.com DPRINTF(GPUTLB, "Failed to send TLB req for FETCH addr %#x\n", 15911308Santhony.gutierrez@amd.com vaddr); 16011308Santhony.gutierrez@amd.com 16111308Santhony.gutierrez@amd.com computeUnit->sqcTLBPort->retries.push_back(pkt); 16211308Santhony.gutierrez@amd.com } else if (!computeUnit->sqcTLBPort->sendTimingReq(pkt)) { 16311308Santhony.gutierrez@amd.com // Stall the data port; 16411308Santhony.gutierrez@amd.com // No more packet is issued till 16511308Santhony.gutierrez@amd.com // ruby indicates resources are freed by 16611308Santhony.gutierrez@amd.com // a recvReqRetry() call back on this port. 16711308Santhony.gutierrez@amd.com computeUnit->sqcTLBPort->stallPort(); 16811308Santhony.gutierrez@amd.com 16911308Santhony.gutierrez@amd.com DPRINTF(GPUTLB, "Failed to send TLB req for FETCH addr %#x\n", 17011308Santhony.gutierrez@amd.com vaddr); 17111308Santhony.gutierrez@amd.com 17211308Santhony.gutierrez@amd.com computeUnit->sqcTLBPort->retries.push_back(pkt); 17311308Santhony.gutierrez@amd.com } else { 17411308Santhony.gutierrez@amd.com DPRINTF(GPUTLB, "sent FETCH translation request for %#x\n", vaddr); 17511308Santhony.gutierrez@amd.com } 17611308Santhony.gutierrez@amd.com } else { 17711308Santhony.gutierrez@amd.com pkt->senderState = 17811308Santhony.gutierrez@amd.com new TheISA::GpuTLB::TranslationState(BaseTLB::Execute, 17911308Santhony.gutierrez@amd.com computeUnit->shader->gpuTc); 18011308Santhony.gutierrez@amd.com 18111308Santhony.gutierrez@amd.com computeUnit->sqcTLBPort->sendFunctional(pkt); 18211308Santhony.gutierrez@amd.com 18311308Santhony.gutierrez@amd.com TheISA::GpuTLB::TranslationState *sender_state = 18411308Santhony.gutierrez@amd.com safe_cast<TheISA::GpuTLB::TranslationState*>(pkt->senderState); 18511308Santhony.gutierrez@amd.com 18611308Santhony.gutierrez@amd.com delete sender_state->tlbEntry; 18711308Santhony.gutierrez@amd.com delete sender_state; 18811308Santhony.gutierrez@amd.com // fetch the instructions from the SQC when we operate in 18911308Santhony.gutierrez@amd.com // functional mode only 19011308Santhony.gutierrez@amd.com fetch(pkt, wavefront); 19111308Santhony.gutierrez@amd.com } 19211308Santhony.gutierrez@amd.com} 19311308Santhony.gutierrez@amd.com 19411308Santhony.gutierrez@amd.comvoid 19511308Santhony.gutierrez@amd.comFetchUnit::fetch(PacketPtr pkt, Wavefront *wavefront) 19611308Santhony.gutierrez@amd.com{ 19711308Santhony.gutierrez@amd.com assert(pkt->req->hasPaddr()); 19811308Santhony.gutierrez@amd.com assert(pkt->req->hasSize()); 19911308Santhony.gutierrez@amd.com 20011308Santhony.gutierrez@amd.com DPRINTF(GPUFetch, "CU%d: WF[%d][%d]: Fetch Access: %#x\n", 20111308Santhony.gutierrez@amd.com computeUnit->cu_id, wavefront->simdId, wavefront->wfSlotId, 20211308Santhony.gutierrez@amd.com pkt->req->getPaddr()); 20311308Santhony.gutierrez@amd.com 20411308Santhony.gutierrez@amd.com // this is necessary because the GPU TLB receives packets instead of 20511308Santhony.gutierrez@amd.com // requests. when the translation is complete, all relevent fields in the 20611308Santhony.gutierrez@amd.com // request will be populated, but not in the packet. here we create the 20711308Santhony.gutierrez@amd.com // new packet so we can set the size, addr, and proper flags. 20811308Santhony.gutierrez@amd.com PacketPtr oldPkt = pkt; 20911308Santhony.gutierrez@amd.com pkt = new Packet(oldPkt->req, oldPkt->cmd); 21011308Santhony.gutierrez@amd.com delete oldPkt; 21111308Santhony.gutierrez@amd.com 21211308Santhony.gutierrez@amd.com TheGpuISA::RawMachInst *data = 21311308Santhony.gutierrez@amd.com new TheGpuISA::RawMachInst[pkt->req->getSize() / 21411308Santhony.gutierrez@amd.com sizeof(TheGpuISA::RawMachInst)]; 21511308Santhony.gutierrez@amd.com 21611308Santhony.gutierrez@amd.com pkt->dataDynamic<TheGpuISA::RawMachInst>(data); 21711308Santhony.gutierrez@amd.com 21811308Santhony.gutierrez@amd.com // New SenderState for the memory access 21911308Santhony.gutierrez@amd.com pkt->senderState = new ComputeUnit::SQCPort::SenderState(wavefront); 22011308Santhony.gutierrez@amd.com 22111308Santhony.gutierrez@amd.com if (timingSim) { 22211308Santhony.gutierrez@amd.com // translation is done. Send the appropriate timing memory request. 22311308Santhony.gutierrez@amd.com 22411308Santhony.gutierrez@amd.com if (!computeUnit->sqcPort->sendTimingReq(pkt)) { 22511308Santhony.gutierrez@amd.com computeUnit->sqcPort->retries.push_back(std::make_pair(pkt, 22611308Santhony.gutierrez@amd.com wavefront)); 22711308Santhony.gutierrez@amd.com 22811308Santhony.gutierrez@amd.com DPRINTF(GPUPort, "CU%d: WF[%d][%d]: Fetch addr %#x failed!\n", 22911308Santhony.gutierrez@amd.com computeUnit->cu_id, wavefront->simdId, wavefront->wfSlotId, 23011308Santhony.gutierrez@amd.com pkt->req->getPaddr()); 23111308Santhony.gutierrez@amd.com } else { 23211308Santhony.gutierrez@amd.com DPRINTF(GPUPort, "CU%d: WF[%d][%d]: Fetch addr %#x sent!\n", 23311308Santhony.gutierrez@amd.com computeUnit->cu_id, wavefront->simdId, wavefront->wfSlotId, 23411308Santhony.gutierrez@amd.com pkt->req->getPaddr()); 23511308Santhony.gutierrez@amd.com } 23611308Santhony.gutierrez@amd.com } else { 23711308Santhony.gutierrez@amd.com computeUnit->sqcPort->sendFunctional(pkt); 23811308Santhony.gutierrez@amd.com processFetchReturn(pkt); 23911308Santhony.gutierrez@amd.com } 24011308Santhony.gutierrez@amd.com} 24111308Santhony.gutierrez@amd.com 24211308Santhony.gutierrez@amd.comvoid 24311308Santhony.gutierrez@amd.comFetchUnit::processFetchReturn(PacketPtr pkt) 24411308Santhony.gutierrez@amd.com{ 24511308Santhony.gutierrez@amd.com ComputeUnit::SQCPort::SenderState *sender_state = 24611308Santhony.gutierrez@amd.com safe_cast<ComputeUnit::SQCPort::SenderState*>(pkt->senderState); 24711308Santhony.gutierrez@amd.com 24811308Santhony.gutierrez@amd.com Wavefront *wavefront = sender_state->wavefront; 24911308Santhony.gutierrez@amd.com 25011308Santhony.gutierrez@amd.com DPRINTF(GPUFetch, "CU%d: WF[%d][%d]: Fetch addr %#x returned " 25111308Santhony.gutierrez@amd.com "%d bytes, %d instructions!\n", computeUnit->cu_id, 25211308Santhony.gutierrez@amd.com wavefront->simdId, wavefront->wfSlotId, pkt->req->getPaddr(), 25311308Santhony.gutierrez@amd.com pkt->req->getSize(), pkt->req->getSize() / 25411308Santhony.gutierrez@amd.com sizeof(TheGpuISA::RawMachInst)); 25511308Santhony.gutierrez@amd.com 25611308Santhony.gutierrez@amd.com if (wavefront->dropFetch) { 25711308Santhony.gutierrez@amd.com assert(wavefront->instructionBuffer.empty()); 25811308Santhony.gutierrez@amd.com wavefront->dropFetch = false; 25911308Santhony.gutierrez@amd.com } else { 26011308Santhony.gutierrez@amd.com TheGpuISA::RawMachInst *inst_index_ptr = 26111308Santhony.gutierrez@amd.com (TheGpuISA::RawMachInst*)pkt->getPtr<uint8_t>(); 26211308Santhony.gutierrez@amd.com 26311308Santhony.gutierrez@amd.com assert(wavefront->instructionBuffer.size() <= 4); 26411308Santhony.gutierrez@amd.com 26511308Santhony.gutierrez@amd.com for (int i = 0; i < pkt->req->getSize() / 26611308Santhony.gutierrez@amd.com sizeof(TheGpuISA::RawMachInst); ++i) { 26711308Santhony.gutierrez@amd.com GPUStaticInst *inst_ptr = decoder.decode(inst_index_ptr[i]); 26811308Santhony.gutierrez@amd.com 26911308Santhony.gutierrez@amd.com assert(inst_ptr); 27011308Santhony.gutierrez@amd.com DPRINTF(GPUFetch, "CU%d: WF[%d][%d]: added %s\n", 27111308Santhony.gutierrez@amd.com computeUnit->cu_id, wavefront->simdId, 27211308Santhony.gutierrez@amd.com wavefront->wfSlotId, inst_ptr->disassemble()); 27311308Santhony.gutierrez@amd.com 27411308Santhony.gutierrez@amd.com GPUDynInstPtr gpuDynInst = 27511308Santhony.gutierrez@amd.com std::make_shared<GPUDynInst>(computeUnit, wavefront, inst_ptr, 27611308Santhony.gutierrez@amd.com computeUnit->getAndIncSeqNum()); 27711308Santhony.gutierrez@amd.com 27811308Santhony.gutierrez@amd.com wavefront->instructionBuffer.push_back(gpuDynInst); 27911308Santhony.gutierrez@amd.com } 28011308Santhony.gutierrez@amd.com } 28111308Santhony.gutierrez@amd.com 28211308Santhony.gutierrez@amd.com wavefront->pendingFetch = false; 28311308Santhony.gutierrez@amd.com 28411308Santhony.gutierrez@amd.com delete pkt->senderState; 28511308Santhony.gutierrez@amd.com delete pkt->req; 28611308Santhony.gutierrez@amd.com delete pkt; 28711308Santhony.gutierrez@amd.com} 28811308Santhony.gutierrez@amd.com 28911308Santhony.gutierrez@amd.comvoid 29011308Santhony.gutierrez@amd.comFetchUnit::bindWaveList(std::vector<Wavefront*> *wave_list) 29111308Santhony.gutierrez@amd.com{ 29211308Santhony.gutierrez@amd.com waveList = wave_list; 29311308Santhony.gutierrez@amd.com} 294