// global_memory_pipeline.cc — gem5 GPU compute model (repository revision 11700)
111308Santhony.gutierrez@amd.com/* 211308Santhony.gutierrez@amd.com * Copyright (c) 2014-2015 Advanced Micro Devices, Inc. 311308Santhony.gutierrez@amd.com * All rights reserved. 411308Santhony.gutierrez@amd.com * 511308Santhony.gutierrez@amd.com * For use for simulation and test purposes only 611308Santhony.gutierrez@amd.com * 711308Santhony.gutierrez@amd.com * Redistribution and use in source and binary forms, with or without 811308Santhony.gutierrez@amd.com * modification, are permitted provided that the following conditions are met: 911308Santhony.gutierrez@amd.com * 1011308Santhony.gutierrez@amd.com * 1. Redistributions of source code must retain the above copyright notice, 1111308Santhony.gutierrez@amd.com * this list of conditions and the following disclaimer. 1211308Santhony.gutierrez@amd.com * 1311308Santhony.gutierrez@amd.com * 2. Redistributions in binary form must reproduce the above copyright notice, 1411308Santhony.gutierrez@amd.com * this list of conditions and the following disclaimer in the documentation 1511308Santhony.gutierrez@amd.com * and/or other materials provided with the distribution. 1611308Santhony.gutierrez@amd.com * 1711308Santhony.gutierrez@amd.com * 3. Neither the name of the copyright holder nor the names of its contributors 1811308Santhony.gutierrez@amd.com * may be used to endorse or promote products derived from this software 1911308Santhony.gutierrez@amd.com * without specific prior written permission. 2011308Santhony.gutierrez@amd.com * 2111308Santhony.gutierrez@amd.com * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 2211308Santhony.gutierrez@amd.com * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 2311308Santhony.gutierrez@amd.com * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 2411308Santhony.gutierrez@amd.com * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 2511308Santhony.gutierrez@amd.com * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 2611308Santhony.gutierrez@amd.com * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 2711308Santhony.gutierrez@amd.com * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 2811308Santhony.gutierrez@amd.com * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 2911308Santhony.gutierrez@amd.com * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 3011308Santhony.gutierrez@amd.com * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 3111308Santhony.gutierrez@amd.com * POSSIBILITY OF SUCH DAMAGE. 3211308Santhony.gutierrez@amd.com * 3311308Santhony.gutierrez@amd.com * Author: John Kalamatianos, Sooraj Puthoor 3411308Santhony.gutierrez@amd.com */ 3511308Santhony.gutierrez@amd.com 3611308Santhony.gutierrez@amd.com#include "gpu-compute/global_memory_pipeline.hh" 3711308Santhony.gutierrez@amd.com 3811308Santhony.gutierrez@amd.com#include "debug/GPUMem.hh" 3911308Santhony.gutierrez@amd.com#include "debug/GPUReg.hh" 4011308Santhony.gutierrez@amd.com#include "gpu-compute/compute_unit.hh" 4111308Santhony.gutierrez@amd.com#include "gpu-compute/gpu_dyn_inst.hh" 4211308Santhony.gutierrez@amd.com#include "gpu-compute/shader.hh" 4311308Santhony.gutierrez@amd.com#include "gpu-compute/vector_register_file.hh" 4411308Santhony.gutierrez@amd.com#include "gpu-compute/wavefront.hh" 4511308Santhony.gutierrez@amd.com 4611308Santhony.gutierrez@amd.comGlobalMemPipeline::GlobalMemPipeline(const ComputeUnitParams* p) : 4711308Santhony.gutierrez@amd.com computeUnit(nullptr), gmQueueSize(p->global_mem_queue_size), 4811700Santhony.gutierrez@amd.com outOfOrderDataDelivery(p->out_of_order_data_delivery), inflightStores(0), 4911700Santhony.gutierrez@amd.com inflightLoads(0) 5011308Santhony.gutierrez@amd.com{ 
5111308Santhony.gutierrez@amd.com} 5211308Santhony.gutierrez@amd.com 5311308Santhony.gutierrez@amd.comvoid 5411308Santhony.gutierrez@amd.comGlobalMemPipeline::init(ComputeUnit *cu) 5511308Santhony.gutierrez@amd.com{ 5611308Santhony.gutierrez@amd.com computeUnit = cu; 5711308Santhony.gutierrez@amd.com globalMemSize = computeUnit->shader->globalMemSize; 5811308Santhony.gutierrez@amd.com _name = computeUnit->name() + ".GlobalMemPipeline"; 5911308Santhony.gutierrez@amd.com} 6011308Santhony.gutierrez@amd.com 6111308Santhony.gutierrez@amd.comvoid 6211308Santhony.gutierrez@amd.comGlobalMemPipeline::exec() 6311308Santhony.gutierrez@amd.com{ 6411308Santhony.gutierrez@amd.com // apply any returned global memory operations 6511700Santhony.gutierrez@amd.com GPUDynInstPtr m = getNextReadyResp(); 6611308Santhony.gutierrez@amd.com 6711308Santhony.gutierrez@amd.com bool accessVrf = true; 6811693Santhony.gutierrez@amd.com Wavefront *w = nullptr; 6911693Santhony.gutierrez@amd.com 7011308Santhony.gutierrez@amd.com // check the VRF to see if the operands of a load (or load component 7111308Santhony.gutierrez@amd.com // of an atomic) are accessible 7211692Santhony.gutierrez@amd.com if ((m) && (m->isLoad() || m->isAtomicRet())) { 7311693Santhony.gutierrez@amd.com w = m->wavefront(); 7411308Santhony.gutierrez@amd.com 7511308Santhony.gutierrez@amd.com accessVrf = 7611693Santhony.gutierrez@amd.com w->computeUnit->vrf[w->simdId]-> 7711700Santhony.gutierrez@amd.com vrfOperandAccessReady(m->seqNum(), w, m, VrfAccessType::WRITE); 7811308Santhony.gutierrez@amd.com } 7911308Santhony.gutierrez@amd.com 8011700Santhony.gutierrez@amd.com if (m && m->latency.rdy() && computeUnit->glbMemToVrfBus.rdy() && 8111308Santhony.gutierrez@amd.com accessVrf && m->statusBitVector == VectorMask(0) && 8211308Santhony.gutierrez@amd.com (computeUnit->shader->coissue_return || 8311700Santhony.gutierrez@amd.com computeUnit->wfWait.at(m->pipeId).rdy())) { 8411308Santhony.gutierrez@amd.com 
8511693Santhony.gutierrez@amd.com w = m->wavefront(); 8611693Santhony.gutierrez@amd.com 8711693Santhony.gutierrez@amd.com m->completeAcc(m); 8811693Santhony.gutierrez@amd.com 8911700Santhony.gutierrez@amd.com completeRequest(m); 9011693Santhony.gutierrez@amd.com 9111693Santhony.gutierrez@amd.com // Decrement outstanding register count 9211693Santhony.gutierrez@amd.com computeUnit->shader->ScheduleAdd(&w->outstandingReqs, m->time, -1); 9311693Santhony.gutierrez@amd.com 9411693Santhony.gutierrez@amd.com if (m->isStore() || m->isAtomic()) { 9511693Santhony.gutierrez@amd.com computeUnit->shader->ScheduleAdd(&w->outstandingReqsWrGm, 9611693Santhony.gutierrez@amd.com m->time, -1); 9711693Santhony.gutierrez@amd.com } 9811693Santhony.gutierrez@amd.com 9911693Santhony.gutierrez@amd.com if (m->isLoad() || m->isAtomic()) { 10011693Santhony.gutierrez@amd.com computeUnit->shader->ScheduleAdd(&w->outstandingReqsRdGm, 10111693Santhony.gutierrez@amd.com m->time, -1); 10211693Santhony.gutierrez@amd.com } 10311693Santhony.gutierrez@amd.com 10411693Santhony.gutierrez@amd.com // Mark write bus busy for appropriate amount of time 10511693Santhony.gutierrez@amd.com computeUnit->glbMemToVrfBus.set(m->time); 10611693Santhony.gutierrez@amd.com if (!computeUnit->shader->coissue_return) 10711693Santhony.gutierrez@amd.com w->computeUnit->wfWait.at(m->pipeId).set(m->time); 10811308Santhony.gutierrez@amd.com } 10911308Santhony.gutierrez@amd.com 11011308Santhony.gutierrez@amd.com // If pipeline has executed a global memory instruction 11111308Santhony.gutierrez@amd.com // execute global memory packets and issue global 11211308Santhony.gutierrez@amd.com // memory packets to DTLB 11311308Santhony.gutierrez@amd.com if (!gmIssuedRequests.empty()) { 11411308Santhony.gutierrez@amd.com GPUDynInstPtr mp = gmIssuedRequests.front(); 11511692Santhony.gutierrez@amd.com if (mp->isLoad() || mp->isAtomic()) { 11611308Santhony.gutierrez@amd.com if (inflightLoads >= gmQueueSize) { 
11711308Santhony.gutierrez@amd.com return; 11811308Santhony.gutierrez@amd.com } else { 11911308Santhony.gutierrez@amd.com ++inflightLoads; 12011308Santhony.gutierrez@amd.com } 12111700Santhony.gutierrez@amd.com } else if (mp->isStore()) { 12211308Santhony.gutierrez@amd.com if (inflightStores >= gmQueueSize) { 12311308Santhony.gutierrez@amd.com return; 12411700Santhony.gutierrez@amd.com } else { 12511308Santhony.gutierrez@amd.com ++inflightStores; 12611308Santhony.gutierrez@amd.com } 12711308Santhony.gutierrez@amd.com } 12811308Santhony.gutierrez@amd.com 12911308Santhony.gutierrez@amd.com mp->initiateAcc(mp); 13011700Santhony.gutierrez@amd.com 13111700Santhony.gutierrez@amd.com if (!outOfOrderDataDelivery && !mp->isMemFence()) { 13211700Santhony.gutierrez@amd.com /** 13311700Santhony.gutierrez@amd.com * if we are not in out-of-order data delivery mode 13411700Santhony.gutierrez@amd.com * then we keep the responses sorted in program order. 13511700Santhony.gutierrez@amd.com * in order to do so we must reserve an entry in the 13611700Santhony.gutierrez@amd.com * resp buffer before we issue the request to the mem 13711700Santhony.gutierrez@amd.com * system. mem fence requests will not be stored here 13811700Santhony.gutierrez@amd.com * because once they are issued from the GM pipeline, 13911700Santhony.gutierrez@amd.com * they do not send any response back to it. 
14011700Santhony.gutierrez@amd.com */ 14111700Santhony.gutierrez@amd.com gmOrderedRespBuffer.insert(std::make_pair(mp->seqNum(), 14211700Santhony.gutierrez@amd.com std::make_pair(mp, false))); 14311700Santhony.gutierrez@amd.com } 14411700Santhony.gutierrez@amd.com 14511308Santhony.gutierrez@amd.com gmIssuedRequests.pop(); 14611308Santhony.gutierrez@amd.com 14711692Santhony.gutierrez@amd.com DPRINTF(GPUMem, "CU%d: WF[%d][%d] Popping 0 mem_op = \n", 14811692Santhony.gutierrez@amd.com computeUnit->cu_id, mp->simdId, mp->wfSlotId); 14911308Santhony.gutierrez@amd.com } 15011308Santhony.gutierrez@amd.com} 15111308Santhony.gutierrez@amd.com 15211700Santhony.gutierrez@amd.comGPUDynInstPtr 15311700Santhony.gutierrez@amd.comGlobalMemPipeline::getNextReadyResp() 15411700Santhony.gutierrez@amd.com{ 15511700Santhony.gutierrez@amd.com if (outOfOrderDataDelivery) { 15611700Santhony.gutierrez@amd.com if (!gmReturnedLoads.empty()) { 15711700Santhony.gutierrez@amd.com return gmReturnedLoads.front(); 15811700Santhony.gutierrez@amd.com } else if (!gmReturnedStores.empty()) { 15911700Santhony.gutierrez@amd.com return gmReturnedStores.front(); 16011700Santhony.gutierrez@amd.com } 16111700Santhony.gutierrez@amd.com } else { 16211700Santhony.gutierrez@amd.com if (!gmOrderedRespBuffer.empty()) { 16311700Santhony.gutierrez@amd.com auto mem_req = gmOrderedRespBuffer.begin(); 16411700Santhony.gutierrez@amd.com 16511700Santhony.gutierrez@amd.com if (mem_req->second.second) { 16611700Santhony.gutierrez@amd.com return mem_req->second.first; 16711700Santhony.gutierrez@amd.com } 16811700Santhony.gutierrez@amd.com } 16911700Santhony.gutierrez@amd.com } 17011700Santhony.gutierrez@amd.com 17111700Santhony.gutierrez@amd.com return nullptr; 17211700Santhony.gutierrez@amd.com} 17311700Santhony.gutierrez@amd.com 17411700Santhony.gutierrez@amd.comvoid 17511700Santhony.gutierrez@amd.comGlobalMemPipeline::completeRequest(GPUDynInstPtr gpuDynInst) 17611700Santhony.gutierrez@amd.com{ 
17711700Santhony.gutierrez@amd.com if (gpuDynInst->isLoad() || gpuDynInst->isAtomic()) { 17811700Santhony.gutierrez@amd.com assert(inflightLoads > 0); 17911700Santhony.gutierrez@amd.com --inflightLoads; 18011700Santhony.gutierrez@amd.com } else if (gpuDynInst->isStore()) { 18111700Santhony.gutierrez@amd.com assert(inflightStores > 0); 18211700Santhony.gutierrez@amd.com --inflightStores; 18311700Santhony.gutierrez@amd.com } 18411700Santhony.gutierrez@amd.com 18511700Santhony.gutierrez@amd.com if (outOfOrderDataDelivery) { 18611700Santhony.gutierrez@amd.com if (gpuDynInst->isLoad() || gpuDynInst->isAtomic()) { 18711700Santhony.gutierrez@amd.com assert(!gmReturnedLoads.empty()); 18811700Santhony.gutierrez@amd.com gmReturnedLoads.pop(); 18911700Santhony.gutierrez@amd.com } else if (gpuDynInst->isStore()) { 19011700Santhony.gutierrez@amd.com assert(!gmReturnedStores.empty()); 19111700Santhony.gutierrez@amd.com gmReturnedStores.pop(); 19211700Santhony.gutierrez@amd.com } 19311700Santhony.gutierrez@amd.com } else { 19411700Santhony.gutierrez@amd.com // we should only pop the oldest requst, and it 19511700Santhony.gutierrez@amd.com // should be marked as done if we are here 19611700Santhony.gutierrez@amd.com assert(gmOrderedRespBuffer.begin()->first == gpuDynInst->seqNum()); 19711700Santhony.gutierrez@amd.com assert(gmOrderedRespBuffer.begin()->second.first == gpuDynInst); 19811700Santhony.gutierrez@amd.com assert(gmOrderedRespBuffer.begin()->second.second); 19911700Santhony.gutierrez@amd.com // remove this instruction from the buffer by its 20011700Santhony.gutierrez@amd.com // unique seq ID 20111700Santhony.gutierrez@amd.com gmOrderedRespBuffer.erase(gpuDynInst->seqNum()); 20211700Santhony.gutierrez@amd.com } 20311700Santhony.gutierrez@amd.com} 20411700Santhony.gutierrez@amd.com 20511700Santhony.gutierrez@amd.comvoid 20611700Santhony.gutierrez@amd.comGlobalMemPipeline::issueRequest(GPUDynInstPtr gpuDynInst) 20711700Santhony.gutierrez@amd.com{ 
20811700Santhony.gutierrez@amd.com gmIssuedRequests.push(gpuDynInst); 20911700Santhony.gutierrez@amd.com} 21011700Santhony.gutierrez@amd.com 21111700Santhony.gutierrez@amd.comvoid 21211700Santhony.gutierrez@amd.comGlobalMemPipeline::handleResponse(GPUDynInstPtr gpuDynInst) 21311700Santhony.gutierrez@amd.com{ 21411700Santhony.gutierrez@amd.com if (outOfOrderDataDelivery) { 21511700Santhony.gutierrez@amd.com if (gpuDynInst->isLoad() || gpuDynInst->isAtomic()) { 21611700Santhony.gutierrez@amd.com assert(isGMLdRespFIFOWrRdy()); 21711700Santhony.gutierrez@amd.com gmReturnedLoads.push(gpuDynInst); 21811700Santhony.gutierrez@amd.com } else { 21911700Santhony.gutierrez@amd.com assert(isGMStRespFIFOWrRdy()); 22011700Santhony.gutierrez@amd.com gmReturnedStores.push(gpuDynInst); 22111700Santhony.gutierrez@amd.com } 22211700Santhony.gutierrez@amd.com } else { 22311700Santhony.gutierrez@amd.com auto mem_req = gmOrderedRespBuffer.find(gpuDynInst->seqNum()); 22411700Santhony.gutierrez@amd.com // if we are getting a response for this mem request, 22511700Santhony.gutierrez@amd.com // then it ought to already be in the ordered response 22611700Santhony.gutierrez@amd.com // buffer 22711700Santhony.gutierrez@amd.com assert(mem_req != gmOrderedRespBuffer.end()); 22811700Santhony.gutierrez@amd.com mem_req->second.second = true; 22911700Santhony.gutierrez@amd.com } 23011700Santhony.gutierrez@amd.com} 23111700Santhony.gutierrez@amd.com 23211308Santhony.gutierrez@amd.comvoid 23311308Santhony.gutierrez@amd.comGlobalMemPipeline::regStats() 23411308Santhony.gutierrez@amd.com{ 23511308Santhony.gutierrez@amd.com loadVrfBankConflictCycles 23611308Santhony.gutierrez@amd.com .name(name() + ".load_vrf_bank_conflict_cycles") 23711308Santhony.gutierrez@amd.com .desc("total number of cycles GM data are delayed before updating " 23811308Santhony.gutierrez@amd.com "the VRF") 23911308Santhony.gutierrez@amd.com ; 24011308Santhony.gutierrez@amd.com} 241