// local_memory_pipeline.cc revision 11308
111308Santhony.gutierrez@amd.com/* 211308Santhony.gutierrez@amd.com * Copyright (c) 2014-2015 Advanced Micro Devices, Inc. 311308Santhony.gutierrez@amd.com * All rights reserved. 411308Santhony.gutierrez@amd.com * 511308Santhony.gutierrez@amd.com * For use for simulation and test purposes only 611308Santhony.gutierrez@amd.com * 711308Santhony.gutierrez@amd.com * Redistribution and use in source and binary forms, with or without 811308Santhony.gutierrez@amd.com * modification, are permitted provided that the following conditions are met: 911308Santhony.gutierrez@amd.com * 1011308Santhony.gutierrez@amd.com * 1. Redistributions of source code must retain the above copyright notice, 1111308Santhony.gutierrez@amd.com * this list of conditions and the following disclaimer. 1211308Santhony.gutierrez@amd.com * 1311308Santhony.gutierrez@amd.com * 2. Redistributions in binary form must reproduce the above copyright notice, 1411308Santhony.gutierrez@amd.com * this list of conditions and the following disclaimer in the documentation 1511308Santhony.gutierrez@amd.com * and/or other materials provided with the distribution. 1611308Santhony.gutierrez@amd.com * 1711308Santhony.gutierrez@amd.com * 3. Neither the name of the copyright holder nor the names of its contributors 1811308Santhony.gutierrez@amd.com * may be used to endorse or promote products derived from this software 1911308Santhony.gutierrez@amd.com * without specific prior written permission. 2011308Santhony.gutierrez@amd.com * 2111308Santhony.gutierrez@amd.com * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 2211308Santhony.gutierrez@amd.com * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 2311308Santhony.gutierrez@amd.com * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 2411308Santhony.gutierrez@amd.com * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 2511308Santhony.gutierrez@amd.com * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 2611308Santhony.gutierrez@amd.com * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 2711308Santhony.gutierrez@amd.com * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 2811308Santhony.gutierrez@amd.com * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 2911308Santhony.gutierrez@amd.com * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 3011308Santhony.gutierrez@amd.com * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 3111308Santhony.gutierrez@amd.com * POSSIBILITY OF SUCH DAMAGE. 3211308Santhony.gutierrez@amd.com * 3311308Santhony.gutierrez@amd.com * Author: Sooraj Puthoor 3411308Santhony.gutierrez@amd.com */ 3511308Santhony.gutierrez@amd.com 3611308Santhony.gutierrez@amd.com#include "gpu-compute/local_memory_pipeline.hh" 3711308Santhony.gutierrez@amd.com 3811308Santhony.gutierrez@amd.com#include "debug/GPUPort.hh" 3911308Santhony.gutierrez@amd.com#include "gpu-compute/compute_unit.hh" 4011308Santhony.gutierrez@amd.com#include "gpu-compute/gpu_dyn_inst.hh" 4111308Santhony.gutierrez@amd.com#include "gpu-compute/shader.hh" 4211308Santhony.gutierrez@amd.com#include "gpu-compute/vector_register_file.hh" 4311308Santhony.gutierrez@amd.com#include "gpu-compute/wavefront.hh" 4411308Santhony.gutierrez@amd.com 4511308Santhony.gutierrez@amd.comLocalMemPipeline::LocalMemPipeline(const ComputeUnitParams* p) : 4611308Santhony.gutierrez@amd.com computeUnit(nullptr), lmQueueSize(p->local_mem_queue_size) 4711308Santhony.gutierrez@amd.com{ 4811308Santhony.gutierrez@amd.com} 4911308Santhony.gutierrez@amd.com 5011308Santhony.gutierrez@amd.comvoid 5111308Santhony.gutierrez@amd.comLocalMemPipeline::init(ComputeUnit *cu) 5211308Santhony.gutierrez@amd.com{ 5311308Santhony.gutierrez@amd.com computeUnit = cu; 
5411308Santhony.gutierrez@amd.com _name = computeUnit->name() + ".LocalMemPipeline"; 5511308Santhony.gutierrez@amd.com} 5611308Santhony.gutierrez@amd.com 5711308Santhony.gutierrez@amd.comvoid 5811308Santhony.gutierrez@amd.comLocalMemPipeline::exec() 5911308Santhony.gutierrez@amd.com{ 6011308Santhony.gutierrez@amd.com // apply any returned shared (LDS) memory operations 6111308Santhony.gutierrez@amd.com GPUDynInstPtr m = !lmReturnedRequests.empty() ? 6211308Santhony.gutierrez@amd.com lmReturnedRequests.front() : nullptr; 6311308Santhony.gutierrez@amd.com 6411308Santhony.gutierrez@amd.com bool accessVrf = true; 6511308Santhony.gutierrez@amd.com if ((m) && (m->m_op==Enums::MO_LD || MO_A(m->m_op))) { 6611308Santhony.gutierrez@amd.com Wavefront *w = computeUnit->wfList[m->simdId][m->wfSlotId]; 6711308Santhony.gutierrez@amd.com 6811308Santhony.gutierrez@amd.com accessVrf = 6911308Santhony.gutierrez@amd.com w->computeUnit->vrf[m->simdId]-> 7011308Santhony.gutierrez@amd.com vrfOperandAccessReady(m->seqNum(), w, m, 7111308Santhony.gutierrez@amd.com VrfAccessType::WRITE); 7211308Santhony.gutierrez@amd.com } 7311308Santhony.gutierrez@amd.com 7411308Santhony.gutierrez@amd.com if (!lmReturnedRequests.empty() && m->latency.rdy() && accessVrf && 7511308Santhony.gutierrez@amd.com computeUnit->locMemToVrfBus.rdy() && (computeUnit->shader->coissue_return 7611308Santhony.gutierrez@amd.com || computeUnit->wfWait.at(m->pipeId).rdy())) { 7711308Santhony.gutierrez@amd.com if (m->v_type == VT_32 && m->m_type == Enums::M_U8) 7811308Santhony.gutierrez@amd.com doSmReturn<uint32_t, uint8_t>(m); 7911308Santhony.gutierrez@amd.com else if (m->v_type == VT_32 && m->m_type == Enums::M_U16) 8011308Santhony.gutierrez@amd.com doSmReturn<uint32_t, uint16_t>(m); 8111308Santhony.gutierrez@amd.com else if (m->v_type == VT_32 && m->m_type == Enums::M_U32) 8211308Santhony.gutierrez@amd.com doSmReturn<uint32_t, uint32_t>(m); 8311308Santhony.gutierrez@amd.com else if (m->v_type == VT_32 && m->m_type == 
Enums::M_S8) 8411308Santhony.gutierrez@amd.com doSmReturn<int32_t, int8_t>(m); 8511308Santhony.gutierrez@amd.com else if (m->v_type == VT_32 && m->m_type == Enums::M_S16) 8611308Santhony.gutierrez@amd.com doSmReturn<int32_t, int16_t>(m); 8711308Santhony.gutierrez@amd.com else if (m->v_type == VT_32 && m->m_type == Enums::M_S32) 8811308Santhony.gutierrez@amd.com doSmReturn<int32_t, int32_t>(m); 8911308Santhony.gutierrez@amd.com else if (m->v_type == VT_32 && m->m_type == Enums::M_F16) 9011308Santhony.gutierrez@amd.com doSmReturn<float, Float16>(m); 9111308Santhony.gutierrez@amd.com else if (m->v_type == VT_32 && m->m_type == Enums::M_F32) 9211308Santhony.gutierrez@amd.com doSmReturn<float, float>(m); 9311308Santhony.gutierrez@amd.com else if (m->v_type == VT_64 && m->m_type == Enums::M_U8) 9411308Santhony.gutierrez@amd.com doSmReturn<uint64_t, uint8_t>(m); 9511308Santhony.gutierrez@amd.com else if (m->v_type == VT_64 && m->m_type == Enums::M_U16) 9611308Santhony.gutierrez@amd.com doSmReturn<uint64_t, uint16_t>(m); 9711308Santhony.gutierrez@amd.com else if (m->v_type == VT_64 && m->m_type == Enums::M_U32) 9811308Santhony.gutierrez@amd.com doSmReturn<uint64_t, uint32_t>(m); 9911308Santhony.gutierrez@amd.com else if (m->v_type == VT_64 && m->m_type == Enums::M_U64) 10011308Santhony.gutierrez@amd.com doSmReturn<uint64_t, uint64_t>(m); 10111308Santhony.gutierrez@amd.com else if (m->v_type == VT_64 && m->m_type == Enums::M_S8) 10211308Santhony.gutierrez@amd.com doSmReturn<int64_t, int8_t>(m); 10311308Santhony.gutierrez@amd.com else if (m->v_type == VT_64 && m->m_type == Enums::M_S16) 10411308Santhony.gutierrez@amd.com doSmReturn<int64_t, int16_t>(m); 10511308Santhony.gutierrez@amd.com else if (m->v_type == VT_64 && m->m_type == Enums::M_S32) 10611308Santhony.gutierrez@amd.com doSmReturn<int64_t, int32_t>(m); 10711308Santhony.gutierrez@amd.com else if (m->v_type == VT_64 && m->m_type == Enums::M_S64) 10811308Santhony.gutierrez@amd.com doSmReturn<int64_t, int64_t>(m); 
10911308Santhony.gutierrez@amd.com else if (m->v_type == VT_64 && m->m_type == Enums::M_F16) 11011308Santhony.gutierrez@amd.com doSmReturn<double, Float16>(m); 11111308Santhony.gutierrez@amd.com else if (m->v_type == VT_64 && m->m_type == Enums::M_F32) 11211308Santhony.gutierrez@amd.com doSmReturn<double, float>(m); 11311308Santhony.gutierrez@amd.com else if (m->v_type == VT_64 && m->m_type == Enums::M_F64) 11411308Santhony.gutierrez@amd.com doSmReturn<double, double>(m); 11511308Santhony.gutierrez@amd.com } 11611308Santhony.gutierrez@amd.com 11711308Santhony.gutierrez@amd.com // If pipeline has executed a local memory instruction 11811308Santhony.gutierrez@amd.com // execute local memory packet and issue the packets 11911308Santhony.gutierrez@amd.com // to LDS 12011308Santhony.gutierrez@amd.com if (!lmIssuedRequests.empty() && lmReturnedRequests.size() < lmQueueSize) { 12111308Santhony.gutierrez@amd.com 12211308Santhony.gutierrez@amd.com GPUDynInstPtr m = lmIssuedRequests.front(); 12311308Santhony.gutierrez@amd.com 12411308Santhony.gutierrez@amd.com bool returnVal = computeUnit->sendToLds(m); 12511308Santhony.gutierrez@amd.com if (!returnVal) { 12611308Santhony.gutierrez@amd.com DPRINTF(GPUPort, "packet was nack'd and put in retry queue"); 12711308Santhony.gutierrez@amd.com } 12811308Santhony.gutierrez@amd.com lmIssuedRequests.pop(); 12911308Santhony.gutierrez@amd.com } 13011308Santhony.gutierrez@amd.com} 13111308Santhony.gutierrez@amd.com 13211308Santhony.gutierrez@amd.comtemplate<typename c0, typename c1> 13311308Santhony.gutierrez@amd.comvoid 13411308Santhony.gutierrez@amd.comLocalMemPipeline::doSmReturn(GPUDynInstPtr m) 13511308Santhony.gutierrez@amd.com{ 13611308Santhony.gutierrez@amd.com lmReturnedRequests.pop(); 13711308Santhony.gutierrez@amd.com Wavefront *w = computeUnit->wfList[m->simdId][m->wfSlotId]; 13811308Santhony.gutierrez@amd.com 13911308Santhony.gutierrez@amd.com // Return data to registers 14011308Santhony.gutierrez@amd.com if (m->m_op == 
Enums::MO_LD || MO_A(m->m_op)) { 14111308Santhony.gutierrez@amd.com std::vector<uint32_t> regVec; 14211308Santhony.gutierrez@amd.com for (int k = 0; k < m->n_reg; ++k) { 14311308Santhony.gutierrez@amd.com int dst = m->dst_reg+k; 14411308Santhony.gutierrez@amd.com 14511308Santhony.gutierrez@amd.com if (m->n_reg > MAX_REGS_FOR_NON_VEC_MEM_INST) 14611308Santhony.gutierrez@amd.com dst = m->dst_reg_vec[k]; 14711308Santhony.gutierrez@amd.com // virtual->physical VGPR mapping 14811308Santhony.gutierrez@amd.com int physVgpr = w->remap(dst,sizeof(c0),1); 14911308Santhony.gutierrez@amd.com // save the physical VGPR index 15011308Santhony.gutierrez@amd.com regVec.push_back(physVgpr); 15111308Santhony.gutierrez@amd.com c1 *p1 = &((c1*)m->d_data)[k * VSZ]; 15211308Santhony.gutierrez@amd.com 15311308Santhony.gutierrez@amd.com for (int i = 0; i < VSZ; ++i) { 15411308Santhony.gutierrez@amd.com if (m->exec_mask[i]) { 15511308Santhony.gutierrez@amd.com // write the value into the physical VGPR. This is a purely 15611308Santhony.gutierrez@amd.com // functional operation. No timing is modeled. 15711308Santhony.gutierrez@amd.com w->computeUnit->vrf[w->simdId]->write<c0>(physVgpr, 15811308Santhony.gutierrez@amd.com *p1, i); 15911308Santhony.gutierrez@amd.com } 16011308Santhony.gutierrez@amd.com ++p1; 16111308Santhony.gutierrez@amd.com } 16211308Santhony.gutierrez@amd.com } 16311308Santhony.gutierrez@amd.com 16411308Santhony.gutierrez@amd.com // Schedule the write operation of the load data on the VRF. This simply 16511308Santhony.gutierrez@amd.com // models the timing aspect of the VRF write operation. It does not 16611308Santhony.gutierrez@amd.com // modify the physical VGPR. 
16711308Santhony.gutierrez@amd.com loadVrfBankConflictCycles += 16811308Santhony.gutierrez@amd.com w->computeUnit->vrf[w->simdId]->exec(m->seqNum(), w, 16911308Santhony.gutierrez@amd.com regVec, sizeof(c0), m->time); 17011308Santhony.gutierrez@amd.com } 17111308Santhony.gutierrez@amd.com 17211308Santhony.gutierrez@amd.com // Decrement outstanding request count 17311308Santhony.gutierrez@amd.com computeUnit->shader->ScheduleAdd(&w->outstanding_reqs, m->time, -1); 17411308Santhony.gutierrez@amd.com 17511308Santhony.gutierrez@amd.com if (m->m_op == Enums::MO_ST || MO_A(m->m_op) || MO_ANR(m->m_op) 17611308Santhony.gutierrez@amd.com || MO_H(m->m_op)) { 17711308Santhony.gutierrez@amd.com computeUnit->shader->ScheduleAdd(&w->outstanding_reqs_wr_lm, 17811308Santhony.gutierrez@amd.com m->time, -1); 17911308Santhony.gutierrez@amd.com } 18011308Santhony.gutierrez@amd.com 18111308Santhony.gutierrez@amd.com if (m->m_op == Enums::MO_LD || MO_A(m->m_op) || MO_ANR(m->m_op)) { 18211308Santhony.gutierrez@amd.com computeUnit->shader->ScheduleAdd(&w->outstanding_reqs_rd_lm, 18311308Santhony.gutierrez@amd.com m->time, -1); 18411308Santhony.gutierrez@amd.com } 18511308Santhony.gutierrez@amd.com 18611308Santhony.gutierrez@amd.com // Mark write bus busy for appropriate amount of time 18711308Santhony.gutierrez@amd.com computeUnit->locMemToVrfBus.set(m->time); 18811308Santhony.gutierrez@amd.com if (computeUnit->shader->coissue_return == 0) 18911308Santhony.gutierrez@amd.com w->computeUnit->wfWait.at(m->pipeId).set(m->time); 19011308Santhony.gutierrez@amd.com} 19111308Santhony.gutierrez@amd.com 19211308Santhony.gutierrez@amd.comvoid 19311308Santhony.gutierrez@amd.comLocalMemPipeline::regStats() 19411308Santhony.gutierrez@amd.com{ 19511308Santhony.gutierrez@amd.com loadVrfBankConflictCycles 19611308Santhony.gutierrez@amd.com .name(name() + ".load_vrf_bank_conflict_cycles") 19711308Santhony.gutierrez@amd.com .desc("total number of cycles LDS data are delayed before updating " 
19811308Santhony.gutierrez@amd.com "the VRF") 19911308Santhony.gutierrez@amd.com ; 20011308Santhony.gutierrez@amd.com} 201