// global_memory_pipeline.cc revision 11308
111308Santhony.gutierrez@amd.com/* 211308Santhony.gutierrez@amd.com * Copyright (c) 2014-2015 Advanced Micro Devices, Inc. 311308Santhony.gutierrez@amd.com * All rights reserved. 411308Santhony.gutierrez@amd.com * 511308Santhony.gutierrez@amd.com * For use for simulation and test purposes only 611308Santhony.gutierrez@amd.com * 711308Santhony.gutierrez@amd.com * Redistribution and use in source and binary forms, with or without 811308Santhony.gutierrez@amd.com * modification, are permitted provided that the following conditions are met: 911308Santhony.gutierrez@amd.com * 1011308Santhony.gutierrez@amd.com * 1. Redistributions of source code must retain the above copyright notice, 1111308Santhony.gutierrez@amd.com * this list of conditions and the following disclaimer. 1211308Santhony.gutierrez@amd.com * 1311308Santhony.gutierrez@amd.com * 2. Redistributions in binary form must reproduce the above copyright notice, 1411308Santhony.gutierrez@amd.com * this list of conditions and the following disclaimer in the documentation 1511308Santhony.gutierrez@amd.com * and/or other materials provided with the distribution. 1611308Santhony.gutierrez@amd.com * 1711308Santhony.gutierrez@amd.com * 3. Neither the name of the copyright holder nor the names of its contributors 1811308Santhony.gutierrez@amd.com * may be used to endorse or promote products derived from this software 1911308Santhony.gutierrez@amd.com * without specific prior written permission. 2011308Santhony.gutierrez@amd.com * 2111308Santhony.gutierrez@amd.com * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 2211308Santhony.gutierrez@amd.com * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 2311308Santhony.gutierrez@amd.com * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 2411308Santhony.gutierrez@amd.com * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 2511308Santhony.gutierrez@amd.com * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 2611308Santhony.gutierrez@amd.com * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 2711308Santhony.gutierrez@amd.com * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 2811308Santhony.gutierrez@amd.com * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 2911308Santhony.gutierrez@amd.com * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 3011308Santhony.gutierrez@amd.com * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 3111308Santhony.gutierrez@amd.com * POSSIBILITY OF SUCH DAMAGE. 3211308Santhony.gutierrez@amd.com * 3311308Santhony.gutierrez@amd.com * Author: John Kalamatianos, Sooraj Puthoor 3411308Santhony.gutierrez@amd.com */ 3511308Santhony.gutierrez@amd.com 3611308Santhony.gutierrez@amd.com#include "gpu-compute/global_memory_pipeline.hh" 3711308Santhony.gutierrez@amd.com 3811308Santhony.gutierrez@amd.com#include "debug/GPUMem.hh" 3911308Santhony.gutierrez@amd.com#include "debug/GPUReg.hh" 4011308Santhony.gutierrez@amd.com#include "gpu-compute/compute_unit.hh" 4111308Santhony.gutierrez@amd.com#include "gpu-compute/gpu_dyn_inst.hh" 4211308Santhony.gutierrez@amd.com#include "gpu-compute/shader.hh" 4311308Santhony.gutierrez@amd.com#include "gpu-compute/vector_register_file.hh" 4411308Santhony.gutierrez@amd.com#include "gpu-compute/wavefront.hh" 4511308Santhony.gutierrez@amd.com 4611308Santhony.gutierrez@amd.comGlobalMemPipeline::GlobalMemPipeline(const ComputeUnitParams* p) : 4711308Santhony.gutierrez@amd.com computeUnit(nullptr), gmQueueSize(p->global_mem_queue_size), 4811308Santhony.gutierrez@amd.com inflightStores(0), inflightLoads(0) 4911308Santhony.gutierrez@amd.com{ 5011308Santhony.gutierrez@amd.com} 5111308Santhony.gutierrez@amd.com 5211308Santhony.gutierrez@amd.comvoid 
5311308Santhony.gutierrez@amd.comGlobalMemPipeline::init(ComputeUnit *cu) 5411308Santhony.gutierrez@amd.com{ 5511308Santhony.gutierrez@amd.com computeUnit = cu; 5611308Santhony.gutierrez@amd.com globalMemSize = computeUnit->shader->globalMemSize; 5711308Santhony.gutierrez@amd.com _name = computeUnit->name() + ".GlobalMemPipeline"; 5811308Santhony.gutierrez@amd.com} 5911308Santhony.gutierrez@amd.com 6011308Santhony.gutierrez@amd.comvoid 6111308Santhony.gutierrez@amd.comGlobalMemPipeline::exec() 6211308Santhony.gutierrez@amd.com{ 6311308Santhony.gutierrez@amd.com // apply any returned global memory operations 6411308Santhony.gutierrez@amd.com GPUDynInstPtr m = !gmReturnedLoads.empty() ? gmReturnedLoads.front() : 6511308Santhony.gutierrez@amd.com !gmReturnedStores.empty() ? gmReturnedStores.front() : nullptr; 6611308Santhony.gutierrez@amd.com 6711308Santhony.gutierrez@amd.com bool accessVrf = true; 6811308Santhony.gutierrez@amd.com // check the VRF to see if the operands of a load (or load component 6911308Santhony.gutierrez@amd.com // of an atomic) are accessible 7011308Santhony.gutierrez@amd.com if ((m) && (m->m_op==Enums::MO_LD || MO_A(m->m_op))) { 7111308Santhony.gutierrez@amd.com Wavefront *w = computeUnit->wfList[m->simdId][m->wfSlotId]; 7211308Santhony.gutierrez@amd.com 7311308Santhony.gutierrez@amd.com accessVrf = 7411308Santhony.gutierrez@amd.com w->computeUnit->vrf[m->simdId]-> 7511308Santhony.gutierrez@amd.com vrfOperandAccessReady(m->seqNum(), w, m, 7611308Santhony.gutierrez@amd.com VrfAccessType::WRITE); 7711308Santhony.gutierrez@amd.com } 7811308Santhony.gutierrez@amd.com 7911308Santhony.gutierrez@amd.com if ((!gmReturnedStores.empty() || !gmReturnedLoads.empty()) && 8011308Santhony.gutierrez@amd.com m->latency.rdy() && computeUnit->glbMemToVrfBus.rdy() && 8111308Santhony.gutierrez@amd.com accessVrf && m->statusBitVector == VectorMask(0) && 8211308Santhony.gutierrez@amd.com (computeUnit->shader->coissue_return || 8311308Santhony.gutierrez@amd.com 
computeUnit->wfWait.at(m->pipeId).rdy())) { 8411308Santhony.gutierrez@amd.com 8511308Santhony.gutierrez@amd.com if (m->v_type == VT_32 && m->m_type == Enums::M_U8) 8611308Santhony.gutierrez@amd.com doGmReturn<uint32_t, uint8_t>(m); 8711308Santhony.gutierrez@amd.com else if (m->v_type == VT_32 && m->m_type == Enums::M_U16) 8811308Santhony.gutierrez@amd.com doGmReturn<uint32_t, uint16_t>(m); 8911308Santhony.gutierrez@amd.com else if (m->v_type == VT_32 && m->m_type == Enums::M_U32) 9011308Santhony.gutierrez@amd.com doGmReturn<uint32_t, uint32_t>(m); 9111308Santhony.gutierrez@amd.com else if (m->v_type == VT_32 && m->m_type == Enums::M_S8) 9211308Santhony.gutierrez@amd.com doGmReturn<int32_t, int8_t>(m); 9311308Santhony.gutierrez@amd.com else if (m->v_type == VT_32 && m->m_type == Enums::M_S16) 9411308Santhony.gutierrez@amd.com doGmReturn<int32_t, int16_t>(m); 9511308Santhony.gutierrez@amd.com else if (m->v_type == VT_32 && m->m_type == Enums::M_S32) 9611308Santhony.gutierrez@amd.com doGmReturn<int32_t, int32_t>(m); 9711308Santhony.gutierrez@amd.com else if (m->v_type == VT_32 && m->m_type == Enums::M_F16) 9811308Santhony.gutierrez@amd.com doGmReturn<float, Float16>(m); 9911308Santhony.gutierrez@amd.com else if (m->v_type == VT_32 && m->m_type == Enums::M_F32) 10011308Santhony.gutierrez@amd.com doGmReturn<float, float>(m); 10111308Santhony.gutierrez@amd.com else if (m->v_type == VT_64 && m->m_type == Enums::M_U8) 10211308Santhony.gutierrez@amd.com doGmReturn<uint64_t, uint8_t>(m); 10311308Santhony.gutierrez@amd.com else if (m->v_type == VT_64 && m->m_type == Enums::M_U16) 10411308Santhony.gutierrez@amd.com doGmReturn<uint64_t, uint16_t>(m); 10511308Santhony.gutierrez@amd.com else if (m->v_type == VT_64 && m->m_type == Enums::M_U32) 10611308Santhony.gutierrez@amd.com doGmReturn<uint64_t, uint32_t>(m); 10711308Santhony.gutierrez@amd.com else if (m->v_type == VT_64 && m->m_type == Enums::M_U64) 10811308Santhony.gutierrez@amd.com doGmReturn<uint64_t, uint64_t>(m); 
10911308Santhony.gutierrez@amd.com else if (m->v_type == VT_64 && m->m_type == Enums::M_S8) 11011308Santhony.gutierrez@amd.com doGmReturn<int64_t, int8_t>(m); 11111308Santhony.gutierrez@amd.com else if (m->v_type == VT_64 && m->m_type == Enums::M_S16) 11211308Santhony.gutierrez@amd.com doGmReturn<int64_t, int16_t>(m); 11311308Santhony.gutierrez@amd.com else if (m->v_type == VT_64 && m->m_type == Enums::M_S32) 11411308Santhony.gutierrez@amd.com doGmReturn<int64_t, int32_t>(m); 11511308Santhony.gutierrez@amd.com else if (m->v_type == VT_64 && m->m_type == Enums::M_S64) 11611308Santhony.gutierrez@amd.com doGmReturn<int64_t, int64_t>(m); 11711308Santhony.gutierrez@amd.com else if (m->v_type == VT_64 && m->m_type == Enums::M_F16) 11811308Santhony.gutierrez@amd.com doGmReturn<double, Float16>(m); 11911308Santhony.gutierrez@amd.com else if (m->v_type == VT_64 && m->m_type == Enums::M_F32) 12011308Santhony.gutierrez@amd.com doGmReturn<double, float>(m); 12111308Santhony.gutierrez@amd.com else if (m->v_type == VT_64 && m->m_type == Enums::M_F64) 12211308Santhony.gutierrez@amd.com doGmReturn<double, double>(m); 12311308Santhony.gutierrez@amd.com } 12411308Santhony.gutierrez@amd.com 12511308Santhony.gutierrez@amd.com // If pipeline has executed a global memory instruction 12611308Santhony.gutierrez@amd.com // execute global memory packets and issue global 12711308Santhony.gutierrez@amd.com // memory packets to DTLB 12811308Santhony.gutierrez@amd.com if (!gmIssuedRequests.empty()) { 12911308Santhony.gutierrez@amd.com GPUDynInstPtr mp = gmIssuedRequests.front(); 13011308Santhony.gutierrez@amd.com if (mp->m_op == Enums::MO_LD || 13111308Santhony.gutierrez@amd.com (mp->m_op >= Enums::MO_AAND && mp->m_op <= Enums::MO_AMIN) || 13211308Santhony.gutierrez@amd.com (mp->m_op >= Enums::MO_ANRAND && mp->m_op <= Enums::MO_ANRMIN)) { 13311308Santhony.gutierrez@amd.com 13411308Santhony.gutierrez@amd.com if (inflightLoads >= gmQueueSize) { 13511308Santhony.gutierrez@amd.com return; 
13611308Santhony.gutierrez@amd.com } else { 13711308Santhony.gutierrez@amd.com ++inflightLoads; 13811308Santhony.gutierrez@amd.com } 13911308Santhony.gutierrez@amd.com } else { 14011308Santhony.gutierrez@amd.com if (inflightStores >= gmQueueSize) { 14111308Santhony.gutierrez@amd.com return; 14211308Santhony.gutierrez@amd.com } else { 14311308Santhony.gutierrez@amd.com ++inflightStores; 14411308Santhony.gutierrez@amd.com } 14511308Santhony.gutierrez@amd.com } 14611308Santhony.gutierrez@amd.com 14711308Santhony.gutierrez@amd.com mp->initiateAcc(mp); 14811308Santhony.gutierrez@amd.com gmIssuedRequests.pop(); 14911308Santhony.gutierrez@amd.com 15011308Santhony.gutierrez@amd.com DPRINTF(GPUMem, "CU%d: WF[%d][%d] Popping 0 mem_op = %s\n", 15111308Santhony.gutierrez@amd.com computeUnit->cu_id, mp->simdId, mp->wfSlotId, 15211308Santhony.gutierrez@amd.com Enums::MemOpTypeStrings[mp->m_op]); 15311308Santhony.gutierrez@amd.com } 15411308Santhony.gutierrez@amd.com} 15511308Santhony.gutierrez@amd.com 15611308Santhony.gutierrez@amd.comtemplate<typename c0, typename c1> 15711308Santhony.gutierrez@amd.comvoid 15811308Santhony.gutierrez@amd.comGlobalMemPipeline::doGmReturn(GPUDynInstPtr m) 15911308Santhony.gutierrez@amd.com{ 16011308Santhony.gutierrez@amd.com Wavefront *w = computeUnit->wfList[m->simdId][m->wfSlotId]; 16111308Santhony.gutierrez@amd.com 16211308Santhony.gutierrez@amd.com // Return data to registers 16311308Santhony.gutierrez@amd.com if (m->m_op == Enums::MO_LD || MO_A(m->m_op) || MO_ANR(m->m_op)) { 16411308Santhony.gutierrez@amd.com gmReturnedLoads.pop(); 16511308Santhony.gutierrez@amd.com assert(inflightLoads > 0); 16611308Santhony.gutierrez@amd.com --inflightLoads; 16711308Santhony.gutierrez@amd.com 16811308Santhony.gutierrez@amd.com if (m->m_op == Enums::MO_LD || MO_A(m->m_op)) { 16911308Santhony.gutierrez@amd.com std::vector<uint32_t> regVec; 17011308Santhony.gutierrez@amd.com // iterate over number of destination register operands since 
17111308Santhony.gutierrez@amd.com // this is a load or atomic operation 17211308Santhony.gutierrez@amd.com for (int k = 0; k < m->n_reg; ++k) { 17311308Santhony.gutierrez@amd.com assert((sizeof(c1) * m->n_reg) <= MAX_WIDTH_FOR_MEM_INST); 17411308Santhony.gutierrez@amd.com int dst = m->dst_reg + k; 17511308Santhony.gutierrez@amd.com 17611308Santhony.gutierrez@amd.com if (m->n_reg > MAX_REGS_FOR_NON_VEC_MEM_INST) 17711308Santhony.gutierrez@amd.com dst = m->dst_reg_vec[k]; 17811308Santhony.gutierrez@amd.com // virtual->physical VGPR mapping 17911308Santhony.gutierrez@amd.com int physVgpr = w->remap(dst, sizeof(c0), 1); 18011308Santhony.gutierrez@amd.com // save the physical VGPR index 18111308Santhony.gutierrez@amd.com regVec.push_back(physVgpr); 18211308Santhony.gutierrez@amd.com c1 *p1 = &((c1*)m->d_data)[k * VSZ]; 18311308Santhony.gutierrez@amd.com 18411308Santhony.gutierrez@amd.com for (int i = 0; i < VSZ; ++i) { 18511308Santhony.gutierrez@amd.com if (m->exec_mask[i]) { 18611308Santhony.gutierrez@amd.com DPRINTF(GPUReg, "CU%d, WF[%d][%d], lane %d: " 18711308Santhony.gutierrez@amd.com "$%s%d <- %d global ld done (src = wavefront " 18811308Santhony.gutierrez@amd.com "ld inst)\n", w->computeUnit->cu_id, w->simdId, 18911308Santhony.gutierrez@amd.com w->wfSlotId, i, sizeof(c0) == 4 ? "s" : "d", 19011308Santhony.gutierrez@amd.com dst, *p1); 19111308Santhony.gutierrez@amd.com // write the value into the physical VGPR. This is a 19211308Santhony.gutierrez@amd.com // purely functional operation. No timing is modeled. 19311308Santhony.gutierrez@amd.com w->computeUnit->vrf[w->simdId]->write<c0>(physVgpr, 19411308Santhony.gutierrez@amd.com *p1, i); 19511308Santhony.gutierrez@amd.com } 19611308Santhony.gutierrez@amd.com ++p1; 19711308Santhony.gutierrez@amd.com } 19811308Santhony.gutierrez@amd.com } 19911308Santhony.gutierrez@amd.com 20011308Santhony.gutierrez@amd.com // Schedule the write operation of the load data on the VRF. 
20111308Santhony.gutierrez@amd.com // This simply models the timing aspect of the VRF write operation. 20211308Santhony.gutierrez@amd.com // It does not modify the physical VGPR. 20311308Santhony.gutierrez@amd.com loadVrfBankConflictCycles += 20411308Santhony.gutierrez@amd.com w->computeUnit->vrf[w->simdId]->exec(m->seqNum(), 20511308Santhony.gutierrez@amd.com w, regVec, sizeof(c0), 20611308Santhony.gutierrez@amd.com m->time); 20711308Santhony.gutierrez@amd.com } 20811308Santhony.gutierrez@amd.com } else { 20911308Santhony.gutierrez@amd.com gmReturnedStores.pop(); 21011308Santhony.gutierrez@amd.com assert(inflightStores > 0); 21111308Santhony.gutierrez@amd.com --inflightStores; 21211308Santhony.gutierrez@amd.com } 21311308Santhony.gutierrez@amd.com 21411308Santhony.gutierrez@amd.com // Decrement outstanding register count 21511308Santhony.gutierrez@amd.com computeUnit->shader->ScheduleAdd(&w->outstanding_reqs, m->time, -1); 21611308Santhony.gutierrez@amd.com 21711308Santhony.gutierrez@amd.com if (m->m_op == Enums::MO_ST || MO_A(m->m_op) || MO_ANR(m->m_op) || 21811308Santhony.gutierrez@amd.com MO_H(m->m_op)) { 21911308Santhony.gutierrez@amd.com computeUnit->shader->ScheduleAdd(&w->outstanding_reqs_wr_gm, m->time, 22011308Santhony.gutierrez@amd.com -1); 22111308Santhony.gutierrez@amd.com } 22211308Santhony.gutierrez@amd.com 22311308Santhony.gutierrez@amd.com if (m->m_op == Enums::MO_LD || MO_A(m->m_op) || MO_ANR(m->m_op)) { 22411308Santhony.gutierrez@amd.com computeUnit->shader->ScheduleAdd(&w->outstanding_reqs_rd_gm, m->time, 22511308Santhony.gutierrez@amd.com -1); 22611308Santhony.gutierrez@amd.com } 22711308Santhony.gutierrez@amd.com 22811308Santhony.gutierrez@amd.com // Mark write bus busy for appropriate amount of time 22911308Santhony.gutierrez@amd.com computeUnit->glbMemToVrfBus.set(m->time); 23011308Santhony.gutierrez@amd.com if (!computeUnit->shader->coissue_return) 23111308Santhony.gutierrez@amd.com w->computeUnit->wfWait.at(m->pipeId).set(m->time); 
23211308Santhony.gutierrez@amd.com} 23311308Santhony.gutierrez@amd.com 23411308Santhony.gutierrez@amd.comvoid 23511308Santhony.gutierrez@amd.comGlobalMemPipeline::regStats() 23611308Santhony.gutierrez@amd.com{ 23711308Santhony.gutierrez@amd.com loadVrfBankConflictCycles 23811308Santhony.gutierrez@amd.com .name(name() + ".load_vrf_bank_conflict_cycles") 23911308Santhony.gutierrez@amd.com .desc("total number of cycles GM data are delayed before updating " 24011308Santhony.gutierrez@amd.com "the VRF") 24111308Santhony.gutierrez@amd.com ; 24211308Santhony.gutierrez@amd.com} 243