// wavefront.cc revision 11883
111308Santhony.gutierrez@amd.com/* 211308Santhony.gutierrez@amd.com * Copyright (c) 2011-2015 Advanced Micro Devices, Inc. 311308Santhony.gutierrez@amd.com * All rights reserved. 411308Santhony.gutierrez@amd.com * 511308Santhony.gutierrez@amd.com * For use for simulation and test purposes only 611308Santhony.gutierrez@amd.com * 711308Santhony.gutierrez@amd.com * Redistribution and use in source and binary forms, with or without 811308Santhony.gutierrez@amd.com * modification, are permitted provided that the following conditions are met: 911308Santhony.gutierrez@amd.com * 1011308Santhony.gutierrez@amd.com * 1. Redistributions of source code must retain the above copyright notice, 1111308Santhony.gutierrez@amd.com * this list of conditions and the following disclaimer. 1211308Santhony.gutierrez@amd.com * 1311308Santhony.gutierrez@amd.com * 2. Redistributions in binary form must reproduce the above copyright notice, 1411308Santhony.gutierrez@amd.com * this list of conditions and the following disclaimer in the documentation 1511308Santhony.gutierrez@amd.com * and/or other materials provided with the distribution. 1611308Santhony.gutierrez@amd.com * 1711308Santhony.gutierrez@amd.com * 3. Neither the name of the copyright holder nor the names of its contributors 1811308Santhony.gutierrez@amd.com * may be used to endorse or promote products derived from this software 1911308Santhony.gutierrez@amd.com * without specific prior written permission. 2011308Santhony.gutierrez@amd.com * 2111308Santhony.gutierrez@amd.com * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 2211308Santhony.gutierrez@amd.com * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 2311308Santhony.gutierrez@amd.com * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 2411308Santhony.gutierrez@amd.com * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 2511308Santhony.gutierrez@amd.com * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 2611308Santhony.gutierrez@amd.com * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 2711308Santhony.gutierrez@amd.com * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 2811308Santhony.gutierrez@amd.com * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 2911308Santhony.gutierrez@amd.com * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 3011308Santhony.gutierrez@amd.com * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 3111308Santhony.gutierrez@amd.com * POSSIBILITY OF SUCH DAMAGE. 3211308Santhony.gutierrez@amd.com * 3311308Santhony.gutierrez@amd.com * Author: Lisa Hsu 3411308Santhony.gutierrez@amd.com */ 3511308Santhony.gutierrez@amd.com 3611308Santhony.gutierrez@amd.com#include "gpu-compute/wavefront.hh" 3711308Santhony.gutierrez@amd.com 3811308Santhony.gutierrez@amd.com#include "debug/GPUExec.hh" 3911308Santhony.gutierrez@amd.com#include "debug/WavefrontStack.hh" 4011308Santhony.gutierrez@amd.com#include "gpu-compute/compute_unit.hh" 4111308Santhony.gutierrez@amd.com#include "gpu-compute/gpu_dyn_inst.hh" 4211308Santhony.gutierrez@amd.com#include "gpu-compute/shader.hh" 4311308Santhony.gutierrez@amd.com#include "gpu-compute/vector_register_file.hh" 4411308Santhony.gutierrez@amd.com 4511308Santhony.gutierrez@amd.comWavefront* 4611308Santhony.gutierrez@amd.comWavefrontParams::create() 4711308Santhony.gutierrez@amd.com{ 4811308Santhony.gutierrez@amd.com return new Wavefront(this); 4911308Santhony.gutierrez@amd.com} 5011308Santhony.gutierrez@amd.com 5111308Santhony.gutierrez@amd.comWavefront::Wavefront(const Params *p) 5211883Santhony.gutierrez@amd.com : SimObject(p), callArgMem(nullptr), _gpuISA() 5311308Santhony.gutierrez@amd.com{ 5411639Salexandru.dutu@amd.com lastTrace = 0; 
5511308Santhony.gutierrez@amd.com simdId = p->simdId; 5611308Santhony.gutierrez@amd.com wfSlotId = p->wf_slot_id; 5711308Santhony.gutierrez@amd.com status = S_STOPPED; 5811308Santhony.gutierrez@amd.com reservedVectorRegs = 0; 5911308Santhony.gutierrez@amd.com startVgprIndex = 0; 6011639Salexandru.dutu@amd.com outstandingReqs = 0; 6111639Salexandru.dutu@amd.com memReqsInPipe = 0; 6211639Salexandru.dutu@amd.com outstandingReqsWrGm = 0; 6311639Salexandru.dutu@amd.com outstandingReqsWrLm = 0; 6411639Salexandru.dutu@amd.com outstandingReqsRdGm = 0; 6511639Salexandru.dutu@amd.com outstandingReqsRdLm = 0; 6611639Salexandru.dutu@amd.com rdLmReqsInPipe = 0; 6711639Salexandru.dutu@amd.com rdGmReqsInPipe = 0; 6811639Salexandru.dutu@amd.com wrLmReqsInPipe = 0; 6911639Salexandru.dutu@amd.com wrGmReqsInPipe = 0; 7011308Santhony.gutierrez@amd.com 7111639Salexandru.dutu@amd.com barrierCnt = 0; 7211639Salexandru.dutu@amd.com oldBarrierCnt = 0; 7311308Santhony.gutierrez@amd.com stalledAtBarrier = false; 7411308Santhony.gutierrez@amd.com 7511639Salexandru.dutu@amd.com memTraceBusy = 0; 7611639Salexandru.dutu@amd.com oldVgprTcnt = 0xffffffffffffffffll; 7711639Salexandru.dutu@amd.com oldDgprTcnt = 0xffffffffffffffffll; 7811639Salexandru.dutu@amd.com oldVgpr.resize(p->wfSize); 7911308Santhony.gutierrez@amd.com 8011308Santhony.gutierrez@amd.com pendingFetch = false; 8111308Santhony.gutierrez@amd.com dropFetch = false; 8211308Santhony.gutierrez@amd.com condRegState = new ConditionRegisterState(); 8311308Santhony.gutierrez@amd.com maxSpVgprs = 0; 8411308Santhony.gutierrez@amd.com maxDpVgprs = 0; 8511639Salexandru.dutu@amd.com lastAddr.resize(p->wfSize); 8611639Salexandru.dutu@amd.com workItemFlatId.resize(p->wfSize); 8711639Salexandru.dutu@amd.com oldDgpr.resize(p->wfSize); 8811639Salexandru.dutu@amd.com barCnt.resize(p->wfSize); 8911534Sjohn.kalamatianos@amd.com for (int i = 0; i < 3; ++i) { 9011639Salexandru.dutu@amd.com workItemId[i].resize(p->wfSize); 9111534Sjohn.kalamatianos@amd.com 
} 9211308Santhony.gutierrez@amd.com} 9311308Santhony.gutierrez@amd.com 9411308Santhony.gutierrez@amd.comvoid 9511308Santhony.gutierrez@amd.comWavefront::regStats() 9611308Santhony.gutierrez@amd.com{ 9711523Sdavid.guillen@arm.com SimObject::regStats(); 9811523Sdavid.guillen@arm.com 9911308Santhony.gutierrez@amd.com srcRegOpDist 10011308Santhony.gutierrez@amd.com .init(0, 4, 2) 10111308Santhony.gutierrez@amd.com .name(name() + ".src_reg_operand_dist") 10211308Santhony.gutierrez@amd.com .desc("number of executed instructions with N source register operands") 10311308Santhony.gutierrez@amd.com ; 10411308Santhony.gutierrez@amd.com 10511308Santhony.gutierrez@amd.com dstRegOpDist 10611308Santhony.gutierrez@amd.com .init(0, 3, 2) 10711308Santhony.gutierrez@amd.com .name(name() + ".dst_reg_operand_dist") 10811308Santhony.gutierrez@amd.com .desc("number of executed instructions with N destination register " 10911308Santhony.gutierrez@amd.com "operands") 11011308Santhony.gutierrez@amd.com ; 11111308Santhony.gutierrez@amd.com 11211308Santhony.gutierrez@amd.com // FIXME: the name of the WF needs to be unique 11311308Santhony.gutierrez@amd.com numTimesBlockedDueWAXDependencies 11411308Santhony.gutierrez@amd.com .name(name() + ".timesBlockedDueWAXDependencies") 11511308Santhony.gutierrez@amd.com .desc("number of times the wf's instructions are blocked due to WAW " 11611308Santhony.gutierrez@amd.com "or WAR dependencies") 11711308Santhony.gutierrez@amd.com ; 11811308Santhony.gutierrez@amd.com 11911308Santhony.gutierrez@amd.com // FIXME: the name of the WF needs to be unique 12011308Santhony.gutierrez@amd.com numTimesBlockedDueRAWDependencies 12111308Santhony.gutierrez@amd.com .name(name() + ".timesBlockedDueRAWDependencies") 12211308Santhony.gutierrez@amd.com .desc("number of times the wf's instructions are blocked due to RAW " 12311308Santhony.gutierrez@amd.com "dependencies") 12411308Santhony.gutierrez@amd.com ; 12511308Santhony.gutierrez@amd.com 
12611308Santhony.gutierrez@amd.com // FIXME: the name of the WF needs to be unique 12711308Santhony.gutierrez@amd.com numTimesBlockedDueVrfPortAvail 12811308Santhony.gutierrez@amd.com .name(name() + ".timesBlockedDueVrfPortAvail") 12911308Santhony.gutierrez@amd.com .desc("number of times instructions are blocked due to VRF port " 13011308Santhony.gutierrez@amd.com "availability") 13111308Santhony.gutierrez@amd.com ; 13211308Santhony.gutierrez@amd.com} 13311308Santhony.gutierrez@amd.com 13411308Santhony.gutierrez@amd.comvoid 13511308Santhony.gutierrez@amd.comWavefront::init() 13611308Santhony.gutierrez@amd.com{ 13711308Santhony.gutierrez@amd.com reservedVectorRegs = 0; 13811308Santhony.gutierrez@amd.com startVgprIndex = 0; 13911308Santhony.gutierrez@amd.com} 14011308Santhony.gutierrez@amd.com 14111308Santhony.gutierrez@amd.comvoid 14211308Santhony.gutierrez@amd.comWavefront::resizeRegFiles(int num_cregs, int num_sregs, int num_dregs) 14311308Santhony.gutierrez@amd.com{ 14411308Santhony.gutierrez@amd.com condRegState->init(num_cregs); 14511308Santhony.gutierrez@amd.com maxSpVgprs = num_sregs; 14611308Santhony.gutierrez@amd.com maxDpVgprs = num_dregs; 14711308Santhony.gutierrez@amd.com} 14811308Santhony.gutierrez@amd.com 14911308Santhony.gutierrez@amd.comWavefront::~Wavefront() 15011308Santhony.gutierrez@amd.com{ 15111308Santhony.gutierrez@amd.com if (callArgMem) 15211308Santhony.gutierrez@amd.com delete callArgMem; 15311534Sjohn.kalamatianos@amd.com delete condRegState; 15411308Santhony.gutierrez@amd.com} 15511308Santhony.gutierrez@amd.com 15611308Santhony.gutierrez@amd.comvoid 15711640Salexandru.dutu@amd.comWavefront::start(uint64_t _wf_dyn_id,uint64_t _base_ptr) 15811308Santhony.gutierrez@amd.com{ 15911640Salexandru.dutu@amd.com wfDynId = _wf_dyn_id; 16011639Salexandru.dutu@amd.com basePtr = _base_ptr; 16111308Santhony.gutierrez@amd.com status = S_RUNNING; 16211308Santhony.gutierrez@amd.com} 16311308Santhony.gutierrez@amd.com 16411308Santhony.gutierrez@amd.combool 
16511308Santhony.gutierrez@amd.comWavefront::isGmInstruction(GPUDynInstPtr ii) 16611308Santhony.gutierrez@amd.com{ 16711692Santhony.gutierrez@amd.com if (ii->isGlobalMem() || ii->isFlat()) 16811308Santhony.gutierrez@amd.com return true; 16911308Santhony.gutierrez@amd.com 17011308Santhony.gutierrez@amd.com return false; 17111308Santhony.gutierrez@amd.com} 17211308Santhony.gutierrez@amd.com 17311308Santhony.gutierrez@amd.combool 17411308Santhony.gutierrez@amd.comWavefront::isLmInstruction(GPUDynInstPtr ii) 17511308Santhony.gutierrez@amd.com{ 17611692Santhony.gutierrez@amd.com if (ii->isLocalMem()) { 17711308Santhony.gutierrez@amd.com return true; 17811308Santhony.gutierrez@amd.com } 17911308Santhony.gutierrez@amd.com 18011308Santhony.gutierrez@amd.com return false; 18111308Santhony.gutierrez@amd.com} 18211308Santhony.gutierrez@amd.com 18311308Santhony.gutierrez@amd.combool 18411308Santhony.gutierrez@amd.comWavefront::isOldestInstALU() 18511308Santhony.gutierrez@amd.com{ 18611308Santhony.gutierrez@amd.com assert(!instructionBuffer.empty()); 18711308Santhony.gutierrez@amd.com GPUDynInstPtr ii = instructionBuffer.front(); 18811308Santhony.gutierrez@amd.com 18911692Santhony.gutierrez@amd.com if (status != S_STOPPED && (ii->isNop() || 19011692Santhony.gutierrez@amd.com ii->isReturn() || ii->isBranch() || 19111692Santhony.gutierrez@amd.com ii->isALU() || (ii->isKernArgSeg() && ii->isLoad()))) { 19211308Santhony.gutierrez@amd.com return true; 19311308Santhony.gutierrez@amd.com } 19411308Santhony.gutierrez@amd.com 19511308Santhony.gutierrez@amd.com return false; 19611308Santhony.gutierrez@amd.com} 19711308Santhony.gutierrez@amd.com 19811308Santhony.gutierrez@amd.combool 19911308Santhony.gutierrez@amd.comWavefront::isOldestInstBarrier() 20011308Santhony.gutierrez@amd.com{ 20111308Santhony.gutierrez@amd.com assert(!instructionBuffer.empty()); 20211308Santhony.gutierrez@amd.com GPUDynInstPtr ii = instructionBuffer.front(); 20311308Santhony.gutierrez@amd.com 
20411692Santhony.gutierrez@amd.com if (status != S_STOPPED && ii->isBarrier()) { 20511308Santhony.gutierrez@amd.com return true; 20611308Santhony.gutierrez@amd.com } 20711308Santhony.gutierrez@amd.com 20811308Santhony.gutierrez@amd.com return false; 20911308Santhony.gutierrez@amd.com} 21011308Santhony.gutierrez@amd.com 21111308Santhony.gutierrez@amd.combool 21211308Santhony.gutierrez@amd.comWavefront::isOldestInstGMem() 21311308Santhony.gutierrez@amd.com{ 21411308Santhony.gutierrez@amd.com assert(!instructionBuffer.empty()); 21511308Santhony.gutierrez@amd.com GPUDynInstPtr ii = instructionBuffer.front(); 21611308Santhony.gutierrez@amd.com 21711692Santhony.gutierrez@amd.com if (status != S_STOPPED && ii->isGlobalMem()) { 21811308Santhony.gutierrez@amd.com return true; 21911308Santhony.gutierrez@amd.com } 22011308Santhony.gutierrez@amd.com 22111308Santhony.gutierrez@amd.com return false; 22211308Santhony.gutierrez@amd.com} 22311308Santhony.gutierrez@amd.com 22411308Santhony.gutierrez@amd.combool 22511308Santhony.gutierrez@amd.comWavefront::isOldestInstLMem() 22611308Santhony.gutierrez@amd.com{ 22711308Santhony.gutierrez@amd.com assert(!instructionBuffer.empty()); 22811308Santhony.gutierrez@amd.com GPUDynInstPtr ii = instructionBuffer.front(); 22911308Santhony.gutierrez@amd.com 23011692Santhony.gutierrez@amd.com if (status != S_STOPPED && ii->isLocalMem()) { 23111308Santhony.gutierrez@amd.com return true; 23211308Santhony.gutierrez@amd.com } 23311308Santhony.gutierrez@amd.com 23411308Santhony.gutierrez@amd.com return false; 23511308Santhony.gutierrez@amd.com} 23611308Santhony.gutierrez@amd.com 23711308Santhony.gutierrez@amd.combool 23811308Santhony.gutierrez@amd.comWavefront::isOldestInstPrivMem() 23911308Santhony.gutierrez@amd.com{ 24011308Santhony.gutierrez@amd.com assert(!instructionBuffer.empty()); 24111308Santhony.gutierrez@amd.com GPUDynInstPtr ii = instructionBuffer.front(); 24211308Santhony.gutierrez@amd.com 24311692Santhony.gutierrez@amd.com if (status != 
S_STOPPED && ii->isPrivateSeg()) { 24411308Santhony.gutierrez@amd.com return true; 24511308Santhony.gutierrez@amd.com } 24611308Santhony.gutierrez@amd.com 24711308Santhony.gutierrez@amd.com return false; 24811308Santhony.gutierrez@amd.com} 24911308Santhony.gutierrez@amd.com 25011308Santhony.gutierrez@amd.combool 25111308Santhony.gutierrez@amd.comWavefront::isOldestInstFlatMem() 25211308Santhony.gutierrez@amd.com{ 25311308Santhony.gutierrez@amd.com assert(!instructionBuffer.empty()); 25411308Santhony.gutierrez@amd.com GPUDynInstPtr ii = instructionBuffer.front(); 25511308Santhony.gutierrez@amd.com 25611692Santhony.gutierrez@amd.com if (status != S_STOPPED && ii->isFlat()) { 25711308Santhony.gutierrez@amd.com return true; 25811308Santhony.gutierrez@amd.com } 25911308Santhony.gutierrez@amd.com 26011308Santhony.gutierrez@amd.com return false; 26111308Santhony.gutierrez@amd.com} 26211308Santhony.gutierrez@amd.com 26311308Santhony.gutierrez@amd.com// Return true if the Wavefront's instruction 26411308Santhony.gutierrez@amd.com// buffer has branch instruction. 26511308Santhony.gutierrez@amd.combool 26611308Santhony.gutierrez@amd.comWavefront::instructionBufferHasBranch() 26711308Santhony.gutierrez@amd.com{ 26811308Santhony.gutierrez@amd.com for (auto it : instructionBuffer) { 26911308Santhony.gutierrez@amd.com GPUDynInstPtr ii = it; 27011308Santhony.gutierrez@amd.com 27111692Santhony.gutierrez@amd.com if (ii->isReturn() || ii->isBranch()) { 27211308Santhony.gutierrez@amd.com return true; 27311308Santhony.gutierrez@amd.com } 27411308Santhony.gutierrez@amd.com } 27511308Santhony.gutierrez@amd.com 27611308Santhony.gutierrez@amd.com return false; 27711308Santhony.gutierrez@amd.com} 27811308Santhony.gutierrez@amd.com 27911308Santhony.gutierrez@amd.com// Remap HSAIL register to physical VGPR. 
28011308Santhony.gutierrez@amd.com// HSAIL register = virtual register assigned to an operand by HLC compiler 28111308Santhony.gutierrez@amd.comuint32_t 28211308Santhony.gutierrez@amd.comWavefront::remap(uint32_t vgprIndex, uint32_t size, uint8_t mode) 28311308Santhony.gutierrez@amd.com{ 28411308Santhony.gutierrez@amd.com assert((vgprIndex < reservedVectorRegs) && (reservedVectorRegs > 0)); 28511308Santhony.gutierrez@amd.com // add the offset from where the VGPRs of the wavefront have been assigned 28611308Santhony.gutierrez@amd.com uint32_t physicalVgprIndex = startVgprIndex + vgprIndex; 28711308Santhony.gutierrez@amd.com // HSAIL double precision (DP) register: calculate the physical VGPR index 28811308Santhony.gutierrez@amd.com // assuming that DP registers are placed after SP ones in the VRF. The DP 28911308Santhony.gutierrez@amd.com // and SP VGPR name spaces in HSAIL mode are separate so we need to adjust 29011308Santhony.gutierrez@amd.com // the DP VGPR index before mapping it to the physical VRF address space 29111308Santhony.gutierrez@amd.com if (mode == 1 && size > 4) { 29211308Santhony.gutierrez@amd.com physicalVgprIndex = startVgprIndex + maxSpVgprs + (2 * vgprIndex); 29311308Santhony.gutierrez@amd.com } 29411308Santhony.gutierrez@amd.com 29511308Santhony.gutierrez@amd.com assert((startVgprIndex <= physicalVgprIndex) && 29611308Santhony.gutierrez@amd.com (startVgprIndex + reservedVectorRegs - 1) >= physicalVgprIndex); 29711308Santhony.gutierrez@amd.com 29811308Santhony.gutierrez@amd.com // calculate absolute physical VGPR index 29911308Santhony.gutierrez@amd.com return physicalVgprIndex % computeUnit->vrf[simdId]->numRegs(); 30011308Santhony.gutierrez@amd.com} 30111308Santhony.gutierrez@amd.com 30211308Santhony.gutierrez@amd.com// Return true if this wavefront is ready 30311308Santhony.gutierrez@amd.com// to execute an instruction of the specified type. 
30411308Santhony.gutierrez@amd.comint 30511308Santhony.gutierrez@amd.comWavefront::ready(itype_e type) 30611308Santhony.gutierrez@amd.com{ 30711308Santhony.gutierrez@amd.com // Check to make sure wave is running 30811308Santhony.gutierrez@amd.com if (status == S_STOPPED || status == S_RETURNING || 30911308Santhony.gutierrez@amd.com instructionBuffer.empty()) { 31011308Santhony.gutierrez@amd.com return 0; 31111308Santhony.gutierrez@amd.com } 31211308Santhony.gutierrez@amd.com 31311308Santhony.gutierrez@amd.com // Is the wave waiting at a barrier 31411308Santhony.gutierrez@amd.com if (stalledAtBarrier) { 31511639Salexandru.dutu@amd.com if (!computeUnit->AllAtBarrier(barrierId,barrierCnt, 31611639Salexandru.dutu@amd.com computeUnit->getRefCounter(dispatchId, wgId))) { 31711308Santhony.gutierrez@amd.com // Are all threads at barrier? 31811308Santhony.gutierrez@amd.com return 0; 31911308Santhony.gutierrez@amd.com } 32011639Salexandru.dutu@amd.com oldBarrierCnt = barrierCnt; 32111308Santhony.gutierrez@amd.com stalledAtBarrier = false; 32211308Santhony.gutierrez@amd.com } 32311308Santhony.gutierrez@amd.com 32411308Santhony.gutierrez@amd.com // Read instruction 32511308Santhony.gutierrez@amd.com GPUDynInstPtr ii = instructionBuffer.front(); 32611308Santhony.gutierrez@amd.com 32711308Santhony.gutierrez@amd.com bool ready_inst M5_VAR_USED = false; 32811308Santhony.gutierrez@amd.com bool glbMemBusRdy = false; 32911308Santhony.gutierrez@amd.com bool glbMemIssueRdy = false; 33011308Santhony.gutierrez@amd.com if (type == I_GLOBAL || type == I_FLAT || type == I_PRIVATE) { 33111308Santhony.gutierrez@amd.com for (int j=0; j < computeUnit->numGlbMemUnits; ++j) { 33211308Santhony.gutierrez@amd.com if (computeUnit->vrfToGlobalMemPipeBus[j].prerdy()) 33311308Santhony.gutierrez@amd.com glbMemBusRdy = true; 33411308Santhony.gutierrez@amd.com if (computeUnit->wfWait[j].prerdy()) 33511308Santhony.gutierrez@amd.com glbMemIssueRdy = true; 33611308Santhony.gutierrez@amd.com } 
33711308Santhony.gutierrez@amd.com } 33811308Santhony.gutierrez@amd.com bool locMemBusRdy = false; 33911308Santhony.gutierrez@amd.com bool locMemIssueRdy = false; 34011345Sjohn.kalamatianos@amd.com if (type == I_SHARED || type == I_FLAT) { 34111308Santhony.gutierrez@amd.com for (int j=0; j < computeUnit->numLocMemUnits; ++j) { 34211308Santhony.gutierrez@amd.com if (computeUnit->vrfToLocalMemPipeBus[j].prerdy()) 34311308Santhony.gutierrez@amd.com locMemBusRdy = true; 34411308Santhony.gutierrez@amd.com if (computeUnit->wfWait[j].prerdy()) 34511308Santhony.gutierrez@amd.com locMemIssueRdy = true; 34611308Santhony.gutierrez@amd.com } 34711308Santhony.gutierrez@amd.com } 34811308Santhony.gutierrez@amd.com 34911308Santhony.gutierrez@amd.com // The following code is very error prone and the entire process for 35011308Santhony.gutierrez@amd.com // checking readiness will be fixed eventually. In the meantime, let's 35111308Santhony.gutierrez@amd.com // make sure that we do not silently let an instruction type slip 35211308Santhony.gutierrez@amd.com // through this logic and always return not ready. 
35311692Santhony.gutierrez@amd.com if (!(ii->isBarrier() || ii->isNop() || ii->isReturn() || ii->isBranch() || 35411692Santhony.gutierrez@amd.com ii->isALU() || ii->isLoad() || ii->isStore() || ii->isAtomic() || 35511692Santhony.gutierrez@amd.com ii->isMemFence() || ii->isFlat())) { 35611308Santhony.gutierrez@amd.com panic("next instruction: %s is of unknown type\n", ii->disassemble()); 35711308Santhony.gutierrez@amd.com } 35811308Santhony.gutierrez@amd.com 35911308Santhony.gutierrez@amd.com DPRINTF(GPUExec, "CU%d: WF[%d][%d]: Checking Read for Inst : %s\n", 36011308Santhony.gutierrez@amd.com computeUnit->cu_id, simdId, wfSlotId, ii->disassemble()); 36111308Santhony.gutierrez@amd.com 36211692Santhony.gutierrez@amd.com if (type == I_ALU && ii->isBarrier()) { 36311308Santhony.gutierrez@amd.com // Here for ALU instruction (barrier) 36411308Santhony.gutierrez@amd.com if (!computeUnit->wfWait[simdId].prerdy()) { 36511308Santhony.gutierrez@amd.com // Is wave slot free? 36611308Santhony.gutierrez@amd.com return 0; 36711308Santhony.gutierrez@amd.com } 36811308Santhony.gutierrez@amd.com 36911308Santhony.gutierrez@amd.com // Are there in pipe or outstanding memory requests? 37011639Salexandru.dutu@amd.com if ((outstandingReqs + memReqsInPipe) > 0) { 37111308Santhony.gutierrez@amd.com return 0; 37211308Santhony.gutierrez@amd.com } 37311308Santhony.gutierrez@amd.com 37411308Santhony.gutierrez@amd.com ready_inst = true; 37511692Santhony.gutierrez@amd.com } else if (type == I_ALU && ii->isNop()) { 37611308Santhony.gutierrez@amd.com // Here for ALU instruction (nop) 37711308Santhony.gutierrez@amd.com if (!computeUnit->wfWait[simdId].prerdy()) { 37811308Santhony.gutierrez@amd.com // Is wave slot free? 
37911308Santhony.gutierrez@amd.com return 0; 38011308Santhony.gutierrez@amd.com } 38111308Santhony.gutierrez@amd.com 38211308Santhony.gutierrez@amd.com ready_inst = true; 38311692Santhony.gutierrez@amd.com } else if (type == I_ALU && ii->isReturn()) { 38411308Santhony.gutierrez@amd.com // Here for ALU instruction (return) 38511308Santhony.gutierrez@amd.com if (!computeUnit->wfWait[simdId].prerdy()) { 38611308Santhony.gutierrez@amd.com // Is wave slot free? 38711308Santhony.gutierrez@amd.com return 0; 38811308Santhony.gutierrez@amd.com } 38911308Santhony.gutierrez@amd.com 39011308Santhony.gutierrez@amd.com // Are there in pipe or outstanding memory requests? 39111639Salexandru.dutu@amd.com if ((outstandingReqs + memReqsInPipe) > 0) { 39211308Santhony.gutierrez@amd.com return 0; 39311308Santhony.gutierrez@amd.com } 39411308Santhony.gutierrez@amd.com 39511308Santhony.gutierrez@amd.com ready_inst = true; 39611692Santhony.gutierrez@amd.com } else if (type == I_ALU && (ii->isBranch() || 39711692Santhony.gutierrez@amd.com ii->isALU() || 39811692Santhony.gutierrez@amd.com (ii->isKernArgSeg() && ii->isLoad()) || 39911692Santhony.gutierrez@amd.com ii->isArgSeg())) { 40011308Santhony.gutierrez@amd.com // Here for ALU instruction (all others) 40111308Santhony.gutierrez@amd.com if (!computeUnit->wfWait[simdId].prerdy()) { 40211308Santhony.gutierrez@amd.com // Is alu slot free? 
40311308Santhony.gutierrez@amd.com return 0; 40411308Santhony.gutierrez@amd.com } 40511308Santhony.gutierrez@amd.com if (!computeUnit->vrf[simdId]->vrfOperandAccessReady(this, ii, 40611308Santhony.gutierrez@amd.com VrfAccessType::RD_WR)) { 40711308Santhony.gutierrez@amd.com return 0; 40811308Santhony.gutierrez@amd.com } 40911308Santhony.gutierrez@amd.com 41011308Santhony.gutierrez@amd.com if (!computeUnit->vrf[simdId]->operandsReady(this, ii)) { 41111308Santhony.gutierrez@amd.com return 0; 41211308Santhony.gutierrez@amd.com } 41311308Santhony.gutierrez@amd.com ready_inst = true; 41411692Santhony.gutierrez@amd.com } else if (type == I_GLOBAL && ii->isGlobalMem()) { 41511308Santhony.gutierrez@amd.com // Here Global memory instruction 41611692Santhony.gutierrez@amd.com if (ii->isLoad() || ii->isAtomic() || ii->isMemFence()) { 41711308Santhony.gutierrez@amd.com // Are there in pipe or outstanding global memory write requests? 41811639Salexandru.dutu@amd.com if ((outstandingReqsWrGm + wrGmReqsInPipe) > 0) { 41911308Santhony.gutierrez@amd.com return 0; 42011308Santhony.gutierrez@amd.com } 42111308Santhony.gutierrez@amd.com } 42211308Santhony.gutierrez@amd.com 42311692Santhony.gutierrez@amd.com if (ii->isStore() || ii->isAtomic() || ii->isMemFence()) { 42411308Santhony.gutierrez@amd.com // Are there in pipe or outstanding global memory read requests? 42511639Salexandru.dutu@amd.com if ((outstandingReqsRdGm + rdGmReqsInPipe) > 0) 42611308Santhony.gutierrez@amd.com return 0; 42711308Santhony.gutierrez@amd.com } 42811308Santhony.gutierrez@amd.com 42911308Santhony.gutierrez@amd.com if (!glbMemIssueRdy) { 43011308Santhony.gutierrez@amd.com // Is WV issue slot free? 43111308Santhony.gutierrez@amd.com return 0; 43211308Santhony.gutierrez@amd.com } 43311308Santhony.gutierrez@amd.com 43411308Santhony.gutierrez@amd.com if (!glbMemBusRdy) { 43511308Santhony.gutierrez@amd.com // Is there an available VRF->Global memory read bus? 
43611308Santhony.gutierrez@amd.com return 0; 43711308Santhony.gutierrez@amd.com } 43811308Santhony.gutierrez@amd.com 43911308Santhony.gutierrez@amd.com if (!computeUnit->globalMemoryPipe. 44011639Salexandru.dutu@amd.com isGMReqFIFOWrRdy(rdGmReqsInPipe + wrGmReqsInPipe)) { 44111308Santhony.gutierrez@amd.com // Can we insert a new request to the Global Mem Request FIFO? 44211308Santhony.gutierrez@amd.com return 0; 44311308Santhony.gutierrez@amd.com } 44411308Santhony.gutierrez@amd.com // can we schedule source & destination operands on the VRF? 44511308Santhony.gutierrez@amd.com if (!computeUnit->vrf[simdId]->vrfOperandAccessReady(this, ii, 44611308Santhony.gutierrez@amd.com VrfAccessType::RD_WR)) { 44711308Santhony.gutierrez@amd.com return 0; 44811308Santhony.gutierrez@amd.com } 44911308Santhony.gutierrez@amd.com if (!computeUnit->vrf[simdId]->operandsReady(this, ii)) { 45011308Santhony.gutierrez@amd.com return 0; 45111308Santhony.gutierrez@amd.com } 45211308Santhony.gutierrez@amd.com ready_inst = true; 45311692Santhony.gutierrez@amd.com } else if (type == I_SHARED && ii->isLocalMem()) { 45411308Santhony.gutierrez@amd.com // Here for Shared memory instruction 45511692Santhony.gutierrez@amd.com if (ii->isLoad() || ii->isAtomic() || ii->isMemFence()) { 45611639Salexandru.dutu@amd.com if ((outstandingReqsWrLm + wrLmReqsInPipe) > 0) { 45711308Santhony.gutierrez@amd.com return 0; 45811308Santhony.gutierrez@amd.com } 45911308Santhony.gutierrez@amd.com } 46011308Santhony.gutierrez@amd.com 46111692Santhony.gutierrez@amd.com if (ii->isStore() || ii->isAtomic() || ii->isMemFence()) { 46211639Salexandru.dutu@amd.com if ((outstandingReqsRdLm + rdLmReqsInPipe) > 0) { 46311308Santhony.gutierrez@amd.com return 0; 46411308Santhony.gutierrez@amd.com } 46511308Santhony.gutierrez@amd.com } 46611308Santhony.gutierrez@amd.com 46711308Santhony.gutierrez@amd.com if (!locMemBusRdy) { 46811308Santhony.gutierrez@amd.com // Is there an available VRF->LDS read bus? 
46911308Santhony.gutierrez@amd.com return 0; 47011308Santhony.gutierrez@amd.com } 47111308Santhony.gutierrez@amd.com if (!locMemIssueRdy) { 47211308Santhony.gutierrez@amd.com // Is wave slot free? 47311308Santhony.gutierrez@amd.com return 0; 47411308Santhony.gutierrez@amd.com } 47511308Santhony.gutierrez@amd.com 47611308Santhony.gutierrez@amd.com if (!computeUnit->localMemoryPipe. 47711639Salexandru.dutu@amd.com isLMReqFIFOWrRdy(rdLmReqsInPipe + wrLmReqsInPipe)) { 47811308Santhony.gutierrez@amd.com // Can we insert a new request to the LDS Request FIFO? 47911308Santhony.gutierrez@amd.com return 0; 48011308Santhony.gutierrez@amd.com } 48111308Santhony.gutierrez@amd.com // can we schedule source & destination operands on the VRF? 48211308Santhony.gutierrez@amd.com if (!computeUnit->vrf[simdId]->vrfOperandAccessReady(this, ii, 48311308Santhony.gutierrez@amd.com VrfAccessType::RD_WR)) { 48411308Santhony.gutierrez@amd.com return 0; 48511308Santhony.gutierrez@amd.com } 48611308Santhony.gutierrez@amd.com if (!computeUnit->vrf[simdId]->operandsReady(this, ii)) { 48711308Santhony.gutierrez@amd.com return 0; 48811308Santhony.gutierrez@amd.com } 48911308Santhony.gutierrez@amd.com ready_inst = true; 49011692Santhony.gutierrez@amd.com } else if (type == I_FLAT && ii->isFlat()) { 49111308Santhony.gutierrez@amd.com if (!glbMemBusRdy) { 49211308Santhony.gutierrez@amd.com // Is there an available VRF->Global memory read bus? 49311308Santhony.gutierrez@amd.com return 0; 49411308Santhony.gutierrez@amd.com } 49511308Santhony.gutierrez@amd.com 49611308Santhony.gutierrez@amd.com if (!locMemBusRdy) { 49711308Santhony.gutierrez@amd.com // Is there an available VRF->LDS read bus? 49811308Santhony.gutierrez@amd.com return 0; 49911308Santhony.gutierrez@amd.com } 50011308Santhony.gutierrez@amd.com 50111308Santhony.gutierrez@amd.com if (!glbMemIssueRdy) { 50211308Santhony.gutierrez@amd.com // Is wave slot free? 
50311308Santhony.gutierrez@amd.com return 0; 50411308Santhony.gutierrez@amd.com } 50511308Santhony.gutierrez@amd.com 50611308Santhony.gutierrez@amd.com if (!locMemIssueRdy) { 50711308Santhony.gutierrez@amd.com return 0; 50811308Santhony.gutierrez@amd.com } 50911308Santhony.gutierrez@amd.com if (!computeUnit->globalMemoryPipe. 51011639Salexandru.dutu@amd.com isGMReqFIFOWrRdy(rdGmReqsInPipe + wrGmReqsInPipe)) { 51111308Santhony.gutierrez@amd.com // Can we insert a new request to the Global Mem Request FIFO? 51211308Santhony.gutierrez@amd.com return 0; 51311308Santhony.gutierrez@amd.com } 51411308Santhony.gutierrez@amd.com 51511308Santhony.gutierrez@amd.com if (!computeUnit->localMemoryPipe. 51611639Salexandru.dutu@amd.com isLMReqFIFOWrRdy(rdLmReqsInPipe + wrLmReqsInPipe)) { 51711308Santhony.gutierrez@amd.com // Can we insert a new request to the LDS Request FIFO? 51811308Santhony.gutierrez@amd.com return 0; 51911308Santhony.gutierrez@amd.com } 52011308Santhony.gutierrez@amd.com // can we schedule source & destination operands on the VRF? 52111308Santhony.gutierrez@amd.com if (!computeUnit->vrf[simdId]->vrfOperandAccessReady(this, ii, 52211308Santhony.gutierrez@amd.com VrfAccessType::RD_WR)) { 52311308Santhony.gutierrez@amd.com return 0; 52411308Santhony.gutierrez@amd.com } 52511308Santhony.gutierrez@amd.com // are all the operands ready? (RAW, WAW and WAR depedencies met?) 
52611308Santhony.gutierrez@amd.com if (!computeUnit->vrf[simdId]->operandsReady(this, ii)) { 52711308Santhony.gutierrez@amd.com return 0; 52811308Santhony.gutierrez@amd.com } 52911308Santhony.gutierrez@amd.com ready_inst = true; 53011308Santhony.gutierrez@amd.com } else { 53111308Santhony.gutierrez@amd.com return 0; 53211308Santhony.gutierrez@amd.com } 53311308Santhony.gutierrez@amd.com 53411308Santhony.gutierrez@amd.com assert(ready_inst); 53511308Santhony.gutierrez@amd.com 53611308Santhony.gutierrez@amd.com DPRINTF(GPUExec, "CU%d: WF[%d][%d]: Ready Inst : %s\n", computeUnit->cu_id, 53711308Santhony.gutierrez@amd.com simdId, wfSlotId, ii->disassemble()); 53811308Santhony.gutierrez@amd.com return 1; 53911308Santhony.gutierrez@amd.com} 54011308Santhony.gutierrez@amd.com 54111308Santhony.gutierrez@amd.comvoid 54211308Santhony.gutierrez@amd.comWavefront::updateResources() 54311308Santhony.gutierrez@amd.com{ 54411308Santhony.gutierrez@amd.com // Get current instruction 54511308Santhony.gutierrez@amd.com GPUDynInstPtr ii = instructionBuffer.front(); 54611308Santhony.gutierrez@amd.com assert(ii); 54711308Santhony.gutierrez@amd.com computeUnit->vrf[simdId]->updateResources(this, ii); 54811308Santhony.gutierrez@amd.com // Single precision ALU or Branch or Return or Special instruction 54911692Santhony.gutierrez@amd.com if (ii->isALU() || ii->isSpecialOp() || 55011692Santhony.gutierrez@amd.com ii->isBranch() || 55111308Santhony.gutierrez@amd.com // FIXME: Kernel argument loads are currently treated as ALU operations 55211308Santhony.gutierrez@amd.com // since we don't send memory packets at execution. 
If we fix that then 55311308Santhony.gutierrez@amd.com // we should map them to one of the memory pipelines 55411692Santhony.gutierrez@amd.com (ii->isKernArgSeg() && ii->isLoad()) || ii->isArgSeg() || 55511692Santhony.gutierrez@amd.com ii->isReturn()) { 55611308Santhony.gutierrez@amd.com computeUnit->aluPipe[simdId].preset(computeUnit->shader-> 55711308Santhony.gutierrez@amd.com ticks(computeUnit->spBypassLength())); 55811308Santhony.gutierrez@amd.com // this is to enforce a fixed number of cycles per issue slot per SIMD 55911308Santhony.gutierrez@amd.com computeUnit->wfWait[simdId].preset(computeUnit->shader-> 56011308Santhony.gutierrez@amd.com ticks(computeUnit->issuePeriod)); 56111692Santhony.gutierrez@amd.com } else if (ii->isBarrier()) { 56211308Santhony.gutierrez@amd.com computeUnit->wfWait[simdId].preset(computeUnit->shader-> 56311308Santhony.gutierrez@amd.com ticks(computeUnit->issuePeriod)); 56411692Santhony.gutierrez@amd.com } else if (ii->isLoad() && ii->isFlat()) { 56511308Santhony.gutierrez@amd.com assert(Enums::SC_NONE != ii->executedAs()); 56611639Salexandru.dutu@amd.com memReqsInPipe++; 56711639Salexandru.dutu@amd.com rdGmReqsInPipe++; 56811308Santhony.gutierrez@amd.com if ( Enums::SC_SHARED == ii->executedAs() ) { 56911308Santhony.gutierrez@amd.com computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()]. 57011308Santhony.gutierrez@amd.com preset(computeUnit->shader->ticks(4)); 57111308Santhony.gutierrez@amd.com computeUnit->wfWait[computeUnit->ShrMemUnitId()]. 57211308Santhony.gutierrez@amd.com preset(computeUnit->shader->ticks(computeUnit->issuePeriod)); 57311308Santhony.gutierrez@amd.com } else { 57411308Santhony.gutierrez@amd.com computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()]. 57511308Santhony.gutierrez@amd.com preset(computeUnit->shader->ticks(4)); 57611308Santhony.gutierrez@amd.com computeUnit->wfWait[computeUnit->GlbMemUnitId()]. 
57711308Santhony.gutierrez@amd.com preset(computeUnit->shader->ticks(computeUnit->issuePeriod)); 57811308Santhony.gutierrez@amd.com } 57911692Santhony.gutierrez@amd.com } else if (ii->isStore() && ii->isFlat()) { 58011308Santhony.gutierrez@amd.com assert(Enums::SC_NONE != ii->executedAs()); 58111639Salexandru.dutu@amd.com memReqsInPipe++; 58211639Salexandru.dutu@amd.com wrGmReqsInPipe++; 58311308Santhony.gutierrez@amd.com if (Enums::SC_SHARED == ii->executedAs()) { 58411308Santhony.gutierrez@amd.com computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()]. 58511308Santhony.gutierrez@amd.com preset(computeUnit->shader->ticks(8)); 58611308Santhony.gutierrez@amd.com computeUnit->wfWait[computeUnit->ShrMemUnitId()]. 58711308Santhony.gutierrez@amd.com preset(computeUnit->shader->ticks(computeUnit->issuePeriod)); 58811308Santhony.gutierrez@amd.com } else { 58911308Santhony.gutierrez@amd.com computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()]. 59011308Santhony.gutierrez@amd.com preset(computeUnit->shader->ticks(8)); 59111308Santhony.gutierrez@amd.com computeUnit->wfWait[computeUnit->GlbMemUnitId()]. 59211308Santhony.gutierrez@amd.com preset(computeUnit->shader->ticks(computeUnit->issuePeriod)); 59311308Santhony.gutierrez@amd.com } 59411692Santhony.gutierrez@amd.com } else if (ii->isLoad() && ii->isGlobalMem()) { 59511639Salexandru.dutu@amd.com memReqsInPipe++; 59611639Salexandru.dutu@amd.com rdGmReqsInPipe++; 59711308Santhony.gutierrez@amd.com computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()]. 59811308Santhony.gutierrez@amd.com preset(computeUnit->shader->ticks(4)); 59911308Santhony.gutierrez@amd.com computeUnit->wfWait[computeUnit->GlbMemUnitId()]. 
60011308Santhony.gutierrez@amd.com preset(computeUnit->shader->ticks(computeUnit->issuePeriod)); 60111692Santhony.gutierrez@amd.com } else if (ii->isStore() && ii->isGlobalMem()) { 60211639Salexandru.dutu@amd.com memReqsInPipe++; 60311639Salexandru.dutu@amd.com wrGmReqsInPipe++; 60411308Santhony.gutierrez@amd.com computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()]. 60511308Santhony.gutierrez@amd.com preset(computeUnit->shader->ticks(8)); 60611308Santhony.gutierrez@amd.com computeUnit->wfWait[computeUnit->GlbMemUnitId()]. 60711308Santhony.gutierrez@amd.com preset(computeUnit->shader->ticks(computeUnit->issuePeriod)); 60811692Santhony.gutierrez@amd.com } else if ((ii->isAtomic() || ii->isMemFence()) && ii->isGlobalMem()) { 60911639Salexandru.dutu@amd.com memReqsInPipe++; 61011639Salexandru.dutu@amd.com wrGmReqsInPipe++; 61111639Salexandru.dutu@amd.com rdGmReqsInPipe++; 61211308Santhony.gutierrez@amd.com computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()]. 61311308Santhony.gutierrez@amd.com preset(computeUnit->shader->ticks(8)); 61411308Santhony.gutierrez@amd.com computeUnit->wfWait[computeUnit->GlbMemUnitId()]. 61511308Santhony.gutierrez@amd.com preset(computeUnit->shader->ticks(computeUnit->issuePeriod)); 61611692Santhony.gutierrez@amd.com } else if (ii->isLoad() && ii->isLocalMem()) { 61711639Salexandru.dutu@amd.com memReqsInPipe++; 61811639Salexandru.dutu@amd.com rdLmReqsInPipe++; 61911308Santhony.gutierrez@amd.com computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()]. 62011308Santhony.gutierrez@amd.com preset(computeUnit->shader->ticks(4)); 62111308Santhony.gutierrez@amd.com computeUnit->wfWait[computeUnit->ShrMemUnitId()]. 
62211308Santhony.gutierrez@amd.com preset(computeUnit->shader->ticks(computeUnit->issuePeriod)); 62311692Santhony.gutierrez@amd.com } else if (ii->isStore() && ii->isLocalMem()) { 62411639Salexandru.dutu@amd.com memReqsInPipe++; 62511639Salexandru.dutu@amd.com wrLmReqsInPipe++; 62611308Santhony.gutierrez@amd.com computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()]. 62711308Santhony.gutierrez@amd.com preset(computeUnit->shader->ticks(8)); 62811308Santhony.gutierrez@amd.com computeUnit->wfWait[computeUnit->ShrMemUnitId()]. 62911308Santhony.gutierrez@amd.com preset(computeUnit->shader->ticks(computeUnit->issuePeriod)); 63011692Santhony.gutierrez@amd.com } else if ((ii->isAtomic() || ii->isMemFence()) && ii->isLocalMem()) { 63111639Salexandru.dutu@amd.com memReqsInPipe++; 63211639Salexandru.dutu@amd.com wrLmReqsInPipe++; 63311639Salexandru.dutu@amd.com rdLmReqsInPipe++; 63411308Santhony.gutierrez@amd.com computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()]. 63511308Santhony.gutierrez@amd.com preset(computeUnit->shader->ticks(8)); 63611308Santhony.gutierrez@amd.com computeUnit->wfWait[computeUnit->ShrMemUnitId()]. 
63711308Santhony.gutierrez@amd.com preset(computeUnit->shader->ticks(computeUnit->issuePeriod)); 63811308Santhony.gutierrez@amd.com } 63911308Santhony.gutierrez@amd.com} 64011308Santhony.gutierrez@amd.com 64111308Santhony.gutierrez@amd.comvoid 64211308Santhony.gutierrez@amd.comWavefront::exec() 64311308Santhony.gutierrez@amd.com{ 64411308Santhony.gutierrez@amd.com // ---- Exit if wavefront is inactive ----------------------------- // 64511308Santhony.gutierrez@amd.com 64611308Santhony.gutierrez@amd.com if (status == S_STOPPED || status == S_RETURNING || 64711308Santhony.gutierrez@amd.com instructionBuffer.empty()) { 64811308Santhony.gutierrez@amd.com return; 64911308Santhony.gutierrez@amd.com } 65011308Santhony.gutierrez@amd.com 65111308Santhony.gutierrez@amd.com // Get current instruction 65211308Santhony.gutierrez@amd.com 65311308Santhony.gutierrez@amd.com GPUDynInstPtr ii = instructionBuffer.front(); 65411308Santhony.gutierrez@amd.com 65511308Santhony.gutierrez@amd.com const uint32_t old_pc = pc(); 65611308Santhony.gutierrez@amd.com DPRINTF(GPUExec, "CU%d: WF[%d][%d]: wave[%d] Executing inst: %s " 65711308Santhony.gutierrez@amd.com "(pc: %i)\n", computeUnit->cu_id, simdId, wfSlotId, wfDynId, 65811308Santhony.gutierrez@amd.com ii->disassemble(), old_pc); 65911695Santhony.gutierrez@amd.com 66011695Santhony.gutierrez@amd.com // update the instruction stats in the CU 66111695Santhony.gutierrez@amd.com 66211692Santhony.gutierrez@amd.com ii->execute(ii); 66311695Santhony.gutierrez@amd.com computeUnit->updateInstStats(ii); 66411308Santhony.gutierrez@amd.com // access the VRF 66511308Santhony.gutierrez@amd.com computeUnit->vrf[simdId]->exec(ii, this); 66611308Santhony.gutierrez@amd.com srcRegOpDist.sample(ii->numSrcRegOperands()); 66711308Santhony.gutierrez@amd.com dstRegOpDist.sample(ii->numDstRegOperands()); 66811308Santhony.gutierrez@amd.com computeUnit->numInstrExecuted++; 66911308Santhony.gutierrez@amd.com 
computeUnit->execRateDist.sample(computeUnit->totalCycles.value() - 67011308Santhony.gutierrez@amd.com computeUnit->lastExecCycle[simdId]); 67111308Santhony.gutierrez@amd.com computeUnit->lastExecCycle[simdId] = computeUnit->totalCycles.value(); 67211308Santhony.gutierrez@amd.com if (pc() == old_pc) { 67311696Santhony.gutierrez@amd.com uint32_t new_pc = _gpuISA.advancePC(old_pc, ii); 67411308Santhony.gutierrez@amd.com // PC not modified by instruction, proceed to next or pop frame 67511308Santhony.gutierrez@amd.com pc(new_pc); 67611308Santhony.gutierrez@amd.com if (new_pc == rpc()) { 67711308Santhony.gutierrez@amd.com popFromReconvergenceStack(); 67811308Santhony.gutierrez@amd.com discardFetch(); 67911308Santhony.gutierrez@amd.com } else { 68011308Santhony.gutierrez@amd.com instructionBuffer.pop_front(); 68111308Santhony.gutierrez@amd.com } 68211694Santhony.gutierrez@amd.com } else { 68311694Santhony.gutierrez@amd.com discardFetch(); 68411308Santhony.gutierrez@amd.com } 68511308Santhony.gutierrez@amd.com 68611308Santhony.gutierrez@amd.com if (computeUnit->shader->hsail_mode==Shader::SIMT) { 68711308Santhony.gutierrez@amd.com const int num_active_lanes = execMask().count(); 68811308Santhony.gutierrez@amd.com computeUnit->controlFlowDivergenceDist.sample(num_active_lanes); 68911308Santhony.gutierrez@amd.com computeUnit->numVecOpsExecuted += num_active_lanes; 69011308Santhony.gutierrez@amd.com if (isGmInstruction(ii)) { 69111308Santhony.gutierrez@amd.com computeUnit->activeLanesPerGMemInstrDist.sample(num_active_lanes); 69211308Santhony.gutierrez@amd.com } else if (isLmInstruction(ii)) { 69311308Santhony.gutierrez@amd.com computeUnit->activeLanesPerLMemInstrDist.sample(num_active_lanes); 69411308Santhony.gutierrez@amd.com } 69511308Santhony.gutierrez@amd.com } 69611308Santhony.gutierrez@amd.com 69711308Santhony.gutierrez@amd.com // ---- Update Vector ALU pipeline and other resources ------------------ // 69811308Santhony.gutierrez@amd.com // Single precision ALU or 
Branch or Return or Special instruction 69911692Santhony.gutierrez@amd.com if (ii->isALU() || ii->isSpecialOp() || 70011692Santhony.gutierrez@amd.com ii->isBranch() || 70111308Santhony.gutierrez@amd.com // FIXME: Kernel argument loads are currently treated as ALU operations 70211308Santhony.gutierrez@amd.com // since we don't send memory packets at execution. If we fix that then 70311308Santhony.gutierrez@amd.com // we should map them to one of the memory pipelines 70411692Santhony.gutierrez@amd.com (ii->isKernArgSeg() && ii->isLoad()) || 70511692Santhony.gutierrez@amd.com ii->isArgSeg() || 70611692Santhony.gutierrez@amd.com ii->isReturn()) { 70711308Santhony.gutierrez@amd.com computeUnit->aluPipe[simdId].set(computeUnit->shader-> 70811308Santhony.gutierrez@amd.com ticks(computeUnit->spBypassLength())); 70911308Santhony.gutierrez@amd.com 71011308Santhony.gutierrez@amd.com // this is to enforce a fixed number of cycles per issue slot per SIMD 71111308Santhony.gutierrez@amd.com computeUnit->wfWait[simdId].set(computeUnit->shader-> 71211308Santhony.gutierrez@amd.com ticks(computeUnit->issuePeriod)); 71311692Santhony.gutierrez@amd.com } else if (ii->isBarrier()) { 71411308Santhony.gutierrez@amd.com computeUnit->wfWait[simdId].set(computeUnit->shader-> 71511308Santhony.gutierrez@amd.com ticks(computeUnit->issuePeriod)); 71611692Santhony.gutierrez@amd.com } else if (ii->isLoad() && ii->isFlat()) { 71711308Santhony.gutierrez@amd.com assert(Enums::SC_NONE != ii->executedAs()); 71811308Santhony.gutierrez@amd.com 71911308Santhony.gutierrez@amd.com if (Enums::SC_SHARED == ii->executedAs()) { 72011308Santhony.gutierrez@amd.com computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()]. 72111308Santhony.gutierrez@amd.com set(computeUnit->shader->ticks(4)); 72211308Santhony.gutierrez@amd.com computeUnit->wfWait[computeUnit->ShrMemUnitId()]. 
72311308Santhony.gutierrez@amd.com set(computeUnit->shader->ticks(computeUnit->issuePeriod)); 72411308Santhony.gutierrez@amd.com } else { 72511308Santhony.gutierrez@amd.com computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()]. 72611308Santhony.gutierrez@amd.com set(computeUnit->shader->ticks(4)); 72711308Santhony.gutierrez@amd.com computeUnit->wfWait[computeUnit->GlbMemUnitId()]. 72811308Santhony.gutierrez@amd.com set(computeUnit->shader->ticks(computeUnit->issuePeriod)); 72911308Santhony.gutierrez@amd.com } 73011692Santhony.gutierrez@amd.com } else if (ii->isStore() && ii->isFlat()) { 73111308Santhony.gutierrez@amd.com assert(Enums::SC_NONE != ii->executedAs()); 73211308Santhony.gutierrez@amd.com if (Enums::SC_SHARED == ii->executedAs()) { 73311308Santhony.gutierrez@amd.com computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()]. 73411308Santhony.gutierrez@amd.com set(computeUnit->shader->ticks(8)); 73511308Santhony.gutierrez@amd.com computeUnit->wfWait[computeUnit->ShrMemUnitId()]. 73611308Santhony.gutierrez@amd.com set(computeUnit->shader->ticks(computeUnit->issuePeriod)); 73711308Santhony.gutierrez@amd.com } else { 73811308Santhony.gutierrez@amd.com computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()]. 73911308Santhony.gutierrez@amd.com set(computeUnit->shader->ticks(8)); 74011308Santhony.gutierrez@amd.com computeUnit->wfWait[computeUnit->GlbMemUnitId()]. 74111308Santhony.gutierrez@amd.com set(computeUnit->shader->ticks(computeUnit->issuePeriod)); 74211308Santhony.gutierrez@amd.com } 74311692Santhony.gutierrez@amd.com } else if (ii->isLoad() && ii->isGlobalMem()) { 74411308Santhony.gutierrez@amd.com computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()]. 74511308Santhony.gutierrez@amd.com set(computeUnit->shader->ticks(4)); 74611308Santhony.gutierrez@amd.com computeUnit->wfWait[computeUnit->GlbMemUnitId()]. 
74711308Santhony.gutierrez@amd.com set(computeUnit->shader->ticks(computeUnit->issuePeriod)); 74811692Santhony.gutierrez@amd.com } else if (ii->isStore() && ii->isGlobalMem()) { 74911308Santhony.gutierrez@amd.com computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()]. 75011308Santhony.gutierrez@amd.com set(computeUnit->shader->ticks(8)); 75111308Santhony.gutierrez@amd.com computeUnit->wfWait[computeUnit->GlbMemUnitId()]. 75211308Santhony.gutierrez@amd.com set(computeUnit->shader->ticks(computeUnit->issuePeriod)); 75311692Santhony.gutierrez@amd.com } else if ((ii->isAtomic() || ii->isMemFence()) && ii->isGlobalMem()) { 75411308Santhony.gutierrez@amd.com computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()]. 75511308Santhony.gutierrez@amd.com set(computeUnit->shader->ticks(8)); 75611308Santhony.gutierrez@amd.com computeUnit->wfWait[computeUnit->GlbMemUnitId()]. 75711308Santhony.gutierrez@amd.com set(computeUnit->shader->ticks(computeUnit->issuePeriod)); 75811692Santhony.gutierrez@amd.com } else if (ii->isLoad() && ii->isLocalMem()) { 75911308Santhony.gutierrez@amd.com computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()]. 76011308Santhony.gutierrez@amd.com set(computeUnit->shader->ticks(4)); 76111308Santhony.gutierrez@amd.com computeUnit->wfWait[computeUnit->ShrMemUnitId()]. 76211308Santhony.gutierrez@amd.com set(computeUnit->shader->ticks(computeUnit->issuePeriod)); 76311692Santhony.gutierrez@amd.com } else if (ii->isStore() && ii->isLocalMem()) { 76411308Santhony.gutierrez@amd.com computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()]. 76511308Santhony.gutierrez@amd.com set(computeUnit->shader->ticks(8)); 76611308Santhony.gutierrez@amd.com computeUnit->wfWait[computeUnit->ShrMemUnitId()]. 
76711308Santhony.gutierrez@amd.com set(computeUnit->shader->ticks(computeUnit->issuePeriod)); 76811692Santhony.gutierrez@amd.com } else if ((ii->isAtomic() || ii->isMemFence()) && ii->isLocalMem()) { 76911308Santhony.gutierrez@amd.com computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()]. 77011308Santhony.gutierrez@amd.com set(computeUnit->shader->ticks(8)); 77111308Santhony.gutierrez@amd.com computeUnit->wfWait[computeUnit->ShrMemUnitId()]. 77211308Santhony.gutierrez@amd.com set(computeUnit->shader->ticks(computeUnit->issuePeriod)); 77311308Santhony.gutierrez@amd.com } 77411308Santhony.gutierrez@amd.com} 77511308Santhony.gutierrez@amd.com 77611308Santhony.gutierrez@amd.combool 77711308Santhony.gutierrez@amd.comWavefront::waitingAtBarrier(int lane) 77811308Santhony.gutierrez@amd.com{ 77911639Salexandru.dutu@amd.com return barCnt[lane] < maxBarCnt; 78011308Santhony.gutierrez@amd.com} 78111308Santhony.gutierrez@amd.com 78211308Santhony.gutierrez@amd.comvoid 78311308Santhony.gutierrez@amd.comWavefront::pushToReconvergenceStack(uint32_t pc, uint32_t rpc, 78411308Santhony.gutierrez@amd.com const VectorMask& mask) 78511308Santhony.gutierrez@amd.com{ 78611308Santhony.gutierrez@amd.com assert(mask.count()); 78711641Salexandru.dutu@amd.com reconvergenceStack.emplace_back(new ReconvergenceStackEntry{pc, rpc, mask}); 78811308Santhony.gutierrez@amd.com} 78911308Santhony.gutierrez@amd.com 79011308Santhony.gutierrez@amd.comvoid 79111308Santhony.gutierrez@amd.comWavefront::popFromReconvergenceStack() 79211308Santhony.gutierrez@amd.com{ 79311308Santhony.gutierrez@amd.com assert(!reconvergenceStack.empty()); 79411308Santhony.gutierrez@amd.com 79511308Santhony.gutierrez@amd.com DPRINTF(WavefrontStack, "[%2d, %2d, %2d, %2d] %s %3i => ", 79611308Santhony.gutierrez@amd.com computeUnit->cu_id, simdId, wfSlotId, wfDynId, 79711308Santhony.gutierrez@amd.com execMask().to_string<char, std::string::traits_type, 79811308Santhony.gutierrez@amd.com 
std::string::allocator_type>().c_str(), pc()); 79911308Santhony.gutierrez@amd.com 80011641Salexandru.dutu@amd.com reconvergenceStack.pop_back(); 80111308Santhony.gutierrez@amd.com 80211308Santhony.gutierrez@amd.com DPRINTF(WavefrontStack, "%3i %s\n", pc(), 80311308Santhony.gutierrez@amd.com execMask().to_string<char, std::string::traits_type, 80411308Santhony.gutierrez@amd.com std::string::allocator_type>().c_str()); 80511308Santhony.gutierrez@amd.com 80611308Santhony.gutierrez@amd.com} 80711308Santhony.gutierrez@amd.com 80811308Santhony.gutierrez@amd.comvoid 80911308Santhony.gutierrez@amd.comWavefront::discardFetch() 81011308Santhony.gutierrez@amd.com{ 81111308Santhony.gutierrez@amd.com instructionBuffer.clear(); 81211308Santhony.gutierrez@amd.com dropFetch |=pendingFetch; 81311308Santhony.gutierrez@amd.com} 81411308Santhony.gutierrez@amd.com 81511308Santhony.gutierrez@amd.comuint32_t 81611308Santhony.gutierrez@amd.comWavefront::pc() const 81711308Santhony.gutierrez@amd.com{ 81811641Salexandru.dutu@amd.com return reconvergenceStack.back()->pc; 81911308Santhony.gutierrez@amd.com} 82011308Santhony.gutierrez@amd.com 82111308Santhony.gutierrez@amd.comuint32_t 82211308Santhony.gutierrez@amd.comWavefront::rpc() const 82311308Santhony.gutierrez@amd.com{ 82411641Salexandru.dutu@amd.com return reconvergenceStack.back()->rpc; 82511308Santhony.gutierrez@amd.com} 82611308Santhony.gutierrez@amd.com 82711308Santhony.gutierrez@amd.comVectorMask 82811308Santhony.gutierrez@amd.comWavefront::execMask() const 82911308Santhony.gutierrez@amd.com{ 83011641Salexandru.dutu@amd.com return reconvergenceStack.back()->execMask; 83111308Santhony.gutierrez@amd.com} 83211308Santhony.gutierrez@amd.com 83311308Santhony.gutierrez@amd.combool 83411308Santhony.gutierrez@amd.comWavefront::execMask(int lane) const 83511308Santhony.gutierrez@amd.com{ 83611641Salexandru.dutu@amd.com return reconvergenceStack.back()->execMask[lane]; 83711308Santhony.gutierrez@amd.com} 83811308Santhony.gutierrez@amd.com 
83911308Santhony.gutierrez@amd.com 84011308Santhony.gutierrez@amd.comvoid 84111308Santhony.gutierrez@amd.comWavefront::pc(uint32_t new_pc) 84211308Santhony.gutierrez@amd.com{ 84311641Salexandru.dutu@amd.com reconvergenceStack.back()->pc = new_pc; 84411308Santhony.gutierrez@amd.com} 84511640Salexandru.dutu@amd.com 84611640Salexandru.dutu@amd.comuint32_t 84711640Salexandru.dutu@amd.comWavefront::getStaticContextSize() const 84811640Salexandru.dutu@amd.com{ 84911643Salexandru.dutu@amd.com return barCnt.size() * sizeof(int) + sizeof(wfId) + sizeof(maxBarCnt) + 85011640Salexandru.dutu@amd.com sizeof(oldBarrierCnt) + sizeof(barrierCnt) + sizeof(wgId) + 85111640Salexandru.dutu@amd.com sizeof(computeUnit->cu_id) + sizeof(barrierId) + sizeof(initMask) + 85211640Salexandru.dutu@amd.com sizeof(privBase) + sizeof(spillBase) + sizeof(ldsChunk) + 85311640Salexandru.dutu@amd.com computeUnit->wfSize() * sizeof(ReconvergenceStackEntry); 85411640Salexandru.dutu@amd.com} 85511644Salexandru.dutu@amd.com 85611644Salexandru.dutu@amd.comvoid 85711644Salexandru.dutu@amd.comWavefront::getContext(const void *out) 85811644Salexandru.dutu@amd.com{ 85911644Salexandru.dutu@amd.com uint8_t *iter = (uint8_t *)out; 86011644Salexandru.dutu@amd.com for (int i = 0; i < barCnt.size(); i++) { 86111644Salexandru.dutu@amd.com *(int *)iter = barCnt[i]; iter += sizeof(barCnt[i]); 86211644Salexandru.dutu@amd.com } 86311644Salexandru.dutu@amd.com *(int *)iter = wfId; iter += sizeof(wfId); 86411644Salexandru.dutu@amd.com *(int *)iter = maxBarCnt; iter += sizeof(maxBarCnt); 86511644Salexandru.dutu@amd.com *(int *)iter = oldBarrierCnt; iter += sizeof(oldBarrierCnt); 86611644Salexandru.dutu@amd.com *(int *)iter = barrierCnt; iter += sizeof(barrierCnt); 86711644Salexandru.dutu@amd.com *(int *)iter = computeUnit->cu_id; iter += sizeof(computeUnit->cu_id); 86811644Salexandru.dutu@amd.com *(uint32_t *)iter = wgId; iter += sizeof(wgId); 86911644Salexandru.dutu@amd.com *(uint32_t *)iter = barrierId; iter += 
sizeof(barrierId); 87011644Salexandru.dutu@amd.com *(uint64_t *)iter = initMask.to_ullong(); iter += sizeof(initMask.to_ullong()); 87111644Salexandru.dutu@amd.com *(Addr *)iter = privBase; iter += sizeof(privBase); 87211644Salexandru.dutu@amd.com *(Addr *)iter = spillBase; iter += sizeof(spillBase); 87311644Salexandru.dutu@amd.com 87411644Salexandru.dutu@amd.com int stackSize = reconvergenceStack.size(); 87511644Salexandru.dutu@amd.com ReconvergenceStackEntry empty = {std::numeric_limits<uint32_t>::max(), 87611644Salexandru.dutu@amd.com std::numeric_limits<uint32_t>::max(), 87711644Salexandru.dutu@amd.com std::numeric_limits<uint64_t>::max()}; 87811644Salexandru.dutu@amd.com for (int i = 0; i < workItemId[0].size(); i++) { 87911644Salexandru.dutu@amd.com if (i < stackSize) { 88011644Salexandru.dutu@amd.com *(ReconvergenceStackEntry *)iter = *reconvergenceStack.back(); 88111644Salexandru.dutu@amd.com iter += sizeof(ReconvergenceStackEntry); 88211644Salexandru.dutu@amd.com reconvergenceStack.pop_back(); 88311644Salexandru.dutu@amd.com } else { 88411644Salexandru.dutu@amd.com *(ReconvergenceStackEntry *)iter = empty; 88511644Salexandru.dutu@amd.com iter += sizeof(ReconvergenceStackEntry); 88611644Salexandru.dutu@amd.com } 88711644Salexandru.dutu@amd.com } 88811644Salexandru.dutu@amd.com 88911644Salexandru.dutu@amd.com int wf_size = computeUnit->wfSize(); 89011644Salexandru.dutu@amd.com for (int i = 0; i < maxSpVgprs; i++) { 89111644Salexandru.dutu@amd.com uint32_t vgprIdx = remap(i, sizeof(uint32_t), 1); 89211644Salexandru.dutu@amd.com for (int lane = 0; lane < wf_size; lane++) { 89311644Salexandru.dutu@amd.com uint32_t regVal = computeUnit->vrf[simdId]-> 89411644Salexandru.dutu@amd.com read<uint32_t>(vgprIdx,lane); 89511644Salexandru.dutu@amd.com *(uint32_t *)iter = regVal; iter += sizeof(regVal); 89611644Salexandru.dutu@amd.com } 89711644Salexandru.dutu@amd.com } 89811644Salexandru.dutu@amd.com 89911644Salexandru.dutu@amd.com for (int i = 0; i < maxDpVgprs; i++) { 
90011644Salexandru.dutu@amd.com uint32_t vgprIdx = remap(i, sizeof(uint64_t), 1); 90111644Salexandru.dutu@amd.com for (int lane = 0; lane < wf_size; lane++) { 90211644Salexandru.dutu@amd.com uint64_t regVal = computeUnit->vrf[simdId]-> 90311644Salexandru.dutu@amd.com read<uint64_t>(vgprIdx,lane); 90411644Salexandru.dutu@amd.com *(uint64_t *)iter = regVal; iter += sizeof(regVal); 90511644Salexandru.dutu@amd.com } 90611644Salexandru.dutu@amd.com } 90711644Salexandru.dutu@amd.com 90811644Salexandru.dutu@amd.com for (int i = 0; i < condRegState->numRegs(); i++) { 90911644Salexandru.dutu@amd.com for (int lane = 0; lane < wf_size; lane++) { 91011644Salexandru.dutu@amd.com uint64_t regVal = condRegState->read<uint64_t>(i, lane); 91111644Salexandru.dutu@amd.com *(uint64_t *)iter = regVal; iter += sizeof(regVal); 91211644Salexandru.dutu@amd.com } 91311644Salexandru.dutu@amd.com } 91411644Salexandru.dutu@amd.com 91511644Salexandru.dutu@amd.com /* saving LDS content */ 91611644Salexandru.dutu@amd.com if (ldsChunk) 91711644Salexandru.dutu@amd.com for (int i = 0; i < ldsChunk->size(); i++) { 91811644Salexandru.dutu@amd.com char val = ldsChunk->read<char>(i); 91911644Salexandru.dutu@amd.com *(char *) iter = val; iter += sizeof(val); 92011644Salexandru.dutu@amd.com } 92111644Salexandru.dutu@amd.com} 92211644Salexandru.dutu@amd.com 92311644Salexandru.dutu@amd.comvoid 92411644Salexandru.dutu@amd.comWavefront::setContext(const void *in) 92511644Salexandru.dutu@amd.com{ 92611644Salexandru.dutu@amd.com uint8_t *iter = (uint8_t *)in; 92711644Salexandru.dutu@amd.com for (int i = 0; i < barCnt.size(); i++) { 92811644Salexandru.dutu@amd.com barCnt[i] = *(int *)iter; iter += sizeof(barCnt[i]); 92911644Salexandru.dutu@amd.com } 93011644Salexandru.dutu@amd.com wfId = *(int *)iter; iter += sizeof(wfId); 93111644Salexandru.dutu@amd.com maxBarCnt = *(int *)iter; iter += sizeof(maxBarCnt); 93211644Salexandru.dutu@amd.com oldBarrierCnt = *(int *)iter; iter += sizeof(oldBarrierCnt); 
93311644Salexandru.dutu@amd.com barrierCnt = *(int *)iter; iter += sizeof(barrierCnt); 93411644Salexandru.dutu@amd.com computeUnit->cu_id = *(int *)iter; iter += sizeof(computeUnit->cu_id); 93511644Salexandru.dutu@amd.com wgId = *(uint32_t *)iter; iter += sizeof(wgId); 93611644Salexandru.dutu@amd.com barrierId = *(uint32_t *)iter; iter += sizeof(barrierId); 93711644Salexandru.dutu@amd.com initMask = VectorMask(*(uint64_t *)iter); iter += sizeof(initMask); 93811644Salexandru.dutu@amd.com privBase = *(Addr *)iter; iter += sizeof(privBase); 93911644Salexandru.dutu@amd.com spillBase = *(Addr *)iter; iter += sizeof(spillBase); 94011644Salexandru.dutu@amd.com 94111644Salexandru.dutu@amd.com for (int i = 0; i < workItemId[0].size(); i++) { 94211644Salexandru.dutu@amd.com ReconvergenceStackEntry newEntry = *(ReconvergenceStackEntry *)iter; 94311644Salexandru.dutu@amd.com iter += sizeof(ReconvergenceStackEntry); 94411644Salexandru.dutu@amd.com if (newEntry.pc != std::numeric_limits<uint32_t>::max()) { 94511644Salexandru.dutu@amd.com pushToReconvergenceStack(newEntry.pc, newEntry.rpc, 94611644Salexandru.dutu@amd.com newEntry.execMask); 94711644Salexandru.dutu@amd.com } 94811644Salexandru.dutu@amd.com } 94911644Salexandru.dutu@amd.com int wf_size = computeUnit->wfSize(); 95011644Salexandru.dutu@amd.com 95111644Salexandru.dutu@amd.com for (int i = 0; i < maxSpVgprs; i++) { 95211644Salexandru.dutu@amd.com uint32_t vgprIdx = remap(i, sizeof(uint32_t), 1); 95311644Salexandru.dutu@amd.com for (int lane = 0; lane < wf_size; lane++) { 95411644Salexandru.dutu@amd.com uint32_t regVal = *(uint32_t *)iter; iter += sizeof(regVal); 95511644Salexandru.dutu@amd.com computeUnit->vrf[simdId]->write<uint32_t>(vgprIdx, regVal, lane); 95611644Salexandru.dutu@amd.com } 95711644Salexandru.dutu@amd.com } 95811644Salexandru.dutu@amd.com 95911644Salexandru.dutu@amd.com for (int i = 0; i < maxDpVgprs; i++) { 96011644Salexandru.dutu@amd.com uint32_t vgprIdx = remap(i, sizeof(uint64_t), 1); 
96111644Salexandru.dutu@amd.com for (int lane = 0; lane < wf_size; lane++) { 96211644Salexandru.dutu@amd.com uint64_t regVal = *(uint64_t *)iter; iter += sizeof(regVal); 96311644Salexandru.dutu@amd.com computeUnit->vrf[simdId]->write<uint64_t>(vgprIdx, regVal, lane); 96411644Salexandru.dutu@amd.com } 96511644Salexandru.dutu@amd.com } 96611644Salexandru.dutu@amd.com 96711644Salexandru.dutu@amd.com for (int i = 0; i < condRegState->numRegs(); i++) { 96811644Salexandru.dutu@amd.com for (int lane = 0; lane < wf_size; lane++) { 96911644Salexandru.dutu@amd.com uint64_t regVal = *(uint64_t *)iter; iter += sizeof(regVal); 97011644Salexandru.dutu@amd.com condRegState->write<uint64_t>(i, lane, regVal); 97111644Salexandru.dutu@amd.com } 97211644Salexandru.dutu@amd.com } 97311644Salexandru.dutu@amd.com /** Restoring LDS contents */ 97411644Salexandru.dutu@amd.com if (ldsChunk) 97511644Salexandru.dutu@amd.com for (int i = 0; i < ldsChunk->size(); i++) { 97611644Salexandru.dutu@amd.com char val = *(char *) iter; iter += sizeof(val); 97711644Salexandru.dutu@amd.com ldsChunk->write<char>(i, val); 97811644Salexandru.dutu@amd.com } 97911644Salexandru.dutu@amd.com} 98011657Salexandru.dutu@amd.com 98111657Salexandru.dutu@amd.comvoid 98211657Salexandru.dutu@amd.comWavefront::computeActualWgSz(NDRange *ndr) 98311657Salexandru.dutu@amd.com{ 98411657Salexandru.dutu@amd.com actualWgSzTotal = 1; 98511657Salexandru.dutu@amd.com for (int d = 0; d < 3; ++d) { 98611657Salexandru.dutu@amd.com actualWgSz[d] = std::min(workGroupSz[d], 98711657Salexandru.dutu@amd.com gridSz[d] - ndr->wgId[d] * workGroupSz[d]); 98811657Salexandru.dutu@amd.com actualWgSzTotal *= actualWgSz[d]; 98911657Salexandru.dutu@amd.com } 99011657Salexandru.dutu@amd.com} 991