wavefront.hh revision 12697
111308Santhony.gutierrez@amd.com/* 212697Santhony.gutierrez@amd.com * Copyright (c) 2011-2017 Advanced Micro Devices, Inc. 311308Santhony.gutierrez@amd.com * All rights reserved. 411308Santhony.gutierrez@amd.com * 511308Santhony.gutierrez@amd.com * For use for simulation and test purposes only 611308Santhony.gutierrez@amd.com * 711308Santhony.gutierrez@amd.com * Redistribution and use in source and binary forms, with or without 811308Santhony.gutierrez@amd.com * modification, are permitted provided that the following conditions are met: 911308Santhony.gutierrez@amd.com * 1011308Santhony.gutierrez@amd.com * 1. Redistributions of source code must retain the above copyright notice, 1111308Santhony.gutierrez@amd.com * this list of conditions and the following disclaimer. 1211308Santhony.gutierrez@amd.com * 1311308Santhony.gutierrez@amd.com * 2. Redistributions in binary form must reproduce the above copyright notice, 1411308Santhony.gutierrez@amd.com * this list of conditions and the following disclaimer in the documentation 1511308Santhony.gutierrez@amd.com * and/or other materials provided with the distribution. 1611308Santhony.gutierrez@amd.com * 1712697Santhony.gutierrez@amd.com * 3. Neither the name of the copyright holder nor the names of its 1812697Santhony.gutierrez@amd.com * contributors may be used to endorse or promote products derived from this 1912697Santhony.gutierrez@amd.com * software without specific prior written permission. 2011308Santhony.gutierrez@amd.com * 2111308Santhony.gutierrez@amd.com * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 2211308Santhony.gutierrez@amd.com * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 2311308Santhony.gutierrez@amd.com * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 2411308Santhony.gutierrez@amd.com * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 2511308Santhony.gutierrez@amd.com * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 2611308Santhony.gutierrez@amd.com * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 2711308Santhony.gutierrez@amd.com * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 2811308Santhony.gutierrez@amd.com * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 2911308Santhony.gutierrez@amd.com * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 3011308Santhony.gutierrez@amd.com * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 3111308Santhony.gutierrez@amd.com * POSSIBILITY OF SUCH DAMAGE. 3211308Santhony.gutierrez@amd.com * 3312697Santhony.gutierrez@amd.com * Authors: Lisa Hsu 3411308Santhony.gutierrez@amd.com */ 3511308Santhony.gutierrez@amd.com 3611308Santhony.gutierrez@amd.com#ifndef __WAVEFRONT_HH__ 3711308Santhony.gutierrez@amd.com#define __WAVEFRONT_HH__ 3811308Santhony.gutierrez@amd.com 3911308Santhony.gutierrez@amd.com#include <cassert> 4011308Santhony.gutierrez@amd.com#include <deque> 4111308Santhony.gutierrez@amd.com#include <memory> 4211308Santhony.gutierrez@amd.com#include <stack> 4311308Santhony.gutierrez@amd.com#include <vector> 4411308Santhony.gutierrez@amd.com 4511696Santhony.gutierrez@amd.com#include "arch/gpu_isa.hh" 4612334Sgabeblack@google.com#include "base/logging.hh" 4711308Santhony.gutierrez@amd.com#include "base/types.hh" 4811696Santhony.gutierrez@amd.com#include "config/the_gpu_isa.hh" 4911308Santhony.gutierrez@amd.com#include "gpu-compute/condition_register_state.hh" 5011308Santhony.gutierrez@amd.com#include "gpu-compute/lds_state.hh" 5111308Santhony.gutierrez@amd.com#include "gpu-compute/misc.hh" 5211657Salexandru.dutu@amd.com#include "gpu-compute/ndrange.hh" 5311308Santhony.gutierrez@amd.com#include "params/Wavefront.hh" 5411308Santhony.gutierrez@amd.com#include "sim/sim_object.hh" 5511308Santhony.gutierrez@amd.com 5611308Santhony.gutierrez@amd.comstatic const int MAX_NUM_INSTS_PER_WF = 12; 5711308Santhony.gutierrez@amd.com 5811641Salexandru.dutu@amd.com/** 5911641Salexandru.dutu@amd.com * A reconvergence stack entry conveys the necessary state to implement 6011641Salexandru.dutu@amd.com * control flow divergence. 6111641Salexandru.dutu@amd.com */ 6211641Salexandru.dutu@amd.comstruct ReconvergenceStackEntry { 6311641Salexandru.dutu@amd.com /** 6411641Salexandru.dutu@amd.com * PC of current instruction. 6511641Salexandru.dutu@amd.com */ 6611641Salexandru.dutu@amd.com uint32_t pc; 6711641Salexandru.dutu@amd.com /** 6811641Salexandru.dutu@amd.com * PC of the immediate post-dominator instruction, i.e., the value of 6911641Salexandru.dutu@amd.com * @a pc for the first instruction that will be executed by the wavefront 7011641Salexandru.dutu@amd.com * when a reconvergence point is reached. 7111641Salexandru.dutu@amd.com */ 7211641Salexandru.dutu@amd.com uint32_t rpc; 7311641Salexandru.dutu@amd.com /** 7411641Salexandru.dutu@amd.com * Execution mask. 7511641Salexandru.dutu@amd.com */ 7611641Salexandru.dutu@amd.com VectorMask execMask; 7711641Salexandru.dutu@amd.com}; 7811641Salexandru.dutu@amd.com 7911308Santhony.gutierrez@amd.com/* 8011308Santhony.gutierrez@amd.com * Arguments for the hsail opcode call, are user defined and variable length. 8111308Santhony.gutierrez@amd.com * The hardware/finalizer can support arguments in hardware or use memory to 8211308Santhony.gutierrez@amd.com * pass arguments. For now, let's assume that an unlimited number of arguments 8311308Santhony.gutierrez@amd.com * are supported in hardware (the compiler inlines functions whenver it can 8411308Santhony.gutierrez@amd.com * anyways, so unless someone is interested in the implications of linking/ 8511308Santhony.gutierrez@amd.com * library functions, I think this is a reasonable assumption given the typical 8611308Santhony.gutierrez@amd.com * size of an OpenCL kernel). 8711308Santhony.gutierrez@amd.com * 8811308Santhony.gutierrez@amd.com * Note that call args are different than kernel arguments: 8911308Santhony.gutierrez@amd.com * * All work-items in a kernel refer the same set of kernel arguments 9011308Santhony.gutierrez@amd.com * * Each work-item has it's on set of call args. So a call argument at 9111308Santhony.gutierrez@amd.com * address 0x4 is different for work-item 0 and work-item 1. 9211308Santhony.gutierrez@amd.com * 9311308Santhony.gutierrez@amd.com * Ok, the table below shows an example of how we organize the call arguments in 9411308Santhony.gutierrez@amd.com * the CallArgMem class. 9511308Santhony.gutierrez@amd.com * 9611308Santhony.gutierrez@amd.com * int foo(int arg1, double arg2) 9711308Santhony.gutierrez@amd.com * ___________________________________________________ 9811308Santhony.gutierrez@amd.com * | 0: return.0 | 4: return.1 | ... | 252: return.63 | 9911308Santhony.gutierrez@amd.com * |---------------------------------------------------| 10011308Santhony.gutierrez@amd.com * | 256: arg1.0 | 260: arg1.1 | ... | 508: arg1.63 | 10111308Santhony.gutierrez@amd.com * |---------------------------------------------------| 10211308Santhony.gutierrez@amd.com * | 512: arg2.0 | 520: arg2.1 | ... | 1016: arg2.63 | 10311308Santhony.gutierrez@amd.com * ___________________________________________________ 10411308Santhony.gutierrez@amd.com */ 10511308Santhony.gutierrez@amd.comclass CallArgMem 10611308Santhony.gutierrez@amd.com{ 10711308Santhony.gutierrez@amd.com public: 10811308Santhony.gutierrez@amd.com // pointer to buffer for storing function arguments 10911308Santhony.gutierrez@amd.com uint8_t *mem; 11011534Sjohn.kalamatianos@amd.com int wfSize; 11111308Santhony.gutierrez@amd.com // size of function args 11211308Santhony.gutierrez@amd.com int funcArgsSizePerItem; 11311308Santhony.gutierrez@amd.com 11411308Santhony.gutierrez@amd.com template<typename CType> 11511308Santhony.gutierrez@amd.com int 11611308Santhony.gutierrez@amd.com getLaneOffset(int lane, int addr) 11711308Santhony.gutierrez@amd.com { 11811534Sjohn.kalamatianos@amd.com return addr * wfSize + sizeof(CType) * lane; 11911308Santhony.gutierrez@amd.com } 12011308Santhony.gutierrez@amd.com 12111534Sjohn.kalamatianos@amd.com CallArgMem(int func_args_size_per_item, int wf_size) 12211534Sjohn.kalamatianos@amd.com : wfSize(wf_size), funcArgsSizePerItem(func_args_size_per_item) 12311308Santhony.gutierrez@amd.com { 12411534Sjohn.kalamatianos@amd.com mem = (uint8_t*)malloc(funcArgsSizePerItem * wfSize); 12511308Santhony.gutierrez@amd.com } 12611308Santhony.gutierrez@amd.com 12711308Santhony.gutierrez@amd.com ~CallArgMem() 12811308Santhony.gutierrez@amd.com { 12911308Santhony.gutierrez@amd.com free(mem); 13011308Santhony.gutierrez@amd.com } 13111308Santhony.gutierrez@amd.com 13211308Santhony.gutierrez@amd.com template<typename CType> 13311308Santhony.gutierrez@amd.com uint8_t* 13411308Santhony.gutierrez@amd.com getLaneAddr(int lane, int addr) 13511308Santhony.gutierrez@amd.com { 13611308Santhony.gutierrez@amd.com return mem + getLaneOffset<CType>(lane, addr); 13711308Santhony.gutierrez@amd.com } 13811308Santhony.gutierrez@amd.com 13911308Santhony.gutierrez@amd.com template<typename CType> 14011308Santhony.gutierrez@amd.com void 14111308Santhony.gutierrez@amd.com setLaneAddr(int lane, int addr, CType val) 14211308Santhony.gutierrez@amd.com { 14311308Santhony.gutierrez@amd.com *((CType*)(mem + getLaneOffset<CType>(lane, addr))) = val; 14411308Santhony.gutierrez@amd.com } 14511308Santhony.gutierrez@amd.com}; 14611308Santhony.gutierrez@amd.com 14711308Santhony.gutierrez@amd.comclass Wavefront : public SimObject 14811308Santhony.gutierrez@amd.com{ 14911308Santhony.gutierrez@amd.com public: 15011308Santhony.gutierrez@amd.com enum itype_e {I_ALU,I_GLOBAL,I_SHARED,I_FLAT,I_PRIVATE}; 15111308Santhony.gutierrez@amd.com enum status_e {S_STOPPED,S_RETURNING,S_RUNNING}; 15211308Santhony.gutierrez@amd.com 15311308Santhony.gutierrez@amd.com // Base pointer for array of instruction pointers 15411639Salexandru.dutu@amd.com uint64_t basePtr; 15511308Santhony.gutierrez@amd.com 15611639Salexandru.dutu@amd.com uint32_t oldBarrierCnt; 15711639Salexandru.dutu@amd.com uint32_t barrierCnt; 15811639Salexandru.dutu@amd.com uint32_t barrierId; 15911639Salexandru.dutu@amd.com uint32_t barrierSlots; 16011308Santhony.gutierrez@amd.com status_e status; 16111308Santhony.gutierrez@amd.com // HW slot id where the WF is mapped to inside a SIMD unit 16211308Santhony.gutierrez@amd.com int wfSlotId; 16311639Salexandru.dutu@amd.com int kernId; 16411308Santhony.gutierrez@amd.com // SIMD unit where the WV has been scheduled 16511308Santhony.gutierrez@amd.com int simdId; 16611308Santhony.gutierrez@amd.com // pointer to parent CU 16711308Santhony.gutierrez@amd.com ComputeUnit *computeUnit; 16811308Santhony.gutierrez@amd.com 16911308Santhony.gutierrez@amd.com std::deque<GPUDynInstPtr> instructionBuffer; 17011308Santhony.gutierrez@amd.com 17111308Santhony.gutierrez@amd.com bool pendingFetch; 17211308Santhony.gutierrez@amd.com bool dropFetch; 17311308Santhony.gutierrez@amd.com 17411308Santhony.gutierrez@amd.com // Condition Register State (for HSAIL simulations only) 17511308Santhony.gutierrez@amd.com class ConditionRegisterState *condRegState; 17611308Santhony.gutierrez@amd.com // number of single precision VGPRs required by WF 17711308Santhony.gutierrez@amd.com uint32_t maxSpVgprs; 17811308Santhony.gutierrez@amd.com // number of double precision VGPRs required by WF 17911308Santhony.gutierrez@amd.com uint32_t maxDpVgprs; 18011308Santhony.gutierrez@amd.com // map virtual to physical vector register 18111308Santhony.gutierrez@amd.com uint32_t remap(uint32_t vgprIndex, uint32_t size, uint8_t mode=0); 18211308Santhony.gutierrez@amd.com void resizeRegFiles(int num_cregs, int num_sregs, int num_dregs); 18311308Santhony.gutierrez@amd.com bool isGmInstruction(GPUDynInstPtr ii); 18411308Santhony.gutierrez@amd.com bool isLmInstruction(GPUDynInstPtr ii); 18511308Santhony.gutierrez@amd.com bool isOldestInstGMem(); 18611308Santhony.gutierrez@amd.com bool isOldestInstLMem(); 18711308Santhony.gutierrez@amd.com bool isOldestInstPrivMem(); 18811308Santhony.gutierrez@amd.com bool isOldestInstFlatMem(); 18911308Santhony.gutierrez@amd.com bool isOldestInstALU(); 19011308Santhony.gutierrez@amd.com bool isOldestInstBarrier(); 19111308Santhony.gutierrez@amd.com // used for passing spill address to DDInstGPU 19211639Salexandru.dutu@amd.com std::vector<Addr> lastAddr; 19311639Salexandru.dutu@amd.com std::vector<uint32_t> workItemId[3]; 19411639Salexandru.dutu@amd.com std::vector<uint32_t> workItemFlatId; 19511657Salexandru.dutu@amd.com /* kernel launch parameters */ 19611639Salexandru.dutu@amd.com uint32_t workGroupId[3]; 19711639Salexandru.dutu@amd.com uint32_t workGroupSz[3]; 19811639Salexandru.dutu@amd.com uint32_t gridSz[3]; 19911639Salexandru.dutu@amd.com uint32_t wgId; 20011639Salexandru.dutu@amd.com uint32_t wgSz; 20111657Salexandru.dutu@amd.com /* the actual WG size can differ than the maximum size */ 20211657Salexandru.dutu@amd.com uint32_t actualWgSz[3]; 20311657Salexandru.dutu@amd.com uint32_t actualWgSzTotal; 20411657Salexandru.dutu@amd.com void computeActualWgSz(NDRange *ndr); 20511643Salexandru.dutu@amd.com // wavefront id within a workgroup 20611643Salexandru.dutu@amd.com uint32_t wfId; 20711639Salexandru.dutu@amd.com uint32_t maxDynWaveId; 20811639Salexandru.dutu@amd.com uint32_t dispatchId; 20911308Santhony.gutierrez@amd.com // outstanding global+local memory requests 21011639Salexandru.dutu@amd.com uint32_t outstandingReqs; 21111308Santhony.gutierrez@amd.com // memory requests between scoreboard 21211308Santhony.gutierrez@amd.com // and execute stage not yet executed 21311639Salexandru.dutu@amd.com uint32_t memReqsInPipe; 21411308Santhony.gutierrez@amd.com // outstanding global memory write requests 21511639Salexandru.dutu@amd.com uint32_t outstandingReqsWrGm; 21611308Santhony.gutierrez@amd.com // outstanding local memory write requests 21711639Salexandru.dutu@amd.com uint32_t outstandingReqsWrLm; 21811308Santhony.gutierrez@amd.com // outstanding global memory read requests 21911639Salexandru.dutu@amd.com uint32_t outstandingReqsRdGm; 22011308Santhony.gutierrez@amd.com // outstanding local memory read requests 22111639Salexandru.dutu@amd.com uint32_t outstandingReqsRdLm; 22211639Salexandru.dutu@amd.com uint32_t rdLmReqsInPipe; 22311639Salexandru.dutu@amd.com uint32_t rdGmReqsInPipe; 22411639Salexandru.dutu@amd.com uint32_t wrLmReqsInPipe; 22511639Salexandru.dutu@amd.com uint32_t wrGmReqsInPipe; 22611308Santhony.gutierrez@amd.com 22711639Salexandru.dutu@amd.com int memTraceBusy; 22811639Salexandru.dutu@amd.com uint64_t lastTrace; 22911308Santhony.gutierrez@amd.com // number of vector registers reserved by WF 23011308Santhony.gutierrez@amd.com int reservedVectorRegs; 23111308Santhony.gutierrez@amd.com // Index into the Vector Register File's namespace where the WF's registers 23211308Santhony.gutierrez@amd.com // will live while the WF is executed 23311308Santhony.gutierrez@amd.com uint32_t startVgprIndex; 23411308Santhony.gutierrez@amd.com 23511308Santhony.gutierrez@amd.com // Old value of destination gpr (for trace) 23611639Salexandru.dutu@amd.com std::vector<uint32_t> oldVgpr; 23711308Santhony.gutierrez@amd.com // Id of destination gpr (for trace) 23811639Salexandru.dutu@amd.com uint32_t oldVgprId; 23911308Santhony.gutierrez@amd.com // Tick count of last old_vgpr copy 24011639Salexandru.dutu@amd.com uint64_t oldVgprTcnt; 24111308Santhony.gutierrez@amd.com 24211308Santhony.gutierrez@amd.com // Old value of destination gpr (for trace) 24311639Salexandru.dutu@amd.com std::vector<uint64_t> oldDgpr; 24411308Santhony.gutierrez@amd.com // Id of destination gpr (for trace) 24511639Salexandru.dutu@amd.com uint32_t oldDgprId; 24611308Santhony.gutierrez@amd.com // Tick count of last old_vgpr copy 24711639Salexandru.dutu@amd.com uint64_t oldDgprTcnt; 24811308Santhony.gutierrez@amd.com 24911308Santhony.gutierrez@amd.com // Execution mask at wavefront start 25011639Salexandru.dutu@amd.com VectorMask initMask; 25111308Santhony.gutierrez@amd.com 25211308Santhony.gutierrez@amd.com // number of barriers this WF has joined 25311639Salexandru.dutu@amd.com std::vector<int> barCnt; 25411639Salexandru.dutu@amd.com int maxBarCnt; 25511308Santhony.gutierrez@amd.com // Flag to stall a wave on barrier 25611308Santhony.gutierrez@amd.com bool stalledAtBarrier; 25711308Santhony.gutierrez@amd.com 25811308Santhony.gutierrez@amd.com // a pointer to the fraction of the LDS allocated 25911308Santhony.gutierrez@amd.com // to this workgroup (thus this wavefront) 26011308Santhony.gutierrez@amd.com LdsChunk *ldsChunk; 26111308Santhony.gutierrez@amd.com 26211308Santhony.gutierrez@amd.com // A pointer to the spill area 26311308Santhony.gutierrez@amd.com Addr spillBase; 26411308Santhony.gutierrez@amd.com // The size of the spill area 26511308Santhony.gutierrez@amd.com uint32_t spillSizePerItem; 26611308Santhony.gutierrez@amd.com // The vector width of the spill area 26711308Santhony.gutierrez@amd.com uint32_t spillWidth; 26811308Santhony.gutierrez@amd.com 26911308Santhony.gutierrez@amd.com // A pointer to the private memory area 27011308Santhony.gutierrez@amd.com Addr privBase; 27111308Santhony.gutierrez@amd.com // The size of the private memory area 27211308Santhony.gutierrez@amd.com uint32_t privSizePerItem; 27311308Santhony.gutierrez@amd.com 27411308Santhony.gutierrez@amd.com // A pointer ot the read-only memory area 27511308Santhony.gutierrez@amd.com Addr roBase; 27611308Santhony.gutierrez@amd.com // size of the read-only memory area 27711308Santhony.gutierrez@amd.com uint32_t roSize; 27811308Santhony.gutierrez@amd.com 27911308Santhony.gutierrez@amd.com // pointer to buffer for storing kernel arguments 28011308Santhony.gutierrez@amd.com uint8_t *kernelArgs; 28111308Santhony.gutierrez@amd.com // unique WF id over all WFs executed across all CUs 28211308Santhony.gutierrez@amd.com uint64_t wfDynId; 28311308Santhony.gutierrez@amd.com 28411308Santhony.gutierrez@amd.com // number of times instruction issue for this wavefront is blocked 28511308Santhony.gutierrez@amd.com // due to VRF port availability 28611308Santhony.gutierrez@amd.com Stats::Scalar numTimesBlockedDueVrfPortAvail; 28711308Santhony.gutierrez@amd.com // number of times an instruction of a WF is blocked from being issued 28811308Santhony.gutierrez@amd.com // due to WAR and WAW dependencies 28911308Santhony.gutierrez@amd.com Stats::Scalar numTimesBlockedDueWAXDependencies; 29011308Santhony.gutierrez@amd.com // number of times an instruction of a WF is blocked from being issued 29111308Santhony.gutierrez@amd.com // due to WAR and WAW dependencies 29211308Santhony.gutierrez@amd.com Stats::Scalar numTimesBlockedDueRAWDependencies; 29311308Santhony.gutierrez@amd.com // distribution of executed instructions based on their register 29411308Santhony.gutierrez@amd.com // operands; this is used to highlight the load on the VRF 29511308Santhony.gutierrez@amd.com Stats::Distribution srcRegOpDist; 29611308Santhony.gutierrez@amd.com Stats::Distribution dstRegOpDist; 29711308Santhony.gutierrez@amd.com 29811308Santhony.gutierrez@amd.com // Functions to operate on call argument memory 29911308Santhony.gutierrez@amd.com // argument memory for hsail call instruction 30011308Santhony.gutierrez@amd.com CallArgMem *callArgMem; 30111308Santhony.gutierrez@amd.com void 30211534Sjohn.kalamatianos@amd.com initCallArgMem(int func_args_size_per_item, int wf_size) 30311308Santhony.gutierrez@amd.com { 30411534Sjohn.kalamatianos@amd.com callArgMem = new CallArgMem(func_args_size_per_item, wf_size); 30511308Santhony.gutierrez@amd.com } 30611308Santhony.gutierrez@amd.com 30711308Santhony.gutierrez@amd.com template<typename CType> 30811308Santhony.gutierrez@amd.com CType 30911308Santhony.gutierrez@amd.com readCallArgMem(int lane, int addr) 31011308Santhony.gutierrez@amd.com { 31111308Santhony.gutierrez@amd.com return *((CType*)(callArgMem->getLaneAddr<CType>(lane, addr))); 31211308Santhony.gutierrez@amd.com } 31311308Santhony.gutierrez@amd.com 31411308Santhony.gutierrez@amd.com template<typename CType> 31511308Santhony.gutierrez@amd.com void 31611308Santhony.gutierrez@amd.com writeCallArgMem(int lane, int addr, CType val) 31711308Santhony.gutierrez@amd.com { 31811308Santhony.gutierrez@amd.com callArgMem->setLaneAddr<CType>(lane, addr, val); 31911308Santhony.gutierrez@amd.com } 32011308Santhony.gutierrez@amd.com 32111308Santhony.gutierrez@amd.com typedef WavefrontParams Params; 32211308Santhony.gutierrez@amd.com Wavefront(const Params *p); 32311308Santhony.gutierrez@amd.com ~Wavefront(); 32411308Santhony.gutierrez@amd.com virtual void init(); 32511308Santhony.gutierrez@amd.com 32611308Santhony.gutierrez@amd.com void 32711308Santhony.gutierrez@amd.com setParent(ComputeUnit *cu) 32811308Santhony.gutierrez@amd.com { 32911308Santhony.gutierrez@amd.com computeUnit = cu; 33011308Santhony.gutierrez@amd.com } 33111308Santhony.gutierrez@amd.com 33211308Santhony.gutierrez@amd.com void start(uint64_t _wfDynId, uint64_t _base_ptr); 33311308Santhony.gutierrez@amd.com void exec(); 33411308Santhony.gutierrez@amd.com void updateResources(); 33511308Santhony.gutierrez@amd.com int ready(itype_e type); 33611308Santhony.gutierrez@amd.com bool instructionBufferHasBranch(); 33711308Santhony.gutierrez@amd.com void regStats(); 33811639Salexandru.dutu@amd.com VectorMask getPred() { return execMask() & initMask; } 33911308Santhony.gutierrez@amd.com 34011308Santhony.gutierrez@amd.com bool waitingAtBarrier(int lane); 34111308Santhony.gutierrez@amd.com 34211308Santhony.gutierrez@amd.com void pushToReconvergenceStack(uint32_t pc, uint32_t rpc, 34311308Santhony.gutierrez@amd.com const VectorMask& exec_mask); 34411308Santhony.gutierrez@amd.com 34511308Santhony.gutierrez@amd.com void popFromReconvergenceStack(); 34611308Santhony.gutierrez@amd.com 34711308Santhony.gutierrez@amd.com uint32_t pc() const; 34811308Santhony.gutierrez@amd.com 34911308Santhony.gutierrez@amd.com uint32_t rpc() const; 35011308Santhony.gutierrez@amd.com 35111308Santhony.gutierrez@amd.com VectorMask execMask() const; 35211308Santhony.gutierrez@amd.com 35311308Santhony.gutierrez@amd.com bool execMask(int lane) const; 35411308Santhony.gutierrez@amd.com 35511308Santhony.gutierrez@amd.com void pc(uint32_t new_pc); 35611308Santhony.gutierrez@amd.com 35711308Santhony.gutierrez@amd.com void discardFetch(); 35811308Santhony.gutierrez@amd.com 35911640Salexandru.dutu@amd.com /** 36011640Salexandru.dutu@amd.com * Returns the size of the static hardware context of a particular wavefront 36111640Salexandru.dutu@amd.com * This should be updated everytime the context is changed 36211640Salexandru.dutu@amd.com */ 36311640Salexandru.dutu@amd.com uint32_t getStaticContextSize() const; 36411640Salexandru.dutu@amd.com 36511644Salexandru.dutu@amd.com /** 36611644Salexandru.dutu@amd.com * Returns the hardware context as a stream of bytes 36711644Salexandru.dutu@amd.com * This method is designed for HSAIL execution 36811644Salexandru.dutu@amd.com */ 36911644Salexandru.dutu@amd.com void getContext(const void *out); 37011644Salexandru.dutu@amd.com 37111644Salexandru.dutu@amd.com /** 37211644Salexandru.dutu@amd.com * Sets the hardware context fromt a stream of bytes 37311644Salexandru.dutu@amd.com * This method is designed for HSAIL execution 37411644Salexandru.dutu@amd.com */ 37511644Salexandru.dutu@amd.com void setContext(const void *in); 37611644Salexandru.dutu@amd.com 37711696Santhony.gutierrez@amd.com TheGpuISA::GPUISA& 37811696Santhony.gutierrez@amd.com gpuISA() 37911696Santhony.gutierrez@amd.com { 38011696Santhony.gutierrez@amd.com return _gpuISA; 38111696Santhony.gutierrez@amd.com } 38211696Santhony.gutierrez@amd.com 38311308Santhony.gutierrez@amd.com private: 38411696Santhony.gutierrez@amd.com TheGpuISA::GPUISA _gpuISA; 38511308Santhony.gutierrez@amd.com /** 38611308Santhony.gutierrez@amd.com * Stack containing Control Flow Graph nodes (i.e., kernel instructions) 38711308Santhony.gutierrez@amd.com * to be visited by the wavefront, and the associated execution masks. The 38811308Santhony.gutierrez@amd.com * reconvergence stack grows every time the wavefront reaches a divergence 38911308Santhony.gutierrez@amd.com * point (branch instruction), and shrinks every time the wavefront 39011308Santhony.gutierrez@amd.com * reaches a reconvergence point (immediate post-dominator instruction). 39111308Santhony.gutierrez@amd.com */ 39211641Salexandru.dutu@amd.com std::deque<std::unique_ptr<ReconvergenceStackEntry>> reconvergenceStack; 39311308Santhony.gutierrez@amd.com}; 39411308Santhony.gutierrez@amd.com 39511308Santhony.gutierrez@amd.com#endif // __WAVEFRONT_HH__ 396