wavefront.hh revision 11639
110447Snilay@cs.wisc.edu/* 210447Snilay@cs.wisc.edu * Copyright (c) 2011-2015 Advanced Micro Devices, Inc. 310447Snilay@cs.wisc.edu * All rights reserved. 410447Snilay@cs.wisc.edu * 510447Snilay@cs.wisc.edu * For use for simulation and test purposes only 610447Snilay@cs.wisc.edu * 710447Snilay@cs.wisc.edu * Redistribution and use in source and binary forms, with or without 810447Snilay@cs.wisc.edu * modification, are permitted provided that the following conditions are met: 910447Snilay@cs.wisc.edu * 1010447Snilay@cs.wisc.edu * 1. Redistributions of source code must retain the above copyright notice, 1110447Snilay@cs.wisc.edu * this list of conditions and the following disclaimer. 1210447Snilay@cs.wisc.edu * 1310447Snilay@cs.wisc.edu * 2. Redistributions in binary form must reproduce the above copyright notice, 1410447Snilay@cs.wisc.edu * this list of conditions and the following disclaimer in the documentation 1510447Snilay@cs.wisc.edu * and/or other materials provided with the distribution. 1610447Snilay@cs.wisc.edu * 1710447Snilay@cs.wisc.edu * 3. Neither the name of the copyright holder nor the names of its contributors 1810447Snilay@cs.wisc.edu * may be used to endorse or promote products derived from this software 1910447Snilay@cs.wisc.edu * without specific prior written permission. 2010447Snilay@cs.wisc.edu * 2110447Snilay@cs.wisc.edu * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 2210447Snilay@cs.wisc.edu * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 2310447Snilay@cs.wisc.edu * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 2410447Snilay@cs.wisc.edu * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 2510447Snilay@cs.wisc.edu * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 2610447Snilay@cs.wisc.edu * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 2710447Snilay@cs.wisc.edu * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 2810447Snilay@cs.wisc.edu * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 2910447Snilay@cs.wisc.edu * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 3010447Snilay@cs.wisc.edu * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 3110447Snilay@cs.wisc.edu * POSSIBILITY OF SUCH DAMAGE. 3210447Snilay@cs.wisc.edu * 3310447Snilay@cs.wisc.edu * Author: Lisa Hsu 3410447Snilay@cs.wisc.edu */ 3510447Snilay@cs.wisc.edu 3610447Snilay@cs.wisc.edu#ifndef __WAVEFRONT_HH__ 3710447Snilay@cs.wisc.edu#define __WAVEFRONT_HH__ 3810447Snilay@cs.wisc.edu 3910447Snilay@cs.wisc.edu#include <cassert> 4010447Snilay@cs.wisc.edu#include <deque> 4110447Snilay@cs.wisc.edu#include <memory> 4210447Snilay@cs.wisc.edu#include <stack> 4310447Snilay@cs.wisc.edu#include <vector> 4410447Snilay@cs.wisc.edu 4510447Snilay@cs.wisc.edu#include "base/misc.hh" 4610447Snilay@cs.wisc.edu#include "base/types.hh" 4710447Snilay@cs.wisc.edu#include "gpu-compute/condition_register_state.hh" 4810447Snilay@cs.wisc.edu#include "gpu-compute/lds_state.hh" 4910447Snilay@cs.wisc.edu#include "gpu-compute/misc.hh" 5010447Snilay@cs.wisc.edu#include "params/Wavefront.hh" 5110447Snilay@cs.wisc.edu#include "sim/sim_object.hh" 5210447Snilay@cs.wisc.edu 5310447Snilay@cs.wisc.edustatic const int MAX_NUM_INSTS_PER_WF = 12; 5410447Snilay@cs.wisc.edu 5510447Snilay@cs.wisc.edu/* 5610447Snilay@cs.wisc.edu * Arguments for the hsail opcode call, are user defined and variable length. 5710447Snilay@cs.wisc.edu * The hardware/finalizer can support arguments in hardware or use memory to 5810447Snilay@cs.wisc.edu * pass arguments. For now, let's assume that an unlimited number of arguments 5910447Snilay@cs.wisc.edu * are supported in hardware (the compiler inlines functions whenver it can 6010447Snilay@cs.wisc.edu * anyways, so unless someone is interested in the implications of linking/ 6110447Snilay@cs.wisc.edu * library functions, I think this is a reasonable assumption given the typical 6210447Snilay@cs.wisc.edu * size of an OpenCL kernel). 6310447Snilay@cs.wisc.edu * 6410447Snilay@cs.wisc.edu * Note that call args are different than kernel arguments: 6510447Snilay@cs.wisc.edu * * All work-items in a kernel refer the same set of kernel arguments 6610447Snilay@cs.wisc.edu * * Each work-item has it's on set of call args. So a call argument at 6710447Snilay@cs.wisc.edu * address 0x4 is different for work-item 0 and work-item 1. 6810447Snilay@cs.wisc.edu * 6910447Snilay@cs.wisc.edu * Ok, the table below shows an example of how we organize the call arguments in 7010447Snilay@cs.wisc.edu * the CallArgMem class. 7110447Snilay@cs.wisc.edu * 7210447Snilay@cs.wisc.edu * int foo(int arg1, double arg2) 7310447Snilay@cs.wisc.edu * ___________________________________________________ 7410447Snilay@cs.wisc.edu * | 0: return.0 | 4: return.1 | ... | 252: return.63 | 7510447Snilay@cs.wisc.edu * |---------------------------------------------------| 7610447Snilay@cs.wisc.edu * | 256: arg1.0 | 260: arg1.1 | ... | 508: arg1.63 | 7710447Snilay@cs.wisc.edu * |---------------------------------------------------| 7810447Snilay@cs.wisc.edu * | 512: arg2.0 | 520: arg2.1 | ... | 1016: arg2.63 | 7910447Snilay@cs.wisc.edu * ___________________________________________________ 8010447Snilay@cs.wisc.edu */ 8110447Snilay@cs.wisc.educlass CallArgMem 8210447Snilay@cs.wisc.edu{ 8310447Snilay@cs.wisc.edu public: 8410447Snilay@cs.wisc.edu // pointer to buffer for storing function arguments 8510447Snilay@cs.wisc.edu uint8_t *mem; 8610447Snilay@cs.wisc.edu int wfSize; 8710447Snilay@cs.wisc.edu // size of function args 8810447Snilay@cs.wisc.edu int funcArgsSizePerItem; 8910447Snilay@cs.wisc.edu 9010447Snilay@cs.wisc.edu template<typename CType> 9110447Snilay@cs.wisc.edu int 9210447Snilay@cs.wisc.edu getLaneOffset(int lane, int addr) 9310447Snilay@cs.wisc.edu { 9410447Snilay@cs.wisc.edu return addr * wfSize + sizeof(CType) * lane; 9510447Snilay@cs.wisc.edu } 9610447Snilay@cs.wisc.edu 9710447Snilay@cs.wisc.edu CallArgMem(int func_args_size_per_item, int wf_size) 9810447Snilay@cs.wisc.edu : wfSize(wf_size), funcArgsSizePerItem(func_args_size_per_item) 9910447Snilay@cs.wisc.edu { 10010447Snilay@cs.wisc.edu mem = (uint8_t*)malloc(funcArgsSizePerItem * wfSize); 10110447Snilay@cs.wisc.edu } 10210447Snilay@cs.wisc.edu 10310447Snilay@cs.wisc.edu ~CallArgMem() 10410447Snilay@cs.wisc.edu { 10510447Snilay@cs.wisc.edu free(mem); 10610447Snilay@cs.wisc.edu } 10710447Snilay@cs.wisc.edu 10810447Snilay@cs.wisc.edu template<typename CType> 10910447Snilay@cs.wisc.edu uint8_t* 11010447Snilay@cs.wisc.edu getLaneAddr(int lane, int addr) 11110447Snilay@cs.wisc.edu { 11210447Snilay@cs.wisc.edu return mem + getLaneOffset<CType>(lane, addr); 11310447Snilay@cs.wisc.edu } 11410447Snilay@cs.wisc.edu 11510447Snilay@cs.wisc.edu template<typename CType> 11610447Snilay@cs.wisc.edu void 11710447Snilay@cs.wisc.edu setLaneAddr(int lane, int addr, CType val) 11810447Snilay@cs.wisc.edu { 11910447Snilay@cs.wisc.edu *((CType*)(mem + getLaneOffset<CType>(lane, addr))) = val; 12010447Snilay@cs.wisc.edu } 12110447Snilay@cs.wisc.edu}; 12210447Snilay@cs.wisc.edu 12310447Snilay@cs.wisc.edu/** 12410447Snilay@cs.wisc.edu * A reconvergence stack entry conveys the necessary state to implement 12510447Snilay@cs.wisc.edu * control flow divergence. 12610447Snilay@cs.wisc.edu */ 12710447Snilay@cs.wisc.educlass ReconvergenceStackEntry { 12810447Snilay@cs.wisc.edu 12910447Snilay@cs.wisc.edu public: 13010447Snilay@cs.wisc.edu ReconvergenceStackEntry(uint32_t new_pc, uint32_t new_rpc, 13110447Snilay@cs.wisc.edu VectorMask new_mask) : pc(new_pc), rpc(new_rpc), 13210447Snilay@cs.wisc.edu execMask(new_mask) { 13310447Snilay@cs.wisc.edu } 13410447Snilay@cs.wisc.edu 13510447Snilay@cs.wisc.edu /** 13610447Snilay@cs.wisc.edu * PC of current instruction. 13710447Snilay@cs.wisc.edu */ 13810447Snilay@cs.wisc.edu uint32_t pc; 13910447Snilay@cs.wisc.edu /** 14010447Snilay@cs.wisc.edu * PC of the immediate post-dominator instruction, i.e., the value of 14110447Snilay@cs.wisc.edu * @a pc for the first instruction that will be executed by the wavefront 14210447Snilay@cs.wisc.edu * when a reconvergence point is reached. 14310447Snilay@cs.wisc.edu */ 14410447Snilay@cs.wisc.edu uint32_t rpc; 14510447Snilay@cs.wisc.edu /** 14610447Snilay@cs.wisc.edu * Execution mask. 14710447Snilay@cs.wisc.edu */ 14810447Snilay@cs.wisc.edu VectorMask execMask; 14910447Snilay@cs.wisc.edu}; 15010447Snilay@cs.wisc.edu 15110447Snilay@cs.wisc.educlass Wavefront : public SimObject 15210447Snilay@cs.wisc.edu{ 15310447Snilay@cs.wisc.edu public: 15410447Snilay@cs.wisc.edu enum itype_e {I_ALU,I_GLOBAL,I_SHARED,I_FLAT,I_PRIVATE}; 15510447Snilay@cs.wisc.edu enum status_e {S_STOPPED,S_RETURNING,S_RUNNING}; 15610447Snilay@cs.wisc.edu 15710447Snilay@cs.wisc.edu // Base pointer for array of instruction pointers 15810447Snilay@cs.wisc.edu uint64_t basePtr; 15910447Snilay@cs.wisc.edu 16010447Snilay@cs.wisc.edu uint32_t oldBarrierCnt; 16110447Snilay@cs.wisc.edu uint32_t barrierCnt; 16210447Snilay@cs.wisc.edu uint32_t barrierId; 16310447Snilay@cs.wisc.edu uint32_t barrierSlots; 16410447Snilay@cs.wisc.edu status_e status; 16510447Snilay@cs.wisc.edu // HW slot id where the WF is mapped to inside a SIMD unit 16610447Snilay@cs.wisc.edu int wfSlotId; 16710447Snilay@cs.wisc.edu int kernId; 16810447Snilay@cs.wisc.edu // SIMD unit where the WV has been scheduled 16910447Snilay@cs.wisc.edu int simdId; 17010447Snilay@cs.wisc.edu // pointer to parent CU 17110447Snilay@cs.wisc.edu ComputeUnit *computeUnit; 17210447Snilay@cs.wisc.edu 17310447Snilay@cs.wisc.edu std::deque<GPUDynInstPtr> instructionBuffer; 17410447Snilay@cs.wisc.edu 17510447Snilay@cs.wisc.edu bool pendingFetch; 17610447Snilay@cs.wisc.edu bool dropFetch; 17710447Snilay@cs.wisc.edu 17810447Snilay@cs.wisc.edu // Condition Register State (for HSAIL simulations only) 17910447Snilay@cs.wisc.edu class ConditionRegisterState *condRegState; 18010447Snilay@cs.wisc.edu // number of single precision VGPRs required by WF 18110447Snilay@cs.wisc.edu uint32_t maxSpVgprs; 18210447Snilay@cs.wisc.edu // number of double precision VGPRs required by WF 18310447Snilay@cs.wisc.edu uint32_t maxDpVgprs; 18410447Snilay@cs.wisc.edu // map virtual to physical vector register 18510447Snilay@cs.wisc.edu uint32_t remap(uint32_t vgprIndex, uint32_t size, uint8_t mode=0); 18610447Snilay@cs.wisc.edu void resizeRegFiles(int num_cregs, int num_sregs, int num_dregs); 18710447Snilay@cs.wisc.edu bool isGmInstruction(GPUDynInstPtr ii); 18810447Snilay@cs.wisc.edu bool isLmInstruction(GPUDynInstPtr ii); 18910447Snilay@cs.wisc.edu bool isOldestInstGMem(); 19010447Snilay@cs.wisc.edu bool isOldestInstLMem(); 19110447Snilay@cs.wisc.edu bool isOldestInstPrivMem(); 19210447Snilay@cs.wisc.edu bool isOldestInstFlatMem(); 19310447Snilay@cs.wisc.edu bool isOldestInstALU(); 19410447Snilay@cs.wisc.edu bool isOldestInstBarrier(); 19510447Snilay@cs.wisc.edu // used for passing spill address to DDInstGPU 19610447Snilay@cs.wisc.edu std::vector<Addr> lastAddr; 19710447Snilay@cs.wisc.edu std::vector<uint32_t> workItemId[3]; 19810447Snilay@cs.wisc.edu std::vector<uint32_t> workItemFlatId; 19910447Snilay@cs.wisc.edu uint32_t workGroupId[3]; 20010447Snilay@cs.wisc.edu uint32_t workGroupSz[3]; 20110447Snilay@cs.wisc.edu uint32_t gridSz[3]; 20210447Snilay@cs.wisc.edu uint32_t wgId; 20310447Snilay@cs.wisc.edu uint32_t wgSz; 20410447Snilay@cs.wisc.edu uint32_t dynWaveId; 20510447Snilay@cs.wisc.edu uint32_t maxDynWaveId; 20610447Snilay@cs.wisc.edu uint32_t dispatchId; 20710447Snilay@cs.wisc.edu // outstanding global+local memory requests 20810447Snilay@cs.wisc.edu uint32_t outstandingReqs; 20910447Snilay@cs.wisc.edu // memory requests between scoreboard 21010447Snilay@cs.wisc.edu // and execute stage not yet executed 21110447Snilay@cs.wisc.edu uint32_t memReqsInPipe; 21210447Snilay@cs.wisc.edu // outstanding global memory write requests 21310447Snilay@cs.wisc.edu uint32_t outstandingReqsWrGm; 21410447Snilay@cs.wisc.edu // outstanding local memory write requests 21510447Snilay@cs.wisc.edu uint32_t outstandingReqsWrLm; 21610447Snilay@cs.wisc.edu // outstanding global memory read requests 21710447Snilay@cs.wisc.edu uint32_t outstandingReqsRdGm; 21810447Snilay@cs.wisc.edu // outstanding local memory read requests 21910447Snilay@cs.wisc.edu uint32_t outstandingReqsRdLm; 22010447Snilay@cs.wisc.edu uint32_t rdLmReqsInPipe; 22110447Snilay@cs.wisc.edu uint32_t rdGmReqsInPipe; 22210447Snilay@cs.wisc.edu uint32_t wrLmReqsInPipe; 22310447Snilay@cs.wisc.edu uint32_t wrGmReqsInPipe; 22410447Snilay@cs.wisc.edu 22510447Snilay@cs.wisc.edu int memTraceBusy; 22610447Snilay@cs.wisc.edu uint64_t lastTrace; 22710447Snilay@cs.wisc.edu // number of vector registers reserved by WF 22810447Snilay@cs.wisc.edu int reservedVectorRegs; 22910447Snilay@cs.wisc.edu // Index into the Vector Register File's namespace where the WF's registers 23010447Snilay@cs.wisc.edu // will live while the WF is executed 23110447Snilay@cs.wisc.edu uint32_t startVgprIndex; 23210447Snilay@cs.wisc.edu 23310447Snilay@cs.wisc.edu // Old value of destination gpr (for trace) 23410447Snilay@cs.wisc.edu std::vector<uint32_t> oldVgpr; 23510447Snilay@cs.wisc.edu // Id of destination gpr (for trace) 23610447Snilay@cs.wisc.edu uint32_t oldVgprId; 23710447Snilay@cs.wisc.edu // Tick count of last old_vgpr copy 23810447Snilay@cs.wisc.edu uint64_t oldVgprTcnt; 23910447Snilay@cs.wisc.edu 24010447Snilay@cs.wisc.edu // Old value of destination gpr (for trace) 24110447Snilay@cs.wisc.edu std::vector<uint64_t> oldDgpr; 24210447Snilay@cs.wisc.edu // Id of destination gpr (for trace) 24310447Snilay@cs.wisc.edu uint32_t oldDgprId; 24410447Snilay@cs.wisc.edu // Tick count of last old_vgpr copy 24510447Snilay@cs.wisc.edu uint64_t oldDgprTcnt; 24610447Snilay@cs.wisc.edu 24710447Snilay@cs.wisc.edu // Execution mask at wavefront start 24810447Snilay@cs.wisc.edu VectorMask initMask; 24910447Snilay@cs.wisc.edu 25010447Snilay@cs.wisc.edu // number of barriers this WF has joined 25110447Snilay@cs.wisc.edu std::vector<int> barCnt; 25210447Snilay@cs.wisc.edu int maxBarCnt; 25310447Snilay@cs.wisc.edu // Flag to stall a wave on barrier 25410447Snilay@cs.wisc.edu bool stalledAtBarrier; 25510447Snilay@cs.wisc.edu 25610447Snilay@cs.wisc.edu // a pointer to the fraction of the LDS allocated 25710447Snilay@cs.wisc.edu // to this workgroup (thus this wavefront) 25810447Snilay@cs.wisc.edu LdsChunk *ldsChunk; 25910447Snilay@cs.wisc.edu 26010447Snilay@cs.wisc.edu // A pointer to the spill area 26110447Snilay@cs.wisc.edu Addr spillBase; 26210447Snilay@cs.wisc.edu // The size of the spill area 26310447Snilay@cs.wisc.edu uint32_t spillSizePerItem; 26410447Snilay@cs.wisc.edu // The vector width of the spill area 26510447Snilay@cs.wisc.edu uint32_t spillWidth; 26610447Snilay@cs.wisc.edu 26710447Snilay@cs.wisc.edu // A pointer to the private memory area 26810447Snilay@cs.wisc.edu Addr privBase; 26910447Snilay@cs.wisc.edu // The size of the private memory area 27010447Snilay@cs.wisc.edu uint32_t privSizePerItem; 27110447Snilay@cs.wisc.edu 27210447Snilay@cs.wisc.edu // A pointer ot the read-only memory area 27310447Snilay@cs.wisc.edu Addr roBase; 27410447Snilay@cs.wisc.edu // size of the read-only memory area 27510447Snilay@cs.wisc.edu uint32_t roSize; 27610447Snilay@cs.wisc.edu 27710447Snilay@cs.wisc.edu // pointer to buffer for storing kernel arguments 27810447Snilay@cs.wisc.edu uint8_t *kernelArgs; 27910447Snilay@cs.wisc.edu // unique WF id over all WFs executed across all CUs 28010447Snilay@cs.wisc.edu uint64_t wfDynId; 28110447Snilay@cs.wisc.edu 28210447Snilay@cs.wisc.edu // number of times instruction issue for this wavefront is blocked 28310447Snilay@cs.wisc.edu // due to VRF port availability 28410447Snilay@cs.wisc.edu Stats::Scalar numTimesBlockedDueVrfPortAvail; 28510447Snilay@cs.wisc.edu // number of times an instruction of a WF is blocked from being issued 28610447Snilay@cs.wisc.edu // due to WAR and WAW dependencies 28710447Snilay@cs.wisc.edu Stats::Scalar numTimesBlockedDueWAXDependencies; 28810447Snilay@cs.wisc.edu // number of times an instruction of a WF is blocked from being issued 28910447Snilay@cs.wisc.edu // due to WAR and WAW dependencies 29010447Snilay@cs.wisc.edu Stats::Scalar numTimesBlockedDueRAWDependencies; 29110447Snilay@cs.wisc.edu // distribution of executed instructions based on their register 29210447Snilay@cs.wisc.edu // operands; this is used to highlight the load on the VRF 29310447Snilay@cs.wisc.edu Stats::Distribution srcRegOpDist; 29410447Snilay@cs.wisc.edu Stats::Distribution dstRegOpDist; 29510447Snilay@cs.wisc.edu 29610447Snilay@cs.wisc.edu // Functions to operate on call argument memory 29710447Snilay@cs.wisc.edu // argument memory for hsail call instruction 29810447Snilay@cs.wisc.edu CallArgMem *callArgMem; 29910447Snilay@cs.wisc.edu void 30010447Snilay@cs.wisc.edu initCallArgMem(int func_args_size_per_item, int wf_size) 30110447Snilay@cs.wisc.edu { 30210447Snilay@cs.wisc.edu callArgMem = new CallArgMem(func_args_size_per_item, wf_size); 30310447Snilay@cs.wisc.edu } 30410447Snilay@cs.wisc.edu 30510447Snilay@cs.wisc.edu template<typename CType> 30610447Snilay@cs.wisc.edu CType 30710447Snilay@cs.wisc.edu readCallArgMem(int lane, int addr) 30810447Snilay@cs.wisc.edu { 30910447Snilay@cs.wisc.edu return *((CType*)(callArgMem->getLaneAddr<CType>(lane, addr))); 31010447Snilay@cs.wisc.edu } 31110447Snilay@cs.wisc.edu 31210447Snilay@cs.wisc.edu template<typename CType> 31310447Snilay@cs.wisc.edu void 31410447Snilay@cs.wisc.edu writeCallArgMem(int lane, int addr, CType val) 31510447Snilay@cs.wisc.edu { 31610447Snilay@cs.wisc.edu callArgMem->setLaneAddr<CType>(lane, addr, val); 31710447Snilay@cs.wisc.edu } 31810447Snilay@cs.wisc.edu 31910447Snilay@cs.wisc.edu typedef WavefrontParams Params; 32010447Snilay@cs.wisc.edu Wavefront(const Params *p); 32110447Snilay@cs.wisc.edu ~Wavefront(); 32210447Snilay@cs.wisc.edu virtual void init(); 32310447Snilay@cs.wisc.edu 32410447Snilay@cs.wisc.edu void 32510447Snilay@cs.wisc.edu setParent(ComputeUnit *cu) 32610447Snilay@cs.wisc.edu { 32710447Snilay@cs.wisc.edu computeUnit = cu; 32810447Snilay@cs.wisc.edu } 32910447Snilay@cs.wisc.edu 33010447Snilay@cs.wisc.edu void start(uint64_t _wfDynId, uint64_t _base_ptr); 33110447Snilay@cs.wisc.edu void exec(); 33210447Snilay@cs.wisc.edu void updateResources(); 33310447Snilay@cs.wisc.edu int ready(itype_e type); 33410447Snilay@cs.wisc.edu bool instructionBufferHasBranch(); 33510447Snilay@cs.wisc.edu void regStats(); 33610447Snilay@cs.wisc.edu VectorMask getPred() { return execMask() & initMask; } 33710447Snilay@cs.wisc.edu 33810447Snilay@cs.wisc.edu bool waitingAtBarrier(int lane); 33910447Snilay@cs.wisc.edu 34010447Snilay@cs.wisc.edu void pushToReconvergenceStack(uint32_t pc, uint32_t rpc, 34110447Snilay@cs.wisc.edu const VectorMask& exec_mask); 34210447Snilay@cs.wisc.edu 34310447Snilay@cs.wisc.edu void popFromReconvergenceStack(); 34410447Snilay@cs.wisc.edu 34510447Snilay@cs.wisc.edu uint32_t pc() const; 34610447Snilay@cs.wisc.edu 34710447Snilay@cs.wisc.edu uint32_t rpc() const; 34810447Snilay@cs.wisc.edu 34910447Snilay@cs.wisc.edu VectorMask execMask() const; 35010447Snilay@cs.wisc.edu 35110447Snilay@cs.wisc.edu bool execMask(int lane) const; 35210447Snilay@cs.wisc.edu 35310447Snilay@cs.wisc.edu void pc(uint32_t new_pc); 35410447Snilay@cs.wisc.edu 35510447Snilay@cs.wisc.edu void discardFetch(); 35610447Snilay@cs.wisc.edu 35710447Snilay@cs.wisc.edu private: 35810447Snilay@cs.wisc.edu /** 35910447Snilay@cs.wisc.edu * Stack containing Control Flow Graph nodes (i.e., kernel instructions) 36010447Snilay@cs.wisc.edu * to be visited by the wavefront, and the associated execution masks. The 36110447Snilay@cs.wisc.edu * reconvergence stack grows every time the wavefront reaches a divergence 36210447Snilay@cs.wisc.edu * point (branch instruction), and shrinks every time the wavefront 36310447Snilay@cs.wisc.edu * reaches a reconvergence point (immediate post-dominator instruction). 36410447Snilay@cs.wisc.edu */ 36510447Snilay@cs.wisc.edu std::stack<std::unique_ptr<ReconvergenceStackEntry>> reconvergenceStack; 36610447Snilay@cs.wisc.edu}; 36710447Snilay@cs.wisc.edu 36810447Snilay@cs.wisc.edu#endif // __WAVEFRONT_HH__ 36910447Snilay@cs.wisc.edu