wavefront.hh revision 11644
17404SAli.Saidi@ARM.com/* 212709Sgiacomo.travaglini@arm.com * Copyright (c) 2011-2015 Advanced Micro Devices, Inc. 37404SAli.Saidi@ARM.com * All rights reserved. 47404SAli.Saidi@ARM.com * 57404SAli.Saidi@ARM.com * For use for simulation and test purposes only 67404SAli.Saidi@ARM.com * 77404SAli.Saidi@ARM.com * Redistribution and use in source and binary forms, with or without 87404SAli.Saidi@ARM.com * modification, are permitted provided that the following conditions are met: 97404SAli.Saidi@ARM.com * 107404SAli.Saidi@ARM.com * 1. Redistributions of source code must retain the above copyright notice, 117404SAli.Saidi@ARM.com * this list of conditions and the following disclaimer. 127404SAli.Saidi@ARM.com * 137404SAli.Saidi@ARM.com * 2. Redistributions in binary form must reproduce the above copyright notice, 147404SAli.Saidi@ARM.com * this list of conditions and the following disclaimer in the documentation 157404SAli.Saidi@ARM.com * and/or other materials provided with the distribution. 167404SAli.Saidi@ARM.com * 177404SAli.Saidi@ARM.com * 3. Neither the name of the copyright holder nor the names of its contributors 187404SAli.Saidi@ARM.com * may be used to endorse or promote products derived from this software 197404SAli.Saidi@ARM.com * without specific prior written permission. 207404SAli.Saidi@ARM.com * 217404SAli.Saidi@ARM.com * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 227404SAli.Saidi@ARM.com * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 237404SAli.Saidi@ARM.com * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 247404SAli.Saidi@ARM.com * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 257404SAli.Saidi@ARM.com * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 267404SAli.Saidi@ARM.com * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 277404SAli.Saidi@ARM.com * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 287404SAli.Saidi@ARM.com * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 297404SAli.Saidi@ARM.com * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 307404SAli.Saidi@ARM.com * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 317404SAli.Saidi@ARM.com * POSSIBILITY OF SUCH DAMAGE. 327404SAli.Saidi@ARM.com * 337404SAli.Saidi@ARM.com * Author: Lisa Hsu 347404SAli.Saidi@ARM.com */ 357404SAli.Saidi@ARM.com 367404SAli.Saidi@ARM.com#ifndef __WAVEFRONT_HH__ 377404SAli.Saidi@ARM.com#define __WAVEFRONT_HH__ 3810037SARM gem5 Developers 397404SAli.Saidi@ARM.com#include <cassert> 4010873Sandreas.sandberg@arm.com#include <deque> 417404SAli.Saidi@ARM.com#include <memory> 4210474Sandreas.hansson@arm.com#include <stack> 4310474Sandreas.hansson@arm.com#include <vector> 447404SAli.Saidi@ARM.com 4510037SARM gem5 Developers#include "base/misc.hh" 4610037SARM gem5 Developers#include "base/types.hh" 477404SAli.Saidi@ARM.com#include "gpu-compute/condition_register_state.hh" 487728SAli.Saidi@ARM.com#include "gpu-compute/lds_state.hh" 497404SAli.Saidi@ARM.com#include "gpu-compute/misc.hh" 508245Snate@binkert.org#include "params/Wavefront.hh" 519152Satgutier@umich.edu#include "sim/sim_object.hh" 528245Snate@binkert.org 538245Snate@binkert.orgstatic const int MAX_NUM_INSTS_PER_WF = 12; 5410873Sandreas.sandberg@arm.com 557748SAli.Saidi@ARM.com/** 567404SAli.Saidi@ARM.com * A reconvergence stack entry conveys the necessary state to implement 577404SAli.Saidi@ARM.com * control flow divergence. 587404SAli.Saidi@ARM.com */ 597404SAli.Saidi@ARM.comstruct ReconvergenceStackEntry { 6013892Sgabeblack@google.com /** 6110717Sandreas.hansson@arm.com * PC of current instruction. 6210717Sandreas.hansson@arm.com */ 6310717Sandreas.hansson@arm.com uint32_t pc; 649258SAli.Saidi@ARM.com /** 6510621SCurtis.Dunham@arm.com * PC of the immediate post-dominator instruction, i.e., the value of 6610621SCurtis.Dunham@arm.com * @a pc for the first instruction that will be executed by the wavefront 6712086Sspwilson2@wisc.edu * when a reconvergence point is reached. 6812086Sspwilson2@wisc.edu */ 6912086Sspwilson2@wisc.edu uint32_t rpc; 7012086Sspwilson2@wisc.edu /** 7112086Sspwilson2@wisc.edu * Execution mask. 7212086Sspwilson2@wisc.edu */ 7311588SCurtis.Dunham@arm.com VectorMask execMask; 7411588SCurtis.Dunham@arm.com}; 7512086Sspwilson2@wisc.edu 767439Sdam.sunwoo@arm.com/* 777576SAli.Saidi@ARM.com * Arguments for the hsail opcode call, are user defined and variable length. 7810037SARM gem5 Developers * The hardware/finalizer can support arguments in hardware or use memory to 7910037SARM gem5 Developers * pass arguments. For now, let's assume that an unlimited number of arguments 8010037SARM gem5 Developers * are supported in hardware (the compiler inlines functions whenver it can 8110717Sandreas.hansson@arm.com * anyways, so unless someone is interested in the implications of linking/ 8210037SARM gem5 Developers * library functions, I think this is a reasonable assumption given the typical 8310037SARM gem5 Developers * size of an OpenCL kernel). 8410037SARM gem5 Developers * 8510037SARM gem5 Developers * Note that call args are different than kernel arguments: 8610037SARM gem5 Developers * * All work-items in a kernel refer the same set of kernel arguments 8710037SARM gem5 Developers * * Each work-item has it's on set of call args. So a call argument at 8810037SARM gem5 Developers * address 0x4 is different for work-item 0 and work-item 1. 8910037SARM gem5 Developers * 9010037SARM gem5 Developers * Ok, the table below shows an example of how we organize the call arguments in 9110037SARM gem5 Developers * the CallArgMem class. 9210037SARM gem5 Developers * 9310037SARM gem5 Developers * int foo(int arg1, double arg2) 947439Sdam.sunwoo@arm.com * ___________________________________________________ 957404SAli.Saidi@ARM.com * | 0: return.0 | 4: return.1 | ... | 252: return.63 | 967404SAli.Saidi@ARM.com * |---------------------------------------------------| 977404SAli.Saidi@ARM.com * | 256: arg1.0 | 260: arg1.1 | ... | 508: arg1.63 | 987404SAli.Saidi@ARM.com * |---------------------------------------------------| 997404SAli.Saidi@ARM.com * | 512: arg2.0 | 520: arg2.1 | ... | 1016: arg2.63 | 1007404SAli.Saidi@ARM.com * ___________________________________________________ 10110717Sandreas.hansson@arm.com */ 10210717Sandreas.hansson@arm.comclass CallArgMem 10310717Sandreas.hansson@arm.com{ 10410717Sandreas.hansson@arm.com public: 10513795SAndrea.Mondelli@ucf.edu // pointer to buffer for storing function arguments 10610717Sandreas.hansson@arm.com uint8_t *mem; 10710717Sandreas.hansson@arm.com int wfSize; 10810717Sandreas.hansson@arm.com // size of function args 10910717Sandreas.hansson@arm.com int funcArgsSizePerItem; 11010717Sandreas.hansson@arm.com 11110717Sandreas.hansson@arm.com template<typename CType> 11210717Sandreas.hansson@arm.com int 11310717Sandreas.hansson@arm.com getLaneOffset(int lane, int addr) 11410717Sandreas.hansson@arm.com { 11510717Sandreas.hansson@arm.com return addr * wfSize + sizeof(CType) * lane; 11610717Sandreas.hansson@arm.com } 11713784Sgabeblack@google.com 11813784Sgabeblack@google.com CallArgMem(int func_args_size_per_item, int wf_size) 11910717Sandreas.hansson@arm.com : wfSize(wf_size), funcArgsSizePerItem(func_args_size_per_item) 12010717Sandreas.hansson@arm.com { 12110717Sandreas.hansson@arm.com mem = (uint8_t*)malloc(funcArgsSizePerItem * wfSize); 12210717Sandreas.hansson@arm.com } 12310717Sandreas.hansson@arm.com 12410717Sandreas.hansson@arm.com ~CallArgMem() 12510717Sandreas.hansson@arm.com { 12610717Sandreas.hansson@arm.com free(mem); 12713892Sgabeblack@google.com } 12810717Sandreas.hansson@arm.com 12910717Sandreas.hansson@arm.com template<typename CType> 13010537Sandreas.hansson@arm.com uint8_t* 13110537Sandreas.hansson@arm.com getLaneAddr(int lane, int addr) 13210537Sandreas.hansson@arm.com { 13314040Sgiacomo.travaglini@arm.com return mem + getLaneOffset<CType>(lane, addr); 13414040Sgiacomo.travaglini@arm.com } 13514040Sgiacomo.travaglini@arm.com 13614040Sgiacomo.travaglini@arm.com template<typename CType> 13710537Sandreas.hansson@arm.com void 13812738Sandreas.sandberg@arm.com setLaneAddr(int lane, int addr, CType val) 13910537Sandreas.hansson@arm.com { 14010537Sandreas.hansson@arm.com *((CType*)(mem + getLaneOffset<CType>(lane, addr))) = val; 14110537Sandreas.hansson@arm.com } 14210037SARM gem5 Developers}; 14310037SARM gem5 Developers 14410037SARM gem5 Developersclass Wavefront : public SimObject 1459152Satgutier@umich.edu{ 1469152Satgutier@umich.edu public: 1479152Satgutier@umich.edu enum itype_e {I_ALU,I_GLOBAL,I_SHARED,I_FLAT,I_PRIVATE}; 14810913Sandreas.sandberg@arm.com enum status_e {S_STOPPED,S_RETURNING,S_RUNNING}; 14911588SCurtis.Dunham@arm.com 15011588SCurtis.Dunham@arm.com // Base pointer for array of instruction pointers 1519152Satgutier@umich.edu uint64_t basePtr; 15210913Sandreas.sandberg@arm.com 1539152Satgutier@umich.edu uint32_t oldBarrierCnt; 15410913Sandreas.sandberg@arm.com uint32_t barrierCnt; 1559152Satgutier@umich.edu uint32_t barrierId; 1569152Satgutier@umich.edu uint32_t barrierSlots; 1579152Satgutier@umich.edu status_e status; 15810913Sandreas.sandberg@arm.com // HW slot id where the WF is mapped to inside a SIMD unit 15910913Sandreas.sandberg@arm.com int wfSlotId; 1607404SAli.Saidi@ARM.com int kernId; 16110037SARM gem5 Developers // SIMD unit where the WV has been scheduled 1629152Satgutier@umich.edu int simdId; 16310037SARM gem5 Developers // pointer to parent CU 16410037SARM gem5 Developers ComputeUnit *computeUnit; 16510037SARM gem5 Developers 16610037SARM gem5 Developers std::deque<GPUDynInstPtr> instructionBuffer; 16710037SARM gem5 Developers 16810037SARM gem5 Developers bool pendingFetch; 16910037SARM gem5 Developers bool dropFetch; 17010037SARM gem5 Developers 1719152Satgutier@umich.edu // Condition Register State (for HSAIL simulations only) 17210913Sandreas.sandberg@arm.com class ConditionRegisterState *condRegState; 17310037SARM gem5 Developers // number of single precision VGPRs required by WF 17410037SARM gem5 Developers uint32_t maxSpVgprs; 17510913Sandreas.sandberg@arm.com // number of double precision VGPRs required by WF 1767733SAli.Saidi@ARM.com uint32_t maxDpVgprs; 1777404SAli.Saidi@ARM.com // map virtual to physical vector register 1787404SAli.Saidi@ARM.com uint32_t remap(uint32_t vgprIndex, uint32_t size, uint8_t mode=0); 1797748SAli.Saidi@ARM.com void resizeRegFiles(int num_cregs, int num_sregs, int num_dregs); 1809342SAndreas.Sandberg@arm.com bool isGmInstruction(GPUDynInstPtr ii); 1817748SAli.Saidi@ARM.com bool isLmInstruction(GPUDynInstPtr ii); 1829524SAndreas.Sandberg@ARM.com bool isOldestInstGMem(); 1839152Satgutier@umich.edu bool isOldestInstLMem(); 1849152Satgutier@umich.edu bool isOldestInstPrivMem(); 18510621SCurtis.Dunham@arm.com bool isOldestInstFlatMem(); 1867748SAli.Saidi@ARM.com bool isOldestInstALU(); 1877748SAli.Saidi@ARM.com bool isOldestInstBarrier(); 1887748SAli.Saidi@ARM.com // used for passing spill address to DDInstGPU 1897404SAli.Saidi@ARM.com std::vector<Addr> lastAddr; 19012749Sgiacomo.travaglini@arm.com std::vector<uint32_t> workItemId[3]; 19110037SARM gem5 Developers std::vector<uint32_t> workItemFlatId; 19210037SARM gem5 Developers uint32_t workGroupId[3]; 19311580SDylan.Johnson@ARM.com uint32_t workGroupSz[3]; 19411580SDylan.Johnson@ARM.com uint32_t gridSz[3]; 1957404SAli.Saidi@ARM.com uint32_t wgId; 1968733Sgeoffrey.blake@arm.com uint32_t wgSz; 19710621SCurtis.Dunham@arm.com // wavefront id within a workgroup 19810621SCurtis.Dunham@arm.com uint32_t wfId; 19910109SGeoffrey.Blake@arm.com uint32_t maxDynWaveId; 20010037SARM gem5 Developers uint32_t dispatchId; 20110109SGeoffrey.Blake@arm.com // outstanding global+local memory requests 2027439Sdam.sunwoo@arm.com uint32_t outstandingReqs; 2037439Sdam.sunwoo@arm.com // memory requests between scoreboard 2047439Sdam.sunwoo@arm.com // and execute stage not yet executed 2057439Sdam.sunwoo@arm.com uint32_t memReqsInPipe; 2067404SAli.Saidi@ARM.com // outstanding global memory write requests 2077439Sdam.sunwoo@arm.com uint32_t outstandingReqsWrGm; 2087439Sdam.sunwoo@arm.com // outstanding local memory write requests 20910109SGeoffrey.Blake@arm.com uint32_t outstandingReqsWrLm; 21010109SGeoffrey.Blake@arm.com // outstanding global memory read requests 21110109SGeoffrey.Blake@arm.com uint32_t outstandingReqsRdGm; 21210109SGeoffrey.Blake@arm.com // outstanding local memory read requests 21310109SGeoffrey.Blake@arm.com uint32_t outstandingReqsRdLm; 21410109SGeoffrey.Blake@arm.com uint32_t rdLmReqsInPipe; 21510109SGeoffrey.Blake@arm.com uint32_t rdGmReqsInPipe; 21610109SGeoffrey.Blake@arm.com uint32_t wrLmReqsInPipe; 2178202SAli.Saidi@ARM.com uint32_t wrGmReqsInPipe; 2188202SAli.Saidi@ARM.com 2198202SAli.Saidi@ARM.com int memTraceBusy; 2208202SAli.Saidi@ARM.com uint64_t lastTrace; 2218202SAli.Saidi@ARM.com // number of vector registers reserved by WF 2228202SAli.Saidi@ARM.com int reservedVectorRegs; 2238202SAli.Saidi@ARM.com // Index into the Vector Register File's namespace where the WF's registers 22410037SARM gem5 Developers // will live while the WF is executed 22510621SCurtis.Dunham@arm.com uint32_t startVgprIndex; 22610474Sandreas.hansson@arm.com 2278202SAli.Saidi@ARM.com // Old value of destination gpr (for trace) 2287439Sdam.sunwoo@arm.com std::vector<uint32_t> oldVgpr; 22910621SCurtis.Dunham@arm.com // Id of destination gpr (for trace) 2307439Sdam.sunwoo@arm.com uint32_t oldVgprId; 23110621SCurtis.Dunham@arm.com // Tick count of last old_vgpr copy 2327439Sdam.sunwoo@arm.com uint64_t oldVgprTcnt; 23311517SCurtis.Dunham@arm.com 23411517SCurtis.Dunham@arm.com // Old value of destination gpr (for trace) 23511517SCurtis.Dunham@arm.com std::vector<uint64_t> oldDgpr; 23612735Sandreas.sandberg@arm.com // Id of destination gpr (for trace) 23712735Sandreas.sandberg@arm.com uint32_t oldDgprId; 23812735Sandreas.sandberg@arm.com // Tick count of last old_vgpr copy 23912735Sandreas.sandberg@arm.com uint64_t oldDgprTcnt; 24012735Sandreas.sandberg@arm.com 24112735Sandreas.sandberg@arm.com // Execution mask at wavefront start 24212735Sandreas.sandberg@arm.com VectorMask initMask; 24312735Sandreas.sandberg@arm.com 24412735Sandreas.sandberg@arm.com // number of barriers this WF has joined 2457439Sdam.sunwoo@arm.com std::vector<int> barCnt; 2467439Sdam.sunwoo@arm.com int maxBarCnt; 2477439Sdam.sunwoo@arm.com // Flag to stall a wave on barrier 24810037SARM gem5 Developers bool stalledAtBarrier; 24910037SARM gem5 Developers 25010037SARM gem5 Developers // a pointer to the fraction of the LDS allocated 2517439Sdam.sunwoo@arm.com // to this workgroup (thus this wavefront) 2528733Sgeoffrey.blake@arm.com LdsChunk *ldsChunk; 2537439Sdam.sunwoo@arm.com 25410037SARM gem5 Developers // A pointer to the spill area 25510037SARM gem5 Developers Addr spillBase; 25610037SARM gem5 Developers // The size of the spill area 2577404SAli.Saidi@ARM.com uint32_t spillSizePerItem; 2587436Sdam.sunwoo@arm.com // The vector width of the spill area 2597436Sdam.sunwoo@arm.com uint32_t spillWidth; 26010037SARM gem5 Developers 26110037SARM gem5 Developers // A pointer to the private memory area 26210037SARM gem5 Developers Addr privBase; 26310037SARM gem5 Developers // The size of the private memory area 26410037SARM gem5 Developers uint32_t privSizePerItem; 26510037SARM gem5 Developers 26610037SARM gem5 Developers // A pointer ot the read-only memory area 26710037SARM gem5 Developers Addr roBase; 26811575SDylan.Johnson@ARM.com // size of the read-only memory area 26911575SDylan.Johnson@ARM.com uint32_t roSize; 27011575SDylan.Johnson@ARM.com 27111575SDylan.Johnson@ARM.com // pointer to buffer for storing kernel arguments 27210037SARM gem5 Developers uint8_t *kernelArgs; 27310037SARM gem5 Developers // unique WF id over all WFs executed across all CUs 27410037SARM gem5 Developers uint64_t wfDynId; 27510324SCurtis.Dunham@arm.com 27610037SARM gem5 Developers // number of times instruction issue for this wavefront is blocked 27711574SCurtis.Dunham@arm.com // due to VRF port availability 27811574SCurtis.Dunham@arm.com Stats::Scalar numTimesBlockedDueVrfPortAvail; 27911574SCurtis.Dunham@arm.com // number of times an instruction of a WF is blocked from being issued 28011574SCurtis.Dunham@arm.com // due to WAR and WAW dependencies 28111574SCurtis.Dunham@arm.com Stats::Scalar numTimesBlockedDueWAXDependencies; 28210037SARM gem5 Developers // number of times an instruction of a WF is blocked from being issued 28310037SARM gem5 Developers // due to WAR and WAW dependencies 28410037SARM gem5 Developers Stats::Scalar numTimesBlockedDueRAWDependencies; 28510324SCurtis.Dunham@arm.com // distribution of executed instructions based on their register 28610037SARM gem5 Developers // operands; this is used to highlight the load on the VRF 28710037SARM gem5 Developers Stats::Distribution srcRegOpDist; 28810037SARM gem5 Developers Stats::Distribution dstRegOpDist; 28910037SARM gem5 Developers 29010037SARM gem5 Developers // Functions to operate on call argument memory 29111575SDylan.Johnson@ARM.com // argument memory for hsail call instruction 29210037SARM gem5 Developers CallArgMem *callArgMem; 29312499Sgiacomo.travaglini@arm.com void 29410037SARM gem5 Developers initCallArgMem(int func_args_size_per_item, int wf_size) 29512499Sgiacomo.travaglini@arm.com { 29610037SARM gem5 Developers callArgMem = new CallArgMem(func_args_size_per_item, wf_size); 29710037SARM gem5 Developers } 29810037SARM gem5 Developers 29910037SARM gem5 Developers template<typename CType> 30010037SARM gem5 Developers CType 3017439Sdam.sunwoo@arm.com readCallArgMem(int lane, int addr) 3027439Sdam.sunwoo@arm.com { 3037439Sdam.sunwoo@arm.com return *((CType*)(callArgMem->getLaneAddr<CType>(lane, addr))); 3047439Sdam.sunwoo@arm.com } 3057439Sdam.sunwoo@arm.com 30610621SCurtis.Dunham@arm.com template<typename CType> 30710621SCurtis.Dunham@arm.com void 30811580SDylan.Johnson@ARM.com writeCallArgMem(int lane, int addr, CType val) 3097728SAli.Saidi@ARM.com { 31011517SCurtis.Dunham@arm.com callArgMem->setLaneAddr<CType>(lane, addr, val); 31111517SCurtis.Dunham@arm.com } 31210037SARM gem5 Developers 31310037SARM gem5 Developers typedef WavefrontParams Params; 31410037SARM gem5 Developers Wavefront(const Params *p); 31510037SARM gem5 Developers ~Wavefront(); 31610037SARM gem5 Developers virtual void init(); 31710037SARM gem5 Developers 31810037SARM gem5 Developers void 31910037SARM gem5 Developers setParent(ComputeUnit *cu) 32010621SCurtis.Dunham@arm.com { 32110621SCurtis.Dunham@arm.com computeUnit = cu; 32210621SCurtis.Dunham@arm.com } 32310621SCurtis.Dunham@arm.com 32410037SARM gem5 Developers void start(uint64_t _wfDynId, uint64_t _base_ptr); 32510037SARM gem5 Developers void exec(); 32610037SARM gem5 Developers void updateResources(); 32710109SGeoffrey.Blake@arm.com int ready(itype_e type); 32810037SARM gem5 Developers bool instructionBufferHasBranch(); 32910109SGeoffrey.Blake@arm.com void regStats(); 33010037SARM gem5 Developers VectorMask getPred() { return execMask() & initMask; } 33110109SGeoffrey.Blake@arm.com 33210037SARM gem5 Developers bool waitingAtBarrier(int lane); 33310109SGeoffrey.Blake@arm.com 33410109SGeoffrey.Blake@arm.com void pushToReconvergenceStack(uint32_t pc, uint32_t rpc, 33510109SGeoffrey.Blake@arm.com const VectorMask& exec_mask); 33610109SGeoffrey.Blake@arm.com 33710109SGeoffrey.Blake@arm.com void popFromReconvergenceStack(); 33810109SGeoffrey.Blake@arm.com 33910109SGeoffrey.Blake@arm.com uint32_t pc() const; 34010109SGeoffrey.Blake@arm.com 34110109SGeoffrey.Blake@arm.com uint32_t rpc() const; 34210037SARM gem5 Developers 3437728SAli.Saidi@ARM.com VectorMask execMask() const; 3448067SAli.Saidi@ARM.com 3457728SAli.Saidi@ARM.com bool execMask(int lane) const; 3467728SAli.Saidi@ARM.com 34710621SCurtis.Dunham@arm.com void pc(uint32_t new_pc); 3487728SAli.Saidi@ARM.com 3497728SAli.Saidi@ARM.com void discardFetch(); 35010621SCurtis.Dunham@arm.com 35110037SARM gem5 Developers /** 35210037SARM gem5 Developers * Returns the size of the static hardware context of a particular wavefront 35310037SARM gem5 Developers * This should be updated everytime the context is changed 35410037SARM gem5 Developers */ 35510037SARM gem5 Developers uint32_t getStaticContextSize() const; 35610037SARM gem5 Developers 3577728SAli.Saidi@ARM.com /** 3587728SAli.Saidi@ARM.com * Returns the hardware context as a stream of bytes 3597728SAli.Saidi@ARM.com * This method is designed for HSAIL execution 3607728SAli.Saidi@ARM.com */ 3617728SAli.Saidi@ARM.com void getContext(const void *out); 3627728SAli.Saidi@ARM.com 3637728SAli.Saidi@ARM.com /** 3647728SAli.Saidi@ARM.com * Sets the hardware context fromt a stream of bytes 3657728SAli.Saidi@ARM.com * This method is designed for HSAIL execution 3667728SAli.Saidi@ARM.com */ 36710621SCurtis.Dunham@arm.com void setContext(const void *in); 3687728SAli.Saidi@ARM.com 3699258SAli.Saidi@ARM.com private: 3709535Smrinmoy.ghosh@arm.com /** 37110037SARM gem5 Developers * Stack containing Control Flow Graph nodes (i.e., kernel instructions) 37210037SARM gem5 Developers * to be visited by the wavefront, and the associated execution masks. The 37310037SARM gem5 Developers * reconvergence stack grows every time the wavefront reaches a divergence 37412735Sandreas.sandberg@arm.com * point (branch instruction), and shrinks every time the wavefront 3759258SAli.Saidi@ARM.com * reaches a reconvergence point (immediate post-dominator instruction). 3769535Smrinmoy.ghosh@arm.com */ 3779535Smrinmoy.ghosh@arm.com std::deque<std::unique_ptr<ReconvergenceStackEntry>> reconvergenceStack; 3789535Smrinmoy.ghosh@arm.com}; 3799535Smrinmoy.ghosh@arm.com 3809535Smrinmoy.ghosh@arm.com#endif // __WAVEFRONT_HH__ 3819535Smrinmoy.ghosh@arm.com