wavefront.hh revision 11639
110447Snilay@cs.wisc.edu/*
210447Snilay@cs.wisc.edu * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
310447Snilay@cs.wisc.edu * All rights reserved.
410447Snilay@cs.wisc.edu *
510447Snilay@cs.wisc.edu * For use for simulation and test purposes only
610447Snilay@cs.wisc.edu *
710447Snilay@cs.wisc.edu * Redistribution and use in source and binary forms, with or without
810447Snilay@cs.wisc.edu * modification, are permitted provided that the following conditions are met:
910447Snilay@cs.wisc.edu *
1010447Snilay@cs.wisc.edu * 1. Redistributions of source code must retain the above copyright notice,
1110447Snilay@cs.wisc.edu * this list of conditions and the following disclaimer.
1210447Snilay@cs.wisc.edu *
1310447Snilay@cs.wisc.edu * 2. Redistributions in binary form must reproduce the above copyright notice,
1410447Snilay@cs.wisc.edu * this list of conditions and the following disclaimer in the documentation
1510447Snilay@cs.wisc.edu * and/or other materials provided with the distribution.
1610447Snilay@cs.wisc.edu *
1710447Snilay@cs.wisc.edu * 3. Neither the name of the copyright holder nor the names of its contributors
1810447Snilay@cs.wisc.edu * may be used to endorse or promote products derived from this software
1910447Snilay@cs.wisc.edu * without specific prior written permission.
2010447Snilay@cs.wisc.edu *
2110447Snilay@cs.wisc.edu * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
2210447Snilay@cs.wisc.edu * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
2310447Snilay@cs.wisc.edu * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
2410447Snilay@cs.wisc.edu * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
2510447Snilay@cs.wisc.edu * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
2610447Snilay@cs.wisc.edu * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
2710447Snilay@cs.wisc.edu * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
2810447Snilay@cs.wisc.edu * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
2910447Snilay@cs.wisc.edu * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
3010447Snilay@cs.wisc.edu * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
3110447Snilay@cs.wisc.edu * POSSIBILITY OF SUCH DAMAGE.
3210447Snilay@cs.wisc.edu *
3310447Snilay@cs.wisc.edu * Author: Lisa Hsu
3410447Snilay@cs.wisc.edu */
3510447Snilay@cs.wisc.edu
3610447Snilay@cs.wisc.edu#ifndef __WAVEFRONT_HH__
3710447Snilay@cs.wisc.edu#define __WAVEFRONT_HH__
3810447Snilay@cs.wisc.edu
3910447Snilay@cs.wisc.edu#include <cassert>
4010447Snilay@cs.wisc.edu#include <deque>
4110447Snilay@cs.wisc.edu#include <memory>
4210447Snilay@cs.wisc.edu#include <stack>
4310447Snilay@cs.wisc.edu#include <vector>
4410447Snilay@cs.wisc.edu
4510447Snilay@cs.wisc.edu#include "base/misc.hh"
4610447Snilay@cs.wisc.edu#include "base/types.hh"
4710447Snilay@cs.wisc.edu#include "gpu-compute/condition_register_state.hh"
4810447Snilay@cs.wisc.edu#include "gpu-compute/lds_state.hh"
4910447Snilay@cs.wisc.edu#include "gpu-compute/misc.hh"
5010447Snilay@cs.wisc.edu#include "params/Wavefront.hh"
5110447Snilay@cs.wisc.edu#include "sim/sim_object.hh"
5210447Snilay@cs.wisc.edu
// Compile-time constant shared by users of this header.
// NOTE(review): presumably the cap on instructions buffered per wavefront;
// confirm against the code that consumes it.
static constexpr int MAX_NUM_INSTS_PER_WF = 12;
5410447Snilay@cs.wisc.edu
/*
 * Arguments for the hsail opcode call are user defined and variable length.
 * The hardware/finalizer can support arguments in hardware or use memory to
 * pass arguments. For now, let's assume that an unlimited number of arguments
 * are supported in hardware (the compiler inlines functions whenever it can
 * anyways, so unless someone is interested in the implications of linking/
 * library functions, I think this is a reasonable assumption given the typical
 * size of an OpenCL kernel).
 *
 * Note that call args are different than kernel arguments:
 *   * All work-items in a kernel refer to the same set of kernel arguments
 *   * Each work-item has its own set of call args. So a call argument at
 *     address 0x4 is different for work-item 0 and work-item 1.
 *
 * Ok, the table below shows an example of how we organize the call arguments in
 * the CallArgMem class.
 *
 * int foo(int arg1, double arg2)
 *  ___________________________________________________
 * | 0: return.0 | 4: return.1 | ... | 252: return.63  |
 * |---------------------------------------------------|
 * | 256: arg1.0 | 260: arg1.1 | ... | 508: arg1.63    |
 * |---------------------------------------------------|
 * | 512: arg2.0 | 520: arg2.1 | ... | 1016: arg2.63   |
 *  ___________________________________________________
 *
 * The object owns its buffer (allocated with malloc in the constructor and
 * released with free in the destructor), so it is not copyable: a copy would
 * share the same pointer and free it twice.
 */
class CallArgMem
{
  public:
    // pointer to buffer for storing function arguments
    uint8_t *mem;
    // number of lanes; every argument address is scaled by this value
    int wfSize;
    // size of function args
    int funcArgsSizePerItem;

    /**
     * Byte offset into mem of the slot that belongs to a given lane.
     *
     * @param lane lane (work-item) index within the wavefront
     * @param addr per-work-item address of the argument
     * @return addr scaled by the wavefront size plus the lane's slot of
     *         sizeof(CType) bytes
     */
    template<typename CType>
    int
    getLaneOffset(int lane, int addr)
    {
        return addr * wfSize + sizeof(CType) * lane;
    }

    /**
     * Allocate func_args_size_per_item * wf_size bytes of argument storage.
     * The buffer contents are left uninitialized.
     */
    CallArgMem(int func_args_size_per_item, int wf_size)
        : mem(nullptr), wfSize(wf_size),
          funcArgsSizePerItem(func_args_size_per_item)
    {
        assert(funcArgsSizePerItem >= 0 && wfSize >= 0);

        // multiply in size_t so a large request cannot overflow int
        const size_t num_bytes = bufferSize();
        mem = (uint8_t*)malloc(num_bytes);

        // malloc(0) is allowed to return a null pointer
        assert(mem || num_bytes == 0);
    }

    ~CallArgMem()
    {
        free(mem);
    }

    // mem is an owning raw pointer; copying would lead to a double free
    CallArgMem(const CallArgMem&) = delete;
    CallArgMem& operator=(const CallArgMem&) = delete;

    /**
     * Address inside mem of the CType-sized slot for (lane, addr).
     */
    template<typename CType>
    uint8_t*
    getLaneAddr(int lane, int addr)
    {
        assert(accessInBounds<CType>(lane, addr));
        return mem + getLaneOffset<CType>(lane, addr);
    }

    /**
     * Store val into the CType-sized slot for (lane, addr).
     */
    template<typename CType>
    void
    setLaneAddr(int lane, int addr, CType val)
    {
        assert(accessInBounds<CType>(lane, addr));
        *((CType*)(mem + getLaneOffset<CType>(lane, addr))) = val;
    }

  private:
    // total number of bytes in the buffer
    size_t
    bufferSize() const
    {
        return static_cast<size_t>(funcArgsSizePerItem) *
               static_cast<size_t>(wfSize);
    }

    // true if a CType-sized access for (lane, addr) lies entirely inside mem
    template<typename CType>
    bool
    accessInBounds(int lane, int addr) const
    {
        if (lane < 0 || lane >= wfSize || addr < 0)
            return false;

        const size_t first = static_cast<size_t>(addr) * wfSize +
                             sizeof(CType) * lane;
        return first + sizeof(CType) <= bufferSize();
    }
};
12210447Snilay@cs.wisc.edu
12310447Snilay@cs.wisc.edu/**
12410447Snilay@cs.wisc.edu * A reconvergence stack entry conveys the necessary state to implement
12510447Snilay@cs.wisc.edu * control flow divergence.
12610447Snilay@cs.wisc.edu */
12710447Snilay@cs.wisc.educlass ReconvergenceStackEntry {
12810447Snilay@cs.wisc.edu
12910447Snilay@cs.wisc.edu  public:
13010447Snilay@cs.wisc.edu    ReconvergenceStackEntry(uint32_t new_pc, uint32_t new_rpc,
13110447Snilay@cs.wisc.edu                            VectorMask new_mask) : pc(new_pc), rpc(new_rpc),
13210447Snilay@cs.wisc.edu                            execMask(new_mask) {
13310447Snilay@cs.wisc.edu    }
13410447Snilay@cs.wisc.edu
13510447Snilay@cs.wisc.edu    /**
13610447Snilay@cs.wisc.edu     * PC of current instruction.
13710447Snilay@cs.wisc.edu     */
13810447Snilay@cs.wisc.edu    uint32_t pc;
13910447Snilay@cs.wisc.edu    /**
14010447Snilay@cs.wisc.edu     * PC of the immediate post-dominator instruction, i.e., the value of
14110447Snilay@cs.wisc.edu     * @a pc for the first instruction that will be executed by the wavefront
14210447Snilay@cs.wisc.edu     * when a reconvergence point is reached.
14310447Snilay@cs.wisc.edu     */
14410447Snilay@cs.wisc.edu    uint32_t rpc;
14510447Snilay@cs.wisc.edu    /**
14610447Snilay@cs.wisc.edu     * Execution mask.
14710447Snilay@cs.wisc.edu     */
14810447Snilay@cs.wisc.edu    VectorMask execMask;
14910447Snilay@cs.wisc.edu};
15010447Snilay@cs.wisc.edu
15110447Snilay@cs.wisc.educlass Wavefront : public SimObject
15210447Snilay@cs.wisc.edu{
15310447Snilay@cs.wisc.edu  public:
15410447Snilay@cs.wisc.edu    enum itype_e {I_ALU,I_GLOBAL,I_SHARED,I_FLAT,I_PRIVATE};
15510447Snilay@cs.wisc.edu    enum status_e {S_STOPPED,S_RETURNING,S_RUNNING};
15610447Snilay@cs.wisc.edu
15710447Snilay@cs.wisc.edu    // Base pointer for array of instruction pointers
15810447Snilay@cs.wisc.edu    uint64_t basePtr;
15910447Snilay@cs.wisc.edu
16010447Snilay@cs.wisc.edu    uint32_t oldBarrierCnt;
16110447Snilay@cs.wisc.edu    uint32_t barrierCnt;
16210447Snilay@cs.wisc.edu    uint32_t barrierId;
16310447Snilay@cs.wisc.edu    uint32_t barrierSlots;
16410447Snilay@cs.wisc.edu    status_e status;
16510447Snilay@cs.wisc.edu    // HW slot id where the WF is mapped to inside a SIMD unit
16610447Snilay@cs.wisc.edu    int wfSlotId;
16710447Snilay@cs.wisc.edu    int kernId;
16810447Snilay@cs.wisc.edu    // SIMD unit where the WV has been scheduled
16910447Snilay@cs.wisc.edu    int simdId;
17010447Snilay@cs.wisc.edu    // pointer to parent CU
17110447Snilay@cs.wisc.edu    ComputeUnit *computeUnit;
17210447Snilay@cs.wisc.edu
17310447Snilay@cs.wisc.edu    std::deque<GPUDynInstPtr> instructionBuffer;
17410447Snilay@cs.wisc.edu
17510447Snilay@cs.wisc.edu    bool pendingFetch;
17610447Snilay@cs.wisc.edu    bool dropFetch;
17710447Snilay@cs.wisc.edu
17810447Snilay@cs.wisc.edu    // Condition Register State (for HSAIL simulations only)
17910447Snilay@cs.wisc.edu    class ConditionRegisterState *condRegState;
18010447Snilay@cs.wisc.edu    // number of single precision VGPRs required by WF
18110447Snilay@cs.wisc.edu    uint32_t maxSpVgprs;
18210447Snilay@cs.wisc.edu    // number of double precision VGPRs required by WF
18310447Snilay@cs.wisc.edu    uint32_t maxDpVgprs;
18410447Snilay@cs.wisc.edu    // map virtual to physical vector register
18510447Snilay@cs.wisc.edu    uint32_t remap(uint32_t vgprIndex, uint32_t size, uint8_t mode=0);
18610447Snilay@cs.wisc.edu    void resizeRegFiles(int num_cregs, int num_sregs, int num_dregs);
18710447Snilay@cs.wisc.edu    bool isGmInstruction(GPUDynInstPtr ii);
18810447Snilay@cs.wisc.edu    bool isLmInstruction(GPUDynInstPtr ii);
18910447Snilay@cs.wisc.edu    bool isOldestInstGMem();
19010447Snilay@cs.wisc.edu    bool isOldestInstLMem();
19110447Snilay@cs.wisc.edu    bool isOldestInstPrivMem();
19210447Snilay@cs.wisc.edu    bool isOldestInstFlatMem();
19310447Snilay@cs.wisc.edu    bool isOldestInstALU();
19410447Snilay@cs.wisc.edu    bool isOldestInstBarrier();
19510447Snilay@cs.wisc.edu    // used for passing spill address to DDInstGPU
19610447Snilay@cs.wisc.edu    std::vector<Addr> lastAddr;
19710447Snilay@cs.wisc.edu    std::vector<uint32_t> workItemId[3];
19810447Snilay@cs.wisc.edu    std::vector<uint32_t> workItemFlatId;
19910447Snilay@cs.wisc.edu    uint32_t workGroupId[3];
20010447Snilay@cs.wisc.edu    uint32_t workGroupSz[3];
20110447Snilay@cs.wisc.edu    uint32_t gridSz[3];
20210447Snilay@cs.wisc.edu    uint32_t wgId;
20310447Snilay@cs.wisc.edu    uint32_t wgSz;
20410447Snilay@cs.wisc.edu    uint32_t dynWaveId;
20510447Snilay@cs.wisc.edu    uint32_t maxDynWaveId;
20610447Snilay@cs.wisc.edu    uint32_t dispatchId;
20710447Snilay@cs.wisc.edu    // outstanding global+local memory requests
20810447Snilay@cs.wisc.edu    uint32_t outstandingReqs;
20910447Snilay@cs.wisc.edu    // memory requests between scoreboard
21010447Snilay@cs.wisc.edu    // and execute stage not yet executed
21110447Snilay@cs.wisc.edu    uint32_t memReqsInPipe;
21210447Snilay@cs.wisc.edu    // outstanding global memory write requests
21310447Snilay@cs.wisc.edu    uint32_t outstandingReqsWrGm;
21410447Snilay@cs.wisc.edu    // outstanding local memory write requests
21510447Snilay@cs.wisc.edu    uint32_t outstandingReqsWrLm;
21610447Snilay@cs.wisc.edu    // outstanding global memory read requests
21710447Snilay@cs.wisc.edu    uint32_t outstandingReqsRdGm;
21810447Snilay@cs.wisc.edu    // outstanding local memory read requests
21910447Snilay@cs.wisc.edu    uint32_t outstandingReqsRdLm;
22010447Snilay@cs.wisc.edu    uint32_t rdLmReqsInPipe;
22110447Snilay@cs.wisc.edu    uint32_t rdGmReqsInPipe;
22210447Snilay@cs.wisc.edu    uint32_t wrLmReqsInPipe;
22310447Snilay@cs.wisc.edu    uint32_t wrGmReqsInPipe;
22410447Snilay@cs.wisc.edu
22510447Snilay@cs.wisc.edu    int memTraceBusy;
22610447Snilay@cs.wisc.edu    uint64_t lastTrace;
22710447Snilay@cs.wisc.edu    // number of vector registers reserved by WF
22810447Snilay@cs.wisc.edu    int reservedVectorRegs;
22910447Snilay@cs.wisc.edu    // Index into the Vector Register File's namespace where the WF's registers
23010447Snilay@cs.wisc.edu    // will live while the WF is executed
23110447Snilay@cs.wisc.edu    uint32_t startVgprIndex;
23210447Snilay@cs.wisc.edu
23310447Snilay@cs.wisc.edu    // Old value of destination gpr (for trace)
23410447Snilay@cs.wisc.edu    std::vector<uint32_t> oldVgpr;
23510447Snilay@cs.wisc.edu    // Id of destination gpr (for trace)
23610447Snilay@cs.wisc.edu    uint32_t oldVgprId;
23710447Snilay@cs.wisc.edu    // Tick count of last old_vgpr copy
23810447Snilay@cs.wisc.edu    uint64_t oldVgprTcnt;
23910447Snilay@cs.wisc.edu
24010447Snilay@cs.wisc.edu    // Old value of destination gpr (for trace)
24110447Snilay@cs.wisc.edu    std::vector<uint64_t> oldDgpr;
24210447Snilay@cs.wisc.edu    // Id of destination gpr (for trace)
24310447Snilay@cs.wisc.edu    uint32_t oldDgprId;
24410447Snilay@cs.wisc.edu    // Tick count of last old_vgpr copy
24510447Snilay@cs.wisc.edu    uint64_t oldDgprTcnt;
24610447Snilay@cs.wisc.edu
24710447Snilay@cs.wisc.edu    // Execution mask at wavefront start
24810447Snilay@cs.wisc.edu    VectorMask initMask;
24910447Snilay@cs.wisc.edu
25010447Snilay@cs.wisc.edu    // number of barriers this WF has joined
25110447Snilay@cs.wisc.edu    std::vector<int> barCnt;
25210447Snilay@cs.wisc.edu    int maxBarCnt;
25310447Snilay@cs.wisc.edu    // Flag to stall a wave on barrier
25410447Snilay@cs.wisc.edu    bool stalledAtBarrier;
25510447Snilay@cs.wisc.edu
25610447Snilay@cs.wisc.edu    // a pointer to the fraction of the LDS allocated
25710447Snilay@cs.wisc.edu    // to this workgroup (thus this wavefront)
25810447Snilay@cs.wisc.edu    LdsChunk *ldsChunk;
25910447Snilay@cs.wisc.edu
26010447Snilay@cs.wisc.edu    // A pointer to the spill area
26110447Snilay@cs.wisc.edu    Addr spillBase;
26210447Snilay@cs.wisc.edu    // The size of the spill area
26310447Snilay@cs.wisc.edu    uint32_t spillSizePerItem;
26410447Snilay@cs.wisc.edu    // The vector width of the spill area
26510447Snilay@cs.wisc.edu    uint32_t spillWidth;
26610447Snilay@cs.wisc.edu
26710447Snilay@cs.wisc.edu    // A pointer to the private memory area
26810447Snilay@cs.wisc.edu    Addr privBase;
26910447Snilay@cs.wisc.edu    // The size of the private memory area
27010447Snilay@cs.wisc.edu    uint32_t privSizePerItem;
27110447Snilay@cs.wisc.edu
27210447Snilay@cs.wisc.edu    // A pointer ot the read-only memory area
27310447Snilay@cs.wisc.edu    Addr roBase;
27410447Snilay@cs.wisc.edu    // size of the read-only memory area
27510447Snilay@cs.wisc.edu    uint32_t roSize;
27610447Snilay@cs.wisc.edu
27710447Snilay@cs.wisc.edu    // pointer to buffer for storing kernel arguments
27810447Snilay@cs.wisc.edu    uint8_t *kernelArgs;
27910447Snilay@cs.wisc.edu    // unique WF id over all WFs executed across all CUs
28010447Snilay@cs.wisc.edu    uint64_t wfDynId;
28110447Snilay@cs.wisc.edu
28210447Snilay@cs.wisc.edu    // number of times instruction issue for this wavefront is blocked
28310447Snilay@cs.wisc.edu    // due to VRF port availability
28410447Snilay@cs.wisc.edu    Stats::Scalar numTimesBlockedDueVrfPortAvail;
28510447Snilay@cs.wisc.edu    // number of times an instruction of a WF is blocked from being issued
28610447Snilay@cs.wisc.edu    // due to WAR and WAW dependencies
28710447Snilay@cs.wisc.edu    Stats::Scalar numTimesBlockedDueWAXDependencies;
28810447Snilay@cs.wisc.edu    // number of times an instruction of a WF is blocked from being issued
28910447Snilay@cs.wisc.edu    // due to WAR and WAW dependencies
29010447Snilay@cs.wisc.edu    Stats::Scalar numTimesBlockedDueRAWDependencies;
29110447Snilay@cs.wisc.edu    // distribution of executed instructions based on their register
29210447Snilay@cs.wisc.edu    // operands; this is used to highlight the load on the VRF
29310447Snilay@cs.wisc.edu    Stats::Distribution srcRegOpDist;
29410447Snilay@cs.wisc.edu    Stats::Distribution dstRegOpDist;
29510447Snilay@cs.wisc.edu
29610447Snilay@cs.wisc.edu    // Functions to operate on call argument memory
29710447Snilay@cs.wisc.edu    // argument memory for hsail call instruction
29810447Snilay@cs.wisc.edu    CallArgMem *callArgMem;
29910447Snilay@cs.wisc.edu    void
30010447Snilay@cs.wisc.edu    initCallArgMem(int func_args_size_per_item, int wf_size)
30110447Snilay@cs.wisc.edu    {
30210447Snilay@cs.wisc.edu        callArgMem = new CallArgMem(func_args_size_per_item, wf_size);
30310447Snilay@cs.wisc.edu    }
30410447Snilay@cs.wisc.edu
30510447Snilay@cs.wisc.edu    template<typename CType>
30610447Snilay@cs.wisc.edu    CType
30710447Snilay@cs.wisc.edu    readCallArgMem(int lane, int addr)
30810447Snilay@cs.wisc.edu    {
30910447Snilay@cs.wisc.edu        return *((CType*)(callArgMem->getLaneAddr<CType>(lane, addr)));
31010447Snilay@cs.wisc.edu    }
31110447Snilay@cs.wisc.edu
31210447Snilay@cs.wisc.edu    template<typename CType>
31310447Snilay@cs.wisc.edu    void
31410447Snilay@cs.wisc.edu    writeCallArgMem(int lane, int addr, CType val)
31510447Snilay@cs.wisc.edu    {
31610447Snilay@cs.wisc.edu        callArgMem->setLaneAddr<CType>(lane, addr, val);
31710447Snilay@cs.wisc.edu    }
31810447Snilay@cs.wisc.edu
31910447Snilay@cs.wisc.edu    typedef WavefrontParams Params;
32010447Snilay@cs.wisc.edu    Wavefront(const Params *p);
32110447Snilay@cs.wisc.edu    ~Wavefront();
32210447Snilay@cs.wisc.edu    virtual void init();
32310447Snilay@cs.wisc.edu
32410447Snilay@cs.wisc.edu    void
32510447Snilay@cs.wisc.edu    setParent(ComputeUnit *cu)
32610447Snilay@cs.wisc.edu    {
32710447Snilay@cs.wisc.edu        computeUnit = cu;
32810447Snilay@cs.wisc.edu    }
32910447Snilay@cs.wisc.edu
33010447Snilay@cs.wisc.edu    void start(uint64_t _wfDynId, uint64_t _base_ptr);
33110447Snilay@cs.wisc.edu    void exec();
33210447Snilay@cs.wisc.edu    void updateResources();
33310447Snilay@cs.wisc.edu    int ready(itype_e type);
33410447Snilay@cs.wisc.edu    bool instructionBufferHasBranch();
33510447Snilay@cs.wisc.edu    void regStats();
33610447Snilay@cs.wisc.edu    VectorMask getPred() { return execMask() & initMask; }
33710447Snilay@cs.wisc.edu
33810447Snilay@cs.wisc.edu    bool waitingAtBarrier(int lane);
33910447Snilay@cs.wisc.edu
34010447Snilay@cs.wisc.edu    void pushToReconvergenceStack(uint32_t pc, uint32_t rpc,
34110447Snilay@cs.wisc.edu                                  const VectorMask& exec_mask);
34210447Snilay@cs.wisc.edu
34310447Snilay@cs.wisc.edu    void popFromReconvergenceStack();
34410447Snilay@cs.wisc.edu
34510447Snilay@cs.wisc.edu    uint32_t pc() const;
34610447Snilay@cs.wisc.edu
34710447Snilay@cs.wisc.edu    uint32_t rpc() const;
34810447Snilay@cs.wisc.edu
34910447Snilay@cs.wisc.edu    VectorMask execMask() const;
35010447Snilay@cs.wisc.edu
35110447Snilay@cs.wisc.edu    bool execMask(int lane) const;
35210447Snilay@cs.wisc.edu
35310447Snilay@cs.wisc.edu    void pc(uint32_t new_pc);
35410447Snilay@cs.wisc.edu
35510447Snilay@cs.wisc.edu    void discardFetch();
35610447Snilay@cs.wisc.edu
35710447Snilay@cs.wisc.edu  private:
35810447Snilay@cs.wisc.edu    /**
35910447Snilay@cs.wisc.edu     * Stack containing Control Flow Graph nodes (i.e., kernel instructions)
36010447Snilay@cs.wisc.edu     * to be visited by the wavefront, and the associated execution masks. The
36110447Snilay@cs.wisc.edu     * reconvergence stack grows every time the wavefront reaches a divergence
36210447Snilay@cs.wisc.edu     * point (branch instruction), and shrinks every time the wavefront
36310447Snilay@cs.wisc.edu     * reaches a reconvergence point (immediate post-dominator instruction).
36410447Snilay@cs.wisc.edu     */
36510447Snilay@cs.wisc.edu    std::stack<std::unique_ptr<ReconvergenceStackEntry>> reconvergenceStack;
36610447Snilay@cs.wisc.edu};
36710447Snilay@cs.wisc.edu
36810447Snilay@cs.wisc.edu#endif // __WAVEFRONT_HH__
36910447Snilay@cs.wisc.edu