/*
 * Copyright (c) 2011-2017 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * For use for simulation and test purposes only
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Lisa Hsu
 */
3511308Santhony.gutierrez@amd.com
3611308Santhony.gutierrez@amd.com#ifndef __WAVEFRONT_HH__
3711308Santhony.gutierrez@amd.com#define __WAVEFRONT_HH__
3811308Santhony.gutierrez@amd.com
3911308Santhony.gutierrez@amd.com#include <cassert>
4011308Santhony.gutierrez@amd.com#include <deque>
4111308Santhony.gutierrez@amd.com#include <memory>
4211308Santhony.gutierrez@amd.com#include <stack>
4311308Santhony.gutierrez@amd.com#include <vector>
4411308Santhony.gutierrez@amd.com
4511696Santhony.gutierrez@amd.com#include "arch/gpu_isa.hh"
4612334Sgabeblack@google.com#include "base/logging.hh"
4711308Santhony.gutierrez@amd.com#include "base/types.hh"
4811696Santhony.gutierrez@amd.com#include "config/the_gpu_isa.hh"
4911308Santhony.gutierrez@amd.com#include "gpu-compute/condition_register_state.hh"
5011308Santhony.gutierrez@amd.com#include "gpu-compute/lds_state.hh"
5111308Santhony.gutierrez@amd.com#include "gpu-compute/misc.hh"
5211657Salexandru.dutu@amd.com#include "gpu-compute/ndrange.hh"
5311308Santhony.gutierrez@amd.com#include "params/Wavefront.hh"
5411308Santhony.gutierrez@amd.com#include "sim/sim_object.hh"
5511308Santhony.gutierrez@amd.com
// NOTE(review): presumably the upper bound on instructions tracked per
// wavefront -- confirm against the users of this constant.
static constexpr int MAX_NUM_INSTS_PER_WF = 12;
5711308Santhony.gutierrez@amd.com
5811641Salexandru.dutu@amd.com/**
5911641Salexandru.dutu@amd.com * A reconvergence stack entry conveys the necessary state to implement
6011641Salexandru.dutu@amd.com * control flow divergence.
6111641Salexandru.dutu@amd.com */
6211641Salexandru.dutu@amd.comstruct ReconvergenceStackEntry {
6311641Salexandru.dutu@amd.com    /**
6411641Salexandru.dutu@amd.com     * PC of current instruction.
6511641Salexandru.dutu@amd.com     */
6611641Salexandru.dutu@amd.com    uint32_t pc;
6711641Salexandru.dutu@amd.com    /**
6811641Salexandru.dutu@amd.com     * PC of the immediate post-dominator instruction, i.e., the value of
6911641Salexandru.dutu@amd.com     * @a pc for the first instruction that will be executed by the wavefront
7011641Salexandru.dutu@amd.com     * when a reconvergence point is reached.
7111641Salexandru.dutu@amd.com     */
7211641Salexandru.dutu@amd.com    uint32_t rpc;
7311641Salexandru.dutu@amd.com    /**
7411641Salexandru.dutu@amd.com     * Execution mask.
7511641Salexandru.dutu@amd.com     */
7611641Salexandru.dutu@amd.com    VectorMask execMask;
7711641Salexandru.dutu@amd.com};
7811641Salexandru.dutu@amd.com
/*
 * Arguments for the hsail opcode call are user defined and variable length.
 * The hardware/finalizer can support arguments in hardware or use memory to
 * pass arguments. For now, let's assume that an unlimited number of arguments
 * are supported in hardware (the compiler inlines functions whenever it can
 * anyways, so unless someone is interested in the implications of linking/
 * library functions, I think this is a reasonable assumption given the typical
 * size of an OpenCL kernel).
 *
 * Note that call args are different than kernel arguments:
 *   * All work-items in a kernel refer to the same set of kernel arguments
 *   * Each work-item has its own set of call args. So a call argument at
 *     address 0x4 is different for work-item 0 and work-item 1.
 *
 * The table below shows an example of how we organize the call arguments in
 * the CallArgMem class.
 *
 * int foo(int arg1, double arg2)
 *  ___________________________________________________
 * | 0: return.0 | 4: return.1 | ... | 252: return.63  |
 * |---------------------------------------------------|
 * | 256: arg1.0 | 260: arg1.1 | ... | 508: arg1.63    |
 * |---------------------------------------------------|
 * | 512: arg2.0 | 520: arg2.1 | ... | 1016: arg2.63   |
 *  ___________________________________________________
 *
 * The buffer is allocated in the constructor and released in the destructor.
 * Copying is disabled because a copy would share the same raw pointer and
 * free it twice.
 */
class CallArgMem
{
  public:
    // pointer to buffer for storing function arguments
    uint8_t *mem;
    // multiplier applied to an argument address to locate its row of
    // per-lane values (see getLaneOffset)
    int wfSize;
    // size of function args
    int funcArgsSizePerItem;

    /**
     * Byte offset, from the start of mem, of the value that @p lane holds
     * for the argument at per-item address @p addr.
     *
     * Each argument occupies a row starting at addr * wfSize; within the
     * row, consecutive lanes are sizeof(CType) bytes apart.
     */
    template<typename CType>
    int
    getLaneOffset(int lane, int addr)
    {
        return static_cast<int>(addr * wfSize + sizeof(CType) * lane);
    }

    /**
     * Allocate funcArgsSizePerItem * wfSize bytes of argument storage.
     *
     * @param func_args_size_per_item size of function args for one item
     * @param wf_size value stored in wfSize
     */
    CallArgMem(int func_args_size_per_item, int wf_size)
        : mem(nullptr), wfSize(wf_size),
          funcArgsSizePerItem(func_args_size_per_item)
    {
        // a negative size would silently turn into a huge allocation request
        assert(funcArgsSizePerItem >= 0 && wfSize >= 0);

        // multiply in size_t so the product cannot overflow int
        const size_t num_bytes =
            static_cast<size_t>(funcArgsSizePerItem) * wfSize;

        mem = static_cast<uint8_t*>(malloc(num_bytes));

        // malloc(0) is allowed to return a null pointer
        assert(mem != nullptr || num_bytes == 0);
    }

    // The object owns mem; an implicit copy would lead to a double free.
    CallArgMem(const CallArgMem&) = delete;
    CallArgMem& operator=(const CallArgMem&) = delete;

    ~CallArgMem()
    {
        free(mem);
    }

    /**
     * Pointer to the storage of @p lane for the argument at @p addr.
     */
    template<typename CType>
    uint8_t*
    getLaneAddr(int lane, int addr)
    {
        return mem + getLaneOffset<CType>(lane, addr);
    }

    /**
     * Store @p val into the slot of @p lane for the argument at @p addr.
     */
    template<typename CType>
    void
    setLaneAddr(int lane, int addr, CType val)
    {
        uint8_t *slot = mem + getLaneOffset<CType>(lane, addr);
        *reinterpret_cast<CType*>(slot) = val;
    }
};
14611308Santhony.gutierrez@amd.com
14711308Santhony.gutierrez@amd.comclass Wavefront : public SimObject
14811308Santhony.gutierrez@amd.com{
14911308Santhony.gutierrez@amd.com  public:
15011308Santhony.gutierrez@amd.com    enum itype_e {I_ALU,I_GLOBAL,I_SHARED,I_FLAT,I_PRIVATE};
15111308Santhony.gutierrez@amd.com    enum status_e {S_STOPPED,S_RETURNING,S_RUNNING};
15211308Santhony.gutierrez@amd.com
15311308Santhony.gutierrez@amd.com    // Base pointer for array of instruction pointers
15411639Salexandru.dutu@amd.com    uint64_t basePtr;
15511308Santhony.gutierrez@amd.com
15611639Salexandru.dutu@amd.com    uint32_t oldBarrierCnt;
15711639Salexandru.dutu@amd.com    uint32_t barrierCnt;
15811639Salexandru.dutu@amd.com    uint32_t barrierId;
15911639Salexandru.dutu@amd.com    uint32_t barrierSlots;
16011308Santhony.gutierrez@amd.com    status_e status;
16111308Santhony.gutierrez@amd.com    // HW slot id where the WF is mapped to inside a SIMD unit
16211308Santhony.gutierrez@amd.com    int wfSlotId;
16311639Salexandru.dutu@amd.com    int kernId;
16411308Santhony.gutierrez@amd.com    // SIMD unit where the WV has been scheduled
16511308Santhony.gutierrez@amd.com    int simdId;
16611308Santhony.gutierrez@amd.com    // pointer to parent CU
16711308Santhony.gutierrez@amd.com    ComputeUnit *computeUnit;
16811308Santhony.gutierrez@amd.com
16911308Santhony.gutierrez@amd.com    std::deque<GPUDynInstPtr> instructionBuffer;
17011308Santhony.gutierrez@amd.com
17111308Santhony.gutierrez@amd.com    bool pendingFetch;
17211308Santhony.gutierrez@amd.com    bool dropFetch;
17311308Santhony.gutierrez@amd.com
17411308Santhony.gutierrez@amd.com    // Condition Register State (for HSAIL simulations only)
17511308Santhony.gutierrez@amd.com    class ConditionRegisterState *condRegState;
17611308Santhony.gutierrez@amd.com    // number of single precision VGPRs required by WF
17711308Santhony.gutierrez@amd.com    uint32_t maxSpVgprs;
17811308Santhony.gutierrez@amd.com    // number of double precision VGPRs required by WF
17911308Santhony.gutierrez@amd.com    uint32_t maxDpVgprs;
18011308Santhony.gutierrez@amd.com    // map virtual to physical vector register
18111308Santhony.gutierrez@amd.com    uint32_t remap(uint32_t vgprIndex, uint32_t size, uint8_t mode=0);
18211308Santhony.gutierrez@amd.com    void resizeRegFiles(int num_cregs, int num_sregs, int num_dregs);
18311308Santhony.gutierrez@amd.com    bool isGmInstruction(GPUDynInstPtr ii);
18411308Santhony.gutierrez@amd.com    bool isLmInstruction(GPUDynInstPtr ii);
18511308Santhony.gutierrez@amd.com    bool isOldestInstGMem();
18611308Santhony.gutierrez@amd.com    bool isOldestInstLMem();
18711308Santhony.gutierrez@amd.com    bool isOldestInstPrivMem();
18811308Santhony.gutierrez@amd.com    bool isOldestInstFlatMem();
18911308Santhony.gutierrez@amd.com    bool isOldestInstALU();
19011308Santhony.gutierrez@amd.com    bool isOldestInstBarrier();
19111308Santhony.gutierrez@amd.com    // used for passing spill address to DDInstGPU
19211639Salexandru.dutu@amd.com    std::vector<Addr> lastAddr;
19311639Salexandru.dutu@amd.com    std::vector<uint32_t> workItemId[3];
19411639Salexandru.dutu@amd.com    std::vector<uint32_t> workItemFlatId;
19511657Salexandru.dutu@amd.com    /* kernel launch parameters */
19611639Salexandru.dutu@amd.com    uint32_t workGroupId[3];
19711639Salexandru.dutu@amd.com    uint32_t workGroupSz[3];
19811639Salexandru.dutu@amd.com    uint32_t gridSz[3];
19911639Salexandru.dutu@amd.com    uint32_t wgId;
20011639Salexandru.dutu@amd.com    uint32_t wgSz;
20111657Salexandru.dutu@amd.com    /* the actual WG size can differ than the maximum size */
20211657Salexandru.dutu@amd.com    uint32_t actualWgSz[3];
20311657Salexandru.dutu@amd.com    uint32_t actualWgSzTotal;
20411657Salexandru.dutu@amd.com    void computeActualWgSz(NDRange *ndr);
20511643Salexandru.dutu@amd.com    // wavefront id within a workgroup
20611643Salexandru.dutu@amd.com    uint32_t wfId;
20711639Salexandru.dutu@amd.com    uint32_t maxDynWaveId;
20811639Salexandru.dutu@amd.com    uint32_t dispatchId;
20911308Santhony.gutierrez@amd.com    // outstanding global+local memory requests
21011639Salexandru.dutu@amd.com    uint32_t outstandingReqs;
21111308Santhony.gutierrez@amd.com    // memory requests between scoreboard
21211308Santhony.gutierrez@amd.com    // and execute stage not yet executed
21311639Salexandru.dutu@amd.com    uint32_t memReqsInPipe;
21411308Santhony.gutierrez@amd.com    // outstanding global memory write requests
21511639Salexandru.dutu@amd.com    uint32_t outstandingReqsWrGm;
21611308Santhony.gutierrez@amd.com    // outstanding local memory write requests
21711639Salexandru.dutu@amd.com    uint32_t outstandingReqsWrLm;
21811308Santhony.gutierrez@amd.com    // outstanding global memory read requests
21911639Salexandru.dutu@amd.com    uint32_t outstandingReqsRdGm;
22011308Santhony.gutierrez@amd.com    // outstanding local memory read requests
22111639Salexandru.dutu@amd.com    uint32_t outstandingReqsRdLm;
22211639Salexandru.dutu@amd.com    uint32_t rdLmReqsInPipe;
22311639Salexandru.dutu@amd.com    uint32_t rdGmReqsInPipe;
22411639Salexandru.dutu@amd.com    uint32_t wrLmReqsInPipe;
22511639Salexandru.dutu@amd.com    uint32_t wrGmReqsInPipe;
22611308Santhony.gutierrez@amd.com
22711639Salexandru.dutu@amd.com    int memTraceBusy;
22811639Salexandru.dutu@amd.com    uint64_t lastTrace;
22911308Santhony.gutierrez@amd.com    // number of vector registers reserved by WF
23011308Santhony.gutierrez@amd.com    int reservedVectorRegs;
23111308Santhony.gutierrez@amd.com    // Index into the Vector Register File's namespace where the WF's registers
23211308Santhony.gutierrez@amd.com    // will live while the WF is executed
23311308Santhony.gutierrez@amd.com    uint32_t startVgprIndex;
23411308Santhony.gutierrez@amd.com
23511308Santhony.gutierrez@amd.com    // Old value of destination gpr (for trace)
23611639Salexandru.dutu@amd.com    std::vector<uint32_t> oldVgpr;
23711308Santhony.gutierrez@amd.com    // Id of destination gpr (for trace)
23811639Salexandru.dutu@amd.com    uint32_t oldVgprId;
23911308Santhony.gutierrez@amd.com    // Tick count of last old_vgpr copy
24011639Salexandru.dutu@amd.com    uint64_t oldVgprTcnt;
24111308Santhony.gutierrez@amd.com
24211308Santhony.gutierrez@amd.com    // Old value of destination gpr (for trace)
24311639Salexandru.dutu@amd.com    std::vector<uint64_t> oldDgpr;
24411308Santhony.gutierrez@amd.com    // Id of destination gpr (for trace)
24511639Salexandru.dutu@amd.com    uint32_t oldDgprId;
24611308Santhony.gutierrez@amd.com    // Tick count of last old_vgpr copy
24711639Salexandru.dutu@amd.com    uint64_t oldDgprTcnt;
24811308Santhony.gutierrez@amd.com
24911308Santhony.gutierrez@amd.com    // Execution mask at wavefront start
25011639Salexandru.dutu@amd.com    VectorMask initMask;
25111308Santhony.gutierrez@amd.com
25211308Santhony.gutierrez@amd.com    // number of barriers this WF has joined
25311639Salexandru.dutu@amd.com    std::vector<int> barCnt;
25411639Salexandru.dutu@amd.com    int maxBarCnt;
25511308Santhony.gutierrez@amd.com    // Flag to stall a wave on barrier
25611308Santhony.gutierrez@amd.com    bool stalledAtBarrier;
25711308Santhony.gutierrez@amd.com
25811308Santhony.gutierrez@amd.com    // a pointer to the fraction of the LDS allocated
25911308Santhony.gutierrez@amd.com    // to this workgroup (thus this wavefront)
26011308Santhony.gutierrez@amd.com    LdsChunk *ldsChunk;
26111308Santhony.gutierrez@amd.com
26211308Santhony.gutierrez@amd.com    // A pointer to the spill area
26311308Santhony.gutierrez@amd.com    Addr spillBase;
26411308Santhony.gutierrez@amd.com    // The size of the spill area
26511308Santhony.gutierrez@amd.com    uint32_t spillSizePerItem;
26611308Santhony.gutierrez@amd.com    // The vector width of the spill area
26711308Santhony.gutierrez@amd.com    uint32_t spillWidth;
26811308Santhony.gutierrez@amd.com
26911308Santhony.gutierrez@amd.com    // A pointer to the private memory area
27011308Santhony.gutierrez@amd.com    Addr privBase;
27111308Santhony.gutierrez@amd.com    // The size of the private memory area
27211308Santhony.gutierrez@amd.com    uint32_t privSizePerItem;
27311308Santhony.gutierrez@amd.com
27411308Santhony.gutierrez@amd.com    // A pointer ot the read-only memory area
27511308Santhony.gutierrez@amd.com    Addr roBase;
27611308Santhony.gutierrez@amd.com    // size of the read-only memory area
27711308Santhony.gutierrez@amd.com    uint32_t roSize;
27811308Santhony.gutierrez@amd.com
27911308Santhony.gutierrez@amd.com    // pointer to buffer for storing kernel arguments
28011308Santhony.gutierrez@amd.com    uint8_t *kernelArgs;
28111308Santhony.gutierrez@amd.com    // unique WF id over all WFs executed across all CUs
28211308Santhony.gutierrez@amd.com    uint64_t wfDynId;
28311308Santhony.gutierrez@amd.com
28411308Santhony.gutierrez@amd.com    // number of times instruction issue for this wavefront is blocked
28511308Santhony.gutierrez@amd.com    // due to VRF port availability
28611308Santhony.gutierrez@amd.com    Stats::Scalar numTimesBlockedDueVrfPortAvail;
28711308Santhony.gutierrez@amd.com    // number of times an instruction of a WF is blocked from being issued
28811308Santhony.gutierrez@amd.com    // due to WAR and WAW dependencies
28911308Santhony.gutierrez@amd.com    Stats::Scalar numTimesBlockedDueWAXDependencies;
29011308Santhony.gutierrez@amd.com    // number of times an instruction of a WF is blocked from being issued
29111308Santhony.gutierrez@amd.com    // due to WAR and WAW dependencies
29211308Santhony.gutierrez@amd.com    Stats::Scalar numTimesBlockedDueRAWDependencies;
29311308Santhony.gutierrez@amd.com    // distribution of executed instructions based on their register
29411308Santhony.gutierrez@amd.com    // operands; this is used to highlight the load on the VRF
29511308Santhony.gutierrez@amd.com    Stats::Distribution srcRegOpDist;
29611308Santhony.gutierrez@amd.com    Stats::Distribution dstRegOpDist;
29711308Santhony.gutierrez@amd.com
29811308Santhony.gutierrez@amd.com    // Functions to operate on call argument memory
29911308Santhony.gutierrez@amd.com    // argument memory for hsail call instruction
30011308Santhony.gutierrez@amd.com    CallArgMem *callArgMem;
30111308Santhony.gutierrez@amd.com    void
30211534Sjohn.kalamatianos@amd.com    initCallArgMem(int func_args_size_per_item, int wf_size)
30311308Santhony.gutierrez@amd.com    {
30411534Sjohn.kalamatianos@amd.com        callArgMem = new CallArgMem(func_args_size_per_item, wf_size);
30511308Santhony.gutierrez@amd.com    }
30611308Santhony.gutierrez@amd.com
30711308Santhony.gutierrez@amd.com    template<typename CType>
30811308Santhony.gutierrez@amd.com    CType
30911308Santhony.gutierrez@amd.com    readCallArgMem(int lane, int addr)
31011308Santhony.gutierrez@amd.com    {
31111308Santhony.gutierrez@amd.com        return *((CType*)(callArgMem->getLaneAddr<CType>(lane, addr)));
31211308Santhony.gutierrez@amd.com    }
31311308Santhony.gutierrez@amd.com
31411308Santhony.gutierrez@amd.com    template<typename CType>
31511308Santhony.gutierrez@amd.com    void
31611308Santhony.gutierrez@amd.com    writeCallArgMem(int lane, int addr, CType val)
31711308Santhony.gutierrez@amd.com    {
31811308Santhony.gutierrez@amd.com        callArgMem->setLaneAddr<CType>(lane, addr, val);
31911308Santhony.gutierrez@amd.com    }
32011308Santhony.gutierrez@amd.com
32111308Santhony.gutierrez@amd.com    typedef WavefrontParams Params;
32211308Santhony.gutierrez@amd.com    Wavefront(const Params *p);
32311308Santhony.gutierrez@amd.com    ~Wavefront();
32411308Santhony.gutierrez@amd.com    virtual void init();
32511308Santhony.gutierrez@amd.com
32611308Santhony.gutierrez@amd.com    void
32711308Santhony.gutierrez@amd.com    setParent(ComputeUnit *cu)
32811308Santhony.gutierrez@amd.com    {
32911308Santhony.gutierrez@amd.com        computeUnit = cu;
33011308Santhony.gutierrez@amd.com    }
33111308Santhony.gutierrez@amd.com
33211308Santhony.gutierrez@amd.com    void start(uint64_t _wfDynId, uint64_t _base_ptr);
33311308Santhony.gutierrez@amd.com    void exec();
33411308Santhony.gutierrez@amd.com    void updateResources();
33511308Santhony.gutierrez@amd.com    int ready(itype_e type);
33611308Santhony.gutierrez@amd.com    bool instructionBufferHasBranch();
33711308Santhony.gutierrez@amd.com    void regStats();
33811639Salexandru.dutu@amd.com    VectorMask getPred() { return execMask() & initMask; }
33911308Santhony.gutierrez@amd.com
34011308Santhony.gutierrez@amd.com    bool waitingAtBarrier(int lane);
34111308Santhony.gutierrez@amd.com
34211308Santhony.gutierrez@amd.com    void pushToReconvergenceStack(uint32_t pc, uint32_t rpc,
34311308Santhony.gutierrez@amd.com                                  const VectorMask& exec_mask);
34411308Santhony.gutierrez@amd.com
34511308Santhony.gutierrez@amd.com    void popFromReconvergenceStack();
34611308Santhony.gutierrez@amd.com
34711308Santhony.gutierrez@amd.com    uint32_t pc() const;
34811308Santhony.gutierrez@amd.com
34911308Santhony.gutierrez@amd.com    uint32_t rpc() const;
35011308Santhony.gutierrez@amd.com
35111308Santhony.gutierrez@amd.com    VectorMask execMask() const;
35211308Santhony.gutierrez@amd.com
35311308Santhony.gutierrez@amd.com    bool execMask(int lane) const;
35411308Santhony.gutierrez@amd.com
35511308Santhony.gutierrez@amd.com    void pc(uint32_t new_pc);
35611308Santhony.gutierrez@amd.com
35711308Santhony.gutierrez@amd.com    void discardFetch();
35811308Santhony.gutierrez@amd.com
35911640Salexandru.dutu@amd.com    /**
36011640Salexandru.dutu@amd.com     * Returns the size of the static hardware context of a particular wavefront
36111640Salexandru.dutu@amd.com     * This should be updated everytime the context is changed
36211640Salexandru.dutu@amd.com     */
36311640Salexandru.dutu@amd.com    uint32_t getStaticContextSize() const;
36411640Salexandru.dutu@amd.com
36511644Salexandru.dutu@amd.com    /**
36611644Salexandru.dutu@amd.com     * Returns the hardware context as a stream of bytes
36711644Salexandru.dutu@amd.com     * This method is designed for HSAIL execution
36811644Salexandru.dutu@amd.com     */
36911644Salexandru.dutu@amd.com    void getContext(const void *out);
37011644Salexandru.dutu@amd.com
37111644Salexandru.dutu@amd.com    /**
37211644Salexandru.dutu@amd.com     * Sets the hardware context fromt a stream of bytes
37311644Salexandru.dutu@amd.com     * This method is designed for HSAIL execution
37411644Salexandru.dutu@amd.com     */
37511644Salexandru.dutu@amd.com    void setContext(const void *in);
37611644Salexandru.dutu@amd.com
37711696Santhony.gutierrez@amd.com    TheGpuISA::GPUISA&
37811696Santhony.gutierrez@amd.com    gpuISA()
37911696Santhony.gutierrez@amd.com    {
38011696Santhony.gutierrez@amd.com        return _gpuISA;
38111696Santhony.gutierrez@amd.com    }
38211696Santhony.gutierrez@amd.com
38311308Santhony.gutierrez@amd.com  private:
38411696Santhony.gutierrez@amd.com    TheGpuISA::GPUISA _gpuISA;
38511308Santhony.gutierrez@amd.com    /**
38611308Santhony.gutierrez@amd.com     * Stack containing Control Flow Graph nodes (i.e., kernel instructions)
38711308Santhony.gutierrez@amd.com     * to be visited by the wavefront, and the associated execution masks. The
38811308Santhony.gutierrez@amd.com     * reconvergence stack grows every time the wavefront reaches a divergence
38911308Santhony.gutierrez@amd.com     * point (branch instruction), and shrinks every time the wavefront
39011308Santhony.gutierrez@amd.com     * reaches a reconvergence point (immediate post-dominator instruction).
39111308Santhony.gutierrez@amd.com     */
39211641Salexandru.dutu@amd.com    std::deque<std::unique_ptr<ReconvergenceStackEntry>> reconvergenceStack;
39311308Santhony.gutierrez@amd.com};
39411308Santhony.gutierrez@amd.com
39511308Santhony.gutierrez@amd.com#endif // __WAVEFRONT_HH__
396