/*
 * Copyright (c) 2011-2017 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * For use for simulation and test purposes only
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Lisa Hsu
 */
3511308Santhony.gutierrez@amd.com
3611308Santhony.gutierrez@amd.com#include "gpu-compute/wavefront.hh"
3711308Santhony.gutierrez@amd.com
3811308Santhony.gutierrez@amd.com#include "debug/GPUExec.hh"
3911308Santhony.gutierrez@amd.com#include "debug/WavefrontStack.hh"
4011308Santhony.gutierrez@amd.com#include "gpu-compute/compute_unit.hh"
4111308Santhony.gutierrez@amd.com#include "gpu-compute/gpu_dyn_inst.hh"
4211308Santhony.gutierrez@amd.com#include "gpu-compute/shader.hh"
4311308Santhony.gutierrez@amd.com#include "gpu-compute/vector_register_file.hh"
4411308Santhony.gutierrez@amd.com
4511308Santhony.gutierrez@amd.comWavefront*
4611308Santhony.gutierrez@amd.comWavefrontParams::create()
4711308Santhony.gutierrez@amd.com{
4811308Santhony.gutierrez@amd.com    return new Wavefront(this);
4911308Santhony.gutierrez@amd.com}
5011308Santhony.gutierrez@amd.com
5111308Santhony.gutierrez@amd.comWavefront::Wavefront(const Params *p)
5211883Santhony.gutierrez@amd.com  : SimObject(p), callArgMem(nullptr), _gpuISA()
5311308Santhony.gutierrez@amd.com{
5411639Salexandru.dutu@amd.com    lastTrace = 0;
5511308Santhony.gutierrez@amd.com    simdId = p->simdId;
5611308Santhony.gutierrez@amd.com    wfSlotId = p->wf_slot_id;
5711308Santhony.gutierrez@amd.com    status = S_STOPPED;
5811308Santhony.gutierrez@amd.com    reservedVectorRegs = 0;
5911308Santhony.gutierrez@amd.com    startVgprIndex = 0;
6011639Salexandru.dutu@amd.com    outstandingReqs = 0;
6111639Salexandru.dutu@amd.com    memReqsInPipe = 0;
6211639Salexandru.dutu@amd.com    outstandingReqsWrGm = 0;
6311639Salexandru.dutu@amd.com    outstandingReqsWrLm = 0;
6411639Salexandru.dutu@amd.com    outstandingReqsRdGm = 0;
6511639Salexandru.dutu@amd.com    outstandingReqsRdLm = 0;
6611639Salexandru.dutu@amd.com    rdLmReqsInPipe = 0;
6711639Salexandru.dutu@amd.com    rdGmReqsInPipe = 0;
6811639Salexandru.dutu@amd.com    wrLmReqsInPipe = 0;
6911639Salexandru.dutu@amd.com    wrGmReqsInPipe = 0;
7011308Santhony.gutierrez@amd.com
7111639Salexandru.dutu@amd.com    barrierCnt = 0;
7211639Salexandru.dutu@amd.com    oldBarrierCnt = 0;
7311308Santhony.gutierrez@amd.com    stalledAtBarrier = false;
7411308Santhony.gutierrez@amd.com
7511639Salexandru.dutu@amd.com    memTraceBusy = 0;
7611639Salexandru.dutu@amd.com    oldVgprTcnt = 0xffffffffffffffffll;
7711639Salexandru.dutu@amd.com    oldDgprTcnt = 0xffffffffffffffffll;
7811639Salexandru.dutu@amd.com    oldVgpr.resize(p->wfSize);
7911308Santhony.gutierrez@amd.com
8011308Santhony.gutierrez@amd.com    pendingFetch = false;
8111308Santhony.gutierrez@amd.com    dropFetch = false;
8211308Santhony.gutierrez@amd.com    condRegState = new ConditionRegisterState();
8311308Santhony.gutierrez@amd.com    maxSpVgprs = 0;
8411308Santhony.gutierrez@amd.com    maxDpVgprs = 0;
8511639Salexandru.dutu@amd.com    lastAddr.resize(p->wfSize);
8611639Salexandru.dutu@amd.com    workItemFlatId.resize(p->wfSize);
8711639Salexandru.dutu@amd.com    oldDgpr.resize(p->wfSize);
8811639Salexandru.dutu@amd.com    barCnt.resize(p->wfSize);
8911534Sjohn.kalamatianos@amd.com    for (int i = 0; i < 3; ++i) {
9011639Salexandru.dutu@amd.com        workItemId[i].resize(p->wfSize);
9111534Sjohn.kalamatianos@amd.com    }
9211308Santhony.gutierrez@amd.com}
9311308Santhony.gutierrez@amd.com
9411308Santhony.gutierrez@amd.comvoid
9511308Santhony.gutierrez@amd.comWavefront::regStats()
9611308Santhony.gutierrez@amd.com{
9711523Sdavid.guillen@arm.com    SimObject::regStats();
9811523Sdavid.guillen@arm.com
9911308Santhony.gutierrez@amd.com    srcRegOpDist
10011308Santhony.gutierrez@amd.com        .init(0, 4, 2)
10111308Santhony.gutierrez@amd.com        .name(name() + ".src_reg_operand_dist")
10211308Santhony.gutierrez@amd.com        .desc("number of executed instructions with N source register operands")
10311308Santhony.gutierrez@amd.com        ;
10411308Santhony.gutierrez@amd.com
10511308Santhony.gutierrez@amd.com    dstRegOpDist
10611308Santhony.gutierrez@amd.com        .init(0, 3, 2)
10711308Santhony.gutierrez@amd.com        .name(name() + ".dst_reg_operand_dist")
10811308Santhony.gutierrez@amd.com        .desc("number of executed instructions with N destination register "
10911308Santhony.gutierrez@amd.com              "operands")
11011308Santhony.gutierrez@amd.com        ;
11111308Santhony.gutierrez@amd.com
11211308Santhony.gutierrez@amd.com    // FIXME: the name of the WF needs to be unique
11311308Santhony.gutierrez@amd.com    numTimesBlockedDueWAXDependencies
11411308Santhony.gutierrez@amd.com        .name(name() + ".timesBlockedDueWAXDependencies")
11511308Santhony.gutierrez@amd.com        .desc("number of times the wf's instructions are blocked due to WAW "
11611308Santhony.gutierrez@amd.com              "or WAR dependencies")
11711308Santhony.gutierrez@amd.com        ;
11811308Santhony.gutierrez@amd.com
11911308Santhony.gutierrez@amd.com    // FIXME: the name of the WF needs to be unique
12011308Santhony.gutierrez@amd.com    numTimesBlockedDueRAWDependencies
12111308Santhony.gutierrez@amd.com        .name(name() + ".timesBlockedDueRAWDependencies")
12211308Santhony.gutierrez@amd.com        .desc("number of times the wf's instructions are blocked due to RAW "
12311308Santhony.gutierrez@amd.com              "dependencies")
12411308Santhony.gutierrez@amd.com        ;
12511308Santhony.gutierrez@amd.com
12611308Santhony.gutierrez@amd.com    // FIXME: the name of the WF needs to be unique
12711308Santhony.gutierrez@amd.com    numTimesBlockedDueVrfPortAvail
12811308Santhony.gutierrez@amd.com        .name(name() + ".timesBlockedDueVrfPortAvail")
12911308Santhony.gutierrez@amd.com        .desc("number of times instructions are blocked due to VRF port "
13011308Santhony.gutierrez@amd.com              "availability")
13111308Santhony.gutierrez@amd.com        ;
13211308Santhony.gutierrez@amd.com}
13311308Santhony.gutierrez@amd.com
13411308Santhony.gutierrez@amd.comvoid
13511308Santhony.gutierrez@amd.comWavefront::init()
13611308Santhony.gutierrez@amd.com{
13711308Santhony.gutierrez@amd.com    reservedVectorRegs = 0;
13811308Santhony.gutierrez@amd.com    startVgprIndex = 0;
13911308Santhony.gutierrez@amd.com}
14011308Santhony.gutierrez@amd.com
14111308Santhony.gutierrez@amd.comvoid
14211308Santhony.gutierrez@amd.comWavefront::resizeRegFiles(int num_cregs, int num_sregs, int num_dregs)
14311308Santhony.gutierrez@amd.com{
14411308Santhony.gutierrez@amd.com    condRegState->init(num_cregs);
14511308Santhony.gutierrez@amd.com    maxSpVgprs = num_sregs;
14611308Santhony.gutierrez@amd.com    maxDpVgprs = num_dregs;
14711308Santhony.gutierrez@amd.com}
14811308Santhony.gutierrez@amd.com
14911308Santhony.gutierrez@amd.comWavefront::~Wavefront()
15011308Santhony.gutierrez@amd.com{
15111308Santhony.gutierrez@amd.com    if (callArgMem)
15211308Santhony.gutierrez@amd.com        delete callArgMem;
15311534Sjohn.kalamatianos@amd.com    delete condRegState;
15411308Santhony.gutierrez@amd.com}
15511308Santhony.gutierrez@amd.com
15611308Santhony.gutierrez@amd.comvoid
15711640Salexandru.dutu@amd.comWavefront::start(uint64_t _wf_dyn_id,uint64_t _base_ptr)
15811308Santhony.gutierrez@amd.com{
15911640Salexandru.dutu@amd.com    wfDynId = _wf_dyn_id;
16011639Salexandru.dutu@amd.com    basePtr = _base_ptr;
16111308Santhony.gutierrez@amd.com    status = S_RUNNING;
16211308Santhony.gutierrez@amd.com}
16311308Santhony.gutierrez@amd.com
16411308Santhony.gutierrez@amd.combool
16511308Santhony.gutierrez@amd.comWavefront::isGmInstruction(GPUDynInstPtr ii)
16611308Santhony.gutierrez@amd.com{
16711692Santhony.gutierrez@amd.com    if (ii->isGlobalMem() || ii->isFlat())
16811308Santhony.gutierrez@amd.com        return true;
16911308Santhony.gutierrez@amd.com
17011308Santhony.gutierrez@amd.com    return false;
17111308Santhony.gutierrez@amd.com}
17211308Santhony.gutierrez@amd.com
17311308Santhony.gutierrez@amd.combool
17411308Santhony.gutierrez@amd.comWavefront::isLmInstruction(GPUDynInstPtr ii)
17511308Santhony.gutierrez@amd.com{
17611692Santhony.gutierrez@amd.com    if (ii->isLocalMem()) {
17711308Santhony.gutierrez@amd.com        return true;
17811308Santhony.gutierrez@amd.com    }
17911308Santhony.gutierrez@amd.com
18011308Santhony.gutierrez@amd.com    return false;
18111308Santhony.gutierrez@amd.com}
18211308Santhony.gutierrez@amd.com
18311308Santhony.gutierrez@amd.combool
18411308Santhony.gutierrez@amd.comWavefront::isOldestInstALU()
18511308Santhony.gutierrez@amd.com{
18611308Santhony.gutierrez@amd.com    assert(!instructionBuffer.empty());
18711308Santhony.gutierrez@amd.com    GPUDynInstPtr ii = instructionBuffer.front();
18811308Santhony.gutierrez@amd.com
18911692Santhony.gutierrez@amd.com    if (status != S_STOPPED && (ii->isNop() ||
19011692Santhony.gutierrez@amd.com        ii->isReturn() || ii->isBranch() ||
19111692Santhony.gutierrez@amd.com        ii->isALU() || (ii->isKernArgSeg() && ii->isLoad()))) {
19211308Santhony.gutierrez@amd.com        return true;
19311308Santhony.gutierrez@amd.com    }
19411308Santhony.gutierrez@amd.com
19511308Santhony.gutierrez@amd.com    return false;
19611308Santhony.gutierrez@amd.com}
19711308Santhony.gutierrez@amd.com
19811308Santhony.gutierrez@amd.combool
19911308Santhony.gutierrez@amd.comWavefront::isOldestInstBarrier()
20011308Santhony.gutierrez@amd.com{
20111308Santhony.gutierrez@amd.com    assert(!instructionBuffer.empty());
20211308Santhony.gutierrez@amd.com    GPUDynInstPtr ii = instructionBuffer.front();
20311308Santhony.gutierrez@amd.com
20411692Santhony.gutierrez@amd.com    if (status != S_STOPPED && ii->isBarrier()) {
20511308Santhony.gutierrez@amd.com        return true;
20611308Santhony.gutierrez@amd.com    }
20711308Santhony.gutierrez@amd.com
20811308Santhony.gutierrez@amd.com    return false;
20911308Santhony.gutierrez@amd.com}
21011308Santhony.gutierrez@amd.com
21111308Santhony.gutierrez@amd.combool
21211308Santhony.gutierrez@amd.comWavefront::isOldestInstGMem()
21311308Santhony.gutierrez@amd.com{
21411308Santhony.gutierrez@amd.com    assert(!instructionBuffer.empty());
21511308Santhony.gutierrez@amd.com    GPUDynInstPtr ii = instructionBuffer.front();
21611308Santhony.gutierrez@amd.com
21711692Santhony.gutierrez@amd.com    if (status != S_STOPPED && ii->isGlobalMem()) {
21811308Santhony.gutierrez@amd.com        return true;
21911308Santhony.gutierrez@amd.com    }
22011308Santhony.gutierrez@amd.com
22111308Santhony.gutierrez@amd.com    return false;
22211308Santhony.gutierrez@amd.com}
22311308Santhony.gutierrez@amd.com
22411308Santhony.gutierrez@amd.combool
22511308Santhony.gutierrez@amd.comWavefront::isOldestInstLMem()
22611308Santhony.gutierrez@amd.com{
22711308Santhony.gutierrez@amd.com    assert(!instructionBuffer.empty());
22811308Santhony.gutierrez@amd.com    GPUDynInstPtr ii = instructionBuffer.front();
22911308Santhony.gutierrez@amd.com
23011692Santhony.gutierrez@amd.com    if (status != S_STOPPED && ii->isLocalMem()) {
23111308Santhony.gutierrez@amd.com        return true;
23211308Santhony.gutierrez@amd.com    }
23311308Santhony.gutierrez@amd.com
23411308Santhony.gutierrez@amd.com    return false;
23511308Santhony.gutierrez@amd.com}
23611308Santhony.gutierrez@amd.com
23711308Santhony.gutierrez@amd.combool
23811308Santhony.gutierrez@amd.comWavefront::isOldestInstPrivMem()
23911308Santhony.gutierrez@amd.com{
24011308Santhony.gutierrez@amd.com    assert(!instructionBuffer.empty());
24111308Santhony.gutierrez@amd.com    GPUDynInstPtr ii = instructionBuffer.front();
24211308Santhony.gutierrez@amd.com
24311692Santhony.gutierrez@amd.com    if (status != S_STOPPED && ii->isPrivateSeg()) {
24411308Santhony.gutierrez@amd.com        return true;
24511308Santhony.gutierrez@amd.com    }
24611308Santhony.gutierrez@amd.com
24711308Santhony.gutierrez@amd.com    return false;
24811308Santhony.gutierrez@amd.com}
24911308Santhony.gutierrez@amd.com
25011308Santhony.gutierrez@amd.combool
25111308Santhony.gutierrez@amd.comWavefront::isOldestInstFlatMem()
25211308Santhony.gutierrez@amd.com{
25311308Santhony.gutierrez@amd.com    assert(!instructionBuffer.empty());
25411308Santhony.gutierrez@amd.com    GPUDynInstPtr ii = instructionBuffer.front();
25511308Santhony.gutierrez@amd.com
25611692Santhony.gutierrez@amd.com    if (status != S_STOPPED && ii->isFlat()) {
25711308Santhony.gutierrez@amd.com        return true;
25811308Santhony.gutierrez@amd.com    }
25911308Santhony.gutierrez@amd.com
26011308Santhony.gutierrez@amd.com    return false;
26111308Santhony.gutierrez@amd.com}
26211308Santhony.gutierrez@amd.com
26311308Santhony.gutierrez@amd.com// Return true if the Wavefront's instruction
26411308Santhony.gutierrez@amd.com// buffer has branch instruction.
26511308Santhony.gutierrez@amd.combool
26611308Santhony.gutierrez@amd.comWavefront::instructionBufferHasBranch()
26711308Santhony.gutierrez@amd.com{
26811308Santhony.gutierrez@amd.com    for (auto it : instructionBuffer) {
26911308Santhony.gutierrez@amd.com        GPUDynInstPtr ii = it;
27011308Santhony.gutierrez@amd.com
27111692Santhony.gutierrez@amd.com        if (ii->isReturn() || ii->isBranch()) {
27211308Santhony.gutierrez@amd.com            return true;
27311308Santhony.gutierrez@amd.com        }
27411308Santhony.gutierrez@amd.com    }
27511308Santhony.gutierrez@amd.com
27611308Santhony.gutierrez@amd.com    return false;
27711308Santhony.gutierrez@amd.com}
27811308Santhony.gutierrez@amd.com
27911308Santhony.gutierrez@amd.com// Remap HSAIL register to physical VGPR.
28011308Santhony.gutierrez@amd.com// HSAIL register = virtual register assigned to an operand by HLC compiler
28111308Santhony.gutierrez@amd.comuint32_t
28211308Santhony.gutierrez@amd.comWavefront::remap(uint32_t vgprIndex, uint32_t size, uint8_t mode)
28311308Santhony.gutierrez@amd.com{
28411308Santhony.gutierrez@amd.com    assert((vgprIndex < reservedVectorRegs) && (reservedVectorRegs > 0));
28511308Santhony.gutierrez@amd.com    // add the offset from where the VGPRs of the wavefront have been assigned
28611308Santhony.gutierrez@amd.com    uint32_t physicalVgprIndex = startVgprIndex + vgprIndex;
28711308Santhony.gutierrez@amd.com    // HSAIL double precision (DP) register: calculate the physical VGPR index
28811308Santhony.gutierrez@amd.com    // assuming that DP registers are placed after SP ones in the VRF. The DP
28911308Santhony.gutierrez@amd.com    // and SP VGPR name spaces in HSAIL mode are separate so we need to adjust
29011308Santhony.gutierrez@amd.com    // the DP VGPR index before mapping it to the physical VRF address space
29111308Santhony.gutierrez@amd.com    if (mode == 1 && size > 4) {
29211308Santhony.gutierrez@amd.com        physicalVgprIndex = startVgprIndex + maxSpVgprs + (2 * vgprIndex);
29311308Santhony.gutierrez@amd.com    }
29411308Santhony.gutierrez@amd.com
29511308Santhony.gutierrez@amd.com    assert((startVgprIndex <= physicalVgprIndex) &&
29611308Santhony.gutierrez@amd.com           (startVgprIndex + reservedVectorRegs - 1) >= physicalVgprIndex);
29711308Santhony.gutierrez@amd.com
29811308Santhony.gutierrez@amd.com    // calculate absolute physical VGPR index
29911308Santhony.gutierrez@amd.com    return physicalVgprIndex % computeUnit->vrf[simdId]->numRegs();
30011308Santhony.gutierrez@amd.com}
30111308Santhony.gutierrez@amd.com
30211308Santhony.gutierrez@amd.com// Return true if this wavefront is ready
30311308Santhony.gutierrez@amd.com// to execute an instruction of the specified type.
30411308Santhony.gutierrez@amd.comint
30511308Santhony.gutierrez@amd.comWavefront::ready(itype_e type)
30611308Santhony.gutierrez@amd.com{
30711308Santhony.gutierrez@amd.com    // Check to make sure wave is running
30811308Santhony.gutierrez@amd.com    if (status == S_STOPPED || status == S_RETURNING ||
30911308Santhony.gutierrez@amd.com        instructionBuffer.empty()) {
31011308Santhony.gutierrez@amd.com        return 0;
31111308Santhony.gutierrez@amd.com    }
31211308Santhony.gutierrez@amd.com
31311308Santhony.gutierrez@amd.com    // Is the wave waiting at a barrier
31411308Santhony.gutierrez@amd.com    if (stalledAtBarrier) {
31511639Salexandru.dutu@amd.com        if (!computeUnit->AllAtBarrier(barrierId,barrierCnt,
31611639Salexandru.dutu@amd.com                        computeUnit->getRefCounter(dispatchId, wgId))) {
31711308Santhony.gutierrez@amd.com            // Are all threads at barrier?
31811308Santhony.gutierrez@amd.com            return 0;
31911308Santhony.gutierrez@amd.com        }
32011639Salexandru.dutu@amd.com        oldBarrierCnt = barrierCnt;
32111308Santhony.gutierrez@amd.com        stalledAtBarrier = false;
32211308Santhony.gutierrez@amd.com    }
32311308Santhony.gutierrez@amd.com
32411308Santhony.gutierrez@amd.com    // Read instruction
32511308Santhony.gutierrez@amd.com    GPUDynInstPtr ii = instructionBuffer.front();
32611308Santhony.gutierrez@amd.com
32711308Santhony.gutierrez@amd.com    bool ready_inst M5_VAR_USED = false;
32811308Santhony.gutierrez@amd.com    bool glbMemBusRdy = false;
32911308Santhony.gutierrez@amd.com    bool glbMemIssueRdy = false;
33011308Santhony.gutierrez@amd.com    if (type == I_GLOBAL || type == I_FLAT || type == I_PRIVATE) {
33111308Santhony.gutierrez@amd.com        for (int j=0; j < computeUnit->numGlbMemUnits; ++j) {
33211308Santhony.gutierrez@amd.com            if (computeUnit->vrfToGlobalMemPipeBus[j].prerdy())
33311308Santhony.gutierrez@amd.com                glbMemBusRdy = true;
33411308Santhony.gutierrez@amd.com            if (computeUnit->wfWait[j].prerdy())
33511308Santhony.gutierrez@amd.com                glbMemIssueRdy = true;
33611308Santhony.gutierrez@amd.com        }
33711308Santhony.gutierrez@amd.com    }
33811308Santhony.gutierrez@amd.com    bool locMemBusRdy = false;
33911308Santhony.gutierrez@amd.com    bool locMemIssueRdy = false;
34011345Sjohn.kalamatianos@amd.com    if (type == I_SHARED || type == I_FLAT) {
34111308Santhony.gutierrez@amd.com        for (int j=0; j < computeUnit->numLocMemUnits; ++j) {
34211308Santhony.gutierrez@amd.com            if (computeUnit->vrfToLocalMemPipeBus[j].prerdy())
34311308Santhony.gutierrez@amd.com                locMemBusRdy = true;
34411308Santhony.gutierrez@amd.com            if (computeUnit->wfWait[j].prerdy())
34511308Santhony.gutierrez@amd.com                locMemIssueRdy = true;
34611308Santhony.gutierrez@amd.com        }
34711308Santhony.gutierrez@amd.com    }
34811308Santhony.gutierrez@amd.com
34911308Santhony.gutierrez@amd.com    // The following code is very error prone and the entire process for
35011308Santhony.gutierrez@amd.com    // checking readiness will be fixed eventually.  In the meantime, let's
35111308Santhony.gutierrez@amd.com    // make sure that we do not silently let an instruction type slip
35211308Santhony.gutierrez@amd.com    // through this logic and always return not ready.
35311692Santhony.gutierrez@amd.com    if (!(ii->isBarrier() || ii->isNop() || ii->isReturn() || ii->isBranch() ||
35411692Santhony.gutierrez@amd.com        ii->isALU() || ii->isLoad() || ii->isStore() || ii->isAtomic() ||
35511692Santhony.gutierrez@amd.com        ii->isMemFence() || ii->isFlat())) {
35611308Santhony.gutierrez@amd.com        panic("next instruction: %s is of unknown type\n", ii->disassemble());
35711308Santhony.gutierrez@amd.com    }
35811308Santhony.gutierrez@amd.com
35911308Santhony.gutierrez@amd.com    DPRINTF(GPUExec, "CU%d: WF[%d][%d]: Checking Read for Inst : %s\n",
36011308Santhony.gutierrez@amd.com            computeUnit->cu_id, simdId, wfSlotId, ii->disassemble());
36111308Santhony.gutierrez@amd.com
36211692Santhony.gutierrez@amd.com    if (type == I_ALU && ii->isBarrier()) {
36311308Santhony.gutierrez@amd.com        // Here for ALU instruction (barrier)
36411308Santhony.gutierrez@amd.com        if (!computeUnit->wfWait[simdId].prerdy()) {
36511308Santhony.gutierrez@amd.com            // Is wave slot free?
36611308Santhony.gutierrez@amd.com            return 0;
36711308Santhony.gutierrez@amd.com        }
36811308Santhony.gutierrez@amd.com
36911308Santhony.gutierrez@amd.com        // Are there in pipe or outstanding memory requests?
37011639Salexandru.dutu@amd.com        if ((outstandingReqs + memReqsInPipe) > 0) {
37111308Santhony.gutierrez@amd.com            return 0;
37211308Santhony.gutierrez@amd.com        }
37311308Santhony.gutierrez@amd.com
37411308Santhony.gutierrez@amd.com        ready_inst = true;
37511692Santhony.gutierrez@amd.com    } else if (type == I_ALU && ii->isNop()) {
37611308Santhony.gutierrez@amd.com        // Here for ALU instruction (nop)
37711308Santhony.gutierrez@amd.com        if (!computeUnit->wfWait[simdId].prerdy()) {
37811308Santhony.gutierrez@amd.com            // Is wave slot free?
37911308Santhony.gutierrez@amd.com            return 0;
38011308Santhony.gutierrez@amd.com        }
38111308Santhony.gutierrez@amd.com
38211308Santhony.gutierrez@amd.com        ready_inst = true;
38311692Santhony.gutierrez@amd.com    } else if (type == I_ALU && ii->isReturn()) {
38411308Santhony.gutierrez@amd.com        // Here for ALU instruction (return)
38511308Santhony.gutierrez@amd.com        if (!computeUnit->wfWait[simdId].prerdy()) {
38611308Santhony.gutierrez@amd.com            // Is wave slot free?
38711308Santhony.gutierrez@amd.com            return 0;
38811308Santhony.gutierrez@amd.com        }
38911308Santhony.gutierrez@amd.com
39011308Santhony.gutierrez@amd.com        // Are there in pipe or outstanding memory requests?
39111639Salexandru.dutu@amd.com        if ((outstandingReqs + memReqsInPipe) > 0) {
39211308Santhony.gutierrez@amd.com            return 0;
39311308Santhony.gutierrez@amd.com        }
39411308Santhony.gutierrez@amd.com
39511308Santhony.gutierrez@amd.com        ready_inst = true;
39611692Santhony.gutierrez@amd.com    } else if (type == I_ALU && (ii->isBranch() ||
39711692Santhony.gutierrez@amd.com               ii->isALU() ||
39811692Santhony.gutierrez@amd.com               (ii->isKernArgSeg() && ii->isLoad()) ||
39911692Santhony.gutierrez@amd.com               ii->isArgSeg())) {
40011308Santhony.gutierrez@amd.com        // Here for ALU instruction (all others)
40111308Santhony.gutierrez@amd.com        if (!computeUnit->wfWait[simdId].prerdy()) {
40211308Santhony.gutierrez@amd.com            // Is alu slot free?
40311308Santhony.gutierrez@amd.com            return 0;
40411308Santhony.gutierrez@amd.com        }
40511308Santhony.gutierrez@amd.com        if (!computeUnit->vrf[simdId]->vrfOperandAccessReady(this, ii,
40611308Santhony.gutierrez@amd.com                    VrfAccessType::RD_WR)) {
40711308Santhony.gutierrez@amd.com            return 0;
40811308Santhony.gutierrez@amd.com        }
40911308Santhony.gutierrez@amd.com
41011308Santhony.gutierrez@amd.com        if (!computeUnit->vrf[simdId]->operandsReady(this, ii)) {
41111308Santhony.gutierrez@amd.com            return 0;
41211308Santhony.gutierrez@amd.com        }
41311308Santhony.gutierrez@amd.com        ready_inst = true;
41411692Santhony.gutierrez@amd.com    } else if (type == I_GLOBAL && ii->isGlobalMem()) {
41511308Santhony.gutierrez@amd.com        // Here Global memory instruction
41611692Santhony.gutierrez@amd.com        if (ii->isLoad() || ii->isAtomic() || ii->isMemFence()) {
41711308Santhony.gutierrez@amd.com            // Are there in pipe or outstanding global memory write requests?
41811639Salexandru.dutu@amd.com            if ((outstandingReqsWrGm + wrGmReqsInPipe) > 0) {
41911308Santhony.gutierrez@amd.com                return 0;
42011308Santhony.gutierrez@amd.com            }
42111308Santhony.gutierrez@amd.com        }
42211308Santhony.gutierrez@amd.com
42311692Santhony.gutierrez@amd.com        if (ii->isStore() || ii->isAtomic() || ii->isMemFence()) {
42411308Santhony.gutierrez@amd.com            // Are there in pipe or outstanding global memory read requests?
42511639Salexandru.dutu@amd.com            if ((outstandingReqsRdGm + rdGmReqsInPipe) > 0)
42611308Santhony.gutierrez@amd.com                return 0;
42711308Santhony.gutierrez@amd.com        }
42811308Santhony.gutierrez@amd.com
42911308Santhony.gutierrez@amd.com        if (!glbMemIssueRdy) {
43011308Santhony.gutierrez@amd.com            // Is WV issue slot free?
43111308Santhony.gutierrez@amd.com            return 0;
43211308Santhony.gutierrez@amd.com        }
43311308Santhony.gutierrez@amd.com
43411308Santhony.gutierrez@amd.com        if (!glbMemBusRdy) {
43511308Santhony.gutierrez@amd.com            // Is there an available VRF->Global memory read bus?
43611308Santhony.gutierrez@amd.com            return 0;
43711308Santhony.gutierrez@amd.com        }
43811308Santhony.gutierrez@amd.com
43911308Santhony.gutierrez@amd.com        if (!computeUnit->globalMemoryPipe.
44011639Salexandru.dutu@amd.com            isGMReqFIFOWrRdy(rdGmReqsInPipe + wrGmReqsInPipe)) {
44111308Santhony.gutierrez@amd.com            // Can we insert a new request to the Global Mem Request FIFO?
44211308Santhony.gutierrez@amd.com            return 0;
44311308Santhony.gutierrez@amd.com        }
44411308Santhony.gutierrez@amd.com        // can we schedule source & destination operands on the VRF?
44511308Santhony.gutierrez@amd.com        if (!computeUnit->vrf[simdId]->vrfOperandAccessReady(this, ii,
44611308Santhony.gutierrez@amd.com                    VrfAccessType::RD_WR)) {
44711308Santhony.gutierrez@amd.com            return 0;
44811308Santhony.gutierrez@amd.com        }
44911308Santhony.gutierrez@amd.com        if (!computeUnit->vrf[simdId]->operandsReady(this, ii)) {
45011308Santhony.gutierrez@amd.com            return 0;
45111308Santhony.gutierrez@amd.com        }
45211308Santhony.gutierrez@amd.com        ready_inst = true;
45311692Santhony.gutierrez@amd.com    } else if (type == I_SHARED && ii->isLocalMem()) {
45411308Santhony.gutierrez@amd.com        // Here for Shared memory instruction
45511692Santhony.gutierrez@amd.com        if (ii->isLoad() || ii->isAtomic() || ii->isMemFence()) {
45611639Salexandru.dutu@amd.com            if ((outstandingReqsWrLm + wrLmReqsInPipe) > 0) {
45711308Santhony.gutierrez@amd.com                return 0;
45811308Santhony.gutierrez@amd.com            }
45911308Santhony.gutierrez@amd.com        }
46011308Santhony.gutierrez@amd.com
46111692Santhony.gutierrez@amd.com        if (ii->isStore() || ii->isAtomic() || ii->isMemFence()) {
46211639Salexandru.dutu@amd.com            if ((outstandingReqsRdLm + rdLmReqsInPipe) > 0) {
46311308Santhony.gutierrez@amd.com                return 0;
46411308Santhony.gutierrez@amd.com            }
46511308Santhony.gutierrez@amd.com        }
46611308Santhony.gutierrez@amd.com
46711308Santhony.gutierrez@amd.com        if (!locMemBusRdy) {
46811308Santhony.gutierrez@amd.com            // Is there an available VRF->LDS read bus?
46911308Santhony.gutierrez@amd.com            return 0;
47011308Santhony.gutierrez@amd.com        }
47111308Santhony.gutierrez@amd.com        if (!locMemIssueRdy) {
47211308Santhony.gutierrez@amd.com            // Is wave slot free?
47311308Santhony.gutierrez@amd.com            return 0;
47411308Santhony.gutierrez@amd.com        }
47511308Santhony.gutierrez@amd.com
47611308Santhony.gutierrez@amd.com        if (!computeUnit->localMemoryPipe.
47711639Salexandru.dutu@amd.com            isLMReqFIFOWrRdy(rdLmReqsInPipe + wrLmReqsInPipe)) {
47811308Santhony.gutierrez@amd.com            // Can we insert a new request to the LDS Request FIFO?
47911308Santhony.gutierrez@amd.com            return 0;
48011308Santhony.gutierrez@amd.com        }
48111308Santhony.gutierrez@amd.com        // can we schedule source & destination operands on the VRF?
48211308Santhony.gutierrez@amd.com        if (!computeUnit->vrf[simdId]->vrfOperandAccessReady(this, ii,
48311308Santhony.gutierrez@amd.com                    VrfAccessType::RD_WR)) {
48411308Santhony.gutierrez@amd.com            return 0;
48511308Santhony.gutierrez@amd.com        }
48611308Santhony.gutierrez@amd.com        if (!computeUnit->vrf[simdId]->operandsReady(this, ii)) {
48711308Santhony.gutierrez@amd.com            return 0;
48811308Santhony.gutierrez@amd.com        }
48911308Santhony.gutierrez@amd.com        ready_inst = true;
49011692Santhony.gutierrez@amd.com    } else if (type == I_FLAT && ii->isFlat()) {
49111308Santhony.gutierrez@amd.com        if (!glbMemBusRdy) {
49211308Santhony.gutierrez@amd.com            // Is there an available VRF->Global memory read bus?
49311308Santhony.gutierrez@amd.com            return 0;
49411308Santhony.gutierrez@amd.com        }
49511308Santhony.gutierrez@amd.com
49611308Santhony.gutierrez@amd.com        if (!locMemBusRdy) {
49711308Santhony.gutierrez@amd.com            // Is there an available VRF->LDS read bus?
49811308Santhony.gutierrez@amd.com            return 0;
49911308Santhony.gutierrez@amd.com        }
50011308Santhony.gutierrez@amd.com
50111308Santhony.gutierrez@amd.com        if (!glbMemIssueRdy) {
50211308Santhony.gutierrez@amd.com            // Is wave slot free?
50311308Santhony.gutierrez@amd.com            return 0;
50411308Santhony.gutierrez@amd.com        }
50511308Santhony.gutierrez@amd.com
50611308Santhony.gutierrez@amd.com        if (!locMemIssueRdy) {
50711308Santhony.gutierrez@amd.com            return 0;
50811308Santhony.gutierrez@amd.com        }
50911308Santhony.gutierrez@amd.com        if (!computeUnit->globalMemoryPipe.
51011639Salexandru.dutu@amd.com            isGMReqFIFOWrRdy(rdGmReqsInPipe + wrGmReqsInPipe)) {
51111308Santhony.gutierrez@amd.com            // Can we insert a new request to the Global Mem Request FIFO?
51211308Santhony.gutierrez@amd.com            return 0;
51311308Santhony.gutierrez@amd.com        }
51411308Santhony.gutierrez@amd.com
51511308Santhony.gutierrez@amd.com        if (!computeUnit->localMemoryPipe.
51611639Salexandru.dutu@amd.com            isLMReqFIFOWrRdy(rdLmReqsInPipe + wrLmReqsInPipe)) {
51711308Santhony.gutierrez@amd.com            // Can we insert a new request to the LDS Request FIFO?
51811308Santhony.gutierrez@amd.com            return 0;
51911308Santhony.gutierrez@amd.com        }
52011308Santhony.gutierrez@amd.com        // can we schedule source & destination operands on the VRF?
52111308Santhony.gutierrez@amd.com        if (!computeUnit->vrf[simdId]->vrfOperandAccessReady(this, ii,
52211308Santhony.gutierrez@amd.com                    VrfAccessType::RD_WR)) {
52311308Santhony.gutierrez@amd.com            return 0;
52411308Santhony.gutierrez@amd.com        }
52511308Santhony.gutierrez@amd.com        // are all the operands ready? (RAW, WAW and WAR depedencies met?)
52611308Santhony.gutierrez@amd.com        if (!computeUnit->vrf[simdId]->operandsReady(this, ii)) {
52711308Santhony.gutierrez@amd.com            return 0;
52811308Santhony.gutierrez@amd.com        }
52911308Santhony.gutierrez@amd.com        ready_inst = true;
53011308Santhony.gutierrez@amd.com    } else {
53111308Santhony.gutierrez@amd.com        return 0;
53211308Santhony.gutierrez@amd.com    }
53311308Santhony.gutierrez@amd.com
53411308Santhony.gutierrez@amd.com    assert(ready_inst);
53511308Santhony.gutierrez@amd.com
53611308Santhony.gutierrez@amd.com    DPRINTF(GPUExec, "CU%d: WF[%d][%d]: Ready Inst : %s\n", computeUnit->cu_id,
53711308Santhony.gutierrez@amd.com            simdId, wfSlotId, ii->disassemble());
53811308Santhony.gutierrez@amd.com    return 1;
53911308Santhony.gutierrez@amd.com}
54011308Santhony.gutierrez@amd.com
54111308Santhony.gutierrez@amd.comvoid
54211308Santhony.gutierrez@amd.comWavefront::updateResources()
54311308Santhony.gutierrez@amd.com{
54411308Santhony.gutierrez@amd.com    // Get current instruction
54511308Santhony.gutierrez@amd.com    GPUDynInstPtr ii = instructionBuffer.front();
54611308Santhony.gutierrez@amd.com    assert(ii);
54711308Santhony.gutierrez@amd.com    computeUnit->vrf[simdId]->updateResources(this, ii);
54811308Santhony.gutierrez@amd.com    // Single precision ALU or Branch or Return or Special instruction
54911692Santhony.gutierrez@amd.com    if (ii->isALU() || ii->isSpecialOp() ||
55011692Santhony.gutierrez@amd.com        ii->isBranch() ||
55111308Santhony.gutierrez@amd.com        // FIXME: Kernel argument loads are currently treated as ALU operations
55211308Santhony.gutierrez@amd.com        // since we don't send memory packets at execution. If we fix that then
55311308Santhony.gutierrez@amd.com        // we should map them to one of the memory pipelines
55411692Santhony.gutierrez@amd.com        (ii->isKernArgSeg() && ii->isLoad()) || ii->isArgSeg() ||
55511692Santhony.gutierrez@amd.com        ii->isReturn()) {
55611308Santhony.gutierrez@amd.com        computeUnit->aluPipe[simdId].preset(computeUnit->shader->
55711308Santhony.gutierrez@amd.com                                            ticks(computeUnit->spBypassLength()));
55811308Santhony.gutierrez@amd.com        // this is to enforce a fixed number of cycles per issue slot per SIMD
55911308Santhony.gutierrez@amd.com        computeUnit->wfWait[simdId].preset(computeUnit->shader->
56011308Santhony.gutierrez@amd.com                                           ticks(computeUnit->issuePeriod));
56111692Santhony.gutierrez@amd.com    } else if (ii->isBarrier()) {
56211308Santhony.gutierrez@amd.com        computeUnit->wfWait[simdId].preset(computeUnit->shader->
56311308Santhony.gutierrez@amd.com                                           ticks(computeUnit->issuePeriod));
56411692Santhony.gutierrez@amd.com    } else if (ii->isLoad() && ii->isFlat()) {
56511308Santhony.gutierrez@amd.com        assert(Enums::SC_NONE != ii->executedAs());
56611639Salexandru.dutu@amd.com        memReqsInPipe++;
56711639Salexandru.dutu@amd.com        rdGmReqsInPipe++;
56811308Santhony.gutierrez@amd.com        if ( Enums::SC_SHARED == ii->executedAs() ) {
56911308Santhony.gutierrez@amd.com            computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()].
57011308Santhony.gutierrez@amd.com                preset(computeUnit->shader->ticks(4));
57111308Santhony.gutierrez@amd.com            computeUnit->wfWait[computeUnit->ShrMemUnitId()].
57211308Santhony.gutierrez@amd.com                preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
57311308Santhony.gutierrez@amd.com        } else {
57411308Santhony.gutierrez@amd.com            computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
57511308Santhony.gutierrez@amd.com                preset(computeUnit->shader->ticks(4));
57611308Santhony.gutierrez@amd.com            computeUnit->wfWait[computeUnit->GlbMemUnitId()].
57711308Santhony.gutierrez@amd.com                preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
57811308Santhony.gutierrez@amd.com        }
57911692Santhony.gutierrez@amd.com    } else if (ii->isStore() && ii->isFlat()) {
58011308Santhony.gutierrez@amd.com        assert(Enums::SC_NONE != ii->executedAs());
58111639Salexandru.dutu@amd.com        memReqsInPipe++;
58211639Salexandru.dutu@amd.com        wrGmReqsInPipe++;
58311308Santhony.gutierrez@amd.com        if (Enums::SC_SHARED == ii->executedAs()) {
58411308Santhony.gutierrez@amd.com            computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()].
58511308Santhony.gutierrez@amd.com                preset(computeUnit->shader->ticks(8));
58611308Santhony.gutierrez@amd.com            computeUnit->wfWait[computeUnit->ShrMemUnitId()].
58711308Santhony.gutierrez@amd.com                preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
58811308Santhony.gutierrez@amd.com        } else {
58911308Santhony.gutierrez@amd.com            computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
59011308Santhony.gutierrez@amd.com                preset(computeUnit->shader->ticks(8));
59111308Santhony.gutierrez@amd.com            computeUnit->wfWait[computeUnit->GlbMemUnitId()].
59211308Santhony.gutierrez@amd.com                preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
59311308Santhony.gutierrez@amd.com        }
59411692Santhony.gutierrez@amd.com    } else if (ii->isLoad() && ii->isGlobalMem()) {
59511639Salexandru.dutu@amd.com        memReqsInPipe++;
59611639Salexandru.dutu@amd.com        rdGmReqsInPipe++;
59711308Santhony.gutierrez@amd.com        computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
59811308Santhony.gutierrez@amd.com            preset(computeUnit->shader->ticks(4));
59911308Santhony.gutierrez@amd.com        computeUnit->wfWait[computeUnit->GlbMemUnitId()].
60011308Santhony.gutierrez@amd.com            preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
60111692Santhony.gutierrez@amd.com    } else if (ii->isStore() && ii->isGlobalMem()) {
60211639Salexandru.dutu@amd.com        memReqsInPipe++;
60311639Salexandru.dutu@amd.com        wrGmReqsInPipe++;
60411308Santhony.gutierrez@amd.com        computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
60511308Santhony.gutierrez@amd.com            preset(computeUnit->shader->ticks(8));
60611308Santhony.gutierrez@amd.com        computeUnit->wfWait[computeUnit->GlbMemUnitId()].
60711308Santhony.gutierrez@amd.com            preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
60811692Santhony.gutierrez@amd.com    } else if ((ii->isAtomic() || ii->isMemFence()) && ii->isGlobalMem()) {
60911639Salexandru.dutu@amd.com        memReqsInPipe++;
61011639Salexandru.dutu@amd.com        wrGmReqsInPipe++;
61111639Salexandru.dutu@amd.com        rdGmReqsInPipe++;
61211308Santhony.gutierrez@amd.com        computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
61311308Santhony.gutierrez@amd.com            preset(computeUnit->shader->ticks(8));
61411308Santhony.gutierrez@amd.com        computeUnit->wfWait[computeUnit->GlbMemUnitId()].
61511308Santhony.gutierrez@amd.com            preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
61611692Santhony.gutierrez@amd.com    } else if (ii->isLoad() && ii->isLocalMem()) {
61711639Salexandru.dutu@amd.com        memReqsInPipe++;
61811639Salexandru.dutu@amd.com        rdLmReqsInPipe++;
61911308Santhony.gutierrez@amd.com        computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()].
62011308Santhony.gutierrez@amd.com            preset(computeUnit->shader->ticks(4));
62111308Santhony.gutierrez@amd.com        computeUnit->wfWait[computeUnit->ShrMemUnitId()].
62211308Santhony.gutierrez@amd.com            preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
62311692Santhony.gutierrez@amd.com    } else if (ii->isStore() && ii->isLocalMem()) {
62411639Salexandru.dutu@amd.com        memReqsInPipe++;
62511639Salexandru.dutu@amd.com        wrLmReqsInPipe++;
62611308Santhony.gutierrez@amd.com        computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()].
62711308Santhony.gutierrez@amd.com            preset(computeUnit->shader->ticks(8));
62811308Santhony.gutierrez@amd.com        computeUnit->wfWait[computeUnit->ShrMemUnitId()].
62911308Santhony.gutierrez@amd.com            preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
63011692Santhony.gutierrez@amd.com    } else if ((ii->isAtomic() || ii->isMemFence()) && ii->isLocalMem()) {
63111639Salexandru.dutu@amd.com        memReqsInPipe++;
63211639Salexandru.dutu@amd.com        wrLmReqsInPipe++;
63311639Salexandru.dutu@amd.com        rdLmReqsInPipe++;
63411308Santhony.gutierrez@amd.com        computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()].
63511308Santhony.gutierrez@amd.com            preset(computeUnit->shader->ticks(8));
63611308Santhony.gutierrez@amd.com        computeUnit->wfWait[computeUnit->ShrMemUnitId()].
63711308Santhony.gutierrez@amd.com            preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
63811308Santhony.gutierrez@amd.com    }
63911308Santhony.gutierrez@amd.com}
64011308Santhony.gutierrez@amd.com
64111308Santhony.gutierrez@amd.comvoid
64211308Santhony.gutierrez@amd.comWavefront::exec()
64311308Santhony.gutierrez@amd.com{
64411308Santhony.gutierrez@amd.com    // ---- Exit if wavefront is inactive ----------------------------- //
64511308Santhony.gutierrez@amd.com
64611308Santhony.gutierrez@amd.com    if (status == S_STOPPED || status == S_RETURNING ||
64711308Santhony.gutierrez@amd.com        instructionBuffer.empty()) {
64811308Santhony.gutierrez@amd.com        return;
64911308Santhony.gutierrez@amd.com    }
65011308Santhony.gutierrez@amd.com
65111308Santhony.gutierrez@amd.com    // Get current instruction
65211308Santhony.gutierrez@amd.com
65311308Santhony.gutierrez@amd.com    GPUDynInstPtr ii = instructionBuffer.front();
65411308Santhony.gutierrez@amd.com
65511308Santhony.gutierrez@amd.com    const uint32_t old_pc = pc();
65611308Santhony.gutierrez@amd.com    DPRINTF(GPUExec, "CU%d: WF[%d][%d]: wave[%d] Executing inst: %s "
65711308Santhony.gutierrez@amd.com            "(pc: %i)\n", computeUnit->cu_id, simdId, wfSlotId, wfDynId,
65811308Santhony.gutierrez@amd.com            ii->disassemble(), old_pc);
65911695Santhony.gutierrez@amd.com
66011695Santhony.gutierrez@amd.com    // update the instruction stats in the CU
66111695Santhony.gutierrez@amd.com
66211692Santhony.gutierrez@amd.com    ii->execute(ii);
66311695Santhony.gutierrez@amd.com    computeUnit->updateInstStats(ii);
66411308Santhony.gutierrez@amd.com    // access the VRF
66511308Santhony.gutierrez@amd.com    computeUnit->vrf[simdId]->exec(ii, this);
66611308Santhony.gutierrez@amd.com    srcRegOpDist.sample(ii->numSrcRegOperands());
66711308Santhony.gutierrez@amd.com    dstRegOpDist.sample(ii->numDstRegOperands());
66811308Santhony.gutierrez@amd.com    computeUnit->numInstrExecuted++;
66911308Santhony.gutierrez@amd.com    computeUnit->execRateDist.sample(computeUnit->totalCycles.value() -
67011308Santhony.gutierrez@amd.com                                     computeUnit->lastExecCycle[simdId]);
67111308Santhony.gutierrez@amd.com    computeUnit->lastExecCycle[simdId] = computeUnit->totalCycles.value();
67211308Santhony.gutierrez@amd.com    if (pc() == old_pc) {
67311696Santhony.gutierrez@amd.com        uint32_t new_pc = _gpuISA.advancePC(old_pc, ii);
67411308Santhony.gutierrez@amd.com        // PC not modified by instruction, proceed to next or pop frame
67511308Santhony.gutierrez@amd.com        pc(new_pc);
67611308Santhony.gutierrez@amd.com        if (new_pc == rpc()) {
67711308Santhony.gutierrez@amd.com            popFromReconvergenceStack();
67811308Santhony.gutierrez@amd.com            discardFetch();
67911308Santhony.gutierrez@amd.com        } else {
68011308Santhony.gutierrez@amd.com            instructionBuffer.pop_front();
68111308Santhony.gutierrez@amd.com        }
68211694Santhony.gutierrez@amd.com    } else {
68311694Santhony.gutierrez@amd.com        discardFetch();
68411308Santhony.gutierrez@amd.com    }
68511308Santhony.gutierrez@amd.com
68611308Santhony.gutierrez@amd.com    if (computeUnit->shader->hsail_mode==Shader::SIMT) {
68711308Santhony.gutierrez@amd.com        const int num_active_lanes = execMask().count();
68811308Santhony.gutierrez@amd.com        computeUnit->controlFlowDivergenceDist.sample(num_active_lanes);
68911308Santhony.gutierrez@amd.com        computeUnit->numVecOpsExecuted += num_active_lanes;
69011308Santhony.gutierrez@amd.com        if (isGmInstruction(ii)) {
69111308Santhony.gutierrez@amd.com            computeUnit->activeLanesPerGMemInstrDist.sample(num_active_lanes);
69211308Santhony.gutierrez@amd.com        } else if (isLmInstruction(ii)) {
69311308Santhony.gutierrez@amd.com            computeUnit->activeLanesPerLMemInstrDist.sample(num_active_lanes);
69411308Santhony.gutierrez@amd.com        }
69511308Santhony.gutierrez@amd.com    }
69611308Santhony.gutierrez@amd.com
69711308Santhony.gutierrez@amd.com    // ---- Update Vector ALU pipeline and other resources ------------------ //
69811308Santhony.gutierrez@amd.com    // Single precision ALU or Branch or Return or Special instruction
69911692Santhony.gutierrez@amd.com    if (ii->isALU() || ii->isSpecialOp() ||
70011692Santhony.gutierrez@amd.com        ii->isBranch() ||
70111308Santhony.gutierrez@amd.com        // FIXME: Kernel argument loads are currently treated as ALU operations
70211308Santhony.gutierrez@amd.com        // since we don't send memory packets at execution. If we fix that then
70311308Santhony.gutierrez@amd.com        // we should map them to one of the memory pipelines
70411692Santhony.gutierrez@amd.com        (ii->isKernArgSeg() && ii->isLoad()) ||
70511692Santhony.gutierrez@amd.com        ii->isArgSeg() ||
70611692Santhony.gutierrez@amd.com        ii->isReturn()) {
70711308Santhony.gutierrez@amd.com        computeUnit->aluPipe[simdId].set(computeUnit->shader->
70811308Santhony.gutierrez@amd.com                                         ticks(computeUnit->spBypassLength()));
70911308Santhony.gutierrez@amd.com
71011308Santhony.gutierrez@amd.com        // this is to enforce a fixed number of cycles per issue slot per SIMD
71111308Santhony.gutierrez@amd.com        computeUnit->wfWait[simdId].set(computeUnit->shader->
71211308Santhony.gutierrez@amd.com                                        ticks(computeUnit->issuePeriod));
71311692Santhony.gutierrez@amd.com    } else if (ii->isBarrier()) {
71411308Santhony.gutierrez@amd.com        computeUnit->wfWait[simdId].set(computeUnit->shader->
71511308Santhony.gutierrez@amd.com                                        ticks(computeUnit->issuePeriod));
71611692Santhony.gutierrez@amd.com    } else if (ii->isLoad() && ii->isFlat()) {
71711308Santhony.gutierrez@amd.com        assert(Enums::SC_NONE != ii->executedAs());
71811308Santhony.gutierrez@amd.com
71911308Santhony.gutierrez@amd.com        if (Enums::SC_SHARED == ii->executedAs()) {
72011308Santhony.gutierrez@amd.com            computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()].
72111308Santhony.gutierrez@amd.com                set(computeUnit->shader->ticks(4));
72211308Santhony.gutierrez@amd.com            computeUnit->wfWait[computeUnit->ShrMemUnitId()].
72311308Santhony.gutierrez@amd.com                set(computeUnit->shader->ticks(computeUnit->issuePeriod));
72411308Santhony.gutierrez@amd.com        } else {
72511308Santhony.gutierrez@amd.com            computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
72611308Santhony.gutierrez@amd.com                set(computeUnit->shader->ticks(4));
72711308Santhony.gutierrez@amd.com            computeUnit->wfWait[computeUnit->GlbMemUnitId()].
72811308Santhony.gutierrez@amd.com                set(computeUnit->shader->ticks(computeUnit->issuePeriod));
72911308Santhony.gutierrez@amd.com        }
73011692Santhony.gutierrez@amd.com    } else if (ii->isStore() && ii->isFlat()) {
73111308Santhony.gutierrez@amd.com        assert(Enums::SC_NONE != ii->executedAs());
73211308Santhony.gutierrez@amd.com        if (Enums::SC_SHARED == ii->executedAs()) {
73311308Santhony.gutierrez@amd.com            computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()].
73411308Santhony.gutierrez@amd.com                set(computeUnit->shader->ticks(8));
73511308Santhony.gutierrez@amd.com            computeUnit->wfWait[computeUnit->ShrMemUnitId()].
73611308Santhony.gutierrez@amd.com                set(computeUnit->shader->ticks(computeUnit->issuePeriod));
73711308Santhony.gutierrez@amd.com        } else {
73811308Santhony.gutierrez@amd.com            computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
73911308Santhony.gutierrez@amd.com                set(computeUnit->shader->ticks(8));
74011308Santhony.gutierrez@amd.com            computeUnit->wfWait[computeUnit->GlbMemUnitId()].
74111308Santhony.gutierrez@amd.com                set(computeUnit->shader->ticks(computeUnit->issuePeriod));
74211308Santhony.gutierrez@amd.com        }
74311692Santhony.gutierrez@amd.com    } else if (ii->isLoad() && ii->isGlobalMem()) {
74411308Santhony.gutierrez@amd.com        computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
74511308Santhony.gutierrez@amd.com            set(computeUnit->shader->ticks(4));
74611308Santhony.gutierrez@amd.com        computeUnit->wfWait[computeUnit->GlbMemUnitId()].
74711308Santhony.gutierrez@amd.com            set(computeUnit->shader->ticks(computeUnit->issuePeriod));
74811692Santhony.gutierrez@amd.com    } else if (ii->isStore() && ii->isGlobalMem()) {
74911308Santhony.gutierrez@amd.com        computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
75011308Santhony.gutierrez@amd.com            set(computeUnit->shader->ticks(8));
75111308Santhony.gutierrez@amd.com        computeUnit->wfWait[computeUnit->GlbMemUnitId()].
75211308Santhony.gutierrez@amd.com            set(computeUnit->shader->ticks(computeUnit->issuePeriod));
75311692Santhony.gutierrez@amd.com    } else if ((ii->isAtomic() || ii->isMemFence()) && ii->isGlobalMem()) {
75411308Santhony.gutierrez@amd.com        computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
75511308Santhony.gutierrez@amd.com            set(computeUnit->shader->ticks(8));
75611308Santhony.gutierrez@amd.com        computeUnit->wfWait[computeUnit->GlbMemUnitId()].
75711308Santhony.gutierrez@amd.com            set(computeUnit->shader->ticks(computeUnit->issuePeriod));
75811692Santhony.gutierrez@amd.com    } else if (ii->isLoad() && ii->isLocalMem()) {
75911308Santhony.gutierrez@amd.com        computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()].
76011308Santhony.gutierrez@amd.com            set(computeUnit->shader->ticks(4));
76111308Santhony.gutierrez@amd.com        computeUnit->wfWait[computeUnit->ShrMemUnitId()].
76211308Santhony.gutierrez@amd.com            set(computeUnit->shader->ticks(computeUnit->issuePeriod));
76311692Santhony.gutierrez@amd.com    } else if (ii->isStore() && ii->isLocalMem()) {
76411308Santhony.gutierrez@amd.com        computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()].
76511308Santhony.gutierrez@amd.com            set(computeUnit->shader->ticks(8));
76611308Santhony.gutierrez@amd.com        computeUnit->wfWait[computeUnit->ShrMemUnitId()].
76711308Santhony.gutierrez@amd.com            set(computeUnit->shader->ticks(computeUnit->issuePeriod));
76811692Santhony.gutierrez@amd.com    } else if ((ii->isAtomic() || ii->isMemFence()) && ii->isLocalMem()) {
76911308Santhony.gutierrez@amd.com        computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()].
77011308Santhony.gutierrez@amd.com            set(computeUnit->shader->ticks(8));
77111308Santhony.gutierrez@amd.com        computeUnit->wfWait[computeUnit->ShrMemUnitId()].
77211308Santhony.gutierrez@amd.com            set(computeUnit->shader->ticks(computeUnit->issuePeriod));
77311308Santhony.gutierrez@amd.com    }
77411308Santhony.gutierrez@amd.com}
77511308Santhony.gutierrez@amd.com
77611308Santhony.gutierrez@amd.combool
77711308Santhony.gutierrez@amd.comWavefront::waitingAtBarrier(int lane)
77811308Santhony.gutierrez@amd.com{
77911639Salexandru.dutu@amd.com    return barCnt[lane] < maxBarCnt;
78011308Santhony.gutierrez@amd.com}
78111308Santhony.gutierrez@amd.com
78211308Santhony.gutierrez@amd.comvoid
78311308Santhony.gutierrez@amd.comWavefront::pushToReconvergenceStack(uint32_t pc, uint32_t rpc,
78411308Santhony.gutierrez@amd.com                                    const VectorMask& mask)
78511308Santhony.gutierrez@amd.com{
78611308Santhony.gutierrez@amd.com    assert(mask.count());
78711641Salexandru.dutu@amd.com    reconvergenceStack.emplace_back(new ReconvergenceStackEntry{pc, rpc, mask});
78811308Santhony.gutierrez@amd.com}
78911308Santhony.gutierrez@amd.com
79011308Santhony.gutierrez@amd.comvoid
79111308Santhony.gutierrez@amd.comWavefront::popFromReconvergenceStack()
79211308Santhony.gutierrez@amd.com{
79311308Santhony.gutierrez@amd.com    assert(!reconvergenceStack.empty());
79411308Santhony.gutierrez@amd.com
79511308Santhony.gutierrez@amd.com    DPRINTF(WavefrontStack, "[%2d, %2d, %2d, %2d] %s %3i => ",
79611308Santhony.gutierrez@amd.com            computeUnit->cu_id, simdId, wfSlotId, wfDynId,
79711308Santhony.gutierrez@amd.com            execMask().to_string<char, std::string::traits_type,
79811308Santhony.gutierrez@amd.com            std::string::allocator_type>().c_str(), pc());
79911308Santhony.gutierrez@amd.com
80011641Salexandru.dutu@amd.com    reconvergenceStack.pop_back();
80111308Santhony.gutierrez@amd.com
80211308Santhony.gutierrez@amd.com    DPRINTF(WavefrontStack, "%3i %s\n", pc(),
80311308Santhony.gutierrez@amd.com            execMask().to_string<char, std::string::traits_type,
80411308Santhony.gutierrez@amd.com            std::string::allocator_type>().c_str());
80511308Santhony.gutierrez@amd.com
80611308Santhony.gutierrez@amd.com}
80711308Santhony.gutierrez@amd.com
80811308Santhony.gutierrez@amd.comvoid
80911308Santhony.gutierrez@amd.comWavefront::discardFetch()
81011308Santhony.gutierrez@amd.com{
81111308Santhony.gutierrez@amd.com    instructionBuffer.clear();
81211308Santhony.gutierrez@amd.com    dropFetch |=pendingFetch;
81311308Santhony.gutierrez@amd.com}
81411308Santhony.gutierrez@amd.com
81511308Santhony.gutierrez@amd.comuint32_t
81611308Santhony.gutierrez@amd.comWavefront::pc() const
81711308Santhony.gutierrez@amd.com{
81811641Salexandru.dutu@amd.com    return reconvergenceStack.back()->pc;
81911308Santhony.gutierrez@amd.com}
82011308Santhony.gutierrez@amd.com
82111308Santhony.gutierrez@amd.comuint32_t
82211308Santhony.gutierrez@amd.comWavefront::rpc() const
82311308Santhony.gutierrez@amd.com{
82411641Salexandru.dutu@amd.com    return reconvergenceStack.back()->rpc;
82511308Santhony.gutierrez@amd.com}
82611308Santhony.gutierrez@amd.com
82711308Santhony.gutierrez@amd.comVectorMask
82811308Santhony.gutierrez@amd.comWavefront::execMask() const
82911308Santhony.gutierrez@amd.com{
83011641Salexandru.dutu@amd.com    return reconvergenceStack.back()->execMask;
83111308Santhony.gutierrez@amd.com}
83211308Santhony.gutierrez@amd.com
83311308Santhony.gutierrez@amd.combool
83411308Santhony.gutierrez@amd.comWavefront::execMask(int lane) const
83511308Santhony.gutierrez@amd.com{
83611641Salexandru.dutu@amd.com    return reconvergenceStack.back()->execMask[lane];
83711308Santhony.gutierrez@amd.com}
83811308Santhony.gutierrez@amd.com
83911308Santhony.gutierrez@amd.com
84011308Santhony.gutierrez@amd.comvoid
84111308Santhony.gutierrez@amd.comWavefront::pc(uint32_t new_pc)
84211308Santhony.gutierrez@amd.com{
84311641Salexandru.dutu@amd.com    reconvergenceStack.back()->pc = new_pc;
84411308Santhony.gutierrez@amd.com}
84511640Salexandru.dutu@amd.com
84611640Salexandru.dutu@amd.comuint32_t
84711640Salexandru.dutu@amd.comWavefront::getStaticContextSize() const
84811640Salexandru.dutu@amd.com{
84911643Salexandru.dutu@amd.com    return barCnt.size() * sizeof(int) + sizeof(wfId) + sizeof(maxBarCnt) +
85011640Salexandru.dutu@amd.com           sizeof(oldBarrierCnt) + sizeof(barrierCnt) + sizeof(wgId) +
85111640Salexandru.dutu@amd.com           sizeof(computeUnit->cu_id) + sizeof(barrierId) + sizeof(initMask) +
85211640Salexandru.dutu@amd.com           sizeof(privBase) + sizeof(spillBase) + sizeof(ldsChunk) +
85311640Salexandru.dutu@amd.com           computeUnit->wfSize() * sizeof(ReconvergenceStackEntry);
85411640Salexandru.dutu@amd.com}
85511644Salexandru.dutu@amd.com
85611644Salexandru.dutu@amd.comvoid
85711644Salexandru.dutu@amd.comWavefront::getContext(const void *out)
85811644Salexandru.dutu@amd.com{
85911644Salexandru.dutu@amd.com    uint8_t *iter = (uint8_t *)out;
86011644Salexandru.dutu@amd.com    for (int i = 0; i < barCnt.size(); i++) {
86111644Salexandru.dutu@amd.com        *(int *)iter = barCnt[i]; iter += sizeof(barCnt[i]);
86211644Salexandru.dutu@amd.com    }
86311644Salexandru.dutu@amd.com    *(int *)iter = wfId; iter += sizeof(wfId);
86411644Salexandru.dutu@amd.com    *(int *)iter = maxBarCnt; iter += sizeof(maxBarCnt);
86511644Salexandru.dutu@amd.com    *(int *)iter = oldBarrierCnt; iter += sizeof(oldBarrierCnt);
86611644Salexandru.dutu@amd.com    *(int *)iter = barrierCnt; iter += sizeof(barrierCnt);
86711644Salexandru.dutu@amd.com    *(int *)iter = computeUnit->cu_id; iter += sizeof(computeUnit->cu_id);
86811644Salexandru.dutu@amd.com    *(uint32_t *)iter = wgId; iter += sizeof(wgId);
86911644Salexandru.dutu@amd.com    *(uint32_t *)iter = barrierId; iter += sizeof(barrierId);
87011644Salexandru.dutu@amd.com    *(uint64_t *)iter = initMask.to_ullong(); iter += sizeof(initMask.to_ullong());
87111644Salexandru.dutu@amd.com    *(Addr *)iter = privBase; iter += sizeof(privBase);
87211644Salexandru.dutu@amd.com    *(Addr *)iter = spillBase; iter += sizeof(spillBase);
87311644Salexandru.dutu@amd.com
87411644Salexandru.dutu@amd.com    int stackSize = reconvergenceStack.size();
87511644Salexandru.dutu@amd.com    ReconvergenceStackEntry empty = {std::numeric_limits<uint32_t>::max(),
87611644Salexandru.dutu@amd.com                                    std::numeric_limits<uint32_t>::max(),
87711644Salexandru.dutu@amd.com                                    std::numeric_limits<uint64_t>::max()};
87811644Salexandru.dutu@amd.com    for (int i = 0; i < workItemId[0].size(); i++) {
87911644Salexandru.dutu@amd.com        if (i < stackSize) {
88011644Salexandru.dutu@amd.com            *(ReconvergenceStackEntry *)iter = *reconvergenceStack.back();
88111644Salexandru.dutu@amd.com            iter += sizeof(ReconvergenceStackEntry);
88211644Salexandru.dutu@amd.com            reconvergenceStack.pop_back();
88311644Salexandru.dutu@amd.com        } else {
88411644Salexandru.dutu@amd.com            *(ReconvergenceStackEntry *)iter = empty;
88511644Salexandru.dutu@amd.com            iter += sizeof(ReconvergenceStackEntry);
88611644Salexandru.dutu@amd.com        }
88711644Salexandru.dutu@amd.com    }
88811644Salexandru.dutu@amd.com
88911644Salexandru.dutu@amd.com    int wf_size = computeUnit->wfSize();
89011644Salexandru.dutu@amd.com    for (int i = 0; i < maxSpVgprs; i++) {
89111644Salexandru.dutu@amd.com        uint32_t vgprIdx = remap(i, sizeof(uint32_t), 1);
89211644Salexandru.dutu@amd.com        for (int lane = 0; lane < wf_size; lane++) {
89311644Salexandru.dutu@amd.com            uint32_t regVal = computeUnit->vrf[simdId]->
89411644Salexandru.dutu@amd.com                            read<uint32_t>(vgprIdx,lane);
89511644Salexandru.dutu@amd.com            *(uint32_t *)iter = regVal; iter += sizeof(regVal);
89611644Salexandru.dutu@amd.com        }
89711644Salexandru.dutu@amd.com    }
89811644Salexandru.dutu@amd.com
89911644Salexandru.dutu@amd.com    for (int i = 0; i < maxDpVgprs; i++) {
90011644Salexandru.dutu@amd.com        uint32_t vgprIdx = remap(i, sizeof(uint64_t), 1);
90111644Salexandru.dutu@amd.com        for (int lane = 0; lane < wf_size; lane++) {
90211644Salexandru.dutu@amd.com            uint64_t regVal = computeUnit->vrf[simdId]->
90311644Salexandru.dutu@amd.com                            read<uint64_t>(vgprIdx,lane);
90411644Salexandru.dutu@amd.com            *(uint64_t *)iter = regVal; iter += sizeof(regVal);
90511644Salexandru.dutu@amd.com        }
90611644Salexandru.dutu@amd.com    }
90711644Salexandru.dutu@amd.com
90811644Salexandru.dutu@amd.com    for (int i = 0; i < condRegState->numRegs(); i++) {
90911644Salexandru.dutu@amd.com        for (int lane = 0; lane < wf_size; lane++) {
91011644Salexandru.dutu@amd.com            uint64_t regVal = condRegState->read<uint64_t>(i, lane);
91111644Salexandru.dutu@amd.com            *(uint64_t *)iter = regVal; iter += sizeof(regVal);
91211644Salexandru.dutu@amd.com        }
91311644Salexandru.dutu@amd.com    }
91411644Salexandru.dutu@amd.com
91511644Salexandru.dutu@amd.com    /* saving LDS content */
91611644Salexandru.dutu@amd.com    if (ldsChunk)
91711644Salexandru.dutu@amd.com        for (int i = 0; i < ldsChunk->size(); i++) {
91811644Salexandru.dutu@amd.com            char val = ldsChunk->read<char>(i);
91911644Salexandru.dutu@amd.com            *(char *) iter = val; iter += sizeof(val);
92011644Salexandru.dutu@amd.com        }
92111644Salexandru.dutu@amd.com}
92211644Salexandru.dutu@amd.com
92311644Salexandru.dutu@amd.comvoid
92411644Salexandru.dutu@amd.comWavefront::setContext(const void *in)
92511644Salexandru.dutu@amd.com{
92611644Salexandru.dutu@amd.com    uint8_t *iter = (uint8_t *)in;
92711644Salexandru.dutu@amd.com    for (int i = 0; i < barCnt.size(); i++) {
92811644Salexandru.dutu@amd.com        barCnt[i] = *(int *)iter; iter += sizeof(barCnt[i]);
92911644Salexandru.dutu@amd.com    }
93011644Salexandru.dutu@amd.com    wfId = *(int *)iter; iter += sizeof(wfId);
93111644Salexandru.dutu@amd.com    maxBarCnt = *(int *)iter; iter += sizeof(maxBarCnt);
93211644Salexandru.dutu@amd.com    oldBarrierCnt = *(int *)iter; iter += sizeof(oldBarrierCnt);
93311644Salexandru.dutu@amd.com    barrierCnt = *(int *)iter; iter += sizeof(barrierCnt);
93411644Salexandru.dutu@amd.com    computeUnit->cu_id = *(int *)iter; iter += sizeof(computeUnit->cu_id);
93511644Salexandru.dutu@amd.com    wgId = *(uint32_t *)iter; iter += sizeof(wgId);
93611644Salexandru.dutu@amd.com    barrierId = *(uint32_t *)iter; iter += sizeof(barrierId);
93711644Salexandru.dutu@amd.com    initMask = VectorMask(*(uint64_t *)iter); iter += sizeof(initMask);
93811644Salexandru.dutu@amd.com    privBase = *(Addr *)iter; iter += sizeof(privBase);
93911644Salexandru.dutu@amd.com    spillBase = *(Addr *)iter; iter += sizeof(spillBase);
94011644Salexandru.dutu@amd.com
94111644Salexandru.dutu@amd.com    for (int i = 0; i < workItemId[0].size(); i++) {
94211644Salexandru.dutu@amd.com        ReconvergenceStackEntry newEntry = *(ReconvergenceStackEntry *)iter;
94311644Salexandru.dutu@amd.com        iter += sizeof(ReconvergenceStackEntry);
94411644Salexandru.dutu@amd.com        if (newEntry.pc != std::numeric_limits<uint32_t>::max()) {
94511644Salexandru.dutu@amd.com            pushToReconvergenceStack(newEntry.pc, newEntry.rpc,
94611644Salexandru.dutu@amd.com                                     newEntry.execMask);
94711644Salexandru.dutu@amd.com        }
94811644Salexandru.dutu@amd.com    }
94911644Salexandru.dutu@amd.com    int wf_size = computeUnit->wfSize();
95011644Salexandru.dutu@amd.com
95111644Salexandru.dutu@amd.com    for (int i = 0; i < maxSpVgprs; i++) {
95211644Salexandru.dutu@amd.com        uint32_t vgprIdx = remap(i, sizeof(uint32_t), 1);
95311644Salexandru.dutu@amd.com        for (int lane = 0; lane < wf_size; lane++) {
95411644Salexandru.dutu@amd.com            uint32_t regVal = *(uint32_t *)iter; iter += sizeof(regVal);
95511644Salexandru.dutu@amd.com            computeUnit->vrf[simdId]->write<uint32_t>(vgprIdx, regVal, lane);
95611644Salexandru.dutu@amd.com        }
95711644Salexandru.dutu@amd.com    }
95811644Salexandru.dutu@amd.com
95911644Salexandru.dutu@amd.com    for (int i = 0; i < maxDpVgprs; i++) {
96011644Salexandru.dutu@amd.com        uint32_t vgprIdx = remap(i, sizeof(uint64_t), 1);
96111644Salexandru.dutu@amd.com        for (int lane = 0; lane < wf_size; lane++) {
96211644Salexandru.dutu@amd.com            uint64_t regVal = *(uint64_t *)iter; iter += sizeof(regVal);
96311644Salexandru.dutu@amd.com            computeUnit->vrf[simdId]->write<uint64_t>(vgprIdx, regVal, lane);
96411644Salexandru.dutu@amd.com        }
96511644Salexandru.dutu@amd.com    }
96611644Salexandru.dutu@amd.com
96711644Salexandru.dutu@amd.com    for (int i = 0; i < condRegState->numRegs(); i++) {
96811644Salexandru.dutu@amd.com        for (int lane = 0; lane < wf_size; lane++) {
96911644Salexandru.dutu@amd.com            uint64_t regVal = *(uint64_t *)iter; iter += sizeof(regVal);
97011644Salexandru.dutu@amd.com            condRegState->write<uint64_t>(i, lane, regVal);
97111644Salexandru.dutu@amd.com        }
97211644Salexandru.dutu@amd.com    }
97311644Salexandru.dutu@amd.com    /** Restoring LDS contents */
97411644Salexandru.dutu@amd.com    if (ldsChunk)
97511644Salexandru.dutu@amd.com        for (int i = 0; i < ldsChunk->size(); i++) {
97611644Salexandru.dutu@amd.com            char val = *(char *) iter; iter += sizeof(val);
97711644Salexandru.dutu@amd.com            ldsChunk->write<char>(i, val);
97811644Salexandru.dutu@amd.com        }
97911644Salexandru.dutu@amd.com}
98011657Salexandru.dutu@amd.com
98111657Salexandru.dutu@amd.comvoid
98211657Salexandru.dutu@amd.comWavefront::computeActualWgSz(NDRange *ndr)
98311657Salexandru.dutu@amd.com{
98411657Salexandru.dutu@amd.com    actualWgSzTotal = 1;
98511657Salexandru.dutu@amd.com    for (int d = 0; d < 3; ++d) {
98611657Salexandru.dutu@amd.com        actualWgSz[d] = std::min(workGroupSz[d],
98711657Salexandru.dutu@amd.com                                 gridSz[d] - ndr->wgId[d] * workGroupSz[d]);
98811657Salexandru.dutu@amd.com        actualWgSzTotal *= actualWgSz[d];
98911657Salexandru.dutu@amd.com    }
99011657Salexandru.dutu@amd.com}
991