compute_unit.hh revision 11695
/*
 * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * For use for simulation and test purposes only
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors
 * may be used to endorse or promote products derived from this software
 * without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Author: John Kalamatianos, Anthony Gutierrez
 */
3511308Santhony.gutierrez@amd.com
3611308Santhony.gutierrez@amd.com#ifndef __COMPUTE_UNIT_HH__
3711308Santhony.gutierrez@amd.com#define __COMPUTE_UNIT_HH__
3811308Santhony.gutierrez@amd.com
3911308Santhony.gutierrez@amd.com#include <deque>
4011308Santhony.gutierrez@amd.com#include <map>
4111308Santhony.gutierrez@amd.com#include <unordered_map>
4211308Santhony.gutierrez@amd.com#include <vector>
4311308Santhony.gutierrez@amd.com
4411308Santhony.gutierrez@amd.com#include "base/callback.hh"
4511308Santhony.gutierrez@amd.com#include "base/statistics.hh"
4611308Santhony.gutierrez@amd.com#include "base/types.hh"
4711308Santhony.gutierrez@amd.com#include "enums/PrefetchType.hh"
4811308Santhony.gutierrez@amd.com#include "gpu-compute/exec_stage.hh"
4911308Santhony.gutierrez@amd.com#include "gpu-compute/fetch_stage.hh"
5011308Santhony.gutierrez@amd.com#include "gpu-compute/global_memory_pipeline.hh"
5111308Santhony.gutierrez@amd.com#include "gpu-compute/local_memory_pipeline.hh"
5211308Santhony.gutierrez@amd.com#include "gpu-compute/qstruct.hh"
5311308Santhony.gutierrez@amd.com#include "gpu-compute/schedule_stage.hh"
5411308Santhony.gutierrez@amd.com#include "gpu-compute/scoreboard_check_stage.hh"
5511308Santhony.gutierrez@amd.com#include "mem/mem_object.hh"
5611308Santhony.gutierrez@amd.com#include "mem/port.hh"
5711308Santhony.gutierrez@amd.com
// File-scope limits for memory instructions.
// NOTE(review): judging by the names these bound the register count of
// non-vector memory instructions and the width of a memory instruction;
// the users are not visible in this header, so confirm before changing.
static const int MAX_REGS_FOR_NON_VEC_MEM_INST = 1;
static const int MAX_WIDTH_FOR_MEM_INST = 32;
6011308Santhony.gutierrez@amd.com
// Forward declarations. The visible part of this header only uses these
// types through pointers, so their full definitions are not required here.
class NDRange;
class Shader;
class VectorRegisterFile;

struct ComputeUnitParams;
6611308Santhony.gutierrez@amd.com
// Wavefront execution policy selector (stored in ComputeUnit::exec_policy).
// NOTE(review): RR presumably stands for round-robin; confirm against the
// scheduler implementation.
enum EXEC_POLICY
{
    OLDEST = 0,
    RR
};
7211308Santhony.gutierrez@amd.com
// List of execution units. The enumerators are consecutive starting at 0,
// so NUM_UNITS equals the number of units listed before it.
enum EXEC_UNIT
{
    SIMD0 = 0,
    SIMD1,
    SIMD2,
    SIMD3,
    GLBMEM_PIPE,
    LDSMEM_PIPE,
    NUM_UNITS
};
8411308Santhony.gutierrez@amd.com
// The four combinations of TLB hit/miss and cache hit/miss.
enum TLB_CACHE
{
    TLB_MISS_CACHE_MISS = 0,
    TLB_MISS_CACHE_HIT,
    TLB_HIT_CACHE_MISS,
    TLB_HIT_CACHE_HIT
};
9211308Santhony.gutierrez@amd.com
9311308Santhony.gutierrez@amd.comclass ComputeUnit : public MemObject
9411308Santhony.gutierrez@amd.com{
9511308Santhony.gutierrez@amd.com  public:
9611308Santhony.gutierrez@amd.com    FetchStage fetchStage;
9711308Santhony.gutierrez@amd.com    ScoreboardCheckStage scoreboardCheckStage;
9811308Santhony.gutierrez@amd.com    ScheduleStage scheduleStage;
9911308Santhony.gutierrez@amd.com    ExecStage execStage;
10011308Santhony.gutierrez@amd.com    GlobalMemPipeline globalMemoryPipe;
10111308Santhony.gutierrez@amd.com    LocalMemPipeline localMemoryPipe;
10211308Santhony.gutierrez@amd.com
10311308Santhony.gutierrez@amd.com    // Buffers used to communicate between various pipeline stages
10411308Santhony.gutierrez@amd.com
10511308Santhony.gutierrez@amd.com    // List of waves which are ready to be scheduled.
10611308Santhony.gutierrez@amd.com    // Each execution resource has a ready list. readyList is
10711308Santhony.gutierrez@amd.com    // used to communicate between scoreboardCheck stage and
10811308Santhony.gutierrez@amd.com    // schedule stage
10911308Santhony.gutierrez@amd.com    // TODO: make enum to index readyList
11011308Santhony.gutierrez@amd.com    std::vector<std::vector<Wavefront*>> readyList;
11111308Santhony.gutierrez@amd.com
11211308Santhony.gutierrez@amd.com    // Stores the status of waves. A READY implies the
11311308Santhony.gutierrez@amd.com    // wave is ready to be scheduled this cycle and
11411308Santhony.gutierrez@amd.com    // is already present in the readyList. waveStatusList is
11511308Santhony.gutierrez@amd.com    // used to communicate between scoreboardCheck stage and
11611308Santhony.gutierrez@amd.com    // schedule stage
11711308Santhony.gutierrez@amd.com    // TODO: convert std::pair to a class to increase readability
11811308Santhony.gutierrez@amd.com    std::vector<std::vector<std::pair<Wavefront*, WAVE_STATUS>>> waveStatusList;
11911308Santhony.gutierrez@amd.com
12011308Santhony.gutierrez@amd.com    // List of waves which will be dispatched to
12111308Santhony.gutierrez@amd.com    // each execution resource. A FILLED implies
12211308Santhony.gutierrez@amd.com    // dispatch list is non-empty and
12311308Santhony.gutierrez@amd.com    // execution unit has something to execute
12411308Santhony.gutierrez@amd.com    // this cycle. Currently, the dispatch list of
12511308Santhony.gutierrez@amd.com    // an execution resource can hold only one wave because
12611308Santhony.gutierrez@amd.com    // an execution resource can execute only one wave in a cycle.
12711308Santhony.gutierrez@amd.com    // dispatchList is used to communicate between schedule
12811308Santhony.gutierrez@amd.com    // and exec stage
12911308Santhony.gutierrez@amd.com    // TODO: convert std::pair to a class to increase readability
13011308Santhony.gutierrez@amd.com    std::vector<std::pair<Wavefront*, DISPATCH_STATUS>> dispatchList;
13111308Santhony.gutierrez@amd.com
13211308Santhony.gutierrez@amd.com    int rrNextMemID; // used by RR WF exec policy to cycle through WF's
13311308Santhony.gutierrez@amd.com    int rrNextALUWp;
13411308Santhony.gutierrez@amd.com    typedef ComputeUnitParams Params;
13511308Santhony.gutierrez@amd.com    std::vector<std::vector<Wavefront*>> wfList;
13611308Santhony.gutierrez@amd.com    int cu_id;
13711308Santhony.gutierrez@amd.com
13811308Santhony.gutierrez@amd.com    // array of vector register files, one per SIMD
13911308Santhony.gutierrez@amd.com    std::vector<VectorRegisterFile*> vrf;
14011308Santhony.gutierrez@amd.com    // Number of vector ALU units (SIMDs) in CU
14111308Santhony.gutierrez@amd.com    int numSIMDs;
14211308Santhony.gutierrez@amd.com    // number of pipe stages for bypassing data to next dependent single
14311308Santhony.gutierrez@amd.com    // precision vector instruction inside the vector ALU pipeline
14411308Santhony.gutierrez@amd.com    int spBypassPipeLength;
14511308Santhony.gutierrez@amd.com    // number of pipe stages for bypassing data to next dependent double
14611308Santhony.gutierrez@amd.com    // precision vector instruction inside the vector ALU pipeline
14711308Santhony.gutierrez@amd.com    int dpBypassPipeLength;
14811308Santhony.gutierrez@amd.com    // number of cycles per issue period
14911308Santhony.gutierrez@amd.com    int issuePeriod;
15011308Santhony.gutierrez@amd.com
15111308Santhony.gutierrez@amd.com    // Number of global and local memory execution resources in CU
15211308Santhony.gutierrez@amd.com    int numGlbMemUnits;
15311308Santhony.gutierrez@amd.com    int numLocMemUnits;
15411308Santhony.gutierrez@amd.com    // tracks the last cycle a vector instruction was executed on a SIMD
15511308Santhony.gutierrez@amd.com    std::vector<uint64_t> lastExecCycle;
15611308Santhony.gutierrez@amd.com
15711308Santhony.gutierrez@amd.com    // true if we allow a separate TLB per lane
15811308Santhony.gutierrez@amd.com    bool perLaneTLB;
15911308Santhony.gutierrez@amd.com    // if 0, TLB prefetching is off.
16011308Santhony.gutierrez@amd.com    int prefetchDepth;
16111308Santhony.gutierrez@amd.com    // if fixed-stride prefetching, this is the stride.
16211308Santhony.gutierrez@amd.com    int prefetchStride;
16311308Santhony.gutierrez@amd.com
16411534Sjohn.kalamatianos@amd.com    std::vector<Addr> lastVaddrCU;
16511534Sjohn.kalamatianos@amd.com    std::vector<std::vector<Addr>> lastVaddrSimd;
16611308Santhony.gutierrez@amd.com    std::vector<std::vector<std::vector<Addr>>> lastVaddrWF;
16711308Santhony.gutierrez@amd.com    Enums::PrefetchType prefetchType;
16811308Santhony.gutierrez@amd.com    EXEC_POLICY exec_policy;
16911308Santhony.gutierrez@amd.com
17011308Santhony.gutierrez@amd.com    bool xact_cas_mode;
17111308Santhony.gutierrez@amd.com    bool debugSegFault;
17211308Santhony.gutierrez@amd.com    bool functionalTLB;
17311308Santhony.gutierrez@amd.com    bool localMemBarrier;
17411308Santhony.gutierrez@amd.com
17511308Santhony.gutierrez@amd.com    /*
17611308Santhony.gutierrez@amd.com     * for Counting page accesses
17711308Santhony.gutierrez@amd.com     *
17811308Santhony.gutierrez@amd.com     * cuExitCallback inherits from Callback. When you register a callback
17911308Santhony.gutierrez@amd.com     * function as an exit callback, it will get added to an exit callback
18011308Santhony.gutierrez@amd.com     * queue, such that on simulation exit, all callbacks in the callback
18111308Santhony.gutierrez@amd.com     * queue will have their process() function called.
18211308Santhony.gutierrez@amd.com     */
18311308Santhony.gutierrez@amd.com    bool countPages;
18411308Santhony.gutierrez@amd.com
18511308Santhony.gutierrez@amd.com    Shader *shader;
18611308Santhony.gutierrez@amd.com    uint32_t barrier_id;
18711308Santhony.gutierrez@amd.com    // vector of Vector ALU (MACC) pipelines
18811308Santhony.gutierrez@amd.com    std::vector<WaitClass> aluPipe;
18911308Santhony.gutierrez@amd.com    // minimum issue period per SIMD unit (in cycles)
19011308Santhony.gutierrez@amd.com    std::vector<WaitClass> wfWait;
19111308Santhony.gutierrez@amd.com
19211308Santhony.gutierrez@amd.com    // Resource control for Vector Register File->Global Memory pipe buses
19311308Santhony.gutierrez@amd.com    std::vector<WaitClass> vrfToGlobalMemPipeBus;
19411308Santhony.gutierrez@amd.com    // Resource control for Vector Register File->Local Memory pipe buses
19511308Santhony.gutierrez@amd.com    std::vector<WaitClass> vrfToLocalMemPipeBus;
19611308Santhony.gutierrez@amd.com    int nextGlbMemBus;
19711308Santhony.gutierrez@amd.com    int nextLocMemBus;
19811308Santhony.gutierrez@amd.com    // Resource control for global memory to VRF data/address bus
19911308Santhony.gutierrez@amd.com    WaitClass glbMemToVrfBus;
20011308Santhony.gutierrez@amd.com    // Resource control for local memory to VRF data/address bus
20111308Santhony.gutierrez@amd.com    WaitClass locMemToVrfBus;
20211308Santhony.gutierrez@amd.com
20311308Santhony.gutierrez@amd.com    uint32_t vrfToCoalescerBusWidth; // VRF->Coalescer data bus width in bytes
20411308Santhony.gutierrez@amd.com    uint32_t coalescerToVrfBusWidth; // Coalescer->VRF data bus width in bytes
20511308Santhony.gutierrez@amd.com    uint32_t numCyclesPerStoreTransfer;  // number of cycles per vector store
20611308Santhony.gutierrez@amd.com    uint32_t numCyclesPerLoadTransfer;  // number of cycles per vector load
20711308Santhony.gutierrez@amd.com
20811308Santhony.gutierrez@amd.com    Tick req_tick_latency;
20911308Santhony.gutierrez@amd.com    Tick resp_tick_latency;
21011308Santhony.gutierrez@amd.com
21111308Santhony.gutierrez@amd.com    // number of vector registers being reserved for each SIMD unit
21211308Santhony.gutierrez@amd.com    std::vector<int> vectorRegsReserved;
21311308Santhony.gutierrez@amd.com    // number of vector registers per SIMD unit
21411308Santhony.gutierrez@amd.com    uint32_t numVecRegsPerSimd;
21511308Santhony.gutierrez@amd.com    // Support for scheduling VGPR status update events
21611308Santhony.gutierrez@amd.com    std::vector<std::pair<uint32_t, uint32_t> > regIdxVec;
21711308Santhony.gutierrez@amd.com    std::vector<uint64_t> timestampVec;
21811308Santhony.gutierrez@amd.com    std::vector<uint8_t>  statusVec;
21911308Santhony.gutierrez@amd.com
22011308Santhony.gutierrez@amd.com    void
22111308Santhony.gutierrez@amd.com    registerEvent(uint32_t simdId,
22211308Santhony.gutierrez@amd.com                  uint32_t regIdx,
22311308Santhony.gutierrez@amd.com                  uint32_t operandSize,
22411308Santhony.gutierrez@amd.com                  uint64_t when,
22511308Santhony.gutierrez@amd.com                  uint8_t newStatus) {
22611308Santhony.gutierrez@amd.com        regIdxVec.push_back(std::make_pair(simdId, regIdx));
22711308Santhony.gutierrez@amd.com        timestampVec.push_back(when);
22811308Santhony.gutierrez@amd.com        statusVec.push_back(newStatus);
22911308Santhony.gutierrez@amd.com        if (operandSize > 4) {
23011308Santhony.gutierrez@amd.com            regIdxVec.push_back(std::make_pair(simdId,
23111308Santhony.gutierrez@amd.com                                               ((regIdx + 1) %
23211308Santhony.gutierrez@amd.com                                                numVecRegsPerSimd)));
23311308Santhony.gutierrez@amd.com            timestampVec.push_back(when);
23411308Santhony.gutierrez@amd.com            statusVec.push_back(newStatus);
23511308Santhony.gutierrez@amd.com        }
23611308Santhony.gutierrez@amd.com    }
23711308Santhony.gutierrez@amd.com
23811308Santhony.gutierrez@amd.com    void updateEvents();
23911308Santhony.gutierrez@amd.com
24011308Santhony.gutierrez@amd.com    // this hash map will keep track of page divergence
24111308Santhony.gutierrez@amd.com    // per memory instruction per wavefront. The hash map
24211308Santhony.gutierrez@amd.com    // is cleared in GPUDynInst::updateStats() in gpu_dyn_inst.cc.
24311308Santhony.gutierrez@amd.com    std::map<Addr, int> pagesTouched;
24411308Santhony.gutierrez@amd.com
24511308Santhony.gutierrez@amd.com    ComputeUnit(const Params *p);
24611308Santhony.gutierrez@amd.com    ~ComputeUnit();
24711308Santhony.gutierrez@amd.com    int spBypassLength() { return spBypassPipeLength; };
24811308Santhony.gutierrez@amd.com    int dpBypassLength() { return dpBypassPipeLength; };
24911308Santhony.gutierrez@amd.com    int storeBusLength() { return numCyclesPerStoreTransfer; };
25011308Santhony.gutierrez@amd.com    int loadBusLength() { return numCyclesPerLoadTransfer; };
25111308Santhony.gutierrez@amd.com    int wfSize() const { return wavefrontSize; };
25211308Santhony.gutierrez@amd.com
25311308Santhony.gutierrez@amd.com    void resizeRegFiles(int num_cregs, int num_sregs, int num_dregs);
25411308Santhony.gutierrez@amd.com    void exec();
25511308Santhony.gutierrez@amd.com    void initiateFetch(Wavefront *wavefront);
25611308Santhony.gutierrez@amd.com    void fetch(PacketPtr pkt, Wavefront *wavefront);
25711657Salexandru.dutu@amd.com    void fillKernelState(Wavefront *w, NDRange *ndr);
25811308Santhony.gutierrez@amd.com
25911657Salexandru.dutu@amd.com    void startWavefront(Wavefront *w, int waveId, LdsChunk *ldsChunk,
26011657Salexandru.dutu@amd.com                        NDRange *ndr);
26111308Santhony.gutierrez@amd.com
26211308Santhony.gutierrez@amd.com    void StartWorkgroup(NDRange *ndr);
26311308Santhony.gutierrez@amd.com    int ReadyWorkgroup(NDRange *ndr);
26411308Santhony.gutierrez@amd.com
26511308Santhony.gutierrez@amd.com    bool isVecAlu(int unitId) { return unitId >= SIMD0 && unitId <= SIMD3; }
26611308Santhony.gutierrez@amd.com    bool isGlbMem(int unitId) { return unitId == GLBMEM_PIPE; }
26711308Santhony.gutierrez@amd.com    bool isShrMem(int unitId) { return unitId == LDSMEM_PIPE; }
26811308Santhony.gutierrez@amd.com    int GlbMemUnitId() { return GLBMEM_PIPE; }
26911308Santhony.gutierrez@amd.com    int ShrMemUnitId() { return LDSMEM_PIPE; }
27011308Santhony.gutierrez@amd.com    int nextGlbRdBus() { return (++nextGlbMemBus) % numGlbMemUnits; }
27111308Santhony.gutierrez@amd.com    int nextLocRdBus() { return (++nextLocMemBus) % numLocMemUnits; }
27211308Santhony.gutierrez@amd.com    /* This function cycles through all the wavefronts in all the phases to see
27311308Santhony.gutierrez@amd.com     * if all of the wavefronts which should be associated with one barrier
27411308Santhony.gutierrez@amd.com     * (denoted with _barrier_id), are all at the same barrier in the program
27511308Santhony.gutierrez@amd.com     * (denoted by bcnt). When the number at the barrier matches bslots, then
27611308Santhony.gutierrez@amd.com     * return true.
27711308Santhony.gutierrez@amd.com     */
27811308Santhony.gutierrez@amd.com    int AllAtBarrier(uint32_t _barrier_id, uint32_t bcnt, uint32_t bslots);
27911308Santhony.gutierrez@amd.com    bool cedeSIMD(int simdId, int wfSlotId);
28011308Santhony.gutierrez@amd.com
28111308Santhony.gutierrez@amd.com    template<typename c0, typename c1> void doSmReturn(GPUDynInstPtr gpuDynInst);
28211308Santhony.gutierrez@amd.com    virtual void init();
28311308Santhony.gutierrez@amd.com    void sendRequest(GPUDynInstPtr gpuDynInst, int index, PacketPtr pkt);
28411308Santhony.gutierrez@amd.com    void sendSyncRequest(GPUDynInstPtr gpuDynInst, int index, PacketPtr pkt);
28511308Santhony.gutierrez@amd.com    void injectGlobalMemFence(GPUDynInstPtr gpuDynInst,
28611308Santhony.gutierrez@amd.com                              bool kernelLaunch=true,
28711308Santhony.gutierrez@amd.com                              RequestPtr req=nullptr);
28811308Santhony.gutierrez@amd.com    void handleMemPacket(PacketPtr pkt, int memport_index);
28911308Santhony.gutierrez@amd.com    bool processTimingPacket(PacketPtr pkt);
29011308Santhony.gutierrez@amd.com    void processFetchReturn(PacketPtr pkt);
29111308Santhony.gutierrez@amd.com    void updatePageDivergenceDist(Addr addr);
29211308Santhony.gutierrez@amd.com
29311308Santhony.gutierrez@amd.com    MasterID masterId() { return _masterId; }
29411308Santhony.gutierrez@amd.com
29511308Santhony.gutierrez@amd.com    bool isDone() const;
29611308Santhony.gutierrez@amd.com    bool isSimdDone(uint32_t) const;
29711308Santhony.gutierrez@amd.com
29811308Santhony.gutierrez@amd.com  protected:
29911308Santhony.gutierrez@amd.com    MasterID _masterId;
30011308Santhony.gutierrez@amd.com
30111308Santhony.gutierrez@amd.com    LdsState &lds;
30211308Santhony.gutierrez@amd.com
30311308Santhony.gutierrez@amd.com  public:
30411695Santhony.gutierrez@amd.com    Stats::Scalar vALUInsts;
30511695Santhony.gutierrez@amd.com    Stats::Formula vALUInstsPerWF;
30611695Santhony.gutierrez@amd.com    Stats::Scalar sALUInsts;
30711695Santhony.gutierrez@amd.com    Stats::Formula sALUInstsPerWF;
30811695Santhony.gutierrez@amd.com    Stats::Scalar instCyclesVALU;
30911695Santhony.gutierrez@amd.com    Stats::Scalar instCyclesSALU;
31011695Santhony.gutierrez@amd.com    Stats::Scalar threadCyclesVALU;
31111695Santhony.gutierrez@amd.com    Stats::Formula vALUUtilization;
31211695Santhony.gutierrez@amd.com    Stats::Scalar ldsNoFlatInsts;
31311695Santhony.gutierrez@amd.com    Stats::Formula ldsNoFlatInstsPerWF;
31411695Santhony.gutierrez@amd.com    Stats::Scalar flatVMemInsts;
31511695Santhony.gutierrez@amd.com    Stats::Formula flatVMemInstsPerWF;
31611695Santhony.gutierrez@amd.com    Stats::Scalar flatLDSInsts;
31711695Santhony.gutierrez@amd.com    Stats::Formula flatLDSInstsPerWF;
31811695Santhony.gutierrez@amd.com    Stats::Scalar vectorMemWrites;
31911695Santhony.gutierrez@amd.com    Stats::Formula vectorMemWritesPerWF;
32011695Santhony.gutierrez@amd.com    Stats::Scalar vectorMemReads;
32111695Santhony.gutierrez@amd.com    Stats::Formula vectorMemReadsPerWF;
32211695Santhony.gutierrez@amd.com    Stats::Scalar scalarMemWrites;
32311695Santhony.gutierrez@amd.com    Stats::Formula scalarMemWritesPerWF;
32411695Santhony.gutierrez@amd.com    Stats::Scalar scalarMemReads;
32511695Santhony.gutierrez@amd.com    Stats::Formula scalarMemReadsPerWF;
32611695Santhony.gutierrez@amd.com
32711695Santhony.gutierrez@amd.com    void updateInstStats(GPUDynInstPtr gpuDynInst);
32811695Santhony.gutierrez@amd.com
32911308Santhony.gutierrez@amd.com    // the following stats compute the avg. TLB accesslatency per
33011308Santhony.gutierrez@amd.com    // uncoalesced request (only for data)
33111308Santhony.gutierrez@amd.com    Stats::Scalar tlbRequests;
33211308Santhony.gutierrez@amd.com    Stats::Scalar tlbCycles;
33311308Santhony.gutierrez@amd.com    Stats::Formula tlbLatency;
33411308Santhony.gutierrez@amd.com    // hitsPerTLBLevel[x] are the hits in Level x TLB. x = 0 is the page table.
33511308Santhony.gutierrez@amd.com    Stats::Vector hitsPerTLBLevel;
33611308Santhony.gutierrez@amd.com
33711308Santhony.gutierrez@amd.com    Stats::Scalar ldsBankAccesses;
33811308Santhony.gutierrez@amd.com    Stats::Distribution ldsBankConflictDist;
33911308Santhony.gutierrez@amd.com
34011308Santhony.gutierrez@amd.com    // over all memory instructions executed over all wavefronts
34111308Santhony.gutierrez@amd.com    // how many touched 0-4 pages, 4-8, ..., 60-64 pages
34211308Santhony.gutierrez@amd.com    Stats::Distribution pageDivergenceDist;
34311308Santhony.gutierrez@amd.com    Stats::Scalar dynamicGMemInstrCnt;
34411308Santhony.gutierrez@amd.com    Stats::Scalar dynamicLMemInstrCnt;
34511308Santhony.gutierrez@amd.com
34611308Santhony.gutierrez@amd.com    Stats::Scalar wgBlockedDueLdsAllocation;
34711308Santhony.gutierrez@amd.com    // Number of instructions executed, i.e. if 64 (or 32 or 7) lanes are active
34811308Santhony.gutierrez@amd.com    // when the instruction is committed, this number is still incremented by 1
34911308Santhony.gutierrez@amd.com    Stats::Scalar numInstrExecuted;
35011308Santhony.gutierrez@amd.com    // Number of cycles among successive instruction executions across all
35111308Santhony.gutierrez@amd.com    // wavefronts of the same CU
35211308Santhony.gutierrez@amd.com    Stats::Distribution execRateDist;
35311308Santhony.gutierrez@amd.com    // number of individual vector operations executed
35411308Santhony.gutierrez@amd.com    Stats::Scalar numVecOpsExecuted;
35511308Santhony.gutierrez@amd.com    // Total cycles that something is running on the GPU
35611308Santhony.gutierrez@amd.com    Stats::Scalar totalCycles;
35711308Santhony.gutierrez@amd.com    Stats::Formula vpc; // vector ops per cycle
35811308Santhony.gutierrez@amd.com    Stats::Formula ipc; // vector instructions per cycle
35911308Santhony.gutierrez@amd.com    Stats::Distribution controlFlowDivergenceDist;
36011308Santhony.gutierrez@amd.com    Stats::Distribution activeLanesPerGMemInstrDist;
36111308Santhony.gutierrez@amd.com    Stats::Distribution activeLanesPerLMemInstrDist;
36211308Santhony.gutierrez@amd.com    // number of vector ALU instructions received
36311308Santhony.gutierrez@amd.com    Stats::Formula numALUInstsExecuted;
36411308Santhony.gutierrez@amd.com    // number of times a WG can not start due to lack of free VGPRs in SIMDs
36511308Santhony.gutierrez@amd.com    Stats::Scalar numTimesWgBlockedDueVgprAlloc;
36611308Santhony.gutierrez@amd.com    Stats::Scalar numCASOps;
36711308Santhony.gutierrez@amd.com    Stats::Scalar numFailedCASOps;
36811308Santhony.gutierrez@amd.com    Stats::Scalar completedWfs;
36911308Santhony.gutierrez@amd.com    // flag per vector SIMD unit that is set when there is at least one
37011308Santhony.gutierrez@amd.com    // WV that has a vector ALU instruction as the oldest in its
37111308Santhony.gutierrez@amd.com    // Instruction Buffer: Defined in the Scoreboard stage, consumed
37211308Santhony.gutierrez@amd.com    // by the Execute stage.
37311308Santhony.gutierrez@amd.com    std::vector<bool> vectorAluInstAvail;
37411308Santhony.gutierrez@amd.com    // number of available (oldest) LDS instructions that could have
37511308Santhony.gutierrez@amd.com    // been issued to the LDS at a specific issue slot
37611308Santhony.gutierrez@amd.com    int shrMemInstAvail;
37711308Santhony.gutierrez@amd.com    // number of available Global memory instructions that could have
37811308Santhony.gutierrez@amd.com    // been issued to TCP at a specific issue slot
37911308Santhony.gutierrez@amd.com    int glbMemInstAvail;
38011308Santhony.gutierrez@amd.com
38111308Santhony.gutierrez@amd.com    void
38211308Santhony.gutierrez@amd.com    regStats();
38311308Santhony.gutierrez@amd.com
38411308Santhony.gutierrez@amd.com    LdsState &
38511308Santhony.gutierrez@amd.com    getLds() const
38611308Santhony.gutierrez@amd.com    {
38711308Santhony.gutierrez@amd.com        return lds;
38811308Santhony.gutierrez@amd.com    }
38911308Santhony.gutierrez@amd.com
39011308Santhony.gutierrez@amd.com    int32_t
39111308Santhony.gutierrez@amd.com    getRefCounter(const uint32_t dispatchId, const uint32_t wgId) const;
39211308Santhony.gutierrez@amd.com
39311308Santhony.gutierrez@amd.com    bool
39411308Santhony.gutierrez@amd.com    sendToLds(GPUDynInstPtr gpuDynInst) __attribute__((warn_unused_result));
39511308Santhony.gutierrez@amd.com
39611308Santhony.gutierrez@amd.com    typedef std::unordered_map<Addr, std::pair<int, int>> pageDataStruct;
39711308Santhony.gutierrez@amd.com    pageDataStruct pageAccesses;
39811308Santhony.gutierrez@amd.com
39911308Santhony.gutierrez@amd.com    class CUExitCallback : public Callback
40011308Santhony.gutierrez@amd.com    {
40111308Santhony.gutierrez@amd.com      private:
40211308Santhony.gutierrez@amd.com        ComputeUnit *computeUnit;
40311308Santhony.gutierrez@amd.com
40411308Santhony.gutierrez@amd.com      public:
40511308Santhony.gutierrez@amd.com        virtual ~CUExitCallback() { }
40611308Santhony.gutierrez@amd.com
40711308Santhony.gutierrez@amd.com        CUExitCallback(ComputeUnit *_cu)
40811308Santhony.gutierrez@amd.com        {
40911308Santhony.gutierrez@amd.com            computeUnit = _cu;
41011308Santhony.gutierrez@amd.com        }
41111308Santhony.gutierrez@amd.com
41211308Santhony.gutierrez@amd.com        virtual void
41311308Santhony.gutierrez@amd.com        process();
41411308Santhony.gutierrez@amd.com    };
41511308Santhony.gutierrez@amd.com
41611308Santhony.gutierrez@amd.com    CUExitCallback *cuExitCallback;
41711308Santhony.gutierrez@amd.com
41811308Santhony.gutierrez@amd.com    /** Data access Port **/
41911308Santhony.gutierrez@amd.com    class DataPort : public MasterPort
42011308Santhony.gutierrez@amd.com    {
42111308Santhony.gutierrez@amd.com      public:
42211308Santhony.gutierrez@amd.com        DataPort(const std::string &_name, ComputeUnit *_cu, PortID _index)
42311308Santhony.gutierrez@amd.com            : MasterPort(_name, _cu), computeUnit(_cu),
42411308Santhony.gutierrez@amd.com              index(_index) { }
42511308Santhony.gutierrez@amd.com
42611308Santhony.gutierrez@amd.com        bool snoopRangeSent;
42711308Santhony.gutierrez@amd.com
42811308Santhony.gutierrez@amd.com        struct SenderState : public Packet::SenderState
42911308Santhony.gutierrez@amd.com        {
43011308Santhony.gutierrez@amd.com            GPUDynInstPtr _gpuDynInst;
43111308Santhony.gutierrez@amd.com            int port_index;
43211308Santhony.gutierrez@amd.com            Packet::SenderState *saved;
43311308Santhony.gutierrez@amd.com
43411308Santhony.gutierrez@amd.com            SenderState(GPUDynInstPtr gpuDynInst, PortID _port_index,
43511308Santhony.gutierrez@amd.com                        Packet::SenderState *sender_state=nullptr)
43611308Santhony.gutierrez@amd.com                : _gpuDynInst(gpuDynInst),
43711308Santhony.gutierrez@amd.com                  port_index(_port_index),
43811308Santhony.gutierrez@amd.com                  saved(sender_state) { }
43911308Santhony.gutierrez@amd.com        };
44011308Santhony.gutierrez@amd.com
44111308Santhony.gutierrez@amd.com        class MemReqEvent : public Event
44211308Santhony.gutierrez@amd.com        {
44311308Santhony.gutierrez@amd.com          private:
44411308Santhony.gutierrez@amd.com            DataPort *dataPort;
44511308Santhony.gutierrez@amd.com            PacketPtr pkt;
44611308Santhony.gutierrez@amd.com
44711308Santhony.gutierrez@amd.com          public:
44811308Santhony.gutierrez@amd.com            MemReqEvent(DataPort *_data_port, PacketPtr _pkt)
44911308Santhony.gutierrez@amd.com                : Event(), dataPort(_data_port), pkt(_pkt)
45011308Santhony.gutierrez@amd.com            {
45111308Santhony.gutierrez@amd.com              setFlags(Event::AutoDelete);
45211308Santhony.gutierrez@amd.com            }
45311308Santhony.gutierrez@amd.com
45411308Santhony.gutierrez@amd.com            void process();
45511308Santhony.gutierrez@amd.com            const char *description() const;
45611308Santhony.gutierrez@amd.com        };
45711308Santhony.gutierrez@amd.com
45811308Santhony.gutierrez@amd.com        class MemRespEvent : public Event
45911308Santhony.gutierrez@amd.com        {
46011308Santhony.gutierrez@amd.com          private:
46111308Santhony.gutierrez@amd.com            DataPort *dataPort;
46211308Santhony.gutierrez@amd.com            PacketPtr pkt;
46311308Santhony.gutierrez@amd.com
46411308Santhony.gutierrez@amd.com          public:
46511308Santhony.gutierrez@amd.com            MemRespEvent(DataPort *_data_port, PacketPtr _pkt)
46611308Santhony.gutierrez@amd.com                : Event(), dataPort(_data_port), pkt(_pkt)
46711308Santhony.gutierrez@amd.com            {
46811308Santhony.gutierrez@amd.com              setFlags(Event::AutoDelete);
46911308Santhony.gutierrez@amd.com            }
47011308Santhony.gutierrez@amd.com
47111308Santhony.gutierrez@amd.com            void process();
47211308Santhony.gutierrez@amd.com            const char *description() const;
47311308Santhony.gutierrez@amd.com        };
47411308Santhony.gutierrez@amd.com
47511308Santhony.gutierrez@amd.com        std::deque<std::pair<PacketPtr, GPUDynInstPtr>> retries;
47611308Santhony.gutierrez@amd.com
47711308Santhony.gutierrez@amd.com      protected:
47811308Santhony.gutierrez@amd.com        ComputeUnit *computeUnit;
47911308Santhony.gutierrez@amd.com        int index;
48011308Santhony.gutierrez@amd.com
48111308Santhony.gutierrez@amd.com        virtual bool recvTimingResp(PacketPtr pkt);
48211308Santhony.gutierrez@amd.com        virtual Tick recvAtomic(PacketPtr pkt) { return 0; }
48311308Santhony.gutierrez@amd.com        virtual void recvFunctional(PacketPtr pkt) { }
48411308Santhony.gutierrez@amd.com        virtual void recvRangeChange() { }
48511308Santhony.gutierrez@amd.com        virtual void recvReqRetry();
48611308Santhony.gutierrez@amd.com
48711308Santhony.gutierrez@amd.com        virtual void
48811308Santhony.gutierrez@amd.com        getDeviceAddressRanges(AddrRangeList &resp, bool &snoop)
48911308Santhony.gutierrez@amd.com        {
49011308Santhony.gutierrez@amd.com            resp.clear();
49111308Santhony.gutierrez@amd.com            snoop = true;
49211308Santhony.gutierrez@amd.com        }
49311308Santhony.gutierrez@amd.com
49411308Santhony.gutierrez@amd.com    };
49511308Santhony.gutierrez@amd.com
49611308Santhony.gutierrez@amd.com    // Instruction cache access port
49711308Santhony.gutierrez@amd.com    class SQCPort : public MasterPort
49811308Santhony.gutierrez@amd.com    {
49911308Santhony.gutierrez@amd.com      public:
50011308Santhony.gutierrez@amd.com        SQCPort(const std::string &_name, ComputeUnit *_cu, PortID _index)
50111308Santhony.gutierrez@amd.com            : MasterPort(_name, _cu), computeUnit(_cu),
50211308Santhony.gutierrez@amd.com              index(_index) { }
50311308Santhony.gutierrez@amd.com
50411308Santhony.gutierrez@amd.com        bool snoopRangeSent;
50511308Santhony.gutierrez@amd.com
50611308Santhony.gutierrez@amd.com        struct SenderState : public Packet::SenderState
50711308Santhony.gutierrez@amd.com        {
50811308Santhony.gutierrez@amd.com            Wavefront *wavefront;
50911308Santhony.gutierrez@amd.com            Packet::SenderState *saved;
51011308Santhony.gutierrez@amd.com
51111308Santhony.gutierrez@amd.com            SenderState(Wavefront *_wavefront, Packet::SenderState
51211308Santhony.gutierrez@amd.com                    *sender_state=nullptr)
51311308Santhony.gutierrez@amd.com                : wavefront(_wavefront), saved(sender_state) { }
51411308Santhony.gutierrez@amd.com        };
51511308Santhony.gutierrez@amd.com
51611308Santhony.gutierrez@amd.com        std::deque<std::pair<PacketPtr, Wavefront*>> retries;
51711308Santhony.gutierrez@amd.com
51811308Santhony.gutierrez@amd.com      protected:
51911308Santhony.gutierrez@amd.com        ComputeUnit *computeUnit;
52011308Santhony.gutierrez@amd.com        int index;
52111308Santhony.gutierrez@amd.com
52211308Santhony.gutierrez@amd.com        virtual bool recvTimingResp(PacketPtr pkt);
52311308Santhony.gutierrez@amd.com        virtual Tick recvAtomic(PacketPtr pkt) { return 0; }
52411308Santhony.gutierrez@amd.com        virtual void recvFunctional(PacketPtr pkt) { }
52511308Santhony.gutierrez@amd.com        virtual void recvRangeChange() { }
52611308Santhony.gutierrez@amd.com        virtual void recvReqRetry();
52711308Santhony.gutierrez@amd.com
52811308Santhony.gutierrez@amd.com        virtual void
52911308Santhony.gutierrez@amd.com        getDeviceAddressRanges(AddrRangeList &resp, bool &snoop)
53011308Santhony.gutierrez@amd.com        {
53111308Santhony.gutierrez@amd.com            resp.clear();
53211308Santhony.gutierrez@amd.com            snoop = true;
53311308Santhony.gutierrez@amd.com        }
53411308Santhony.gutierrez@amd.com     };
53511308Santhony.gutierrez@amd.com
53611308Santhony.gutierrez@amd.com    /** Data TLB port **/
53711308Santhony.gutierrez@amd.com    class DTLBPort : public MasterPort
53811308Santhony.gutierrez@amd.com    {
53911308Santhony.gutierrez@amd.com      public:
54011308Santhony.gutierrez@amd.com        DTLBPort(const std::string &_name, ComputeUnit *_cu, PortID _index)
54111308Santhony.gutierrez@amd.com            : MasterPort(_name, _cu), computeUnit(_cu),
54211308Santhony.gutierrez@amd.com              index(_index), stalled(false)
54311308Santhony.gutierrez@amd.com        { }
54411308Santhony.gutierrez@amd.com
54511308Santhony.gutierrez@amd.com        bool isStalled() { return stalled; }
54611308Santhony.gutierrez@amd.com        void stallPort() { stalled = true; }
54711308Santhony.gutierrez@amd.com        void unstallPort() { stalled = false; }
54811308Santhony.gutierrez@amd.com
54911308Santhony.gutierrez@amd.com        /**
55011308Santhony.gutierrez@amd.com         * here we queue all the translation requests that were
55111308Santhony.gutierrez@amd.com         * not successfully sent.
55211308Santhony.gutierrez@amd.com         */
55311308Santhony.gutierrez@amd.com        std::deque<PacketPtr> retries;
55411308Santhony.gutierrez@amd.com
55511308Santhony.gutierrez@amd.com        /** SenderState is information carried along with the packet
55611308Santhony.gutierrez@amd.com         * throughout the TLB hierarchy
55711308Santhony.gutierrez@amd.com         */
55811308Santhony.gutierrez@amd.com        struct SenderState: public Packet::SenderState
55911308Santhony.gutierrez@amd.com        {
56011308Santhony.gutierrez@amd.com            // the memInst that this is associated with
56111308Santhony.gutierrez@amd.com            GPUDynInstPtr _gpuDynInst;
56211308Santhony.gutierrez@amd.com
56311308Santhony.gutierrez@amd.com            // the lane in the memInst this is associated with, so we send
56411308Santhony.gutierrez@amd.com            // the memory request down the right port
56511308Santhony.gutierrez@amd.com            int portIndex;
56611308Santhony.gutierrez@amd.com
56711308Santhony.gutierrez@amd.com            // constructor used for packets involved in timing accesses
56811308Santhony.gutierrez@amd.com            SenderState(GPUDynInstPtr gpuDynInst, PortID port_index)
56911308Santhony.gutierrez@amd.com                : _gpuDynInst(gpuDynInst), portIndex(port_index) { }
57011308Santhony.gutierrez@amd.com
57111308Santhony.gutierrez@amd.com        };
57211308Santhony.gutierrez@amd.com
57311308Santhony.gutierrez@amd.com      protected:
57411308Santhony.gutierrez@amd.com        ComputeUnit *computeUnit;
57511308Santhony.gutierrez@amd.com        int index;
57611308Santhony.gutierrez@amd.com        bool stalled;
57711308Santhony.gutierrez@amd.com
57811308Santhony.gutierrez@amd.com        virtual bool recvTimingResp(PacketPtr pkt);
57911308Santhony.gutierrez@amd.com        virtual Tick recvAtomic(PacketPtr pkt) { return 0; }
58011308Santhony.gutierrez@amd.com        virtual void recvFunctional(PacketPtr pkt) { }
58111308Santhony.gutierrez@amd.com        virtual void recvRangeChange() { }
58211308Santhony.gutierrez@amd.com        virtual void recvReqRetry();
58311308Santhony.gutierrez@amd.com    };
58411308Santhony.gutierrez@amd.com
58511308Santhony.gutierrez@amd.com    class ITLBPort : public MasterPort
58611308Santhony.gutierrez@amd.com    {
58711308Santhony.gutierrez@amd.com      public:
58811308Santhony.gutierrez@amd.com        ITLBPort(const std::string &_name, ComputeUnit *_cu)
58911308Santhony.gutierrez@amd.com            : MasterPort(_name, _cu), computeUnit(_cu), stalled(false) { }
59011308Santhony.gutierrez@amd.com
59111308Santhony.gutierrez@amd.com
59211308Santhony.gutierrez@amd.com        bool isStalled() { return stalled; }
59311308Santhony.gutierrez@amd.com        void stallPort() { stalled = true; }
59411308Santhony.gutierrez@amd.com        void unstallPort() { stalled = false; }
59511308Santhony.gutierrez@amd.com
59611308Santhony.gutierrez@amd.com        /**
59711308Santhony.gutierrez@amd.com         * here we queue all the translation requests that were
59811308Santhony.gutierrez@amd.com         * not successfully sent.
59911308Santhony.gutierrez@amd.com         */
60011308Santhony.gutierrez@amd.com        std::deque<PacketPtr> retries;
60111308Santhony.gutierrez@amd.com
60211308Santhony.gutierrez@amd.com        /** SenderState is information carried along with the packet
60311308Santhony.gutierrez@amd.com         * throughout the TLB hierarchy
60411308Santhony.gutierrez@amd.com         */
60511308Santhony.gutierrez@amd.com        struct SenderState: public Packet::SenderState
60611308Santhony.gutierrez@amd.com        {
60711308Santhony.gutierrez@amd.com            // The wavefront associated with this request
60811308Santhony.gutierrez@amd.com            Wavefront *wavefront;
60911308Santhony.gutierrez@amd.com
61011308Santhony.gutierrez@amd.com            SenderState(Wavefront *_wavefront) : wavefront(_wavefront) { }
61111308Santhony.gutierrez@amd.com        };
61211308Santhony.gutierrez@amd.com
61311308Santhony.gutierrez@amd.com      protected:
61411308Santhony.gutierrez@amd.com        ComputeUnit *computeUnit;
61511308Santhony.gutierrez@amd.com        bool stalled;
61611308Santhony.gutierrez@amd.com
61711308Santhony.gutierrez@amd.com        virtual bool recvTimingResp(PacketPtr pkt);
61811308Santhony.gutierrez@amd.com        virtual Tick recvAtomic(PacketPtr pkt) { return 0; }
61911308Santhony.gutierrez@amd.com        virtual void recvFunctional(PacketPtr pkt) { }
62011308Santhony.gutierrez@amd.com        virtual void recvRangeChange() { }
62111308Santhony.gutierrez@amd.com        virtual void recvReqRetry();
62211308Santhony.gutierrez@amd.com    };
62311308Santhony.gutierrez@amd.com
62411308Santhony.gutierrez@amd.com    /**
62511308Santhony.gutierrez@amd.com     * the port intended to communicate between the CU and its LDS
62611308Santhony.gutierrez@amd.com     */
62711308Santhony.gutierrez@amd.com    class LDSPort : public MasterPort
62811308Santhony.gutierrez@amd.com    {
62911308Santhony.gutierrez@amd.com      public:
63011308Santhony.gutierrez@amd.com        LDSPort(const std::string &_name, ComputeUnit *_cu, PortID _id)
63111308Santhony.gutierrez@amd.com        : MasterPort(_name, _cu, _id), computeUnit(_cu)
63211308Santhony.gutierrez@amd.com        {
63311308Santhony.gutierrez@amd.com        }
63411308Santhony.gutierrez@amd.com
63511308Santhony.gutierrez@amd.com        bool isStalled() const { return stalled; }
63611308Santhony.gutierrez@amd.com        void stallPort() { stalled = true; }
63711308Santhony.gutierrez@amd.com        void unstallPort() { stalled = false; }
63811308Santhony.gutierrez@amd.com
63911308Santhony.gutierrez@amd.com        /**
64011308Santhony.gutierrez@amd.com         * here we queue all the requests that were
64111308Santhony.gutierrez@amd.com         * not successfully sent.
64211308Santhony.gutierrez@amd.com         */
64311308Santhony.gutierrez@amd.com        std::queue<PacketPtr> retries;
64411308Santhony.gutierrez@amd.com
64511308Santhony.gutierrez@amd.com        /**
64611308Santhony.gutierrez@amd.com         *  SenderState is information carried along with the packet, esp. the
64711308Santhony.gutierrez@amd.com         *  GPUDynInstPtr
64811308Santhony.gutierrez@amd.com         */
64911308Santhony.gutierrez@amd.com        class SenderState: public Packet::SenderState
65011308Santhony.gutierrez@amd.com        {
65111308Santhony.gutierrez@amd.com          protected:
65211308Santhony.gutierrez@amd.com            // The actual read/write/atomic request that goes with this command
65311308Santhony.gutierrez@amd.com            GPUDynInstPtr _gpuDynInst = nullptr;
65411308Santhony.gutierrez@amd.com
65511308Santhony.gutierrez@amd.com          public:
65611308Santhony.gutierrez@amd.com            SenderState(GPUDynInstPtr gpuDynInst):
65711308Santhony.gutierrez@amd.com              _gpuDynInst(gpuDynInst)
65811308Santhony.gutierrez@amd.com            {
65911308Santhony.gutierrez@amd.com            }
66011308Santhony.gutierrez@amd.com
66111308Santhony.gutierrez@amd.com            GPUDynInstPtr
66211308Santhony.gutierrez@amd.com            getMemInst() const
66311308Santhony.gutierrez@amd.com            {
66411308Santhony.gutierrez@amd.com              return _gpuDynInst;
66511308Santhony.gutierrez@amd.com            }
66611308Santhony.gutierrez@amd.com        };
66711308Santhony.gutierrez@amd.com
66811308Santhony.gutierrez@amd.com        virtual bool
66911308Santhony.gutierrez@amd.com        sendTimingReq(PacketPtr pkt);
67011308Santhony.gutierrez@amd.com
67111308Santhony.gutierrez@amd.com      protected:
67211308Santhony.gutierrez@amd.com
67311308Santhony.gutierrez@amd.com        bool stalled = false; ///< whether or not it is stalled
67411308Santhony.gutierrez@amd.com
67511308Santhony.gutierrez@amd.com        ComputeUnit *computeUnit;
67611308Santhony.gutierrez@amd.com
67711308Santhony.gutierrez@amd.com        virtual bool
67811308Santhony.gutierrez@amd.com        recvTimingResp(PacketPtr pkt);
67911308Santhony.gutierrez@amd.com
68011308Santhony.gutierrez@amd.com        virtual Tick
68111308Santhony.gutierrez@amd.com        recvAtomic(PacketPtr pkt) { return 0; }
68211308Santhony.gutierrez@amd.com
68311308Santhony.gutierrez@amd.com        virtual void
68411308Santhony.gutierrez@amd.com        recvFunctional(PacketPtr pkt)
68511308Santhony.gutierrez@amd.com        {
68611308Santhony.gutierrez@amd.com        }
68711308Santhony.gutierrez@amd.com
68811308Santhony.gutierrez@amd.com        virtual void
68911308Santhony.gutierrez@amd.com        recvRangeChange()
69011308Santhony.gutierrez@amd.com        {
69111308Santhony.gutierrez@amd.com        }
69211308Santhony.gutierrez@amd.com
69311308Santhony.gutierrez@amd.com        virtual void
69411308Santhony.gutierrez@amd.com        recvReqRetry();
69511308Santhony.gutierrez@amd.com    };
69611308Santhony.gutierrez@amd.com
69711308Santhony.gutierrez@amd.com    /** The port to access the Local Data Store
69811308Santhony.gutierrez@amd.com     *  Can be connected to a LDS object
69911308Santhony.gutierrez@amd.com     */
70011308Santhony.gutierrez@amd.com    LDSPort *ldsPort = nullptr;
70111308Santhony.gutierrez@amd.com
70211308Santhony.gutierrez@amd.com    LDSPort *
70311308Santhony.gutierrez@amd.com    getLdsPort() const
70411308Santhony.gutierrez@amd.com    {
70511308Santhony.gutierrez@amd.com        return ldsPort;
70611308Santhony.gutierrez@amd.com    }
70711308Santhony.gutierrez@amd.com
70811308Santhony.gutierrez@amd.com    /** The memory port for SIMD data accesses.
70911308Santhony.gutierrez@amd.com     *  Can be connected to PhysMem for Ruby for timing simulations
71011308Santhony.gutierrez@amd.com     */
71111308Santhony.gutierrez@amd.com    std::vector<DataPort*> memPort;
71211308Santhony.gutierrez@amd.com    // port to the TLB hierarchy (i.e., the L1 TLB)
71311308Santhony.gutierrez@amd.com    std::vector<DTLBPort*> tlbPort;
71411308Santhony.gutierrez@amd.com    // port to the SQC (i.e. the I-cache)
71511308Santhony.gutierrez@amd.com    SQCPort *sqcPort;
71611308Santhony.gutierrez@amd.com    // port to the SQC TLB (there's a separate TLB for each I-cache)
71711308Santhony.gutierrez@amd.com    ITLBPort *sqcTLBPort;
71811308Santhony.gutierrez@amd.com
71911308Santhony.gutierrez@amd.com    virtual BaseMasterPort&
72011308Santhony.gutierrez@amd.com    getMasterPort(const std::string &if_name, PortID idx)
72111308Santhony.gutierrez@amd.com    {
72211308Santhony.gutierrez@amd.com        if (if_name == "memory_port") {
72311308Santhony.gutierrez@amd.com            memPort[idx] = new DataPort(csprintf("%s-port%d", name(), idx),
72411308Santhony.gutierrez@amd.com                                        this, idx);
72511308Santhony.gutierrez@amd.com            return *memPort[idx];
72611308Santhony.gutierrez@amd.com        } else if (if_name == "translation_port") {
72711308Santhony.gutierrez@amd.com            tlbPort[idx] = new DTLBPort(csprintf("%s-port%d", name(), idx),
72811308Santhony.gutierrez@amd.com                                        this, idx);
72911308Santhony.gutierrez@amd.com            return *tlbPort[idx];
73011308Santhony.gutierrez@amd.com        } else if (if_name == "sqc_port") {
73111308Santhony.gutierrez@amd.com            sqcPort = new SQCPort(csprintf("%s-port%d", name(), idx),
73211308Santhony.gutierrez@amd.com                                  this, idx);
73311308Santhony.gutierrez@amd.com            return *sqcPort;
73411308Santhony.gutierrez@amd.com        } else if (if_name == "sqc_tlb_port") {
73511308Santhony.gutierrez@amd.com            sqcTLBPort = new ITLBPort(csprintf("%s-port", name()), this);
73611308Santhony.gutierrez@amd.com            return *sqcTLBPort;
73711308Santhony.gutierrez@amd.com        } else if (if_name == "ldsPort") {
73811308Santhony.gutierrez@amd.com            if (ldsPort) {
73911308Santhony.gutierrez@amd.com                fatal("an LDS port was already allocated");
74011308Santhony.gutierrez@amd.com            }
74111308Santhony.gutierrez@amd.com            ldsPort = new LDSPort(csprintf("%s-port", name()), this, idx);
74211308Santhony.gutierrez@amd.com            return *ldsPort;
74311308Santhony.gutierrez@amd.com        } else {
74411308Santhony.gutierrez@amd.com            panic("incorrect port name");
74511308Santhony.gutierrez@amd.com        }
74611308Santhony.gutierrez@amd.com    }
74711308Santhony.gutierrez@amd.com
74811308Santhony.gutierrez@amd.com    // xact_cas_load()
74911308Santhony.gutierrez@amd.com    class waveIdentifier
75011308Santhony.gutierrez@amd.com    {
75111308Santhony.gutierrez@amd.com      public:
75211308Santhony.gutierrez@amd.com        waveIdentifier() { }
75311308Santhony.gutierrez@amd.com        waveIdentifier(int _simdId, int _wfSlotId)
75411308Santhony.gutierrez@amd.com          : simdId(_simdId), wfSlotId(_wfSlotId) { }
75511308Santhony.gutierrez@amd.com
75611308Santhony.gutierrez@amd.com        int simdId;
75711308Santhony.gutierrez@amd.com        int wfSlotId;
75811308Santhony.gutierrez@amd.com    };
75911308Santhony.gutierrez@amd.com
76011308Santhony.gutierrez@amd.com    class waveQueue
76111308Santhony.gutierrez@amd.com    {
76211308Santhony.gutierrez@amd.com      public:
76311308Santhony.gutierrez@amd.com        std::list<waveIdentifier> waveIDQueue;
76411308Santhony.gutierrez@amd.com    };
76511308Santhony.gutierrez@amd.com    std::map<unsigned, waveQueue> xactCasLoadMap;
76611308Santhony.gutierrez@amd.com
76711308Santhony.gutierrez@amd.com    uint64_t getAndIncSeqNum() { return globalSeqNum++; }
76811308Santhony.gutierrez@amd.com
76911308Santhony.gutierrez@amd.com  private:
77011308Santhony.gutierrez@amd.com    uint64_t globalSeqNum;
77111308Santhony.gutierrez@amd.com    int wavefrontSize;
77211692Santhony.gutierrez@amd.com    GPUStaticInst *kernelLaunchInst;
77311308Santhony.gutierrez@amd.com};
77411308Santhony.gutierrez@amd.com
77511308Santhony.gutierrez@amd.com#endif // __COMPUTE_UNIT_HH__
776