compute_unit.hh revision 11534:7106f550afad
12292SN/A/*
22329SN/A * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
32292SN/A * All rights reserved.
42292SN/A *
52292SN/A * For use for simulation and test purposes only
62292SN/A *
72292SN/A * Redistribution and use in source and binary forms, with or without
82292SN/A * modification, are permitted provided that the following conditions are met:
92292SN/A *
102292SN/A * 1. Redistributions of source code must retain the above copyright notice,
112292SN/A * this list of conditions and the following disclaimer.
122292SN/A *
132292SN/A * 2. Redistributions in binary form must reproduce the above copyright notice,
142292SN/A * this list of conditions and the following disclaimer in the documentation
152292SN/A * and/or other materials provided with the distribution.
162292SN/A *
172292SN/A * 3. Neither the name of the copyright holder nor the names of its contributors
182292SN/A * may be used to endorse or promote products derived from this software
192292SN/A * without specific prior written permission.
202292SN/A *
212292SN/A * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
222292SN/A * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
232292SN/A * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
242292SN/A * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
252292SN/A * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
262292SN/A * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
272689Sktlim@umich.edu * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
282689Sktlim@umich.edu * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
292689Sktlim@umich.edu * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
302292SN/A * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
312292SN/A * POSSIBILITY OF SUCH DAMAGE.
322292SN/A *
332292SN/A * Author: John Kalamatianos, Anthony Gutierrez
342292SN/A */
352329SN/A
362292SN/A#ifndef __COMPUTE_UNIT_HH__
372292SN/A#define __COMPUTE_UNIT_HH__
382292SN/A
#include <deque>
#include <list>
#include <map>
#include <queue>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

#include "base/callback.hh"
#include "base/statistics.hh"
#include "base/types.hh"
#include "enums/PrefetchType.hh"
#include "gpu-compute/exec_stage.hh"
#include "gpu-compute/fetch_stage.hh"
#include "gpu-compute/global_memory_pipeline.hh"
#include "gpu-compute/local_memory_pipeline.hh"
#include "gpu-compute/qstruct.hh"
#include "gpu-compute/schedule_stage.hh"
#include "gpu-compute/scoreboard_check_stage.hh"
#include "mem/mem_object.hh"
#include "mem/port.hh"
572329SN/A
// Compile-time limits for memory instructions. Their use sites are outside
// this header.
static constexpr int MAX_REGS_FOR_NON_VEC_MEM_INST = 1;
static constexpr int MAX_WIDTH_FOR_MEM_INST = 32;
602292SN/A
612292SN/Aclass NDRange;
622292SN/Aclass Shader;
632292SN/Aclass VectorRegisterFile;
642292SN/A
652733Sktlim@umich.edustruct ComputeUnitParams;
662292SN/A
// Wavefront execution policies selectable for a compute unit.
// NOTE(review): RR presumably stands for round-robin -- confirm.
enum EXEC_POLICY
{
    OLDEST = 0,
    RR = 1
};
722292SN/A
// Execution units of a compute unit: four SIMD slots followed by the
// global-memory and LDS-memory pipes. NUM_UNITS is the entry count.
enum EXEC_UNIT
{
    SIMD0 = 0,
    SIMD1 = 1,
    SIMD2 = 2,
    SIMD3 = 3,
    GLBMEM_PIPE = 4,
    LDSMEM_PIPE = 5,
    NUM_UNITS = 6
};
842727Sktlim@umich.edu
// The four combinations of TLB hit/miss and cache hit/miss.
enum TLB_CACHE
{
    TLB_MISS_CACHE_MISS = 0,
    TLB_MISS_CACHE_HIT = 1,
    TLB_HIT_CACHE_MISS = 2,
    TLB_HIT_CACHE_HIT = 3
};
922907Sktlim@umich.edu
932907Sktlim@umich.educlass ComputeUnit : public MemObject
942907Sktlim@umich.edu{
952907Sktlim@umich.edu  public:
962348SN/A    FetchStage fetchStage;
972307SN/A    ScoreboardCheckStage scoreboardCheckStage;
982307SN/A    ScheduleStage scheduleStage;
992348SN/A    ExecStage execStage;
1002307SN/A    GlobalMemPipeline globalMemoryPipe;
1012307SN/A    LocalMemPipeline localMemoryPipe;
1022348SN/A
1032307SN/A    // Buffers used to communicate between various pipeline stages
1042307SN/A
1052292SN/A    // List of waves which are ready to be scheduled.
1062292SN/A    // Each execution resource has a ready list. readyList is
1072292SN/A    // used to communicate between scoreboardCheck stage and
1082292SN/A    // schedule stage
1092292SN/A    // TODO: make enum to index readyList
1102292SN/A    std::vector<std::vector<Wavefront*>> readyList;
1112292SN/A
1122292SN/A    // Stores the status of waves. A READY implies the
1132292SN/A    // wave is ready to be scheduled this cycle and
1142292SN/A    // is already present in the readyList. waveStatusList is
1152292SN/A    // used to communicate between scoreboardCheck stage and
1162292SN/A    // schedule stage
1172292SN/A    // TODO: convert std::pair to a class to increase readability
1182292SN/A    std::vector<std::vector<std::pair<Wavefront*, WAVE_STATUS>>> waveStatusList;
1192292SN/A
1202292SN/A    // List of waves which will be dispatched to
1212292SN/A    // each execution resource. A FILLED implies
1222329SN/A    // dispatch list is non-empty and
1232292SN/A    // execution unit has something to execute
1242292SN/A    // this cycle. Currently, the dispatch list of
1252292SN/A    // an execution resource can hold only one wave because
1262292SN/A    // an execution resource can execute only one wave in a cycle.
1272292SN/A    // dispatchList is used to communicate between schedule
1282292SN/A    // and exec stage
1292292SN/A    // TODO: convert std::pair to a class to increase readability
1302292SN/A    std::vector<std::pair<Wavefront*, DISPATCH_STATUS>> dispatchList;
1312292SN/A
1322292SN/A    int rrNextMemID; // used by RR WF exec policy to cycle through WF's
1332292SN/A    int rrNextALUWp;
1342292SN/A    typedef ComputeUnitParams Params;
1352292SN/A    std::vector<std::vector<Wavefront*>> wfList;
1362292SN/A    int cu_id;
1372790Sktlim@umich.edu
1382790Sktlim@umich.edu    // array of vector register files, one per SIMD
1392669Sktlim@umich.edu    std::vector<VectorRegisterFile*> vrf;
1402669Sktlim@umich.edu    // Number of vector ALU units (SIMDs) in CU
1412292SN/A    int numSIMDs;
1422292SN/A    // number of pipe stages for bypassing data to next dependent single
1432292SN/A    // precision vector instruction inside the vector ALU pipeline
1442292SN/A    int spBypassPipeLength;
1452292SN/A    // number of pipe stages for bypassing data to next dependent double
1462292SN/A    // precision vector instruction inside the vector ALU pipeline
1472292SN/A    int dpBypassPipeLength;
1482292SN/A    // number of cycles per issue period
1492292SN/A    int issuePeriod;
1502292SN/A
1512292SN/A    // Number of global and local memory execution resources in CU
1522292SN/A    int numGlbMemUnits;
1532292SN/A    int numLocMemUnits;
1542292SN/A    // tracks the last cycle a vector instruction was executed on a SIMD
1552292SN/A    std::vector<uint64_t> lastExecCycle;
1562292SN/A
1572292SN/A    // true if we allow a separate TLB per lane
1582292SN/A    bool perLaneTLB;
1592292SN/A    // if 0, TLB prefetching is off.
1602292SN/A    int prefetchDepth;
1612292SN/A    // if fixed-stride prefetching, this is the stride.
1622292SN/A    int prefetchStride;
1632292SN/A
1642329SN/A    std::vector<Addr> lastVaddrCU;
1652292SN/A    std::vector<std::vector<Addr>> lastVaddrSimd;
1662292SN/A    std::vector<std::vector<std::vector<Addr>>> lastVaddrWF;
1672292SN/A    Enums::PrefetchType prefetchType;
1682348SN/A    EXEC_POLICY exec_policy;
1692292SN/A
1702292SN/A    bool xact_cas_mode;
1712292SN/A    bool debugSegFault;
1722348SN/A    bool functionalTLB;
1732292SN/A    bool localMemBarrier;
1742292SN/A
1752292SN/A    /*
1762348SN/A     * for Counting page accesses
1772292SN/A     *
1782292SN/A     * cuExitCallback inherits from Callback. When you register a callback
1792292SN/A     * function as an exit callback, it will get added to an exit callback
1802292SN/A     * queue, such that on simulation exit, all callbacks in the callback
1812292SN/A     * queue will have their process() function called.
1822292SN/A     */
1832292SN/A    bool countPages;
1842292SN/A
1852292SN/A    Shader *shader;
1862292SN/A    uint32_t barrier_id;
1872292SN/A    // vector of Vector ALU (MACC) pipelines
1882292SN/A    std::vector<WaitClass> aluPipe;
1892292SN/A    // minimum issue period per SIMD unit (in cycles)
1902292SN/A    std::vector<WaitClass> wfWait;
1912292SN/A
1922292SN/A    // Resource control for Vector Register File->Global Memory pipe buses
1932292SN/A    std::vector<WaitClass> vrfToGlobalMemPipeBus;
1942292SN/A    // Resource control for Vector Register File->Local Memory pipe buses
1952292SN/A    std::vector<WaitClass> vrfToLocalMemPipeBus;
1962292SN/A    int nextGlbMemBus;
1972292SN/A    int nextLocMemBus;
1982292SN/A    // Resource control for global memory to VRF data/address bus
1992292SN/A    WaitClass glbMemToVrfBus;
2002292SN/A    // Resource control for local memory to VRF data/address bus
2012292SN/A    WaitClass locMemToVrfBus;
2022292SN/A
2032292SN/A    uint32_t vrfToCoalescerBusWidth; // VRF->Coalescer data bus width in bytes
2042292SN/A    uint32_t coalescerToVrfBusWidth; // Coalescer->VRF data bus width in bytes
2052292SN/A    uint32_t numCyclesPerStoreTransfer;  // number of cycles per vector store
2062292SN/A    uint32_t numCyclesPerLoadTransfer;  // number of cycles per vector load
2072292SN/A
2082292SN/A    Tick req_tick_latency;
2092292SN/A    Tick resp_tick_latency;
2102292SN/A
2112292SN/A    // number of vector registers being reserved for each SIMD unit
2122678Sktlim@umich.edu    std::vector<int> vectorRegsReserved;
2132678Sktlim@umich.edu    // number of vector registers per SIMD unit
2142292SN/A    uint32_t numVecRegsPerSimd;
2152907Sktlim@umich.edu    // Support for scheduling VGPR status update events
2162907Sktlim@umich.edu    std::vector<std::pair<uint32_t, uint32_t> > regIdxVec;
2172907Sktlim@umich.edu    std::vector<uint64_t> timestampVec;
2182292SN/A    std::vector<uint8_t>  statusVec;
2192698Sktlim@umich.edu
2202678Sktlim@umich.edu    void
2212678Sktlim@umich.edu    registerEvent(uint32_t simdId,
2222698Sktlim@umich.edu                  uint32_t regIdx,
2233349Sbinkertn@umich.edu                  uint32_t operandSize,
2242693Sktlim@umich.edu                  uint64_t when,
2252292SN/A                  uint8_t newStatus) {
2262292SN/A        regIdxVec.push_back(std::make_pair(simdId, regIdx));
2272292SN/A        timestampVec.push_back(when);
2282292SN/A        statusVec.push_back(newStatus);
2292292SN/A        if (operandSize > 4) {
2302292SN/A            regIdxVec.push_back(std::make_pair(simdId,
2312292SN/A                                               ((regIdx + 1) %
2322292SN/A                                                numVecRegsPerSimd)));
2332292SN/A            timestampVec.push_back(when);
2342292SN/A            statusVec.push_back(newStatus);
2352292SN/A        }
2362292SN/A    }
2372329SN/A
2382329SN/A    void updateEvents();
2392329SN/A
2402329SN/A    // this hash map will keep track of page divergence
2412292SN/A    // per memory instruction per wavefront. The hash map
2422292SN/A    // is cleared in GPUDynInst::updateStats() in gpu_dyn_inst.cc.
2432733Sktlim@umich.edu    std::map<Addr, int> pagesTouched;
2442292SN/A
2452292SN/A    ComputeUnit(const Params *p);
2462292SN/A    ~ComputeUnit();
2472292SN/A    int spBypassLength() { return spBypassPipeLength; };
2482907Sktlim@umich.edu    int dpBypassLength() { return dpBypassPipeLength; };
2492907Sktlim@umich.edu    int storeBusLength() { return numCyclesPerStoreTransfer; };
2502669Sktlim@umich.edu    int loadBusLength() { return numCyclesPerLoadTransfer; };
2512907Sktlim@umich.edu    int wfSize() const { return wavefrontSize; };
2522907Sktlim@umich.edu
2532292SN/A    void resizeRegFiles(int num_cregs, int num_sregs, int num_dregs);
2542698Sktlim@umich.edu    void exec();
2552678Sktlim@umich.edu    void initiateFetch(Wavefront *wavefront);
2562678Sktlim@umich.edu    void fetch(PacketPtr pkt, Wavefront *wavefront);
2572678Sktlim@umich.edu    void FillKernelState(Wavefront *w, NDRange *ndr);
2582698Sktlim@umich.edu
2592678Sktlim@umich.edu    void StartWF(Wavefront *w, WFContext *wfCtx, int trueWgSize[],
2602678Sktlim@umich.edu                 int trueWgSizeTotal);
2612678Sktlim@umich.edu
2622678Sktlim@umich.edu    void InitializeWFContext(WFContext *wfCtx, NDRange *ndr, int cnt,
2632698Sktlim@umich.edu                             int trueWgSize[], int trueWgSizeTotal,
2642678Sktlim@umich.edu                             LdsChunk *ldsChunk, uint64_t origSpillMemStart);
2652698Sktlim@umich.edu
2662678Sktlim@umich.edu    void StartWorkgroup(NDRange *ndr);
2672698Sktlim@umich.edu    int ReadyWorkgroup(NDRange *ndr);
2682678Sktlim@umich.edu
2692698Sktlim@umich.edu    bool isVecAlu(int unitId) { return unitId >= SIMD0 && unitId <= SIMD3; }
2702678Sktlim@umich.edu    bool isGlbMem(int unitId) { return unitId == GLBMEM_PIPE; }
2712678Sktlim@umich.edu    bool isShrMem(int unitId) { return unitId == LDSMEM_PIPE; }
2722678Sktlim@umich.edu    int GlbMemUnitId() { return GLBMEM_PIPE; }
2732698Sktlim@umich.edu    int ShrMemUnitId() { return LDSMEM_PIPE; }
2742678Sktlim@umich.edu    int nextGlbRdBus() { return (++nextGlbMemBus) % numGlbMemUnits; }
2752678Sktlim@umich.edu    int nextLocRdBus() { return (++nextLocMemBus) % numLocMemUnits; }
    /* This function cycles through all the wavefronts in all the phases to see
     * if all of the wavefronts which should be associated with one barrier
     * (denoted with _barrier_id) are at the same barrier in the program
     * (denoted by bcnt). When the number at the barrier matches bslots, then
     * return true.
     */
2822678Sktlim@umich.edu    int AllAtBarrier(uint32_t _barrier_id, uint32_t bcnt, uint32_t bslots);
2832678Sktlim@umich.edu    bool cedeSIMD(int simdId, int wfSlotId);
2842678Sktlim@umich.edu
2852678Sktlim@umich.edu    template<typename c0, typename c1> void doSmReturn(GPUDynInstPtr gpuDynInst);
2862698Sktlim@umich.edu    virtual void init();
2872678Sktlim@umich.edu    void sendRequest(GPUDynInstPtr gpuDynInst, int index, PacketPtr pkt);
2882678Sktlim@umich.edu    void sendSyncRequest(GPUDynInstPtr gpuDynInst, int index, PacketPtr pkt);
2892698Sktlim@umich.edu    void injectGlobalMemFence(GPUDynInstPtr gpuDynInst,
2902678Sktlim@umich.edu                              bool kernelLaunch=true,
2912678Sktlim@umich.edu                              RequestPtr req=nullptr);
2922678Sktlim@umich.edu    void handleMemPacket(PacketPtr pkt, int memport_index);
2932678Sktlim@umich.edu    bool processTimingPacket(PacketPtr pkt);
2942678Sktlim@umich.edu    void processFetchReturn(PacketPtr pkt);
2952678Sktlim@umich.edu    void updatePageDivergenceDist(Addr addr);
2962292SN/A
2972292SN/A    MasterID masterId() { return _masterId; }
2982292SN/A
2992292SN/A    bool isDone() const;
3004326Sgblack@eecs.umich.edu    bool isSimdDone(uint32_t) const;
3012292SN/A
3024326Sgblack@eecs.umich.edu  protected:
3034326Sgblack@eecs.umich.edu    MasterID _masterId;
3044326Sgblack@eecs.umich.edu
3052292SN/A    LdsState &lds;
3062292SN/A
3072292SN/A  public:
    // the following stats compute the avg. TLB access latency per
    // uncoalesced request (only for data)
3104326Sgblack@eecs.umich.edu    Stats::Scalar tlbRequests;
3114326Sgblack@eecs.umich.edu    Stats::Scalar tlbCycles;
3124326Sgblack@eecs.umich.edu    Stats::Formula tlbLatency;
3132292SN/A    // hitsPerTLBLevel[x] are the hits in Level x TLB. x = 0 is the page table.
3142292SN/A    Stats::Vector hitsPerTLBLevel;
3152292SN/A
3162669Sktlim@umich.edu    Stats::Scalar ldsBankAccesses;
3172669Sktlim@umich.edu    Stats::Distribution ldsBankConflictDist;
3182292SN/A
3192292SN/A    // over all memory instructions executed over all wavefronts
3202292SN/A    // how many touched 0-4 pages, 4-8, ..., 60-64 pages
3214326Sgblack@eecs.umich.edu    Stats::Distribution pageDivergenceDist;
3222292SN/A    Stats::Scalar dynamicGMemInstrCnt;
3232292SN/A    Stats::Scalar dynamicLMemInstrCnt;
3242292SN/A
3252292SN/A    Stats::Scalar wgBlockedDueLdsAllocation;
3262292SN/A    // Number of instructions executed, i.e. if 64 (or 32 or 7) lanes are active
3272292SN/A    // when the instruction is committed, this number is still incremented by 1
3282292SN/A    Stats::Scalar numInstrExecuted;
3292329SN/A    // Number of cycles among successive instruction executions across all
3302292SN/A    // wavefronts of the same CU
3312292SN/A    Stats::Distribution execRateDist;
3322292SN/A    // number of individual vector operations executed
3332292SN/A    Stats::Scalar numVecOpsExecuted;
3342292SN/A    // Total cycles that something is running on the GPU
3352292SN/A    Stats::Scalar totalCycles;
3362292SN/A    Stats::Formula vpc; // vector ops per cycle
3372292SN/A    Stats::Formula ipc; // vector instructions per cycle
3382292SN/A    Stats::Distribution controlFlowDivergenceDist;
3392292SN/A    Stats::Distribution activeLanesPerGMemInstrDist;
3402329SN/A    Stats::Distribution activeLanesPerLMemInstrDist;
3412329SN/A    // number of vector ALU instructions received
3422329SN/A    Stats::Formula numALUInstsExecuted;
3432292SN/A    // number of times a WG can not start due to lack of free VGPRs in SIMDs
3442329SN/A    Stats::Scalar numTimesWgBlockedDueVgprAlloc;
3452329SN/A    Stats::Scalar numCASOps;
3462329SN/A    Stats::Scalar numFailedCASOps;
3472292SN/A    Stats::Scalar completedWfs;
3482292SN/A    // flag per vector SIMD unit that is set when there is at least one
3492292SN/A    // WV that has a vector ALU instruction as the oldest in its
3502292SN/A    // Instruction Buffer: Defined in the Scoreboard stage, consumed
3512329SN/A    // by the Execute stage.
3522292SN/A    std::vector<bool> vectorAluInstAvail;
3532292SN/A    // number of available (oldest) LDS instructions that could have
3542292SN/A    // been issued to the LDS at a specific issue slot
3552292SN/A    int shrMemInstAvail;
3562292SN/A    // number of available Global memory instructions that could have
3572292SN/A    // been issued to TCP at a specific issue slot
3582292SN/A    int glbMemInstAvail;
3592292SN/A
3602292SN/A    void
3612292SN/A    regStats();
3622292SN/A
3632329SN/A    LdsState &
3642329SN/A    getLds() const
3652292SN/A    {
3662292SN/A        return lds;
3672292SN/A    }
3682292SN/A
3692292SN/A    int32_t
3702292SN/A    getRefCounter(const uint32_t dispatchId, const uint32_t wgId) const;
3712292SN/A
3722292SN/A    bool
3732292SN/A    sendToLds(GPUDynInstPtr gpuDynInst) __attribute__((warn_unused_result));
3742292SN/A
3752292SN/A    typedef std::unordered_map<Addr, std::pair<int, int>> pageDataStruct;
3762292SN/A    pageDataStruct pageAccesses;
3772348SN/A
3782307SN/A    class CUExitCallback : public Callback
3792307SN/A    {
3802292SN/A      private:
3812292SN/A        ComputeUnit *computeUnit;
3822292SN/A
3832292SN/A      public:
3842292SN/A        virtual ~CUExitCallback() { }
3852292SN/A
3862292SN/A        CUExitCallback(ComputeUnit *_cu)
3872292SN/A        {
3882292SN/A            computeUnit = _cu;
3892292SN/A        }
3902292SN/A
3912292SN/A        virtual void
3922292SN/A        process();
3932292SN/A    };
3942698Sktlim@umich.edu
3952698Sktlim@umich.edu    CUExitCallback *cuExitCallback;
3962693Sktlim@umich.edu
3972698Sktlim@umich.edu    /** Data access Port **/
3982678Sktlim@umich.edu    class DataPort : public MasterPort
3992678Sktlim@umich.edu    {
4002329SN/A      public:
4012292SN/A        DataPort(const std::string &_name, ComputeUnit *_cu, PortID _index)
4022292SN/A            : MasterPort(_name, _cu), computeUnit(_cu),
4032348SN/A              index(_index) { }
4042292SN/A
4052292SN/A        bool snoopRangeSent;
4062348SN/A
4072292SN/A        struct SenderState : public Packet::SenderState
4082292SN/A        {
4092292SN/A            GPUDynInstPtr _gpuDynInst;
4102292SN/A            int port_index;
4112292SN/A            Packet::SenderState *saved;
4122292SN/A
4132292SN/A            SenderState(GPUDynInstPtr gpuDynInst, PortID _port_index,
4142292SN/A                        Packet::SenderState *sender_state=nullptr)
4152727Sktlim@umich.edu                : _gpuDynInst(gpuDynInst),
4162727Sktlim@umich.edu                  port_index(_port_index),
4172307SN/A                  saved(sender_state) { }
4183126Sktlim@umich.edu        };
4193126Sktlim@umich.edu
4203126Sktlim@umich.edu        class MemReqEvent : public Event
4213126Sktlim@umich.edu        {
4223126Sktlim@umich.edu          private:
4233126Sktlim@umich.edu            DataPort *dataPort;
4243126Sktlim@umich.edu            PacketPtr pkt;
4253126Sktlim@umich.edu
4263126Sktlim@umich.edu          public:
4273126Sktlim@umich.edu            MemReqEvent(DataPort *_data_port, PacketPtr _pkt)
4283126Sktlim@umich.edu                : Event(), dataPort(_data_port), pkt(_pkt)
4293126Sktlim@umich.edu            {
4303126Sktlim@umich.edu              setFlags(Event::AutoDelete);
4312727Sktlim@umich.edu            }
4322727Sktlim@umich.edu
4332727Sktlim@umich.edu            void process();
4342727Sktlim@umich.edu            const char *description() const;
4352727Sktlim@umich.edu        };
4362727Sktlim@umich.edu
4372727Sktlim@umich.edu        class MemRespEvent : public Event
4382727Sktlim@umich.edu        {
4392727Sktlim@umich.edu          private:
4402727Sktlim@umich.edu            DataPort *dataPort;
4412727Sktlim@umich.edu            PacketPtr pkt;
4422727Sktlim@umich.edu
4432727Sktlim@umich.edu          public:
4442727Sktlim@umich.edu            MemRespEvent(DataPort *_data_port, PacketPtr _pkt)
4452727Sktlim@umich.edu                : Event(), dataPort(_data_port), pkt(_pkt)
4462292SN/A            {
4472292SN/A              setFlags(Event::AutoDelete);
4482292SN/A            }
4492669Sktlim@umich.edu
4502292SN/A            void process();
4512292SN/A            const char *description() const;
4522292SN/A        };
4532669Sktlim@umich.edu
4542292SN/A        std::deque<std::pair<PacketPtr, GPUDynInstPtr>> retries;
4552292SN/A
4562292SN/A      protected:
4572292SN/A        ComputeUnit *computeUnit;
4582292SN/A        int index;
4592292SN/A
4602292SN/A        virtual bool recvTimingResp(PacketPtr pkt);
4612292SN/A        virtual Tick recvAtomic(PacketPtr pkt) { return 0; }
4622292SN/A        virtual void recvFunctional(PacketPtr pkt) { }
4632292SN/A        virtual void recvRangeChange() { }
4642292SN/A        virtual void recvReqRetry();
4652292SN/A
4662292SN/A        virtual void
4672292SN/A        getDeviceAddressRanges(AddrRangeList &resp, bool &snoop)
4682292SN/A        {
4692292SN/A            resp.clear();
4702292SN/A            snoop = true;
4712292SN/A        }
4722292SN/A
4732292SN/A    };
4742292SN/A
4752292SN/A    // Instruction cache access port
4762292SN/A    class SQCPort : public MasterPort
4772292SN/A    {
4782292SN/A      public:
4792292SN/A        SQCPort(const std::string &_name, ComputeUnit *_cu, PortID _index)
4802292SN/A            : MasterPort(_name, _cu), computeUnit(_cu),
4812292SN/A              index(_index) { }
4822292SN/A
4832292SN/A        bool snoopRangeSent;
4842292SN/A
4852292SN/A        struct SenderState : public Packet::SenderState
4862292SN/A        {
4872292SN/A            Wavefront *wavefront;
4882669Sktlim@umich.edu            Packet::SenderState *saved;
4892292SN/A
4902669Sktlim@umich.edu            SenderState(Wavefront *_wavefront, Packet::SenderState
4912292SN/A                    *sender_state=nullptr)
4922669Sktlim@umich.edu                : wavefront(_wavefront), saved(sender_state) { }
4932669Sktlim@umich.edu        };
4942669Sktlim@umich.edu
4952292SN/A        std::deque<std::pair<PacketPtr, Wavefront*>> retries;
4962292SN/A
4972292SN/A      protected:
4982292SN/A        ComputeUnit *computeUnit;
4992292SN/A        int index;
5003172Sstever@eecs.umich.edu
5012731Sktlim@umich.edu        virtual bool recvTimingResp(PacketPtr pkt);
5022669Sktlim@umich.edu        virtual Tick recvAtomic(PacketPtr pkt) { return 0; }
5032727Sktlim@umich.edu        virtual void recvFunctional(PacketPtr pkt) { }
5044032Sktlim@umich.edu        virtual void recvRangeChange() { }
5054032Sktlim@umich.edu        virtual void recvReqRetry();
5064032Sktlim@umich.edu
5074032Sktlim@umich.edu        virtual void
5084032Sktlim@umich.edu        getDeviceAddressRanges(AddrRangeList &resp, bool &snoop)
5092292SN/A        {
5102292SN/A            resp.clear();
5112292SN/A            snoop = true;
5122292SN/A        }
5132669Sktlim@umich.edu     };
5142292SN/A
5152292SN/A    /** Data TLB port **/
5162292SN/A    class DTLBPort : public MasterPort
5172292SN/A    {
5182292SN/A      public:
5192669Sktlim@umich.edu        DTLBPort(const std::string &_name, ComputeUnit *_cu, PortID _index)
5202292SN/A            : MasterPort(_name, _cu), computeUnit(_cu),
5213172Sstever@eecs.umich.edu              index(_index), stalled(false)
5223326Sktlim@umich.edu        { }
5233326Sktlim@umich.edu
5243326Sktlim@umich.edu        bool isStalled() { return stalled; }
5253326Sktlim@umich.edu        void stallPort() { stalled = true; }
5263326Sktlim@umich.edu        void unstallPort() { stalled = false; }
5273326Sktlim@umich.edu
5282292SN/A        /**
5292292SN/A         * here we queue all the translation requests that were
5302292SN/A         * not successfully sent.
5312292SN/A         */
5322292SN/A        std::deque<PacketPtr> retries;
5332292SN/A
5342292SN/A        /** SenderState is information carried along with the packet
5352292SN/A         * throughout the TLB hierarchy
5362292SN/A         */
5372292SN/A        struct SenderState: public Packet::SenderState
5382292SN/A        {
5392292SN/A            // the memInst that this is associated with
5402292SN/A            GPUDynInstPtr _gpuDynInst;
5412292SN/A
5422292SN/A            // the lane in the memInst this is associated with, so we send
5432292SN/A            // the memory request down the right port
5442292SN/A            int portIndex;
5452292SN/A
5464032Sktlim@umich.edu            // constructor used for packets involved in timing accesses
5474032Sktlim@umich.edu            SenderState(GPUDynInstPtr gpuDynInst, PortID port_index)
5484032Sktlim@umich.edu                : _gpuDynInst(gpuDynInst), portIndex(port_index) { }
5494032Sktlim@umich.edu
5502292SN/A        };
5512292SN/A
5522292SN/A      protected:
5532292SN/A        ComputeUnit *computeUnit;
5542669Sktlim@umich.edu        int index;
5552292SN/A        bool stalled;
5562669Sktlim@umich.edu
5572669Sktlim@umich.edu        virtual bool recvTimingResp(PacketPtr pkt);
5582292SN/A        virtual Tick recvAtomic(PacketPtr pkt) { return 0; }
5592669Sktlim@umich.edu        virtual void recvFunctional(PacketPtr pkt) { }
5602292SN/A        virtual void recvRangeChange() { }
5612292SN/A        virtual void recvReqRetry();
5622669Sktlim@umich.edu    };
5632669Sktlim@umich.edu
5642292SN/A    class ITLBPort : public MasterPort
5652292SN/A    {
5664032Sktlim@umich.edu      public:
5672329SN/A        ITLBPort(const std::string &_name, ComputeUnit *_cu)
5682669Sktlim@umich.edu            : MasterPort(_name, _cu), computeUnit(_cu), stalled(false) { }
5692292SN/A
5704326Sgblack@eecs.umich.edu
5713803Sgblack@eecs.umich.edu        bool isStalled() { return stalled; }
5722669Sktlim@umich.edu        void stallPort() { stalled = true; }
5732669Sktlim@umich.edu        void unstallPort() { stalled = false; }
5742292SN/A
5754326Sgblack@eecs.umich.edu        /**
5764326Sgblack@eecs.umich.edu         * here we queue all the translation requests that were
5772292SN/A         * not successfully sent.
5782292SN/A         */
5792292SN/A        std::deque<PacketPtr> retries;
5802693Sktlim@umich.edu
5812678Sktlim@umich.edu        /** SenderState is information carried along with the packet
5824022Sstever@eecs.umich.edu         * throughout the TLB hierarchy
5834022Sstever@eecs.umich.edu         */
5842678Sktlim@umich.edu        struct SenderState: public Packet::SenderState
5852678Sktlim@umich.edu        {
5862678Sktlim@umich.edu            // The wavefront associated with this request
5872292SN/A            Wavefront *wavefront;
5882292SN/A
5892292SN/A            SenderState(Wavefront *_wavefront) : wavefront(_wavefront) { }
5902292SN/A        };
5912292SN/A
5922678Sktlim@umich.edu      protected:
5932727Sktlim@umich.edu        ComputeUnit *computeUnit;
5942292SN/A        bool stalled;
5952292SN/A
5962292SN/A        virtual bool recvTimingResp(PacketPtr pkt);
5972292SN/A        virtual Tick recvAtomic(PacketPtr pkt) { return 0; }
5982292SN/A        virtual void recvFunctional(PacketPtr pkt) { }
5992292SN/A        virtual void recvRangeChange() { }
6002292SN/A        virtual void recvReqRetry();
6012292SN/A    };
6022292SN/A
6032292SN/A    /**
6044032Sktlim@umich.edu     * the port intended to communicate between the CU and its LDS
6052292SN/A     */
6062292SN/A    class LDSPort : public MasterPort
6072292SN/A    {
6082292SN/A      public:
6092292SN/A        LDSPort(const std::string &_name, ComputeUnit *_cu, PortID _id)
6102292SN/A        : MasterPort(_name, _cu, _id), computeUnit(_cu)
6112292SN/A        {
6122669Sktlim@umich.edu        }
6132292SN/A
6142292SN/A        bool isStalled() const { return stalled; }
6152292SN/A        void stallPort() { stalled = true; }
6162292SN/A        void unstallPort() { stalled = false; }
6172292SN/A
6182292SN/A        /**
6192292SN/A         * here we queue all the requests that were
6202292SN/A         * not successfully sent.
6212669Sktlim@umich.edu         */
6222927Sktlim@umich.edu        std::queue<PacketPtr> retries;
6234032Sktlim@umich.edu
6242727Sktlim@umich.edu        /**
6252292SN/A         *  SenderState is information carried along with the packet, esp. the
6262292SN/A         *  GPUDynInstPtr
6272292SN/A         */
6282292SN/A        class SenderState: public Packet::SenderState
6292292SN/A        {
6302669Sktlim@umich.edu          protected:
6312292SN/A            // The actual read/write/atomic request that goes with this command
6324032Sktlim@umich.edu            GPUDynInstPtr _gpuDynInst = nullptr;
6334032Sktlim@umich.edu
6344032Sktlim@umich.edu          public:
6354032Sktlim@umich.edu            SenderState(GPUDynInstPtr gpuDynInst):
6364032Sktlim@umich.edu              _gpuDynInst(gpuDynInst)
6372292SN/A            {
6382292SN/A            }
6392292SN/A
6402292SN/A            GPUDynInstPtr
6412292SN/A            getMemInst() const
6422907Sktlim@umich.edu            {
6432669Sktlim@umich.edu              return _gpuDynInst;
6442292SN/A            }
6452669Sktlim@umich.edu        };
6462669Sktlim@umich.edu
6472292SN/A        virtual bool
6482292SN/A        sendTimingReq(PacketPtr pkt);
6492292SN/A
6502907Sktlim@umich.edu      protected:
6512907Sktlim@umich.edu
6523228Sktlim@umich.edu        bool stalled = false; ///< whether or not it is stalled
6534022Sstever@eecs.umich.edu
6543228Sktlim@umich.edu        ComputeUnit *computeUnit;
6553228Sktlim@umich.edu
6563228Sktlim@umich.edu        virtual bool
6573228Sktlim@umich.edu        recvTimingResp(PacketPtr pkt);
6583228Sktlim@umich.edu
6593228Sktlim@umich.edu        virtual Tick
6603228Sktlim@umich.edu        recvAtomic(PacketPtr pkt) { return 0; }
6613228Sktlim@umich.edu
6622907Sktlim@umich.edu        virtual void
6633228Sktlim@umich.edu        recvFunctional(PacketPtr pkt)
6643228Sktlim@umich.edu        {
6653228Sktlim@umich.edu        }
6663228Sktlim@umich.edu
6673228Sktlim@umich.edu        virtual void
6684032Sktlim@umich.edu        recvRangeChange()
6693228Sktlim@umich.edu        {
6703228Sktlim@umich.edu        }
6714032Sktlim@umich.edu
6724032Sktlim@umich.edu        virtual void
6733228Sktlim@umich.edu        recvReqRetry();
6743221Sktlim@umich.edu    };
6753221Sktlim@umich.edu
6763221Sktlim@umich.edu    /** The port to access the Local Data Store
6772907Sktlim@umich.edu     *  Can be connected to a LDS object
6782907Sktlim@umich.edu     */
6792907Sktlim@umich.edu    LDSPort *ldsPort = nullptr;
6802907Sktlim@umich.edu
6812907Sktlim@umich.edu    LDSPort *
6822907Sktlim@umich.edu    getLdsPort() const
6832907Sktlim@umich.edu    {
6842907Sktlim@umich.edu        return ldsPort;
6852907Sktlim@umich.edu    }
6864032Sktlim@umich.edu
6874032Sktlim@umich.edu    /** The memory port for SIMD data accesses.
6884032Sktlim@umich.edu     *  Can be connected to PhysMem for Ruby for timing simulations
6892727Sktlim@umich.edu     */
6903014Srdreslin@umich.edu    std::vector<DataPort*> memPort;
6913014Srdreslin@umich.edu    // port to the TLB hierarchy (i.e., the L1 TLB)
6922669Sktlim@umich.edu    std::vector<DTLBPort*> tlbPort;
6932669Sktlim@umich.edu    // port to the SQC (i.e. the I-cache)
6942669Sktlim@umich.edu    SQCPort *sqcPort;
6952292SN/A    // port to the SQC TLB (there's a separate TLB for each I-cache)
6962669Sktlim@umich.edu    ITLBPort *sqcTLBPort;
6972669Sktlim@umich.edu
6982669Sktlim@umich.edu    virtual BaseMasterPort&
6992669Sktlim@umich.edu    getMasterPort(const std::string &if_name, PortID idx)
7002669Sktlim@umich.edu    {
7012669Sktlim@umich.edu        if (if_name == "memory_port") {
7022669Sktlim@umich.edu            memPort[idx] = new DataPort(csprintf("%s-port%d", name(), idx),
7032669Sktlim@umich.edu                                        this, idx);
7042292SN/A            return *memPort[idx];
7052292SN/A        } else if (if_name == "translation_port") {
7062669Sktlim@umich.edu            tlbPort[idx] = new DTLBPort(csprintf("%s-port%d", name(), idx),
7072292SN/A                                        this, idx);
7082292SN/A            return *tlbPort[idx];
7092292SN/A        } else if (if_name == "sqc_port") {
7102292SN/A            sqcPort = new SQCPort(csprintf("%s-port%d", name(), idx),
7112292SN/A                                  this, idx);
7122669Sktlim@umich.edu            return *sqcPort;
7132292SN/A        } else if (if_name == "sqc_tlb_port") {
7142292SN/A            sqcTLBPort = new ITLBPort(csprintf("%s-port", name()), this);
7152292SN/A            return *sqcTLBPort;
7162292SN/A        } else if (if_name == "ldsPort") {
7172292SN/A            if (ldsPort) {
7182669Sktlim@umich.edu                fatal("an LDS port was already allocated");
7192292SN/A            }
7202329SN/A            ldsPort = new LDSPort(csprintf("%s-port", name()), this, idx);
7212292SN/A            return *ldsPort;
7222292SN/A        } else {
7234326Sgblack@eecs.umich.edu            panic("incorrect port name");
7244326Sgblack@eecs.umich.edu        }
7254326Sgblack@eecs.umich.edu    }
7264326Sgblack@eecs.umich.edu
7272329SN/A    // xact_cas_load()
7282292SN/A    class waveIdentifier
7292292SN/A    {
7302292SN/A      public:
7312292SN/A        waveIdentifier() { }
7322292SN/A        waveIdentifier(int _simdId, int _wfSlotId)
7332292SN/A          : simdId(_simdId), wfSlotId(_wfSlotId) { }
734
735        int simdId;
736        int wfSlotId;
737    };
738
739    class waveQueue
740    {
741      public:
742        std::list<waveIdentifier> waveIDQueue;
743    };
744    std::map<unsigned, waveQueue> xactCasLoadMap;
745
746    uint64_t getAndIncSeqNum() { return globalSeqNum++; }
747
748  private:
749    uint64_t globalSeqNum;
750    int wavefrontSize;
751};
752
753#endif // __COMPUTE_UNIT_HH__
754