compute_unit.hh revision 11534:7106f550afad
12292SN/A/* 22329SN/A * Copyright (c) 2011-2015 Advanced Micro Devices, Inc. 32292SN/A * All rights reserved. 42292SN/A * 52292SN/A * For use for simulation and test purposes only 62292SN/A * 72292SN/A * Redistribution and use in source and binary forms, with or without 82292SN/A * modification, are permitted provided that the following conditions are met: 92292SN/A * 102292SN/A * 1. Redistributions of source code must retain the above copyright notice, 112292SN/A * this list of conditions and the following disclaimer. 122292SN/A * 132292SN/A * 2. Redistributions in binary form must reproduce the above copyright notice, 142292SN/A * this list of conditions and the following disclaimer in the documentation 152292SN/A * and/or other materials provided with the distribution. 162292SN/A * 172292SN/A * 3. Neither the name of the copyright holder nor the names of its contributors 182292SN/A * may be used to endorse or promote products derived from this software 192292SN/A * without specific prior written permission. 202292SN/A * 212292SN/A * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 222292SN/A * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 232292SN/A * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 242292SN/A * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 252292SN/A * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 262292SN/A * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 272689Sktlim@umich.edu * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 282689Sktlim@umich.edu * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 292689Sktlim@umich.edu * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 302292SN/A * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 312292SN/A * POSSIBILITY OF SUCH DAMAGE. 322292SN/A * 332292SN/A * Author: John Kalamatianos, Anthony Gutierrez 342292SN/A */ 352329SN/A 362292SN/A#ifndef __COMPUTE_UNIT_HH__ 372292SN/A#define __COMPUTE_UNIT_HH__ 382292SN/A 392329SN/A#include <deque> 403326Sktlim@umich.edu#include <map> 412292SN/A#include <unordered_map> 422292SN/A#include <vector> 432292SN/A 443348Sbinkertn@umich.edu#include "base/callback.hh" 452669Sktlim@umich.edu#include "base/statistics.hh" 462292SN/A#include "base/types.hh" 472292SN/A#include "enums/PrefetchType.hh" 482329SN/A#include "gpu-compute/exec_stage.hh" 492329SN/A#include "gpu-compute/fetch_stage.hh" 502329SN/A#include "gpu-compute/global_memory_pipeline.hh" 512329SN/A#include "gpu-compute/local_memory_pipeline.hh" 522329SN/A#include "gpu-compute/qstruct.hh" 532329SN/A#include "gpu-compute/schedule_stage.hh" 542329SN/A#include "gpu-compute/scoreboard_check_stage.hh" 552329SN/A#include "mem/mem_object.hh" 562329SN/A#include "mem/port.hh" 572329SN/A 582292SN/Astatic const int MAX_REGS_FOR_NON_VEC_MEM_INST = 1; 592292SN/Astatic const int MAX_WIDTH_FOR_MEM_INST = 32; 602292SN/A 612292SN/Aclass NDRange; 622292SN/Aclass Shader; 632292SN/Aclass VectorRegisterFile; 642292SN/A 652733Sktlim@umich.edustruct ComputeUnitParams; 662292SN/A 672292SN/Aenum EXEC_POLICY 682907Sktlim@umich.edu{ 692292SN/A OLDEST = 0, 702292SN/A RR 712292SN/A}; 722292SN/A 732292SN/A// List of execution units 742292SN/Aenum EXEC_UNIT 752292SN/A{ 762907Sktlim@umich.edu SIMD0 = 0, 772292SN/A SIMD1, 782292SN/A SIMD2, 792292SN/A SIMD3, 802292SN/A GLBMEM_PIPE, 812292SN/A LDSMEM_PIPE, 822727Sktlim@umich.edu NUM_UNITS 832727Sktlim@umich.edu}; 842727Sktlim@umich.edu 852292SN/Aenum TLB_CACHE 862733Sktlim@umich.edu{ 872292SN/A TLB_MISS_CACHE_MISS = 0, 882292SN/A TLB_MISS_CACHE_HIT, 892292SN/A TLB_HIT_CACHE_MISS, 902292SN/A TLB_HIT_CACHE_HIT 912292SN/A}; 922907Sktlim@umich.edu 932907Sktlim@umich.educlass ComputeUnit : public MemObject 942907Sktlim@umich.edu{ 952907Sktlim@umich.edu public: 962348SN/A FetchStage fetchStage; 972307SN/A ScoreboardCheckStage scoreboardCheckStage; 982307SN/A ScheduleStage scheduleStage; 992348SN/A ExecStage execStage; 1002307SN/A GlobalMemPipeline globalMemoryPipe; 1012307SN/A LocalMemPipeline localMemoryPipe; 1022348SN/A 1032307SN/A // Buffers used to communicate between various pipeline stages 1042307SN/A 1052292SN/A // List of waves which are ready to be scheduled. 1062292SN/A // Each execution resource has a ready list. readyList is 1072292SN/A // used to communicate between scoreboardCheck stage and 1082292SN/A // schedule stage 1092292SN/A // TODO: make enum to index readyList 1102292SN/A std::vector<std::vector<Wavefront*>> readyList; 1112292SN/A 1122292SN/A // Stores the status of waves. A READY implies the 1132292SN/A // wave is ready to be scheduled this cycle and 1142292SN/A // is already present in the readyList. waveStatusList is 1152292SN/A // used to communicate between scoreboardCheck stage and 1162292SN/A // schedule stage 1172292SN/A // TODO: convert std::pair to a class to increase readability 1182292SN/A std::vector<std::vector<std::pair<Wavefront*, WAVE_STATUS>>> waveStatusList; 1192292SN/A 1202292SN/A // List of waves which will be dispatched to 1212292SN/A // each execution resource. A FILLED implies 1222329SN/A // dispatch list is non-empty and 1232292SN/A // execution unit has something to execute 1242292SN/A // this cycle. Currently, the dispatch list of 1252292SN/A // an execution resource can hold only one wave because 1262292SN/A // an execution resource can execute only one wave in a cycle. 1272292SN/A // dispatchList is used to communicate between schedule 1282292SN/A // and exec stage 1292292SN/A // TODO: convert std::pair to a class to increase readability 1302292SN/A std::vector<std::pair<Wavefront*, DISPATCH_STATUS>> dispatchList; 1312292SN/A 1322292SN/A int rrNextMemID; // used by RR WF exec policy to cycle through WF's 1332292SN/A int rrNextALUWp; 1342292SN/A typedef ComputeUnitParams Params; 1352292SN/A std::vector<std::vector<Wavefront*>> wfList; 1362292SN/A int cu_id; 1372790Sktlim@umich.edu 1382790Sktlim@umich.edu // array of vector register files, one per SIMD 1392669Sktlim@umich.edu std::vector<VectorRegisterFile*> vrf; 1402669Sktlim@umich.edu // Number of vector ALU units (SIMDs) in CU 1412292SN/A int numSIMDs; 1422292SN/A // number of pipe stages for bypassing data to next dependent single 1432292SN/A // precision vector instruction inside the vector ALU pipeline 1442292SN/A int spBypassPipeLength; 1452292SN/A // number of pipe stages for bypassing data to next dependent double 1462292SN/A // precision vector instruction inside the vector ALU pipeline 1472292SN/A int dpBypassPipeLength; 1482292SN/A // number of cycles per issue period 1492292SN/A int issuePeriod; 1502292SN/A 1512292SN/A // Number of global and local memory execution resources in CU 1522292SN/A int numGlbMemUnits; 1532292SN/A int numLocMemUnits; 1542292SN/A // tracks the last cycle a vector instruction was executed on a SIMD 1552292SN/A std::vector<uint64_t> lastExecCycle; 1562292SN/A 1572292SN/A // true if we allow a separate TLB per lane 1582292SN/A bool perLaneTLB; 1592292SN/A // if 0, TLB prefetching is off. 1602292SN/A int prefetchDepth; 1612292SN/A // if fixed-stride prefetching, this is the stride. 1622292SN/A int prefetchStride; 1632292SN/A 1642329SN/A std::vector<Addr> lastVaddrCU; 1652292SN/A std::vector<std::vector<Addr>> lastVaddrSimd; 1662292SN/A std::vector<std::vector<std::vector<Addr>>> lastVaddrWF; 1672292SN/A Enums::PrefetchType prefetchType; 1682348SN/A EXEC_POLICY exec_policy; 1692292SN/A 1702292SN/A bool xact_cas_mode; 1712292SN/A bool debugSegFault; 1722348SN/A bool functionalTLB; 1732292SN/A bool localMemBarrier; 1742292SN/A 1752292SN/A /* 1762348SN/A * for Counting page accesses 1772292SN/A * 1782292SN/A * cuExitCallback inherits from Callback. When you register a callback 1792292SN/A * function as an exit callback, it will get added to an exit callback 1802292SN/A * queue, such that on simulation exit, all callbacks in the callback 1812292SN/A * queue will have their process() function called. 1822292SN/A */ 1832292SN/A bool countPages; 1842292SN/A 1852292SN/A Shader *shader; 1862292SN/A uint32_t barrier_id; 1872292SN/A // vector of Vector ALU (MACC) pipelines 1882292SN/A std::vector<WaitClass> aluPipe; 1892292SN/A // minimum issue period per SIMD unit (in cycles) 1902292SN/A std::vector<WaitClass> wfWait; 1912292SN/A 1922292SN/A // Resource control for Vector Register File->Global Memory pipe buses 1932292SN/A std::vector<WaitClass> vrfToGlobalMemPipeBus; 1942292SN/A // Resource control for Vector Register File->Local Memory pipe buses 1952292SN/A std::vector<WaitClass> vrfToLocalMemPipeBus; 1962292SN/A int nextGlbMemBus; 1972292SN/A int nextLocMemBus; 1982292SN/A // Resource control for global memory to VRF data/address bus 1992292SN/A WaitClass glbMemToVrfBus; 2002292SN/A // Resource control for local memory to VRF data/address bus 2012292SN/A WaitClass locMemToVrfBus; 2022292SN/A 2032292SN/A uint32_t vrfToCoalescerBusWidth; // VRF->Coalescer data bus width in bytes 2042292SN/A uint32_t coalescerToVrfBusWidth; // Coalescer->VRF data bus width in bytes 2052292SN/A uint32_t numCyclesPerStoreTransfer; // number of cycles per vector store 2062292SN/A uint32_t numCyclesPerLoadTransfer; // number of cycles per vector load 2072292SN/A 2082292SN/A Tick req_tick_latency; 2092292SN/A Tick resp_tick_latency; 2102292SN/A 2112292SN/A // number of vector registers being reserved for each SIMD unit 2122678Sktlim@umich.edu std::vector<int> vectorRegsReserved; 2132678Sktlim@umich.edu // number of vector registers per SIMD unit 2142292SN/A uint32_t numVecRegsPerSimd; 2152907Sktlim@umich.edu // Support for scheduling VGPR status update events 2162907Sktlim@umich.edu std::vector<std::pair<uint32_t, uint32_t> > regIdxVec; 2172907Sktlim@umich.edu std::vector<uint64_t> timestampVec; 2182292SN/A std::vector<uint8_t> statusVec; 2192698Sktlim@umich.edu 2202678Sktlim@umich.edu void 2212678Sktlim@umich.edu registerEvent(uint32_t simdId, 2222698Sktlim@umich.edu uint32_t regIdx, 2233349Sbinkertn@umich.edu uint32_t operandSize, 2242693Sktlim@umich.edu uint64_t when, 2252292SN/A uint8_t newStatus) { 2262292SN/A regIdxVec.push_back(std::make_pair(simdId, regIdx)); 2272292SN/A timestampVec.push_back(when); 2282292SN/A statusVec.push_back(newStatus); 2292292SN/A if (operandSize > 4) { 2302292SN/A regIdxVec.push_back(std::make_pair(simdId, 2312292SN/A ((regIdx + 1) % 2322292SN/A numVecRegsPerSimd))); 2332292SN/A timestampVec.push_back(when); 2342292SN/A statusVec.push_back(newStatus); 2352292SN/A } 2362292SN/A } 2372329SN/A 2382329SN/A void updateEvents(); 2392329SN/A 2402329SN/A // this hash map will keep track of page divergence 2412292SN/A // per memory instruction per wavefront. The hash map 2422292SN/A // is cleared in GPUDynInst::updateStats() in gpu_dyn_inst.cc. 2432733Sktlim@umich.edu std::map<Addr, int> pagesTouched; 2442292SN/A 2452292SN/A ComputeUnit(const Params *p); 2462292SN/A ~ComputeUnit(); 2472292SN/A int spBypassLength() { return spBypassPipeLength; }; 2482907Sktlim@umich.edu int dpBypassLength() { return dpBypassPipeLength; }; 2492907Sktlim@umich.edu int storeBusLength() { return numCyclesPerStoreTransfer; }; 2502669Sktlim@umich.edu int loadBusLength() { return numCyclesPerLoadTransfer; }; 2512907Sktlim@umich.edu int wfSize() const { return wavefrontSize; }; 2522907Sktlim@umich.edu 2532292SN/A void resizeRegFiles(int num_cregs, int num_sregs, int num_dregs); 2542698Sktlim@umich.edu void exec(); 2552678Sktlim@umich.edu void initiateFetch(Wavefront *wavefront); 2562678Sktlim@umich.edu void fetch(PacketPtr pkt, Wavefront *wavefront); 2572678Sktlim@umich.edu void FillKernelState(Wavefront *w, NDRange *ndr); 2582698Sktlim@umich.edu 2592678Sktlim@umich.edu void StartWF(Wavefront *w, WFContext *wfCtx, int trueWgSize[], 2602678Sktlim@umich.edu int trueWgSizeTotal); 2612678Sktlim@umich.edu 2622678Sktlim@umich.edu void InitializeWFContext(WFContext *wfCtx, NDRange *ndr, int cnt, 2632698Sktlim@umich.edu int trueWgSize[], int trueWgSizeTotal, 2642678Sktlim@umich.edu LdsChunk *ldsChunk, uint64_t origSpillMemStart); 2652698Sktlim@umich.edu 2662678Sktlim@umich.edu void StartWorkgroup(NDRange *ndr); 2672698Sktlim@umich.edu int ReadyWorkgroup(NDRange *ndr); 2682678Sktlim@umich.edu 2692698Sktlim@umich.edu bool isVecAlu(int unitId) { return unitId >= SIMD0 && unitId <= SIMD3; } 2702678Sktlim@umich.edu bool isGlbMem(int unitId) { return unitId == GLBMEM_PIPE; } 2712678Sktlim@umich.edu bool isShrMem(int unitId) { return unitId == LDSMEM_PIPE; } 2722678Sktlim@umich.edu int GlbMemUnitId() { return GLBMEM_PIPE; } 2732698Sktlim@umich.edu int ShrMemUnitId() { return LDSMEM_PIPE; } 2742678Sktlim@umich.edu int nextGlbRdBus() { return (++nextGlbMemBus) % numGlbMemUnits; } 2752678Sktlim@umich.edu int nextLocRdBus() { return (++nextLocMemBus) % numLocMemUnits; } 2762678Sktlim@umich.edu /* This function cycles through all the wavefronts in all the phases to see 2772678Sktlim@umich.edu * if all of the wavefronts which should be associated with one barrier 2782678Sktlim@umich.edu * (denoted with _barrier_id), are all at the same barrier in the program 2792678Sktlim@umich.edu * (denoted by bcnt). When the number at the barrier matches bslots, then 2802678Sktlim@umich.edu * return true. 2812678Sktlim@umich.edu */ 2822678Sktlim@umich.edu int AllAtBarrier(uint32_t _barrier_id, uint32_t bcnt, uint32_t bslots); 2832678Sktlim@umich.edu bool cedeSIMD(int simdId, int wfSlotId); 2842678Sktlim@umich.edu 2852678Sktlim@umich.edu template<typename c0, typename c1> void doSmReturn(GPUDynInstPtr gpuDynInst); 2862698Sktlim@umich.edu virtual void init(); 2872678Sktlim@umich.edu void sendRequest(GPUDynInstPtr gpuDynInst, int index, PacketPtr pkt); 2882678Sktlim@umich.edu void sendSyncRequest(GPUDynInstPtr gpuDynInst, int index, PacketPtr pkt); 2892698Sktlim@umich.edu void injectGlobalMemFence(GPUDynInstPtr gpuDynInst, 2902678Sktlim@umich.edu bool kernelLaunch=true, 2912678Sktlim@umich.edu RequestPtr req=nullptr); 2922678Sktlim@umich.edu void handleMemPacket(PacketPtr pkt, int memport_index); 2932678Sktlim@umich.edu bool processTimingPacket(PacketPtr pkt); 2942678Sktlim@umich.edu void processFetchReturn(PacketPtr pkt); 2952678Sktlim@umich.edu void updatePageDivergenceDist(Addr addr); 2962292SN/A 2972292SN/A MasterID masterId() { return _masterId; } 2982292SN/A 2992292SN/A bool isDone() const; 3004326Sgblack@eecs.umich.edu bool isSimdDone(uint32_t) const; 3012292SN/A 3024326Sgblack@eecs.umich.edu protected: 3034326Sgblack@eecs.umich.edu MasterID _masterId; 3044326Sgblack@eecs.umich.edu 3052292SN/A LdsState &lds; 3062292SN/A 3072292SN/A public: 3084326Sgblack@eecs.umich.edu // the following stats compute the avg. TLB accesslatency per 3092292SN/A // uncoalesced request (only for data) 3104326Sgblack@eecs.umich.edu Stats::Scalar tlbRequests; 3114326Sgblack@eecs.umich.edu Stats::Scalar tlbCycles; 3124326Sgblack@eecs.umich.edu Stats::Formula tlbLatency; 3132292SN/A // hitsPerTLBLevel[x] are the hits in Level x TLB. x = 0 is the page table. 3142292SN/A Stats::Vector hitsPerTLBLevel; 3152292SN/A 3162669Sktlim@umich.edu Stats::Scalar ldsBankAccesses; 3172669Sktlim@umich.edu Stats::Distribution ldsBankConflictDist; 3182292SN/A 3192292SN/A // over all memory instructions executed over all wavefronts 3202292SN/A // how many touched 0-4 pages, 4-8, ..., 60-64 pages 3214326Sgblack@eecs.umich.edu Stats::Distribution pageDivergenceDist; 3222292SN/A Stats::Scalar dynamicGMemInstrCnt; 3232292SN/A Stats::Scalar dynamicLMemInstrCnt; 3242292SN/A 3252292SN/A Stats::Scalar wgBlockedDueLdsAllocation; 3262292SN/A // Number of instructions executed, i.e. if 64 (or 32 or 7) lanes are active 3272292SN/A // when the instruction is committed, this number is still incremented by 1 3282292SN/A Stats::Scalar numInstrExecuted; 3292329SN/A // Number of cycles among successive instruction executions across all 3302292SN/A // wavefronts of the same CU 3312292SN/A Stats::Distribution execRateDist; 3322292SN/A // number of individual vector operations executed 3332292SN/A Stats::Scalar numVecOpsExecuted; 3342292SN/A // Total cycles that something is running on the GPU 3352292SN/A Stats::Scalar totalCycles; 3362292SN/A Stats::Formula vpc; // vector ops per cycle 3372292SN/A Stats::Formula ipc; // vector instructions per cycle 3382292SN/A Stats::Distribution controlFlowDivergenceDist; 3392292SN/A Stats::Distribution activeLanesPerGMemInstrDist; 3402329SN/A Stats::Distribution activeLanesPerLMemInstrDist; 3412329SN/A // number of vector ALU instructions received 3422329SN/A Stats::Formula numALUInstsExecuted; 3432292SN/A // number of times a WG can not start due to lack of free VGPRs in SIMDs 3442329SN/A Stats::Scalar numTimesWgBlockedDueVgprAlloc; 3452329SN/A Stats::Scalar numCASOps; 3462329SN/A Stats::Scalar numFailedCASOps; 3472292SN/A Stats::Scalar completedWfs; 3482292SN/A // flag per vector SIMD unit that is set when there is at least one 3492292SN/A // WV that has a vector ALU instruction as the oldest in its 3502292SN/A // Instruction Buffer: Defined in the Scoreboard stage, consumed 3512329SN/A // by the Execute stage. 3522292SN/A std::vector<bool> vectorAluInstAvail; 3532292SN/A // number of available (oldest) LDS instructions that could have 3542292SN/A // been issued to the LDS at a specific issue slot 3552292SN/A int shrMemInstAvail; 3562292SN/A // number of available Global memory instructions that could have 3572292SN/A // been issued to TCP at a specific issue slot 3582292SN/A int glbMemInstAvail; 3592292SN/A 3602292SN/A void 3612292SN/A regStats(); 3622292SN/A 3632329SN/A LdsState & 3642329SN/A getLds() const 3652292SN/A { 3662292SN/A return lds; 3672292SN/A } 3682292SN/A 3692292SN/A int32_t 3702292SN/A getRefCounter(const uint32_t dispatchId, const uint32_t wgId) const; 3712292SN/A 3722292SN/A bool 3732292SN/A sendToLds(GPUDynInstPtr gpuDynInst) __attribute__((warn_unused_result)); 3742292SN/A 3752292SN/A typedef std::unordered_map<Addr, std::pair<int, int>> pageDataStruct; 3762292SN/A pageDataStruct pageAccesses; 3772348SN/A 3782307SN/A class CUExitCallback : public Callback 3792307SN/A { 3802292SN/A private: 3812292SN/A ComputeUnit *computeUnit; 3822292SN/A 3832292SN/A public: 3842292SN/A virtual ~CUExitCallback() { } 3852292SN/A 3862292SN/A CUExitCallback(ComputeUnit *_cu) 3872292SN/A { 3882292SN/A computeUnit = _cu; 3892292SN/A } 3902292SN/A 3912292SN/A virtual void 3922292SN/A process(); 3932292SN/A }; 3942698Sktlim@umich.edu 3952698Sktlim@umich.edu CUExitCallback *cuExitCallback; 3962693Sktlim@umich.edu 3972698Sktlim@umich.edu /** Data access Port **/ 3982678Sktlim@umich.edu class DataPort : public MasterPort 3992678Sktlim@umich.edu { 4002329SN/A public: 4012292SN/A DataPort(const std::string &_name, ComputeUnit *_cu, PortID _index) 4022292SN/A : MasterPort(_name, _cu), computeUnit(_cu), 4032348SN/A index(_index) { } 4042292SN/A 4052292SN/A bool snoopRangeSent; 4062348SN/A 4072292SN/A struct SenderState : public Packet::SenderState 4082292SN/A { 4092292SN/A GPUDynInstPtr _gpuDynInst; 4102292SN/A int port_index; 4112292SN/A Packet::SenderState *saved; 4122292SN/A 4132292SN/A SenderState(GPUDynInstPtr gpuDynInst, PortID _port_index, 4142292SN/A Packet::SenderState *sender_state=nullptr) 4152727Sktlim@umich.edu : _gpuDynInst(gpuDynInst), 4162727Sktlim@umich.edu port_index(_port_index), 4172307SN/A saved(sender_state) { } 4183126Sktlim@umich.edu }; 4193126Sktlim@umich.edu 4203126Sktlim@umich.edu class MemReqEvent : public Event 4213126Sktlim@umich.edu { 4223126Sktlim@umich.edu private: 4233126Sktlim@umich.edu DataPort *dataPort; 4243126Sktlim@umich.edu PacketPtr pkt; 4253126Sktlim@umich.edu 4263126Sktlim@umich.edu public: 4273126Sktlim@umich.edu MemReqEvent(DataPort *_data_port, PacketPtr _pkt) 4283126Sktlim@umich.edu : Event(), dataPort(_data_port), pkt(_pkt) 4293126Sktlim@umich.edu { 4303126Sktlim@umich.edu setFlags(Event::AutoDelete); 4312727Sktlim@umich.edu } 4322727Sktlim@umich.edu 4332727Sktlim@umich.edu void process(); 4342727Sktlim@umich.edu const char *description() const; 4352727Sktlim@umich.edu }; 4362727Sktlim@umich.edu 4372727Sktlim@umich.edu class MemRespEvent : public Event 4382727Sktlim@umich.edu { 4392727Sktlim@umich.edu private: 4402727Sktlim@umich.edu DataPort *dataPort; 4412727Sktlim@umich.edu PacketPtr pkt; 4422727Sktlim@umich.edu 4432727Sktlim@umich.edu public: 4442727Sktlim@umich.edu MemRespEvent(DataPort *_data_port, PacketPtr _pkt) 4452727Sktlim@umich.edu : Event(), dataPort(_data_port), pkt(_pkt) 4462292SN/A { 4472292SN/A setFlags(Event::AutoDelete); 4482292SN/A } 4492669Sktlim@umich.edu 4502292SN/A void process(); 4512292SN/A const char *description() const; 4522292SN/A }; 4532669Sktlim@umich.edu 4542292SN/A std::deque<std::pair<PacketPtr, GPUDynInstPtr>> retries; 4552292SN/A 4562292SN/A protected: 4572292SN/A ComputeUnit *computeUnit; 4582292SN/A int index; 4592292SN/A 4602292SN/A virtual bool recvTimingResp(PacketPtr pkt); 4612292SN/A virtual Tick recvAtomic(PacketPtr pkt) { return 0; } 4622292SN/A virtual void recvFunctional(PacketPtr pkt) { } 4632292SN/A virtual void recvRangeChange() { } 4642292SN/A virtual void recvReqRetry(); 4652292SN/A 4662292SN/A virtual void 4672292SN/A getDeviceAddressRanges(AddrRangeList &resp, bool &snoop) 4682292SN/A { 4692292SN/A resp.clear(); 4702292SN/A snoop = true; 4712292SN/A } 4722292SN/A 4732292SN/A }; 4742292SN/A 4752292SN/A // Instruction cache access port 4762292SN/A class SQCPort : public MasterPort 4772292SN/A { 4782292SN/A public: 4792292SN/A SQCPort(const std::string &_name, ComputeUnit *_cu, PortID _index) 4802292SN/A : MasterPort(_name, _cu), computeUnit(_cu), 4812292SN/A index(_index) { } 4822292SN/A 4832292SN/A bool snoopRangeSent; 4842292SN/A 4852292SN/A struct SenderState : public Packet::SenderState 4862292SN/A { 4872292SN/A Wavefront *wavefront; 4882669Sktlim@umich.edu Packet::SenderState *saved; 4892292SN/A 4902669Sktlim@umich.edu SenderState(Wavefront *_wavefront, Packet::SenderState 4912292SN/A *sender_state=nullptr) 4922669Sktlim@umich.edu : wavefront(_wavefront), saved(sender_state) { } 4932669Sktlim@umich.edu }; 4942669Sktlim@umich.edu 4952292SN/A std::deque<std::pair<PacketPtr, Wavefront*>> retries; 4962292SN/A 4972292SN/A protected: 4982292SN/A ComputeUnit *computeUnit; 4992292SN/A int index; 5003172Sstever@eecs.umich.edu 5012731Sktlim@umich.edu virtual bool recvTimingResp(PacketPtr pkt); 5022669Sktlim@umich.edu virtual Tick recvAtomic(PacketPtr pkt) { return 0; } 5032727Sktlim@umich.edu virtual void recvFunctional(PacketPtr pkt) { } 5044032Sktlim@umich.edu virtual void recvRangeChange() { } 5054032Sktlim@umich.edu virtual void recvReqRetry(); 5064032Sktlim@umich.edu 5074032Sktlim@umich.edu virtual void 5084032Sktlim@umich.edu getDeviceAddressRanges(AddrRangeList &resp, bool &snoop) 5092292SN/A { 5102292SN/A resp.clear(); 5112292SN/A snoop = true; 5122292SN/A } 5132669Sktlim@umich.edu }; 5142292SN/A 5152292SN/A /** Data TLB port **/ 5162292SN/A class DTLBPort : public MasterPort 5172292SN/A { 5182292SN/A public: 5192669Sktlim@umich.edu DTLBPort(const std::string &_name, ComputeUnit *_cu, PortID _index) 5202292SN/A : MasterPort(_name, _cu), computeUnit(_cu), 5213172Sstever@eecs.umich.edu index(_index), stalled(false) 5223326Sktlim@umich.edu { } 5233326Sktlim@umich.edu 5243326Sktlim@umich.edu bool isStalled() { return stalled; } 5253326Sktlim@umich.edu void stallPort() { stalled = true; } 5263326Sktlim@umich.edu void unstallPort() { stalled = false; } 5273326Sktlim@umich.edu 5282292SN/A /** 5292292SN/A * here we queue all the translation requests that were 5302292SN/A * not successfully sent. 5312292SN/A */ 5322292SN/A std::deque<PacketPtr> retries; 5332292SN/A 5342292SN/A /** SenderState is information carried along with the packet 5352292SN/A * throughout the TLB hierarchy 5362292SN/A */ 5372292SN/A struct SenderState: public Packet::SenderState 5382292SN/A { 5392292SN/A // the memInst that this is associated with 5402292SN/A GPUDynInstPtr _gpuDynInst; 5412292SN/A 5422292SN/A // the lane in the memInst this is associated with, so we send 5432292SN/A // the memory request down the right port 5442292SN/A int portIndex; 5452292SN/A 5464032Sktlim@umich.edu // constructor used for packets involved in timing accesses 5474032Sktlim@umich.edu SenderState(GPUDynInstPtr gpuDynInst, PortID port_index) 5484032Sktlim@umich.edu : _gpuDynInst(gpuDynInst), portIndex(port_index) { } 5494032Sktlim@umich.edu 5502292SN/A }; 5512292SN/A 5522292SN/A protected: 5532292SN/A ComputeUnit *computeUnit; 5542669Sktlim@umich.edu int index; 5552292SN/A bool stalled; 5562669Sktlim@umich.edu 5572669Sktlim@umich.edu virtual bool recvTimingResp(PacketPtr pkt); 5582292SN/A virtual Tick recvAtomic(PacketPtr pkt) { return 0; } 5592669Sktlim@umich.edu virtual void recvFunctional(PacketPtr pkt) { } 5602292SN/A virtual void recvRangeChange() { } 5612292SN/A virtual void recvReqRetry(); 5622669Sktlim@umich.edu }; 5632669Sktlim@umich.edu 5642292SN/A class ITLBPort : public MasterPort 5652292SN/A { 5664032Sktlim@umich.edu public: 5672329SN/A ITLBPort(const std::string &_name, ComputeUnit *_cu) 5682669Sktlim@umich.edu : MasterPort(_name, _cu), computeUnit(_cu), stalled(false) { } 5692292SN/A 5704326Sgblack@eecs.umich.edu 5713803Sgblack@eecs.umich.edu bool isStalled() { return stalled; } 5722669Sktlim@umich.edu void stallPort() { stalled = true; } 5732669Sktlim@umich.edu void unstallPort() { stalled = false; } 5742292SN/A 5754326Sgblack@eecs.umich.edu /** 5764326Sgblack@eecs.umich.edu * here we queue all the translation requests that were 5772292SN/A * not successfully sent. 5782292SN/A */ 5792292SN/A std::deque<PacketPtr> retries; 5802693Sktlim@umich.edu 5812678Sktlim@umich.edu /** SenderState is information carried along with the packet 5824022Sstever@eecs.umich.edu * throughout the TLB hierarchy 5834022Sstever@eecs.umich.edu */ 5842678Sktlim@umich.edu struct SenderState: public Packet::SenderState 5852678Sktlim@umich.edu { 5862678Sktlim@umich.edu // The wavefront associated with this request 5872292SN/A Wavefront *wavefront; 5882292SN/A 5892292SN/A SenderState(Wavefront *_wavefront) : wavefront(_wavefront) { } 5902292SN/A }; 5912292SN/A 5922678Sktlim@umich.edu protected: 5932727Sktlim@umich.edu ComputeUnit *computeUnit; 5942292SN/A bool stalled; 5952292SN/A 5962292SN/A virtual bool recvTimingResp(PacketPtr pkt); 5972292SN/A virtual Tick recvAtomic(PacketPtr pkt) { return 0; } 5982292SN/A virtual void recvFunctional(PacketPtr pkt) { } 5992292SN/A virtual void recvRangeChange() { } 6002292SN/A virtual void recvReqRetry(); 6012292SN/A }; 6022292SN/A 6032292SN/A /** 6044032Sktlim@umich.edu * the port intended to communicate between the CU and its LDS 6052292SN/A */ 6062292SN/A class LDSPort : public MasterPort 6072292SN/A { 6082292SN/A public: 6092292SN/A LDSPort(const std::string &_name, ComputeUnit *_cu, PortID _id) 6102292SN/A : MasterPort(_name, _cu, _id), computeUnit(_cu) 6112292SN/A { 6122669Sktlim@umich.edu } 6132292SN/A 6142292SN/A bool isStalled() const { return stalled; } 6152292SN/A void stallPort() { stalled = true; } 6162292SN/A void unstallPort() { stalled = false; } 6172292SN/A 6182292SN/A /** 6192292SN/A * here we queue all the requests that were 6202292SN/A * not successfully sent. 6212669Sktlim@umich.edu */ 6222927Sktlim@umich.edu std::queue<PacketPtr> retries; 6234032Sktlim@umich.edu 6242727Sktlim@umich.edu /** 6252292SN/A * SenderState is information carried along with the packet, esp. the 6262292SN/A * GPUDynInstPtr 6272292SN/A */ 6282292SN/A class SenderState: public Packet::SenderState 6292292SN/A { 6302669Sktlim@umich.edu protected: 6312292SN/A // The actual read/write/atomic request that goes with this command 6324032Sktlim@umich.edu GPUDynInstPtr _gpuDynInst = nullptr; 6334032Sktlim@umich.edu 6344032Sktlim@umich.edu public: 6354032Sktlim@umich.edu SenderState(GPUDynInstPtr gpuDynInst): 6364032Sktlim@umich.edu _gpuDynInst(gpuDynInst) 6372292SN/A { 6382292SN/A } 6392292SN/A 6402292SN/A GPUDynInstPtr 6412292SN/A getMemInst() const 6422907Sktlim@umich.edu { 6432669Sktlim@umich.edu return _gpuDynInst; 6442292SN/A } 6452669Sktlim@umich.edu }; 6462669Sktlim@umich.edu 6472292SN/A virtual bool 6482292SN/A sendTimingReq(PacketPtr pkt); 6492292SN/A 6502907Sktlim@umich.edu protected: 6512907Sktlim@umich.edu 6523228Sktlim@umich.edu bool stalled = false; ///< whether or not it is stalled 6534022Sstever@eecs.umich.edu 6543228Sktlim@umich.edu ComputeUnit *computeUnit; 6553228Sktlim@umich.edu 6563228Sktlim@umich.edu virtual bool 6573228Sktlim@umich.edu recvTimingResp(PacketPtr pkt); 6583228Sktlim@umich.edu 6593228Sktlim@umich.edu virtual Tick 6603228Sktlim@umich.edu recvAtomic(PacketPtr pkt) { return 0; } 6613228Sktlim@umich.edu 6622907Sktlim@umich.edu virtual void 6633228Sktlim@umich.edu recvFunctional(PacketPtr pkt) 6643228Sktlim@umich.edu { 6653228Sktlim@umich.edu } 6663228Sktlim@umich.edu 6673228Sktlim@umich.edu virtual void 6684032Sktlim@umich.edu recvRangeChange() 6693228Sktlim@umich.edu { 6703228Sktlim@umich.edu } 6714032Sktlim@umich.edu 6724032Sktlim@umich.edu virtual void 6733228Sktlim@umich.edu recvReqRetry(); 6743221Sktlim@umich.edu }; 6753221Sktlim@umich.edu 6763221Sktlim@umich.edu /** The port to access the Local Data Store 6772907Sktlim@umich.edu * Can be connected to a LDS object 6782907Sktlim@umich.edu */ 6792907Sktlim@umich.edu LDSPort *ldsPort = nullptr; 6802907Sktlim@umich.edu 6812907Sktlim@umich.edu LDSPort * 6822907Sktlim@umich.edu getLdsPort() const 6832907Sktlim@umich.edu { 6842907Sktlim@umich.edu return ldsPort; 6852907Sktlim@umich.edu } 6864032Sktlim@umich.edu 6874032Sktlim@umich.edu /** The memory port for SIMD data accesses. 6884032Sktlim@umich.edu * Can be connected to PhysMem for Ruby for timing simulations 6892727Sktlim@umich.edu */ 6903014Srdreslin@umich.edu std::vector<DataPort*> memPort; 6913014Srdreslin@umich.edu // port to the TLB hierarchy (i.e., the L1 TLB) 6922669Sktlim@umich.edu std::vector<DTLBPort*> tlbPort; 6932669Sktlim@umich.edu // port to the SQC (i.e. the I-cache) 6942669Sktlim@umich.edu SQCPort *sqcPort; 6952292SN/A // port to the SQC TLB (there's a separate TLB for each I-cache) 6962669Sktlim@umich.edu ITLBPort *sqcTLBPort; 6972669Sktlim@umich.edu 6982669Sktlim@umich.edu virtual BaseMasterPort& 6992669Sktlim@umich.edu getMasterPort(const std::string &if_name, PortID idx) 7002669Sktlim@umich.edu { 7012669Sktlim@umich.edu if (if_name == "memory_port") { 7022669Sktlim@umich.edu memPort[idx] = new DataPort(csprintf("%s-port%d", name(), idx), 7032669Sktlim@umich.edu this, idx); 7042292SN/A return *memPort[idx]; 7052292SN/A } else if (if_name == "translation_port") { 7062669Sktlim@umich.edu tlbPort[idx] = new DTLBPort(csprintf("%s-port%d", name(), idx), 7072292SN/A this, idx); 7082292SN/A return *tlbPort[idx]; 7092292SN/A } else if (if_name == "sqc_port") { 7102292SN/A sqcPort = new SQCPort(csprintf("%s-port%d", name(), idx), 7112292SN/A this, idx); 7122669Sktlim@umich.edu return *sqcPort; 7132292SN/A } else if (if_name == "sqc_tlb_port") { 7142292SN/A sqcTLBPort = new ITLBPort(csprintf("%s-port", name()), this); 7152292SN/A return *sqcTLBPort; 7162292SN/A } else if (if_name == "ldsPort") { 7172292SN/A if (ldsPort) { 7182669Sktlim@umich.edu fatal("an LDS port was already allocated"); 7192292SN/A } 7202329SN/A ldsPort = new LDSPort(csprintf("%s-port", name()), this, idx); 7212292SN/A return *ldsPort; 7222292SN/A } else { 7234326Sgblack@eecs.umich.edu panic("incorrect port name"); 7244326Sgblack@eecs.umich.edu } 7254326Sgblack@eecs.umich.edu } 7264326Sgblack@eecs.umich.edu 7272329SN/A // xact_cas_load() 7282292SN/A class waveIdentifier 7292292SN/A { 7302292SN/A public: 7312292SN/A waveIdentifier() { } 7322292SN/A waveIdentifier(int _simdId, int _wfSlotId) 7332292SN/A : simdId(_simdId), wfSlotId(_wfSlotId) { } 734 735 int simdId; 736 int wfSlotId; 737 }; 738 739 class waveQueue 740 { 741 public: 742 std::list<waveIdentifier> waveIDQueue; 743 }; 744 std::map<unsigned, waveQueue> xactCasLoadMap; 745 746 uint64_t getAndIncSeqNum() { return globalSeqNum++; } 747 748 private: 749 uint64_t globalSeqNum; 750 int wavefrontSize; 751}; 752 753#endif // __COMPUTE_UNIT_HH__ 754