wavefront.hh (11534:7106f550afad) | wavefront.hh (11639:2e8d4bd8108d) |
---|---|
1/* 2 * Copyright (c) 2011-2015 Advanced Micro Devices, Inc. 3 * All rights reserved. 4 * 5 * For use for simulation and test purposes only 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions are met: --- 141 unchanged lines hidden (view full) --- 150 151class Wavefront : public SimObject 152{ 153 public: 154 enum itype_e {I_ALU,I_GLOBAL,I_SHARED,I_FLAT,I_PRIVATE}; 155 enum status_e {S_STOPPED,S_RETURNING,S_RUNNING}; 156 157 // Base pointer for array of instruction pointers | 1/* 2 * Copyright (c) 2011-2015 Advanced Micro Devices, Inc. 3 * All rights reserved. 4 * 5 * For use for simulation and test purposes only 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions are met: --- 141 unchanged lines hidden (view full) --- 150 151class Wavefront : public SimObject 152{ 153 public: 154 enum itype_e {I_ALU,I_GLOBAL,I_SHARED,I_FLAT,I_PRIVATE}; 155 enum status_e {S_STOPPED,S_RETURNING,S_RUNNING}; 156 157 // Base pointer for array of instruction pointers |
158 uint64_t base_ptr; | 158 uint64_t basePtr; |
159 | 159 |
160 uint32_t old_barrier_cnt; 161 uint32_t barrier_cnt; 162 uint32_t barrier_id; 163 uint32_t barrier_slots; | 160 uint32_t oldBarrierCnt; 161 uint32_t barrierCnt; 162 uint32_t barrierId; 163 uint32_t barrierSlots; |
164 status_e status; 165 // HW slot id where the WF is mapped to inside a SIMD unit 166 int wfSlotId; | 164 status_e status; 165 // HW slot id where the WF is mapped to inside a SIMD unit 166 int wfSlotId; |
167 int kern_id; | 167 int kernId; |
168 // SIMD unit where the WF has been scheduled 169 int simdId; 170 // pointer to parent CU 171 ComputeUnit *computeUnit; 172 173 std::deque<GPUDynInstPtr> instructionBuffer; 174 175 bool pendingFetch; --- 12 unchanged lines hidden (view full) --- 188 bool isLmInstruction(GPUDynInstPtr ii); 189 bool isOldestInstGMem(); 190 bool isOldestInstLMem(); 191 bool isOldestInstPrivMem(); 192 bool isOldestInstFlatMem(); 193 bool isOldestInstALU(); 194 bool isOldestInstBarrier(); 195 // used for passing spill address to DDInstGPU | 168 // SIMD unit where the WF has been scheduled 169 int simdId; 170 // pointer to parent CU 171 ComputeUnit *computeUnit; 172 173 std::deque<GPUDynInstPtr> instructionBuffer; 174 175 bool pendingFetch; --- 12 unchanged lines hidden (view full) --- 188 bool isLmInstruction(GPUDynInstPtr ii); 189 bool isOldestInstGMem(); 190 bool isOldestInstLMem(); 191 bool isOldestInstPrivMem(); 192 bool isOldestInstFlatMem(); 193 bool isOldestInstALU(); 194 bool isOldestInstBarrier(); 195 // used for passing spill address to DDInstGPU |
196 std::vector<Addr> last_addr; 197 std::vector<uint32_t> workitemid[3]; 198 std::vector<uint32_t> workitemFlatId; 199 uint32_t workgroupid[3]; 200 uint32_t workgroupsz[3]; 201 uint32_t gridsz[3]; 202 uint32_t wg_id; 203 uint32_t wg_sz; 204 uint32_t dynwaveid; 205 uint32_t maxdynwaveid; 206 uint32_t dispatchid; | 196 std::vector<Addr> lastAddr; 197 std::vector<uint32_t> workItemId[3]; 198 std::vector<uint32_t> workItemFlatId; 199 uint32_t workGroupId[3]; 200 uint32_t workGroupSz[3]; 201 uint32_t gridSz[3]; 202 uint32_t wgId; 203 uint32_t wgSz; 204 uint32_t dynWaveId; 205 uint32_t maxDynWaveId; 206 uint32_t dispatchId; |
207 // outstanding global+local memory requests | 207 // outstanding global+local memory requests |
208 uint32_t outstanding_reqs; | 208 uint32_t outstandingReqs; |
209 // memory requests between scoreboard 210 // and execute stage not yet executed | 209 // memory requests between scoreboard 210 // and execute stage not yet executed |
211 uint32_t mem_reqs_in_pipe; | 211 uint32_t memReqsInPipe; |
212 // outstanding global memory write requests | 212 // outstanding global memory write requests |
213 uint32_t outstanding_reqs_wr_gm; | 213 uint32_t outstandingReqsWrGm; |
214 // outstanding local memory write requests | 214 // outstanding local memory write requests |
215 uint32_t outstanding_reqs_wr_lm; | 215 uint32_t outstandingReqsWrLm; |
216 // outstanding global memory read requests | 216 // outstanding global memory read requests |
217 uint32_t outstanding_reqs_rd_gm; | 217 uint32_t outstandingReqsRdGm; |
218 // outstanding local memory read requests | 218 // outstanding local memory read requests |
219 uint32_t outstanding_reqs_rd_lm; 220 uint32_t rd_lm_reqs_in_pipe; 221 uint32_t rd_gm_reqs_in_pipe; 222 uint32_t wr_lm_reqs_in_pipe; 223 uint32_t wr_gm_reqs_in_pipe; | 219 uint32_t outstandingReqsRdLm; 220 uint32_t rdLmReqsInPipe; 221 uint32_t rdGmReqsInPipe; 222 uint32_t wrLmReqsInPipe; 223 uint32_t wrGmReqsInPipe; |
224 | 224 |
225 int mem_trace_busy; 226 uint64_t last_trace; | 225 int memTraceBusy; 226 uint64_t lastTrace; |
227 // number of vector registers reserved by WF 228 int reservedVectorRegs; 229 // Index into the Vector Register File's namespace where the WF's registers 230 // will live while the WF is executed 231 uint32_t startVgprIndex; 232 233 // Old value of destination gpr (for trace) | 227 // number of vector registers reserved by WF 228 int reservedVectorRegs; 229 // Index into the Vector Register File's namespace where the WF's registers 230 // will live while the WF is executed 231 uint32_t startVgprIndex; 232 233 // Old value of destination gpr (for trace) |
234 std::vector<uint32_t> old_vgpr; | 234 std::vector<uint32_t> oldVgpr; |
235 // Id of destination gpr (for trace) | 235 // Id of destination gpr (for trace) |
236 uint32_t old_vgpr_id; | 236 uint32_t oldVgprId; |
237 // Tick count of last old_vgpr copy | 237 // Tick count of last old_vgpr copy |
238 uint64_t old_vgpr_tcnt; | 238 uint64_t oldVgprTcnt; |
239 240 // Old value of destination gpr (for trace) | 239 240 // Old value of destination gpr (for trace) |
241 std::vector<uint64_t> old_dgpr; | 241 std::vector<uint64_t> oldDgpr; |
242 // Id of destination gpr (for trace) | 242 // Id of destination gpr (for trace) |
243 uint32_t old_dgpr_id; | 243 uint32_t oldDgprId; |
244 // Tick count of last old_dgpr copy | 244 // Tick count of last old_dgpr copy |
245 uint64_t old_dgpr_tcnt; | 245 uint64_t oldDgprTcnt; |
246 247 // Execution mask at wavefront start | 246 247 // Execution mask at wavefront start |
248 VectorMask init_mask; | 248 VectorMask initMask; |
249 250 // number of barriers this WF has joined | 249 250 // number of barriers this WF has joined |
251 std::vector<int> bar_cnt; 252 int max_bar_cnt; | 251 std::vector<int> barCnt; 252 int maxBarCnt; |
253 // Flag to stall a wave on barrier 254 bool stalledAtBarrier; 255 256 // a pointer to the fraction of the LDS allocated 257 // to this workgroup (thus this wavefront) 258 LdsChunk *ldsChunk; 259 260 // A pointer to the spill area --- 67 unchanged lines hidden (view full) --- 328 } 329 330 void start(uint64_t _wfDynId, uint64_t _base_ptr); 331 void exec(); 332 void updateResources(); 333 int ready(itype_e type); 334 bool instructionBufferHasBranch(); 335 void regStats(); | 253 // Flag to stall a wave on barrier 254 bool stalledAtBarrier; 255 256 // a pointer to the fraction of the LDS allocated 257 // to this workgroup (thus this wavefront) 258 LdsChunk *ldsChunk; 259 260 // A pointer to the spill area --- 67 unchanged lines hidden (view full) --- 328 } 329 330 void start(uint64_t _wfDynId, uint64_t _base_ptr); 331 void exec(); 332 void updateResources(); 333 int ready(itype_e type); 334 bool instructionBufferHasBranch(); 335 void regStats(); |
336 VectorMask get_pred() { return execMask() & init_mask; } | 336 VectorMask getPred() { return execMask() & initMask; } |
337 338 bool waitingAtBarrier(int lane); 339 340 void pushToReconvergenceStack(uint32_t pc, uint32_t rpc, 341 const VectorMask& exec_mask); 342 343 void popFromReconvergenceStack(); 344 --- 24 unchanged lines hidden --- | 337 338 bool waitingAtBarrier(int lane); 339 340 void pushToReconvergenceStack(uint32_t pc, uint32_t rpc, 341 const VectorMask& exec_mask); 342 343 void popFromReconvergenceStack(); 344 --- 24 unchanged lines hidden --- |