wavefront.hh (11308:7d8836fd043d) wavefront.hh (11534:7106f550afad)
1/*
2 * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * For use for simulation and test purposes only
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:

--- 69 unchanged lines hidden (view full) ---

78 * | 512: arg2.0 | 520: arg2.1 | ... | 1016: arg2.63 |
79 * ___________________________________________________
80 */
81class CallArgMem
82{
83 public:
84 // pointer to buffer for storing function arguments
85 uint8_t *mem;
1/*
2 * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * For use for simulation and test purposes only
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:

--- 69 unchanged lines hidden (view full) ---

78 * | 512: arg2.0 | 520: arg2.1 | ... | 1016: arg2.63 |
79 * ___________________________________________________
80 */
81class CallArgMem
82{
83 public:
84 // pointer to buffer for storing function arguments
85 uint8_t *mem;
86 int wfSize;
86 // size of function args
87 int funcArgsSizePerItem;
88
89 template<typename CType>
90 int
91 getLaneOffset(int lane, int addr)
92 {
87 // size of function args
88 int funcArgsSizePerItem;
89
90 template<typename CType>
91 int
92 getLaneOffset(int lane, int addr)
93 {
93 return addr * VSZ + sizeof(CType) * lane;
94 return addr * wfSize + sizeof(CType) * lane;
94 }
95
95 }
96
96 CallArgMem(int func_args_size_per_item)
97 : funcArgsSizePerItem(func_args_size_per_item)
97 CallArgMem(int func_args_size_per_item, int wf_size)
98 : wfSize(wf_size), funcArgsSizePerItem(func_args_size_per_item)
98 {
99 {
99 mem = (uint8_t*)malloc(funcArgsSizePerItem * VSZ);
100 mem = (uint8_t*)malloc(funcArgsSizePerItem * wfSize);
100 }
101
// Release the argument buffer that the constructor obtained with malloc().
// NOTE(review): no copy constructor/assignment is visible in this view; if a
// CallArgMem were ever copied, mem would be freed twice -- confirm that
// instances are only held by pointer.
102 ~CallArgMem()
103 {
104 free(mem);
105 }
106
107 template<typename CType>

--- 79 unchanged lines hidden (view full) ---

187 bool isLmInstruction(GPUDynInstPtr ii);
188 bool isOldestInstGMem();
189 bool isOldestInstLMem();
190 bool isOldestInstPrivMem();
191 bool isOldestInstFlatMem();
192 bool isOldestInstALU();
193 bool isOldestInstBarrier();
194 // used for passing spill address to DDInstGPU
101 }
102
// Release the argument buffer that the constructor obtained with malloc().
// NOTE(review): no copy constructor/assignment is visible in this view; if a
// CallArgMem were ever copied, mem would be freed twice -- confirm that
// instances are only held by pointer.
103 ~CallArgMem()
104 {
105 free(mem);
106 }
107
108 template<typename CType>

--- 79 unchanged lines hidden (view full) ---

188 bool isLmInstruction(GPUDynInstPtr ii);
189 bool isOldestInstGMem();
190 bool isOldestInstLMem();
191 bool isOldestInstPrivMem();
192 bool isOldestInstFlatMem();
193 bool isOldestInstALU();
194 bool isOldestInstBarrier();
195 // used for passing spill address to DDInstGPU
195 uint64_t last_addr[VSZ];
196 uint32_t workitemid[3][VSZ];
197 uint32_t workitemFlatId[VSZ];
196 std::vector<Addr> last_addr;
197 std::vector<uint32_t> workitemid[3];
198 std::vector<uint32_t> workitemFlatId;
198 uint32_t workgroupid[3];
199 uint32_t workgroupsz[3];
200 uint32_t gridsz[3];
201 uint32_t wg_id;
202 uint32_t wg_sz;
203 uint32_t dynwaveid;
204 uint32_t maxdynwaveid;
205 uint32_t dispatchid;

--- 19 unchanged lines hidden (view full) ---

225 uint64_t last_trace;
226 // number of vector registers reserved by WF
227 int reservedVectorRegs;
228 // Index into the Vector Register File's namespace where the WF's registers
229 // will live while the WF is executed
230 uint32_t startVgprIndex;
231
232 // Old value of destination gpr (for trace)
199 uint32_t workgroupid[3];
200 uint32_t workgroupsz[3];
201 uint32_t gridsz[3];
202 uint32_t wg_id;
203 uint32_t wg_sz;
204 uint32_t dynwaveid;
205 uint32_t maxdynwaveid;
206 uint32_t dispatchid;

--- 19 unchanged lines hidden (view full) ---

226 uint64_t last_trace;
227 // number of vector registers reserved by WF
228 int reservedVectorRegs;
229 // Index into the Vector Register File's namespace where the WF's registers
230 // will live while the WF is executed
231 uint32_t startVgprIndex;
232
233 // Old value of destination gpr (for trace)
233 uint32_t old_vgpr[VSZ];
234 std::vector<uint32_t> old_vgpr;
234 // Id of destination gpr (for trace)
235 uint32_t old_vgpr_id;
236 // Tick count of last old_vgpr copy
237 uint64_t old_vgpr_tcnt;
238
239 // Old value of destination gpr (for trace)
235 // Id of destination gpr (for trace)
236 uint32_t old_vgpr_id;
237 // Tick count of last old_vgpr copy
238 uint64_t old_vgpr_tcnt;
239
240 // Old value of destination gpr (for trace)
240 uint64_t old_dgpr[VSZ];
241 std::vector<uint64_t> old_dgpr;
241 // Id of destination gpr (for trace)
242 uint32_t old_dgpr_id;
243 // Tick count of last old_dgpr copy
244 uint64_t old_dgpr_tcnt;
245
246 // Execution mask at wavefront start
247 VectorMask init_mask;
248
249 // number of barriers this WF has joined
242 // Id of destination gpr (for trace)
243 uint32_t old_dgpr_id;
244 // Tick count of last old_dgpr copy
245 uint64_t old_dgpr_tcnt;
246
247 // Execution mask at wavefront start
248 VectorMask init_mask;
249
250 // number of barriers this WF has joined
250 int bar_cnt[VSZ];
251 std::vector<int> bar_cnt;
251 int max_bar_cnt;
252 // Flag to stall a wave on barrier
253 bool stalledAtBarrier;
254
255 // a pointer to the fraction of the LDS allocated
256 // to this workgroup (thus this wavefront)
257 LdsChunk *ldsChunk;
258

--- 32 unchanged lines hidden (view full) ---

291 // operands; this is used to highlight the load on the VRF
292 Stats::Distribution srcRegOpDist;
293 Stats::Distribution dstRegOpDist;
294
295 // Functions to operate on call argument memory
296 // argument memory for hsail call instruction
297 CallArgMem *callArgMem;
298 void
252 int max_bar_cnt;
253 // Flag to stall a wave on barrier
254 bool stalledAtBarrier;
255
256 // a pointer to the fraction of the LDS allocated
257 // to this workgroup (thus this wavefront)
258 LdsChunk *ldsChunk;
259

--- 32 unchanged lines hidden (view full) ---

292 // operands; this is used to highlight the load on the VRF
293 Stats::Distribution srcRegOpDist;
294 Stats::Distribution dstRegOpDist;
295
296 // Functions to operate on call argument memory
297 // argument memory for hsail call instruction
298 CallArgMem *callArgMem;
299 void
299 initCallArgMem(int func_args_size_per_item)
300 initCallArgMem(int func_args_size_per_item, int wf_size)
300 {
301 {
301 callArgMem = new CallArgMem(func_args_size_per_item);
302 callArgMem = new CallArgMem(func_args_size_per_item, wf_size);
302 }
303
// Read one call-argument value of type CType for a single lane.
// lane and addr are forwarded unchanged to callArgMem->getLaneAddr<CType>();
// the address it returns is cast to CType* and dereferenced, so the value is
// returned by copy.
// NOTE(review): callArgMem is dereferenced without a null check -- presumably
// initCallArgMem() must have been called first; confirm against callers.
304 template<typename CType>
305 CType
306 readCallArgMem(int lane, int addr)
307 {
308 return *((CType*)(callArgMem->getLaneAddr<CType>(lane, addr)));
309 }

--- 12 unchanged lines hidden (view full) ---

322
// Store cu in computeUnit; nothing else is updated here.
// NOTE(review): going by the method name, cu is presumably the ComputeUnit
// that owns this wavefront -- confirm.
323 void
324 setParent(ComputeUnit *cu)
325 {
326 computeUnit = cu;
327 }
328
329 void start(uint64_t _wfDynId, uint64_t _base_ptr);
303 }
304
// Read one call-argument value of type CType for a single lane.
// lane and addr are forwarded unchanged to callArgMem->getLaneAddr<CType>();
// the address it returns is cast to CType* and dereferenced, so the value is
// returned by copy.
// NOTE(review): callArgMem is dereferenced without a null check -- presumably
// initCallArgMem() must have been called first; confirm against callers.
305 template<typename CType>
306 CType
307 readCallArgMem(int lane, int addr)
308 {
309 return *((CType*)(callArgMem->getLaneAddr<CType>(lane, addr)));
310 }

--- 12 unchanged lines hidden (view full) ---

323
// Store cu in computeUnit; nothing else is updated here.
// NOTE(review): going by the method name, cu is presumably the ComputeUnit
// that owns this wavefront -- confirm.
324 void
325 setParent(ComputeUnit *cu)
326 {
327 computeUnit = cu;
328 }
329
330 void start(uint64_t _wfDynId, uint64_t _base_ptr);
330
331 void exec();
332 void updateResources();
333 int ready(itype_e type);
334 bool instructionBufferHasBranch();
335 void regStats();
// Returns the bitwise AND of execMask() and init_mask.
// NOTE(review): execMask() is declared outside this view; presumably it is
// the current execution mask, making the result the lanes that are both
// currently enabled and were enabled at wavefront start -- confirm.
336 VectorMask get_pred() { return execMask() & init_mask; }
337
338 bool waitingAtBarrier(int lane);

--- 30 unchanged lines hidden ---
331 void exec();
332 void updateResources();
333 int ready(itype_e type);
334 bool instructionBufferHasBranch();
335 void regStats();
// Returns the bitwise AND of execMask() and init_mask.
// NOTE(review): execMask() is declared outside this view; presumably it is
// the current execution mask, making the result the lanes that are both
// currently enabled and were enabled at wavefront start -- confirm.
336 VectorMask get_pred() { return execMask() & init_mask; }
337
338 bool waitingAtBarrier(int lane);

--- 30 unchanged lines hidden ---