wavefront.hh (11308:7d8836fd043d) | wavefront.hh (11534:7106f550afad) |
---|---|
1/* 2 * Copyright (c) 2011-2015 Advanced Micro Devices, Inc. 3 * All rights reserved. 4 * 5 * For use for simulation and test purposes only 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions are met: --- 69 unchanged lines hidden (view full) --- 78 * | 512: arg2.0 | 520: arg2.1 | ... | 1016: arg2.63 | 79 * ___________________________________________________ 80 */ 81class CallArgMem 82{ 83 public: 84 // pointer to buffer for storing function arguments 85 uint8_t *mem; | 1/* 2 * Copyright (c) 2011-2015 Advanced Micro Devices, Inc. 3 * All rights reserved. 4 * 5 * For use for simulation and test purposes only 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions are met: --- 69 unchanged lines hidden (view full) --- 78 * | 512: arg2.0 | 520: arg2.1 | ... | 1016: arg2.63 | 79 * ___________________________________________________ 80 */ 81class CallArgMem 82{ 83 public: 84 // pointer to buffer for storing function arguments 85 uint8_t *mem; |
86 int wfSize; |
|
86 // size of function args 87 int funcArgsSizePerItem; 88 89 template<typename CType> 90 int 91 getLaneOffset(int lane, int addr) 92 { | 87 // size of function args 88 int funcArgsSizePerItem; 89 90 template<typename CType> 91 int 92 getLaneOffset(int lane, int addr) 93 { |
93 return addr * VSZ + sizeof(CType) * lane; | 94 return addr * wfSize + sizeof(CType) * lane; |
94 } 95 | 95 } 96 |
96 CallArgMem(int func_args_size_per_item) 97 : funcArgsSizePerItem(func_args_size_per_item) | 97 CallArgMem(int func_args_size_per_item, int wf_size) 98 : wfSize(wf_size), funcArgsSizePerItem(func_args_size_per_item) |
98 { | 99 { |
99 mem = (uint8_t*)malloc(funcArgsSizePerItem * VSZ); | 100 mem = (uint8_t*)malloc(funcArgsSizePerItem * wfSize); |
100 } 101 102 ~CallArgMem() 103 { 104 free(mem); 105 } 106 107 template<typename CType> --- 79 unchanged lines hidden (view full) --- 187 bool isLmInstruction(GPUDynInstPtr ii); 188 bool isOldestInstGMem(); 189 bool isOldestInstLMem(); 190 bool isOldestInstPrivMem(); 191 bool isOldestInstFlatMem(); 192 bool isOldestInstALU(); 193 bool isOldestInstBarrier(); 194 // used for passing spill address to DDInstGPU | 101 } 102 103 ~CallArgMem() 104 { 105 free(mem); 106 } 107 108 template<typename CType> --- 79 unchanged lines hidden (view full) --- 188 bool isLmInstruction(GPUDynInstPtr ii); 189 bool isOldestInstGMem(); 190 bool isOldestInstLMem(); 191 bool isOldestInstPrivMem(); 192 bool isOldestInstFlatMem(); 193 bool isOldestInstALU(); 194 bool isOldestInstBarrier(); 195 // used for passing spill address to DDInstGPU |
195 uint64_t last_addr[VSZ]; 196 uint32_t workitemid[3][VSZ]; 197 uint32_t workitemFlatId[VSZ]; | 196 std::vector<Addr> last_addr; 197 std::vector<uint32_t> workitemid[3]; 198 std::vector<uint32_t> workitemFlatId; |
198 uint32_t workgroupid[3]; 199 uint32_t workgroupsz[3]; 200 uint32_t gridsz[3]; 201 uint32_t wg_id; 202 uint32_t wg_sz; 203 uint32_t dynwaveid; 204 uint32_t maxdynwaveid; 205 uint32_t dispatchid; --- 19 unchanged lines hidden (view full) --- 225 uint64_t last_trace; 226 // number of vector registers reserved by WF 227 int reservedVectorRegs; 228 // Index into the Vector Register File's namespace where the WF's registers 229 // will live while the WF is executed 230 uint32_t startVgprIndex; 231 232 // Old value of destination gpr (for trace) | 199 uint32_t workgroupid[3]; 200 uint32_t workgroupsz[3]; 201 uint32_t gridsz[3]; 202 uint32_t wg_id; 203 uint32_t wg_sz; 204 uint32_t dynwaveid; 205 uint32_t maxdynwaveid; 206 uint32_t dispatchid; --- 19 unchanged lines hidden (view full) --- 226 uint64_t last_trace; 227 // number of vector registers reserved by WF 228 int reservedVectorRegs; 229 // Index into the Vector Register File's namespace where the WF's registers 230 // will live while the WF is executed 231 uint32_t startVgprIndex; 232 233 // Old value of destination gpr (for trace) |
233 uint32_t old_vgpr[VSZ]; | 234 std::vector<uint32_t> old_vgpr; |
234 // Id of destination gpr (for trace) 235 uint32_t old_vgpr_id; 236 // Tick count of last old_vgpr copy 237 uint64_t old_vgpr_tcnt; 238 239 // Old value of destination gpr (for trace) | 235 // Id of destination gpr (for trace) 236 uint32_t old_vgpr_id; 237 // Tick count of last old_vgpr copy 238 uint64_t old_vgpr_tcnt; 239 240 // Old value of destination gpr (for trace) |
240 uint64_t old_dgpr[VSZ]; | 241 std::vector<uint64_t> old_dgpr; |
241 // Id of destination gpr (for trace) 242 uint32_t old_dgpr_id; 243 // Tick count of last old_vgpr copy 244 uint64_t old_dgpr_tcnt; 245 246 // Execution mask at wavefront start 247 VectorMask init_mask; 248 249 // number of barriers this WF has joined | 242 // Id of destination gpr (for trace) 243 uint32_t old_dgpr_id; 244 // Tick count of last old_vgpr copy 245 uint64_t old_dgpr_tcnt; 246 247 // Execution mask at wavefront start 248 VectorMask init_mask; 249 250 // number of barriers this WF has joined |
250 int bar_cnt[VSZ]; | 251 std::vector<int> bar_cnt; |
251 int max_bar_cnt; 252 // Flag to stall a wave on barrier 253 bool stalledAtBarrier; 254 255 // a pointer to the fraction of the LDS allocated 256 // to this workgroup (thus this wavefront) 257 LdsChunk *ldsChunk; 258 --- 32 unchanged lines hidden (view full) --- 291 // operands; this is used to highlight the load on the VRF 292 Stats::Distribution srcRegOpDist; 293 Stats::Distribution dstRegOpDist; 294 295 // Functions to operate on call argument memory 296 // argument memory for hsail call instruction 297 CallArgMem *callArgMem; 298 void | 252 int max_bar_cnt; 253 // Flag to stall a wave on barrier 254 bool stalledAtBarrier; 255 256 // a pointer to the fraction of the LDS allocated 257 // to this workgroup (thus this wavefront) 258 LdsChunk *ldsChunk; 259 --- 32 unchanged lines hidden (view full) --- 292 // operands; this is used to highlight the load on the VRF 293 Stats::Distribution srcRegOpDist; 294 Stats::Distribution dstRegOpDist; 295 296 // Functions to operate on call argument memory 297 // argument memory for hsail call instruction 298 CallArgMem *callArgMem; 299 void |
299 initCallArgMem(int func_args_size_per_item) | 300 initCallArgMem(int func_args_size_per_item, int wf_size) |
300 { | 301 { |
301 callArgMem = new CallArgMem(func_args_size_per_item); | 302 callArgMem = new CallArgMem(func_args_size_per_item, wf_size); |
302 } 303 304 template<typename CType> 305 CType 306 readCallArgMem(int lane, int addr) 307 { 308 return *((CType*)(callArgMem->getLaneAddr<CType>(lane, addr))); 309 } --- 12 unchanged lines hidden (view full) --- 322 323 void 324 setParent(ComputeUnit *cu) 325 { 326 computeUnit = cu; 327 } 328 329 void start(uint64_t _wfDynId, uint64_t _base_ptr); | 303 } 304 305 template<typename CType> 306 CType 307 readCallArgMem(int lane, int addr) 308 { 309 return *((CType*)(callArgMem->getLaneAddr<CType>(lane, addr))); 310 } --- 12 unchanged lines hidden (view full) --- 323 324 void 325 setParent(ComputeUnit *cu) 326 { 327 computeUnit = cu; 328 } 329 330 void start(uint64_t _wfDynId, uint64_t _base_ptr); |
330 | |
331 void exec(); 332 void updateResources(); 333 int ready(itype_e type); 334 bool instructionBufferHasBranch(); 335 void regStats(); 336 VectorMask get_pred() { return execMask() & init_mask; } 337 338 bool waitingAtBarrier(int lane); --- 30 unchanged lines hidden --- | 331 void exec(); 332 void updateResources(); 333 int ready(itype_e type); 334 bool instructionBufferHasBranch(); 335 void regStats(); 336 VectorMask get_pred() { return execMask() & init_mask; } 337 338 bool waitingAtBarrier(int lane); --- 30 unchanged lines hidden --- |