1/* 2 * Copyright (c) 2011-2015 Advanced Micro Devices, Inc. 3 * All rights reserved. 4 * 5 * For use for simulation and test purposes only 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions are met: --- 179 unchanged lines hidden (view full) --- 188 w->kernelArgs = ndr->q.args; 189 w->privSizePerItem = ndr->q.privMemPerItem; 190 w->spillSizePerItem = ndr->q.spillMemPerItem; 191 w->roBase = ndr->q.roMemStart; 192 w->roSize = ndr->q.roMemTotal; 193} 194 195void |
196ComputeUnit::updateEvents() { 197 198 if (!timestampVec.empty()) { 199 uint32_t vecSize = timestampVec.size(); 200 uint32_t i = 0; 201 while (i < vecSize) { 202 if (timestampVec[i] <= shader->tick_cnt) { 203 std::pair<uint32_t, uint32_t> regInfo = regIdxVec[i]; --- 11 unchanged lines hidden (view full) --- 215 216 for (int i = 0; i< numSIMDs; ++i) { 217 vrf[i]->updateEvents(); 218 } 219} 220 221 222void |
223ComputeUnit::StartWF(Wavefront *w, int trueWgSize[], int trueWgSizeTotal, 224 int cnt, LdsChunk *ldsChunk, NDRange *ndr) |
225{ 226 static int _n_wave = 0; |
227 228 // Fill in Kernel state 229 FillKernelState(w, ndr); 230 |
231 VectorMask init_mask; 232 init_mask.reset(); 233 234 for (int k = 0; k < wfSize(); ++k) { 235 if (k + cnt * wfSize() < trueWgSizeTotal) 236 init_mask[k] = 1; 237 } 238 |
239 w->kern_id = ndr->dispatchId; 240 w->dynwaveid = cnt; |
241 w->init_mask = init_mask.to_ullong(); |
242 243 for (int k = 0; k < wfSize(); ++k) { 244 w->workitemid[0][k] = (k+cnt*wfSize()) % trueWgSize[0]; 245 w->workitemid[1][k] = 246 ((k + cnt * wfSize()) / trueWgSize[0]) % trueWgSize[1]; 247 w->workitemid[2][k] = 248 (k + cnt * wfSize()) / (trueWgSize[0] * trueWgSize[1]); 249 250 w->workitemFlatId[k] = w->workitemid[2][k] * trueWgSize[0] * 251 trueWgSize[1] + w->workitemid[1][k] * trueWgSize[0] + 252 w->workitemid[0][k]; 253 } 254 |
255 w->barrier_slots = divCeil(trueWgSizeTotal, wfSize()); 256 |
257 w->bar_cnt.resize(wfSize(), 0); |
258 |
259 w->max_bar_cnt = 0; 260 w->old_barrier_cnt = 0; 261 w->barrier_cnt = 0; |
262 |
263 w->privBase = ndr->q.privMemStart; 264 ndr->q.privMemStart += ndr->q.privMemPerItem * wfSize(); |
265 |
266 w->spillBase = ndr->q.spillMemStart; 267 ndr->q.spillMemStart += ndr->q.spillMemPerItem * wfSize(); 268 269 w->pushToReconvergenceStack(0, UINT32_MAX, init_mask.to_ulong()); 270 |
271 // WG state |
272 w->wg_id = ndr->globalWgId; 273 w->dispatchid = ndr->dispatchId; |
274 w->workgroupid[0] = w->wg_id % ndr->numWg[0]; 275 w->workgroupid[1] = (w->wg_id / ndr->numWg[0]) % ndr->numWg[1]; 276 w->workgroupid[2] = w->wg_id / (ndr->numWg[0] * ndr->numWg[1]); 277 |
278 w->barrier_id = barrier_id; |
279 w->stalledAtBarrier = false; 280 |
281 // set the wavefront context to have a pointer to this section of the LDS 282 w->ldsChunk = ldsChunk; |
283 284 int32_t refCount M5_VAR_USED = 285 lds.increaseRefCounter(w->dispatchid, w->wg_id); 286 DPRINTF(GPUDisp, "CU%d: increase ref ctr wg[%d] to [%d]\n", 287 cu_id, w->wg_id, refCount); 288 289 w->instructionBuffer.clear(); 290 --- 8 unchanged lines hidden (view full) --- 299 } else { 300 w->spillWidth = wfSize(); 301 } 302 303 DPRINTF(GPUDisp, "Scheduling wfDynId/barrier_id %d/%d on CU%d: " 304 "WF[%d][%d]\n", _n_wave, barrier_id, cu_id, w->simdId, w->wfSlotId); 305 306 w->start(++_n_wave, ndr->q.code_ptr); |
307} 308 309void 310ComputeUnit::StartWorkgroup(NDRange *ndr) 311{ 312 // reserve the LDS capacity allocated to the work group 313 // disambiguated by the dispatch ID and workgroup ID, which should be 314 // globally unique --- 19 unchanged lines hidden (view full) --- 334 335 for (int d = 0; d < 3; ++d) { 336 trueWgSize[d] = std::min(ndr->q.wgSize[d], ndr->q.gdSize[d] - 337 ndr->wgId[d] * ndr->q.wgSize[d]); 338 339 trueWgSizeTotal *= trueWgSize[d]; 340 } 341 |
342 // calculate the number of 32-bit vector registers required by wavefront 343 int vregDemand = ndr->q.sRegCount + (2 * ndr->q.dRegCount); 344 int cnt = 0; 345 346 // Assign WFs by spreading them across SIMDs, 1 WF per SIMD at a time 347 for (int m = 0; m < shader->n_wf * numSIMDs; ++m) { 348 Wavefront *w = wfList[m % numSIMDs][m / numSIMDs]; 349 // Check if this wavefront slot is available: --- 10 unchanged lines hidden (view full) --- 360 uint32_t normSize = 0; 361 362 w->startVgprIndex = vrf[m % numSIMDs]->manager-> 363 allocateRegion(vregDemand, &normSize); 364 365 w->reservedVectorRegs = normSize; 366 vectorRegsReserved[m % numSIMDs] += w->reservedVectorRegs; 367 |
368 StartWF(w, trueWgSize, trueWgSizeTotal, cnt, ldsChunk, ndr); |
369 ++cnt; 370 } 371 } 372 ++barrier_id; 373} 374 375int 376ComputeUnit::ReadyWorkgroup(NDRange *ndr) --- 1417 unchanged lines hidden --- |