177c177
< ComputeUnit::FillKernelState(Wavefront *w, NDRange *ndr)
---
> ComputeUnit::fillKernelState(Wavefront *w, NDRange *ndr)
192a193
> w->computeActualWgSz(ndr);
223,224c224,225
< ComputeUnit::StartWF(Wavefront *w, int trueWgSize[], int trueWgSizeTotal,
< int waveId, LdsChunk *ldsChunk, NDRange *ndr)
---
> ComputeUnit::startWavefront(Wavefront *w, int waveId, LdsChunk *ldsChunk,
> NDRange *ndr)
228,230d228
< // Fill in Kernel state
< FillKernelState(w, ndr);
<
235c233
< if (k + waveId * wfSize() < trueWgSizeTotal)
---
> if (k + waveId * wfSize() < w->actualWgSzTotal)
244,248c242,246
< w->workItemId[0][k] = (k + waveId * wfSize()) % trueWgSize[0];
< w->workItemId[1][k] =
< ((k + waveId * wfSize()) / trueWgSize[0]) % trueWgSize[1];
< w->workItemId[2][k] =
< (k + waveId * wfSize()) / (trueWgSize[0] * trueWgSize[1]);
---
> w->workItemId[0][k] = (k + waveId * wfSize()) % w->actualWgSz[0];
> w->workItemId[1][k] = ((k + waveId * wfSize()) / w->actualWgSz[0]) %
> w->actualWgSz[1];
> w->workItemId[2][k] = (k + waveId * wfSize()) /
> (w->actualWgSz[0] * w->actualWgSz[1]);
250,251c248,249
< w->workItemFlatId[k] = w->workItemId[2][k] * trueWgSize[0] *
< trueWgSize[1] + w->workItemId[1][k] * trueWgSize[0] +
---
> w->workItemFlatId[k] = w->workItemId[2][k] * w->actualWgSz[0] *
> w->actualWgSz[1] + w->workItemId[1][k] * w->actualWgSz[0] +
255c253
< w->barrierSlots = divCeil(trueWgSizeTotal, wfSize());
---
> w->barrierSlots = divCeil(w->actualWgSzTotal, wfSize());
297,298c295,296
< if ((waveId + 1) * wfSize() >= trueWgSizeTotal) {
< w->spillWidth = trueWgSizeTotal - (waveId * wfSize());
---
> if ((waveId + 1) * wfSize() >= w->actualWgSzTotal) {
> w->spillWidth = w->actualWgSzTotal - (waveId * wfSize());
331,341d328
< // Get true size of workgroup (after clamping to grid size)
< int trueWgSize[3];
< int trueWgSizeTotal = 1;
<
< for (int d = 0; d < 3; ++d) {
< trueWgSize[d] = std::min(ndr->q.wgSize[d], ndr->q.gdSize[d] -
< ndr->wgId[d] * ndr->q.wgSize[d]);
<
< trueWgSizeTotal *= trueWgSize[d];
< }
<
352a340
> fillKernelState(w, ndr);
355c343
< if (wave_id * wfSize() >= trueWgSizeTotal)
---
> if (wave_id * wfSize() >= w->actualWgSzTotal)
368c356
< StartWF(w, trueWgSize, trueWgSizeTotal, wave_id, ldsChunk, ndr);
---
> startWavefront(w, wave_id, ldsChunk, ndr);