compute_unit.cc (11638:b511733958d0) | compute_unit.cc (11639:2e8d4bd8108d) |
---|---|
1/* 2 * Copyright (c) 2011-2015 Advanced Micro Devices, Inc. 3 * All rights reserved. 4 * 5 * For use for simulation and test purposes only 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions are met: --- 164 unchanged lines hidden (view full) --- 173 delete ldsPort; 174} 175 176void 177ComputeUnit::FillKernelState(Wavefront *w, NDRange *ndr) 178{ 179 w->resizeRegFiles(ndr->q.cRegCount, ndr->q.sRegCount, ndr->q.dRegCount); 180 | 1/* 2 * Copyright (c) 2011-2015 Advanced Micro Devices, Inc. 3 * All rights reserved. 4 * 5 * For use for simulation and test purposes only 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions are met: --- 164 unchanged lines hidden (view full) --- 173 delete ldsPort; 174} 175 176void 177ComputeUnit::FillKernelState(Wavefront *w, NDRange *ndr) 178{ 179 w->resizeRegFiles(ndr->q.cRegCount, ndr->q.sRegCount, ndr->q.dRegCount); 180 |
181 w->workgroupsz[0] = ndr->q.wgSize[0]; 182 w->workgroupsz[1] = ndr->q.wgSize[1]; 183 w->workgroupsz[2] = ndr->q.wgSize[2]; 184 w->wg_sz = w->workgroupsz[0] * w->workgroupsz[1] * w->workgroupsz[2]; 185 w->gridsz[0] = ndr->q.gdSize[0]; 186 w->gridsz[1] = ndr->q.gdSize[1]; 187 w->gridsz[2] = ndr->q.gdSize[2]; | 181 w->workGroupSz[0] = ndr->q.wgSize[0]; 182 w->workGroupSz[1] = ndr->q.wgSize[1]; 183 w->workGroupSz[2] = ndr->q.wgSize[2]; 184 w->wgSz = w->workGroupSz[0] * w->workGroupSz[1] * w->workGroupSz[2]; 185 w->gridSz[0] = ndr->q.gdSize[0]; 186 w->gridSz[1] = ndr->q.gdSize[1]; 187 w->gridSz[2] = ndr->q.gdSize[2]; |
188 w->kernelArgs = ndr->q.args; 189 w->privSizePerItem = ndr->q.privMemPerItem; 190 w->spillSizePerItem = ndr->q.spillMemPerItem; 191 w->roBase = ndr->q.roMemStart; 192 w->roSize = ndr->q.roMemTotal; 193} 194 195void --- 35 unchanged lines hidden (view full) --- 231 VectorMask init_mask; 232 init_mask.reset(); 233 234 for (int k = 0; k < wfSize(); ++k) { 235 if (k + cnt * wfSize() < trueWgSizeTotal) 236 init_mask[k] = 1; 237 } 238 | 188 w->kernelArgs = ndr->q.args; 189 w->privSizePerItem = ndr->q.privMemPerItem; 190 w->spillSizePerItem = ndr->q.spillMemPerItem; 191 w->roBase = ndr->q.roMemStart; 192 w->roSize = ndr->q.roMemTotal; 193} 194 195void --- 35 unchanged lines hidden (view full) --- 231 VectorMask init_mask; 232 init_mask.reset(); 233 234 for (int k = 0; k < wfSize(); ++k) { 235 if (k + cnt * wfSize() < trueWgSizeTotal) 236 init_mask[k] = 1; 237 } 238 |
239 w->kern_id = ndr->dispatchId; 240 w->dynwaveid = cnt; 241 w->init_mask = init_mask.to_ullong(); | 239 w->kernId = ndr->dispatchId; 240 w->dynWaveId = cnt; 241 w->initMask = init_mask.to_ullong(); |
242 243 for (int k = 0; k < wfSize(); ++k) { | 242 243 for (int k = 0; k < wfSize(); ++k) { |
244 w->workitemid[0][k] = (k+cnt*wfSize()) % trueWgSize[0]; 245 w->workitemid[1][k] = | 244 w->workItemId[0][k] = (k+cnt*wfSize()) % trueWgSize[0]; 245 w->workItemId[1][k] = |
246 ((k + cnt * wfSize()) / trueWgSize[0]) % trueWgSize[1]; | 246 ((k + cnt * wfSize()) / trueWgSize[0]) % trueWgSize[1]; |
247 w->workitemid[2][k] = | 247 w->workItemId[2][k] = |
248 (k + cnt * wfSize()) / (trueWgSize[0] * trueWgSize[1]); 249 | 248 (k + cnt * wfSize()) / (trueWgSize[0] * trueWgSize[1]); 249 |
250 w->workitemFlatId[k] = w->workitemid[2][k] * trueWgSize[0] * 251 trueWgSize[1] + w->workitemid[1][k] * trueWgSize[0] + 252 w->workitemid[0][k]; | 250 w->workItemFlatId[k] = w->workItemId[2][k] * trueWgSize[0] * 251 trueWgSize[1] + w->workItemId[1][k] * trueWgSize[0] + 252 w->workItemId[0][k]; |
253 } 254 | 253 } 254 |
255 w->barrier_slots = divCeil(trueWgSizeTotal, wfSize()); | 255 w->barrierSlots = divCeil(trueWgSizeTotal, wfSize()); |
256 | 256 |
257 w->bar_cnt.resize(wfSize(), 0); | 257 w->barCnt.resize(wfSize(), 0); |
258 | 258 |
259 w->max_bar_cnt = 0; 260 w->old_barrier_cnt = 0; 261 w->barrier_cnt = 0; | 259 w->maxBarCnt = 0; 260 w->oldBarrierCnt = 0; 261 w->barrierCnt = 0; |
262 263 w->privBase = ndr->q.privMemStart; 264 ndr->q.privMemStart += ndr->q.privMemPerItem * wfSize(); 265 266 w->spillBase = ndr->q.spillMemStart; 267 ndr->q.spillMemStart += ndr->q.spillMemPerItem * wfSize(); 268 269 w->pushToReconvergenceStack(0, UINT32_MAX, init_mask.to_ulong()); 270 271 // WG state | 262 263 w->privBase = ndr->q.privMemStart; 264 ndr->q.privMemStart += ndr->q.privMemPerItem * wfSize(); 265 266 w->spillBase = ndr->q.spillMemStart; 267 ndr->q.spillMemStart += ndr->q.spillMemPerItem * wfSize(); 268 269 w->pushToReconvergenceStack(0, UINT32_MAX, init_mask.to_ulong()); 270 271 // WG state |
272 w->wg_id = ndr->globalWgId; 273 w->dispatchid = ndr->dispatchId; 274 w->workgroupid[0] = w->wg_id % ndr->numWg[0]; 275 w->workgroupid[1] = (w->wg_id / ndr->numWg[0]) % ndr->numWg[1]; 276 w->workgroupid[2] = w->wg_id / (ndr->numWg[0] * ndr->numWg[1]); | 272 w->wgId = ndr->globalWgId; 273 w->dispatchId = ndr->dispatchId; 274 w->workGroupId[0] = w->wgId % ndr->numWg[0]; 275 w->workGroupId[1] = (w->wgId / ndr->numWg[0]) % ndr->numWg[1]; 276 w->workGroupId[2] = w->wgId / (ndr->numWg[0] * ndr->numWg[1]); |
277 | 277 |
278 w->barrier_id = barrier_id; | 278 w->barrierId = barrier_id; |
279 w->stalledAtBarrier = false; 280 281 // set the wavefront context to have a pointer to this section of the LDS 282 w->ldsChunk = ldsChunk; 283 284 int32_t refCount M5_VAR_USED = | 279 w->stalledAtBarrier = false; 280 281 // set the wavefront context to have a pointer to this section of the LDS 282 w->ldsChunk = ldsChunk; 283 284 int32_t refCount M5_VAR_USED = |
285 lds.increaseRefCounter(w->dispatchid, w->wg_id); | 285 lds.increaseRefCounter(w->dispatchId, w->wgId); |
286 DPRINTF(GPUDisp, "CU%d: increase ref ctr wg[%d] to [%d]\n", | 286 DPRINTF(GPUDisp, "CU%d: increase ref ctr wg[%d] to [%d]\n", |
287 cu_id, w->wg_id, refCount); | 287 cu_id, w->wgId, refCount); |
288 289 w->instructionBuffer.clear(); 290 291 if (w->pendingFetch) 292 w->dropFetch = true; 293 294 // is this the last wavefront in the workgroup 295 // if set the spillWidth to be the remaining work-items --- 167 unchanged lines hidden (view full) --- 463 for (int i_simd = 0; i_simd < numSIMDs; ++i_simd) { 464 for (int i_wf = 0; i_wf < shader->n_wf; ++i_wf) { 465 Wavefront *w = wfList[i_simd][i_wf]; 466 467 if (w->status == Wavefront::S_RUNNING) { 468 DPRINTF(GPUSync, "Checking WF[%d][%d]\n", i_simd, i_wf); 469 470 DPRINTF(GPUSync, "wf->barrier_id = %d, _barrier_id = %d\n", | 288 289 w->instructionBuffer.clear(); 290 291 if (w->pendingFetch) 292 w->dropFetch = true; 293 294 // is this the last wavefront in the workgroup 295 // if set the spillWidth to be the remaining work-items --- 167 unchanged lines hidden (view full) --- 463 for (int i_simd = 0; i_simd < numSIMDs; ++i_simd) { 464 for (int i_wf = 0; i_wf < shader->n_wf; ++i_wf) { 465 Wavefront *w = wfList[i_simd][i_wf]; 466 467 if (w->status == Wavefront::S_RUNNING) { 468 DPRINTF(GPUSync, "Checking WF[%d][%d]\n", i_simd, i_wf); 469 470 DPRINTF(GPUSync, "wf->barrier_id = %d, _barrier_id = %d\n", |
471 w->barrier_id, _barrier_id); | 471 w->barrierId, _barrier_id); |
472 473 DPRINTF(GPUSync, "wf->barrier_cnt %d, bcnt = %d\n", | 472 473 DPRINTF(GPUSync, "wf->barrier_cnt %d, bcnt = %d\n", |
474 w->barrier_cnt, bcnt); | 474 w->barrierCnt, bcnt); |
475 } 476 477 if (w->status == Wavefront::S_RUNNING && | 475 } 476 477 if (w->status == Wavefront::S_RUNNING && |
478 w->barrier_id == _barrier_id && w->barrier_cnt == bcnt && 479 !w->outstanding_reqs) { | 478 w->barrierId == _barrier_id && w->barrierCnt == bcnt && 479 !w->outstandingReqs) { |
480 ++ccnt; 481 482 DPRINTF(GPUSync, "WF[%d][%d] at barrier, increment ccnt to " 483 "%d\n", i_simd, i_wf, ccnt); 484 } 485 } 486 } 487 --- 153 unchanged lines hidden (view full) --- 641 if (pkt->req->isKernel() && pkt->req->isRelease()) { 642 Wavefront *w = 643 computeUnit->wfList[gpuDynInst->simdId][gpuDynInst->wfSlotId]; 644 645 // Check if we are waiting on Kernel End Release 646 if (w->status == Wavefront::S_RETURNING) { 647 DPRINTF(GPUDisp, "CU%d: WF[%d][%d][wv=%d]: WG id completed %d\n", 648 computeUnit->cu_id, w->simdId, w->wfSlotId, | 480 ++ccnt; 481 482 DPRINTF(GPUSync, "WF[%d][%d] at barrier, increment ccnt to " 483 "%d\n", i_simd, i_wf, ccnt); 484 } 485 } 486 } 487 --- 153 unchanged lines hidden (view full) --- 641 if (pkt->req->isKernel() && pkt->req->isRelease()) { 642 Wavefront *w = 643 computeUnit->wfList[gpuDynInst->simdId][gpuDynInst->wfSlotId]; 644 645 // Check if we are waiting on Kernel End Release 646 if (w->status == Wavefront::S_RETURNING) { 647 DPRINTF(GPUDisp, "CU%d: WF[%d][%d][wv=%d]: WG id completed %d\n", 648 computeUnit->cu_id, w->simdId, w->wfSlotId, |
649 w->wfDynId, w->kern_id); | 649 w->wfDynId, w->kernId); |
650 651 computeUnit->shader->dispatcher->notifyWgCompl(w); 652 w->status = Wavefront::S_STOPPED; 653 } else { | 650 651 computeUnit->shader->dispatcher->notifyWgCompl(w); 652 w->status = Wavefront::S_STOPPED; 653 } else { |
654 w->outstanding_reqs--; | 654 w->outstandingReqs--; |
655 } 656 657 DPRINTF(GPUSync, "CU%d: WF[%d][%d]: barrier_cnt = %d\n", 658 computeUnit->cu_id, gpuDynInst->simdId, | 655 } 656 657 DPRINTF(GPUSync, "CU%d: WF[%d][%d]: barrier_cnt = %d\n", 658 computeUnit->cu_id, gpuDynInst->simdId, |
659 gpuDynInst->wfSlotId, w->barrier_cnt); | 659 gpuDynInst->wfSlotId, w->barrierCnt); |
660 661 if (gpuDynInst->useContinuation) { 662 assert(gpuDynInst->scope != Enums::MEMORY_SCOPE_NONE); 663 gpuDynInst->execContinuation(gpuDynInst->staticInstruction(), 664 gpuDynInst); 665 } 666 667 delete pkt->senderState; --- 1126 unchanged lines hidden --- | 660 661 if (gpuDynInst->useContinuation) { 662 assert(gpuDynInst->scope != Enums::MEMORY_SCOPE_NONE); 663 gpuDynInst->execContinuation(gpuDynInst->staticInstruction(), 664 gpuDynInst); 665 } 666 667 delete pkt->senderState; --- 1126 unchanged lines hidden --- |