Cross Reference: /gem5/src/gpu-compute/compute

Deleted Added

sdiff udiff text old ( 11639:2e8d4bd8108d ) new ( 11643:42a1873be45c )

full compact

compute_unit.cc (11639:2e8d4bd8108d)	compute_unit.cc (11643:42a1873be45c)
1/* 2 * Copyright (c) 2011-2015 Advanced Micro Devices, Inc. 3 * All rights reserved. 4 * 5 * For use for simulation and test purposes only 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions are met: --- 207 unchanged lines hidden (view full) --- 216 for (int i = 0; i< numSIMDs; ++i) { 217 vrf[i]->updateEvents(); 218 } 219} 220 221 222void 223ComputeUnit::StartWF(Wavefront *w, int trueWgSize[], int trueWgSizeTotal,	1/* 2 * Copyright (c) 2011-2015 Advanced Micro Devices, Inc. 3 * All rights reserved. 4 * 5 * For use for simulation and test purposes only 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions are met: --- 207 unchanged lines hidden (view full) --- 216 for (int i = 0; i< numSIMDs; ++i) { 217 vrf[i]->updateEvents(); 218 } 219} 220 221 222void 223ComputeUnit::StartWF(Wavefront *w, int trueWgSize[], int trueWgSizeTotal,
224 int cnt, LdsChunk *ldsChunk, NDRange *ndr)	224 int waveId, LdsChunk *ldsChunk, NDRange *ndr)
225{ 226 static int _n_wave = 0; 227 228 // Fill in Kernel state 229 FillKernelState(w, ndr); 230 231 VectorMask init_mask; 232 init_mask.reset(); 233 234 for (int k = 0; k < wfSize(); ++k) {	225{ 226 static int _n_wave = 0; 227 228 // Fill in Kernel state 229 FillKernelState(w, ndr); 230 231 VectorMask init_mask; 232 init_mask.reset(); 233 234 for (int k = 0; k < wfSize(); ++k) {
235 if (k + cnt * wfSize() < trueWgSizeTotal)	235 if (k + waveId * wfSize() < trueWgSizeTotal)
236 init_mask[k] = 1; 237 } 238 239 w->kernId = ndr->dispatchId;	236 init_mask[k] = 1; 237 } 238 239 w->kernId = ndr->dispatchId;
240 w->dynWaveId = cnt;	240 w->wfId = waveId;
241 w->initMask = init_mask.to_ullong(); 242 243 for (int k = 0; k < wfSize(); ++k) {	241 w->initMask = init_mask.to_ullong(); 242 243 for (int k = 0; k < wfSize(); ++k) {
244 w->workItemId[0][k] = (k+cnt*wfSize()) % trueWgSize[0];	244 w->workItemId[0][k] = (k + waveId * wfSize()) % trueWgSize[0];
245 w->workItemId[1][k] =	245 w->workItemId[1][k] =
246 ((k + cnt * wfSize()) / trueWgSize[0]) % trueWgSize[1];	246 ((k + waveId * wfSize()) / trueWgSize[0]) % trueWgSize[1];
247 w->workItemId[2][k] =	247 w->workItemId[2][k] =
248 (k + cnt * wfSize()) / (trueWgSize[0] * trueWgSize[1]);	248 (k + waveId * wfSize()) / (trueWgSize[0] * trueWgSize[1]);
249 250 w->workItemFlatId[k] = w->workItemId[2][k] * trueWgSize[0] * 251 trueWgSize[1] + w->workItemId[1][k] * trueWgSize[0] + 252 w->workItemId[0][k]; 253 } 254 255 w->barrierSlots = divCeil(trueWgSizeTotal, wfSize()); 256 --- 32 unchanged lines hidden (view full) --- 289 w->instructionBuffer.clear(); 290 291 if (w->pendingFetch) 292 w->dropFetch = true; 293 294 // is this the last wavefront in the workgroup 295 // if set the spillWidth to be the remaining work-items 296 // so that the vector access is correct	249 250 w->workItemFlatId[k] = w->workItemId[2][k] * trueWgSize[0] * 251 trueWgSize[1] + w->workItemId[1][k] * trueWgSize[0] + 252 w->workItemId[0][k]; 253 } 254 255 w->barrierSlots = divCeil(trueWgSizeTotal, wfSize()); 256 --- 32 unchanged lines hidden (view full) --- 289 w->instructionBuffer.clear(); 290 291 if (w->pendingFetch) 292 w->dropFetch = true; 293 294 // is this the last wavefront in the workgroup 295 // if set the spillWidth to be the remaining work-items 296 // so that the vector access is correct
297 if ((cnt + 1) * wfSize() >= trueWgSizeTotal) { 298 w->spillWidth = trueWgSizeTotal - (cnt * wfSize());	297 if ((waveId + 1) * wfSize() >= trueWgSizeTotal) { 298 w->spillWidth = trueWgSizeTotal - (waveId * wfSize());
299 } else { 300 w->spillWidth = wfSize(); 301 } 302 303 DPRINTF(GPUDisp, "Scheduling wfDynId/barrier_id %d/%d on CU%d: " 304 "WF[%d][%d]\n", _n_wave, barrier_id, cu_id, w->simdId, w->wfSlotId); 305 306 w->start(++_n_wave, ndr->q.code_ptr); --- 29 unchanged lines hidden (view full) --- 336 trueWgSize[d] = std::min(ndr->q.wgSize[d], ndr->q.gdSize[d] - 337 ndr->wgId[d] * ndr->q.wgSize[d]); 338 339 trueWgSizeTotal *= trueWgSize[d]; 340 } 341 342 // calculate the number of 32-bit vector registers required by wavefront 343 int vregDemand = ndr->q.sRegCount + (2 * ndr->q.dRegCount);	299 } else { 300 w->spillWidth = wfSize(); 301 } 302 303 DPRINTF(GPUDisp, "Scheduling wfDynId/barrier_id %d/%d on CU%d: " 304 "WF[%d][%d]\n", _n_wave, barrier_id, cu_id, w->simdId, w->wfSlotId); 305 306 w->start(++_n_wave, ndr->q.code_ptr); --- 29 unchanged lines hidden (view full) --- 336 trueWgSize[d] = std::min(ndr->q.wgSize[d], ndr->q.gdSize[d] - 337 ndr->wgId[d] * ndr->q.wgSize[d]); 338 339 trueWgSizeTotal *= trueWgSize[d]; 340 } 341 342 // calculate the number of 32-bit vector registers required by wavefront 343 int vregDemand = ndr->q.sRegCount + (2 * ndr->q.dRegCount);
344 int cnt = 0;	344 int wave_id = 0;
345 346 // Assign WFs by spreading them across SIMDs, 1 WF per SIMD at a time 347 for (int m = 0; m < shader->n_wf * numSIMDs; ++m) { 348 Wavefront *w = wfList[m % numSIMDs][m / numSIMDs]; 349 // Check if this wavefront slot is available: 350 // It must be stopped and not waiting 351 // for a release to complete S_RETURNING 352 if (w->status == Wavefront::S_STOPPED) { 353 // if we have scheduled all work items then stop 354 // scheduling wavefronts	345 346 // Assign WFs by spreading them across SIMDs, 1 WF per SIMD at a time 347 for (int m = 0; m < shader->n_wf * numSIMDs; ++m) { 348 Wavefront *w = wfList[m % numSIMDs][m / numSIMDs]; 349 // Check if this wavefront slot is available: 350 // It must be stopped and not waiting 351 // for a release to complete S_RETURNING 352 if (w->status == Wavefront::S_STOPPED) { 353 // if we have scheduled all work items then stop 354 // scheduling wavefronts
355 if (cnt * wfSize() >= trueWgSizeTotal)	355 if (wave_id * wfSize() >= trueWgSizeTotal)
356 break; 357 358 // reserve vector registers for the scheduled wavefront 359 assert(vectorRegsReserved[m % numSIMDs] <= numVecRegsPerSimd); 360 uint32_t normSize = 0; 361 362 w->startVgprIndex = vrf[m % numSIMDs]->manager-> 363 allocateRegion(vregDemand, &normSize); 364 365 w->reservedVectorRegs = normSize; 366 vectorRegsReserved[m % numSIMDs] += w->reservedVectorRegs; 367	356 break; 357 358 // reserve vector registers for the scheduled wavefront 359 assert(vectorRegsReserved[m % numSIMDs] <= numVecRegsPerSimd); 360 uint32_t normSize = 0; 361 362 w->startVgprIndex = vrf[m % numSIMDs]->manager-> 363 allocateRegion(vregDemand, &normSize); 364 365 w->reservedVectorRegs = normSize; 366 vectorRegsReserved[m % numSIMDs] += w->reservedVectorRegs; 367
368 StartWF(w, trueWgSize, trueWgSizeTotal, cnt, ldsChunk, ndr); 369 ++cnt;	368 StartWF(w, trueWgSize, trueWgSizeTotal, wave_id, ldsChunk, ndr); 369 ++wave_id;
370 } 371 } 372 ++barrier_id; 373} 374 375int 376ComputeUnit::ReadyWorkgroup(NDRange *ndr) 377{ --- 1416 unchanged lines hidden ---	370 } 371 } 372 ++barrier_id; 373} 374 375int 376ComputeUnit::ReadyWorkgroup(NDRange *ndr) 377{ --- 1416 unchanged lines hidden ---