dispatcher.cc revision 11435
14661Sksewell@umich.edu/* 24661Sksewell@umich.edu * Copyright (c) 2011-2015 Advanced Micro Devices, Inc. 35268Sksewell@umich.edu * All rights reserved. 45268Sksewell@umich.edu * 55268Sksewell@umich.edu * For use for simulation and test purposes only 65268Sksewell@umich.edu * 75268Sksewell@umich.edu * Redistribution and use in source and binary forms, with or without 85268Sksewell@umich.edu * modification, are permitted provided that the following conditions are met: 95268Sksewell@umich.edu * 105268Sksewell@umich.edu * 1. Redistributions of source code must retain the above copyright notice, 115268Sksewell@umich.edu * this list of conditions and the following disclaimer. 125268Sksewell@umich.edu * 135268Sksewell@umich.edu * 2. Redistributions in binary form must reproduce the above copyright notice, 145268Sksewell@umich.edu * this list of conditions and the following disclaimer in the documentation 155268Sksewell@umich.edu * and/or other materials provided with the distribution. 165268Sksewell@umich.edu * 175268Sksewell@umich.edu * 3. Neither the name of the copyright holder nor the names of its contributors 185268Sksewell@umich.edu * may be used to endorse or promote products derived from this software 195268Sksewell@umich.edu * without specific prior written permission. 205268Sksewell@umich.edu * 215268Sksewell@umich.edu * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 225268Sksewell@umich.edu * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 235268Sksewell@umich.edu * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 245268Sksewell@umich.edu * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 255268Sksewell@umich.edu * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 265268Sksewell@umich.edu * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 275268Sksewell@umich.edu * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 285268Sksewell@umich.edu * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 295268Sksewell@umich.edu * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 305268Sksewell@umich.edu * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 314661Sksewell@umich.edu * POSSIBILITY OF SUCH DAMAGE. 324661Sksewell@umich.edu * 334661Sksewell@umich.edu * Author: Brad Beckmann, Marc Orr 344661Sksewell@umich.edu */ 354661Sksewell@umich.edu 364661Sksewell@umich.edu 374661Sksewell@umich.edu#include "gpu-compute/dispatcher.hh" 384661Sksewell@umich.edu 394661Sksewell@umich.edu#include "cpu/base.hh" 404661Sksewell@umich.edu#include "debug/GPUDisp.hh" 414661Sksewell@umich.edu#include "gpu-compute/cl_driver.hh" 424661Sksewell@umich.edu#include "gpu-compute/cl_event.hh" 434661Sksewell@umich.edu#include "gpu-compute/shader.hh" 444661Sksewell@umich.edu#include "gpu-compute/wavefront.hh" 454661Sksewell@umich.edu#include "mem/packet_access.hh" 464661Sksewell@umich.edu 474661Sksewell@umich.eduGpuDispatcher *GpuDispatcher::instance = nullptr; 484661Sksewell@umich.edu 494661Sksewell@umich.eduGpuDispatcher::GpuDispatcher(const Params *p) 504661Sksewell@umich.edu : DmaDevice(p), _masterId(p->system->getMasterId(name() + ".disp")), 514661Sksewell@umich.edu pioAddr(p->pio_addr), pioSize(4096), pioDelay(p->pio_latency), 524661Sksewell@umich.edu dispatchCount(0), dispatchActive(false), cpu(p->cpu), 534661Sksewell@umich.edu shader(p->shader_pointer), driver(p->cl_driver), tickEvent(this) 544661Sksewell@umich.edu{ 554661Sksewell@umich.edu shader->handshake(this); 564661Sksewell@umich.edu driver->handshake(this); 574661Sksewell@umich.edu 584661Sksewell@umich.edu ndRange.wg_disp_rem = false; 594661Sksewell@umich.edu ndRange.globalWgId = 0; 604661Sksewell@umich.edu 614661Sksewell@umich.edu schedule(&tickEvent, 0); 624661Sksewell@umich.edu 634661Sksewell@umich.edu // translation port for the dispatcher 644661Sksewell@umich.edu tlbPort = new TLBPort(csprintf("%s-port%d", name()), this); 654661Sksewell@umich.edu 664661Sksewell@umich.edu num_kernelLaunched 6710196SCurtis.Dunham@arm.com .name(name() + ".num_kernel_launched") 684661Sksewell@umich.edu .desc("number of kernel launched") 694661Sksewell@umich.edu ; 704661Sksewell@umich.edu} 714661Sksewell@umich.edu 724661Sksewell@umich.eduGpuDispatcher *GpuDispatcherParams::create() 734661Sksewell@umich.edu{ 744661Sksewell@umich.edu GpuDispatcher *dispatcher = new GpuDispatcher(this); 754661Sksewell@umich.edu GpuDispatcher::setInstance(dispatcher); 764661Sksewell@umich.edu 774661Sksewell@umich.edu return GpuDispatcher::getInstance(); 784661Sksewell@umich.edu} 794661Sksewell@umich.edu 804661Sksewell@umich.eduvoid 814661Sksewell@umich.eduGpuDispatcher::serialize(CheckpointOut &cp) const 8210474Sandreas.hansson@arm.com{ 834661Sksewell@umich.edu Tick event_tick = 0; 844661Sksewell@umich.edu 854661Sksewell@umich.edu if (ndRange.wg_disp_rem) 864661Sksewell@umich.edu fatal("Checkpointing not supported during active workgroup execution"); 8710474Sandreas.hansson@arm.com 884661Sksewell@umich.edu if (tickEvent.scheduled()) 894661Sksewell@umich.edu event_tick = tickEvent.when(); 904661Sksewell@umich.edu 914661Sksewell@umich.edu SERIALIZE_SCALAR(event_tick); 924661Sksewell@umich.edu 934661Sksewell@umich.edu} 944661Sksewell@umich.edu 954661Sksewell@umich.eduvoid 964661Sksewell@umich.eduGpuDispatcher::unserialize(CheckpointIn &cp) 974661Sksewell@umich.edu{ 984661Sksewell@umich.edu Tick event_tick; 994661Sksewell@umich.edu 10010196SCurtis.Dunham@arm.com if (tickEvent.scheduled()) 1014661Sksewell@umich.edu deschedule(&tickEvent); 1024661Sksewell@umich.edu 1034661Sksewell@umich.edu UNSERIALIZE_SCALAR(event_tick); 1044661Sksewell@umich.edu 1054661Sksewell@umich.edu if (event_tick) 1064661Sksewell@umich.edu schedule(&tickEvent, event_tick); 1074661Sksewell@umich.edu} 1084661Sksewell@umich.edu 1094661Sksewell@umich.eduAddrRangeList 1104661Sksewell@umich.eduGpuDispatcher::getAddrRanges() const 1114661Sksewell@umich.edu{ 1124661Sksewell@umich.edu AddrRangeList ranges; 1134661Sksewell@umich.edu 1144661Sksewell@umich.edu DPRINTF(GPUDisp, "dispatcher registering addr range at %#x size %#x\n", 11510474Sandreas.hansson@arm.com pioAddr, pioSize); 1164661Sksewell@umich.edu 1174661Sksewell@umich.edu ranges.push_back(RangeSize(pioAddr, pioSize)); 1184661Sksewell@umich.edu 1194661Sksewell@umich.edu return ranges; 12010474Sandreas.hansson@arm.com} 1214661Sksewell@umich.edu 1224661Sksewell@umich.eduTick 1234661Sksewell@umich.eduGpuDispatcher::read(PacketPtr pkt) 1244661Sksewell@umich.edu{ 1254661Sksewell@umich.edu assert(pkt->getAddr() >= pioAddr); 1264661Sksewell@umich.edu assert(pkt->getAddr() < pioAddr + pioSize); 1274661Sksewell@umich.edu 1284661Sksewell@umich.edu int offset = pkt->getAddr() - pioAddr; 1294661Sksewell@umich.edu pkt->allocate(); 1304661Sksewell@umich.edu 1314661Sksewell@umich.edu DPRINTF(GPUDisp, " read register %#x size=%d\n", offset, pkt->getSize()); 1324661Sksewell@umich.edu 1334661Sksewell@umich.edu if (offset < 8) { 1344661Sksewell@umich.edu assert(!offset); 1354661Sksewell@umich.edu assert(pkt->getSize() == 8); 1364661Sksewell@umich.edu 1374661Sksewell@umich.edu uint64_t retval = dispatchActive; 1389554Sandreas.hansson@arm.com pkt->set(retval); 1399554Sandreas.hansson@arm.com } else { 1409554Sandreas.hansson@arm.com offset -= 8; 1419554Sandreas.hansson@arm.com assert(offset + pkt->getSize() < sizeof(HsaQueueEntry)); 1429554Sandreas.hansson@arm.com char *curTaskPtr = (char*)&curTask; 1439554Sandreas.hansson@arm.com 1444661Sksewell@umich.edu memcpy(pkt->getPtr<const void*>(), curTaskPtr + offset, pkt->getSize()); 1454661Sksewell@umich.edu } 1464661Sksewell@umich.edu 1474661Sksewell@umich.edu pkt->makeAtomicResponse(); 1484661Sksewell@umich.edu 1498564Sgblack@eecs.umich.edu return pioDelay; 15010196SCurtis.Dunham@arm.com} 1514661Sksewell@umich.edu 1528738Sgblack@eecs.umich.eduTick 1534661Sksewell@umich.eduGpuDispatcher::write(PacketPtr pkt) 1544661Sksewell@umich.edu{ 1554661Sksewell@umich.edu assert(pkt->getAddr() >= pioAddr); 1564661Sksewell@umich.edu assert(pkt->getAddr() < pioAddr + pioSize); 1578564Sgblack@eecs.umich.edu 15810196SCurtis.Dunham@arm.com int offset = pkt->getAddr() - pioAddr; 1594661Sksewell@umich.edu 1608738Sgblack@eecs.umich.edu#if TRACING_ON 1614661Sksewell@umich.edu uint64_t data_val = 0; 1624661Sksewell@umich.edu 1634661Sksewell@umich.edu switch (pkt->getSize()) { 1644661Sksewell@umich.edu case 1: 1654661Sksewell@umich.edu data_val = pkt->get<uint8_t>(); 1664661Sksewell@umich.edu break; 1674661Sksewell@umich.edu case 2: 1684661Sksewell@umich.edu data_val = pkt->get<uint16_t>(); 1694661Sksewell@umich.edu break; 1704661Sksewell@umich.edu case 4: 1714661Sksewell@umich.edu data_val = pkt->get<uint32_t>(); 1724661Sksewell@umich.edu break; 1734661Sksewell@umich.edu case 8: 1744661Sksewell@umich.edu data_val = pkt->get<uint64_t>(); 1754661Sksewell@umich.edu break; 1764661Sksewell@umich.edu default: 1775222Sksewell@umich.edu DPRINTF(GPUDisp, "bad size %d\n", pkt->getSize()); 1785222Sksewell@umich.edu } 1794661Sksewell@umich.edu 1804661Sksewell@umich.edu DPRINTF(GPUDisp, "write register %#x value %#x size=%d\n", offset, data_val, 1814661Sksewell@umich.edu pkt->getSize()); 1824661Sksewell@umich.edu#endif 1834661Sksewell@umich.edu if (!offset) { 1844661Sksewell@umich.edu static int nextId = 0; 1854661Sksewell@umich.edu 1864661Sksewell@umich.edu // The depends field of the qstruct, which was previously unused, is 1874661Sksewell@umich.edu // used to communicate with simulated application. 1884661Sksewell@umich.edu if (curTask.depends) { 1894661Sksewell@umich.edu HostState hs; 1904661Sksewell@umich.edu shader->ReadMem((uint64_t)(curTask.depends), &hs, 1914661Sksewell@umich.edu sizeof(HostState), 0); 1924661Sksewell@umich.edu 1934661Sksewell@umich.edu // update event start time (in nano-seconds) 1944661Sksewell@umich.edu uint64_t start = curTick() / 1000; 1954661Sksewell@umich.edu 1964661Sksewell@umich.edu shader->WriteMem((uint64_t)(&((_cl_event*)hs.event)->start), 1974661Sksewell@umich.edu &start, sizeof(uint64_t), 0); 1984661Sksewell@umich.edu } 1994661Sksewell@umich.edu 2004661Sksewell@umich.edu // launch kernel 2014661Sksewell@umich.edu ++num_kernelLaunched; 2024661Sksewell@umich.edu 2034661Sksewell@umich.edu NDRange *ndr = &(ndRangeMap[nextId]); 2044661Sksewell@umich.edu // copy dispatch info 2054661Sksewell@umich.edu ndr->q = curTask; 2064661Sksewell@umich.edu 2074661Sksewell@umich.edu // update the numDispTask polled by the runtime 2085222Sksewell@umich.edu accessUserVar(cpu, (uint64_t)(curTask.numDispLeft), 0, 1); 2095222Sksewell@umich.edu 2104661Sksewell@umich.edu ndr->numWgTotal = 1; 2114661Sksewell@umich.edu 2124661Sksewell@umich.edu for (int i = 0; i < 3; ++i) { 2134661Sksewell@umich.edu ndr->wgId[i] = 0; 2144661Sksewell@umich.edu ndr->numWg[i] = divCeil(curTask.gdSize[i], curTask.wgSize[i]); 2154661Sksewell@umich.edu ndr->numWgTotal *= ndr->numWg[i]; 2164661Sksewell@umich.edu } 2174661Sksewell@umich.edu 2184661Sksewell@umich.edu ndr->numWgCompleted = 0; 2194661Sksewell@umich.edu ndr->globalWgId = 0; 220 ndr->wg_disp_rem = true; 221 ndr->execDone = false; 222 ndr->addrToNotify = (volatile bool*)curTask.addrToNotify; 223 ndr->numDispLeft = (volatile uint32_t*)curTask.numDispLeft; 224 ndr->dispatchId = nextId; 225 ndr->curCid = pkt->req->contextId(); 226 DPRINTF(GPUDisp, "launching kernel %d\n",nextId); 227 execIds.push(nextId); 228 ++nextId; 229 230 dispatchActive = true; 231 232 if (!tickEvent.scheduled()) { 233 schedule(&tickEvent, curTick() + shader->ticks(1)); 234 } 235 } else { 236 // populate current task struct 237 // first 64 bits are launch reg 238 offset -= 8; 239 assert(offset < sizeof(HsaQueueEntry)); 240 char *curTaskPtr = (char*)&curTask; 241 memcpy(curTaskPtr + offset, pkt->getPtr<const void*>(), pkt->getSize()); 242 } 243 244 pkt->makeAtomicResponse(); 245 246 return pioDelay; 247} 248 249 250BaseMasterPort& 251GpuDispatcher::getMasterPort(const std::string &if_name, PortID idx) 252{ 253 if (if_name == "translation_port") { 254 return *tlbPort; 255 } 256 257 return DmaDevice::getMasterPort(if_name, idx); 258} 259 260void 261GpuDispatcher::exec() 262{ 263 int fail_count = 0; 264 265 // There are potentially multiple outstanding kernel launches. 266 // It is possible that the workgroups in a different kernel 267 // can fit on the GPU even if another kernel's workgroups cannot 268 DPRINTF(GPUDisp, "Launching %d Kernels\n", execIds.size()); 269 270 while (execIds.size() > fail_count) { 271 int execId = execIds.front(); 272 273 while (ndRangeMap[execId].wg_disp_rem) { 274 //update the thread context 275 shader->updateContext(ndRangeMap[execId].curCid); 276 277 // attempt to dispatch_workgroup 278 if (!shader->dispatch_workgroups(&ndRangeMap[execId])) { 279 // if we failed try the next kernel, 280 // it may have smaller workgroups. 281 // put it on the queue to rety latter 282 DPRINTF(GPUDisp, "kernel %d failed to launch\n", execId); 283 execIds.push(execId); 284 ++fail_count; 285 break; 286 } 287 } 288 // let's try the next kernel_id 289 execIds.pop(); 290 } 291 292 DPRINTF(GPUDisp, "Returning %d Kernels\n", doneIds.size()); 293 294 if (doneIds.size() && cpu) { 295 shader->hostWakeUp(cpu); 296 } 297 298 while (doneIds.size()) { 299 // wakeup the CPU if any Kernels completed this cycle 300 DPRINTF(GPUDisp, "WorkGroup %d completed\n", doneIds.front()); 301 doneIds.pop(); 302 } 303} 304 305void 306GpuDispatcher::notifyWgCompl(Wavefront *w) 307{ 308 int kern_id = w->kern_id; 309 DPRINTF(GPUDisp, "notify WgCompl %d\n",kern_id); 310 assert(ndRangeMap[kern_id].dispatchId == kern_id); 311 ndRangeMap[kern_id].numWgCompleted++; 312 313 if (ndRangeMap[kern_id].numWgCompleted == ndRangeMap[kern_id].numWgTotal) { 314 ndRangeMap[kern_id].execDone = true; 315 doneIds.push(kern_id); 316 317 if (ndRangeMap[kern_id].addrToNotify) { 318 accessUserVar(cpu, (uint64_t)(ndRangeMap[kern_id].addrToNotify), 1, 319 0); 320 } 321 322 accessUserVar(cpu, (uint64_t)(ndRangeMap[kern_id].numDispLeft), 0, -1); 323 324 // update event end time (in nano-seconds) 325 if (ndRangeMap[kern_id].q.depends) { 326 HostState *host_state = (HostState*)ndRangeMap[kern_id].q.depends; 327 uint64_t event; 328 shader->ReadMem((uint64_t)(&host_state->event), &event, 329 sizeof(uint64_t), 0); 330 331 uint64_t end = curTick() / 1000; 332 333 shader->WriteMem((uint64_t)(&((_cl_event*)event)->end), &end, 334 sizeof(uint64_t), 0); 335 } 336 } 337 338 if (!tickEvent.scheduled()) { 339 schedule(&tickEvent, curTick() + shader->ticks(1)); 340 } 341} 342 343void 344GpuDispatcher::scheduleDispatch() 345{ 346 if (!tickEvent.scheduled()) 347 schedule(&tickEvent, curTick() + shader->ticks(1)); 348} 349 350void 351GpuDispatcher::accessUserVar(BaseCPU *cpu, uint64_t addr, int val, int off) 352{ 353 if (cpu) { 354 if (off) { 355 shader->AccessMem(addr, &val, sizeof(int), 0, MemCmd::ReadReq, 356 true); 357 val += off; 358 } 359 360 shader->AccessMem(addr, &val, sizeof(int), 0, MemCmd::WriteReq, true); 361 } else { 362 panic("Cannot find host"); 363 } 364} 365 366GpuDispatcher::TickEvent::TickEvent(GpuDispatcher *_dispatcher) 367 : Event(CPU_Tick_Pri), dispatcher(_dispatcher) 368{ 369} 370 371void 372GpuDispatcher::TickEvent::process() 373{ 374 dispatcher->exec(); 375} 376 377const char* 378GpuDispatcher::TickEvent::description() const 379{ 380 return "GPU Dispatcher tick"; 381} 382 383// helper functions for driver to retrieve GPU attributes 384int 385GpuDispatcher::getNumCUs() 386{ 387 return shader->cuList.size(); 388} 389 390void 391GpuDispatcher::setFuncargsSize(int funcargs_size) 392{ 393 shader->funcargs_size = funcargs_size; 394} 395